diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetCountryEngagementRealTimeAggregateFeatureHydrator.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetCountryEngagementRealTimeAggregateFeatureHydrator.docx new file mode 100644 index 000000000..ad7648571 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetCountryEngagementRealTimeAggregateFeatureHydrator.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetCountryEngagementRealTimeAggregateFeatureHydrator.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetCountryEngagementRealTimeAggregateFeatureHydrator.scala deleted file mode 100644 index cd9e34a3e..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetCountryEngagementRealTimeAggregateFeatureHydrator.scala +++ /dev/null @@ -1,58 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.feature_hydrator.real_time_aggregates - -import com.google.inject.name.Named -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.home_mixer.param.HomeMixerInjectionNames.TweetCountryEngagementCache -import com.twitter.home_mixer.util.CandidatesUtil -import com.twitter.ml.api.DataRecord -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.FeatureWithDefaultOnFailure -import com.twitter.product_mixer.core.feature.datarecord.DataRecordInAFeature -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.FeatureHydratorIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.servo.cache.ReadCache -import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup -import com.twitter.timelines.prediction.common.aggregates.real_time.TimelinesOnlineAggregationFeaturesOnlyConfig._ -import javax.inject.Inject -import javax.inject.Singleton - -object TweetCountryEngagementRealTimeAggregateFeature - extends DataRecordInAFeature[TweetCandidate] - with FeatureWithDefaultOnFailure[TweetCandidate, DataRecord] { - override def defaultValue: DataRecord = new DataRecord() -} - -@Singleton -class TweetCountryEngagementRealTimeAggregateFeatureHydrator @Inject() ( - @Named(TweetCountryEngagementCache) override val client: ReadCache[(Long, String), DataRecord], - override val statsReceiver: StatsReceiver) - extends BaseRealTimeAggregateBulkCandidateFeatureHydrator[(Long, String)] { - - override val identifier: FeatureHydratorIdentifier = - FeatureHydratorIdentifier("TweetCountryEngagementRealTimeAggregate") - - override val outputFeature: DataRecordInAFeature[TweetCandidate] = - TweetCountryEngagementRealTimeAggregateFeature - - override val aggregateGroups: Seq[AggregateGroup] = Seq( - tweetCountryRealTimeAggregates, - tweetCountryPrivateEngagementsRealTimeAggregates - ) - - override val aggregateGroupToPrefix: Map[AggregateGroup, String] = Map( - tweetCountryRealTimeAggregates -> "tweet-country_code.timelines.tweet_country_engagement_real_time_aggregates.", - tweetCountryPrivateEngagementsRealTimeAggregates -> "tweet-country_code.timelines.tweet_country_private_engagement_real_time_aggregates." - ) - - override def keysFromQueryAndCandidates( - query: PipelineQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Seq[Option[(Long, String)]] = { - val countryCode = query.clientContext.countryCode - candidates.map { candidate => - val originalTweetId = CandidatesUtil.getOriginalTweetId(candidate) - countryCode.map((originalTweetId, _)) - } - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetEngagementRealTimeAggregateFeatureHydrator.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetEngagementRealTimeAggregateFeatureHydrator.docx new file mode 100644 index 000000000..61f029061 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetEngagementRealTimeAggregateFeatureHydrator.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetEngagementRealTimeAggregateFeatureHydrator.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetEngagementRealTimeAggregateFeatureHydrator.scala deleted file mode 100644 index 99bae79d9..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TweetEngagementRealTimeAggregateFeatureHydrator.scala +++ /dev/null @@ -1,61 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.feature_hydrator.real_time_aggregates - -import com.google.inject.name.Named -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.home_mixer.param.HomeMixerInjectionNames.TweetEngagementCache -import com.twitter.home_mixer.util.CandidatesUtil -import com.twitter.ml.api.DataRecord -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.FeatureWithDefaultOnFailure -import com.twitter.product_mixer.core.feature.datarecord.DataRecordInAFeature -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.FeatureHydratorIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.servo.cache.ReadCache -import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup -import com.twitter.timelines.prediction.common.aggregates.real_time.TimelinesOnlineAggregationFeaturesOnlyConfig._ -import javax.inject.Inject -import javax.inject.Singleton - -object TweetEngagementRealTimeAggregateFeature - extends DataRecordInAFeature[TweetCandidate] - with FeatureWithDefaultOnFailure[TweetCandidate, DataRecord] { - override def defaultValue: DataRecord = new DataRecord() -} - -@Singleton -class TweetEngagementRealTimeAggregateFeatureHydrator @Inject() ( - @Named(TweetEngagementCache) override val client: ReadCache[Long, DataRecord], - override val statsReceiver: StatsReceiver) - extends BaseRealTimeAggregateBulkCandidateFeatureHydrator[Long] { - - override val identifier: FeatureHydratorIdentifier = - FeatureHydratorIdentifier("TweetEngagementRealTimeAggregate") - - override val outputFeature: DataRecordInAFeature[TweetCandidate] = - TweetEngagementRealTimeAggregateFeature - - override val aggregateGroups: Seq[AggregateGroup] = Seq( - tweetEngagement30MinuteCountsProd, - tweetEngagementTotalCountsProd, - tweetEngagementUserStateRealTimeAggregatesProd, - tweetNegativeEngagementUserStateRealTimeAggregates, - tweetNegativeEngagement6HourCounts, - tweetNegativeEngagementTotalCounts, - tweetShareEngagementsRealTimeAggregates, - tweetBCEDwellEngagementsRealTimeAggregates - ) - - override val aggregateGroupToPrefix: Map[AggregateGroup, String] = Map( - tweetShareEngagementsRealTimeAggregates -> "original_tweet.timelines.tweet_share_engagements_real_time_aggregates.", - tweetBCEDwellEngagementsRealTimeAggregates -> "original_tweet.timelines.tweet_bce_dwell_engagements_real_time_aggregates." - ) - - override def keysFromQueryAndCandidates( - query: PipelineQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Seq[Option[Long]] = { - candidates - .map(candidate => Some(CandidatesUtil.getOriginalTweetId(candidate))) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TwitterListEngagementRealTimeAggregateFeatureHydrator.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TwitterListEngagementRealTimeAggregateFeatureHydrator.docx new file mode 100644 index 000000000..f078f8d69 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TwitterListEngagementRealTimeAggregateFeatureHydrator.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TwitterListEngagementRealTimeAggregateFeatureHydrator.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TwitterListEngagementRealTimeAggregateFeatureHydrator.scala deleted file mode 100644 index b5f3af2a9..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/TwitterListEngagementRealTimeAggregateFeatureHydrator.scala +++ /dev/null @@ -1,57 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.feature_hydrator.real_time_aggregates - -import com.google.inject.name.Named -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.home_mixer.model.HomeFeatures.TwitterListIdFeature -import com.twitter.home_mixer.param.HomeMixerInjectionNames.TwitterListEngagementCache -import com.twitter.ml.api.DataRecord -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.FeatureWithDefaultOnFailure -import com.twitter.product_mixer.core.feature.datarecord.DataRecordInAFeature -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.FeatureHydratorIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.servo.cache.ReadCache -import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup -import com.twitter.timelines.prediction.common.aggregates.real_time.TimelinesOnlineAggregationFeaturesOnlyConfig._ -import javax.inject.Inject -import javax.inject.Singleton - -object TwitterListEngagementRealTimeAggregateFeature - extends DataRecordInAFeature[TweetCandidate] - with FeatureWithDefaultOnFailure[TweetCandidate, DataRecord] { - override def defaultValue: DataRecord = new DataRecord() -} - -@Singleton -class TwitterListEngagementRealTimeAggregateFeatureHydrator @Inject() ( - @Named(TwitterListEngagementCache) override val client: ReadCache[Long, DataRecord], - override val statsReceiver: StatsReceiver) - extends BaseRealTimeAggregateBulkCandidateFeatureHydrator[Long] { - - override val identifier: FeatureHydratorIdentifier = - FeatureHydratorIdentifier("TwitterListEngagementRealTimeAggregate") - - override val outputFeature: DataRecordInAFeature[TweetCandidate] = - TwitterListEngagementRealTimeAggregateFeature - - override val aggregateGroups: Seq[AggregateGroup] = Seq( - listEngagementRealTimeAggregatesProd - ) - - override val aggregateGroupToPrefix: Map[AggregateGroup, String] = Map( - listEngagementRealTimeAggregatesProd -> "twitter_list.timelines.twitter_list_engagement_real_time_aggregates." - ) - - override def keysFromQueryAndCandidates( - query: PipelineQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Seq[Option[Long]] = { - candidates.map { candidate => - candidate.features - .getTry(TwitterListIdFeature) - .toOption - .flatten - } - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserAuthorEngagementRealTimeAggregateFeatureHydrator.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserAuthorEngagementRealTimeAggregateFeatureHydrator.docx new file mode 100644 index 000000000..e9fa05120 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserAuthorEngagementRealTimeAggregateFeatureHydrator.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserAuthorEngagementRealTimeAggregateFeatureHydrator.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserAuthorEngagementRealTimeAggregateFeatureHydrator.scala deleted file mode 100644 index 2c13fc0f6..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserAuthorEngagementRealTimeAggregateFeatureHydrator.scala +++ /dev/null @@ -1,59 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.feature_hydrator.real_time_aggregates - -import com.google.inject.name.Named -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.home_mixer.param.HomeMixerInjectionNames.UserAuthorEngagementCache -import com.twitter.home_mixer.util.CandidatesUtil -import com.twitter.ml.api.DataRecord -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.FeatureWithDefaultOnFailure -import com.twitter.product_mixer.core.feature.datarecord.DataRecordInAFeature -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.FeatureHydratorIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.servo.cache.ReadCache -import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup -import com.twitter.timelines.prediction.common.aggregates.real_time.TimelinesOnlineAggregationFeaturesOnlyConfig._ -import javax.inject.Inject -import javax.inject.Singleton - -object UserAuthorEngagementRealTimeAggregateFeature - extends DataRecordInAFeature[TweetCandidate] - with FeatureWithDefaultOnFailure[TweetCandidate, DataRecord] { - override def defaultValue: DataRecord = new DataRecord() -} - -@Singleton -class UserAuthorEngagementRealTimeAggregateFeatureHydrator @Inject() ( - @Named(UserAuthorEngagementCache) override val client: ReadCache[(Long, Long), DataRecord], - override val statsReceiver: StatsReceiver) - extends BaseRealTimeAggregateBulkCandidateFeatureHydrator[(Long, Long)] { - - override val identifier: FeatureHydratorIdentifier = - FeatureHydratorIdentifier("UserAuthorEngagementRealTimeAggregate") - - override val outputFeature: DataRecordInAFeature[TweetCandidate] = - UserAuthorEngagementRealTimeAggregateFeature - - override val aggregateGroups: Seq[AggregateGroup] = Seq( - userAuthorEngagementRealTimeAggregatesProd, - userAuthorShareEngagementsRealTimeAggregates - ) - - override val aggregateGroupToPrefix: Map[AggregateGroup, String] = Map( - userAuthorEngagementRealTimeAggregatesProd -> "user-author.timelines.user_author_engagement_real_time_aggregates.", - userAuthorShareEngagementsRealTimeAggregates -> "user-author.timelines.user_author_share_engagements_real_time_aggregates." - ) - - override def keysFromQueryAndCandidates( - query: PipelineQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Seq[Option[(Long, Long)]] = { - val userId = query.getRequiredUserId - candidates.map { candidate => - CandidatesUtil - .getOriginalAuthorId(candidate.features) - .map((userId, _)) - } - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserEngagementRealTimeAggregatesFeatureHydrator.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserEngagementRealTimeAggregatesFeatureHydrator.docx new file mode 100644 index 000000000..cb8c1de43 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserEngagementRealTimeAggregatesFeatureHydrator.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserEngagementRealTimeAggregatesFeatureHydrator.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserEngagementRealTimeAggregatesFeatureHydrator.scala deleted file mode 100644 index cc05b52aa..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates/UserEngagementRealTimeAggregatesFeatureHydrator.scala +++ /dev/null @@ -1,56 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.feature_hydrator.real_time_aggregates - -import com.google.inject.name.Named -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.home_mixer.param.HomeMixerInjectionNames.UserEngagementCache -import com.twitter.ml.api.DataRecord -import com.twitter.product_mixer.core.feature.FeatureWithDefaultOnFailure -import com.twitter.product_mixer.core.feature.datarecord.DataRecordInAFeature -import com.twitter.product_mixer.core.model.common.identifier.FeatureHydratorIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.servo.cache.ReadCache -import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup -import com.twitter.timelines.prediction.common.aggregates.real_time.TimelinesOnlineAggregationFeaturesOnlyConfig._ -import javax.inject.Inject -import javax.inject.Singleton - -object UserEngagementRealTimeAggregateFeature - extends DataRecordInAFeature[PipelineQuery] - with FeatureWithDefaultOnFailure[PipelineQuery, DataRecord] { - override def defaultValue: DataRecord = new DataRecord() -} - -@Singleton -class UserEngagementRealTimeAggregatesFeatureHydrator @Inject() ( - @Named(UserEngagementCache) override val client: ReadCache[Long, DataRecord], - override val statsReceiver: StatsReceiver) - extends BaseRealTimeAggregateQueryFeatureHydrator[Long] { - - override val identifier: FeatureHydratorIdentifier = - FeatureHydratorIdentifier("UserEngagementRealTimeAggregates") - - override val outputFeature: DataRecordInAFeature[PipelineQuery] = - UserEngagementRealTimeAggregateFeature - - val aggregateGroups: Seq[AggregateGroup] = Seq( - userEngagementRealTimeAggregatesProd, - userShareEngagementsRealTimeAggregates, - userBCEDwellEngagementsRealTimeAggregates, - userEngagement48HourRealTimeAggregatesProd, - userNegativeEngagementAuthorUserState72HourRealTimeAggregates, - userNegativeEngagementAuthorUserStateRealTimeAggregates, - userProfileEngagementRealTimeAggregates, - ) - - override val aggregateGroupToPrefix: Map[AggregateGroup, String] = Map( - userShareEngagementsRealTimeAggregates -> "user.timelines.user_share_engagements_real_time_aggregates.", - userBCEDwellEngagementsRealTimeAggregates -> "user.timelines.user_bce_dwell_engagements_real_time_aggregates.", - userEngagement48HourRealTimeAggregatesProd -> "user.timelines.user_engagement_48_hour_real_time_aggregates.", - userNegativeEngagementAuthorUserState72HourRealTimeAggregates -> "user.timelines.user_negative_engagement_author_user_state_72_hour_real_time_aggregates.", - userProfileEngagementRealTimeAggregates -> "user.timelines.user_profile_engagement_real_time_aggregates." - ) - - override def keysFromQueryAndCandidates(query: PipelineQuery): Option[Long] = { - Some(query.getRequiredUserId) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/BUILD.bazel deleted file mode 100644 index 07a2e7cb3..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/BUILD.bazel +++ /dev/null @@ -1,14 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/model/candidate", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/filter", - "stitch/stitch-core", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/BUILD.docx new file mode 100644 index 000000000..ddecb521b Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/DuplicateConversationTweetsFilter.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/DuplicateConversationTweetsFilter.docx new file mode 100644 index 000000000..2777a3b19 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/DuplicateConversationTweetsFilter.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/DuplicateConversationTweetsFilter.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/DuplicateConversationTweetsFilter.scala deleted file mode 100644 index adc11d255..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/DuplicateConversationTweetsFilter.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.filter - -import com.twitter.home_mixer.model.HomeFeatures.AncestorsFeature -import com.twitter.home_mixer.util.CandidatesUtil -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.functional_component.filter.Filter -import com.twitter.product_mixer.core.functional_component.filter.FilterResult -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.FilterIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.stitch.Stitch - -/** - * Remove any candidate that is in the ancestor list of any reply, including retweets of ancestors. - * - * E.g. if B replied to A and D was a retweet of A, we would prefer to drop D since otherwise - * we may end up serving the same tweet twice in the timeline (e.g. serving both A->B and D). - */ -object DuplicateConversationTweetsFilter extends Filter[PipelineQuery, TweetCandidate] { - - override val identifier: FilterIdentifier = FilterIdentifier("DuplicateConversationTweets") - - override def apply( - query: PipelineQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Stitch[FilterResult[TweetCandidate]] = { - val allAncestors = candidates - .flatMap(_.features.getOrElse(AncestorsFeature, Seq.empty)) - .map(_.tweetId).toSet - - val (kept, removed) = candidates.partition { candidate => - !allAncestors.contains(CandidatesUtil.getOriginalTweetId(candidate)) - } - - Stitch.value(FilterResult(kept = kept.map(_.candidate), removed = removed.map(_.candidate))) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorFilter.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorFilter.docx new file mode 100644 index 000000000..a968fea76 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorFilter.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorFilter.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorFilter.scala deleted file mode 100644 index 3c6024268..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorFilter.scala +++ /dev/null @@ -1,38 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.filter - -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InNetworkFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRetweetFeature -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.CompetitorSetParam -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.functional_component.filter.Filter -import com.twitter.product_mixer.core.functional_component.filter.FilterResult -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.FilterIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.stitch.Stitch - -object OutOfNetworkCompetitorFilter extends Filter[PipelineQuery, TweetCandidate] { - - override val identifier: FilterIdentifier = FilterIdentifier("OutOfNetworkCompetitor") - - override def apply( - query: PipelineQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Stitch[FilterResult[TweetCandidate]] = { - val competitorAuthors = query.params(CompetitorSetParam) - val (removed, kept) = - candidates.partition(isOutOfNetworkTweetFromCompetitor(_, competitorAuthors)) - - Stitch.value(FilterResult(kept = kept.map(_.candidate), removed = removed.map(_.candidate))) - } - - def isOutOfNetworkTweetFromCompetitor( - candidate: CandidateWithFeatures[TweetCandidate], - competitorAuthors: Set[Long] - ): Boolean = { - !candidate.features.getOrElse(InNetworkFeature, true) && - !candidate.features.getOrElse(IsRetweetFeature, false) && - candidate.features.getOrElse(AuthorIdFeature, None).exists(competitorAuthors.contains) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorURLFilter.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorURLFilter.docx new file mode 100644 index 000000000..071b5bc68 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorURLFilter.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorURLFilter.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorURLFilter.scala deleted file mode 100644 index 00e2bb200..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/OutOfNetworkCompetitorURLFilter.scala +++ /dev/null @@ -1,38 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.filter - -import com.twitter.home_mixer.model.HomeFeatures.InNetworkFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRetweetFeature -import com.twitter.home_mixer.model.HomeFeatures.TweetUrlsFeature -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.CompetitorURLSeqParam -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.functional_component.filter.Filter -import com.twitter.product_mixer.core.functional_component.filter.FilterResult -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.FilterIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.stitch.Stitch - -object OutOfNetworkCompetitorURLFilter extends Filter[PipelineQuery, TweetCandidate] { - - override val identifier: FilterIdentifier = FilterIdentifier("OutOfNetworkCompetitorURL") - - override def apply( - query: PipelineQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Stitch[FilterResult[TweetCandidate]] = { - val competitorUrls = query.params(CompetitorURLSeqParam).toSet - val (removed, kept) = candidates.partition(hasOutOfNetworkUrlFromCompetitor(_, competitorUrls)) - - Stitch.value(FilterResult(kept = kept.map(_.candidate), removed = removed.map(_.candidate))) - } - - def hasOutOfNetworkUrlFromCompetitor( - candidate: CandidateWithFeatures[TweetCandidate], - competitorUrls: Set[String] - ): Boolean = { - !candidate.features.getOrElse(InNetworkFeature, true) && - !candidate.features.getOrElse(IsRetweetFeature, false) && - candidate.features - .getOrElse(TweetUrlsFeature, Seq.empty).toSet.intersect(competitorUrls).nonEmpty - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/RetweetSourceTweetRemovingFilter.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/RetweetSourceTweetRemovingFilter.docx new file mode 100644 index 000000000..d1446109c Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/RetweetSourceTweetRemovingFilter.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/RetweetSourceTweetRemovingFilter.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/RetweetSourceTweetRemovingFilter.scala deleted file mode 100644 index 5900756d3..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/RetweetSourceTweetRemovingFilter.scala +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.filter - -import com.twitter.home_mixer.model.HomeFeatures.EarlybirdFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature -import com.twitter.home_mixer.util.ReplyRetweetUtil -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.functional_component.filter.Filter -import com.twitter.product_mixer.core.functional_component.filter.FilterResult -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.FilterIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.stitch.Stitch - -/** - * This filter removes source tweets of retweets, added via second EB call in TLR - */ -object RetweetSourceTweetRemovingFilter extends Filter[PipelineQuery, TweetCandidate] { - - override val identifier: FilterIdentifier = FilterIdentifier("RetweetSourceTweetRemoving") - - override def apply( - query: PipelineQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Stitch[FilterResult[TweetCandidate]] = { - val (kept, removed) = - candidates.partition( - _.features.getOrElse(EarlybirdFeature, None).exists(_.isSourceTweet)) match { - case (sourceTweets, nonSourceTweets) => - val inReplyToTweetIds: Set[Long] = - nonSourceTweets - .filter(ReplyRetweetUtil.isEligibleReply(_)).flatMap( - _.features.getOrElse(InReplyToTweetIdFeature, None)).toSet - val (keptSourceTweets, removedSourceTweets) = sourceTweets - .map(_.candidate) - .partition(candidate => inReplyToTweetIds.contains(candidate.id)) - (nonSourceTweets.map(_.candidate) ++ keptSourceTweets, removedSourceTweets) - } - Stitch.value(FilterResult(kept = kept, removed = removed)) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/ScoredTweetsSocialContextFilter.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/ScoredTweetsSocialContextFilter.docx new file mode 100644 index 000000000..0c03ebc3b Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/ScoredTweetsSocialContextFilter.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/ScoredTweetsSocialContextFilter.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/ScoredTweetsSocialContextFilter.scala deleted file mode 100644 index fef427a6d..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/filter/ScoredTweetsSocialContextFilter.scala +++ /dev/null @@ -1,61 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.filter - -import com.twitter.home_mixer.model.HomeFeatures._ -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.functional_component.filter.Filter -import com.twitter.product_mixer.core.functional_component.filter.FilterResult -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.FilterIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.stitch.Stitch -import com.twitter.timelineservice.suggests.{thriftscala => st} - -object ScoredTweetsSocialContextFilter extends Filter[PipelineQuery, TweetCandidate] { - - override val identifier: FilterIdentifier = FilterIdentifier("ScoredTweetsSocialContext") - - // Tweets from candidate sources which don't need generic like/follow/topic proof - private val AllowedSources: Set[st.SuggestType] = Set( - st.SuggestType.RankedListTweet, - st.SuggestType.RecommendedTrendTweet, - st.SuggestType.MediaTweet - ) - - override def apply( - query: PipelineQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Stitch[FilterResult[TweetCandidate]] = { - val validTweetIds = candidates - .filter { candidate => - candidate.features.getOrElse(InNetworkFeature, true) || - candidate.features.getOrElse(SuggestTypeFeature, None).exists(AllowedSources.contains) || - candidate.features.getOrElse(InReplyToTweetIdFeature, None).isDefined || - hasLikedBySocialContext(candidate.features) || - hasFollowedBySocialContext(candidate.features) || - hasTopicSocialContext(candidate.features) - }.map(_.candidate.id).toSet - - val (kept, removed) = - candidates.map(_.candidate).partition(candidate => validTweetIds.contains(candidate.id)) - - Stitch.value(FilterResult(kept = kept, removed = removed)) - } - - private def hasLikedBySocialContext(candidateFeatures: FeatureMap): Boolean = - candidateFeatures - .getOrElse(SGSValidLikedByUserIdsFeature, Seq.empty) - .exists( - candidateFeatures - .getOrElse(PerspectiveFilteredLikedByUserIdsFeature, Seq.empty) - .toSet.contains - ) - - private def hasFollowedBySocialContext(candidateFeatures: FeatureMap): Boolean = - candidateFeatures.getOrElse(SGSValidFollowedByUserIdsFeature, Seq.empty).nonEmpty - - private def hasTopicSocialContext(candidateFeatures: FeatureMap): Boolean = { - candidateFeatures.getOrElse(TopicIdSocialContextFeature, None).isDefined && - candidateFeatures.getOrElse(TopicContextFunctionalityTypeFeature, None).isDefined - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/BUILD.bazel deleted file mode 100644 index 6d3fd9b1c..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/BUILD.bazel +++ /dev/null @@ -1,12 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util", - "home-mixer/thrift/src/main/thrift:thrift-scala", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/gate", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/BUILD.docx new file mode 100644 index 000000000..866f3b43d Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinCachedTweetsGate.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinCachedTweetsGate.docx new file mode 100644 index 000000000..335bac310 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinCachedTweetsGate.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinCachedTweetsGate.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinCachedTweetsGate.scala deleted file mode 100644 index bdeb33a92..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinCachedTweetsGate.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.gate - -import com.twitter.home_mixer.product.scored_tweets.gate.MinCachedTweetsGate.identifierSuffix -import com.twitter.home_mixer.util.CachedScoredTweetsHelper -import com.twitter.product_mixer.core.functional_component.gate.Gate -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.model.common.identifier.GateIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.stitch.Stitch -import com.twitter.timelines.configapi.Param - -case class MinCachedTweetsGate( - candidatePipelineIdentifier: CandidatePipelineIdentifier, - minCachedTweetsParam: Param[Int]) - extends Gate[PipelineQuery] { - - override val identifier: GateIdentifier = - GateIdentifier(candidatePipelineIdentifier + identifierSuffix) - - override def shouldContinue(query: PipelineQuery): Stitch[Boolean] = { - val minCachedTweets = query.params(minCachedTweetsParam) - val cachedScoredTweets = - query.features.map(CachedScoredTweetsHelper.unseenCachedScoredTweets).getOrElse(Seq.empty) - val numCachedTweets = cachedScoredTweets.count { tweet => - tweet.candidatePipelineIdentifier.exists( - CandidatePipelineIdentifier(_).equals(candidatePipelineIdentifier)) - } - Stitch.value(numCachedTweets < minCachedTweets) - } -} - -object MinCachedTweetsGate { - val identifierSuffix = "MinCachedTweets" -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinTimeSinceLastRequestGate.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinTimeSinceLastRequestGate.docx new file mode 100644 index 000000000..e9bf9ce0f Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinTimeSinceLastRequestGate.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinTimeSinceLastRequestGate.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinTimeSinceLastRequestGate.scala deleted file mode 100644 index 050f2ab67..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/gate/MinTimeSinceLastRequestGate.scala +++ /dev/null @@ -1,27 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.gate - -import com.twitter.conversions.DurationOps._ -import com.twitter.home_mixer.model.HomeFeatures.LastNonPollingTimeFeature -import com.twitter.product_mixer.core.functional_component.gate.Gate -import com.twitter.product_mixer.core.model.common.identifier.GateIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.stitch.Stitch - -/** - * Gate continues if the amount of time passed since the previous request is greater - * than the configured amount or if the previous request time in not available - */ -object MinTimeSinceLastRequestGate extends Gate[PipelineQuery] { - - override val identifier: GateIdentifier = GateIdentifier("TimeSinceLastRequest") - - private val MinTimeSinceLastRequest = 24.hours - - override def shouldContinue(query: PipelineQuery): Stitch[Boolean] = Stitch.value { - query.features.exists { features => - features - .getOrElse(LastNonPollingTimeFeature, None) - .forall(lnpt => (query.queryTime - lnpt) > MinTimeSinceLastRequest) - } - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/BUILD.bazel deleted file mode 100644 index 4347c1093..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/BUILD.bazel +++ /dev/null @@ -1,15 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model/request", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model", - "home-mixer/thrift/src/main/thrift:thrift-scala", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/marshaller/response/urt", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/premarshaller", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/model/common", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/BUILD.docx new file mode 100644 index 000000000..a70dbcd67 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseDomainMarshaller.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseDomainMarshaller.docx new file mode 100644 index 000000000..024415026 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseDomainMarshaller.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseDomainMarshaller.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseDomainMarshaller.scala deleted file mode 100644 index 796970bec..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseDomainMarshaller.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.marshaller - -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsQuery -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsResponse -import com.twitter.product_mixer.core.functional_component.premarshaller.DomainMarshaller -import com.twitter.product_mixer.core.model.common.identifier.DomainMarshallerIdentifier -import com.twitter.product_mixer.core.model.common.presentation.CandidateWithDetails - -/** - * Creates a domain model of the Scored Tweets product response from the set of candidates selected - */ -object ScoredTweetsResponseDomainMarshaller - extends DomainMarshaller[ScoredTweetsQuery, ScoredTweetsResponse] { - - override val identifier: DomainMarshallerIdentifier = - DomainMarshallerIdentifier("ScoredTweetsResponse") - - override def apply( - query: ScoredTweetsQuery, - selections: Seq[CandidateWithDetails] - ): ScoredTweetsResponse = ScoredTweetsResponse(scoredTweets = selections) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseTransportMarshaller.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseTransportMarshaller.docx new file mode 100644 index 000000000..950a15636 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseTransportMarshaller.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseTransportMarshaller.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseTransportMarshaller.scala deleted file mode 100644 index 27486768b..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/marshaller/ScoredTweetsResponseTransportMarshaller.scala +++ /dev/null @@ -1,70 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.marshaller - -import com.twitter.home_mixer.model.HomeFeatures._ -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsResponse -import com.twitter.home_mixer.{thriftscala => t} -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.functional_component.marshaller.TransportMarshaller -import com.twitter.product_mixer.core.functional_component.marshaller.response.urt.metadata.TopicContextFunctionalityTypeMarshaller -import com.twitter.product_mixer.core.model.common.identifier.TransportMarshallerIdentifier - -/** - * Marshall the domain model into our transport (Thrift) model. - */ -object ScoredTweetsResponseTransportMarshaller - extends TransportMarshaller[ScoredTweetsResponse, t.ScoredTweetsResponse] { - - override val identifier: TransportMarshallerIdentifier = - TransportMarshallerIdentifier("ScoredTweetsResponse") - - override def apply(input: ScoredTweetsResponse): t.ScoredTweetsResponse = { - val scoredTweets = input.scoredTweets.map { tweet => - mkScoredTweet(tweet.candidateIdLong, tweet.features) - } - t.ScoredTweetsResponse(scoredTweets) - } - - private def mkScoredTweet(tweetId: Long, features: FeatureMap): t.ScoredTweet = { - val topicFunctionalityType = features - .getOrElse(TopicContextFunctionalityTypeFeature, None) - .map(TopicContextFunctionalityTypeMarshaller(_)) - - t.ScoredTweet( - tweetId = tweetId, - authorId = features.get(AuthorIdFeature).get, - score = features.get(ScoreFeature), - suggestType = features.get(SuggestTypeFeature), - sourceTweetId = features.getOrElse(SourceTweetIdFeature, None), - sourceUserId = features.getOrElse(SourceUserIdFeature, None), - quotedTweetId = features.getOrElse(QuotedTweetIdFeature, None), - quotedUserId = features.getOrElse(QuotedUserIdFeature, None), - inReplyToTweetId = features.getOrElse(InReplyToTweetIdFeature, None), - inReplyToUserId = features.getOrElse(InReplyToUserIdFeature, None), - directedAtUserId = features.getOrElse(DirectedAtUserIdFeature, None), - inNetwork = Some(features.getOrElse(InNetworkFeature, true)), - sgsValidLikedByUserIds = Some(features.getOrElse(SGSValidLikedByUserIdsFeature, Seq.empty)), - sgsValidFollowedByUserIds = - Some(features.getOrElse(SGSValidFollowedByUserIdsFeature, Seq.empty)), - topicId = features.getOrElse(TopicIdSocialContextFeature, None), - topicFunctionalityType = topicFunctionalityType, - ancestors = Some(features.getOrElse(AncestorsFeature, Seq.empty)), - isReadFromCache = Some(features.getOrElse(IsReadFromCacheFeature, false)), - streamToKafka = Some(features.getOrElse(StreamToKafkaFeature, false)), - exclusiveConversationAuthorId = - features.getOrElse(ExclusiveConversationAuthorIdFeature, None), - authorMetadata = Some( - t.AuthorMetadata( - blueVerified = features.getOrElse(AuthorIsBlueVerifiedFeature, false), - goldVerified = features.getOrElse(AuthorIsGoldVerifiedFeature, false), - grayVerified = features.getOrElse(AuthorIsGrayVerifiedFeature, false), - legacyVerified = features.getOrElse(AuthorIsLegacyVerifiedFeature, false), - creator = features.getOrElse(AuthorIsCreatorFeature, false) - )), - lastScoredTimestampMs = None, - candidatePipelineIdentifier = None, - tweetUrls = None, - perspectiveFilteredLikedByUserIds = - Some(features.getOrElse(PerspectiveFilteredLikedByUserIdsFeature, Seq.empty)), - ) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/BUILD.bazel deleted file mode 100644 index 6414453db..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/BUILD.bazel +++ /dev/null @@ -1,17 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model/request", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/model/cursor", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline", - "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", - ], - exports = [ - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/model/cursor", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/BUILD.docx new file mode 100644 index 000000000..d4ceb1699 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsQuery.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsQuery.docx new file mode 100644 index 000000000..4b9ecdaf8 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsQuery.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsQuery.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsQuery.scala deleted file mode 100644 index a2eb3a466..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsQuery.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.model - -import com.twitter.home_mixer.model.request.DeviceContext -import com.twitter.home_mixer.model.request.HasDeviceContext -import com.twitter.home_mixer.model.request.HasSeenTweetIds -import com.twitter.home_mixer.model.request.ScoredTweetsProduct -import com.twitter.product_mixer.component_library.model.cursor.UrtOrderedCursor -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.model.marshalling.request._ -import com.twitter.product_mixer.core.pipeline.HasPipelineCursor -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.quality_factor.HasQualityFactorStatus -import com.twitter.product_mixer.core.quality_factor.QualityFactorStatus -import com.twitter.timelines.configapi.Params - -case class ScoredTweetsQuery( - override val params: Params, - override val clientContext: ClientContext, - override val pipelineCursor: Option[UrtOrderedCursor], - override val requestedMaxResults: Option[Int], - override val debugOptions: Option[DebugOptions], - override val features: Option[FeatureMap], - override val deviceContext: Option[DeviceContext], - override val seenTweetIds: Option[Seq[Long]], - override val qualityFactorStatus: Option[QualityFactorStatus]) - extends PipelineQuery - with HasPipelineCursor[UrtOrderedCursor] - with HasDeviceContext - with HasSeenTweetIds - with HasQualityFactorStatus { - override val product: Product = ScoredTweetsProduct - - override def withFeatureMap(features: FeatureMap): ScoredTweetsQuery = - copy(features = Some(features)) - - override def withQualityFactorStatus( - qualityFactorStatus: QualityFactorStatus - ): ScoredTweetsQuery = copy(qualityFactorStatus = Some(qualityFactorStatus)) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsResponse.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsResponse.docx new file mode 100644 index 000000000..be417f04a Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsResponse.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsResponse.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsResponse.scala deleted file mode 100644 index e9bd7cd61..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model/ScoredTweetsResponse.scala +++ /dev/null @@ -1,6 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.model - -import com.twitter.product_mixer.core.model.common.presentation.CandidateWithDetails -import com.twitter.product_mixer.core.model.marshalling.HasMarshalling - -case class ScoredTweetsResponse(scoredTweets: Seq[CandidateWithDetails]) extends HasMarshalling diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/BUILD.bazel deleted file mode 100644 index 065841642..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/BUILD.bazel +++ /dev/null @@ -1,11 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "configapi/configapi-core/src/main/scala/com/twitter/timelines/configapi", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/param/decider", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/product", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/BUILD.docx new file mode 100644 index 000000000..2eac239f8 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParam.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParam.docx new file mode 100644 index 000000000..e3d95ebf7 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParam.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParam.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParam.scala deleted file mode 100644 index 9720b9344..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParam.scala +++ /dev/null @@ -1,361 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.param - -import com.twitter.conversions.DurationOps._ -import com.twitter.home_mixer.param.decider.DeciderKey -import com.twitter.timelines.configapi.DurationConversion -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.HasDurationConversion -import com.twitter.timelines.configapi.decider.BooleanDeciderParam -import com.twitter.util.Duration - -object ScoredTweetsParam { - val SupportedClientFSName = "scored_tweets_supported_client" - - object CandidatePipeline { - object EnableInNetworkParam - extends BooleanDeciderParam(DeciderKey.EnableScoredTweetsInNetworkCandidatePipeline) - - object EnableTweetMixerParam - extends BooleanDeciderParam(DeciderKey.EnableScoredTweetsTweetMixerCandidatePipeline) - - object EnableUtegParam - extends BooleanDeciderParam(DeciderKey.EnableScoredTweetsUtegCandidatePipeline) - - object EnableFrsParam - extends BooleanDeciderParam(DeciderKey.EnableScoredTweetsFrsCandidatePipeline) - - object EnableListsParam - extends BooleanDeciderParam(DeciderKey.EnableScoredTweetsListsCandidatePipeline) - - object EnablePopularVideosParam - extends BooleanDeciderParam(DeciderKey.EnableScoredTweetsPopularVideosCandidatePipeline) - - object EnableBackfillParam - extends BooleanDeciderParam(DeciderKey.EnableScoredTweetsBackfillCandidatePipeline) - } - - object EnableBackfillCandidatePipelineParam - extends FSParam[Boolean]( - name = "scored_tweets_enable_backfill_candidate_pipeline", - default = true - ) - - object QualityFactor { - object InNetworkMaxTweetsToScoreParam - extends FSBoundedParam[Int]( - name = "scored_tweets_quality_factor_earlybird_max_tweets_to_score", - default = 500, - min = 0, - max = 10000 - ) - - object UtegMaxTweetsToScoreParam - extends FSBoundedParam[Int]( - name = "scored_tweets_quality_factor_uteg_max_tweets_to_score", - default = 500, - min = 0, - max = 10000 - ) - - object FrsMaxTweetsToScoreParam - extends FSBoundedParam[Int]( - name = "scored_tweets_quality_factor_frs_max_tweets_to_score", - default = 500, - min = 0, - max = 10000 - ) - - object TweetMixerMaxTweetsToScoreParam - extends FSBoundedParam[Int]( - name = "scored_tweets_quality_factor_tweet_mixer_max_tweets_to_score", - default = 500, - min = 0, - max = 10000 - ) - - object ListsMaxTweetsToScoreParam - extends FSBoundedParam[Int]( - name = "scored_tweets_quality_factor_lists_max_tweets_to_score", - default = 500, - min = 0, - max = 100 - ) - - object PopularVideosMaxTweetsToScoreParam - extends FSBoundedParam[Int]( - name = "scored_tweets_quality_factor_popular_videos_max_tweets_to_score", - default = 40, - min = 0, - max = 10000 - ) - - object BackfillMaxTweetsToScoreParam - extends FSBoundedParam[Int]( - name = "scored_tweets_quality_factor_backfill_max_tweets_to_score", - default = 500, - min = 0, - max = 10000 - ) - } - - object ServerMaxResultsParam - extends FSBoundedParam[Int]( - name = "scored_tweets_server_max_results", - default = 120, - min = 1, - max = 500 - ) - - object MaxInNetworkResultsParam - extends FSBoundedParam[Int]( - name = "scored_tweets_max_in_network_results", - default = 60, - min = 1, - max = 500 - ) - - object MaxOutOfNetworkResultsParam - extends FSBoundedParam[Int]( - name = "scored_tweets_max_out_of_network_results", - default = 60, - min = 1, - max = 500 - ) - - object CachedScoredTweets { - object TTLParam - extends FSBoundedParam[Duration]( - name = "scored_tweets_cached_scored_tweets_ttl_minutes", - default = 3.minutes, - min = 0.minute, - max = 60.minutes - ) - with HasDurationConversion { - override val durationConversion: DurationConversion = DurationConversion.FromMinutes - } - - object MinCachedTweetsParam - extends FSBoundedParam[Int]( - name = "scored_tweets_cached_scored_tweets_min_cached_tweets", - default = 30, - min = 0, - max = 1000 - ) - } - - object Scoring { - object HomeModelParam - extends FSParam[String](name = "scored_tweets_home_model", default = "Home") - - object ModelWeights { - - object FavParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_fav", - default = 1.0, - min = 0.0, - max = 100.0 - ) - - object RetweetParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_retweet", - default = 1.0, - min = 0.0, - max = 100.0 - ) - - object ReplyParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_reply", - default = 1.0, - min = 0.0, - max = 100.0 - ) - - object GoodProfileClickParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_good_profile_click", - default = 1.0, - min = 0.0, - max = 1000000.0 - ) - - object VideoPlayback50Param - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_video_playback50", - default = 1.0, - min = 0.0, - max = 100.0 - ) - - object ReplyEngagedByAuthorParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_reply_engaged_by_author", - default = 1.0, - min = 0.0, - max = 200.0 - ) - - object GoodClickParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_good_click", - default = 1.0, - min = 0.0, - max = 1000000.0 - ) - - object GoodClickV2Param - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_good_click_v2", - default = 1.0, - min = 0.0, - max = 1000000.0 - ) - - object TweetDetailDwellParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_tweet_detail_dwell", - default = 0.0, - min = 0.0, - max = 100.0 - ) - - object ProfileDwelledParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_profile_dwelled", - default = 0.0, - min = 0.0, - max = 100.0 - ) - - object BookmarkParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_bookmark", - default = 0.0, - min = 0.0, - max = 100.0 - ) - - object ShareParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_share", - default = 0.0, - min = 0.0, - max = 100.0 - ) - - object ShareMenuClickParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_share_menu_click", - default = 0.0, - min = 0.0, - max = 100.0 - ) - - object NegativeFeedbackV2Param - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_negative_feedback_v2", - default = 1.0, - min = -1000.0, - max = 0.0 - ) - - object ReportParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_report", - default = 1.0, - min = -20000.0, - max = 0.0 - ) - - object WeakNegativeFeedbackParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_weak_negative_feedback", - default = 0.0, - min = -1000.0, - max = 0.0 - ) - - object StrongNegativeFeedbackParam - extends FSBoundedParam[Double]( - name = "scored_tweets_model_weight_strong_negative_feedback", - default = 0.0, - min = -1000.0, - max = 0.0 - ) - } - } - - object EnableSimClustersSimilarityFeatureHydrationDeciderParam - extends BooleanDeciderParam(decider = DeciderKey.EnableSimClustersSimilarityFeatureHydration) - - object CompetitorSetParam - extends FSParam[Set[Long]](name = "scored_tweets_competitor_list", default = Set.empty) - - object CompetitorURLSeqParam - extends FSParam[Seq[String]](name = "scored_tweets_competitor_url_list", default = Seq.empty) - - object BlueVerifiedAuthorInNetworkMultiplierParam - extends FSBoundedParam[Double]( - name = "scored_tweets_blue_verified_author_in_network_multiplier", - default = 4.0, - min = 0.0, - max = 100.0 - ) - - object BlueVerifiedAuthorOutOfNetworkMultiplierParam - extends FSBoundedParam[Double]( - name = "scored_tweets_blue_verified_author_out_of_network_multiplier", - default = 2.0, - min = 0.0, - max = 100.0 - ) - - object CreatorInNetworkMultiplierParam - extends FSBoundedParam[Double]( - name = "scored_tweets_creator_in_network_multiplier", - default = 1.1, - min = 0.0, - max = 100.0 - ) - - object CreatorOutOfNetworkMultiplierParam - extends FSBoundedParam[Double]( - name = "scored_tweets_creator_out_of_network_multiplier", - default = 1.3, - min = 0.0, - max = 100.0 - ) - - object OutOfNetworkScaleFactorParam - extends FSBoundedParam[Double]( - name = "scored_tweets_out_of_network_scale_factor", - default = 1.0, - min = 0.0, - max = 100.0 - ) - - object EnableScribeScoredCandidatesParam - extends FSParam[Boolean](name = "scored_tweets_enable_scribing", default = false) - - object EarlybirdTensorflowModel { - - object InNetworkParam - extends FSParam[String]( - name = "scored_tweets_in_network_earlybird_tensorflow_model", - default = "timelines_recap_replica") - - object FrsParam - extends FSParam[String]( - name = "scored_tweets_frs_earlybird_tensorflow_model", - default = "timelines_rectweet_replica") - - object UtegParam - extends FSParam[String]( - name = "scored_tweets_uteg_earlybird_tensorflow_model", - default = "timelines_rectweet_replica") - } - -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParamConfig.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParamConfig.docx new file mode 100644 index 000000000..2431bad85 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParamConfig.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParamConfig.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParamConfig.scala deleted file mode 100644 index 10b9de49d..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param/ScoredTweetsParamConfig.scala +++ /dev/null @@ -1,89 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.param - -import com.twitter.home_mixer.param.decider.DeciderKey -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam._ -import com.twitter.product_mixer.core.product.ProductParamConfig -import com.twitter.servo.decider.DeciderKeyName -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -class ScoredTweetsParamConfig @Inject() () extends ProductParamConfig { - override val enabledDeciderKey: DeciderKeyName = DeciderKey.EnableScoredTweetsProduct - override val supportedClientFSName: String = SupportedClientFSName - - override val booleanDeciderOverrides = Seq( - CandidatePipeline.EnableBackfillParam, - CandidatePipeline.EnableTweetMixerParam, - CandidatePipeline.EnableFrsParam, - CandidatePipeline.EnableInNetworkParam, - CandidatePipeline.EnableListsParam, - CandidatePipeline.EnablePopularVideosParam, - CandidatePipeline.EnableUtegParam, - ScoredTweetsParam.EnableSimClustersSimilarityFeatureHydrationDeciderParam - ) - - override val booleanFSOverrides = Seq( - EnableBackfillCandidatePipelineParam, - EnableScribeScoredCandidatesParam - ) - - override val boundedIntFSOverrides = Seq( - CachedScoredTweets.MinCachedTweetsParam, - MaxInNetworkResultsParam, - MaxOutOfNetworkResultsParam, - QualityFactor.BackfillMaxTweetsToScoreParam, - QualityFactor.TweetMixerMaxTweetsToScoreParam, - QualityFactor.FrsMaxTweetsToScoreParam, - QualityFactor.InNetworkMaxTweetsToScoreParam, - QualityFactor.ListsMaxTweetsToScoreParam, - QualityFactor.PopularVideosMaxTweetsToScoreParam, - QualityFactor.UtegMaxTweetsToScoreParam, - ServerMaxResultsParam - ) - - override val boundedDurationFSOverrides = Seq( - CachedScoredTweets.TTLParam - ) - - override val stringFSOverrides = Seq( - Scoring.HomeModelParam, - EarlybirdTensorflowModel.InNetworkParam, - EarlybirdTensorflowModel.FrsParam, - EarlybirdTensorflowModel.UtegParam - ) - - override val boundedDoubleFSOverrides = Seq( - BlueVerifiedAuthorInNetworkMultiplierParam, - BlueVerifiedAuthorOutOfNetworkMultiplierParam, - CreatorInNetworkMultiplierParam, - CreatorOutOfNetworkMultiplierParam, - OutOfNetworkScaleFactorParam, - // Model Weights - Scoring.ModelWeights.FavParam, - Scoring.ModelWeights.ReplyParam, - Scoring.ModelWeights.RetweetParam, - Scoring.ModelWeights.GoodClickParam, - Scoring.ModelWeights.GoodClickV2Param, - Scoring.ModelWeights.GoodProfileClickParam, - Scoring.ModelWeights.ReplyEngagedByAuthorParam, - Scoring.ModelWeights.VideoPlayback50Param, - Scoring.ModelWeights.ReportParam, - Scoring.ModelWeights.NegativeFeedbackV2Param, - Scoring.ModelWeights.TweetDetailDwellParam, - Scoring.ModelWeights.ProfileDwelledParam, - Scoring.ModelWeights.BookmarkParam, - Scoring.ModelWeights.ShareParam, - Scoring.ModelWeights.ShareMenuClickParam, - Scoring.ModelWeights.StrongNegativeFeedbackParam, - Scoring.ModelWeights.WeakNegativeFeedbackParam - ) - - override val longSetFSOverrides = Seq( - CompetitorSetParam - ) - - override val stringSeqFSOverrides = Seq( - CompetitorURLSeqParam - ) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/BUILD.bazel deleted file mode 100644 index 15fc94f47..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/BUILD.bazel +++ /dev/null @@ -1,23 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model/request", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/transformer", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline", - "src/thrift/com/twitter/timelineranker:thrift-scala", - "timelineranker/common/src/main/scala/com/twitter/timelineranker/model", - "timelines:util", - "timelines/src/main/scala/com/twitter/timelines/common/model", - "timelines/src/main/scala/com/twitter/timelines/earlybird/common/options", - "timelines/src/main/scala/com/twitter/timelines/earlybird/common/utils", - "timelines/src/main/scala/com/twitter/timelines/model/candidate", - "timelineservice/common:model", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/BUILD.docx new file mode 100644 index 000000000..b3ed61fcd Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerFrsQueryTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerFrsQueryTransformer.docx new file mode 100644 index 000000000..fce0b8257 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerFrsQueryTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerFrsQueryTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerFrsQueryTransformer.scala deleted file mode 100644 index 2592ec82e..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerFrsQueryTransformer.scala +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.query_transformer - -import com.twitter.conversions.DurationOps._ -import com.twitter.core_workflows.user_model.{thriftscala => um} -import com.twitter.home_mixer.model.HomeFeatures.UserStateFeature -import com.twitter.home_mixer.model.request.HasDeviceContext -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.FrsSeedUserIdsFeature -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam -import com.twitter.home_mixer.product.scored_tweets.query_transformer.TimelineRankerFrsQueryTransformer._ -import com.twitter.product_mixer.core.functional_component.transformer.CandidatePipelineQueryTransformer -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.quality_factor.HasQualityFactorStatus -import com.twitter.timelineranker.{thriftscala => t} -import com.twitter.timelines.common.model.TweetKindOption -import com.twitter.timelines.model.candidate.CandidateTweetSourceId - -object TimelineRankerFrsQueryTransformer { - private val DefaultSinceDuration = 24.hours - private val ExpandedSinceDuration = 48.hours - private val MaxTweetsToFetch = 100 - - private val tweetKindOptions: TweetKindOption.ValueSet = - TweetKindOption(includeOriginalTweetsAndQuotes = true) - - private val UserStatesForExtendedSinceDuration: Set[um.UserState] = Set( - um.UserState.Light, - um.UserState.MediumNonTweeter, - um.UserState.MediumTweeter, - um.UserState.NearZero, - um.UserState.New, - um.UserState.VeryLight - ) -} - -case class TimelineRankerFrsQueryTransformer[ - Query <: PipelineQuery with HasQualityFactorStatus with HasDeviceContext -]( - override val candidatePipelineIdentifier: CandidatePipelineIdentifier, - override val maxTweetsToFetch: Int = MaxTweetsToFetch) - extends CandidatePipelineQueryTransformer[Query, t.RecapQuery] - with TimelineRankerQueryTransformer[Query] { - - override val candidateTweetSourceId = CandidateTweetSourceId.FrsTweet - override val options = tweetKindOptions - - override def getTensorflowModel(query: Query): Option[String] = { - Some(query.params(ScoredTweetsParam.EarlybirdTensorflowModel.FrsParam)) - } - - override def seedAuthorIds(query: Query): Option[Seq[Long]] = { - query.features.flatMap(_.getOrElse(FrsSeedUserIdsFeature, None)) - } - - override def transform(input: Query): t.RecapQuery = { - val userState = input.features.get.getOrElse(UserStateFeature, None) - - val sinceDuration = - if (userState.exists(UserStatesForExtendedSinceDuration.contains)) ExpandedSinceDuration - else DefaultSinceDuration - - buildTimelineRankerQuery(input, sinceDuration).toThriftRecapQuery - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerInNetworkQueryTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerInNetworkQueryTransformer.docx new file mode 100644 index 000000000..d797937e7 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerInNetworkQueryTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerInNetworkQueryTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerInNetworkQueryTransformer.scala deleted file mode 100644 index 4514dc2c4..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerInNetworkQueryTransformer.scala +++ /dev/null @@ -1,63 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.query_transformer - -import com.twitter.conversions.DurationOps._ -import com.twitter.core_workflows.user_model.{thriftscala => um} -import com.twitter.home_mixer.model.HomeFeatures.UserStateFeature -import com.twitter.home_mixer.model.request.HasDeviceContext -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam -import com.twitter.home_mixer.product.scored_tweets.query_transformer.TimelineRankerInNetworkQueryTransformer._ -import com.twitter.product_mixer.core.functional_component.transformer.CandidatePipelineQueryTransformer -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.quality_factor.HasQualityFactorStatus -import com.twitter.timelineranker.{thriftscala => t} -import com.twitter.timelines.common.model.TweetKindOption -import com.twitter.timelines.model.candidate.CandidateTweetSourceId - -object TimelineRankerInNetworkQueryTransformer { - private val DefaultSinceDuration = 24.hours - private val ExpandedSinceDuration = 48.hours - private val MaxTweetsToFetch = 600 - - private val tweetKindOptions: TweetKindOption.ValueSet = TweetKindOption( - includeReplies = true, - includeRetweets = true, - includeOriginalTweetsAndQuotes = true, - includeExtendedReplies = true - ) - - private val UserStatesForExtendedSinceDuration: Set[um.UserState] = Set( - um.UserState.Light, - um.UserState.MediumNonTweeter, - um.UserState.MediumTweeter, - um.UserState.NearZero, - um.UserState.New, - um.UserState.VeryLight - ) -} - -case class TimelineRankerInNetworkQueryTransformer[ - Query <: PipelineQuery with HasQualityFactorStatus with HasDeviceContext -]( - override val candidatePipelineIdentifier: CandidatePipelineIdentifier, - override val maxTweetsToFetch: Int = MaxTweetsToFetch) - extends CandidatePipelineQueryTransformer[Query, t.RecapQuery] - with TimelineRankerQueryTransformer[Query] { - - override val candidateTweetSourceId = CandidateTweetSourceId.RecycledTweet - override val options = tweetKindOptions - - override def getTensorflowModel(query: Query): Option[String] = { - Some(query.params(ScoredTweetsParam.EarlybirdTensorflowModel.InNetworkParam)) - } - - override def transform(input: Query): t.RecapQuery = { - val userState = input.features.get.getOrElse(UserStateFeature, None) - - val sinceDuration = - if (userState.exists(UserStatesForExtendedSinceDuration.contains)) ExpandedSinceDuration - else DefaultSinceDuration - - buildTimelineRankerQuery(input, sinceDuration).toThriftRecapQuery - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerQueryTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerQueryTransformer.docx new file mode 100644 index 000000000..a3ef66bc1 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerQueryTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerQueryTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerQueryTransformer.scala deleted file mode 100644 index e187a0aa0..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerQueryTransformer.scala +++ /dev/null @@ -1,108 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.query_transformer - -import com.twitter.home_mixer.model.HomeFeatures.RealGraphInNetworkScoresFeature -import com.twitter.home_mixer.model.request.HasDeviceContext -import com.twitter.home_mixer.product.scored_tweets.query_transformer.TimelineRankerQueryTransformer._ -import com.twitter.home_mixer.util.CachedScoredTweetsHelper -import com.twitter.home_mixer.util.earlybird.EarlybirdRequestUtil -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.quality_factor.HasQualityFactorStatus -import com.twitter.timelineranker.{model => tlr} -import com.twitter.timelines.common.model.TweetKindOption -import com.twitter.timelines.earlybird.common.options.EarlybirdOptions -import com.twitter.timelines.earlybird.common.options.EarlybirdScoringModelConfig -import com.twitter.timelines.earlybird.common.utils.SearchOperator -import com.twitter.timelines.model.UserId -import com.twitter.timelines.model.candidate.CandidateTweetSourceId -import com.twitter.timelines.util.SnowflakeSortIndexHelper -import com.twitter.util.Duration -import com.twitter.util.Time - -object TimelineRankerQueryTransformer { - - /** - * Specifies the maximum number of excluded tweet ids to include in the search index query. - * Earlybird's named multi term disjunction map feature supports up to 1500 tweet ids. - */ - private val EarlybirdMaxExcludedTweets = 1500 - - /** - * Maximum number of query hits each earlybird shard is allowed to accumulate before - * early-terminating the query and reducing the hits to MaxNumEarlybirdResults. - */ - private val EarlybirdMaxHits = 1000 - - /** - * Maximum number of results TLR should retrieve from each earlybird shard. - */ - private val EarlybirdMaxResults = 300 -} - -trait TimelineRankerQueryTransformer[ - Query <: PipelineQuery with HasQualityFactorStatus with HasDeviceContext] { - def maxTweetsToFetch: Int - def options: TweetKindOption.ValueSet = TweetKindOption.Default - def candidateTweetSourceId: CandidateTweetSourceId.Value - def utegLikedByTweetsOptions(query: Query): Option[tlr.UtegLikedByTweetsOptions] = None - def seedAuthorIds(query: Query): Option[Seq[Long]] = None - def candidatePipelineIdentifier: CandidatePipelineIdentifier - def earlybirdModels: Seq[EarlybirdScoringModelConfig] = - EarlybirdRequestUtil.EarlybirdScoringModels.UnifiedEngagementProd - def getTensorflowModel(query: Query): Option[String] = None - - def buildTimelineRankerQuery(query: Query, sinceDuration: Duration): tlr.RecapQuery = { - val sinceTime: Time = sinceDuration.ago - val untilTime: Time = Time.now - - val fromTweetIdExclusive = SnowflakeSortIndexHelper.timestampToFakeId(sinceTime) - val toTweetIdExclusive = SnowflakeSortIndexHelper.timestampToFakeId(untilTime) - val range = tlr.TweetIdRange(Some(fromTweetIdExclusive), Some(toTweetIdExclusive)) - - val excludedTweetIds = query.features.map { featureMap => - CachedScoredTweetsHelper.tweetImpressionsAndCachedScoredTweetsInRange( - featureMap, - candidatePipelineIdentifier, - EarlybirdMaxExcludedTweets, - sinceTime, - untilTime) - } - - val maxCount = - (query.getQualityFactorCurrentValue(candidatePipelineIdentifier) * maxTweetsToFetch).toInt - - val authorScoreMap = query.features - .map(_.getOrElse(RealGraphInNetworkScoresFeature, Map.empty[UserId, Double])) - .getOrElse(Map.empty) - - val deviceContext = - query.deviceContext.map(_.toTimelineServiceDeviceContext(query.clientContext)) - - val tensorflowModel = getTensorflowModel(query) - - val earlyBirdOptions = EarlybirdOptions( - maxNumHitsPerShard = EarlybirdMaxHits, - maxNumResultsPerShard = EarlybirdMaxResults, - models = earlybirdModels, - authorScoreMap = authorScoreMap, - skipVeryRecentTweets = true, - tensorflowModel = tensorflowModel - ) - - tlr.RecapQuery( - userId = query.getRequiredUserId, - maxCount = Some(maxCount), - range = Some(range), - options = options, - searchOperator = SearchOperator.Exclude, - earlybirdOptions = Some(earlyBirdOptions), - deviceContext = deviceContext, - authorIds = seedAuthorIds(query), - excludedTweetIds = excludedTweetIds, - utegLikedByTweetsOptions = utegLikedByTweetsOptions(query), - searchClientSubId = None, - candidateTweetSourceId = Some(candidateTweetSourceId), - hydratesContentFeatures = Some(false) - ) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerUtegQueryTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerUtegQueryTransformer.docx new file mode 100644 index 000000000..7ec09faee Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerUtegQueryTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerUtegQueryTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerUtegQueryTransformer.scala deleted file mode 100644 index ea051f331..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/TimelineRankerUtegQueryTransformer.scala +++ /dev/null @@ -1,59 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.query_transformer - -import com.twitter.conversions.DurationOps._ -import com.twitter.home_mixer.model.HomeFeatures.RealGraphInNetworkScoresFeature -import com.twitter.home_mixer.model.request.HasDeviceContext -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam -import com.twitter.home_mixer.product.scored_tweets.query_transformer.TimelineRankerUtegQueryTransformer._ -import com.twitter.home_mixer.util.earlybird.EarlybirdRequestUtil -import com.twitter.product_mixer.core.functional_component.transformer.CandidatePipelineQueryTransformer -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.quality_factor.HasQualityFactorStatus -import com.twitter.timelineranker.{model => tlr} -import com.twitter.timelineranker.{thriftscala => t} -import com.twitter.timelines.common.model.TweetKindOption -import com.twitter.timelines.earlybird.common.options.EarlybirdScoringModelConfig -import com.twitter.timelines.model.UserId -import com.twitter.timelines.model.candidate.CandidateTweetSourceId - -object TimelineRankerUtegQueryTransformer { - private val SinceDuration = 24.hours - private val MaxTweetsToFetch = 300 - private val MaxUtegCandidates = 800 - - private val tweetKindOptions = - TweetKindOption(includeOriginalTweetsAndQuotes = true, includeReplies = true) - - def utegEarlybirdModels: Seq[EarlybirdScoringModelConfig] = - EarlybirdRequestUtil.EarlybirdScoringModels.UnifiedEngagementRectweet -} - -case class TimelineRankerUtegQueryTransformer[ - Query <: PipelineQuery with HasQualityFactorStatus with HasDeviceContext -]( - override val candidatePipelineIdentifier: CandidatePipelineIdentifier, - override val maxTweetsToFetch: Int = MaxTweetsToFetch) - extends CandidatePipelineQueryTransformer[Query, t.UtegLikedByTweetsQuery] - with TimelineRankerQueryTransformer[Query] { - - override val candidateTweetSourceId = CandidateTweetSourceId.RecommendedTweet - override val options = tweetKindOptions - override val earlybirdModels = utegEarlybirdModels - override def getTensorflowModel(query: Query): Option[String] = { - Some(query.params(ScoredTweetsParam.EarlybirdTensorflowModel.UtegParam)) - } - - override def utegLikedByTweetsOptions(input: Query): Option[tlr.UtegLikedByTweetsOptions] = Some( - tlr.UtegLikedByTweetsOptions( - utegCount = MaxUtegCandidates, - isInNetwork = false, - weightedFollowings = input.features - .map(_.getOrElse(RealGraphInNetworkScoresFeature, Map.empty[UserId, Double])) - .getOrElse(Map.empty) - ) - ) - - override def transform(input: Query): t.UtegLikedByTweetsQuery = - buildTimelineRankerQuery(input, SinceDuration).toThriftUtegLikedByTweetsQuery -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/BUILD.bazel deleted file mode 100644 index a337a328b..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/BUILD.bazel +++ /dev/null @@ -1,23 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model/request", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/feature_hydrator/query/social_graph", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/transformer", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline", - "src/thrift/com/twitter/search:earlybird-scala", - "timelines:util", - "timelines/src/main/scala/com/twitter/timelines/clients/relevance_search", - "timelines/src/main/scala/com/twitter/timelines/common/model", - "timelines/src/main/scala/com/twitter/timelines/earlybird/common/utils", - "timelines/src/main/scala/com/twitter/timelines/model/candidate", - "timelines/src/main/scala/com/twitter/timelines/model/types", - "timelines/src/main/scala/com/twitter/timelines/util/stats", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/BUILD.docx new file mode 100644 index 000000000..40184839d Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdFrsQueryTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdFrsQueryTransformer.docx new file mode 100644 index 000000000..bdc242445 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdFrsQueryTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdFrsQueryTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdFrsQueryTransformer.scala deleted file mode 100644 index 9b2ac341e..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdFrsQueryTransformer.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.query_transformer.earlybird - -import com.twitter.conversions.DurationOps._ -import com.twitter.home_mixer.model.request.HasDeviceContext -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.FrsSeedUserIdsFeature -import com.twitter.home_mixer.product.scored_tweets.query_transformer.earlybird.EarlybirdFrsQueryTransformer._ -import com.twitter.product_mixer.core.functional_component.transformer.CandidatePipelineQueryTransformer -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.quality_factor.HasQualityFactorStatus -import com.twitter.search.earlybird.{thriftscala => eb} -import com.twitter.timelines.common.model.TweetKindOption - -object EarlybirdFrsQueryTransformer { - private val SinceDuration = 24.hours - private val MaxTweetsToFetch = 100 - private val TensorflowModel = Some("timelines_rectweet_replica") - - private val TweetKindOptions: TweetKindOption.ValueSet = - TweetKindOption(includeOriginalTweetsAndQuotes = true) -} - -case class EarlybirdFrsQueryTransformer[ - Query <: PipelineQuery with HasQualityFactorStatus with HasDeviceContext -]( - candidatePipelineIdentifier: CandidatePipelineIdentifier, - override val clientId: Option[String]) - extends CandidatePipelineQueryTransformer[Query, eb.EarlybirdRequest] - with EarlybirdQueryTransformer[Query] { - - override val tweetKindOptions: TweetKindOption.ValueSet = TweetKindOptions - override val maxTweetsToFetch: Int = MaxTweetsToFetch - override val tensorflowModel: Option[String] = TensorflowModel - - override def transform(query: Query): eb.EarlybirdRequest = { - val seedUserIds = query.features - .flatMap(_.getOrElse(FrsSeedUserIdsFeature, None)) - .getOrElse(Seq.empty).toSet - buildEarlybirdQuery(query, SinceDuration, seedUserIds) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdInNetworkQueryTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdInNetworkQueryTransformer.docx new file mode 100644 index 000000000..bc5448bbf Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdInNetworkQueryTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdInNetworkQueryTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdInNetworkQueryTransformer.scala deleted file mode 100644 index 5c5464b8b..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdInNetworkQueryTransformer.scala +++ /dev/null @@ -1,68 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.query_transformer.earlybird - -import com.twitter.conversions.DurationOps._ -import com.twitter.core_workflows.user_model.{thriftscala => um} -import com.twitter.home_mixer.model.HomeFeatures.UserStateFeature -import com.twitter.home_mixer.model.request.HasDeviceContext -import com.twitter.home_mixer.product.scored_tweets.query_transformer.earlybird.EarlybirdInNetworkQueryTransformer._ -import com.twitter.product_mixer.component_library.feature_hydrator.query.social_graph.SGSFollowedUsersFeature -import com.twitter.product_mixer.core.functional_component.transformer.CandidatePipelineQueryTransformer -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.quality_factor.HasQualityFactorStatus -import com.twitter.search.earlybird.{thriftscala => eb} -import com.twitter.timelines.common.model.TweetKindOption - -object EarlybirdInNetworkQueryTransformer { - private val DefaultSinceDuration = 24.hours - private val ExpandedSinceDuration = 48.hours - private val MaxTweetsToFetch = 600 - private val TensorflowModel = Some("timelines_recap_replica") - - private val TweetKindOptions: TweetKindOption.ValueSet = TweetKindOption( - includeReplies = true, - includeRetweets = true, - includeOriginalTweetsAndQuotes = true, - includeExtendedReplies = true - ) - - private val UserStatesForExtendedSinceDuration: Set[um.UserState] = Set( - um.UserState.Light, - um.UserState.MediumNonTweeter, - um.UserState.MediumTweeter, - um.UserState.NearZero, - um.UserState.New, - um.UserState.VeryLight - ) -} - -case class EarlybirdInNetworkQueryTransformer[ - Query <: PipelineQuery with HasQualityFactorStatus with HasDeviceContext -]( - candidatePipelineIdentifier: CandidatePipelineIdentifier, - override val clientId: Option[String]) - extends CandidatePipelineQueryTransformer[Query, eb.EarlybirdRequest] - with EarlybirdQueryTransformer[Query] { - - override val tweetKindOptions: TweetKindOption.ValueSet = TweetKindOptions - override val maxTweetsToFetch: Int = MaxTweetsToFetch - override val tensorflowModel: Option[String] = TensorflowModel - - override def transform(query: Query): eb.EarlybirdRequest = { - - val userState = query.features.get.getOrElse(UserStateFeature, None) - - val sinceDuration = - if (userState.exists(UserStatesForExtendedSinceDuration.contains)) ExpandedSinceDuration - else DefaultSinceDuration - - val followedUserIds = - query.features - .map( - _.getOrElse( - SGSFollowedUsersFeature, - Seq.empty)).toSeq.flatten.toSet + query.getRequiredUserId - - buildEarlybirdQuery(query, sinceDuration, followedUserIds) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdQueryTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdQueryTransformer.docx new file mode 100644 index 000000000..043082f6e Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdQueryTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdQueryTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdQueryTransformer.scala deleted file mode 100644 index 0e51c54ea..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/query_transformer/earlybird/EarlybirdQueryTransformer.scala +++ /dev/null @@ -1,70 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.query_transformer.earlybird - -import com.twitter.home_mixer.model.HomeFeatures.RealGraphInNetworkScoresFeature -import com.twitter.home_mixer.model.request.HasDeviceContext -import com.twitter.home_mixer.util.CachedScoredTweetsHelper -import com.twitter.home_mixer.util.earlybird.EarlybirdRequestUtil -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.quality_factor.HasQualityFactorStatus -import com.twitter.search.earlybird.{thriftscala => eb} -import com.twitter.timelines.clients.relevance_search.SearchClient.TweetTypes -import com.twitter.timelines.common.model.TweetKindOption -import com.twitter.timelines.util.SnowflakeSortIndexHelper -import com.twitter.util.Duration -import com.twitter.util.Time - -trait EarlybirdQueryTransformer[ - Query <: PipelineQuery with HasQualityFactorStatus with HasDeviceContext] { - - def candidatePipelineIdentifier: CandidatePipelineIdentifier - def clientId: Option[String] = None - def maxTweetsToFetch: Int = 100 - def tweetKindOptions: TweetKindOption.ValueSet - def tensorflowModel: Option[String] = None - - private val EarlybirdMaxExcludedTweets = 1500 - - def buildEarlybirdQuery( - query: Query, - sinceDuration: Duration, - followedUserIds: Set[Long] = Set.empty - ): eb.EarlybirdRequest = { - val sinceTime: Time = sinceDuration.ago - val untilTime: Time = Time.now - - val fromTweetIdExclusive = SnowflakeSortIndexHelper.timestampToFakeId(sinceTime) - val toTweetIdExclusive = SnowflakeSortIndexHelper.timestampToFakeId(untilTime) - - val excludedTweetIds = query.features.map { featureMap => - CachedScoredTweetsHelper.tweetImpressionsAndCachedScoredTweetsInRange( - featureMap, - candidatePipelineIdentifier, - EarlybirdMaxExcludedTweets, - sinceTime, - untilTime) - } - - val maxCount = - (query.getQualityFactorCurrentValue(candidatePipelineIdentifier) * maxTweetsToFetch).toInt - - val authorScoreMap = query.features - .map(_.getOrElse(RealGraphInNetworkScoresFeature, Map.empty[Long, Double])) - .getOrElse(Map.empty) - - EarlybirdRequestUtil.getTweetsRequest( - userId = Some(query.getRequiredUserId), - clientId = clientId, - skipVeryRecentTweets = true, - followedUserIds = followedUserIds, - retweetsMutedUserIds = Set.empty, - beforeTweetIdExclusive = Some(toTweetIdExclusive), - afterTweetIdExclusive = Some(fromTweetIdExclusive), - excludedTweetIds = excludedTweetIds.map(_.toSet), - maxCount = maxCount, - tweetTypes = TweetTypes.fromTweetKindOption(tweetKindOptions), - authorScoreMap = Some(authorScoreMap), - tensorflowModel = tensorflowModel - ) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/BUILD.bazel deleted file mode 100644 index 30774b91f..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/BUILD.bazel +++ /dev/null @@ -1,16 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - dependencies = [ - "explore/explore-ranker/thrift/src/main/thrift:thrift-scala", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/marshaller/timelines", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content", - "home-mixer/thrift/src/main/thrift:thrift-scala", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/transformer", - "src/thrift/com/twitter/timelineranker:thrift-scala", - "topic-social-proof/server/src/main/thrift:thrift-scala", - "tweet-mixer/thrift/src/main/thrift:thrift-scala", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/BUILD.docx new file mode 100644 index 000000000..96097b679 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/CachedScoredTweetsResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/CachedScoredTweetsResponseFeatureTransformer.docx new file mode 100644 index 000000000..8762cf430 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/CachedScoredTweetsResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/CachedScoredTweetsResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/CachedScoredTweetsResponseFeatureTransformer.scala deleted file mode 100644 index 0fbe7a438..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/CachedScoredTweetsResponseFeatureTransformer.scala +++ /dev/null @@ -1,119 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer - -import com.twitter.home_mixer.marshaller.timelines.TopicContextFunctionalityTypeUnmarshaller -import com.twitter.home_mixer.model.HomeFeatures.AncestorsFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsBlueVerifiedFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsCreatorFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsGoldVerifiedFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsGrayVerifiedFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsLegacyVerifiedFeature -import com.twitter.home_mixer.model.HomeFeatures.CachedCandidatePipelineIdentifierFeature -import com.twitter.home_mixer.model.HomeFeatures.DirectedAtUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.ExclusiveConversationAuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InNetworkFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.IsReadFromCacheFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRetweetFeature -import com.twitter.home_mixer.model.HomeFeatures.LastScoredTimestampMsFeature -import com.twitter.home_mixer.model.HomeFeatures.PerspectiveFilteredLikedByUserIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SGSValidFollowedByUserIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.SGSValidLikedByUserIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.ScoreFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.StreamToKafkaFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.home_mixer.model.HomeFeatures.TopicContextFunctionalityTypeFeature -import com.twitter.home_mixer.model.HomeFeatures.TopicIdSocialContextFeature -import com.twitter.home_mixer.model.HomeFeatures.TweetUrlsFeature -import com.twitter.home_mixer.model.HomeFeatures.WeightedModelScoreFeature -import com.twitter.home_mixer.{thriftscala => hmt} -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier - -object CachedScoredTweetsResponseFeatureTransformer - extends CandidateFeatureTransformer[hmt.ScoredTweet] { - - override val identifier: TransformerIdentifier = - TransformerIdentifier("CachedScoredTweetsResponse") - - override val features: Set[Feature[_, _]] = Set( - AncestorsFeature, - AuthorIdFeature, - AuthorIsBlueVerifiedFeature, - AuthorIsCreatorFeature, - AuthorIsGoldVerifiedFeature, - AuthorIsGrayVerifiedFeature, - AuthorIsLegacyVerifiedFeature, - CachedCandidatePipelineIdentifierFeature, - DirectedAtUserIdFeature, - ExclusiveConversationAuthorIdFeature, - InNetworkFeature, - InReplyToTweetIdFeature, - InReplyToUserIdFeature, - IsReadFromCacheFeature, - IsRetweetFeature, - LastScoredTimestampMsFeature, - PerspectiveFilteredLikedByUserIdsFeature, - QuotedTweetIdFeature, - QuotedUserIdFeature, - SGSValidFollowedByUserIdsFeature, - SGSValidLikedByUserIdsFeature, - ScoreFeature, - SourceTweetIdFeature, - SourceUserIdFeature, - StreamToKafkaFeature, - SuggestTypeFeature, - TopicContextFunctionalityTypeFeature, - TopicIdSocialContextFeature, - TweetUrlsFeature, - WeightedModelScoreFeature - ) - - override def transform(candidate: hmt.ScoredTweet): FeatureMap = - FeatureMapBuilder() - .add(AncestorsFeature, candidate.ancestors.getOrElse(Seq.empty)) - .add(AuthorIdFeature, Some(candidate.authorId)) - .add(AuthorIsBlueVerifiedFeature, candidate.authorMetadata.exists(_.blueVerified)) - .add(AuthorIsGoldVerifiedFeature, candidate.authorMetadata.exists(_.goldVerified)) - .add(AuthorIsGrayVerifiedFeature, candidate.authorMetadata.exists(_.grayVerified)) - .add(AuthorIsLegacyVerifiedFeature, candidate.authorMetadata.exists(_.legacyVerified)) - .add(AuthorIsCreatorFeature, candidate.authorMetadata.exists(_.creator)) - .add(CachedCandidatePipelineIdentifierFeature, candidate.candidatePipelineIdentifier) - .add(DirectedAtUserIdFeature, candidate.directedAtUserId) - .add(ExclusiveConversationAuthorIdFeature, candidate.exclusiveConversationAuthorId) - .add(InNetworkFeature, candidate.inNetwork.getOrElse(true)) - .add(InReplyToTweetIdFeature, candidate.inReplyToTweetId) - .add(InReplyToUserIdFeature, candidate.inReplyToUserId) - .add(IsReadFromCacheFeature, true) - .add(IsRetweetFeature, candidate.sourceTweetId.isDefined) - .add(LastScoredTimestampMsFeature, candidate.lastScoredTimestampMs) - .add( - PerspectiveFilteredLikedByUserIdsFeature, - candidate.perspectiveFilteredLikedByUserIds.getOrElse(Seq.empty)) - .add(QuotedTweetIdFeature, candidate.quotedTweetId) - .add(QuotedUserIdFeature, candidate.quotedUserId) - .add(ScoreFeature, candidate.score) - .add(SGSValidLikedByUserIdsFeature, candidate.sgsValidLikedByUserIds.getOrElse(Seq.empty)) - .add( - SGSValidFollowedByUserIdsFeature, - candidate.sgsValidFollowedByUserIds.getOrElse(Seq.empty)) - .add(SourceTweetIdFeature, candidate.sourceTweetId) - .add(SourceUserIdFeature, candidate.sourceUserId) - .add(StreamToKafkaFeature, false) - .add(SuggestTypeFeature, candidate.suggestType) - .add( - TopicContextFunctionalityTypeFeature, - candidate.topicFunctionalityType.map(TopicContextFunctionalityTypeUnmarshaller(_))) - .add(TopicIdSocialContextFeature, candidate.topicId) - .add(TweetUrlsFeature, candidate.tweetUrls.getOrElse(Seq.empty)) - .add(WeightedModelScoreFeature, candidate.score) - .build() -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsBackfillResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsBackfillResponseFeatureTransformer.docx new file mode 100644 index 000000000..f464c0388 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsBackfillResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsBackfillResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsBackfillResponseFeatureTransformer.scala deleted file mode 100644 index 806c0d11d..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsBackfillResponseFeatureTransformer.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer - -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.FromInNetworkSourceFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.{thriftscala => cts} -import com.twitter.timelineservice.suggests.{thriftscala => st} - -object ScoredTweetsBackfillResponseFeatureTransformer extends CandidateFeatureTransformer[Long] { - - override val identifier: TransformerIdentifier = - TransformerIdentifier("ScoredTweetsBackfillResponse") - - override val features: Set[Feature[_, _]] = Set( - CandidateSourceIdFeature, - FromInNetworkSourceFeature, - SuggestTypeFeature - ) - - override def transform(candidate: Long): FeatureMap = FeatureMapBuilder() - .add(CandidateSourceIdFeature, Some(cts.CandidateTweetSourceId.BackfillOrganicTweet)) - .add(FromInNetworkSourceFeature, true) - .add(SuggestTypeFeature, Some(st.SuggestType.RankedOrganicTweet)) - .build() -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsFrsResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsFrsResponseFeatureTransformer.docx new file mode 100644 index 000000000..817bfbf40 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsFrsResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsFrsResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsFrsResponseFeatureTransformer.scala deleted file mode 100644 index 077dccf24..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsFrsResponseFeatureTransformer.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer - -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.timelineranker.{thriftscala => tlr} -import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.{thriftscala => cts} -import com.twitter.timelineservice.suggests.{thriftscala => st} - -object ScoredTweetsFrsResponseFeatureTransformer - extends CandidateFeatureTransformer[tlr.CandidateTweet] { - - override val identifier: TransformerIdentifier = TransformerIdentifier("ScoredTweetsFrsResponse") - - override val features: Set[Feature[_, _]] = TimelineRankerResponseTransformer.features - - override def transform(candidate: tlr.CandidateTweet): FeatureMap = { - val baseFeatures = TimelineRankerResponseTransformer.transform(candidate) - - val features = FeatureMapBuilder() - .add(CandidateSourceIdFeature, Some(cts.CandidateTweetSourceId.FrsTweet)) - .add(SuggestTypeFeature, Some(st.SuggestType.FrsTweet)) - .build() - - baseFeatures ++ features - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsInNetworkResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsInNetworkResponseFeatureTransformer.docx new file mode 100644 index 000000000..eb9cb2c3e Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsInNetworkResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsInNetworkResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsInNetworkResponseFeatureTransformer.scala deleted file mode 100644 index d7d23bc98..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsInNetworkResponseFeatureTransformer.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer - -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.FromInNetworkSourceFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.timelineranker.{thriftscala => tlr} -import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.{thriftscala => cts} -import com.twitter.timelineservice.suggests.{thriftscala => st} - -object ScoredTweetsInNetworkResponseFeatureTransformer - extends CandidateFeatureTransformer[tlr.CandidateTweet] { - - override val identifier: TransformerIdentifier = - TransformerIdentifier("ScoredTweetsInNetworkResponse") - - override val features: Set[Feature[_, _]] = TimelineRankerResponseTransformer.features - - override def transform(candidate: tlr.CandidateTweet): FeatureMap = { - val baseFeatures = TimelineRankerResponseTransformer.transform(candidate) - - val features = FeatureMapBuilder() - .add(CandidateSourceIdFeature, Some(cts.CandidateTweetSourceId.RecycledTweet)) - .add(FromInNetworkSourceFeature, true) - .add(SuggestTypeFeature, Some(st.SuggestType.RankedTimelineTweet)) - .build() - - baseFeatures ++ features - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsListsResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsListsResponseFeatureTransformer.docx new file mode 100644 index 000000000..23b64deb7 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsListsResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsListsResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsListsResponseFeatureTransformer.scala deleted file mode 100644 index 6a48e84c8..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsListsResponseFeatureTransformer.scala +++ /dev/null @@ -1,45 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer - -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.FromInNetworkSourceFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRetweetFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.timelineservice.{thriftscala => t} -import com.twitter.timelineservice.suggests.{thriftscala => st} -import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.{thriftscala => cts} - -object ScoredTweetsListsResponseFeatureTransformer extends CandidateFeatureTransformer[t.Tweet] { - - override val identifier: TransformerIdentifier = - TransformerIdentifier("ScoredTweetsListsResponse") - - override val features: Set[Feature[_, _]] = Set( - AuthorIdFeature, - CandidateSourceIdFeature, - FromInNetworkSourceFeature, - IsRetweetFeature, - SuggestTypeFeature, - SourceTweetIdFeature, - SourceUserIdFeature, - ) - - override def transform(candidate: t.Tweet): FeatureMap = { - FeatureMapBuilder() - .add(AuthorIdFeature, candidate.userId) - .add(CandidateSourceIdFeature, Some(cts.CandidateTweetSourceId.ListTweet)) - .add(FromInNetworkSourceFeature, false) - .add(IsRetweetFeature, candidate.sourceStatusId.isDefined) - .add(SuggestTypeFeature, Some(st.SuggestType.RankedListTweet)) - .add(SourceTweetIdFeature, candidate.sourceStatusId) - .add(SourceUserIdFeature, candidate.sourceUserId) - .build() - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsPopularVideosResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsPopularVideosResponseFeatureTransformer.docx new file mode 100644 index 000000000..c5bb44aed Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsPopularVideosResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsPopularVideosResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsPopularVideosResponseFeatureTransformer.scala deleted file mode 100644 index 6657e6f5c..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsPopularVideosResponseFeatureTransformer.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer - -import com.twitter.explore_ranker.{thriftscala => ert} -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.FromInNetworkSourceFeature -import com.twitter.home_mixer.model.HomeFeatures.HasVideoFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRandomTweetFeature -import com.twitter.home_mixer.model.HomeFeatures.StreamToKafkaFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.{thriftscala => cts} -import com.twitter.timelineservice.suggests.{thriftscala => st} - -object ScoredTweetsPopularVideosResponseFeatureTransformer - extends CandidateFeatureTransformer[ert.ExploreTweetRecommendation] { - - override val identifier: TransformerIdentifier = - TransformerIdentifier("ScoredTweetsPopularVideosResponse") - - override val features: Set[Feature[_, _]] = Set( - AuthorIdFeature, - CandidateSourceIdFeature, - FromInNetworkSourceFeature, - HasVideoFeature, - IsRandomTweetFeature, - StreamToKafkaFeature, - SuggestTypeFeature - ) - - override def transform(candidate: ert.ExploreTweetRecommendation): FeatureMap = { - FeatureMapBuilder() - .add(AuthorIdFeature, candidate.authorId) - .add(CandidateSourceIdFeature, Some(cts.CandidateTweetSourceId.MediaTweet)) - .add(FromInNetworkSourceFeature, false) - .add(HasVideoFeature, candidate.mediaType.contains(ert.MediaType.Video)) - .add(IsRandomTweetFeature, false) - .add(StreamToKafkaFeature, true) - .add(SuggestTypeFeature, Some(st.SuggestType.MediaTweet)) - .build() - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsTweetMixerResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsTweetMixerResponseFeatureTransformer.docx new file mode 100644 index 000000000..6334fc6fe Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsTweetMixerResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsTweetMixerResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsTweetMixerResponseFeatureTransformer.scala deleted file mode 100644 index 4312b5104..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsTweetMixerResponseFeatureTransformer.scala +++ /dev/null @@ -1,52 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer - -import com.twitter.tweet_mixer.{thriftscala => tmt} -import com.twitter.home_mixer.model.HomeFeatures._ -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.{thriftscala => cts} -import com.twitter.timelineservice.suggests.{thriftscala => st} -import com.twitter.tsp.{thriftscala => tsp} - -object ScoredTweetsTweetMixerResponseFeatureTransformer - extends CandidateFeatureTransformer[tmt.TweetResult] { - - override val identifier: TransformerIdentifier = - TransformerIdentifier("ScoredTweetsTweetMixerResponse") - - override val features: Set[Feature[_, _]] = Set( - CandidateSourceIdFeature, - FromInNetworkSourceFeature, - IsRandomTweetFeature, - StreamToKafkaFeature, - SuggestTypeFeature, - TSPMetricTagFeature - ) - - override def transform(candidate: tmt.TweetResult): FeatureMap = { - val tweetMixerMetricTags = candidate.metricTags.getOrElse(Seq.empty) - val tspMetricTag = tweetMixerMetricTags - .map(TweetMixerMetricTagToTspMetricTag) - .filter(_.nonEmpty).map(_.get).toSet - - FeatureMapBuilder() - .add(CandidateSourceIdFeature, Some(cts.CandidateTweetSourceId.Simcluster)) - .add(FromInNetworkSourceFeature, false) - .add(IsRandomTweetFeature, false) - .add(StreamToKafkaFeature, true) - .add(SuggestTypeFeature, Some(st.SuggestType.ScTweet)) - .add(TSPMetricTagFeature, tspMetricTag) - .build() - } - - private def TweetMixerMetricTagToTspMetricTag( - tweetMixerMetricTag: tmt.MetricTag - ): Option[tsp.MetricTag] = tweetMixerMetricTag match { - case tmt.MetricTag.TweetFavorite => Some(tsp.MetricTag.TweetFavorite) - case tmt.MetricTag.Retweet => Some(tsp.MetricTag.Retweet) - case _ => None - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsUtegResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsUtegResponseFeatureTransformer.docx new file mode 100644 index 000000000..959283e6a Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsUtegResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsUtegResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsUtegResponseFeatureTransformer.scala deleted file mode 100644 index e7bd61b2b..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/ScoredTweetsUtegResponseFeatureTransformer.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer - -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.timelineranker.{thriftscala => tlr} -import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.{thriftscala => cts} -import com.twitter.timelineservice.suggests.{thriftscala => st} - -object ScoredTweetsUtegResponseFeatureTransformer - extends CandidateFeatureTransformer[tlr.CandidateTweet] { - - override val identifier: TransformerIdentifier = TransformerIdentifier("ScoredTweetsUtegResponse") - - override val features: Set[Feature[_, _]] = TimelineRankerResponseTransformer.features - - override def transform(candidate: tlr.CandidateTweet): FeatureMap = { - val baseFeatures = TimelineRankerResponseTransformer.transform(candidate) - - val features = FeatureMapBuilder() - .add(CandidateSourceIdFeature, Some(cts.CandidateTweetSourceId.RecommendedTweet)) - .add(SuggestTypeFeature, Some(st.SuggestType.ActivityTweet)) - .build() - - baseFeatures ++ features - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/TimelineRankerResponseTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/TimelineRankerResponseTransformer.docx new file mode 100644 index 000000000..f7d375fe5 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/TimelineRankerResponseTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/TimelineRankerResponseTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/TimelineRankerResponseTransformer.scala deleted file mode 100644 index a261b2fc2..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/TimelineRankerResponseTransformer.scala +++ /dev/null @@ -1,91 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer - -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.DirectedAtUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.EarlybirdFeature -import com.twitter.home_mixer.model.HomeFeatures.EarlybirdScoreFeature -import com.twitter.home_mixer.model.HomeFeatures.ExclusiveConversationAuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.FromInNetworkSourceFeature -import com.twitter.home_mixer.model.HomeFeatures.HasImageFeature -import com.twitter.home_mixer.model.HomeFeatures.HasVideoFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRandomTweetFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRetweetFeature -import com.twitter.home_mixer.model.HomeFeatures.MentionScreenNameFeature -import com.twitter.home_mixer.model.HomeFeatures.MentionUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.StreamToKafkaFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.home_mixer.model.HomeFeatures.TweetUrlsFeature -import com.twitter.home_mixer.util.tweetypie.content.TweetMediaFeaturesExtractor -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.timelineranker.{thriftscala => tlr} - -object TimelineRankerResponseTransformer { - - val features: Set[Feature[_, _]] = Set( - AuthorIdFeature, - CandidateSourceIdFeature, - DirectedAtUserIdFeature, - EarlybirdFeature, - EarlybirdScoreFeature, - ExclusiveConversationAuthorIdFeature, - FromInNetworkSourceFeature, - HasImageFeature, - HasVideoFeature, - InReplyToTweetIdFeature, - InReplyToUserIdFeature, - IsRandomTweetFeature, - IsRetweetFeature, - MentionScreenNameFeature, - MentionUserIdFeature, - StreamToKafkaFeature, - QuotedTweetIdFeature, - QuotedUserIdFeature, - SourceTweetIdFeature, - SourceUserIdFeature, - SuggestTypeFeature, - TweetUrlsFeature - ) - - def transform(candidate: tlr.CandidateTweet): FeatureMap = { - val tweet = candidate.tweet - val quotedTweet = tweet.filter(_.quotedTweet.exists(_.tweetId != 0)).flatMap(_.quotedTweet) - val mentions = tweet.flatMap(_.mentions).getOrElse(Seq.empty) - val coreData = tweet.flatMap(_.coreData) - val share = coreData.flatMap(_.share) - val reply = coreData.flatMap(_.reply) - - FeatureMapBuilder() - .add(AuthorIdFeature, coreData.map(_.userId)) - .add(DirectedAtUserIdFeature, coreData.flatMap(_.directedAtUser.map(_.userId))) - .add(EarlybirdFeature, candidate.features) - .add(EarlybirdScoreFeature, candidate.features.map(_.earlybirdScore)) - .add( - ExclusiveConversationAuthorIdFeature, - tweet.flatMap(_.exclusiveTweetControl.map(_.conversationAuthorId))) - .add(FromInNetworkSourceFeature, false) - .add(HasImageFeature, tweet.exists(TweetMediaFeaturesExtractor.hasImage)) - .add(HasVideoFeature, tweet.exists(TweetMediaFeaturesExtractor.hasVideo)) - .add(InReplyToTweetIdFeature, reply.flatMap(_.inReplyToStatusId)) - .add(InReplyToUserIdFeature, reply.map(_.inReplyToUserId)) - .add(IsRandomTweetFeature, candidate.features.exists(_.isRandomTweet.getOrElse(false))) - .add(IsRetweetFeature, share.isDefined) - .add(MentionScreenNameFeature, mentions.map(_.screenName)) - .add(MentionUserIdFeature, mentions.flatMap(_.userId)) - .add(StreamToKafkaFeature, true) - .add(QuotedTweetIdFeature, quotedTweet.map(_.tweetId)) - .add(QuotedUserIdFeature, quotedTweet.map(_.userId)) - .add(SourceTweetIdFeature, share.map(_.sourceStatusId)) - .add(SourceUserIdFeature, share.map(_.sourceUserId)) - .add(TweetUrlsFeature, candidate.features.flatMap(_.urlsList).getOrElse(Seq.empty)) - .build() - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/BUILD.bazel deleted file mode 100644 index 81cf19e9b..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/BUILD.bazel +++ /dev/null @@ -1,13 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content", - "home-mixer/thrift/src/main/thrift:thrift-scala", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/transformer", - "src/thrift/com/twitter/search:earlybird-scala", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/BUILD.docx new file mode 100644 index 000000000..5cbf1c8ce Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/EarlybirdResponseTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/EarlybirdResponseTransformer.docx new file mode 100644 index 000000000..2dd5c333a Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/EarlybirdResponseTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/EarlybirdResponseTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/EarlybirdResponseTransformer.scala deleted file mode 100644 index f0b1b59b1..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/EarlybirdResponseTransformer.scala +++ /dev/null @@ -1,92 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer.earlybird - -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.DirectedAtUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.EarlybirdScoreFeature -import com.twitter.home_mixer.model.HomeFeatures.EarlybirdSearchResultFeature -import com.twitter.home_mixer.model.HomeFeatures.ExclusiveConversationAuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.FromInNetworkSourceFeature -import com.twitter.home_mixer.model.HomeFeatures.HasImageFeature -import com.twitter.home_mixer.model.HomeFeatures.HasVideoFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRandomTweetFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRetweetFeature -import com.twitter.home_mixer.model.HomeFeatures.MentionScreenNameFeature -import com.twitter.home_mixer.model.HomeFeatures.MentionUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.StreamToKafkaFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.home_mixer.model.HomeFeatures.TweetUrlsFeature -import com.twitter.home_mixer.util.tweetypie.content.TweetMediaFeaturesExtractor -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.search.earlybird.{thriftscala => eb} - -object EarlybirdResponseTransformer { - - val features: Set[Feature[_, _]] = Set( - AuthorIdFeature, - CandidateSourceIdFeature, - DirectedAtUserIdFeature, - EarlybirdScoreFeature, - EarlybirdSearchResultFeature, - ExclusiveConversationAuthorIdFeature, - FromInNetworkSourceFeature, - HasImageFeature, - HasVideoFeature, - InReplyToTweetIdFeature, - InReplyToUserIdFeature, - IsRandomTweetFeature, - IsRetweetFeature, - MentionScreenNameFeature, - MentionUserIdFeature, - StreamToKafkaFeature, - QuotedTweetIdFeature, - QuotedUserIdFeature, - SourceTweetIdFeature, - SourceUserIdFeature, - SuggestTypeFeature, - TweetUrlsFeature - ) - - def transform(candidate: eb.ThriftSearchResult): FeatureMap = { - val tweet = candidate.tweetypieTweet - val quotedTweet = tweet.flatMap(_.quotedTweet) - val mentions = tweet.flatMap(_.mentions).getOrElse(Seq.empty) - val coreData = tweet.flatMap(_.coreData) - val share = coreData.flatMap(_.share) - val reply = coreData.flatMap(_.reply) - FeatureMapBuilder() - .add(AuthorIdFeature, coreData.map(_.userId)) - .add(DirectedAtUserIdFeature, coreData.flatMap(_.directedAtUser.map(_.userId))) - .add(EarlybirdSearchResultFeature, Some(candidate)) - .add(EarlybirdScoreFeature, candidate.metadata.flatMap(_.score)) - .add( - ExclusiveConversationAuthorIdFeature, - tweet.flatMap(_.exclusiveTweetControl.map(_.conversationAuthorId))) - .add(FromInNetworkSourceFeature, false) - .add(HasImageFeature, tweet.exists(TweetMediaFeaturesExtractor.hasImage)) - .add(HasVideoFeature, tweet.exists(TweetMediaFeaturesExtractor.hasVideo)) - .add(InReplyToTweetIdFeature, reply.flatMap(_.inReplyToStatusId)) - .add(InReplyToUserIdFeature, reply.map(_.inReplyToUserId)) - .add(IsRandomTweetFeature, candidate.tweetFeatures.exists(_.isRandomTweet.getOrElse(false))) - .add(IsRetweetFeature, share.isDefined) - .add(MentionScreenNameFeature, mentions.map(_.screenName)) - .add(MentionUserIdFeature, mentions.flatMap(_.userId)) - .add(StreamToKafkaFeature, true) - .add(QuotedTweetIdFeature, quotedTweet.map(_.tweetId)) - .add(QuotedUserIdFeature, quotedTweet.map(_.userId)) - .add(SourceTweetIdFeature, share.map(_.sourceStatusId)) - .add(SourceUserIdFeature, share.map(_.sourceUserId)) - .add( - TweetUrlsFeature, - candidate.metadata.flatMap(_.tweetUrls.map(_.map(_.originalUrl))).getOrElse(Seq.empty)) - .build() - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdFrsResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdFrsResponseFeatureTransformer.docx new file mode 100644 index 000000000..0e7145d01 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdFrsResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdFrsResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdFrsResponseFeatureTransformer.scala deleted file mode 100644 index bb9ea8bee..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdFrsResponseFeatureTransformer.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer.earlybird - -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.search.earlybird.{thriftscala => eb} -import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.{thriftscala => cts} -import com.twitter.timelineservice.suggests.{thriftscala => st} - -object ScoredTweetsEarlybirdFrsResponseFeatureTransformer - extends CandidateFeatureTransformer[eb.ThriftSearchResult] { - - override val identifier: TransformerIdentifier = - TransformerIdentifier("ScoredTweetsEarlybirdFrsResponse") - - override val features: Set[Feature[_, _]] = EarlybirdResponseTransformer.features - - override def transform(candidate: eb.ThriftSearchResult): FeatureMap = { - - val baseFeatures = EarlybirdResponseTransformer.transform(candidate) - - val features = FeatureMapBuilder() - .add(CandidateSourceIdFeature, Some(cts.CandidateTweetSourceId.FrsTweet)) - .add(SuggestTypeFeature, Some(st.SuggestType.FrsTweet)) - .build() - - baseFeatures ++ features - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdInNetworkResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdInNetworkResponseFeatureTransformer.docx new file mode 100644 index 000000000..bf6f79194 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdInNetworkResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdInNetworkResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdInNetworkResponseFeatureTransformer.scala deleted file mode 100644 index 6b6a9d003..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/response_transformer/earlybird/ScoredTweetsEarlybirdInNetworkResponseFeatureTransformer.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.response_transformer.earlybird - -import com.twitter.home_mixer.model.HomeFeatures.CandidateSourceIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.search.earlybird.{thriftscala => eb} -import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.{thriftscala => cts} -import com.twitter.timelineservice.suggests.{thriftscala => st} - -object ScoredTweetsEarlybirdInNetworkResponseFeatureTransformer - extends CandidateFeatureTransformer[eb.ThriftSearchResult] { - override val identifier: TransformerIdentifier = - TransformerIdentifier("ScoredTweetsEarlybirdInNetworkResponse") - - override val features: Set[Feature[_, _]] = EarlybirdResponseTransformer.features - - override def transform(candidate: eb.ThriftSearchResult): FeatureMap = { - - val baseFeatures = EarlybirdResponseTransformer.transform(candidate) - - val features = FeatureMapBuilder() - .add(CandidateSourceIdFeature, Some(cts.CandidateTweetSourceId.RecycledTweet)) - .add(SuggestTypeFeature, Some(st.SuggestType.RecycledTweet)) - .build() - - baseFeatures ++ features - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/BUILD.bazel deleted file mode 100644 index 91ff12ee1..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/BUILD.bazel +++ /dev/null @@ -1,20 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/scorer", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model/request", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/module", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/feature/featuremap/datarecord", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/product", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/util", - "src/scala/com/twitter/timelines/prediction/features/recap", - "timelineservice/common:model", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/BUILD.docx new file mode 100644 index 000000000..ed9232f13 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/DiversityDiscountProvider.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/DiversityDiscountProvider.docx new file mode 100644 index 000000000..5f5f1013d Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/DiversityDiscountProvider.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/DiversityDiscountProvider.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/DiversityDiscountProvider.scala deleted file mode 100644 index 62dea8748..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/DiversityDiscountProvider.scala +++ /dev/null @@ -1,63 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.scorer - -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.ScoreFeature -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures - -trait DiversityDiscountProvider { - - /** - * Fetch the ID of the entity to diversify - */ - def entityId(candidate: CandidateWithFeatures[TweetCandidate]): Option[Long] - - /** - * Compute discount factor for each candidate based on position (zero-based) - * relative to other candidates associated with the same entity - */ - def discount(position: Int): Double - - /** - * Return candidate IDs sorted by score in descending order - */ - def sort(candidates: Seq[CandidateWithFeatures[TweetCandidate]]): Seq[Long] = candidates - .map { candidate => - (candidate.candidate.id, candidate.features.getOrElse(ScoreFeature, None).getOrElse(0.0)) - } - .sortBy(_._2)(Ordering.Double.reverse) - .map(_._1) - - /** - * Group by the specified entity ID (e.g. authors, likers, followers) - * Sort each group by score in descending order - * Determine the discount factor based on the position of each candidate - */ - def apply( - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Map[Long, Double] = candidates - .groupBy(entityId) - .flatMap { - case (entityIdOpt, entityCandidates) => - val sortedCandidateIds = sort(entityCandidates) - - if (entityIdOpt.isDefined) { - sortedCandidateIds.zipWithIndex.map { - case (candidateId, index) => - candidateId -> discount(index) - } - } else sortedCandidateIds.map(_ -> 1.0) - } -} - -object AuthorDiversityDiscountProvider extends DiversityDiscountProvider { - private val Decay = 0.5 - private val Floor = 0.25 - - override def entityId(candidate: CandidateWithFeatures[TweetCandidate]): Option[Long] = - candidate.features.getOrElse(AuthorIdFeature, None) - - // Provides an exponential decay based discount by position (with a floor) - override def discount(position: Int): Double = - (1 - Floor) * Math.pow(Decay, position) + Floor -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/HeuristicScorer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/HeuristicScorer.docx new file mode 100644 index 000000000..2a130121f Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/HeuristicScorer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/HeuristicScorer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/HeuristicScorer.scala deleted file mode 100644 index 765523b53..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/HeuristicScorer.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.scorer - -import com.twitter.home_mixer.model.HomeFeatures.ScoreFeature -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsQuery -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.scorer.Scorer -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.ScorerIdentifier -import com.twitter.stitch.Stitch - -/** - * Apply various heuristics to the model score - */ -object HeuristicScorer extends Scorer[ScoredTweetsQuery, TweetCandidate] { - - override val identifier: ScorerIdentifier = ScorerIdentifier("Heuristic") - - override val features: Set[Feature[_, _]] = Set(ScoreFeature) - - override def apply( - query: ScoredTweetsQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Stitch[Seq[FeatureMap]] = { - val rescorers = Seq( - RescoreOutOfNetwork, - RescoreReplies, - RescoreBlueVerified, - RescoreCreators, - RescoreMTLNormalization, - RescoreAuthorDiversity(AuthorDiversityDiscountProvider(candidates)), - RescoreFeedbackFatigue(query) - ) - - val updatedScores = candidates.map { candidate => - val score = candidate.features.getOrElse(ScoreFeature, None) - val scaleFactor = rescorers.map(_(query, candidate)).product - val updatedScore = score.map(_ * scaleFactor) - FeatureMapBuilder().add(ScoreFeature, updatedScore).build() - } - - Stitch.value(updatedScores) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/NaviModelScorer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/NaviModelScorer.docx new file mode 100644 index 000000000..323b74ba4 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/NaviModelScorer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/NaviModelScorer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/NaviModelScorer.scala deleted file mode 100644 index cbde87f73..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/NaviModelScorer.scala +++ /dev/null @@ -1,179 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.scorer - -import com.twitter.finagle.stats.Stat -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.home_mixer.model.HomeFeatures.ScoreFeature -import com.twitter.home_mixer.model.HomeFeatures.WeightedModelScoreFeature -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsQuery -import com.twitter.home_mixer.product.scored_tweets.scorer.PredictedScoreFeature.PredictedScoreFeatures -import com.twitter.ml.api.DataRecord -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.FeatureWithDefaultOnFailure -import com.twitter.product_mixer.core.feature.datarecord.DataRecordInAFeature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.datarecord.AllFeatures -import com.twitter.product_mixer.core.feature.featuremap.datarecord.DataRecordConverter -import com.twitter.product_mixer.core.feature.featuremap.datarecord.DataRecordExtractor -import com.twitter.product_mixer.core.functional_component.scorer.Scorer -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.identifier.ScorerIdentifier -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.pipeline.pipeline_failure.IllegalStateFailure -import com.twitter.product_mixer.core.pipeline.pipeline_failure.PipelineFailure -import com.twitter.product_mixer.core.util.OffloadFuturePools -import com.twitter.stitch.Stitch -import com.twitter.timelines.clients.predictionservice.PredictionGRPCService -import com.twitter.timelines.clients.predictionservice.PredictionServiceGRPCClient -import com.twitter.util.Future -import com.twitter.util.Return -import javax.inject.Inject -import javax.inject.Singleton - -object CommonFeaturesDataRecordFeature - extends DataRecordInAFeature[PipelineQuery] - with FeatureWithDefaultOnFailure[PipelineQuery, DataRecord] { - override def defaultValue: DataRecord = new DataRecord() -} - -object CandidateFeaturesDataRecordFeature - extends DataRecordInAFeature[TweetCandidate] - with FeatureWithDefaultOnFailure[TweetCandidate, DataRecord] { - override def defaultValue: DataRecord = new DataRecord() -} - -@Singleton -case class NaviModelScorer @Inject() ( - predictionGRPCService: PredictionGRPCService, - statsReceiver: StatsReceiver) - extends Scorer[ScoredTweetsQuery, TweetCandidate] { - - override val identifier: ScorerIdentifier = ScorerIdentifier("NaviModel") - - override val features: Set[Feature[_, _]] = Set( - CommonFeaturesDataRecordFeature, - CandidateFeaturesDataRecordFeature, - WeightedModelScoreFeature, - ScoreFeature - ) ++ PredictedScoreFeatures.asInstanceOf[Set[Feature[_, _]]] - - private val queryDataRecordAdapter = new DataRecordConverter(AllFeatures()) - private val candidatesDataRecordAdapter = new DataRecordConverter(AllFeatures()) - private val resultDataRecordExtractor = new DataRecordExtractor(PredictedScoreFeatures) - - private val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName) - private val failuresStat = scopedStatsReceiver.stat("failures") - private val responsesStat = scopedStatsReceiver.stat("responses") - private val invalidResponsesCounter = scopedStatsReceiver.counter("invalidResponses") - private val candidatesDataRecordAdapterLatencyStat = - scopedStatsReceiver.scope("candidatesDataRecordAdapter").stat("latency_ms") - - private val StatsReadabilityMultiplier = 1000 - private val Epsilon = 0.001 - private val PredictedScoreStatName = f"predictedScore${StatsReadabilityMultiplier}x" - private val MissingScoreStatName = "missingScore" - private val scoreStat = scopedStatsReceiver.stat(f"score${StatsReadabilityMultiplier}x") - - private val RequestBatchSize = 64 - private val DataRecordConstructionParallelism = 32 - private val ModelId = "Home" - - private val modelClient = new PredictionServiceGRPCClient( - service = predictionGRPCService, - statsReceiver = statsReceiver, - requestBatchSize = RequestBatchSize, - useCompact = false - ) - - override def apply( - query: ScoredTweetsQuery, - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Stitch[Seq[FeatureMap]] = { - val commonRecord = query.features.map(queryDataRecordAdapter.toDataRecord) - val candidateRecords: Future[Seq[DataRecord]] = - Stat.time(candidatesDataRecordAdapterLatencyStat) { - OffloadFuturePools.parallelize[FeatureMap, DataRecord]( - inputSeq = candidates.map(_.features), - transformer = candidatesDataRecordAdapter.toDataRecord(_), - parallelism = DataRecordConstructionParallelism, - default = new DataRecord - ) - } - - val scoreFeatureMaps = candidateRecords.flatMap { records => - val predictionResponses = - modelClient.getPredictions(records, commonRecord, modelId = Some(ModelId)) - - predictionResponses.map { responses => - failuresStat.add(responses.count(_.isThrow)) - responsesStat.add(responses.size) - - if (responses.size == candidates.size) { - val predictedScoreFeatureMaps = responses.map { - case Return(dataRecord) => resultDataRecordExtractor.fromDataRecord(dataRecord) - case _ => resultDataRecordExtractor.fromDataRecord(new DataRecord()) - } - - // Add Data Record to candidate Feature Map for logging in later stages - predictedScoreFeatureMaps.zip(records).map { - case (predictedScoreFeatureMap, candidateRecord) => - val weightedModelScore = computeWeightedModelScore(query, predictedScoreFeatureMap) - scoreStat.add((weightedModelScore * StatsReadabilityMultiplier).toFloat) - - predictedScoreFeatureMap + - (CandidateFeaturesDataRecordFeature, candidateRecord) + - (CommonFeaturesDataRecordFeature, commonRecord.getOrElse(new DataRecord())) + - (ScoreFeature, Some(weightedModelScore)) + - (WeightedModelScoreFeature, Some(weightedModelScore)) - } - } else { - invalidResponsesCounter.incr() - throw PipelineFailure(IllegalStateFailure, "Result size mismatched candidates size") - } - } - } - - Stitch.callFuture(scoreFeatureMaps) - } - - /** - * Compute the weighted sum of predicted scores of all engagements - * Convert negative score to positive, if needed - */ - private def computeWeightedModelScore( - query: PipelineQuery, - features: FeatureMap - ): Double = { - val weightedScoreAndModelWeightSeq = PredictedScoreFeatures.toSeq.map { predictedScoreFeature => - val predictedScoreOpt = predictedScoreFeature.extractScore(features) - - predictedScoreOpt match { - case Some(predictedScore) => - scopedStatsReceiver - .stat(predictedScoreFeature.statName, PredictedScoreStatName) - .add((predictedScore * StatsReadabilityMultiplier).toFloat) - case None => - scopedStatsReceiver.counter(predictedScoreFeature.statName, MissingScoreStatName).incr() - } - - val weight = query.params(predictedScoreFeature.modelWeightParam) - val weightedScore = predictedScoreOpt.getOrElse(0.0) * weight - (weightedScore, weight) - } - - val (weightedScores, modelWeights) = weightedScoreAndModelWeightSeq.unzip - val combinedScoreSum = weightedScores.sum - - val positiveModelWeightsSum = modelWeights.filter(_ > 0.0).sum - val negativeModelWeightsSum = modelWeights.filter(_ < 0).sum.abs - val modelWeightsSum = positiveModelWeightsSum + negativeModelWeightsSum - - val weightedScoresSum = - if (modelWeightsSum == 0) combinedScoreSum.max(0.0) - else if (combinedScoreSum < 0) - (combinedScoreSum + negativeModelWeightsSum) / modelWeightsSum * Epsilon - else combinedScoreSum + Epsilon - - weightedScoresSum - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/PredictedScoreFeature.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/PredictedScoreFeature.docx new file mode 100644 index 000000000..a3dca3ae6 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/PredictedScoreFeature.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/PredictedScoreFeature.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/PredictedScoreFeature.scala deleted file mode 100644 index f24223a99..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/PredictedScoreFeature.scala +++ /dev/null @@ -1,166 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.scorer - -import com.twitter.dal.personal_data.{thriftjava => pd} -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.Scoring.ModelWeights -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.datarecord.DataRecordOptionalFeature -import com.twitter.product_mixer.core.feature.datarecord.DoubleDataRecordCompatible -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.prediction.features.recap.RecapFeatures - -sealed trait PredictedScoreFeature - extends DataRecordOptionalFeature[TweetCandidate, Double] - with DoubleDataRecordCompatible { - - override val personalDataTypes: Set[pd.PersonalDataType] = Set.empty - def statName: String - def modelWeightParam: FSBoundedParam[Double] - def extractScore: FeatureMap => Option[Double] = _.getOrElse(this, None) -} - -object PredictedFavoriteScoreFeature extends PredictedScoreFeature { - override val featureName: String = RecapFeatures.PREDICTED_IS_FAVORITED.getFeatureName - override val statName = "fav" - override val modelWeightParam = ModelWeights.FavParam -} - -object PredictedReplyScoreFeature extends PredictedScoreFeature { - override val featureName: String = RecapFeatures.PREDICTED_IS_REPLIED.getFeatureName - override val statName = "reply" - override val modelWeightParam = ModelWeights.ReplyParam -} - -object PredictedRetweetScoreFeature extends PredictedScoreFeature { - override val featureName: String = RecapFeatures.PREDICTED_IS_RETWEETED.getFeatureName - override val statName = "retweet" - override val modelWeightParam = ModelWeights.RetweetParam -} - -object PredictedReplyEngagedByAuthorScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR.getFeatureName - override val statName = "reply_engaged_by_author" - override val modelWeightParam = ModelWeights.ReplyEngagedByAuthorParam -} - -object PredictedGoodClickConvoDescFavoritedOrRepliedScoreFeature extends PredictedScoreFeature { - override val featureName: String = RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V1.getFeatureName - override val statName = "good_click_convo_desc_favorited_or_replied" - override val modelWeightParam = ModelWeights.GoodClickParam - - override def extractScore: FeatureMap => Option[Double] = { featureMap => - val goodClickV1Opt = featureMap.getOrElse(this, None) - val goodClickV2Opt = featureMap.getOrElse(PredictedGoodClickConvoDescUamGt2ScoreFeature, None) - - (goodClickV1Opt, goodClickV2Opt) match { - case (Some(v1Score), Some(v2Score)) => Some(Math.max(v1Score, v2Score)) - case _ => goodClickV1Opt.orElse(goodClickV2Opt) - } - } -} - -object PredictedGoodClickConvoDescUamGt2ScoreFeature extends PredictedScoreFeature { - override val featureName: String = RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V2.getFeatureName - override val statName = "good_click_convo_desc_uam_gt_2" - override val modelWeightParam = ModelWeights.GoodClickV2Param -} - -object PredictedGoodProfileClickScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED.getFeatureName - override val statName = "good_profile_click" - override val modelWeightParam = ModelWeights.GoodProfileClickParam -} - -object PredictedVideoPlayback50ScoreFeature extends PredictedScoreFeature { - override val featureName: String = RecapFeatures.PREDICTED_IS_VIDEO_PLAYBACK_50.getFeatureName - override val statName = "video_playback_50" - override val modelWeightParam = ModelWeights.VideoPlayback50Param -} - -object PredictedTweetDetailDwellScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_TWEET_DETAIL_DWELLED_15_SEC.getFeatureName - override val statName = "tweet_detail_dwell" - override val modelWeightParam = ModelWeights.TweetDetailDwellParam -} - -object PredictedProfileDwelledScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_PROFILE_DWELLED_20_SEC.getFeatureName - override val statName = "profile_dwell" - override val modelWeightParam = ModelWeights.ProfileDwelledParam -} - -object PredictedBookmarkScoreFeature extends PredictedScoreFeature { - override val featureName: String = RecapFeatures.PREDICTED_IS_BOOKMARKED.getFeatureName - override val statName = "bookmark" - override val modelWeightParam = ModelWeights.BookmarkParam -} - -object PredictedShareScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_SHARED.getFeatureName - override val statName = "share" - override val modelWeightParam = ModelWeights.ShareParam -} - -object PredictedShareMenuClickScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_SHARE_MENU_CLICKED.getFeatureName - override val statName = "share_menu_click" - override val modelWeightParam = ModelWeights.ShareMenuClickParam -} - -// Negative Engagements -object PredictedNegativeFeedbackV2ScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_NEGATIVE_FEEDBACK_V2.getFeatureName - override val statName = "negative_feedback_v2" - override val modelWeightParam = ModelWeights.NegativeFeedbackV2Param -} - -object PredictedReportedScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_REPORT_TWEET_CLICKED.getFeatureName - override val statName = "reported" - override val modelWeightParam = ModelWeights.ReportParam -} - -object PredictedStrongNegativeFeedbackScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_STRONG_NEGATIVE_FEEDBACK.getFeatureName - override val statName = "strong_negative_feedback" - override val modelWeightParam = ModelWeights.StrongNegativeFeedbackParam -} - -object PredictedWeakNegativeFeedbackScoreFeature extends PredictedScoreFeature { - override val featureName: String = - RecapFeatures.PREDICTED_IS_WEAK_NEGATIVE_FEEDBACK.getFeatureName - override val statName = "weak_negative_feedback" - override val modelWeightParam = ModelWeights.WeakNegativeFeedbackParam -} - -object PredictedScoreFeature { - val PredictedScoreFeatures: Set[PredictedScoreFeature] = Set( - PredictedFavoriteScoreFeature, - PredictedReplyScoreFeature, - PredictedRetweetScoreFeature, - PredictedReplyEngagedByAuthorScoreFeature, - PredictedGoodClickConvoDescFavoritedOrRepliedScoreFeature, - PredictedGoodClickConvoDescUamGt2ScoreFeature, - PredictedGoodProfileClickScoreFeature, - PredictedVideoPlayback50ScoreFeature, - PredictedTweetDetailDwellScoreFeature, - PredictedProfileDwelledScoreFeature, - PredictedBookmarkScoreFeature, - PredictedShareScoreFeature, - PredictedShareMenuClickScoreFeature, - // Negative Engagements - PredictedNegativeFeedbackV2ScoreFeature, - PredictedReportedScoreFeature, - PredictedStrongNegativeFeedbackScoreFeature, - PredictedWeakNegativeFeedbackScoreFeature, - ) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/RescoringFactorProvider.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/RescoringFactorProvider.docx new file mode 100644 index 000000000..c6cd72a0c Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/RescoringFactorProvider.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/RescoringFactorProvider.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/RescoringFactorProvider.scala deleted file mode 100644 index d9538b66d..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer/RescoringFactorProvider.scala +++ /dev/null @@ -1,180 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.scorer - -import com.twitter.home_mixer.functional_component.scorer.FeedbackFatigueScorer -import com.twitter.home_mixer.model.HomeFeatures -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsBlueVerifiedFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsCreatorFeature -import com.twitter.home_mixer.model.HomeFeatures.FeedbackHistoryFeature -import com.twitter.home_mixer.model.HomeFeatures.InNetworkFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.BlueVerifiedAuthorInNetworkMultiplierParam -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.BlueVerifiedAuthorOutOfNetworkMultiplierParam -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.CreatorInNetworkMultiplierParam -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.CreatorOutOfNetworkMultiplierParam -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.OutOfNetworkScaleFactorParam -import com.twitter.home_mixer.util.CandidatesUtil -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.timelineservice.{thriftscala => tls} - -trait RescoringFactorProvider { - - def selector(candidate: CandidateWithFeatures[TweetCandidate]): Boolean - - def factor( - query: PipelineQuery, - candidate: CandidateWithFeatures[TweetCandidate] - ): Double - - def apply( - query: PipelineQuery, - candidate: CandidateWithFeatures[TweetCandidate], - ): Double = if (selector(candidate)) factor(query, candidate) else 1.0 -} - -/** - * Re-scoring multiplier to apply to authors who are eligible subscription content creators - */ -object RescoreCreators extends RescoringFactorProvider { - - def selector(candidate: CandidateWithFeatures[TweetCandidate]): Boolean = - candidate.features.getOrElse(AuthorIsCreatorFeature, false) && - CandidatesUtil.isOriginalTweet(candidate) - - def factor( - query: PipelineQuery, - candidate: CandidateWithFeatures[TweetCandidate] - ): Double = - if (candidate.features.getOrElse(InNetworkFeature, false)) - query.params(CreatorInNetworkMultiplierParam) - else query.params(CreatorOutOfNetworkMultiplierParam) -} - -/** - * Re-scoring multiplier to apply to authors who are verified by Twitter Blue - */ -object RescoreBlueVerified extends RescoringFactorProvider { - - def selector(candidate: CandidateWithFeatures[TweetCandidate]): Boolean = - candidate.features.getOrElse(AuthorIsBlueVerifiedFeature, false) && - CandidatesUtil.isOriginalTweet(candidate) - - def factor( - query: PipelineQuery, - candidate: CandidateWithFeatures[TweetCandidate] - ): Double = - if (candidate.features.getOrElse(InNetworkFeature, false)) - query.params(BlueVerifiedAuthorInNetworkMultiplierParam) - else query.params(BlueVerifiedAuthorOutOfNetworkMultiplierParam) -} - -/** - * Re-scoring multiplier to apply to out-of-network tweets - */ -object RescoreOutOfNetwork extends RescoringFactorProvider { - - def selector(candidate: CandidateWithFeatures[TweetCandidate]): Boolean = - !candidate.features.getOrElse(InNetworkFeature, false) - - def factor( - query: PipelineQuery, - candidate: CandidateWithFeatures[TweetCandidate] - ): Double = query.params(OutOfNetworkScaleFactorParam) -} - -/** - * Re-scoring multiplier to apply to reply candidates - */ -object RescoreReplies extends RescoringFactorProvider { - - private val ScaleFactor = 0.75 - - def selector(candidate: CandidateWithFeatures[TweetCandidate]): Boolean = - candidate.features.getOrElse(InReplyToTweetIdFeature, None).isDefined - - def factor( - query: PipelineQuery, - candidate: CandidateWithFeatures[TweetCandidate] - ): Double = ScaleFactor -} - -/** - * Re-scoring multiplier to calibrate multi-tasks learning model prediction - */ -object RescoreMTLNormalization extends RescoringFactorProvider { - - private val ScaleFactor = 1.0 - - def selector(candidate: CandidateWithFeatures[TweetCandidate]): Boolean = { - candidate.features.contains(HomeFeatures.FocalTweetAuthorIdFeature) - } - - def factor( - query: PipelineQuery, - candidate: CandidateWithFeatures[TweetCandidate] - ): Double = ScaleFactor -} - -/** - * Re-scoring multiplier to apply to multiple tweets from the same author - */ -case class RescoreAuthorDiversity(diversityDiscounts: Map[Long, Double]) - extends RescoringFactorProvider { - - def selector(candidate: CandidateWithFeatures[TweetCandidate]): Boolean = - diversityDiscounts.contains(candidate.candidate.id) - - def factor( - query: PipelineQuery, - candidate: CandidateWithFeatures[TweetCandidate] - ): Double = diversityDiscounts(candidate.candidate.id) -} - -case class RescoreFeedbackFatigue(query: PipelineQuery) extends RescoringFactorProvider { - - def selector(candidate: CandidateWithFeatures[TweetCandidate]): Boolean = true - - private val feedbackEntriesByEngagementType = - query.features - .getOrElse(FeatureMap.empty).getOrElse(FeedbackHistoryFeature, Seq.empty) - .filter { entry => - val timeSinceFeedback = query.queryTime.minus(entry.timestamp) - timeSinceFeedback < FeedbackFatigueScorer.DurationForFiltering + FeedbackFatigueScorer.DurationForDiscounting && - entry.feedbackType == tls.FeedbackType.SeeFewer - }.groupBy(_.engagementType) - - private val authorsToDiscount = - FeedbackFatigueScorer.getUserDiscounts( - query.queryTime, - feedbackEntriesByEngagementType.getOrElse(tls.FeedbackEngagementType.Tweet, Seq.empty)) - - private val likersToDiscount = - FeedbackFatigueScorer.getUserDiscounts( - query.queryTime, - feedbackEntriesByEngagementType.getOrElse(tls.FeedbackEngagementType.Like, Seq.empty)) - - private val followersToDiscount = - FeedbackFatigueScorer.getUserDiscounts( - query.queryTime, - feedbackEntriesByEngagementType.getOrElse(tls.FeedbackEngagementType.Follow, Seq.empty)) - - private val retweetersToDiscount = - FeedbackFatigueScorer.getUserDiscounts( - query.queryTime, - feedbackEntriesByEngagementType.getOrElse(tls.FeedbackEngagementType.Retweet, Seq.empty)) - - def factor( - query: PipelineQuery, - candidate: CandidateWithFeatures[TweetCandidate] - ): Double = { - FeedbackFatigueScorer.getScoreMultiplier( - candidate, - authorsToDiscount, - likersToDiscount, - followersToDiscount, - retweetersToDiscount - ) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/BUILD.bazel deleted file mode 100644 index a7fccd8ff..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/BUILD.bazel +++ /dev/null @@ -1,25 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/feature_hydrator", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/scorer", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model/request", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/module", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/candidate_pipeline", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/offline_aggregates", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/real_time_aggregates", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/gate", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/selector", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/feature/featuremap/datarecord", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/scoring", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/BUILD.docx new file mode 100644 index 000000000..4330a5e84 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsHeuristicScoringPipelineConfig.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsHeuristicScoringPipelineConfig.docx new file mode 100644 index 000000000..4ff998e38 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsHeuristicScoringPipelineConfig.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsHeuristicScoringPipelineConfig.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsHeuristicScoringPipelineConfig.scala deleted file mode 100644 index aedfc15b5..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsHeuristicScoringPipelineConfig.scala +++ /dev/null @@ -1,23 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.scoring_pipeline - -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsQuery -import com.twitter.home_mixer.product.scored_tweets.scorer.HeuristicScorer -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.component_library.selector.InsertAppendResults -import com.twitter.product_mixer.core.functional_component.common.AllPipelines -import com.twitter.product_mixer.core.functional_component.scorer.Scorer -import com.twitter.product_mixer.core.functional_component.selector.Selector -import com.twitter.product_mixer.core.model.common.identifier.ScoringPipelineIdentifier -import com.twitter.product_mixer.core.pipeline.scoring.ScoringPipelineConfig - -object ScoredTweetsHeuristicScoringPipelineConfig - extends ScoringPipelineConfig[ScoredTweetsQuery, TweetCandidate] { - - override val identifier: ScoringPipelineIdentifier = - ScoringPipelineIdentifier("ScoredTweetsHeuristic") - - override val selectors: Seq[Selector[ScoredTweetsQuery]] = Seq(InsertAppendResults(AllPipelines)) - - override val scorers: Seq[Scorer[ScoredTweetsQuery, TweetCandidate]] = - Seq(HeuristicScorer) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsModelScoringPipelineConfig.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsModelScoringPipelineConfig.docx new file mode 100644 index 000000000..c8957b53e Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsModelScoringPipelineConfig.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsModelScoringPipelineConfig.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsModelScoringPipelineConfig.scala deleted file mode 100644 index ab1b49a83..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scoring_pipeline/ScoredTweetsModelScoringPipelineConfig.scala +++ /dev/null @@ -1,223 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.scoring_pipeline - -import com.twitter.home_mixer.functional_component.feature_hydrator._ -import com.twitter.home_mixer.model.HomeFeatures.EarlybirdScoreFeature -import com.twitter.home_mixer.product.scored_tweets.candidate_pipeline.CachedScoredTweetsCandidatePipelineConfig -import com.twitter.home_mixer.product.scored_tweets.candidate_pipeline.ScoredTweetsBackfillCandidatePipelineConfig -import com.twitter.home_mixer.product.scored_tweets.candidate_pipeline.ScoredTweetsFrsCandidatePipelineConfig -import com.twitter.home_mixer.product.scored_tweets.candidate_pipeline.ScoredTweetsInNetworkCandidatePipelineConfig -import com.twitter.home_mixer.product.scored_tweets.candidate_pipeline.ScoredTweetsListsCandidatePipelineConfig -import com.twitter.home_mixer.product.scored_tweets.candidate_pipeline.ScoredTweetsPopularVideosCandidatePipelineConfig -import com.twitter.home_mixer.product.scored_tweets.candidate_pipeline.ScoredTweetsTweetMixerCandidatePipelineConfig -import com.twitter.home_mixer.product.scored_tweets.candidate_pipeline.ScoredTweetsUtegCandidatePipelineConfig -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.AncestorFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.AuthorFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.AuthorIsCreatorFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.EarlybirdFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.GizmoduckAuthorFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.GraphTwoHopFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.MetricCenterUserCountingFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.RealGraphViewerAuthorFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.RealGraphViewerRelatedUsersFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.RealTimeInteractionGraphEdgeFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.SimClustersEngagementSimilarityFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.SimClustersUserTweetScoresHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.TSPInferredTopicFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.TweetMetaDataFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.TweetTimeFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.TweetypieContentFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.TwhinAuthorFollowFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.UtegFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.offline_aggregates.Phase1EdgeAggregateFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.offline_aggregates.Phase2EdgeAggregateFeatureHydrator -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.real_time_aggregates._ -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsQuery -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.QualityFactor -import com.twitter.home_mixer.product.scored_tweets.scorer.NaviModelScorer -import com.twitter.home_mixer.util.CandidatesUtil -import com.twitter.product_mixer.component_library.gate.NonEmptyCandidatesGate -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.component_library.selector.DropMaxCandidates -import com.twitter.product_mixer.component_library.selector.InsertAppendResults -import com.twitter.product_mixer.component_library.selector.UpdateSortCandidates -import com.twitter.product_mixer.core.functional_component.common.AllExceptPipelines -import com.twitter.product_mixer.core.functional_component.common.SpecificPipeline -import com.twitter.product_mixer.core.functional_component.common.SpecificPipelines -import com.twitter.product_mixer.core.functional_component.feature_hydrator.BaseCandidateFeatureHydrator -import com.twitter.product_mixer.core.functional_component.gate.BaseGate -import com.twitter.product_mixer.core.functional_component.scorer.Scorer -import com.twitter.product_mixer.core.functional_component.selector.Selector -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.model.common.identifier.ScoringPipelineIdentifier -import com.twitter.product_mixer.core.model.common.presentation.CandidateWithDetails -import com.twitter.product_mixer.core.model.common.presentation.ItemCandidateWithDetails -import com.twitter.product_mixer.core.pipeline.pipeline_failure.PipelineFailure -import com.twitter.product_mixer.core.pipeline.pipeline_failure.UnexpectedCandidateResult -import com.twitter.product_mixer.core.pipeline.scoring.ScoringPipelineConfig -import com.twitter.timelines.configapi.Param - -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -class ScoredTweetsModelScoringPipelineConfig @Inject() ( - // candidate sources - scoredTweetsInNetworkCandidatePipelineConfig: ScoredTweetsInNetworkCandidatePipelineConfig, - scoredTweetsUtegCandidatePipelineConfig: ScoredTweetsUtegCandidatePipelineConfig, - scoredTweetsTweetMixerCandidatePipelineConfig: ScoredTweetsTweetMixerCandidatePipelineConfig, - scoredTweetsFrsCandidatePipelineConfig: ScoredTweetsFrsCandidatePipelineConfig, - scoredTweetsListsCandidatePipelineConfig: ScoredTweetsListsCandidatePipelineConfig, - scoredTweetsPopularVideosCandidatePipelineConfig: ScoredTweetsPopularVideosCandidatePipelineConfig, - scoredTweetsBackfillCandidatePipelineConfig: ScoredTweetsBackfillCandidatePipelineConfig, - // feature hydrators - ancestorFeatureHydrator: AncestorFeatureHydrator, - authorFeatureHydrator: AuthorFeatureHydrator, - authorIsCreatorFeatureHydrator: AuthorIsCreatorFeatureHydrator, - earlybirdFeatureHydrator: EarlybirdFeatureHydrator, - gizmoduckAuthorSafetyFeatureHydrator: GizmoduckAuthorFeatureHydrator, - graphTwoHopFeatureHydrator: GraphTwoHopFeatureHydrator, - metricCenterUserCountingFeatureHydrator: MetricCenterUserCountingFeatureHydrator, - perspectiveFilteredSocialContextFeatureHydrator: PerspectiveFilteredSocialContextFeatureHydrator, - realGraphViewerAuthorFeatureHydrator: RealGraphViewerAuthorFeatureHydrator, - realGraphViewerRelatedUsersFeatureHydrator: RealGraphViewerRelatedUsersFeatureHydrator, - realTimeInteractionGraphEdgeFeatureHydrator: RealTimeInteractionGraphEdgeFeatureHydrator, - sgsValidSocialContextFeatureHydrator: SGSValidSocialContextFeatureHydrator, - simClustersEngagementSimilarityFeatureHydrator: SimClustersEngagementSimilarityFeatureHydrator, - simClustersUserTweetScoresHydrator: SimClustersUserTweetScoresHydrator, - tspInferredTopicFeatureHydrator: TSPInferredTopicFeatureHydrator, - tweetypieContentFeatureHydrator: TweetypieContentFeatureHydrator, - twhinAuthorFollowFeatureHydrator: TwhinAuthorFollowFeatureHydrator, - utegFeatureHydrator: UtegFeatureHydrator, - // real time aggregate feature hydrators - engagementsReceivedByAuthorRealTimeAggregateFeatureHydrator: EngagementsReceivedByAuthorRealTimeAggregateFeatureHydrator, - topicCountryEngagementRealTimeAggregateFeatureHydrator: TopicCountryEngagementRealTimeAggregateFeatureHydrator, - topicEngagementRealTimeAggregateFeatureHydrator: TopicEngagementRealTimeAggregateFeatureHydrator, - tweetCountryEngagementRealTimeAggregateFeatureHydrator: TweetCountryEngagementRealTimeAggregateFeatureHydrator, - tweetEngagementRealTimeAggregateFeatureHydrator: TweetEngagementRealTimeAggregateFeatureHydrator, - twitterListEngagementRealTimeAggregateFeatureHydrator: TwitterListEngagementRealTimeAggregateFeatureHydrator, - userAuthorEngagementRealTimeAggregateFeatureHydrator: UserAuthorEngagementRealTimeAggregateFeatureHydrator, - // offline aggregate feature hydrators - phase1EdgeAggregateFeatureHydrator: Phase1EdgeAggregateFeatureHydrator, - phase2EdgeAggregateFeatureHydrator: Phase2EdgeAggregateFeatureHydrator, - // model - naviModelScorer: NaviModelScorer) - extends ScoringPipelineConfig[ScoredTweetsQuery, TweetCandidate] { - - override val identifier: ScoringPipelineIdentifier = - ScoringPipelineIdentifier("ScoredTweetsModel") - - private val nonCachedScoringPipelineScope = AllExceptPipelines( - pipelinesToExclude = Set(CachedScoredTweetsCandidatePipelineConfig.Identifier) - ) - - override val gates: Seq[BaseGate[ScoredTweetsQuery]] = Seq( - NonEmptyCandidatesGate(nonCachedScoringPipelineScope) - ) - - private val earlybirdScorePipelineScope = Set( - scoredTweetsInNetworkCandidatePipelineConfig.identifier, - scoredTweetsUtegCandidatePipelineConfig.identifier, - scoredTweetsFrsCandidatePipelineConfig.identifier - ) - - private val earlybirdScoreOrdering: Ordering[CandidateWithDetails] = - Ordering.by[CandidateWithDetails, Double] { - case ItemCandidateWithDetails(_, _, features) => - -features.getOrElse(EarlybirdScoreFeature, None).getOrElse(0.0) - case _ => throw PipelineFailure(UnexpectedCandidateResult, "Invalid candidate type") - } - - private def qualityFactorDropMaxCandidates( - pipelineIdentifier: CandidatePipelineIdentifier, - qualityFactorParam: Param[Int] - ): DropMaxCandidates[ScoredTweetsQuery] = { - new DropMaxCandidates( - pipelineScope = SpecificPipelines(pipelineIdentifier), - maxSelector = (query, _, _) => - (query.getQualityFactorCurrentValue(identifier) * - query.params(qualityFactorParam)).toInt - ) - } - - override val selectors: Seq[Selector[ScoredTweetsQuery]] = Seq( - UpdateSortCandidates(SpecificPipelines(earlybirdScorePipelineScope), earlybirdScoreOrdering), - UpdateSortCandidates( - SpecificPipeline(scoredTweetsBackfillCandidatePipelineConfig.identifier), - CandidatesUtil.reverseChronTweetsOrdering - ), - qualityFactorDropMaxCandidates( - scoredTweetsInNetworkCandidatePipelineConfig.identifier, - QualityFactor.InNetworkMaxTweetsToScoreParam - ), - qualityFactorDropMaxCandidates( - scoredTweetsUtegCandidatePipelineConfig.identifier, - QualityFactor.UtegMaxTweetsToScoreParam - ), - qualityFactorDropMaxCandidates( - scoredTweetsFrsCandidatePipelineConfig.identifier, - QualityFactor.FrsMaxTweetsToScoreParam - ), - qualityFactorDropMaxCandidates( - scoredTweetsTweetMixerCandidatePipelineConfig.identifier, - QualityFactor.TweetMixerMaxTweetsToScoreParam - ), - qualityFactorDropMaxCandidates( - scoredTweetsListsCandidatePipelineConfig.identifier, - QualityFactor.ListsMaxTweetsToScoreParam - ), - qualityFactorDropMaxCandidates( - scoredTweetsPopularVideosCandidatePipelineConfig.identifier, - QualityFactor.PopularVideosMaxTweetsToScoreParam - ), - qualityFactorDropMaxCandidates( - scoredTweetsBackfillCandidatePipelineConfig.identifier, - QualityFactor.BackfillMaxTweetsToScoreParam - ), - // Select candidates for Heavy Ranker Feature Hydration and Scoring - InsertAppendResults(nonCachedScoringPipelineScope) - ) - - override val preScoringFeatureHydrationPhase1: Seq[ - BaseCandidateFeatureHydrator[ScoredTweetsQuery, TweetCandidate, _] - ] = Seq( - TweetMetaDataFeatureHydrator, - ancestorFeatureHydrator, - authorFeatureHydrator, - authorIsCreatorFeatureHydrator, - earlybirdFeatureHydrator, - gizmoduckAuthorSafetyFeatureHydrator, - graphTwoHopFeatureHydrator, - metricCenterUserCountingFeatureHydrator, - realTimeInteractionGraphEdgeFeatureHydrator, - realGraphViewerAuthorFeatureHydrator, - simClustersEngagementSimilarityFeatureHydrator, - simClustersUserTweetScoresHydrator, - InNetworkFeatureHydrator, - tspInferredTopicFeatureHydrator, - tweetypieContentFeatureHydrator, - twhinAuthorFollowFeatureHydrator, - utegFeatureHydrator, - // real time aggregates - engagementsReceivedByAuthorRealTimeAggregateFeatureHydrator, - tweetCountryEngagementRealTimeAggregateFeatureHydrator, - tweetEngagementRealTimeAggregateFeatureHydrator, - twitterListEngagementRealTimeAggregateFeatureHydrator, - userAuthorEngagementRealTimeAggregateFeatureHydrator, - // offline aggregates - phase1EdgeAggregateFeatureHydrator - ) - - override val preScoringFeatureHydrationPhase2: Seq[ - BaseCandidateFeatureHydrator[ScoredTweetsQuery, TweetCandidate, _] - ] = Seq( - perspectiveFilteredSocialContextFeatureHydrator, - phase2EdgeAggregateFeatureHydrator, - realGraphViewerRelatedUsersFeatureHydrator, - sgsValidSocialContextFeatureHydrator, - TweetTimeFeatureHydrator, - topicCountryEngagementRealTimeAggregateFeatureHydrator, - topicEngagementRealTimeAggregateFeatureHydrator - ) - - override val scorers: Seq[Scorer[ScoredTweetsQuery, TweetCandidate]] = Seq(naviModelScorer) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/BUILD.bazel deleted file mode 100644 index c5dbb187f..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/BUILD.bazel +++ /dev/null @@ -1,12 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/selector", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/model/common/presentation", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/BUILD.docx new file mode 100644 index 000000000..2e7401cf5 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/KeepBestOutOfNetworkCandidatePerAuthorPerSuggestType.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/KeepBestOutOfNetworkCandidatePerAuthorPerSuggestType.docx new file mode 100644 index 000000000..c2821d972 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/KeepBestOutOfNetworkCandidatePerAuthorPerSuggestType.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/KeepBestOutOfNetworkCandidatePerAuthorPerSuggestType.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/KeepBestOutOfNetworkCandidatePerAuthorPerSuggestType.scala deleted file mode 100644 index c485c4c2a..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/selector/KeepBestOutOfNetworkCandidatePerAuthorPerSuggestType.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.selector - -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InNetworkFeature -import com.twitter.home_mixer.model.HomeFeatures.ScoreFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.product_mixer.core.functional_component.common.CandidateScope -import com.twitter.product_mixer.core.functional_component.selector.Selector -import com.twitter.product_mixer.core.functional_component.selector.SelectorResult -import com.twitter.product_mixer.core.model.common.presentation.CandidateWithDetails -import com.twitter.product_mixer.core.pipeline.PipelineQuery - -case class KeepBestOutOfNetworkCandidatePerAuthorPerSuggestType( - override val pipelineScope: CandidateScope) - extends Selector[PipelineQuery] { - - override def apply( - query: PipelineQuery, - remainingCandidates: Seq[CandidateWithDetails], - result: Seq[CandidateWithDetails] - ): SelectorResult = { - val (selectedCandidates, otherCandidates) = - remainingCandidates.partition(candidate => - pipelineScope.contains(candidate) && !candidate.features.getOrElse(InNetworkFeature, true)) - - val filteredCandidates = selectedCandidates - .groupBy { candidate => - ( - candidate.features.getOrElse(AuthorIdFeature, None), - candidate.features.getOrElse(SuggestTypeFeature, None) - ) - } - .values.map(_.maxBy(_.features.getOrElse(ScoreFeature, None))) - .toSeq - - val updatedCandidates = otherCandidates ++ filteredCandidates - SelectorResult(remainingCandidates = updatedCandidates, result = result) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/BUILD.bazel deleted file mode 100644 index 2147ee217..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/BUILD.bazel +++ /dev/null @@ -1,33 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "finagle/finagle-mysql/src/main/scala", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/feature_hydrator", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model/request", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/param", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/feature_hydrator/adapters/non_ml_features", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/param", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/scorer", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/service", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util", - "home-mixer/thrift/src/main/thrift:thrift-scala", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/side_effect", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/feature/featuremap/datarecord", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/marshaller/response/urt", - "servo/repo/src/main/scala", - "servo/util/src/main/scala", - "src/scala/com/twitter/timelines/prediction/common/adapters", - "src/thrift/com/twitter/timelines/suggests/common:data_record_metadata-scala", - "src/thrift/com/twitter/timelines/suggests/common:poly_data_record-java", - "src/thrift/com/twitter/timelines/timeline_logging:thrift-scala", - "timelines/ml:pldr-client", - "timelines/ml:pldr-conversion", - "timelines/ml/cont_train/common/domain/src/main/scala/com/twitter/timelines/ml/cont_train/common/domain/non_scalding", - "timelines/src/main/scala/com/twitter/timelines/util/stats", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/BUILD.docx new file mode 100644 index 000000000..0a9913013 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/CachedScoredTweetsSideEffect.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/CachedScoredTweetsSideEffect.docx new file mode 100644 index 000000000..fe846d80e Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/CachedScoredTweetsSideEffect.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/CachedScoredTweetsSideEffect.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/CachedScoredTweetsSideEffect.scala deleted file mode 100644 index 3d66ff54a..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/CachedScoredTweetsSideEffect.scala +++ /dev/null @@ -1,134 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.side_effect - -import com.twitter.home_mixer.model.HomeFeatures.AncestorsFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsBlueVerifiedFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsCreatorFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsGoldVerifiedFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsGrayVerifiedFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIsLegacyVerifiedFeature -import com.twitter.home_mixer.model.HomeFeatures.CachedCandidatePipelineIdentifierFeature -import com.twitter.home_mixer.model.HomeFeatures.DirectedAtUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.ExclusiveConversationAuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InNetworkFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.LastScoredTimestampMsFeature -import com.twitter.home_mixer.model.HomeFeatures.PerspectiveFilteredLikedByUserIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SGSValidFollowedByUserIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.SGSValidLikedByUserIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.ScoreFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.home_mixer.model.HomeFeatures.TopicContextFunctionalityTypeFeature -import com.twitter.home_mixer.model.HomeFeatures.TopicIdSocialContextFeature -import com.twitter.home_mixer.model.HomeFeatures.TweetUrlsFeature -import com.twitter.home_mixer.model.HomeFeatures.WeightedModelScoreFeature -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsResponse -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.CachedScoredTweets -import com.twitter.home_mixer.service.HomeMixerAlertConfig -import com.twitter.home_mixer.{thriftscala => hmt} -import com.twitter.product_mixer.core.functional_component.marshaller.response.urt.metadata.TopicContextFunctionalityTypeMarshaller -import com.twitter.product_mixer.core.functional_component.side_effect.PipelineResultSideEffect -import com.twitter.product_mixer.core.model.common.identifier.SideEffectIdentifier -import com.twitter.product_mixer.core.model.common.presentation.CandidateWithDetails -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.servo.cache.TtlCache -import com.twitter.stitch.Stitch -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -class CachedScoredTweetsSideEffect @Inject() ( - scoredTweetsCache: TtlCache[Long, hmt.ScoredTweetsResponse]) - extends PipelineResultSideEffect[PipelineQuery, ScoredTweetsResponse] { - - override val identifier: SideEffectIdentifier = SideEffectIdentifier("CachedScoredTweets") - - private val MaxTweetsToCache = 1000 - - def buildCachedScoredTweets( - query: PipelineQuery, - candidates: Seq[CandidateWithDetails] - ): hmt.ScoredTweetsResponse = { - val tweets = candidates.map { candidate => - val sgsValidLikedByUserIds = - candidate.features.getOrElse(SGSValidLikedByUserIdsFeature, Seq.empty) - val sgsValidFollowedByUserIds = - candidate.features.getOrElse(SGSValidFollowedByUserIdsFeature, Seq.empty) - val perspectiveFilteredLikedByUserIds = - candidate.features.getOrElse(PerspectiveFilteredLikedByUserIdsFeature, Seq.empty) - val ancestors = candidate.features.getOrElse(AncestorsFeature, Seq.empty) - - hmt.ScoredTweet( - tweetId = candidate.candidateIdLong, - authorId = candidate.features.get(AuthorIdFeature).get, - // Cache the model score instead of the final score because rescoring is per-request - score = candidate.features.getOrElse(WeightedModelScoreFeature, None), - suggestType = candidate.features.getOrElse(SuggestTypeFeature, None), - sourceTweetId = candidate.features.getOrElse(SourceTweetIdFeature, None), - sourceUserId = candidate.features.getOrElse(SourceUserIdFeature, None), - quotedTweetId = candidate.features.getOrElse(QuotedTweetIdFeature, None), - quotedUserId = candidate.features.getOrElse(QuotedUserIdFeature, None), - inReplyToTweetId = candidate.features.getOrElse(InReplyToTweetIdFeature, None), - inReplyToUserId = candidate.features.getOrElse(InReplyToUserIdFeature, None), - directedAtUserId = candidate.features.getOrElse(DirectedAtUserIdFeature, None), - inNetwork = Some(candidate.features.getOrElse(InNetworkFeature, true)), - sgsValidLikedByUserIds = Some(sgsValidLikedByUserIds), - sgsValidFollowedByUserIds = Some(sgsValidFollowedByUserIds), - topicId = candidate.features.getOrElse(TopicIdSocialContextFeature, None), - topicFunctionalityType = candidate.features - .getOrElse(TopicContextFunctionalityTypeFeature, None).map( - TopicContextFunctionalityTypeMarshaller(_)), - ancestors = if (ancestors.nonEmpty) Some(ancestors) else None, - isReadFromCache = Some(true), - streamToKafka = Some(false), - exclusiveConversationAuthorId = candidate.features - .getOrElse(ExclusiveConversationAuthorIdFeature, None), - authorMetadata = Some( - hmt.AuthorMetadata( - blueVerified = candidate.features.getOrElse(AuthorIsBlueVerifiedFeature, false), - goldVerified = candidate.features.getOrElse(AuthorIsGoldVerifiedFeature, false), - grayVerified = candidate.features.getOrElse(AuthorIsGrayVerifiedFeature, false), - legacyVerified = candidate.features.getOrElse(AuthorIsLegacyVerifiedFeature, false), - creator = candidate.features.getOrElse(AuthorIsCreatorFeature, false) - )), - lastScoredTimestampMs = candidate.features - .getOrElse(LastScoredTimestampMsFeature, Some(query.queryTime.inMilliseconds)), - candidatePipelineIdentifier = candidate.features - .getOrElse(CachedCandidatePipelineIdentifierFeature, Some(candidate.source.name)), - tweetUrls = Some(candidate.features.getOrElse(TweetUrlsFeature, Seq.empty)), - perspectiveFilteredLikedByUserIds = Some(perspectiveFilteredLikedByUserIds) - ) - } - - hmt.ScoredTweetsResponse(tweets) - } - - final override def apply( - inputs: PipelineResultSideEffect.Inputs[PipelineQuery, ScoredTweetsResponse] - ): Stitch[Unit] = { - val candidates = - (inputs.selectedCandidates ++ inputs.remainingCandidates ++ inputs.droppedCandidates) - .filter(_.features.getOrElse(ScoreFeature, None).exists(_ > 0.0)) - - val truncatedCandidates = - if (candidates.size > MaxTweetsToCache) - candidates - .sortBy(-_.features.getOrElse(ScoreFeature, None).getOrElse(0.0)).take(MaxTweetsToCache) - else candidates - - if (truncatedCandidates.nonEmpty) { - val ttl = inputs.query.params(CachedScoredTweets.TTLParam) - val scoredTweets = buildCachedScoredTweets(inputs.query, truncatedCandidates) - Stitch.callFuture(scoredTweetsCache.set(inputs.query.getRequiredUserId, scoredTweets, ttl)) - } else Stitch.Unit - } - - override val alerts = Seq( - HomeMixerAlertConfig.BusinessHours.defaultSuccessRateAlert(99.4) - ) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeScoredCandidatesSideEffect.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeScoredCandidatesSideEffect.docx new file mode 100644 index 000000000..2fd4bc049 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeScoredCandidatesSideEffect.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeScoredCandidatesSideEffect.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeScoredCandidatesSideEffect.scala deleted file mode 100644 index ef7e3b41a..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeScoredCandidatesSideEffect.scala +++ /dev/null @@ -1,126 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.side_effect - -import com.twitter.finagle.tracing.Trace -import com.twitter.home_mixer.model.HomeFeatures.AncestorsFeature -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.DirectedAtUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.EarlybirdScoreFeature -import com.twitter.home_mixer.model.HomeFeatures.FavoritedByUserIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.FollowedByUserIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.FromInNetworkSourceFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.QuotedUserIdFeature -import com.twitter.home_mixer.model.HomeFeatures.RequestJoinIdFeature -import com.twitter.home_mixer.model.HomeFeatures.ScoreFeature -import com.twitter.home_mixer.model.HomeFeatures.SuggestTypeFeature -import com.twitter.home_mixer.param.HomeMixerFlagName.ScribeScoredCandidatesFlag -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsQuery -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsResponse -import com.twitter.home_mixer.product.scored_tweets.param.ScoredTweetsParam.EnableScribeScoredCandidatesParam -import com.twitter.inject.annotations.Flag -import com.twitter.logpipeline.client.common.EventPublisher -import com.twitter.product_mixer.component_library.side_effect.ScribeLogEventSideEffect -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.functional_component.side_effect.PipelineResultSideEffect -import com.twitter.product_mixer.core.model.common.identifier.SideEffectIdentifier -import com.twitter.product_mixer.core.model.common.presentation.CandidatePipelines -import com.twitter.product_mixer.core.model.common.presentation.CandidateWithDetails -import com.twitter.timelines.timeline_logging.{thriftscala => t} -import javax.inject.Inject -import javax.inject.Singleton -import com.twitter.util.logging.Logging - -/** - * Side effect that logs scored candidates from scoring pipelines - */ -@Singleton -class ScribeScoredCandidatesSideEffect @Inject() ( - @Flag(ScribeScoredCandidatesFlag) enableScribeScoredCandidates: Boolean, - eventBusPublisher: EventPublisher[t.ScoredCandidate]) - extends ScribeLogEventSideEffect[ - t.ScoredCandidate, - ScoredTweetsQuery, - ScoredTweetsResponse - ] - with PipelineResultSideEffect.Conditionally[ - ScoredTweetsQuery, - ScoredTweetsResponse - ] - with Logging { - - override val identifier: SideEffectIdentifier = - SideEffectIdentifier("ScribeScoredCandidates") - - override def onlyIf( - query: ScoredTweetsQuery, - selectedCandidates: Seq[CandidateWithDetails], - remainingCandidates: Seq[CandidateWithDetails], - droppedCandidates: Seq[CandidateWithDetails], - response: ScoredTweetsResponse - ): Boolean = enableScribeScoredCandidates && query.params(EnableScribeScoredCandidatesParam) - - /** - * Build the log events from query, selections and response - * - * @param query PipelineQuery - * @param selectedCandidates Result after Selectors are executed - * @param remainingCandidates Candidates which were not selected - * @param droppedCandidates Candidates dropped during selection - * @param response Result after Unmarshalling - * - * @return LogEvent in thrift - */ - override def buildLogEvents( - query: ScoredTweetsQuery, - selectedCandidates: Seq[CandidateWithDetails], - remainingCandidates: Seq[CandidateWithDetails], - droppedCandidates: Seq[CandidateWithDetails], - response: ScoredTweetsResponse - ): Seq[t.ScoredCandidate] = { - val returned = (selectedCandidates ++ remainingCandidates).map(toThrift(_, query, false)) - val dropped = droppedCandidates.map(toThrift(_, query, true)) - returned ++ dropped - } - - private def toThrift( - candidate: CandidateWithDetails, - query: ScoredTweetsQuery, - isDropped: Boolean - ): t.ScoredCandidate = { - t.ScoredCandidate( - tweetId = candidate.candidateIdLong, - viewerId = query.getOptionalUserId, - authorId = candidate.features.getOrElse(AuthorIdFeature, None), - traceId = Some(Trace.id.traceId.toLong), - requestJoinId = query.features.flatMap(_.getOrElse(RequestJoinIdFeature, None)), - score = candidate.features.getOrElse(ScoreFeature, None), - suggestType = candidate.features.getOrElse(SuggestTypeFeature, None).map(_.name), - isInNetwork = candidate.features.getTry(FromInNetworkSourceFeature).toOption, - inReplyToTweetId = candidate.features.getOrElse(InReplyToTweetIdFeature, None), - inReplyToUserId = candidate.features.getOrElse(InReplyToUserIdFeature, None), - quotedTweetId = candidate.features.getOrElse(QuotedTweetIdFeature, None), - quotedUserId = candidate.features.getOrElse(QuotedUserIdFeature, None), - directedAtUserId = candidate.features.getOrElse(DirectedAtUserIdFeature, None), - favoritedByUserIds = convertSeqFeature(candidate, FavoritedByUserIdsFeature), - followedByUserIds = convertSeqFeature(candidate, FollowedByUserIdsFeature), - ancestors = convertSeqFeature(candidate, AncestorsFeature), - requestTimeMs = Some(query.queryTime.inMilliseconds), - candidatePipelineIdentifier = - candidate.features.getTry(CandidatePipelines).toOption.map(_.head.name), - earlybirdScore = candidate.features.getOrElse(EarlybirdScoreFeature, None), - isDropped = Some(isDropped) - ) - } - - private def convertSeqFeature[T]( - candidateWithDetails: CandidateWithDetails, - feature: Feature[_, Seq[T]] - ): Option[Seq[T]] = - Option( - candidateWithDetails.features - .getOrElse(feature, Seq.empty)).filter(_.nonEmpty) - - override val logPipelinePublisher: EventPublisher[t.ScoredCandidate] = eventBusPublisher -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeServedCommonFeaturesAndCandidateFeaturesSideEffect.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeServedCommonFeaturesAndCandidateFeaturesSideEffect.docx new file mode 100644 index 000000000..6f976c737 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeServedCommonFeaturesAndCandidateFeaturesSideEffect.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeServedCommonFeaturesAndCandidateFeaturesSideEffect.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeServedCommonFeaturesAndCandidateFeaturesSideEffect.scala deleted file mode 100644 index c93adee27..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/scored_tweets/side_effect/ScribeServedCommonFeaturesAndCandidateFeaturesSideEffect.scala +++ /dev/null @@ -1,225 +0,0 @@ -package com.twitter.home_mixer.product.scored_tweets.side_effect - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.mysql.Client -import com.twitter.finagle.mysql.Transactions -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.util.DefaultTimer -import com.twitter.home_mixer.model.HomeFeatures.ServedRequestIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceTweetIdFeature -import com.twitter.home_mixer.param.HomeMixerFlagName.DataRecordMetadataStoreConfigsYmlFlag -import com.twitter.home_mixer.param.HomeMixerFlagName.ScribeServedCommonFeaturesAndCandidateFeaturesFlag -import com.twitter.home_mixer.param.HomeMixerInjectionNames.CandidateFeaturesScribeEventPublisher -import com.twitter.home_mixer.param.HomeMixerInjectionNames.CommonFeaturesScribeEventPublisher -import com.twitter.home_mixer.param.HomeMixerInjectionNames.MinimumFeaturesScribeEventPublisher -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.adapters.non_ml_features.NonMLCandidateFeatures -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.adapters.non_ml_features.NonMLCandidateFeaturesAdapter -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.adapters.non_ml_features.NonMLCommonFeatures -import com.twitter.home_mixer.product.scored_tweets.feature_hydrator.adapters.non_ml_features.NonMLCommonFeaturesAdapter -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsQuery -import com.twitter.home_mixer.product.scored_tweets.model.ScoredTweetsResponse -import com.twitter.home_mixer.product.scored_tweets.scorer.CandidateFeaturesDataRecordFeature -import com.twitter.home_mixer.product.scored_tweets.scorer.CommonFeaturesDataRecordFeature -import com.twitter.home_mixer.product.scored_tweets.scorer.PredictedScoreFeature.PredictedScoreFeatures -import com.twitter.home_mixer.util.CandidatesUtil.getOriginalAuthorId -import com.twitter.inject.annotations.Flag -import com.twitter.logpipeline.client.common.EventPublisher -import com.twitter.ml.api.DataRecordMerger -import com.twitter.product_mixer.core.feature.featuremap.datarecord.DataRecordConverter -import com.twitter.product_mixer.core.feature.featuremap.datarecord.SpecificFeatures -import com.twitter.product_mixer.core.functional_component.side_effect.PipelineResultSideEffect -import com.twitter.product_mixer.core.model.common.identifier.SideEffectIdentifier -import com.twitter.product_mixer.core.model.common.presentation.CandidateWithDetails -import com.twitter.stitch.Stitch -import com.twitter.timelines.ml.cont_train.common.domain.non_scalding.CandidateAndCommonFeaturesStreamingUtils -import com.twitter.timelines.ml.pldr.client.MysqlClientUtils -import com.twitter.timelines.ml.pldr.client.VersionedMetadataCacheClient -import com.twitter.timelines.ml.pldr.conversion.VersionIdAndFeatures -import com.twitter.timelines.suggests.common.data_record_metadata.{thriftscala => drmd} -import com.twitter.timelines.suggests.common.poly_data_record.{thriftjava => pldr} -import com.twitter.timelines.util.stats.OptionObserver -import com.twitter.util.Time -import com.twitter.util.Try -import com.twitter.util.logging.Logging -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton -import scala.collection.JavaConverters._ - -/** - * (1) Scribe common features sent to prediction service + some other features as PLDR format into logs - * (2) Scribe candidate features sent to prediction service + some other features as PLDR format into another logs - */ -@Singleton -class ScribeServedCommonFeaturesAndCandidateFeaturesSideEffect @Inject() ( - @Flag(DataRecordMetadataStoreConfigsYmlFlag) dataRecordMetadataStoreConfigsYml: String, - @Flag(ScribeServedCommonFeaturesAndCandidateFeaturesFlag) enableScribeServedCommonFeaturesAndCandidateFeatures: Boolean, - @Named(CommonFeaturesScribeEventPublisher) commonFeaturesScribeEventPublisher: EventPublisher[ - pldr.PolyDataRecord - ], - @Named(CandidateFeaturesScribeEventPublisher) candidateFeaturesScribeEventPublisher: EventPublisher[ - pldr.PolyDataRecord - ], - @Named(MinimumFeaturesScribeEventPublisher) minimumFeaturesScribeEventPublisher: EventPublisher[ - pldr.PolyDataRecord - ], - statsReceiver: StatsReceiver, -) extends PipelineResultSideEffect[ScoredTweetsQuery, ScoredTweetsResponse] - with PipelineResultSideEffect.Conditionally[ScoredTweetsQuery, ScoredTweetsResponse] - with Logging { - - override val identifier: SideEffectIdentifier = - SideEffectIdentifier("ScribeServedCommonFeaturesAndCandidateFeatures") - - private val drMerger = new DataRecordMerger - private val postScoringCandidateFeatures = SpecificFeatures(PredictedScoreFeatures) - private val postScoringCandidateFeaturesDataRecordAdapter = - new DataRecordConverter(postScoringCandidateFeatures) - - private val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName) - private val metadataFetchFailedCounter = scopedStatsReceiver.counter("metadataFetchFailed") - private val commonFeaturesScribeCounter = scopedStatsReceiver.counter("commonFeaturesScribe") - private val commonFeaturesPLDROptionObserver = - OptionObserver(scopedStatsReceiver.scope("commonFeaturesPLDR")) - private val candidateFeaturesScribeCounter = - scopedStatsReceiver.counter("candidateFeaturesScribe") - private val candidateFeaturesPLDROptionObserver = - OptionObserver(scopedStatsReceiver.scope("candidateFeaturesPLDR")) - private val minimumFeaturesPLDROptionObserver = - OptionObserver(scopedStatsReceiver.scope("minimumFeaturesPLDR")) - private val minimumFeaturesScribeCounter = - scopedStatsReceiver.counter("minimumFeaturesScribe") - - lazy private val dataRecordMetadataStoreClient: Option[Client with Transactions] = - Try { - MysqlClientUtils.mysqlClientProvider( - MysqlClientUtils.parseConfigFromYaml(dataRecordMetadataStoreConfigsYml)) - }.onFailure { e => info(s"Error building MySQL client: $e") }.toOption - - lazy private val versionedMetadataCacheClientOpt: Option[ - VersionedMetadataCacheClient[Map[drmd.FeaturesCategory, Option[VersionIdAndFeatures]]] - ] = - dataRecordMetadataStoreClient.map { mysqlClient => - new VersionedMetadataCacheClient[Map[drmd.FeaturesCategory, Option[VersionIdAndFeatures]]]( - maximumSize = 1, - expireDurationOpt = None, - mysqlClient = mysqlClient, - transform = CandidateAndCommonFeaturesStreamingUtils.metadataTransformer, - statsReceiver = statsReceiver - ) - } - - versionedMetadataCacheClientOpt.foreach { versionedMetadataCacheClient => - versionedMetadataCacheClient - .metadataFetchTimerTask( - CandidateAndCommonFeaturesStreamingUtils.metadataFetchKey, - metadataFetchTimer = DefaultTimer, - metadataFetchInterval = 90.seconds, - metadataFetchFailedCounter = metadataFetchFailedCounter - ) - } - - override def onlyIf( - query: ScoredTweetsQuery, - selectedCandidates: Seq[CandidateWithDetails], - remainingCandidates: Seq[CandidateWithDetails], - droppedCandidates: Seq[CandidateWithDetails], - response: ScoredTweetsResponse - ): Boolean = enableScribeServedCommonFeaturesAndCandidateFeatures - - override def apply( - inputs: PipelineResultSideEffect.Inputs[ScoredTweetsQuery, ScoredTweetsResponse] - ): Stitch[Unit] = { - Stitch.value { - val servedTimestamp: Long = Time.now.inMilliseconds - val nonMLCommonFeatures = NonMLCommonFeatures( - userId = inputs.query.getRequiredUserId, - predictionRequestId = - inputs.query.features.flatMap(_.getOrElse(ServedRequestIdFeature, None)), - servedTimestamp = servedTimestamp - ) - val nonMLCommonFeaturesDataRecord = - NonMLCommonFeaturesAdapter.adaptToDataRecords(nonMLCommonFeatures).asScala.head - - /** - * Steps of scribing common features - * (1) fetch common features as data record - * (2) extract additional feature as data record, e.g. predictionRequestId which is used as join key in downstream jobs - * (3) merge two data records above and convert the merged data record to pldr - * (4) publish pldr - */ - val commonFeaturesDataRecordOpt = - inputs.selectedCandidates.headOption.map(_.features.get(CommonFeaturesDataRecordFeature)) - val commonFeaturesPLDROpt = commonFeaturesDataRecordOpt.flatMap { commonFeaturesDataRecord => - drMerger.merge(commonFeaturesDataRecord, nonMLCommonFeaturesDataRecord) - - CandidateAndCommonFeaturesStreamingUtils.commonFeaturesToPolyDataRecord( - versionedMetadataCacheClientOpt = versionedMetadataCacheClientOpt, - commonFeatures = commonFeaturesDataRecord, - valueFormat = pldr.PolyDataRecord._Fields.LITE_COMPACT_DATA_RECORD - ) - } - - commonFeaturesPLDROptionObserver(commonFeaturesPLDROpt).foreach { pldr => - commonFeaturesScribeEventPublisher.publish(pldr) - commonFeaturesScribeCounter.incr() - } - - /** - * steps of scribing candidate features - * (1) fetch candidate features as data record - * (2) extract additional features (mostly non ML features including predicted scores, predictionRequestId, userId, tweetId) - * (3) merge data records and convert the merged data record into pldr - * (4) publish pldr - */ - inputs.selectedCandidates.foreach { candidate => - val candidateFeaturesDataRecord = candidate.features.get(CandidateFeaturesDataRecordFeature) - - /** - * extract predicted scores as data record and merge it into original data record - */ - val postScoringCandidateFeaturesDataRecord = - postScoringCandidateFeaturesDataRecordAdapter.toDataRecord(candidate.features) - drMerger.merge(candidateFeaturesDataRecord, postScoringCandidateFeaturesDataRecord) - - /** - * extract non ML common features as data record and merge it into original data record - */ - drMerger.merge(candidateFeaturesDataRecord, nonMLCommonFeaturesDataRecord) - - /** - * extract non ML candidate features as data record and merge it into original data record - */ - val nonMLCandidateFeatures = NonMLCandidateFeatures( - tweetId = candidate.candidateIdLong, - sourceTweetId = candidate.features.getOrElse(SourceTweetIdFeature, None), - originalAuthorId = getOriginalAuthorId(candidate.features) - ) - val nonMLCandidateFeaturesDataRecord = - NonMLCandidateFeaturesAdapter.adaptToDataRecords(nonMLCandidateFeatures).asScala.head - drMerger.merge(candidateFeaturesDataRecord, nonMLCandidateFeaturesDataRecord) - - val candidateFeaturesPLDROpt = - CandidateAndCommonFeaturesStreamingUtils.candidateFeaturesToPolyDataRecord( - versionedMetadataCacheClientOpt = versionedMetadataCacheClientOpt, - candidateFeatures = candidateFeaturesDataRecord, - valueFormat = pldr.PolyDataRecord._Fields.LITE_COMPACT_DATA_RECORD - ) - - candidateFeaturesPLDROptionObserver(candidateFeaturesPLDROpt).foreach { pldr => - candidateFeaturesScribeEventPublisher.publish(pldr) - candidateFeaturesScribeCounter.incr() - } - - // scribe minimum features which are used to join labels from client events. - val minimumFeaturesPLDROpt = candidateFeaturesPLDROpt - .map(CandidateAndCommonFeaturesStreamingUtils.extractMinimumFeaturesFromPldr) - .map(pldr.PolyDataRecord.dataRecord) - minimumFeaturesPLDROptionObserver(minimumFeaturesPLDROpt).foreach { pldr => - minimumFeaturesScribeEventPublisher.publish(pldr) - minimumFeaturesScribeCounter.incr() - } - } - } - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/BUILD.bazel deleted file mode 100644 index c1b5032db..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/BUILD.bazel +++ /dev/null @@ -1,82 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "ads-injection/lib/src/main/scala/com/twitter/goldfinch/api", - "finagle/finagle-memcached/src/main/scala", - "finatra/inject/inject-core/src/main/scala", - "finatra/inject/inject-core/src/main/scala/com/twitter/inject", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/candidate_pipeline", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/candidate_source", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/decorator", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/decorator/builder", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/decorator/urt/builder", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/feature_hydrator", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/filter", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/gate", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/selector", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/side_effect", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/marshaller/timelines", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model/request", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/param", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/following/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/service", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/util", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/candidate_source/tweetconvosvc", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/decorator/urt", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/feature_hydrator/candidate/param_gated", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/feature_hydrator/query/async", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/feature_hydrator/query/impressed_tweets", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/feature_hydrator/query/param_gated", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/feature_hydrator/query/social_graph", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/filter", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/gate", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/model/candidate", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/model/presentation/urt", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/premarshaller/urt", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/selector", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/side_effect", - "product-mixer/core/src/main/java/com/twitter/product_mixer/core/product/guice/scope", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/configapi", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/decorator", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/decorator/urt/builder", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/marshaller/response/urt", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/model/common/presentation", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/model/marshalling/request", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/model/marshalling/response/urt", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/model/marshalling/response/urt/item", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/candidate", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/mixer", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/product", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/product", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/product/guice", - "src/java/com/twitter/search/common/schema/base", - "src/java/com/twitter/search/common/schema/earlybird", - "src/java/com/twitter/search/common/util/lang", - "src/java/com/twitter/search/queryparser/query:core-query-nodes", - "src/java/com/twitter/search/queryparser/query/search:search-query-nodes", - "src/thrift/com/twitter/search:earlybird-scala", - "src/thrift/com/twitter/search/common:constants-java", - "src/thrift/com/twitter/tweetypie:service-scala", - "stitch/stitch-gizmoduck", - "stitch/stitch-tweetypie", - "stringcenter/client", - "stringcenter/client/src/main/java", - "timelinemixer/common/src/main/scala/com/twitter/timelinemixer/clients/manhattan", - "timelinemixer/server/src/main/scala/com/twitter/timelinemixer/injection/model/candidate", - "timelinemixer/server/src/main/scala/com/twitter/timelinemixer/injection/store/persistence", - "timelines/src/main/scala/com/twitter/timelines/clients/relevance_search", - "timelines/src/main/scala/com/twitter/timelines/injection/scribe", - ], - exports = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/decorator/urt", - "src/thrift/com/twitter/timelines/render:thrift-scala", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/BUILD.docx new file mode 100644 index 000000000..df24109ac Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdCandidatePipelineConfig.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdCandidatePipelineConfig.docx new file mode 100644 index 000000000..134725c98 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdCandidatePipelineConfig.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdCandidatePipelineConfig.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdCandidatePipelineConfig.scala deleted file mode 100644 index c331f955b..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdCandidatePipelineConfig.scala +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.home_mixer.product.subscribed - -import com.google.inject.Inject -import com.twitter.home_mixer.functional_component.candidate_source.EarlybirdCandidateSource -import com.twitter.home_mixer.product.subscribed.model.SubscribedQuery -import com.twitter.product_mixer.component_library.feature_hydrator.query.social_graph.SGSSubscribedUsersFeature -import com.twitter.product_mixer.component_library.filter.TweetVisibilityFilter -import com.twitter.product_mixer.component_library.gate.NonEmptySeqFeatureGate -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.functional_component.candidate_source.BaseCandidateSource -import com.twitter.product_mixer.core.functional_component.filter.Filter -import com.twitter.product_mixer.core.functional_component.gate.Gate -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.functional_component.transformer.CandidatePipelineQueryTransformer -import com.twitter.product_mixer.core.functional_component.transformer.CandidatePipelineResultsTransformer -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.pipeline.candidate.CandidatePipelineConfig -import com.twitter.search.earlybird.{thriftscala => t} -import com.twitter.spam.rtf.thriftscala.SafetyLevel.TimelineHomeSubscribed -import com.twitter.stitch.tweetypie.{TweetyPie => TweetypieStitchClient} -import com.twitter.tweetypie.thriftscala.TweetVisibilityPolicy - -class SubscribedEarlybirdCandidatePipelineConfig @Inject() ( - earlybirdCandidateSource: EarlybirdCandidateSource, - tweetyPieStitchClient: TweetypieStitchClient, - subscribedEarlybirdQueryTransformer: SubscribedEarlybirdQueryTransformer) - extends CandidatePipelineConfig[ - SubscribedQuery, - t.EarlybirdRequest, - t.ThriftSearchResult, - TweetCandidate - ] { - override val identifier: CandidatePipelineIdentifier = - CandidatePipelineIdentifier("SubscribedEarlybird") - - override val candidateSource: BaseCandidateSource[t.EarlybirdRequest, t.ThriftSearchResult] = - earlybirdCandidateSource - - override val gates: Seq[Gate[SubscribedQuery]] = Seq( - NonEmptySeqFeatureGate(SGSSubscribedUsersFeature) - ) - - override def filters: Seq[Filter[SubscribedQuery, TweetCandidate]] = Seq( - new TweetVisibilityFilter( - tweetyPieStitchClient, - TweetVisibilityPolicy.UserVisible, - TimelineHomeSubscribed - ) - ) - - override val queryTransformer: CandidatePipelineQueryTransformer[ - SubscribedQuery, - t.EarlybirdRequest - ] = subscribedEarlybirdQueryTransformer - - override val featuresFromCandidateSourceTransformers: Seq[ - CandidateFeatureTransformer[t.ThriftSearchResult] - ] = Seq(SubscribedEarlybirdResponseFeatureTransformer) - - override val resultTransformer: CandidatePipelineResultsTransformer[ - t.ThriftSearchResult, - TweetCandidate - ] = { sourceResult => TweetCandidate(id = sourceResult.id) } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdQueryTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdQueryTransformer.docx new file mode 100644 index 000000000..e2d61afbd Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdQueryTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdQueryTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdQueryTransformer.scala deleted file mode 100644 index 6e0d57c13..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdQueryTransformer.scala +++ /dev/null @@ -1,67 +0,0 @@ -package com.twitter.home_mixer.product.subscribed - -import com.twitter.finagle.thrift.ClientId -import com.twitter.finagle.tracing.Trace -import com.twitter.home_mixer.product.subscribed.model.SubscribedQuery -import com.twitter.home_mixer.product.subscribed.param.SubscribedParam.ServerMaxResultsParam -import com.twitter.product_mixer.component_library.feature_hydrator.query.social_graph.SGSSubscribedUsersFeature -import com.twitter.product_mixer.core.functional_component.transformer.CandidatePipelineQueryTransformer -import com.twitter.product_mixer.core.model.marshalling.response.urt.operation.BottomCursor -import com.twitter.product_mixer.core.model.marshalling.response.urt.operation.GapCursor -import com.twitter.product_mixer.core.model.marshalling.response.urt.operation.TopCursor -import com.twitter.product_mixer.core.pipeline.pipeline_failure.MalformedCursor -import com.twitter.product_mixer.core.pipeline.pipeline_failure.PipelineFailure -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant -import com.twitter.search.earlybird.{thriftscala => t} -import com.twitter.search.queryparser.query.Conjunction -import com.twitter.search.queryparser.query.search.SearchOperator -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -case class SubscribedEarlybirdQueryTransformer @Inject() (clientId: ClientId) - extends CandidatePipelineQueryTransformer[SubscribedQuery, t.EarlybirdRequest] { - - override def transform(query: SubscribedQuery): t.EarlybirdRequest = { - val subscribedUserIds = - query.features.map(_.get(SGSSubscribedUsersFeature)).getOrElse(Seq.empty) - - val subscribedUsersQuery = new SearchOperator.Builder() - .setType(SearchOperator.Type.FILTER) - .addOperand(EarlybirdFieldConstant.EXCLUSIVE_FILTER_TERM) - .build() - - val searchQuery = query.pipelineCursor - .map { cursor => - val sinceIdQuery = - (id: Long) => new SearchOperator(SearchOperator.Type.SINCE_ID, id.toString) - val maxIdQuery = // max ID is inclusive, so subtract 1 - (id: Long) => new SearchOperator(SearchOperator.Type.MAX_ID, (id - 1).toString) - - (cursor.cursorType, cursor.id, cursor.gapBoundaryId) match { - case (Some(TopCursor), Some(sinceId), _) => - new Conjunction(sinceIdQuery(sinceId), subscribedUsersQuery) - case (Some(BottomCursor), Some(maxId), _) => - new Conjunction(maxIdQuery(maxId), subscribedUsersQuery) - case (Some(GapCursor), Some(maxId), Some(sinceId)) => - new Conjunction(sinceIdQuery(sinceId), maxIdQuery(maxId), subscribedUsersQuery) - case (Some(GapCursor), _, _) => - throw PipelineFailure(MalformedCursor, "Invalid cursor " + cursor.toString) - case _ => subscribedUsersQuery - } - }.getOrElse(subscribedUsersQuery) - - t.EarlybirdRequest( - searchQuery = t.ThriftSearchQuery( - serializedQuery = Some(searchQuery.serialize), - fromUserIDFilter64 = Some(subscribedUserIds), - numResults = query.requestedMaxResults.getOrElse(query.params(ServerMaxResultsParam)), - rankingMode = t.ThriftSearchRankingMode.Recency, - ), - getOlderResults = Some(true), // needed for archive access to older tweets - clientRequestID = Some(s"${Trace.id.traceId}"), - numResultsToReturnAtRoot = Some(query.params(ServerMaxResultsParam)), - clientId = Some(clientId.name), - ) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdResponseFeatureTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdResponseFeatureTransformer.docx new file mode 100644 index 000000000..ab4a0526f Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdResponseFeatureTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdResponseFeatureTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdResponseFeatureTransformer.scala deleted file mode 100644 index a8136102c..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedEarlybirdResponseFeatureTransformer.scala +++ /dev/null @@ -1,38 +0,0 @@ -package com.twitter.home_mixer.product.subscribed - -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRetweetFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceUserIdFeature -import com.twitter.product_mixer.core.feature.Feature -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.feature.featuremap.FeatureMapBuilder -import com.twitter.product_mixer.core.functional_component.transformer.CandidateFeatureTransformer -import com.twitter.product_mixer.core.model.common.identifier.TransformerIdentifier -import com.twitter.search.earlybird.{thriftscala => t} - -object SubscribedEarlybirdResponseFeatureTransformer - extends CandidateFeatureTransformer[t.ThriftSearchResult] { - - override val identifier: TransformerIdentifier = - TransformerIdentifier("SubscribedEarlybirdResponse") - - override val features: Set[Feature[_, _]] = Set( - AuthorIdFeature, - InReplyToTweetIdFeature, - IsRetweetFeature, - SourceTweetIdFeature, - SourceUserIdFeature, - ) - - override def transform(candidate: t.ThriftSearchResult): FeatureMap = FeatureMapBuilder() - .add(AuthorIdFeature, candidate.tweetypieTweet.flatMap(_.coreData.map(_.userId))) - .add( - InReplyToTweetIdFeature, - candidate.tweetypieTweet.flatMap(_.coreData.flatMap(_.reply.flatMap(_.inReplyToStatusId)))) - .add(IsRetweetFeature, candidate.metadata.exists(_.isRetweet.contains(true))) - .add(SourceTweetIdFeature, candidate.sourceTweetypieTweet.map(_.id)) - .add(SourceUserIdFeature, candidate.sourceTweetypieTweet.flatMap(_.coreData.map(_.userId))) - .build() -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedMixerPipelineConfig.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedMixerPipelineConfig.docx new file mode 100644 index 000000000..3e04859ca Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedMixerPipelineConfig.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedMixerPipelineConfig.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedMixerPipelineConfig.scala deleted file mode 100644 index 5b1f74b37..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedMixerPipelineConfig.scala +++ /dev/null @@ -1,221 +0,0 @@ -package com.twitter.home_mixer.product.subscribed - -import com.twitter.clientapp.{thriftscala => ca} -import com.twitter.home_mixer.candidate_pipeline.ConversationServiceCandidatePipelineConfigBuilder -import com.twitter.home_mixer.candidate_pipeline.EditedTweetsCandidatePipelineConfig -import com.twitter.home_mixer.candidate_pipeline.NewTweetsPillCandidatePipelineConfig -import com.twitter.home_mixer.functional_component.decorator.HomeConversationServiceCandidateDecorator -import com.twitter.home_mixer.functional_component.decorator.urt.builder.HomeFeedbackActionInfoBuilder -import com.twitter.home_mixer.functional_component.feature_hydrator._ -import com.twitter.home_mixer.functional_component.selector.UpdateHomeClientEventDetails -import com.twitter.home_mixer.functional_component.selector.UpdateNewTweetsPillDecoration -import com.twitter.home_mixer.functional_component.side_effect._ -import com.twitter.home_mixer.model.GapIncludeInstruction -import com.twitter.home_mixer.param.HomeGlobalParams.MaxNumberReplaceInstructionsParam -import com.twitter.home_mixer.param.HomeMixerFlagName.ScribeClientEventsFlag -import com.twitter.home_mixer.product.following.model.HomeMixerExternalStrings -import com.twitter.home_mixer.product.subscribed.model.SubscribedQuery -import com.twitter.home_mixer.product.subscribed.param.SubscribedParam.ServerMaxResultsParam -import com.twitter.home_mixer.util.CandidatesUtil -import com.twitter.inject.annotations.Flag -import com.twitter.logpipeline.client.common.EventPublisher -import com.twitter.product_mixer.component_library.feature_hydrator.query.async.AsyncQueryFeatureHydrator -import com.twitter.product_mixer.component_library.feature_hydrator.query.impressed_tweets.ImpressedTweetsQueryFeatureHydrator -import com.twitter.product_mixer.component_library.feature_hydrator.query.social_graph.SGSFollowedUsersQueryFeatureHydrator -import com.twitter.product_mixer.component_library.feature_hydrator.query.social_graph.SGSSubscribedUsersQueryFeatureHydrator -import com.twitter.product_mixer.component_library.gate.NonEmptyCandidatesGate -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.component_library.premarshaller.urt.UrtDomainMarshaller -import com.twitter.product_mixer.component_library.premarshaller.urt.builder.AddEntriesWithReplaceAndShowAlertInstructionBuilder -import com.twitter.product_mixer.component_library.premarshaller.urt.builder.OrderedBottomCursorBuilder -import com.twitter.product_mixer.component_library.premarshaller.urt.builder.OrderedGapCursorBuilder -import com.twitter.product_mixer.component_library.premarshaller.urt.builder.OrderedTopCursorBuilder -import com.twitter.product_mixer.component_library.premarshaller.urt.builder.ReplaceAllEntries -import com.twitter.product_mixer.component_library.premarshaller.urt.builder.ReplaceEntryInstructionBuilder -import com.twitter.product_mixer.component_library.premarshaller.urt.builder.ShowAlertInstructionBuilder -import com.twitter.product_mixer.component_library.premarshaller.urt.builder.StaticTimelineScribeConfigBuilder -import com.twitter.product_mixer.component_library.premarshaller.urt.builder.UrtMetadataBuilder -import com.twitter.product_mixer.component_library.selector.DropMaxCandidates -import com.twitter.product_mixer.component_library.selector.InsertAppendResults -import com.twitter.product_mixer.component_library.selector.SelectConditionally -import com.twitter.product_mixer.component_library.selector.UpdateSortCandidates -import com.twitter.product_mixer.core.functional_component.common.SpecificPipeline -import com.twitter.product_mixer.core.functional_component.common.SpecificPipelines -import com.twitter.product_mixer.core.functional_component.feature_hydrator.QueryFeatureHydrator -import com.twitter.product_mixer.core.functional_component.marshaller.TransportMarshaller -import com.twitter.product_mixer.core.functional_component.marshaller.response.urt.UrtTransportMarshaller -import com.twitter.product_mixer.core.functional_component.premarshaller.DomainMarshaller -import com.twitter.product_mixer.core.functional_component.selector.Selector -import com.twitter.product_mixer.core.functional_component.side_effect.PipelineResultSideEffect -import com.twitter.product_mixer.core.model.common.UniversalNoun -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.product_mixer.core.model.common.identifier.MixerPipelineIdentifier -import com.twitter.product_mixer.core.model.marshalling.response.urt.Timeline -import com.twitter.product_mixer.core.model.marshalling.response.urt.TimelineModule -import com.twitter.product_mixer.core.model.marshalling.response.urt.TimelineScribeConfig -import com.twitter.product_mixer.core.model.marshalling.response.urt.item.tweet.TweetItem -import com.twitter.product_mixer.core.pipeline.FailOpenPolicy -import com.twitter.product_mixer.core.pipeline.candidate.CandidatePipelineConfig -import com.twitter.product_mixer.core.pipeline.candidate.DependentCandidatePipelineConfig -import com.twitter.product_mixer.core.pipeline.mixer.MixerPipelineConfig -import com.twitter.product_mixer.core.product.guice.scope.ProductScoped -import com.twitter.stringcenter.client.StringCenter -import com.twitter.timelines.render.{thriftscala => urt} -import javax.inject.Inject -import javax.inject.Provider -import javax.inject.Singleton - -@Singleton -class SubscribedMixerPipelineConfig @Inject() ( - subscribedEarlybirdCandidatePipelineConfig: SubscribedEarlybirdCandidatePipelineConfig, - conversationServiceCandidatePipelineConfigBuilder: ConversationServiceCandidatePipelineConfigBuilder[ - SubscribedQuery - ], - homeFeedbackActionInfoBuilder: HomeFeedbackActionInfoBuilder, - editedTweetsCandidatePipelineConfig: EditedTweetsCandidatePipelineConfig, - newTweetsPillCandidatePipelineConfig: NewTweetsPillCandidatePipelineConfig[SubscribedQuery], - dismissInfoQueryFeatureHydrator: DismissInfoQueryFeatureHydrator, - gizmoduckUserQueryFeatureHydrator: GizmoduckUserQueryFeatureHydrator, - requestQueryFeatureHydrator: RequestQueryFeatureHydrator[SubscribedQuery], - sgsFollowedUsersQueryFeatureHydrator: SGSFollowedUsersQueryFeatureHydrator, - sgsSubscribedUsersQueryFeatureHydrator: SGSSubscribedUsersQueryFeatureHydrator, - manhattanTweetImpressionsQueryFeatureHydrator: TweetImpressionsQueryFeatureHydrator[ - SubscribedQuery - ], - memcacheTweetImpressionsQueryFeatureHydrator: ImpressedTweetsQueryFeatureHydrator, - publishClientSentImpressionsEventBusSideEffect: PublishClientSentImpressionsEventBusSideEffect, - publishClientSentImpressionsManhattanSideEffect: PublishClientSentImpressionsManhattanSideEffect, - homeTimelineServedCandidatesSideEffect: HomeScribeServedCandidatesSideEffect, - clientEventsScribeEventPublisher: EventPublisher[ca.LogEvent], - externalStrings: HomeMixerExternalStrings, - @ProductScoped stringCenterProvider: Provider[StringCenter], - urtTransportMarshaller: UrtTransportMarshaller, - @Flag(ScribeClientEventsFlag) enableScribeClientEvents: Boolean) - extends MixerPipelineConfig[SubscribedQuery, Timeline, urt.TimelineResponse] { - - override val identifier: MixerPipelineIdentifier = MixerPipelineIdentifier("Subscribed") - - private val dependentCandidatesStep = MixerPipelineConfig.dependentCandidatePipelinesStep - private val resultSelectorsStep = MixerPipelineConfig.resultSelectorsStep - - override val fetchQueryFeatures: Seq[QueryFeatureHydrator[SubscribedQuery]] = Seq( - requestQueryFeatureHydrator, - sgsFollowedUsersQueryFeatureHydrator, - sgsSubscribedUsersQueryFeatureHydrator, - AsyncQueryFeatureHydrator(dependentCandidatesStep, dismissInfoQueryFeatureHydrator), - AsyncQueryFeatureHydrator(dependentCandidatesStep, gizmoduckUserQueryFeatureHydrator), - AsyncQueryFeatureHydrator(resultSelectorsStep, manhattanTweetImpressionsQueryFeatureHydrator), - AsyncQueryFeatureHydrator(resultSelectorsStep, memcacheTweetImpressionsQueryFeatureHydrator) - ) - - private val earlybirdCandidatePipelineScope = - SpecificPipeline(subscribedEarlybirdCandidatePipelineConfig.identifier) - - private val conversationServiceCandidatePipelineConfig = - conversationServiceCandidatePipelineConfigBuilder.build( - Seq(NonEmptyCandidatesGate(earlybirdCandidatePipelineScope)), - HomeConversationServiceCandidateDecorator(homeFeedbackActionInfoBuilder) - ) - - override val candidatePipelines: Seq[CandidatePipelineConfig[SubscribedQuery, _, _, _]] = - Seq(subscribedEarlybirdCandidatePipelineConfig) - - override val dependentCandidatePipelines: Seq[ - DependentCandidatePipelineConfig[SubscribedQuery, _, _, _] - ] = Seq( - conversationServiceCandidatePipelineConfig, - editedTweetsCandidatePipelineConfig, - newTweetsPillCandidatePipelineConfig - ) - - override val failOpenPolicies: Map[CandidatePipelineIdentifier, FailOpenPolicy] = Map( - editedTweetsCandidatePipelineConfig.identifier -> FailOpenPolicy.Always, - newTweetsPillCandidatePipelineConfig.identifier -> FailOpenPolicy.Always, - ) - - override val resultSelectors: Seq[Selector[SubscribedQuery]] = Seq( - UpdateSortCandidates( - ordering = CandidatesUtil.reverseChronTweetsOrdering, - candidatePipeline = conversationServiceCandidatePipelineConfig.identifier - ), - DropMaxCandidates( - candidatePipeline = editedTweetsCandidatePipelineConfig.identifier, - maxSelectionsParam = MaxNumberReplaceInstructionsParam - ), - DropMaxCandidates( - candidatePipeline = conversationServiceCandidatePipelineConfig.identifier, - maxSelectionsParam = ServerMaxResultsParam - ), - InsertAppendResults(candidatePipeline = conversationServiceCandidatePipelineConfig.identifier), - // This selector must come after the tweets are inserted into the results - UpdateNewTweetsPillDecoration( - pipelineScope = SpecificPipelines( - conversationServiceCandidatePipelineConfig.identifier, - newTweetsPillCandidatePipelineConfig.identifier - ), - stringCenter = stringCenterProvider.get(), - seeNewTweetsString = externalStrings.seeNewTweetsString, - tweetedString = externalStrings.tweetedString - ), - InsertAppendResults(candidatePipeline = editedTweetsCandidatePipelineConfig.identifier), - SelectConditionally( - selector = - InsertAppendResults(candidatePipeline = newTweetsPillCandidatePipelineConfig.identifier), - includeSelector = (_, _, results) => CandidatesUtil.containsType[TweetCandidate](results) - ), - UpdateHomeClientEventDetails( - candidatePipelines = Set(conversationServiceCandidatePipelineConfig.identifier) - ), - ) - - private val homeScribeClientEventSideEffect = HomeScribeClientEventSideEffect( - enableScribeClientEvents = enableScribeClientEvents, - logPipelinePublisher = clientEventsScribeEventPublisher, - injectedTweetsCandidatePipelineIdentifiers = - Seq(conversationServiceCandidatePipelineConfig.identifier), - ) - - override val resultSideEffects: Seq[PipelineResultSideEffect[SubscribedQuery, Timeline]] = Seq( - homeScribeClientEventSideEffect, - homeTimelineServedCandidatesSideEffect, - publishClientSentImpressionsEventBusSideEffect, - publishClientSentImpressionsManhattanSideEffect - ) - - override val domainMarshaller: DomainMarshaller[SubscribedQuery, Timeline] = { - val instructionBuilders = Seq( - ReplaceEntryInstructionBuilder(ReplaceAllEntries), - AddEntriesWithReplaceAndShowAlertInstructionBuilder(), - ShowAlertInstructionBuilder(), - ) - - val idSelector: PartialFunction[UniversalNoun[_], Long] = { - // exclude ads while determining tweet cursor values - case item: TweetItem if item.promotedMetadata.isEmpty => item.id - case module: TimelineModule - if module.items.headOption.exists(_.item.isInstanceOf[TweetItem]) => - module.items.last.item match { case item: TweetItem => item.id } - } - - val topCursorBuilder = OrderedTopCursorBuilder(idSelector) - val bottomCursorBuilder = - OrderedBottomCursorBuilder(idSelector, GapIncludeInstruction.inverse()) - val gapCursorBuilder = OrderedGapCursorBuilder(idSelector, GapIncludeInstruction) - - val metadataBuilder = UrtMetadataBuilder( - title = None, - scribeConfigBuilder = Some( - StaticTimelineScribeConfigBuilder( - TimelineScribeConfig(page = Some("subscribed"), section = None, entityToken = None))) - ) - - UrtDomainMarshaller( - instructionBuilders = instructionBuilders, - metadataBuilder = Some(metadataBuilder), - cursorBuilders = Seq(topCursorBuilder, bottomCursorBuilder, gapCursorBuilder) - ) - } - - override val transportMarshaller: TransportMarshaller[Timeline, urt.TimelineResponse] = - urtTransportMarshaller -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedProductPipelineConfig.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedProductPipelineConfig.docx new file mode 100644 index 000000000..eafb106a0 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedProductPipelineConfig.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedProductPipelineConfig.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedProductPipelineConfig.scala deleted file mode 100644 index 0d524391a..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/SubscribedProductPipelineConfig.scala +++ /dev/null @@ -1,123 +0,0 @@ -package com.twitter.home_mixer.product.subscribed - -import com.twitter.conversions.DurationOps._ -import com.twitter.home_mixer.marshaller.timelines.ChronologicalCursorUnmarshaller -import com.twitter.home_mixer.model.request.HomeMixerRequest -import com.twitter.home_mixer.model.request.SubscribedProduct -import com.twitter.home_mixer.model.request.SubscribedProductContext -import com.twitter.home_mixer.product.subscribed.model.SubscribedQuery -import com.twitter.home_mixer.product.subscribed.param.SubscribedParam.ServerMaxResultsParam -import com.twitter.home_mixer.service.HomeMixerAccessPolicy.DefaultHomeMixerAccessPolicy -import com.twitter.home_mixer.service.HomeMixerAlertConfig.DefaultNotificationGroup -import com.twitter.product_mixer.component_library.model.cursor.UrtOrderedCursor -import com.twitter.product_mixer.component_library.premarshaller.cursor.UrtCursorSerializer -import com.twitter.product_mixer.core.functional_component.common.access_policy.AccessPolicy -import com.twitter.product_mixer.core.functional_component.common.alert.Alert -import com.twitter.product_mixer.core.functional_component.common.alert.LatencyAlert -import com.twitter.product_mixer.core.functional_component.common.alert.P99 -import com.twitter.product_mixer.core.functional_component.common.alert.SuccessRateAlert -import com.twitter.product_mixer.core.functional_component.common.alert.ThroughputAlert -import com.twitter.product_mixer.core.functional_component.common.alert.predicate.TriggerIfAbove -import com.twitter.product_mixer.core.functional_component.common.alert.predicate.TriggerIfBelow -import com.twitter.product_mixer.core.functional_component.common.alert.predicate.TriggerIfLatencyAbove -import com.twitter.product_mixer.core.model.common.identifier.ComponentIdentifier -import com.twitter.product_mixer.core.model.common.identifier.ProductPipelineIdentifier -import com.twitter.product_mixer.core.model.marshalling.request.Product -import com.twitter.product_mixer.core.model.marshalling.response.urt.operation.GapCursor -import com.twitter.product_mixer.core.model.marshalling.response.urt.operation.TopCursor -import com.twitter.product_mixer.core.pipeline.PipelineConfig -import com.twitter.product_mixer.core.pipeline.pipeline_failure.BadRequest -import com.twitter.product_mixer.core.pipeline.pipeline_failure.MalformedCursor -import com.twitter.product_mixer.core.pipeline.pipeline_failure.PipelineFailure -import com.twitter.product_mixer.core.pipeline.product.ProductPipelineConfig -import com.twitter.product_mixer.core.product.ProductParamConfig -import com.twitter.product_mixer.core.util.SortIndexBuilder -import com.twitter.timelines.configapi.Params -import com.twitter.timelines.render.{thriftscala => urt} -import com.twitter.timelines.util.RequestCursorSerializer -import com.twitter.util.Time -import com.twitter.util.Try -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -class SubscribedProductPipelineConfig @Inject() ( - subscribedMixerPipelineConfig: SubscribedMixerPipelineConfig, - subscribedParamConfig: param.SubscribedParamConfig) - extends ProductPipelineConfig[HomeMixerRequest, SubscribedQuery, urt.TimelineResponse] { - - override val identifier: ProductPipelineIdentifier = ProductPipelineIdentifier("Subscribed") - - override val product: Product = SubscribedProduct - override val paramConfig: ProductParamConfig = subscribedParamConfig - - override def pipelineQueryTransformer( - request: HomeMixerRequest, - params: Params - ): SubscribedQuery = { - val context = request.productContext match { - case Some(context: SubscribedProductContext) => context - case _ => throw PipelineFailure(BadRequest, "SubscribedProductContext not found") - } - - val debugOptions = request.debugParams.flatMap(_.debugOptions) - - /** - * Unlike other clients, newly created tweets on Android have the sort index set to the current - * time instead of the top sort index + 1, so these tweets get stuck at the top of the timeline - * if subsequent timeline responses use the sort index from the previous response instead of - * the current time. - */ - val pipelineCursor = request.serializedRequestCursor.flatMap { cursor => - Try(UrtCursorSerializer.deserializeOrderedCursor(cursor)) - .getOrElse(ChronologicalCursorUnmarshaller(RequestCursorSerializer.deserialize(cursor))) - .map { - case UrtOrderedCursor(_, id, Some(GapCursor), gapBoundaryId) - if id.isEmpty || gapBoundaryId.isEmpty => - throw PipelineFailure(MalformedCursor, "Gap Cursor bounds not defined") - case topCursor @ UrtOrderedCursor(_, _, Some(TopCursor), _) => - val queryTime = debugOptions.flatMap(_.requestTimeOverride).getOrElse(Time.now) - topCursor.copy(initialSortIndex = SortIndexBuilder.timeToId(queryTime)) - case cursor => cursor - } - } - - SubscribedQuery( - params = params, - clientContext = request.clientContext, - features = None, - pipelineCursor = pipelineCursor, - requestedMaxResults = Some(params(ServerMaxResultsParam)), - debugOptions = debugOptions, - deviceContext = context.deviceContext, - seenTweetIds = context.seenTweetIds - ) - } - - override val pipelines: Seq[PipelineConfig] = Seq(subscribedMixerPipelineConfig) - - override def pipelineSelector( - query: SubscribedQuery - ): ComponentIdentifier = subscribedMixerPipelineConfig.identifier - - override val alerts: Seq[Alert] = Seq( - SuccessRateAlert( - notificationGroup = DefaultNotificationGroup, - warnPredicate = TriggerIfBelow(99.9, 20, 30), - criticalPredicate = TriggerIfBelow(99.9, 30, 30), - ), - LatencyAlert( - notificationGroup = DefaultNotificationGroup, - percentile = P99, - warnPredicate = TriggerIfLatencyAbove(1100.millis, 15, 30), - criticalPredicate = TriggerIfLatencyAbove(1200.millis, 15, 30) - ), - ThroughputAlert( - notificationGroup = DefaultNotificationGroup, - warnPredicate = TriggerIfAbove(18000), - criticalPredicate = TriggerIfAbove(20000) - ) - ) - - override val debugAccessPolicies: Set[AccessPolicy] = DefaultHomeMixerAccessPolicy -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/BUILD.bazel deleted file mode 100644 index b821a8f8d..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/BUILD.bazel +++ /dev/null @@ -1,20 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model/request", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/model/cursor", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/pipeline/candidate/flexible_injection_pipeline", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline", - "stringcenter/client", - "stringcenter/client/src/main/java", - ], - exports = [ - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/model/cursor", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/BUILD.docx new file mode 100644 index 000000000..6da0329ba Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/SubscribedQuery.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/SubscribedQuery.docx new file mode 100644 index 000000000..011227875 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/SubscribedQuery.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/SubscribedQuery.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/SubscribedQuery.scala deleted file mode 100644 index 2085ef54f..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/model/SubscribedQuery.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.home_mixer.product.subscribed.model - -import com.twitter.home_mixer.model.request.DeviceContext -import com.twitter.home_mixer.model.request.HasDeviceContext -import com.twitter.home_mixer.model.request.HasSeenTweetIds -import com.twitter.home_mixer.model.request.SubscribedProduct -import com.twitter.product_mixer.component_library.model.cursor.UrtOrderedCursor -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.model.marshalling.request._ -import com.twitter.product_mixer.core.pipeline.HasPipelineCursor -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.timelines.configapi.Params - -case class SubscribedQuery( - override val params: Params, - override val clientContext: ClientContext, - override val pipelineCursor: Option[UrtOrderedCursor], - override val requestedMaxResults: Option[Int], - override val debugOptions: Option[DebugOptions], - override val features: Option[FeatureMap], - override val deviceContext: Option[DeviceContext], - override val seenTweetIds: Option[Seq[Long]]) - extends PipelineQuery - with HasPipelineCursor[UrtOrderedCursor] - with HasDeviceContext - with HasSeenTweetIds { - override val product: Product = SubscribedProduct - - override def withFeatureMap(features: FeatureMap): SubscribedQuery = - copy(features = Some(features)) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/BUILD.bazel deleted file mode 100644 index a56e3a1fd..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/BUILD.bazel +++ /dev/null @@ -1,14 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "configapi/configapi-core/src/main/scala/com/twitter/timelines/configapi", - "home-mixer/server/src/main/scala/com/twitter/home_mixer/param/decider", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/decorator/urt", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/product", - "util/util-core/src/main/scala/com/twitter/conversions", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/BUILD.docx new file mode 100644 index 000000000..57d9420d5 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParam.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParam.docx new file mode 100644 index 000000000..4b6818b42 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParam.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParam.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParam.scala deleted file mode 100644 index 9e4ade43a..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParam.scala +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.home_mixer.product.subscribed.param - -import com.twitter.timelines.configapi.FSBoundedParam - -object SubscribedParam { - val SupportedClientFSName = "subscribed_supported_client" - - object ServerMaxResultsParam - extends FSBoundedParam[Int]( - name = "subscribed_server_max_results", - default = 100, - min = 1, - max = 500 - ) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParamConfig.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParamConfig.docx new file mode 100644 index 000000000..9b13f70b2 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParamConfig.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParamConfig.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParamConfig.scala deleted file mode 100644 index 58ce7ec35..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/product/subscribed/param/SubscribedParamConfig.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.home_mixer.product.subscribed.param - -import com.twitter.home_mixer.param.decider.DeciderKey -import com.twitter.home_mixer.product.subscribed.param.SubscribedParam._ -import com.twitter.product_mixer.core.product.ProductParamConfig -import com.twitter.servo.decider.DeciderKeyName -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -class SubscribedParamConfig @Inject() () extends ProductParamConfig { - override val enabledDeciderKey: DeciderKeyName = DeciderKey.EnableSubscribedProduct - override val supportedClientFSName: String = SupportedClientFSName - - override val boundedIntFSOverrides = Seq( - ServerMaxResultsParam - ) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/BUILD.bazel deleted file mode 100644 index c0211ff72..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/BUILD.bazel +++ /dev/null @@ -1,15 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "configapi/configapi-core", - "home-mixer/thrift/src/main/thrift:thrift-scala", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/product", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/product", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/product/registry", - "stitch/stitch-core", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/BUILD.docx new file mode 100644 index 000000000..dca80f30e Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAccessPolicy.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAccessPolicy.docx new file mode 100644 index 000000000..3e0518778 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAccessPolicy.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAccessPolicy.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAccessPolicy.scala deleted file mode 100644 index 853f4b56a..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAccessPolicy.scala +++ /dev/null @@ -1,13 +0,0 @@ -package com.twitter.home_mixer.service - -import com.twitter.product_mixer.core.functional_component.common.access_policy.AccessPolicy -import com.twitter.product_mixer.core.functional_component.common.access_policy.AllowedLdapGroups - -object HomeMixerAccessPolicy { - - /** - * Access policies can be configured on a product-by-product basis but you may also want products - * to have a common policy. - */ - val DefaultHomeMixerAccessPolicy: Set[AccessPolicy] = Set(AllowedLdapGroups(Set.empty[String])) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAlertConfig.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAlertConfig.docx new file mode 100644 index 000000000..04b110075 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAlertConfig.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAlertConfig.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAlertConfig.scala deleted file mode 100644 index 597fd4d36..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/HomeMixerAlertConfig.scala +++ /dev/null @@ -1,66 +0,0 @@ -package com.twitter.home_mixer.service - -import com.twitter.conversions.DurationOps._ -import com.twitter.product_mixer.core.functional_component.common.alert.Destination -import com.twitter.product_mixer.core.functional_component.common.alert.EmptyResponseRateAlert -import com.twitter.product_mixer.core.functional_component.common.alert.LatencyAlert -import com.twitter.product_mixer.core.functional_component.common.alert.NotificationGroup -import com.twitter.product_mixer.core.functional_component.common.alert.P99 -import com.twitter.product_mixer.core.functional_component.common.alert.Percentile -import com.twitter.product_mixer.core.functional_component.common.alert.SuccessRateAlert -import com.twitter.product_mixer.core.functional_component.common.alert.predicate.TriggerIfAbove -import com.twitter.product_mixer.core.functional_component.common.alert.predicate.TriggerIfBelow -import com.twitter.product_mixer.core.functional_component.common.alert.predicate.TriggerIfLatencyAbove -import com.twitter.util.Duration - -/** - * Notifications (email, pagerduty, etc) can be specific per-alert but it is common for multiple - * products to share notification configuration. - */ -object HomeMixerAlertConfig { - val DefaultNotificationGroup: NotificationGroup = NotificationGroup( - warn = Destination(emails = Seq("")), - critical = Destination(emails = Seq("")) - ) - - object BusinessHours { - val DefaultNotificationGroup: NotificationGroup = NotificationGroup( - warn = Destination(emails = Seq("")), - critical = Destination(emails = - Seq("")) - ) - - def defaultEmptyResponseRateAlert(warnThreshold: Double = 50, criticalThreshold: Double = 80) = - EmptyResponseRateAlert( - notificationGroup = DefaultNotificationGroup, - warnPredicate = TriggerIfAbove(warnThreshold), - criticalPredicate = TriggerIfAbove(criticalThreshold) - ) - - def defaultSuccessRateAlert( - threshold: Double = 99.5, - warnDatapointsPastThreshold: Int = 20, - criticalDatapointsPastThreshold: Int = 30, - duration: Int = 30 - ) = SuccessRateAlert( - notificationGroup = DefaultNotificationGroup, - warnPredicate = TriggerIfBelow(threshold, warnDatapointsPastThreshold, duration), - criticalPredicate = TriggerIfBelow(threshold, criticalDatapointsPastThreshold, duration), - ) - - def defaultLatencyAlert( - latencyThreshold: Duration = 200.millis, - warningDatapointsPastThreshold: Int = 15, - criticalDatapointsPastThreshold: Int = 30, - duration: Int = 30, - percentile: Percentile = P99 - ): LatencyAlert = LatencyAlert( - notificationGroup = DefaultNotificationGroup, - percentile = percentile, - warnPredicate = - TriggerIfLatencyAbove(latencyThreshold, warningDatapointsPastThreshold, duration), - criticalPredicate = - TriggerIfLatencyAbove(latencyThreshold, criticalDatapointsPastThreshold, duration) - ) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/ScoredTweetsService.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/ScoredTweetsService.docx new file mode 100644 index 000000000..5194c08e9 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/ScoredTweetsService.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/ScoredTweetsService.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/ScoredTweetsService.scala deleted file mode 100644 index 158e5ee45..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/service/ScoredTweetsService.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.home_mixer.service - -import com.twitter.home_mixer.{thriftscala => t} -import com.twitter.product_mixer.core.model.marshalling.request.Request -import com.twitter.product_mixer.core.pipeline.product.ProductPipelineRequest -import com.twitter.product_mixer.core.product.registry.ProductPipelineRegistry -import com.twitter.stitch.Stitch -import com.twitter.timelines.configapi.Params -import javax.inject.Inject -import javax.inject.Singleton -import scala.reflect.runtime.universe._ - -@Singleton -class ScoredTweetsService @Inject() (productPipelineRegistry: ProductPipelineRegistry) { - - def getScoredTweetsResponse[RequestType <: Request]( - request: RequestType, - params: Params - )( - implicit requestTypeTag: TypeTag[RequestType] - ): Stitch[t.ScoredTweetsResponse] = productPipelineRegistry - .getProductPipeline[RequestType, t.ScoredTweetsResponse](request.product) - .process(ProductPipelineRequest(request, params)) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/BUILD.bazel deleted file mode 100644 index c4855d9e7..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/BUILD.bazel +++ /dev/null @@ -1,13 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/bijection:scrooge", - "3rdparty/jvm/com/twitter/storehaus:core", - "src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala", - "stitch/stitch-core", - "storage/clients/manhattan/client/src/main/scala", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/BUILD.docx new file mode 100644 index 000000000..f5ee9965e Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/RealGraphInNetworkScoresStore.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/RealGraphInNetworkScoresStore.docx new file mode 100644 index 000000000..e16859128 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/RealGraphInNetworkScoresStore.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/RealGraphInNetworkScoresStore.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/RealGraphInNetworkScoresStore.scala deleted file mode 100644 index ce0b182be..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/store/RealGraphInNetworkScoresStore.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.home_mixer.store - -import com.twitter.bijection.Injection -import com.twitter.home_mixer.store.ManhattanRealGraphKVDescriptor._ -import com.twitter.stitch.Stitch -import com.twitter.storage.client.manhattan.bijections.Bijections -import com.twitter.storage.client.manhattan.bijections.Bijections.BinaryScalaInjection -import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpoint -import com.twitter.storage.client.manhattan.kv.impl.ReadOnlyKeyDescriptor -import com.twitter.storage.client.manhattan.kv.impl.ValueDescriptor -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import com.twitter.wtf.candidate.{thriftscala => wtf} - -object ManhattanRealGraphKVDescriptor { - implicit val byteArray2Buf = Bijections.BytesBijection - - val realGraphDatasetName = "real_graph_scores_in" - val keyInjection = Injection.connect[Long, Array[Byte]].andThen(Bijections.BytesInjection) - val keyDesc = ReadOnlyKeyDescriptor(keyInjection) - val valueDesc = ValueDescriptor(BinaryScalaInjection(wtf.CandidateSeq)) - val realGraphDatasetKey = keyDesc.withDataset(realGraphDatasetName) -} - -/** - * Hydrates real graph in network scores for a viewer - */ -class RealGraphInNetworkScoresStore(manhattanKVEndpoint: ManhattanKVEndpoint) - extends ReadableStore[Long, Seq[wtf.Candidate]] { - - override def get(viewerId: Long): Future[Option[Seq[wtf.Candidate]]] = Stitch - .run(manhattanKVEndpoint.get(realGraphDatasetKey.withPkey(viewerId), valueDesc)) - .map(_.map(mhResponse => mhResponse.contents.candidates)) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/BUILD.bazel deleted file mode 100644 index 8e04fa11f..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/BUILD.bazel +++ /dev/null @@ -1,25 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/thrift/src/main/thrift:thrift-scala", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/feature_hydrator/query/impressed_tweets", - "product-mixer/component-library/src/main/scala/com/twitter/product_mixer/component_library/model/candidate", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/model/common/presentation", - "servo/repo/src/main/scala", - "snowflake/src/main/scala/com/twitter/snowflake/id", - "src/java/com/twitter/ml/api:api-base", - "src/java/com/twitter/ml/api/util", - "src/java/com/twitter/search/common/util/lang", - "src/scala/com/twitter/ml/api/util", - "src/thrift/com/twitter/ml/api:data-java", - "src/thrift/com/twitter/search/common:constants-java", - "src/thrift/com/twitter/search/common:constants-scala", - "src/thrift/com/twitter/service/metastore/gen:thrift-java", - "src/thrift/com/twitter/service/metastore/gen:thrift-scala", - "storage/clients/manhattan/client/src/main/scala", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/BUILD.docx new file mode 100644 index 000000000..cc13d47a8 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CachedScoredTweetsHelper.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CachedScoredTweetsHelper.docx new file mode 100644 index 000000000..ce65fe3e3 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CachedScoredTweetsHelper.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CachedScoredTweetsHelper.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CachedScoredTweetsHelper.scala deleted file mode 100644 index e0fbdd76f..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CachedScoredTweetsHelper.scala +++ /dev/null @@ -1,50 +0,0 @@ -package com.twitter.home_mixer.util - -import com.twitter.home_mixer.model.HomeFeatures.CachedScoredTweetsFeature -import com.twitter.home_mixer.{thriftscala => hmt} -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.model.common.identifier.CandidatePipelineIdentifier -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.util.Time - -object CachedScoredTweetsHelper { - - def tweetImpressionsAndCachedScoredTweets( - features: FeatureMap, - candidatePipelineIdentifier: CandidatePipelineIdentifier - ): Seq[Long] = { - val tweetImpressions = TweetImpressionsHelper.tweetImpressions(features) - val cachedScoredTweets = features - .getOrElse(CachedScoredTweetsFeature, Seq.empty) - .filter { tweet => - tweet.candidatePipelineIdentifier.exists( - CandidatePipelineIdentifier(_).equals(candidatePipelineIdentifier)) - }.map(_.tweetId) - - (tweetImpressions ++ cachedScoredTweets).toSeq - } - - def tweetImpressionsAndCachedScoredTweetsInRange( - features: FeatureMap, - candidatePipelineIdentifier: CandidatePipelineIdentifier, - maxNumImpressions: Int, - sinceTime: Time, - untilTime: Time - ): Seq[Long] = - tweetImpressionsAndCachedScoredTweets(features, candidatePipelineIdentifier) - .filter { tweetId => SnowflakeId.isSnowflakeId(tweetId) } - .filter { tweetId => - val creationTime = SnowflakeId.timeFromId(tweetId) - sinceTime <= creationTime && untilTime >= creationTime - }.take(maxNumImpressions) - - def unseenCachedScoredTweets( - features: FeatureMap - ): Seq[hmt.ScoredTweet] = { - val seenTweetIds = TweetImpressionsHelper.tweetImpressions(features) - - features - .getOrElse(CachedScoredTweetsFeature, Seq.empty) - .filter(tweet => !seenTweetIds.contains(tweet.tweetId)) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CandidatesUtil.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CandidatesUtil.docx new file mode 100644 index 000000000..9d13dc4ab Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CandidatesUtil.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CandidatesUtil.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CandidatesUtil.scala deleted file mode 100644 index 06fe5646c..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/CandidatesUtil.scala +++ /dev/null @@ -1,116 +0,0 @@ -package com.twitter.home_mixer.util - -import com.twitter.home_mixer.model.HomeFeatures.AuthorIdFeature -import com.twitter.home_mixer.model.HomeFeatures.FavoritedByUserIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.HasImageFeature -import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.IsRetweetFeature -import com.twitter.home_mixer.model.HomeFeatures.MediaUnderstandingAnnotationIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.RepliedByEngagerIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.RetweetedByEngagerIdsFeature -import com.twitter.home_mixer.model.HomeFeatures.ScoreFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceTweetIdFeature -import com.twitter.home_mixer.model.HomeFeatures.SourceUserIdFeature -import com.twitter.product_mixer.component_library.model.candidate.CursorCandidate -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures -import com.twitter.product_mixer.core.model.common.UniversalNoun -import com.twitter.product_mixer.core.model.common.presentation.CandidateWithDetails -import com.twitter.product_mixer.core.model.common.presentation.ItemCandidateWithDetails -import com.twitter.product_mixer.core.model.common.presentation.ModuleCandidateWithDetails -import com.twitter.product_mixer.core.pipeline.PipelineQuery -import com.twitter.product_mixer.core.pipeline.pipeline_failure.PipelineFailure -import com.twitter.product_mixer.core.pipeline.pipeline_failure.UnexpectedCandidateResult -import scala.reflect.ClassTag - -object CandidatesUtil { - def getItemCandidates(candidates: Seq[CandidateWithDetails]): Seq[ItemCandidateWithDetails] = { - candidates.collect { - case item: ItemCandidateWithDetails if !item.isCandidateType[CursorCandidate] => Seq(item) - case module: ModuleCandidateWithDetails => module.candidates - }.flatten - } - - def getItemCandidatesWithOnlyModuleLast( - candidates: Seq[CandidateWithDetails] - ): Seq[ItemCandidateWithDetails] = { - candidates.collect { - case item: ItemCandidateWithDetails if !item.isCandidateType[CursorCandidate] => item - case module: ModuleCandidateWithDetails => module.candidates.last - } - } - - def containsType[CandidateType <: UniversalNoun[_]]( - candidates: Seq[CandidateWithDetails] - )( - implicit tag: ClassTag[CandidateType] - ): Boolean = candidates.exists { - case ItemCandidateWithDetails(_: CandidateType, _, _) => true - case module: ModuleCandidateWithDetails => - module.candidates.head.isCandidateType[CandidateType]() - case _ => false - } - - def getOriginalTweetId(candidate: CandidateWithFeatures[TweetCandidate]): Long = { - if (candidate.features.getOrElse(IsRetweetFeature, false)) - candidate.features.getOrElse(SourceTweetIdFeature, None).getOrElse(candidate.candidate.id) - else - candidate.candidate.id - } - - def getOriginalAuthorId(candidateFeatures: FeatureMap): Option[Long] = - if (candidateFeatures.getOrElse(IsRetweetFeature, false)) - candidateFeatures.getOrElse(SourceUserIdFeature, None) - else candidateFeatures.getOrElse(AuthorIdFeature, None) - - def isOriginalTweet(candidate: CandidateWithFeatures[TweetCandidate]): Boolean = - !candidate.features.getOrElse(IsRetweetFeature, false) && - candidate.features.getOrElse(InReplyToTweetIdFeature, None).isEmpty - - def getEngagerUserIds( - candidateFeatures: FeatureMap - ): Seq[Long] = { - candidateFeatures.getOrElse(FavoritedByUserIdsFeature, Seq.empty) ++ - candidateFeatures.getOrElse(RetweetedByEngagerIdsFeature, Seq.empty) ++ - candidateFeatures.getOrElse(RepliedByEngagerIdsFeature, Seq.empty) - } - - def getMediaUnderstandingAnnotationIds( - candidateFeatures: FeatureMap - ): Seq[Long] = { - if (candidateFeatures.get(HasImageFeature)) - candidateFeatures.getOrElse(MediaUnderstandingAnnotationIdsFeature, Seq.empty) - else Seq.empty - } - - def getTweetIdAndSourceId(candidate: CandidateWithFeatures[TweetCandidate]): Seq[Long] = - Seq(candidate.candidate.id) ++ candidate.features.getOrElse(SourceTweetIdFeature, None) - - def isAuthoredByViewer(query: PipelineQuery, candidateFeatures: FeatureMap): Boolean = - candidateFeatures.getOrElse(AuthorIdFeature, None).contains(query.getRequiredUserId) || - (candidateFeatures.getOrElse(IsRetweetFeature, false) && - candidateFeatures.getOrElse(SourceUserIdFeature, None).contains(query.getRequiredUserId)) - - val reverseChronTweetsOrdering: Ordering[CandidateWithDetails] = - Ordering.by[CandidateWithDetails, Long] { - case ItemCandidateWithDetails(candidate: TweetCandidate, _, _) => -candidate.id - case ModuleCandidateWithDetails(candidates, _, _) if candidates.nonEmpty => - -candidates.last.candidateIdLong - case _ => throw PipelineFailure(UnexpectedCandidateResult, "Invalid candidate type") - } - - val scoreOrdering: Ordering[CandidateWithDetails] = Ordering.by[CandidateWithDetails, Double] { - case ItemCandidateWithDetails(_, _, features) => - -features.getOrElse(ScoreFeature, None).getOrElse(0.0) - case ModuleCandidateWithDetails(candidates, _, _) => - -candidates.last.features.getOrElse(ScoreFeature, None).getOrElse(0.0) - case _ => throw PipelineFailure(UnexpectedCandidateResult, "Invalid candidate type") - } - - val conversationModuleTweetsOrdering: Ordering[CandidateWithDetails] = - Ordering.by[CandidateWithDetails, Long] { - case ItemCandidateWithDetails(candidate: TweetCandidate, _, _) => candidate.id - case _ => throw PipelineFailure(UnexpectedCandidateResult, "Only Item candidate expected") - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/DataRecordUtil.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/DataRecordUtil.docx new file mode 100644 index 000000000..7c6d50722 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/DataRecordUtil.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/DataRecordUtil.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/DataRecordUtil.scala deleted file mode 100644 index b972b8158..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/DataRecordUtil.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.home_mixer.util - -import com.twitter.ml.api.DataRecord -import com.twitter.ml.api.FeatureContext -import com.twitter.ml.api.util.SRichDataRecord -import com.twitter.ml.api.Feature -import java.lang.{Double => JDouble} - -object DataRecordUtil { - def applyRename( - dataRecord: DataRecord, - featureContext: FeatureContext, - renamedFeatureContext: FeatureContext, - featureRenamingMap: Map[Feature[_], Feature[_]] - ): DataRecord = { - val richFullDr = new SRichDataRecord(dataRecord, featureContext) - val richNewDr = new SRichDataRecord(new DataRecord, renamedFeatureContext) - val featureIterator = featureContext.iterator() - featureIterator.forEachRemaining { feature => - if (richFullDr.hasFeature(feature)) { - val renamedFeature = featureRenamingMap.getOrElse(feature, feature) - - val typedFeature = feature.asInstanceOf[Feature[JDouble]] - val typedRenamedFeature = renamedFeature.asInstanceOf[Feature[JDouble]] - - richNewDr.setFeatureValue(typedRenamedFeature, richFullDr.getFeatureValue(typedFeature)) - } - } - richNewDr.getRecord - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/InjectionTransformer.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/InjectionTransformer.docx new file mode 100644 index 000000000..db6b93b6b Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/InjectionTransformer.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/InjectionTransformer.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/InjectionTransformer.scala deleted file mode 100644 index f45102f37..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/InjectionTransformer.scala +++ /dev/null @@ -1,43 +0,0 @@ -package com.twitter.home_mixer.util - -import com.twitter.bijection.Injection -import com.twitter.io.Buf -import com.twitter.servo.util.Transformer -import com.twitter.storage.client.manhattan.bijections.Bijections -import com.twitter.util.Return -import com.twitter.util.Try -import java.nio.ByteBuffer - -object InjectionTransformerImplicits { - implicit class ByteArrayInjectionToByteBufferTransformer[A](baInj: Injection[A, Array[Byte]]) { - - private val bbInj: Injection[A, ByteBuffer] = baInj - .andThen(Bijections.byteArray2Buf) - .andThen(Bijections.byteBuffer2Buf.inverse) - - def toByteBufferTransformer(): Transformer[A, ByteBuffer] = new InjectionTransformer(bbInj) - def toByteArrayTransformer(): Transformer[A, Array[Byte]] = new InjectionTransformer(baInj) - } - - implicit class BufInjectionToByteBufferTransformer[A](bufInj: Injection[A, Buf]) { - - private val bbInj: Injection[A, ByteBuffer] = bufInj.andThen(Bijections.byteBuffer2Buf.inverse) - private val baInj: Injection[A, Array[Byte]] = bufInj.andThen(Bijections.byteArray2Buf.inverse) - - def toByteBufferTransformer(): Transformer[A, ByteBuffer] = new InjectionTransformer(bbInj) - def toByteArrayTransformer(): Transformer[A, Array[Byte]] = new InjectionTransformer(baInj) - } - - implicit class ByteBufferInjectionToByteBufferTransformer[A](bbInj: Injection[A, ByteBuffer]) { - - private val baInj: Injection[A, Array[Byte]] = bbInj.andThen(Bijections.bb2ba) - - def toByteBufferTransformer(): Transformer[A, ByteBuffer] = new InjectionTransformer(bbInj) - def toByteArrayTransformer(): Transformer[A, Array[Byte]] = new InjectionTransformer(baInj) - } -} - -class InjectionTransformer[A, B](inj: Injection[A, B]) extends Transformer[A, B] { - override def to(a: A): Try[B] = Return(inj(a)) - override def from(b: B): Try[A] = Try.fromScala(inj.invert(b)) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/LanguageUtil.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/LanguageUtil.docx new file mode 100644 index 000000000..74f342c80 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/LanguageUtil.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/LanguageUtil.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/LanguageUtil.scala deleted file mode 100644 index 23c77c27f..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/LanguageUtil.scala +++ /dev/null @@ -1,93 +0,0 @@ -package com.twitter.home_mixer.util - -import com.twitter.search.common.constants.{thriftscala => scc} -import com.twitter.search.common.util.lang.ThriftLanguageUtil -import com.twitter.service.metastore.gen.{thriftscala => smg} - -object LanguageUtil { - - private val DefaultMinProducedLanguageRatio = 0.05 - private val DefaultMinConsumedLanguageConfidence = 0.8 - - /** - * Computes a list of languages based on UserLanguages information retrieved from Metastore. - * - * The list is sorted in descending order of confidence score associated with each language. - * That is, language with highest confidence value is in index 0. - */ - def computeLanguages( - userLanguages: smg.UserLanguages, - minProducedLanguageRatio: Double = DefaultMinProducedLanguageRatio, - minConsumedLanguageConfidence: Double = DefaultMinConsumedLanguageConfidence - ): Seq[scc.ThriftLanguage] = { - val languageConfidenceMap = computeLanguageConfidenceMap( - userLanguages, - minProducedLanguageRatio, - minConsumedLanguageConfidence - ) - languageConfidenceMap.toSeq.sortBy(-_._2).map(_._1) // _1 = language, _2 = score - } - - /** - * Computes confidence map based on UserLanguages information retrieved from Metastore. - * where, - * key = language code - * value = level of confidence that the language is applicable to a user. - */ - private def computeLanguageConfidenceMap( - userLanguages: smg.UserLanguages, - minProducedLanguageRatio: Double, - minConsumedLanguageConfidence: Double - ): Map[scc.ThriftLanguage, Double] = { - - val producedLanguages = getLanguageMap(userLanguages.produced) - val consumedLanguages = getLanguageMap(userLanguages.consumed) - val languages = (producedLanguages.keys ++ consumedLanguages.keys).toSet - var maxConfidence = 0.0 - - val confidenceMap = languages.map { language => - val produceRatio = producedLanguages - .get(language) - .map { score => if (score < minProducedLanguageRatio) 0.0 else score } - .getOrElse(0.0) - - val consumeConfidence = consumedLanguages - .get(language) - .map { score => if (score < minConsumedLanguageConfidence) 0.0 else score } - .getOrElse(0.0) - - val overallConfidence = (0.3 + 4 * produceRatio) * (0.1 + consumeConfidence) - maxConfidence = Math.max(maxConfidence, overallConfidence) - - (language -> overallConfidence) - }.toMap - - val normalizedConfidenceMap = if (maxConfidence > 0) { - confidenceMap.map { - case (language, confidenceScore) => - val normalizedScore = (confidenceScore / maxConfidence * 0.9) + 0.1 - (language -> normalizedScore) - } - } else { - confidenceMap - } - normalizedConfidenceMap - } - - private def getLanguageMap( - scoredLanguages: Seq[smg.ScoredString] - ): Map[scc.ThriftLanguage, Double] = { - scoredLanguages.flatMap { scoredLanguage => - getThriftLanguage(scoredLanguage.item).map { language => (language -> scoredLanguage.weight) } - }.toMap - } - - private def getThriftLanguage(languageName: String): Option[scc.ThriftLanguage] = { - val languageOrdinal = ThriftLanguageUtil.getThriftLanguageOf(languageName).ordinal - val language = scc.ThriftLanguage(languageOrdinal) - language match { - case scc.ThriftLanguage.Unknown => None - case _ => Some(language) - } - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/MissingKeyException.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/MissingKeyException.docx new file mode 100644 index 000000000..d6bb57e69 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/MissingKeyException.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/MissingKeyException.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/MissingKeyException.scala deleted file mode 100644 index ae8fd4ded..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/MissingKeyException.scala +++ /dev/null @@ -1,5 +0,0 @@ -package com.twitter.home_mixer.util - -object MissingKeyException extends Exception("Missing key") { - override def toString: String = getMessage -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ObservedKeyValueResultHandler.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ObservedKeyValueResultHandler.docx new file mode 100644 index 000000000..f36e9e7e5 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ObservedKeyValueResultHandler.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ObservedKeyValueResultHandler.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ObservedKeyValueResultHandler.scala deleted file mode 100644 index 0cd0cd60b..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ObservedKeyValueResultHandler.scala +++ /dev/null @@ -1,43 +0,0 @@ -package com.twitter.home_mixer.util - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.servo.keyvalue.KeyValueResult -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.util.Try - -trait ObservedKeyValueResultHandler { - val statsReceiver: StatsReceiver - val statScope: String - - private lazy val scopedStatsReceiver = statsReceiver.scope(statScope) - private lazy val keyTotalCounter = scopedStatsReceiver.counter("key/total") - private lazy val keyFoundCounter = scopedStatsReceiver.counter("key/found") - private lazy val keyLossCounter = scopedStatsReceiver.counter("key/loss") - private lazy val keyFailureCounter = scopedStatsReceiver.counter("key/failure") - - def observedGet[K, V]( - key: Option[K], - keyValueResult: KeyValueResult[K, V], - ): Try[Option[V]] = { - if (key.nonEmpty) { - keyTotalCounter.incr() - keyValueResult(key.get) match { - case Return(Some(value)) => - keyFoundCounter.incr() - Return(Some(value)) - case Return(None) => - keyLossCounter.incr() - Return(None) - case Throw(exception) => - keyFailureCounter.incr() - Throw(exception) - case _ => - // never reaches here - Return(None) - } - } else { - Throw(MissingKeyException) - } - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ReplyRetweetUtil.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ReplyRetweetUtil.docx new file mode 100644 index 000000000..019ac97ba Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ReplyRetweetUtil.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ReplyRetweetUtil.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ReplyRetweetUtil.scala deleted file mode 100644 index 13758ec5d..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/ReplyRetweetUtil.scala +++ /dev/null @@ -1,120 +0,0 @@ -package com.twitter.home_mixer.util - -import com.twitter.home_mixer.model.HomeFeatures._ -import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate -import com.twitter.product_mixer.core.model.common.CandidateWithFeatures - -object ReplyRetweetUtil { - - def isEligibleReply(candidate: CandidateWithFeatures[TweetCandidate]): Boolean = { - candidate.features.getOrElse(InReplyToTweetIdFeature, None).nonEmpty && - !candidate.features.getOrElse(IsRetweetFeature, false) - } - - /** - * Builds a map from reply tweet to all ancestors that are also hydrated candidates. If a reply - * does not have any ancestors which are also candidates, it will not add to the returned Map. - * Make sure ancestors are bottom-up ordered such that: - * (1) if parent tweet is a candidate, it should be the first item at the returned ancestors; - * (2) if root tweet is a candidate, it should be the last item at the returned ancestors. - * Retweets of replies or replies to retweets are not included. - */ - def replyToAncestorTweetCandidatesMap( - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Map[Long, Seq[CandidateWithFeatures[TweetCandidate]]] = { - val replyToAncestorTweetIdsMap: Map[Long, Seq[Long]] = - candidates.flatMap { candidate => - if (isEligibleReply(candidate)) { - val ancestorIds = - if (candidate.features.getOrElse(AncestorsFeature, Seq.empty).nonEmpty) { - candidate.features.getOrElse(AncestorsFeature, Seq.empty).map(_.tweetId) - } else { - Seq( - candidate.features.getOrElse(InReplyToTweetIdFeature, None), - candidate.features.getOrElse(ConversationModuleIdFeature, None) - ).flatten.distinct - } - Some(candidate.candidate.id -> ancestorIds) - } else { - None - } - }.toMap - - val ancestorTweetIds = replyToAncestorTweetIdsMap.values.flatten.toSet - val ancestorTweetsMapById: Map[Long, CandidateWithFeatures[TweetCandidate]] = candidates - .filter { maybeAncestor => - ancestorTweetIds.contains(maybeAncestor.candidate.id) - }.map { ancestor => - ancestor.candidate.id -> ancestor - }.toMap - - replyToAncestorTweetIdsMap - .mapValues { ancestorTweetIds => - ancestorTweetIds.flatMap { ancestorTweetId => - ancestorTweetsMapById.get(ancestorTweetId) - } - }.filter { - case (reply, ancestors) => - ancestors.nonEmpty - } - } - - /** - * This map is the opposite of [[replyToAncestorTweetCandidatesMap]]. - * Builds a map from ancestor tweet to all descendant replies that are also hydrated candidates. - * Currently, we only return two ancestors at most: one is inReplyToTweetId and the other - * is conversationId. - * Retweets of replies are not included. - */ - def ancestorTweetIdToDescendantRepliesMap( - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Map[Long, Seq[CandidateWithFeatures[TweetCandidate]]] = { - val tweetToCandidateMap = candidates.map(c => c.candidate.id -> c).toMap - replyToAncestorTweetCandidatesMap(candidates).toSeq - .flatMap { - case (reply, ancestorTweets) => - ancestorTweets.map { ancestor => - (ancestor.candidate.id, reply) - } - }.groupBy { case (ancestor, reply) => ancestor } - .mapValues { ancestorReplyPairs => - ancestorReplyPairs.map(_._2).distinct - }.mapValues(tweetIds => tweetIds.map(tid => tweetToCandidateMap(tid))) - } - - /** - * Builds a map from reply tweet to inReplyToTweet which is also a candidate. - * Retweets of replies or replies to retweets are not included - */ - def replyTweetIdToInReplyToTweetMap( - candidates: Seq[CandidateWithFeatures[TweetCandidate]] - ): Map[Long, CandidateWithFeatures[TweetCandidate]] = { - val eligibleReplyCandidates = candidates.filter { candidate => - isEligibleReply(candidate) && candidate.features - .getOrElse(InReplyToTweetIdFeature, None) - .nonEmpty - } - - val inReplyToTweetIds = eligibleReplyCandidates - .flatMap(_.features.getOrElse(InReplyToTweetIdFeature, None)) - .toSet - - val inReplyToTweetIdToTweetMap: Map[Long, CandidateWithFeatures[TweetCandidate]] = candidates - .filter { maybeInReplyToTweet => - inReplyToTweetIds.contains(maybeInReplyToTweet.candidate.id) - }.map { inReplyToTweet => - inReplyToTweet.candidate.id -> inReplyToTweet - }.toMap - - eligibleReplyCandidates.flatMap { reply => - val inReplyToTweetId = reply.features.getOrElse(InReplyToTweetIdFeature, None) - if (inReplyToTweetId.nonEmpty) { - inReplyToTweetIdToTweetMap.get(inReplyToTweetId.get).map { inReplyToTweet => - reply.candidate.id -> inReplyToTweet - } - } else { - None - } - }.toMap - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TensorFlowUtil.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TensorFlowUtil.docx new file mode 100644 index 000000000..b74c05782 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TensorFlowUtil.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TensorFlowUtil.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TensorFlowUtil.scala deleted file mode 100644 index 05d7c2127..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TensorFlowUtil.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.home_mixer.util - -import com.twitter.ml.api.thriftscala.FloatTensor -import com.twitter.ml.api.util.BufferToIterators.RichFloatBuffer -import java.nio.ByteBuffer -import java.nio.ByteOrder - -/** - * Contains functionality to transform data records and Tensors - */ - -object TensorFlowUtil { - - private def skipEmbeddingBBHeader(bb: ByteBuffer): ByteBuffer = { - val bb_copy = bb.duplicate() - bb_copy.getLong() - bb_copy - } - - private def byteBufferToFloatIterator( - bb: ByteBuffer - ): Iterator[Float] = { - bb.order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer.iterator - } - - def embeddingByteBufferToFloatTensor( - bb: ByteBuffer - ): FloatTensor = { - val bb_content = skipEmbeddingBBHeader(bb) - FloatTensor(byteBufferToFloatIterator(bb_content).map(_.toDouble).toList) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TweetImpressionsHelper.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TweetImpressionsHelper.docx new file mode 100644 index 000000000..ed0a6ffd2 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TweetImpressionsHelper.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TweetImpressionsHelper.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TweetImpressionsHelper.scala deleted file mode 100644 index eabaac9e3..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/TweetImpressionsHelper.scala +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.home_mixer.util - -import com.twitter.home_mixer.model.HomeFeatures.TweetImpressionsFeature -import com.twitter.product_mixer.component_library.feature_hydrator.query.impressed_tweets.ImpressedTweets -import com.twitter.product_mixer.core.feature.featuremap.FeatureMap - -object TweetImpressionsHelper { - def tweetImpressions(features: FeatureMap): Set[Long] = { - val manhattanImpressions = - features.getOrElse(TweetImpressionsFeature, Seq.empty).flatMap(_.tweetIds) - val memcacheImpressions = features.getOrElse(ImpressedTweets, Seq.empty) - - (manhattanImpressions ++ memcacheImpressions).toSet - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/BUILD.bazel deleted file mode 100644 index 2b5722179..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/BUILD.bazel +++ /dev/null @@ -1,23 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/thrift/src/main/thrift:thrift-scala", - "src/java/com/twitter/search/common/schema/base", - "src/java/com/twitter/search/common/schema/earlybird", - "src/java/com/twitter/search/common/util/lang", - "src/java/com/twitter/search/queryparser/query:core-query-nodes", - "src/java/com/twitter/search/queryparser/query/search:search-query-nodes", - "src/thrift/com/twitter/search:earlybird-scala", - "src/thrift/com/twitter/search/common:constants-scala", - "src/thrift/com/twitter/search/common:query-scala", - "src/thrift/com/twitter/search/common:ranking-scala", - "timelines/src/main/scala/com/twitter/timelines/clients/relevance_search", - "timelines/src/main/scala/com/twitter/timelines/earlybird/common/options", - "timelines/src/main/scala/com/twitter/timelines/earlybird/common/utils", - "timelines/src/main/scala/com/twitter/timelines/model/types", - "timelines/src/main/scala/com/twitter/timelines/util/stats", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/BUILD.docx new file mode 100644 index 000000000..08da14cef Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdRequestUtil.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdRequestUtil.docx new file mode 100644 index 000000000..d2f67dcd7 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdRequestUtil.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdRequestUtil.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdRequestUtil.scala deleted file mode 100644 index 4c482b251..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdRequestUtil.scala +++ /dev/null @@ -1,182 +0,0 @@ -package com.twitter.home_mixer.util.earlybird - -import com.twitter.conversions.DurationOps._ -import com.twitter.search.common.query.thriftjava.{thriftscala => scq} -import com.twitter.search.common.ranking.{thriftscala => scr} -import com.twitter.search.earlybird.{thriftscala => eb} -import com.twitter.timelines.clients.relevance_search.SearchClient.TweetFeatures -import com.twitter.timelines.clients.relevance_search.SearchClient.TweetTypes -import com.twitter.timelines.clients.relevance_search.SearchQueryBuilder -import com.twitter.timelines.clients.relevance_search.SearchQueryBuilder.QueryWithNamedDisjunctions -import com.twitter.timelines.earlybird.common.options.EarlybirdScoringModelConfig -import com.twitter.timelines.earlybird.common.utils.SearchOperator -import com.twitter.util.Duration - -object EarlybirdRequestUtil { - - val DefaultMaxHitsToProcess = 1000 - val DefaultSearchProcessingTimeout: Duration = 200.milliseconds - val DefaultHydrationMaxNumResultsPerShard = 1000 - val DefaultQueryMaxNumResultsPerShard = 300 - val DefaultHydrationCollectorParams = mkCollectorParams(DefaultHydrationMaxNumResultsPerShard) - - private val queryBuilder = new SearchQueryBuilder - - object EarlybirdScoringModels { - val UnifiedEngagementProd: Seq[EarlybirdScoringModelConfig] = Seq( - EarlybirdScoringModelConfig("timelines_unified_engagement_prod.schema_based", 1.0) - ) - - val UnifiedEngagementRectweet: Seq[EarlybirdScoringModelConfig] = Seq( - EarlybirdScoringModelConfig("timelines_unified_engagement_rectweet.schema_based", 1.0) - ) - } - - private[earlybird] def mkCollectorParams(numResultsToReturn: Int): scq.CollectorParams = { - scq.CollectorParams( - // numResultsToReturn defines how many results each EB shard will return to search root - numResultsToReturn = numResultsToReturn, - // terminationParams.maxHitsToProcess is used for early terminating per shard results fetching. - terminationParams = Some( - scq.CollectorTerminationParams( - maxHitsToProcess = Some(DefaultMaxHitsToProcess), - timeoutMs = DefaultSearchProcessingTimeout.inMilliseconds.toInt - )) - ) - } - - private def getRankingParams( - authorScoreMap: Option[Map[Long, Double]], - tensorflowModel: Option[String], - ebModels: Seq[EarlybirdScoringModelConfig] - ): Option[scr.ThriftRankingParams] = { - if (tensorflowModel.nonEmpty) { - Some( - scr.ThriftRankingParams( - `type` = Some(scr.ThriftScoringFunctionType.TensorflowBased), - selectedTensorflowModel = tensorflowModel, - minScore = -1.0e100, - applyBoosts = false, - authorSpecificScoreAdjustments = authorScoreMap - ) - ) - } else if (ebModels.nonEmpty) { - Some( - scr.ThriftRankingParams( - `type` = Some(scr.ThriftScoringFunctionType.ModelBased), - selectedModels = Some(ebModels.map(m => m.name -> m.weight).toMap), - applyBoosts = false, - minScore = -1.0e100, - authorSpecificScoreAdjustments = authorScoreMap - ) - ) - } else None - } - - def getTweetsRequest( - userId: Option[Long], - clientId: Option[String], - skipVeryRecentTweets: Boolean, - followedUserIds: Set[Long], - retweetsMutedUserIds: Set[Long], - beforeTweetIdExclusive: Option[Long], - afterTweetIdExclusive: Option[Long], - excludedTweetIds: Option[Set[Long]] = None, - maxCount: Int, - tweetTypes: TweetTypes.ValueSet, - authorScoreMap: Option[Map[Long, Double]] = None, - tensorflowModel: Option[String] = None, - ebModels: Seq[EarlybirdScoringModelConfig] = Seq.empty, - queryMaxNumResultsPerShard: Int = DefaultQueryMaxNumResultsPerShard - ): eb.EarlybirdRequest = { - - val QueryWithNamedDisjunctions(query, namedDisjunctionMap) = queryBuilder.create( - followedUserIds, - retweetsMutedUserIds, - beforeTweetIdExclusive, - afterTweetIdExclusive, - semanticCoreIds = None, - languages = None, - tweetTypes = tweetTypes, - searchOperator = SearchOperator.Exclude, - tweetFeatures = TweetFeatures.All, - excludedTweetIds = excludedTweetIds.getOrElse(Set.empty), - enableExcludeSourceTweetIdsQuery = false - ) - val ebRankingParams = getRankingParams(authorScoreMap, tensorflowModel, ebModels) - val relOptions = RelevanceSearchUtil.RelevanceOptions.copy( - rankingParams = ebRankingParams - ) - - val followedUserIdsSeq = followedUserIds.toSeq - val namedDisjunctionMapOpt = - if (namedDisjunctionMap.isEmpty) None - else Some(namedDisjunctionMap.mapValues(_.toSeq)) - - val thriftQuery = eb.ThriftSearchQuery( - serializedQuery = Some(query.serialize), - fromUserIDFilter64 = Some(followedUserIdsSeq), - numResults = maxCount, - collectConversationId = true, - rankingMode = eb.ThriftSearchRankingMode.Relevance, - relevanceOptions = Some(relOptions), - collectorParams = Some(mkCollectorParams(queryMaxNumResultsPerShard)), - facetFieldNames = Some(RelevanceSearchUtil.FacetsToFetch), - resultMetadataOptions = Some(RelevanceSearchUtil.MetadataOptions), - searcherId = userId, - searchStatusIds = None, - namedDisjunctionMap = namedDisjunctionMapOpt - ) - - eb.EarlybirdRequest( - searchQuery = thriftQuery, - clientId = clientId, - getOlderResults = Some(false), - followedUserIds = Some(followedUserIdsSeq), - getProtectedTweetsOnly = Some(false), - timeoutMs = DefaultSearchProcessingTimeout.inMilliseconds.toInt, - skipVeryRecentTweets = skipVeryRecentTweets, - numResultsToReturnAtRoot = Some(maxCount) - ) - } - - def getTweetsFeaturesRequest( - userId: Option[Long], - tweetIds: Option[Seq[Long]], - clientId: Option[String], - getOnlyProtectedTweets: Boolean = false, - authorScoreMap: Option[Map[Long, Double]] = None, - tensorflowModel: Option[String] = None, - ebModels: Seq[EarlybirdScoringModelConfig] = Seq.empty - ): eb.EarlybirdRequest = { - - val candidateSize = tweetIds.getOrElse(Seq.empty).size - val ebRankingParams = getRankingParams(authorScoreMap, tensorflowModel, ebModels) - val relOptions = RelevanceSearchUtil.RelevanceOptions.copy( - rankingParams = ebRankingParams - ) - val thriftQuery = eb.ThriftSearchQuery( - numResults = candidateSize, - collectConversationId = true, - rankingMode = eb.ThriftSearchRankingMode.Relevance, - relevanceOptions = Some(relOptions), - collectorParams = Some(DefaultHydrationCollectorParams), - facetFieldNames = Some(RelevanceSearchUtil.FacetsToFetch), - resultMetadataOptions = Some(RelevanceSearchUtil.MetadataOptions), - searcherId = userId, - searchStatusIds = tweetIds.map(_.toSet), - ) - - eb.EarlybirdRequest( - searchQuery = thriftQuery, - clientId = clientId, - getOlderResults = Some(false), - getProtectedTweetsOnly = Some(getOnlyProtectedTweets), - timeoutMs = DefaultSearchProcessingTimeout.inMilliseconds.toInt, - skipVeryRecentTweets = true, - // This param decides # of tweets to return from search superRoot and realtime/protected/Archive roots. - // It takes higher precedence than ThriftSearchQuery.numResults - numResultsToReturnAtRoot = Some(candidateSize) - ) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdResponseUtil.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdResponseUtil.docx new file mode 100644 index 000000000..edff8ebff Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdResponseUtil.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdResponseUtil.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdResponseUtil.scala deleted file mode 100644 index 06e0dd708..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/EarlybirdResponseUtil.scala +++ /dev/null @@ -1,411 +0,0 @@ -package com.twitter.home_mixer.util.earlybird - -import com.twitter.search.common.constants.{thriftscala => scc} -import com.twitter.search.common.features.{thriftscala => sc} -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant._ -import com.twitter.search.common.util.lang.ThriftLanguageUtil -import com.twitter.search.earlybird.{thriftscala => eb} -import com.twitter.timelines.earlybird.common.utils.InNetworkEngagement - -object EarlybirdResponseUtil { - - private[earlybird] val Mentions: String = "mentions" - private[earlybird] val Hashtags: String = "hashtags" - private val CharsToRemoveFromMentions: Set[Char] = "@".toSet - private val CharsToRemoveFromHashtags: Set[Char] = "#".toSet - - // Default value of settings of ThriftTweetFeatures. - private[earlybird] val DefaultEarlybirdFeatures: sc.ThriftTweetFeatures = sc.ThriftTweetFeatures() - private[earlybird] val DefaultCount = 0 - private[earlybird] val DefaultLanguage = 0 - private[earlybird] val DefaultScore = 0.0 - - private[earlybird] def getTweetCountByAuthorId( - searchResults: Seq[eb.ThriftSearchResult] - ): Map[Long, Int] = { - searchResults - .groupBy { result => - result.metadata.map(_.fromUserId).getOrElse(0L) - }.mapValues(_.size).withDefaultValue(0) - } - - private[earlybird] def getLanguage(uiLanguageCode: Option[String]): Option[scc.ThriftLanguage] = { - uiLanguageCode.flatMap { languageCode => - scc.ThriftLanguage.get(ThriftLanguageUtil.getThriftLanguageOf(languageCode).getValue) - } - } - - private def getMentions(result: eb.ThriftSearchResult): Seq[String] = { - val facetLabels = result.metadata.flatMap(_.facetLabels).getOrElse(Seq.empty) - getFacets(facetLabels, Mentions, CharsToRemoveFromMentions) - } - - private def getHashtags(result: eb.ThriftSearchResult): Seq[String] = { - val facetLabels = result.metadata.flatMap(_.facetLabels).getOrElse(Seq.empty) - getFacets(facetLabels, Hashtags, CharsToRemoveFromHashtags) - } - - private def getFacets( - facetLabels: Seq[eb.ThriftFacetLabel], - facetName: String, - charsToRemove: Set[Char] - ): Seq[String] = { - facetLabels.filter(_.fieldName == facetName).map(_.label.filterNot(charsToRemove)) - } - - private def isUserMentioned( - screenName: Option[String], - mentions: Seq[String], - mentionsInSourceTweet: Seq[String] - ): Boolean = - isUserMentioned(screenName, mentions) || isUserMentioned(screenName, mentionsInSourceTweet) - - private def isUserMentioned( - screenName: Option[String], - mentions: Seq[String] - ): Boolean = { - screenName - .exists { screenName => mentions.exists(_.equalsIgnoreCase(screenName)) } - } - - private[earlybird] def isUsersMainLanguage( - tweetLanguage: scc.ThriftLanguage, - userLanguages: Seq[scc.ThriftLanguage] - ): Boolean = { - (tweetLanguage != scc.ThriftLanguage.Unknown) && userLanguages.headOption.contains( - tweetLanguage) - } - - private[earlybird] def isUsersLanguage( - tweetLanguage: scc.ThriftLanguage, - userLanguages: Seq[scc.ThriftLanguage] - ): Boolean = { - (tweetLanguage != scc.ThriftLanguage.Unknown) && userLanguages.contains(tweetLanguage) - } - - private[earlybird] def isUILanguage( - tweetLanguage: scc.ThriftLanguage, - uiLanguage: Option[scc.ThriftLanguage] - ): Boolean = { - (tweetLanguage != scc.ThriftLanguage.Unknown) && uiLanguage.contains(tweetLanguage) - } - - private def getBooleanOptFeature( - featureName: EarlybirdFieldConstant, - resultMapOpt: Option[scala.collection.Map[Int, Boolean]], - defaultValue: Boolean = false, - ): Option[Boolean] = { - resultMapOpt.map { - _.getOrElse(featureName.getFieldId, defaultValue) - } - } - - private def getDoubleAsIntOptFeature( - featureName: EarlybirdFieldConstant, - resultMapOpt: Option[scala.collection.Map[Int, Double]] - ): Option[Int] = { - if (resultMapOpt.exists(_.contains(featureName.getFieldId))) - resultMapOpt - .map { - _.get(featureName.getFieldId) - } - .flatMap { doubleValue => - doubleValue.map(_.toInt) - } - else - None - } - - private def getIntOptFeature( - featureName: EarlybirdFieldConstant, - resultMapOpt: Option[scala.collection.Map[Int, Int]] - ): Option[Int] = { - if (resultMapOpt.exists(_.contains(featureName.getFieldId))) - resultMapOpt.flatMap { - _.get(featureName.getFieldId) - } - else - None - } - - def getTweetThriftFeaturesByTweetId( - searcherUserId: Long, - screenName: Option[String], - userLanguages: Seq[scc.ThriftLanguage], - uiLanguageCode: Option[String] = None, - followedUserIds: Set[Long], - mutuallyFollowingUserIds: Set[Long], - searchResults: Seq[eb.ThriftSearchResult], - sourceTweetSearchResults: Seq[eb.ThriftSearchResult], - ): Map[Long, sc.ThriftTweetFeatures] = { - - val allSearchResults = searchResults ++ sourceTweetSearchResults - val sourceTweetSearchResultById = - sourceTweetSearchResults.map(result => (result.id -> result)).toMap - val inNetworkEngagement = - InNetworkEngagement(followedUserIds.toSeq, mutuallyFollowingUserIds, allSearchResults) - searchResults.map { searchResult => - val features = getThriftTweetFeaturesFromSearchResult( - searcherUserId, - screenName, - userLanguages, - getLanguage(uiLanguageCode), - getTweetCountByAuthorId(searchResults), - followedUserIds, - mutuallyFollowingUserIds, - sourceTweetSearchResultById, - inNetworkEngagement, - searchResult - ) - (searchResult.id -> features) - }.toMap - } - - private[earlybird] def getThriftTweetFeaturesFromSearchResult( - searcherUserId: Long, - screenName: Option[String], - userLanguages: Seq[scc.ThriftLanguage], - uiLanguage: Option[scc.ThriftLanguage], - tweetCountByAuthorId: Map[Long, Int], - followedUserIds: Set[Long], - mutuallyFollowingUserIds: Set[Long], - sourceTweetSearchResultById: Map[Long, eb.ThriftSearchResult], - inNetworkEngagement: InNetworkEngagement, - searchResult: eb.ThriftSearchResult, - ): sc.ThriftTweetFeatures = { - val applyFeatures = (applyUserIndependentFeatures( - searchResult - )(_)).andThen( - applyUserDependentFeatures( - searcherUserId, - screenName, - userLanguages, - uiLanguage, - tweetCountByAuthorId, - followedUserIds, - mutuallyFollowingUserIds, - sourceTweetSearchResultById, - inNetworkEngagement, - searchResult - )(_) - ) - val tweetFeatures = searchResult.tweetFeatures.getOrElse(DefaultEarlybirdFeatures) - applyFeatures(tweetFeatures) - } - - private[earlybird] def applyUserIndependentFeatures( - result: eb.ThriftSearchResult - )( - thriftTweetFeatures: sc.ThriftTweetFeatures - ): sc.ThriftTweetFeatures = { - - val features = result.metadata - .map { metadata => - val isRetweet = metadata.isRetweet.getOrElse(false) - val isReply = metadata.isReply.getOrElse(false) - - // Facets. - val mentions = getMentions(result) - val hashtags = getHashtags(result) - - val searchResultSchemaFeatures = metadata.extraMetadata.flatMap(_.features) - val booleanSearchResultSchemaFeatures = searchResultSchemaFeatures.flatMap(_.boolValues) - val intSearchResultSchemaFeatures = searchResultSchemaFeatures.flatMap(_.intValues) - val doubleSearchResultSchemaFeatures = searchResultSchemaFeatures.flatMap(_.doubleValues) - - thriftTweetFeatures.copy( - // Info about the Tweet. - isRetweet = isRetweet, - isOffensive = metadata.isOffensive.getOrElse(false), - isReply = isReply, - fromVerifiedAccount = metadata.fromVerifiedAccount.getOrElse(false), - cardType = metadata.cardType, - signature = metadata.signature, - language = metadata.language, - isAuthorNSFW = metadata.isUserNSFW.getOrElse(false), - isAuthorBot = metadata.isUserBot.getOrElse(false), - isAuthorSpam = metadata.isUserSpam.getOrElse(false), - isSensitiveContent = - metadata.extraMetadata.flatMap(_.isSensitiveContent).getOrElse(false), - isAuthorProfileEgg = metadata.extraMetadata.flatMap(_.profileIsEggFlag).getOrElse(false), - isAuthorNew = metadata.extraMetadata.flatMap(_.isUserNewFlag).getOrElse(false), - linkLanguage = metadata.extraMetadata.flatMap(_.linkLanguage).getOrElse(DefaultLanguage), - // Info about Tweet content/media. - hasCard = metadata.hasCard.getOrElse(false), - hasImage = metadata.hasImage.getOrElse(false), - hasNews = metadata.hasNews.getOrElse(false), - hasVideo = metadata.hasVideo.getOrElse(false), - hasConsumerVideo = metadata.hasConsumerVideo.getOrElse(false), - hasProVideo = metadata.hasProVideo.getOrElse(false), - hasVine = metadata.hasVine.getOrElse(false), - hasPeriscope = metadata.hasPeriscope.getOrElse(false), - hasNativeVideo = metadata.hasNativeVideo.getOrElse(false), - hasNativeImage = metadata.hasNativeImage.getOrElse(false), - hasLink = metadata.hasLink.getOrElse(false), - hasVisibleLink = metadata.hasVisibleLink.getOrElse(false), - hasTrend = metadata.hasTrend.getOrElse(false), - hasMultipleHashtagsOrTrends = metadata.hasMultipleHashtagsOrTrends.getOrElse(false), - hasQuote = metadata.extraMetadata.flatMap(_.hasQuote), - urlsList = metadata.tweetUrls.map { - _.map(_.originalUrl) - }, - hasMultipleMedia = - metadata.extraMetadata.flatMap(_.hasMultipleMediaFlag).getOrElse(false), - visibleTokenRatio = getIntOptFeature(VISIBLE_TOKEN_RATIO, intSearchResultSchemaFeatures), - // Various counts. - favCount = metadata.favCount.getOrElse(DefaultCount), - replyCount = metadata.replyCount.getOrElse(DefaultCount), - retweetCount = metadata.retweetCount.getOrElse(DefaultCount), - quoteCount = metadata.extraMetadata.flatMap(_.quotedCount), - embedsImpressionCount = metadata.embedsImpressionCount.getOrElse(DefaultCount), - embedsUrlCount = metadata.embedsUrlCount.getOrElse(DefaultCount), - videoViewCount = metadata.videoViewCount.getOrElse(DefaultCount), - numMentions = metadata.extraMetadata.flatMap(_.numMentions).getOrElse(DefaultCount), - numHashtags = metadata.extraMetadata.flatMap(_.numHashtags).getOrElse(DefaultCount), - favCountV2 = metadata.extraMetadata.flatMap(_.favCountV2), - replyCountV2 = metadata.extraMetadata.flatMap(_.replyCountV2), - retweetCountV2 = metadata.extraMetadata.flatMap(_.retweetCountV2), - weightedFavoriteCount = metadata.extraMetadata.flatMap(_.weightedFavCount), - weightedReplyCount = metadata.extraMetadata.flatMap(_.weightedReplyCount), - weightedRetweetCount = metadata.extraMetadata.flatMap(_.weightedRetweetCount), - weightedQuoteCount = metadata.extraMetadata.flatMap(_.weightedQuoteCount), - embedsImpressionCountV2 = - getDoubleAsIntOptFeature(EMBEDS_IMPRESSION_COUNT_V2, doubleSearchResultSchemaFeatures), - embedsUrlCountV2 = - getDoubleAsIntOptFeature(EMBEDS_URL_COUNT_V2, doubleSearchResultSchemaFeatures), - decayedFavoriteCount = - getDoubleAsIntOptFeature(DECAYED_FAVORITE_COUNT, doubleSearchResultSchemaFeatures), - decayedRetweetCount = - getDoubleAsIntOptFeature(DECAYED_RETWEET_COUNT, doubleSearchResultSchemaFeatures), - decayedReplyCount = - getDoubleAsIntOptFeature(DECAYED_REPLY_COUNT, doubleSearchResultSchemaFeatures), - decayedQuoteCount = - getDoubleAsIntOptFeature(DECAYED_QUOTE_COUNT, doubleSearchResultSchemaFeatures), - fakeFavoriteCount = - getDoubleAsIntOptFeature(FAKE_FAVORITE_COUNT, doubleSearchResultSchemaFeatures), - fakeRetweetCount = - getDoubleAsIntOptFeature(FAKE_RETWEET_COUNT, doubleSearchResultSchemaFeatures), - fakeReplyCount = - getDoubleAsIntOptFeature(FAKE_REPLY_COUNT, doubleSearchResultSchemaFeatures), - fakeQuoteCount = - getDoubleAsIntOptFeature(FAKE_QUOTE_COUNT, doubleSearchResultSchemaFeatures), - // Scores. - textScore = metadata.textScore.getOrElse(DefaultScore), - earlybirdScore = metadata.score.getOrElse(DefaultScore), - parusScore = metadata.parusScore.getOrElse(DefaultScore), - userRep = metadata.userRep.getOrElse(DefaultScore), - pBlockScore = metadata.extraMetadata.flatMap(_.pBlockScore), - toxicityScore = metadata.extraMetadata.flatMap(_.toxicityScore), - pSpammyTweetScore = metadata.extraMetadata.flatMap(_.pSpammyTweetScore), - pReportedTweetScore = metadata.extraMetadata.flatMap(_.pReportedTweetScore), - pSpammyTweetContent = metadata.extraMetadata.flatMap(_.spammyTweetContentScore), - // Safety Signals - labelAbusiveFlag = - getBooleanOptFeature(LABEL_ABUSIVE_FLAG, booleanSearchResultSchemaFeatures), - labelAbusiveHiRclFlag = - getBooleanOptFeature(LABEL_ABUSIVE_HI_RCL_FLAG, booleanSearchResultSchemaFeatures), - labelDupContentFlag = - getBooleanOptFeature(LABEL_DUP_CONTENT_FLAG, booleanSearchResultSchemaFeatures), - labelNsfwHiPrcFlag = - getBooleanOptFeature(LABEL_NSFW_HI_PRC_FLAG, booleanSearchResultSchemaFeatures), - labelNsfwHiRclFlag = - getBooleanOptFeature(LABEL_NSFW_HI_RCL_FLAG, booleanSearchResultSchemaFeatures), - labelSpamFlag = getBooleanOptFeature(LABEL_SPAM_FLAG, booleanSearchResultSchemaFeatures), - labelSpamHiRclFlag = - getBooleanOptFeature(LABEL_SPAM_HI_RCL_FLAG, booleanSearchResultSchemaFeatures), - // Periscope Features - periscopeExists = - getBooleanOptFeature(PERISCOPE_EXISTS, booleanSearchResultSchemaFeatures), - periscopeHasBeenFeatured = - getBooleanOptFeature(PERISCOPE_HAS_BEEN_FEATURED, booleanSearchResultSchemaFeatures), - periscopeIsCurrentlyFeatured = getBooleanOptFeature( - PERISCOPE_IS_CURRENTLY_FEATURED, - booleanSearchResultSchemaFeatures), - periscopeIsFromQualitySource = getBooleanOptFeature( - PERISCOPE_IS_FROM_QUALITY_SOURCE, - booleanSearchResultSchemaFeatures), - periscopeIsLive = - getBooleanOptFeature(PERISCOPE_IS_LIVE, booleanSearchResultSchemaFeatures), - // Last Engagement Features - lastFavSinceCreationHrs = - getIntOptFeature(LAST_FAVORITE_SINCE_CREATION_HRS, intSearchResultSchemaFeatures), - lastRetweetSinceCreationHrs = - getIntOptFeature(LAST_RETWEET_SINCE_CREATION_HRS, intSearchResultSchemaFeatures), - lastReplySinceCreationHrs = - getIntOptFeature(LAST_REPLY_SINCE_CREATION_HRS, intSearchResultSchemaFeatures), - lastQuoteSinceCreationHrs = - getIntOptFeature(LAST_QUOTE_SINCE_CREATION_HRS, intSearchResultSchemaFeatures), - likedByUserIds = metadata.extraMetadata.flatMap(_.likedByUserIds), - mentionsList = if (mentions.nonEmpty) Some(mentions) else None, - hashtagsList = if (hashtags.nonEmpty) Some(hashtags) else None, - isComposerSourceCamera = - getBooleanOptFeature(COMPOSER_SOURCE_IS_CAMERA_FLAG, booleanSearchResultSchemaFeatures), - ) - } - .getOrElse(thriftTweetFeatures) - - features - } - - private def applyUserDependentFeatures( - searcherUserId: Long, - screenName: Option[String], - userLanguages: Seq[scc.ThriftLanguage], - uiLanguage: Option[scc.ThriftLanguage], - tweetCountByAuthorId: Map[Long, Int], - followedUserIds: Set[Long], - mutuallyFollowingUserIds: Set[Long], - sourceTweetSearchResultById: Map[Long, eb.ThriftSearchResult], - inNetworkEngagement: InNetworkEngagement, - result: eb.ThriftSearchResult - )( - thriftTweetFeatures: sc.ThriftTweetFeatures - ): sc.ThriftTweetFeatures = { - result.metadata - .map { metadata => - val isRetweet = metadata.isRetweet.getOrElse(false) - val sourceTweet = - if (isRetweet) sourceTweetSearchResultById.get(metadata.sharedStatusId) - else None - val mentionsInSourceTweet = sourceTweet.map(getMentions).getOrElse(Seq.empty) - - val isReply = metadata.isReply.getOrElse(false) - val replyToSearcher = isReply && (metadata.referencedTweetAuthorId == searcherUserId) - val replyOther = isReply && !replyToSearcher - val retweetOther = isRetweet && (metadata.referencedTweetAuthorId != searcherUserId) - val tweetLanguage = metadata.language.getOrElse(scc.ThriftLanguage.Unknown) - - val referencedTweetAuthorId = - if (metadata.referencedTweetAuthorId > 0) Some(metadata.referencedTweetAuthorId) else None - val inReplyToUserId = if (!isRetweet) referencedTweetAuthorId else None - - thriftTweetFeatures.copy( - // Info about the Tweet. - fromSearcher = metadata.fromUserId == searcherUserId, - probablyFromFollowedAuthor = followedUserIds.contains(metadata.fromUserId), - fromMutualFollow = mutuallyFollowingUserIds.contains(metadata.fromUserId), - replySearcher = replyToSearcher, - replyOther = replyOther, - retweetOther = retweetOther, - mentionSearcher = isUserMentioned(screenName, getMentions(result), mentionsInSourceTweet), - // Info about Tweet content/media. - matchesSearcherMainLang = isUsersMainLanguage(tweetLanguage, userLanguages), - matchesSearcherLangs = isUsersLanguage(tweetLanguage, userLanguages), - matchesUILang = isUILanguage(tweetLanguage, uiLanguage), - // Various counts. - prevUserTweetEngagement = - metadata.extraMetadata.flatMap(_.prevUserTweetEngagement).getOrElse(DefaultCount), - tweetCountFromUserInSnapshot = tweetCountByAuthorId(metadata.fromUserId), - bidirectionalReplyCount = inNetworkEngagement.biDirectionalReplyCounts(result.id), - unidirectionalReplyCount = inNetworkEngagement.uniDirectionalReplyCounts(result.id), - bidirectionalRetweetCount = inNetworkEngagement.biDirectionalRetweetCounts(result.id), - unidirectionalRetweetCount = inNetworkEngagement.uniDirectionalRetweetCounts(result.id), - conversationCount = inNetworkEngagement.descendantReplyCounts(result.id), - directedAtUserIdIsInFirstDegree = - if (isReply) inReplyToUserId.map(followedUserIds.contains) else None, - ) - } - .getOrElse(thriftTweetFeatures) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.docx new file mode 100644 index 000000000..3450b2642 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.scala deleted file mode 100644 index 40b727141..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/earlybird/RelevanceSearchUtil.scala +++ /dev/null @@ -1,38 +0,0 @@ -package com.twitter.home_mixer.util.earlybird - -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant -import com.twitter.search.earlybird.{thriftscala => eb} - -object RelevanceSearchUtil { - - val Mentions: String = EarlybirdFieldConstant.MENTIONS_FACET - val Hashtags: String = EarlybirdFieldConstant.HASHTAGS_FACET - val FacetsToFetch: Seq[String] = Seq(Mentions, Hashtags) - - val MetadataOptions: eb.ThriftSearchResultMetadataOptions = { - eb.ThriftSearchResultMetadataOptions( - getTweetUrls = true, - getResultLocation = false, - getLuceneScore = false, - getInReplyToStatusId = true, - getReferencedTweetAuthorId = true, - getMediaBits = true, - getAllFeatures = true, - returnSearchResultFeatures = true, - // Set getExclusiveConversationAuthorId in order to retrieve Exclusive / SuperFollow tweets. - getExclusiveConversationAuthorId = true - ) - } - - val RelevanceOptions: eb.ThriftSearchRelevanceOptions = { - eb.ThriftSearchRelevanceOptions( - proximityScoring = true, - maxConsecutiveSameUser = Some(2), - rankingParams = None, - maxHitsToProcess = Some(500), - maxUserBlendCount = Some(3), - proximityPhraseWeight = 9.0, - returnAllResults = Some(true) - ) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/BUILD.bazel deleted file mode 100644 index dbfee21f4..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/BUILD.bazel +++ /dev/null @@ -1,10 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/thrift/src/main/thrift:thrift-scala", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/BUILD.docx new file mode 100644 index 000000000..6ba582362 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/RequestFields.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/RequestFields.docx new file mode 100644 index 000000000..7b6dc878c Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/RequestFields.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/RequestFields.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/RequestFields.scala deleted file mode 100644 index e20aead94..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/RequestFields.scala +++ /dev/null @@ -1,58 +0,0 @@ -package com.twitter.home_mixer.util.tweetypie - -import com.twitter.tweetypie.{thriftscala => tp} - -object RequestFields { - - val CoreTweetFields: Set[tp.TweetInclude] = Set[tp.TweetInclude]( - tp.TweetInclude.TweetFieldId(tp.Tweet.IdField.id), - tp.TweetInclude.TweetFieldId(tp.Tweet.CoreDataField.id) - ) - val MediaFields: Set[tp.TweetInclude] = Set[tp.TweetInclude]( - tp.TweetInclude.TweetFieldId(tp.Tweet.MediaField.id), - ) - val SelfThreadFields: Set[tp.TweetInclude] = Set[tp.TweetInclude]( - tp.TweetInclude.TweetFieldId(tp.Tweet.SelfThreadMetadataField.id) - ) - val MentionsTweetFields: Set[tp.TweetInclude] = Set[tp.TweetInclude]( - tp.TweetInclude.TweetFieldId(tp.Tweet.MentionsField.id) - ) - val SemanticAnnotationTweetFields: Set[tp.TweetInclude] = Set[tp.TweetInclude]( - tp.TweetInclude.TweetFieldId(tp.Tweet.EscherbirdEntityAnnotationsField.id) - ) - val NsfwLabelFields: Set[tp.TweetInclude] = Set[tp.TweetInclude]( - // Tweet fields containing NSFW related attributes. - tp.TweetInclude.TweetFieldId(tp.Tweet.NsfwHighRecallLabelField.id), - tp.TweetInclude.TweetFieldId(tp.Tweet.NsfwHighPrecisionLabelField.id), - tp.TweetInclude.TweetFieldId(tp.Tweet.NsfaHighRecallLabelField.id) - ) - val SafetyLabelFields: Set[tp.TweetInclude] = Set[tp.TweetInclude]( - // Tweet fields containing RTF labels for abuse and spam. - tp.TweetInclude.TweetFieldId(tp.Tweet.SpamLabelField.id), - tp.TweetInclude.TweetFieldId(tp.Tweet.AbusiveLabelField.id) - ) - val ConversationControlField: Set[tp.TweetInclude] = - Set[tp.TweetInclude](tp.TweetInclude.TweetFieldId(tp.Tweet.ConversationControlField.id)) - - val TweetTPHydrationFields: Set[tp.TweetInclude] = CoreTweetFields ++ - NsfwLabelFields ++ - SafetyLabelFields ++ - SemanticAnnotationTweetFields ++ - Set( - tp.TweetInclude.TweetFieldId(tp.Tweet.TakedownCountryCodesField.id), - // QTs imply a TweetyPie -> SGS request dependency - tp.TweetInclude.TweetFieldId(tp.Tweet.QuotedTweetField.id), - tp.TweetInclude.TweetFieldId(tp.Tweet.CommunitiesField.id), - // Field required for determining if a Tweet was created via News Camera. - tp.TweetInclude.TweetFieldId(tp.Tweet.ComposerSourceField.id), - tp.TweetInclude.TweetFieldId(tp.Tweet.LanguageField.id) - ) - - val TweetStaticEntitiesFields: Set[tp.TweetInclude] = - MentionsTweetFields ++ CoreTweetFields ++ SemanticAnnotationTweetFields ++ MediaFields - - val ContentFields: Set[tp.TweetInclude] = CoreTweetFields ++ MediaFields ++ SelfThreadFields ++ - ConversationControlField ++ SemanticAnnotationTweetFields ++ - Set[tp.TweetInclude]( - tp.TweetInclude.MediaEntityFieldId(tp.MediaEntity.AdditionalMetadataField.id)) -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/BUILD.bazel b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/BUILD.bazel deleted file mode 100644 index 5acfd98e0..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/BUILD.bazel +++ /dev/null @@ -1,19 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "home-mixer/server/src/main/scala/com/twitter/home_mixer/model", - "home-mixer/thrift/src/main/thrift:thrift-scala", - "src/java/com/twitter/common/text/tagger", - "src/java/com/twitter/common/text/token", - "src/java/com/twitter/common_internal/text", - "src/java/com/twitter/common_internal/text/version", - "src/java/com/twitter/search/common/util/text", - "src/thrift/com/twitter/search/common:features-scala", - "src/thrift/com/twitter/tweetypie:media-entity-scala", - "src/thrift/com/twitter/tweetypie:service-scala", - "src/thrift/com/twitter/tweetypie:tweet-scala", - ], -) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/BUILD.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/BUILD.docx new file mode 100644 index 000000000..4e44f7b10 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/BUILD.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/FeatureExtractionHelper.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/FeatureExtractionHelper.docx new file mode 100644 index 000000000..72ff7f673 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/FeatureExtractionHelper.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/FeatureExtractionHelper.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/FeatureExtractionHelper.scala deleted file mode 100644 index 07cbdebe4..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/FeatureExtractionHelper.scala +++ /dev/null @@ -1,29 +0,0 @@ -package com.twitter.home_mixer.util.tweetypie.content - -import com.twitter.home_mixer.model.ContentFeatures -import com.twitter.tweetypie.{thriftscala => tp} - -object FeatureExtractionHelper { - - def extractFeatures( - tweet: tp.Tweet - ): ContentFeatures = { - val contentFeaturesFromTweet = ContentFeatures.Empty.copy( - selfThreadMetadata = tweet.selfThreadMetadata - ) - - val contentFeaturesWithText = TweetTextFeaturesExtractor.addTextFeaturesFromTweet( - contentFeaturesFromTweet, - tweet - ) - val contentFeaturesWithMedia = TweetMediaFeaturesExtractor.addMediaFeaturesFromTweet( - contentFeaturesWithText, - tweet - ) - - contentFeaturesWithMedia.copy( - conversationControl = tweet.conversationControl, - semanticCoreAnnotations = tweet.escherbirdEntityAnnotations.map(_.entityAnnotations) - ) - } -} diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetMediaFeaturesExtractor.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetMediaFeaturesExtractor.docx new file mode 100644 index 000000000..89d34a4ca Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetMediaFeaturesExtractor.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetMediaFeaturesExtractor.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetMediaFeaturesExtractor.scala deleted file mode 100644 index 0a5a93a2e..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetMediaFeaturesExtractor.scala +++ /dev/null @@ -1,285 +0,0 @@ -package com.twitter.home_mixer.util.tweetypie.content - -import com.twitter.home_mixer.model.ContentFeatures -import com.twitter.mediaservices.commons.mediainformation.{thriftscala => mi} -import com.twitter.mediaservices.commons.tweetmedia.{thriftscala => tm} -import com.twitter.mediaservices.commons.{thriftscala => ms} -import com.twitter.tweetypie.{thriftscala => tp} -import scala.collection.Map - -object TweetMediaFeaturesExtractor { - - private val ImageCategories = Set( - ms.MediaCategory.TweetImage.value, - ms.MediaCategory.TweetGif.value - ) - private val VideoCategories = Set( - ms.MediaCategory.TweetVideo.value, - ms.MediaCategory.AmplifyVideo.value - ) - - def hasImage(tweet: tp.Tweet): Boolean = hasMediaByCategory(tweet, ImageCategories) - - def hasVideo(tweet: tp.Tweet): Boolean = hasMediaByCategory(tweet, VideoCategories) - - private def hasMediaByCategory(tweet: tp.Tweet, categories: Set[Int]): Boolean = { - tweet.media.exists { mediaEntities => - mediaEntities.exists { mediaEntity => - mediaEntity.mediaKey.map(_.mediaCategory).exists { mediaCategory => - categories.contains(mediaCategory.value) - } - } - } - } - - def addMediaFeaturesFromTweet( - inputFeatures: ContentFeatures, - tweet: tp.Tweet, - ): ContentFeatures = { - val featuresWithMediaEntity = tweet.media - .map { mediaEntities => - val sizeFeatures = getSizeFeatures(mediaEntities) - val playbackFeatures = getPlaybackFeatures(mediaEntities) - val mediaWidths = sizeFeatures.map(_.width.toShort) - val mediaHeights = sizeFeatures.map(_.height.toShort) - val resizeMethods = sizeFeatures.map(_.resizeMethod.toShort) - val faceMapAreas = getFaceMapAreas(mediaEntities) - val sortedColorPalette = getSortedColorPalette(mediaEntities) - val stickerFeatures = getStickerFeatures(mediaEntities) - val mediaOriginProviders = getMediaOriginProviders(mediaEntities) - val isManaged = getIsManaged(mediaEntities) - val is360 = getIs360(mediaEntities) - val viewCount = getViewCount(mediaEntities) - val userDefinedProductMetadataFeatures = - getUserDefinedProductMetadataFeatures(mediaEntities) - val isMonetizable = - getOptBooleanFromSeqOpt(userDefinedProductMetadataFeatures.map(_.isMonetizable)) - val isEmbeddable = - getOptBooleanFromSeqOpt(userDefinedProductMetadataFeatures.map(_.isEmbeddable)) - val hasSelectedPreviewImage = - getOptBooleanFromSeqOpt(userDefinedProductMetadataFeatures.map(_.hasSelectedPreviewImage)) - val hasTitle = getOptBooleanFromSeqOpt(userDefinedProductMetadataFeatures.map(_.hasTitle)) - val hasDescription = - getOptBooleanFromSeqOpt(userDefinedProductMetadataFeatures.map(_.hasDescription)) - val hasVisitSiteCallToAction = getOptBooleanFromSeqOpt( - userDefinedProductMetadataFeatures.map(_.hasVisitSiteCallToAction)) - val hasAppInstallCallToAction = getOptBooleanFromSeqOpt( - userDefinedProductMetadataFeatures.map(_.hasAppInstallCallToAction)) - val hasWatchNowCallToAction = - getOptBooleanFromSeqOpt(userDefinedProductMetadataFeatures.map(_.hasWatchNowCallToAction)) - - inputFeatures.copy( - videoDurationMs = playbackFeatures.durationMs, - bitRate = playbackFeatures.bitRate, - aspectRatioNum = playbackFeatures.aspectRatioNum, - aspectRatioDen = playbackFeatures.aspectRatioDen, - widths = Some(mediaWidths), - heights = Some(mediaHeights), - resizeMethods = Some(resizeMethods), - faceAreas = Some(faceMapAreas), - dominantColorRed = sortedColorPalette.headOption.map(_.rgb.red), - dominantColorBlue = sortedColorPalette.headOption.map(_.rgb.blue), - dominantColorGreen = sortedColorPalette.headOption.map(_.rgb.green), - dominantColorPercentage = sortedColorPalette.headOption.map(_.percentage), - numColors = Some(sortedColorPalette.size.toShort), - stickerIds = Some(stickerFeatures), - mediaOriginProviders = Some(mediaOriginProviders), - isManaged = Some(isManaged), - is360 = Some(is360), - viewCount = viewCount, - isMonetizable = isMonetizable, - isEmbeddable = isEmbeddable, - hasSelectedPreviewImage = hasSelectedPreviewImage, - hasTitle = hasTitle, - hasDescription = hasDescription, - hasVisitSiteCallToAction = hasVisitSiteCallToAction, - hasAppInstallCallToAction = hasAppInstallCallToAction, - hasWatchNowCallToAction = hasWatchNowCallToAction - ) - } - .getOrElse(inputFeatures) - - val featuresWithMediaTags = tweet.mediaTags - .map { mediaTags => - val mediaTagScreenNames = getMediaTagScreenNames(mediaTags.tagMap) - val numMediaTags = mediaTagScreenNames.size - - featuresWithMediaEntity.copy( - mediaTagScreenNames = Some(mediaTagScreenNames), - numMediaTags = Some(numMediaTags.toShort) - ) - } - .getOrElse(featuresWithMediaEntity) - - featuresWithMediaTags - .copy(media = tweet.media) - } - - private def getSizeFeatures(mediaEntities: Seq[tp.MediaEntity]): Seq[MediaSizeFeatures] = { - mediaEntities.map { mediaEntity => - mediaEntity.sizes.foldLeft(MediaSizeFeatures(0, 0, 0))((accDimensions, dimensions) => - MediaSizeFeatures( - width = math.max(dimensions.width, accDimensions.width), - height = math.max(dimensions.height, accDimensions.height), - resizeMethod = math.max(dimensions.resizeMethod.getValue, accDimensions.resizeMethod) - )) - } - } - - private def getPlaybackFeatures(mediaEntities: Seq[tp.MediaEntity]): PlaybackFeatures = { - val allPlaybackFeatures = mediaEntities - .flatMap { mediaEntity => - mediaEntity.mediaInfo map { - case videoEntity: tm.MediaInfo.VideoInfo => - PlaybackFeatures( - durationMs = Some(videoEntity.videoInfo.durationMillis), - bitRate = videoEntity.videoInfo.variants.maxBy(_.bitRate).bitRate, - aspectRatioNum = Some(videoEntity.videoInfo.aspectRatio.numerator), - aspectRatioDen = Some(videoEntity.videoInfo.aspectRatio.denominator) - ) - case gifEntity: tm.MediaInfo.AnimatedGifInfo => - PlaybackFeatures( - durationMs = None, - bitRate = gifEntity.animatedGifInfo.variants.maxBy(_.bitRate).bitRate, - aspectRatioNum = Some(gifEntity.animatedGifInfo.aspectRatio.numerator), - aspectRatioDen = Some(gifEntity.animatedGifInfo.aspectRatio.denominator) - ) - case _ => PlaybackFeatures(None, None, None, None) - } - } - .collect { - case playbackFeatures: PlaybackFeatures => playbackFeatures - } - - if (allPlaybackFeatures.nonEmpty) allPlaybackFeatures.maxBy(_.durationMs) - else PlaybackFeatures(None, None, None, None) - } - - private def getMediaTagScreenNames(tagMap: Map[Long, Seq[tp.MediaTag]]): Seq[String] = - tagMap.values - .flatMap(seqMediaTag => seqMediaTag.flatMap(_.screenName)) - .toSeq - - // Areas of the faces identified in the media entities - private def getFaceMapAreas(mediaEntities: Seq[tp.MediaEntity]): Seq[Int] = { - for { - mediaEntity <- mediaEntities - metadata <- mediaEntity.additionalMetadata.toSeq - faceData <- metadata.faceData - faces <- faceData.faces - } yield { - faces - .getOrElse("orig", Seq.empty[mi.Face]) - .flatMap(f => f.boundingBox.map(bb => bb.width * bb.height)) - } - }.flatten - - // All ColorPalettes in the media sorted by the percentage in descending order - private def getSortedColorPalette( - mediaEntities: Seq[tp.MediaEntity] - ): Seq[mi.ColorPaletteItem] = { - for { - mediaEntity <- mediaEntities - metadata <- mediaEntity.additionalMetadata.toSeq - colorInfo <- metadata.colorInfo - } yield { - colorInfo.palette - } - }.flatten.sortBy(-_.percentage) - - // Id's of stickers applied by the user - private def getStickerFeatures(mediaEntities: Seq[tp.MediaEntity]): Seq[Long] = { - for { - mediaEntity <- mediaEntities - metadata <- mediaEntity.additionalMetadata.toSeq - stickerInfo <- metadata.stickerInfo - } yield { - stickerInfo.stickers.map(_.id) - } - }.flatten - - // 3rd party media providers. eg. giphy for gifs - private def getMediaOriginProviders(mediaEntities: Seq[tp.MediaEntity]): Seq[String] = - for { - mediaEntity <- mediaEntities - metadata <- mediaEntity.additionalMetadata.toSeq - mediaOrigin <- metadata.foundMediaOrigin - } yield { - mediaOrigin.provider - } - - private def getIsManaged(mediaEntities: Seq[tp.MediaEntity]): Boolean = { - for { - mediaEntity <- mediaEntities - metadata <- mediaEntity.additionalMetadata.toSeq - managementInfo <- metadata.managementInfo - } yield { - managementInfo.managed - } - }.contains(true) - - private def getIs360(mediaEntities: Seq[tp.MediaEntity]): Boolean = { - for { - mediaEntity <- mediaEntities - metadata <- mediaEntity.additionalMetadata.toSeq - info360 <- metadata.info360 - } yield { - info360.is360 - } - }.contains(Some(true)) - - private def getViewCount(mediaEntities: Seq[tp.MediaEntity]): Option[Long] = { - for { - mediaEntity <- mediaEntities - metadata <- mediaEntity.additionalMetadata.toSeq - engagementInfo <- metadata.engagementInfo - viewCounts <- engagementInfo.viewCount - } yield { - viewCounts - } - }.reduceOption(_ max _) - - // metadata defined by the user when uploading the image - private def getUserDefinedProductMetadataFeatures( - mediaEntities: Seq[tp.MediaEntity] - ): Seq[UserDefinedProductMetadataFeatures] = - for { - mediaEntity <- mediaEntities - userDefinedMetadata <- mediaEntity.metadata - } yield { - UserDefinedProductMetadataFeatures( - isMonetizable = userDefinedMetadata.monetizable, - isEmbeddable = userDefinedMetadata.embeddable, - hasSelectedPreviewImage = Some(userDefinedMetadata.previewImage.nonEmpty), - hasTitle = userDefinedMetadata.title.map(_.nonEmpty), - hasDescription = userDefinedMetadata.description.map(_.nonEmpty), - hasVisitSiteCallToAction = userDefinedMetadata.callToActions.map(_.visitSite.nonEmpty), - hasAppInstallCallToAction = userDefinedMetadata.callToActions.map(_.appInstall.nonEmpty), - hasWatchNowCallToAction = userDefinedMetadata.callToActions.map(_.watchNow.nonEmpty) - ) - } - - private def getOptBooleanFromSeqOpt( - seqOpt: Seq[Option[Boolean]] - ): Option[Boolean] = Some( - seqOpt.exists(boolOpt => boolOpt.contains(true)) - ) -} - -case class MediaSizeFeatures(width: Int, height: Int, resizeMethod: Int) - -case class PlaybackFeatures( - durationMs: Option[Int], - bitRate: Option[Int], - aspectRatioNum: Option[Short], - aspectRatioDen: Option[Short]) - -case class UserDefinedProductMetadataFeatures( - isMonetizable: Option[Boolean], - isEmbeddable: Option[Boolean], - hasSelectedPreviewImage: Option[Boolean], - hasTitle: Option[Boolean], - hasDescription: Option[Boolean], - hasVisitSiteCallToAction: Option[Boolean], - hasAppInstallCallToAction: Option[Boolean], - hasWatchNowCallToAction: Option[Boolean]) diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetTextFeaturesExtractor.docx b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetTextFeaturesExtractor.docx new file mode 100644 index 000000000..9164e5bc9 Binary files /dev/null and b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetTextFeaturesExtractor.docx differ diff --git a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetTextFeaturesExtractor.scala b/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetTextFeaturesExtractor.scala deleted file mode 100644 index 0a403d98a..000000000 --- a/home-mixer/server/src/main/scala/com/twitter/home_mixer/util/tweetypie/content/TweetTextFeaturesExtractor.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.twitter.home_mixer.util.tweetypie.content - -import com.twitter.home_mixer.model.ContentFeatures -import com.twitter.tweetypie.{thriftscala => tp} - -object TweetTextFeaturesExtractor { - - private val QUESTION_MARK_CHARS = Set( - '\u003F', '\u00BF', '\u037E', '\u055E', '\u061F', '\u1367', '\u1945', '\u2047', '\u2048', - '\u2049', '\u2753', '\u2754', '\u2CFA', '\u2CFB', '\u2E2E', '\uA60F', '\uA6F7', '\uFE16', - '\uFE56', '\uFF1F', '\u1114', '\u1E95' - ) - private val NEW_LINE_REGEX = "\r\n|\r|\n".r - - def addTextFeaturesFromTweet( - inputFeatures: ContentFeatures, - tweet: tp.Tweet - ): ContentFeatures = { - tweet.coreData - .map { coreData => - val tweetText = coreData.text - - inputFeatures.copy( - hasQuestion = hasQuestionCharacter(tweetText), - length = getLength(tweetText).toShort, - numCaps = getCaps(tweetText).toShort, - numWhiteSpaces = getSpaces(tweetText).toShort, - numNewlines = Some(getNumNewlines(tweetText)), - ) - } - .getOrElse(inputFeatures) - } - - def getLength(text: String): Int = - text.codePointCount(0, text.length()) - - def getCaps(text: String): Int = text.count(Character.isUpperCase) - - def getSpaces(text: String): Int = text.count(Character.isWhitespace) - - def hasQuestionCharacter(text: String): Boolean = text.exists(QUESTION_MARK_CHARS.contains) - - def getNumNewlines(text: String): Short = NEW_LINE_REGEX.findAllIn(text).length.toShort -} diff --git a/navi/README.docx b/navi/README.docx new file mode 100644 index 000000000..01987aabd Binary files /dev/null and b/navi/README.docx differ diff --git a/navi/README.md b/navi/README.md deleted file mode 100644 index 4e7d325f7..000000000 --- a/navi/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# Navi: High-Performance Machine Learning Serving Server in Rust - -Navi is a high-performance, versatile machine learning serving server implemented in Rust and tailored for production usage. It's designed to efficiently serve within the Twitter tech stack, offering top-notch performance while focusing on core features. - -## Key Features - -- **Minimalist Design Optimized for Production Use Cases**: Navi delivers ultra-high performance, stability, and availability, engineered to handle real-world application demands with a streamlined codebase. -- **gRPC API Compatibility with TensorFlow Serving**: Seamless integration with existing TensorFlow Serving clients via its gRPC API, enabling easy integration, smooth deployment, and scaling in production environments. -- **Plugin Architecture for Different Runtimes**: Navi's pluggable architecture supports various machine learning runtimes, providing adaptability and extensibility for diverse use cases. Out-of-the-box support is available for TensorFlow and Onnx Runtime, with PyTorch in an experimental state. - -## Current State - -While Navi's features may not be as comprehensive as its open-source counterparts, its performance-first mindset makes it highly efficient. -- Navi for TensorFlow is currently the most feature-complete, supporting multiple input tensors of different types (float, int, string, etc.). -- Navi for Onnx primarily supports one input tensor of type string, used in Twitter's home recommendation with a proprietary BatchPredictRequest format. -- Navi for Pytorch is compilable and runnable but not yet production-ready in terms of performance and stability. - -## Directory Structure - -- `navi`: The main code repository for Navi -- `dr_transform`: Twitter-specific converter that converts BatchPredictionRequest Thrift to ndarray -- `segdense`: Twitter-specific config to specify how to retrieve feature values from BatchPredictionRequest -- `thrift_bpr_adapter`: generated thrift code for BatchPredictionRequest - -## Content -We have included all *.rs source code files that make up the main Navi binaries for you to examine. However, we have not included the test and benchmark code, or various configuration files, due to data security concerns. - -## Run -In navi/navi, you can run the following commands: -- `scripts/run_tf2.sh` for [TensorFlow](https://www.tensorflow.org/) -- `scripts/run_onnx.sh` for [Onnx](https://onnx.ai/) - -Do note that you need to create a models directory and create some versions, preferably using epoch time, e.g., `1679693908377`. -so the models structure looks like: - models/ - -web_click - - 1809000 - - 1809010 - -## Build -You can adapt the above scripts to build using Cargo. diff --git a/navi/dr_transform/Cargo.docx b/navi/dr_transform/Cargo.docx new file mode 100644 index 000000000..7a886dbd1 Binary files /dev/null and b/navi/dr_transform/Cargo.docx differ diff --git a/navi/dr_transform/Cargo.toml b/navi/dr_transform/Cargo.toml deleted file mode 100644 index cff73375b..000000000 --- a/navi/dr_transform/Cargo.toml +++ /dev/null @@ -1,32 +0,0 @@ -[package] -name = "dr_transform" -version = "0.1.0" -edition = "2021" - -[dependencies] -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -json = "0.12.4" -bpr_thrift = { path = "../thrift_bpr_adapter/thrift/"} -segdense = { path = "../segdense/"} -thrift = "0.17.0" -ndarray = "0.15" -base64 = "0.20.0" -npyz = "0.7.2" -log = "0.4.17" -env_logger = "0.9.0" -prometheus = "0.13.1" -once_cell = "1.17.0" -rand = "0.8.5" -itertools = "0.10.5" -anyhow = "1.0.70" -[target.'cfg(not(target_os="linux"))'.dependencies] -ort = {git ="https://github.com/pykeio/ort.git", features=["profiling"], tag="v1.14.6"} -[target.'cfg(target_os="linux")'.dependencies] -ort = {git ="https://github.com/pykeio/ort.git", features=["profiling", "tensorrt", "cuda", "copy-dylibs"], tag="v1.14.6"} -[dev-dependencies] -criterion = "0.3.0" - -[[bench]] -name = "bpr_benchmark" -harness = false diff --git a/navi/dr_transform/src/all_config.docx b/navi/dr_transform/src/all_config.docx new file mode 100644 index 000000000..9acfaf195 Binary files /dev/null and b/navi/dr_transform/src/all_config.docx differ diff --git a/navi/dr_transform/src/all_config.rs b/navi/dr_transform/src/all_config.rs deleted file mode 100644 index d5c52c362..000000000 --- a/navi/dr_transform/src/all_config.rs +++ /dev/null @@ -1,49 +0,0 @@ -use serde::{Deserialize, Serialize}; - -use serde_json::Error; - -#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct AllConfig { - #[serde(rename = "train_data")] - pub train_data: TrainData, -} - -#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct TrainData { - #[serde(rename = "seg_dense_schema")] - pub seg_dense_schema: SegDenseSchema, -} - -#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct SegDenseSchema { - #[serde(rename = "renamed_features")] - pub renamed_features: RenamedFeatures, -} - -#[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct RenamedFeatures { - pub continuous: String, - pub binary: String, - pub discrete: String, - #[serde(rename = "author_embedding")] - pub author_embedding: String, - #[serde(rename = "user_embedding")] - pub user_embedding: String, - #[serde(rename = "user_eng_embedding")] - pub user_eng_embedding: String, - #[serde(rename = "meta__author_id")] - pub meta_author_id: String, - #[serde(rename = "meta__user_id")] - pub meta_user_id: String, - #[serde(rename = "meta__tweet_id")] - pub meta_tweet_id: String, -} - -pub fn parse(json_str: &str) -> Result { - let all_config: AllConfig = serde_json::from_str(json_str)?; - Ok(all_config) -} diff --git a/navi/dr_transform/src/converter.docx b/navi/dr_transform/src/converter.docx new file mode 100644 index 000000000..03cc280af Binary files /dev/null and b/navi/dr_transform/src/converter.docx differ diff --git a/navi/dr_transform/src/converter.rs b/navi/dr_transform/src/converter.rs deleted file mode 100644 index 3097aedc0..000000000 --- a/navi/dr_transform/src/converter.rs +++ /dev/null @@ -1,616 +0,0 @@ -use std::collections::BTreeSet; -use std::fmt::{self, Debug, Display}; -use std::fs; - -use crate::all_config; -use crate::all_config::AllConfig; -use anyhow::{bail, Context}; -use bpr_thrift::data::DataRecord; -use bpr_thrift::prediction_service::BatchPredictionRequest; -use bpr_thrift::tensor::GeneralTensor; -use log::debug; -use ndarray::Array2; -use once_cell::sync::OnceCell; -use ort::tensor::InputTensor; -use prometheus::{HistogramOpts, HistogramVec}; -use segdense::mapper::{FeatureMapper, MapReader}; -use segdense::segdense_transform_spec_home_recap_2022::{DensificationTransformSpec, Root}; -use segdense::util; -use thrift::protocol::{TBinaryInputProtocol, TSerializable}; -use thrift::transport::TBufferChannel; - -pub fn log_feature_match( - dr: &DataRecord, - seg_dense_config: &DensificationTransformSpec, - dr_type: String, -) { - // Note the following algorithm matches features from config using linear search. - // Also the record source is MinDataRecord. This includes only binary and continous features for now. - - for (feature_id, feature_value) in dr.continuous_features.as_ref().unwrap() { - debug!( - "{} - Continous Datarecord => Feature ID: {}, Feature value: {}", - dr_type, feature_id, feature_value - ); - for input_feature in &seg_dense_config.cont.input_features { - if input_feature.feature_id == *feature_id { - debug!("Matching input feature: {:?}", input_feature) - } - } - } - - for feature_id in dr.binary_features.as_ref().unwrap() { - debug!( - "{} - Binary Datarecord => Feature ID: {}", - dr_type, feature_id - ); - for input_feature in &seg_dense_config.binary.input_features { - if input_feature.feature_id == *feature_id { - debug!("Found input feature: {:?}", input_feature) - } - } - } -} - -pub fn log_feature_matches(drs: &Vec, seg_dense_config: &DensificationTransformSpec) { - for dr in drs { - log_feature_match(dr, seg_dense_config, String::from("individual")); - } -} - -pub trait Converter: Send + Sync + Debug + 'static + Display { - fn convert(&self, input: Vec>) -> (Vec, Vec); -} - -#[derive(Debug)] -#[allow(dead_code)] -pub struct BatchPredictionRequestToTorchTensorConverter { - all_config: AllConfig, - seg_dense_config: Root, - all_config_path: String, - seg_dense_config_path: String, - feature_mapper: FeatureMapper, - user_embedding_feature_id: i64, - user_eng_embedding_feature_id: i64, - author_embedding_feature_id: i64, - discrete_features_to_report: BTreeSet, - continuous_features_to_report: BTreeSet, - discrete_feature_metrics: &'static HistogramVec, - continuous_feature_metrics: &'static HistogramVec, -} - -impl Display for BatchPredictionRequestToTorchTensorConverter { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "all_config_path: {}, seg_dense_config_path:{}", - self.all_config_path, self.seg_dense_config_path - ) - } -} - -impl BatchPredictionRequestToTorchTensorConverter { - pub fn new( - model_dir: &str, - model_version: &str, - reporting_feature_ids: Vec<(i64, &str)>, - register_metric_fn: Option, - ) -> anyhow::Result { - let all_config_path = format!("{}/{}/all_config.json", model_dir, model_version); - let seg_dense_config_path = format!( - "{}/{}/segdense_transform_spec_home_recap_2022.json", - model_dir, model_version - ); - let seg_dense_config = util::load_config(&seg_dense_config_path)?; - let all_config = all_config::parse( - &fs::read_to_string(&all_config_path) - .with_context(|| "error loading all_config.json - ")?, - )?; - - let feature_mapper = util::load_from_parsed_config(seg_dense_config.clone())?; - - let user_embedding_feature_id = Self::get_feature_id( - &all_config - .train_data - .seg_dense_schema - .renamed_features - .user_embedding, - &seg_dense_config, - ); - let user_eng_embedding_feature_id = Self::get_feature_id( - &all_config - .train_data - .seg_dense_schema - .renamed_features - .user_eng_embedding, - &seg_dense_config, - ); - let author_embedding_feature_id = Self::get_feature_id( - &all_config - .train_data - .seg_dense_schema - .renamed_features - .author_embedding, - &seg_dense_config, - ); - static METRICS: OnceCell<(HistogramVec, HistogramVec)> = OnceCell::new(); - let (discrete_feature_metrics, continuous_feature_metrics) = METRICS.get_or_init(|| { - let discrete = HistogramVec::new( - HistogramOpts::new(":navi:feature_id:discrete", "Discrete Feature ID values") - .buckets(Vec::from(&[ - 0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, - 120.0, 130.0, 140.0, 150.0, 160.0, 170.0, 180.0, 190.0, 200.0, 250.0, - 300.0, 500.0, 1000.0, 10000.0, 100000.0, - ] as &'static [f64])), - &["feature_id"], - ) - .expect("metric cannot be created"); - let continuous = HistogramVec::new( - HistogramOpts::new( - ":navi:feature_id:continuous", - "continuous Feature ID values", - ) - .buckets(Vec::from(&[ - 0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, - 130.0, 140.0, 150.0, 160.0, 170.0, 180.0, 190.0, 200.0, 250.0, 300.0, 500.0, - 1000.0, 10000.0, 100000.0, - ] as &'static [f64])), - &["feature_id"], - ) - .expect("metric cannot be created"); - register_metric_fn.map(|r| { - r(&discrete); - r(&continuous); - }); - (discrete, continuous) - }); - - let mut discrete_features_to_report = BTreeSet::new(); - let mut continuous_features_to_report = BTreeSet::new(); - - for (feature_id, feature_type) in reporting_feature_ids.iter() { - match *feature_type { - "discrete" => discrete_features_to_report.insert(feature_id.clone()), - "continuous" => continuous_features_to_report.insert(feature_id.clone()), - _ => bail!( - "Invalid feature type {} for reporting metrics!", - feature_type - ), - }; - } - - Ok(BatchPredictionRequestToTorchTensorConverter { - all_config, - seg_dense_config, - all_config_path, - seg_dense_config_path, - feature_mapper, - user_embedding_feature_id, - user_eng_embedding_feature_id, - author_embedding_feature_id, - discrete_features_to_report, - continuous_features_to_report, - discrete_feature_metrics, - continuous_feature_metrics, - }) - } - - fn get_feature_id(feature_name: &str, seg_dense_config: &Root) -> i64 { - // given a feature name, we get the complex feature type id - for feature in &seg_dense_config.complex_feature_type_transform_spec { - if feature.full_feature_name == feature_name { - return feature.feature_id; - } - } - -1 - } - - fn parse_batch_prediction_request(bytes: Vec) -> BatchPredictionRequest { - // parse batch prediction request into a struct from byte array repr. - let mut bc = TBufferChannel::with_capacity(bytes.len(), 0); - bc.set_readable_bytes(&bytes); - let mut protocol = TBinaryInputProtocol::new(bc, true); - BatchPredictionRequest::read_from_in_protocol(&mut protocol).unwrap() - } - - fn get_embedding_tensors( - &self, - bprs: &[BatchPredictionRequest], - feature_id: i64, - batch_size: &[usize], - ) -> Array2 { - // given an embedding feature id, extract the float tensor array into tensors. - let cols: usize = 200; - let rows: usize = batch_size[batch_size.len() - 1]; - let total_size = rows * cols; - - let mut working_set = vec![0 as f32; total_size]; - let mut bpr_start = 0; - for (bpr, &bpr_end) in bprs.iter().zip(batch_size) { - if bpr.common_features.is_some() { - if bpr.common_features.as_ref().unwrap().tensors.is_some() { - if bpr - .common_features - .as_ref() - .unwrap() - .tensors - .as_ref() - .unwrap() - .contains_key(&feature_id) - { - let source_tensor = bpr - .common_features - .as_ref() - .unwrap() - .tensors - .as_ref() - .unwrap() - .get(&feature_id) - .unwrap(); - let tensor = match source_tensor { - GeneralTensor::FloatTensor(float_tensor) => - //Tensor::of_slice( - { - float_tensor - .floats - .iter() - .map(|x| x.into_inner() as f32) - .collect::>() - } - _ => vec![0 as f32; cols], - }; - - // since the tensor is found in common feature, add it in all batches - for row in bpr_start..bpr_end { - for col in 0..cols { - working_set[row * cols + col] = tensor[col]; - } - } - } - } - } - // find the feature in individual feature list and add to corresponding batch. - for (index, datarecord) in bpr.individual_features_list.iter().enumerate() { - if datarecord.tensors.is_some() - && datarecord - .tensors - .as_ref() - .unwrap() - .contains_key(&feature_id) - { - let source_tensor = datarecord - .tensors - .as_ref() - .unwrap() - .get(&feature_id) - .unwrap(); - let tensor = match source_tensor { - GeneralTensor::FloatTensor(float_tensor) => float_tensor - .floats - .iter() - .map(|x| x.into_inner() as f32) - .collect::>(), - _ => vec![0 as f32; cols], - }; - for col in 0..cols { - working_set[(bpr_start + index) * cols + col] = tensor[col]; - } - } - } - bpr_start = bpr_end; - } - Array2::::from_shape_vec([rows, cols], working_set).unwrap() - } - - // Todo : Refactor, create a generic version with different type and field accessors - // Example paramterize and then instiantiate the following - // (FLOAT --> FLOAT, DataRecord.continuous_feature) - // (BOOL --> INT64, DataRecord.binary_feature) - // (INT64 --> INT64, DataRecord.discrete_feature) - fn get_continuous(&self, bprs: &[BatchPredictionRequest], batch_ends: &[usize]) -> InputTensor { - // These need to be part of model schema - let rows: usize = batch_ends[batch_ends.len() - 1]; - let cols: usize = 5293; - let full_size: usize = rows * cols; - let default_val = f32::NAN; - - let mut tensor = vec![default_val; full_size]; - - let mut bpr_start = 0; - for (bpr, &bpr_end) in bprs.iter().zip(batch_ends) { - // Common features - if bpr.common_features.is_some() - && bpr - .common_features - .as_ref() - .unwrap() - .continuous_features - .is_some() - { - let common_features = bpr - .common_features - .as_ref() - .unwrap() - .continuous_features - .as_ref() - .unwrap(); - - for feature in common_features { - match self.feature_mapper.get(feature.0) { - Some(f_info) => { - let idx = f_info.index_within_tensor as usize; - if idx < cols { - // Set value in each row - for r in bpr_start..bpr_end { - let flat_index: usize = r * cols + idx; - tensor[flat_index] = feature.1.into_inner() as f32; - } - } - } - None => (), - } - if self.continuous_features_to_report.contains(feature.0) { - self.continuous_feature_metrics - .with_label_values(&[feature.0.to_string().as_str()]) - .observe(feature.1.into_inner()) - } else if self.discrete_features_to_report.contains(feature.0) { - self.discrete_feature_metrics - .with_label_values(&[feature.0.to_string().as_str()]) - .observe(feature.1.into_inner()) - } - } - } - - // Process the batch of datarecords - for r in bpr_start..bpr_end { - let dr: &DataRecord = - &bpr.individual_features_list[usize::try_from(r - bpr_start).unwrap()]; - if dr.continuous_features.is_some() { - for feature in dr.continuous_features.as_ref().unwrap() { - match self.feature_mapper.get(&feature.0) { - Some(f_info) => { - let idx = f_info.index_within_tensor as usize; - let flat_index: usize = r * cols + idx; - if flat_index < tensor.len() && idx < cols { - tensor[flat_index] = feature.1.into_inner() as f32; - } - } - None => (), - } - if self.continuous_features_to_report.contains(feature.0) { - self.continuous_feature_metrics - .with_label_values(&[feature.0.to_string().as_str()]) - .observe(feature.1.into_inner() as f64) - } else if self.discrete_features_to_report.contains(feature.0) { - self.discrete_feature_metrics - .with_label_values(&[feature.0.to_string().as_str()]) - .observe(feature.1.into_inner() as f64) - } - } - } - } - bpr_start = bpr_end; - } - - InputTensor::FloatTensor( - Array2::::from_shape_vec([rows, cols], tensor) - .unwrap() - .into_dyn(), - ) - } - - fn get_binary(&self, bprs: &[BatchPredictionRequest], batch_ends: &[usize]) -> InputTensor { - // These need to be part of model schema - let rows: usize = batch_ends[batch_ends.len() - 1]; - let cols: usize = 149; - let full_size: usize = rows * cols; - let default_val: i64 = 0; - - let mut v = vec![default_val; full_size]; - - let mut bpr_start = 0; - for (bpr, &bpr_end) in bprs.iter().zip(batch_ends) { - // Common features - if bpr.common_features.is_some() - && bpr - .common_features - .as_ref() - .unwrap() - .binary_features - .is_some() - { - let common_features = bpr - .common_features - .as_ref() - .unwrap() - .binary_features - .as_ref() - .unwrap(); - - for feature in common_features { - match self.feature_mapper.get(feature) { - Some(f_info) => { - let idx = f_info.index_within_tensor as usize; - if idx < cols { - // Set value in each row - for r in bpr_start..bpr_end { - let flat_index: usize = r * cols + idx; - v[flat_index] = 1; - } - } - } - None => (), - } - } - } - - // Process the batch of datarecords - for r in bpr_start..bpr_end { - let dr: &DataRecord = &bpr.individual_features_list[r - bpr_start]; - if dr.binary_features.is_some() { - for feature in dr.binary_features.as_ref().unwrap() { - match self.feature_mapper.get(&feature) { - Some(f_info) => { - let idx = f_info.index_within_tensor as usize; - let flat_index: usize = r * cols + idx; - v[flat_index] = 1; - } - None => (), - } - } - } - } - bpr_start = bpr_end; - } - InputTensor::Int64Tensor( - Array2::::from_shape_vec([rows, cols], v) - .unwrap() - .into_dyn(), - ) - } - - #[allow(dead_code)] - fn get_discrete(&self, bprs: &[BatchPredictionRequest], batch_ends: &[usize]) -> InputTensor { - // These need to be part of model schema - let rows: usize = batch_ends[batch_ends.len() - 1]; - let cols: usize = 320; - let full_size: usize = rows * cols; - let default_val: i64 = 0; - - let mut v = vec![default_val; full_size]; - - let mut bpr_start = 0; - for (bpr, &bpr_end) in bprs.iter().zip(batch_ends) { - // Common features - if bpr.common_features.is_some() - && bpr - .common_features - .as_ref() - .unwrap() - .discrete_features - .is_some() - { - let common_features = bpr - .common_features - .as_ref() - .unwrap() - .discrete_features - .as_ref() - .unwrap(); - - for feature in common_features { - match self.feature_mapper.get(feature.0) { - Some(f_info) => { - let idx = f_info.index_within_tensor as usize; - if idx < cols { - // Set value in each row - for r in bpr_start..bpr_end { - let flat_index: usize = r * cols + idx; - v[flat_index] = *feature.1; - } - } - } - None => (), - } - if self.discrete_features_to_report.contains(feature.0) { - self.discrete_feature_metrics - .with_label_values(&[feature.0.to_string().as_str()]) - .observe(*feature.1 as f64) - } - } - } - - // Process the batch of datarecords - for r in bpr_start..bpr_end { - let dr: &DataRecord = &bpr.individual_features_list[usize::try_from(r).unwrap()]; - if dr.discrete_features.is_some() { - for feature in dr.discrete_features.as_ref().unwrap() { - match self.feature_mapper.get(&feature.0) { - Some(f_info) => { - let idx = f_info.index_within_tensor as usize; - let flat_index: usize = r * cols + idx; - if flat_index < v.len() && idx < cols { - v[flat_index] = *feature.1; - } - } - None => (), - } - if self.discrete_features_to_report.contains(feature.0) { - self.discrete_feature_metrics - .with_label_values(&[feature.0.to_string().as_str()]) - .observe(*feature.1 as f64) - } - } - } - } - bpr_start = bpr_end; - } - InputTensor::Int64Tensor( - Array2::::from_shape_vec([rows, cols], v) - .unwrap() - .into_dyn(), - ) - } - - fn get_user_embedding( - &self, - bprs: &[BatchPredictionRequest], - batch_ends: &[usize], - ) -> InputTensor { - InputTensor::FloatTensor( - self.get_embedding_tensors(bprs, self.user_embedding_feature_id, batch_ends) - .into_dyn(), - ) - } - - fn get_eng_embedding( - &self, - bpr: &[BatchPredictionRequest], - batch_ends: &[usize], - ) -> InputTensor { - InputTensor::FloatTensor( - self.get_embedding_tensors(bpr, self.user_eng_embedding_feature_id, batch_ends) - .into_dyn(), - ) - } - - fn get_author_embedding( - &self, - bpr: &[BatchPredictionRequest], - batch_ends: &[usize], - ) -> InputTensor { - InputTensor::FloatTensor( - self.get_embedding_tensors(bpr, self.author_embedding_feature_id, batch_ends) - .into_dyn(), - ) - } -} - -impl Converter for BatchPredictionRequestToTorchTensorConverter { - fn convert(&self, batched_bytes: Vec>) -> (Vec, Vec) { - let bprs = batched_bytes - .into_iter() - .map(|bytes| { - BatchPredictionRequestToTorchTensorConverter::parse_batch_prediction_request(bytes) - }) - .collect::>(); - let batch_ends = bprs - .iter() - .map(|bpr| bpr.individual_features_list.len()) - .scan(0usize, |acc, e| { - //running total - *acc = *acc + e; - Some(*acc) - }) - .collect::>(); - - let t1 = self.get_continuous(&bprs, &batch_ends); - let t2 = self.get_binary(&bprs, &batch_ends); - //let _t3 = self.get_discrete(&bprs, &batch_ends); - let t4 = self.get_user_embedding(&bprs, &batch_ends); - let t5 = self.get_eng_embedding(&bprs, &batch_ends); - let t6 = self.get_author_embedding(&bprs, &batch_ends); - - (vec![t1, t2, t4, t5, t6], batch_ends) - } -} diff --git a/navi/dr_transform/src/lib.docx b/navi/dr_transform/src/lib.docx new file mode 100644 index 000000000..81aeac9ba Binary files /dev/null and b/navi/dr_transform/src/lib.docx differ diff --git a/navi/dr_transform/src/lib.rs b/navi/dr_transform/src/lib.rs deleted file mode 100644 index ea3b25a55..000000000 --- a/navi/dr_transform/src/lib.rs +++ /dev/null @@ -1,6 +0,0 @@ -pub mod all_config; -pub mod converter; -#[cfg(test)] -mod test; -pub mod util; -pub extern crate ort; diff --git a/navi/dr_transform/src/util.docx b/navi/dr_transform/src/util.docx new file mode 100644 index 000000000..91f69e368 Binary files /dev/null and b/navi/dr_transform/src/util.docx differ diff --git a/navi/dr_transform/src/util.rs b/navi/dr_transform/src/util.rs deleted file mode 100644 index 83b99805a..000000000 --- a/navi/dr_transform/src/util.rs +++ /dev/null @@ -1,32 +0,0 @@ -use npyz::WriterBuilder; -use npyz::{AutoSerialize, WriteOptions}; -use std::io::BufWriter; -use std::{ - fs::File, - io::{self, BufRead}, -}; - -pub fn load_batch_prediction_request_base64(file_name: &str) -> Vec> { - let file = File::open(file_name).expect("could not read file"); - let mut result = vec![]; - for (mut line_count, line) in io::BufReader::new(file).lines().enumerate() { - line_count += 1; - match base64::decode(line.unwrap().trim()) { - Ok(payload) => result.push(payload), - Err(err) => println!("error decoding line {file_name}:{line_count} - {err}"), - } - } - println!("result len: {}", result.len()); - result -} - -pub fn save_to_npy(data: &[T], save_to: String) { - let mut writer = WriteOptions::new() - .default_dtype() - .shape(&[data.len() as u64, 1]) - .writer(BufWriter::new(File::create(save_to).unwrap())) - .begin_nd() - .unwrap(); - writer.extend(data.to_owned()).unwrap(); - writer.finish().unwrap(); -} diff --git a/navi/navi/Cargo.docx b/navi/navi/Cargo.docx new file mode 100644 index 000000000..64b7aa0ac Binary files /dev/null and b/navi/navi/Cargo.docx differ diff --git a/navi/navi/Cargo.toml b/navi/navi/Cargo.toml deleted file mode 100644 index e355ea2a7..000000000 --- a/navi/navi/Cargo.toml +++ /dev/null @@ -1,81 +0,0 @@ -[package] -name = "navi" -version = "2.0.45" -edition = "2021" - -[[bin]] -name = "navi" -path = "src/bin/navi.rs" -required-features=["tf"] -[[bin]] -name = "navi_torch" -path = "src/bin/navi_torch.rs" -required-features=["torch"] -[[bin]] -name = "navi_onnx" -path = "src/bin/navi_onnx.rs" -required-features=["onnx"] -[[bin]] -name = "navi_onnx_test" -path = "src/bin/bin_tests/navi_onnx_test.rs" -[[bin]] -name = "navi_torch_test" -path = "src/bin/bin_tests/navi_torch_test.rs" -required-features=["torch"] - -[features] -default=[] -navi_console=[] -torch=["tch"] -onnx=[] -tf=["tensorflow"] -[dependencies] -itertools = "0.10.5" -anyhow = "1.0.57" -arrayvec = "0.7.2" -clap = { version = "4.0.32", features = ["derive"] } -console-subscriber = "0.1.6" -time = { version = "0.3.20", features = ["parsing"] } -env_logger = "0.10.0" -flamegraph = "0.6.1" -fnv = "1.0.7" -futures = { version = "0.3", default-features = false } -image = "0.24.5" -indexmap = "1.8.1" -lazy_static = "1.4" -libloading = "0.7" -log = "0.4.17" -ndarray-rand = "0.14.0" -prometheus = "0.13.1" -prost = "0.9" -prost-types = "0.9" -parking_lot = "0.12.1" -rand = "0.8.5" -rand_pcg = "0.3.1" -random = "0.12.2" -x509-parser = "0.15.0" -sha256 = "1.0.3" -tonic = { version = "0.6.2", features=['compression', 'tls'] } -tokio = { version = "1.17.0", features = ["macros", "rt-multi-thread", "fs", "process"] } -warp = "0.3" -npyz = "0.7.3" -base64 = "0.21.0" -histogram = "0.6.9" -tch = {version = "0.10.3", optional = true} -tensorflow = { version = "0.18.0", optional = true } -once_cell = {version = "1.17.1"} -ndarray = "0.15" -serde = "1.0.154" -serde_json = "1.0.94" -dr_transform = { path = "../dr_transform"} -[build-dependencies] -tonic-build = {version = "0.6.2", features=['prost', "compression"] } -[profile.release] -debug = true -[dev-dependencies] -ndarray-rand = "0.14.0" -tokio-test = "*" -assert_cmd = "2.0" -criterion = "0.4.0" - - diff --git a/navi/navi/build.docx b/navi/navi/build.docx new file mode 100644 index 000000000..5cbab187e Binary files /dev/null and b/navi/navi/build.docx differ diff --git a/navi/navi/build.rs b/navi/navi/build.rs deleted file mode 100644 index 8757a1823..000000000 --- a/navi/navi/build.rs +++ /dev/null @@ -1,13 +0,0 @@ -fn main() -> Result<(), Box> { - //::compile_protos("proto/tensorflow_serving/apis/prediction_service.proto")?; - tonic_build::configure().compile( - &[ - "proto/tensorflow_serving/apis/prediction_service.proto", - "proto/tensorflow/core/protobuf/config.proto", - "proto/tensorflow_serving/apis/prediction_log.proto", - "proto/kfserving/grpc_predict_v2.proto", - ], - &["proto"], - )?; - Ok(()) -} diff --git a/navi/navi/proto/kfserving/grpc_predict_v2.docx b/navi/navi/proto/kfserving/grpc_predict_v2.docx new file mode 100644 index 000000000..ee312c4f4 Binary files /dev/null and b/navi/navi/proto/kfserving/grpc_predict_v2.docx differ diff --git a/navi/navi/proto/kfserving/grpc_predict_v2.proto b/navi/navi/proto/kfserving/grpc_predict_v2.proto deleted file mode 100644 index 6b2475a2e..000000000 --- a/navi/navi/proto/kfserving/grpc_predict_v2.proto +++ /dev/null @@ -1,326 +0,0 @@ -// Copyright 2020 kubeflow.org. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; -package inference; - -// Inference Server GRPC endpoints. -service GRPCInferenceService -{ - // The ServerLive API indicates if the inference server is able to receive - // and respond to metadata and inference requests. - rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {} - - // The ServerReady API indicates if the server is ready for inferencing. - rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} - - // The ModelReady API indicates if a specific model is ready for inferencing. - rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {} - - // The ServerMetadata API provides information about the server. Errors are - // indicated by the google.rpc.Status returned for the request. The OK code - // indicates success and other codes indicate failure. - rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {} - - // The per-model metadata API provides information about a model. Errors are - // indicated by the google.rpc.Status returned for the request. The OK code - // indicates success and other codes indicate failure. - rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {} - - // The ModelInfer API performs inference using the specified model. Errors are - // indicated by the google.rpc.Status returned for the request. The OK code - // indicates success and other codes indicate failure. - rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {} -} - -message ServerLiveRequest {} - -message ServerLiveResponse -{ - // True if the inference server is live, false if not live. - bool live = 1; -} - -message ServerReadyRequest {} - -message ServerReadyResponse -{ - // True if the inference server is ready, false if not ready. - bool ready = 1; -} - -message ModelReadyRequest -{ - // The name of the model to check for readiness. - string name = 1; - - // The version of the model to check for readiness. If not given the - // server will choose a version based on the model and internal policy. - string version = 2; -} - -message ModelReadyResponse -{ - // True if the model is ready, false if not ready. - bool ready = 1; -} - -message ServerMetadataRequest {} - -message ServerMetadataResponse -{ - // The server name. - string name = 1; - - // The server version. - string version = 2; - - // The extensions supported by the server. - repeated string extensions = 3; -} - -message ModelMetadataRequest -{ - // The name of the model. - string name = 1; - - // The version of the model to check for readiness. If not given the - // server will choose a version based on the model and internal policy. - string version = 2; -} - -message ModelMetadataResponse -{ - // Metadata for a tensor. - message TensorMetadata - { - // The tensor name. - string name = 1; - - // The tensor data type. - string datatype = 2; - - // The tensor shape. A variable-size dimension is represented - // by a -1 value. - repeated int64 shape = 3; - } - - // The model name. - string name = 1; - - // The versions of the model available on the server. - repeated string versions = 2; - - // The model's platform. See Platforms. - string platform = 3; - - // The model's inputs. - repeated TensorMetadata inputs = 4; - - // The model's outputs. - repeated TensorMetadata outputs = 5; -} - -message ModelInferRequest -{ - // An input tensor for an inference request. - message InferInputTensor - { - // The tensor name. - string name = 1; - - // The tensor data type. - string datatype = 2; - - // The tensor shape. - repeated int64 shape = 3; - - // Optional inference input tensor parameters. - map parameters = 4; - - // The tensor contents using a data-type format. This field must - // not be specified if "raw" tensor contents are being used for - // the inference request. - InferTensorContents contents = 5; - } - - // An output tensor requested for an inference request. - message InferRequestedOutputTensor - { - // The tensor name. - string name = 1; - - // Optional requested output tensor parameters. - map parameters = 2; - } - - // The name of the model to use for inferencing. - string model_name = 1; - - // The version of the model to use for inference. If not given the - // server will choose a version based on the model and internal policy. - string model_version = 2; - - // Optional identifier for the request. If specified will be - // returned in the response. - string id = 3; - - // Optional inference parameters. - map parameters = 4; - - // The input tensors for the inference. - repeated InferInputTensor inputs = 5; - - // The requested output tensors for the inference. Optional, if not - // specified all outputs produced by the model will be returned. - repeated InferRequestedOutputTensor outputs = 6; - - // The data contained in an input tensor can be represented in "raw" - // bytes form or in the repeated type that matches the tensor's data - // type. To use the raw representation 'raw_input_contents' must be - // initialized with data for each tensor in the same order as - // 'inputs'. For each tensor, the size of this content must match - // what is expected by the tensor's shape and data type. The raw - // data must be the flattened, one-dimensional, row-major order of - // the tensor elements without any stride or padding between the - // elements. Note that the FP16 and BF16 data types must be represented as - // raw content as there is no specific data type for a 16-bit float type. - // - // If this field is specified then InferInputTensor::contents must - // not be specified for any input tensor. - repeated bytes raw_input_contents = 7; -} - -message ModelInferResponse -{ - // An output tensor returned for an inference request. - message InferOutputTensor - { - // The tensor name. - string name = 1; - - // The tensor data type. - string datatype = 2; - - // The tensor shape. - repeated int64 shape = 3; - - // Optional output tensor parameters. - map parameters = 4; - - // The tensor contents using a data-type format. This field must - // not be specified if "raw" tensor contents are being used for - // the inference response. - InferTensorContents contents = 5; - } - - // The name of the model used for inference. - string model_name = 1; - - // The version of the model used for inference. - string model_version = 2; - - // The id of the inference request if one was specified. - string id = 3; - - // Optional inference response parameters. - map parameters = 4; - - // The output tensors holding inference results. - repeated InferOutputTensor outputs = 5; - - // The data contained in an output tensor can be represented in - // "raw" bytes form or in the repeated type that matches the - // tensor's data type. To use the raw representation 'raw_output_contents' - // must be initialized with data for each tensor in the same order as - // 'outputs'. For each tensor, the size of this content must match - // what is expected by the tensor's shape and data type. The raw - // data must be the flattened, one-dimensional, row-major order of - // the tensor elements without any stride or padding between the - // elements. Note that the FP16 and BF16 data types must be represented as - // raw content as there is no specific data type for a 16-bit float type. - // - // If this field is specified then InferOutputTensor::contents must - // not be specified for any output tensor. - repeated bytes raw_output_contents = 6; -} - -// An inference parameter value. The Parameters message describes a -// “name”/”value” pair, where the “name” is the name of the parameter -// and the “value” is a boolean, integer, or string corresponding to -// the parameter. -message InferParameter -{ - // The parameter value can be a string, an int64, a boolean - // or a message specific to a predefined parameter. - oneof parameter_choice - { - // A boolean parameter value. - bool bool_param = 1; - - // An int64 parameter value. - int64 int64_param = 2; - - // A string parameter value. - string string_param = 3; - } -} - -// The data contained in a tensor represented by the repeated type -// that matches the tensor's data type. Protobuf oneof is not used -// because oneofs cannot contain repeated fields. -message InferTensorContents -{ - // Representation for BOOL data type. The size must match what is - // expected by the tensor's shape. The contents must be the flattened, - // one-dimensional, row-major order of the tensor elements. - repeated bool bool_contents = 1; - - // Representation for INT8, INT16, and INT32 data types. The size - // must match what is expected by the tensor's shape. The contents - // must be the flattened, one-dimensional, row-major order of the - // tensor elements. - repeated int32 int_contents = 2; - - // Representation for INT64 data types. The size must match what - // is expected by the tensor's shape. The contents must be the - // flattened, one-dimensional, row-major order of the tensor elements. - repeated int64 int64_contents = 3; - - // Representation for UINT8, UINT16, and UINT32 data types. The size - // must match what is expected by the tensor's shape. The contents - // must be the flattened, one-dimensional, row-major order of the - // tensor elements. - repeated uint32 uint_contents = 4; - - // Representation for UINT64 data types. The size must match what - // is expected by the tensor's shape. The contents must be the - // flattened, one-dimensional, row-major order of the tensor elements. - repeated uint64 uint64_contents = 5; - - // Representation for FP32 data type. The size must match what is - // expected by the tensor's shape. The contents must be the flattened, - // one-dimensional, row-major order of the tensor elements. - repeated float fp32_contents = 6; - - // Representation for FP64 data type. The size must match what is - // expected by the tensor's shape. The contents must be the flattened, - // one-dimensional, row-major order of the tensor elements. - repeated double fp64_contents = 7; - - // Representation for BYTES data type. The size must match what is - // expected by the tensor's shape. The contents must be the flattened, - // one-dimensional, row-major order of the tensor elements. - repeated bytes bytes_contents = 8; -} diff --git a/navi/navi/proto/tensorflow/core/example/example.docx b/navi/navi/proto/tensorflow/core/example/example.docx new file mode 100644 index 000000000..ea2068ee1 Binary files /dev/null and b/navi/navi/proto/tensorflow/core/example/example.docx differ diff --git a/navi/navi/proto/tensorflow/core/example/example.proto b/navi/navi/proto/tensorflow/core/example/example.proto deleted file mode 100644 index 0b49514e5..000000000 --- a/navi/navi/proto/tensorflow/core/example/example.proto +++ /dev/null @@ -1,306 +0,0 @@ -// Protocol messages for describing input data Examples for machine learning -// model training or inference. -syntax = "proto3"; - -package tensorflow; - -import "tensorflow/core/example/feature.proto"; - -option cc_enable_arenas = true; -option java_outer_classname = "ExampleProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.example"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example"; - -// LINT.IfChange -// An Example is a mostly-normalized data format for storing data for -// training and inference. It contains a key-value store (features); where -// each key (string) maps to a Feature message (which is oneof packed BytesList, -// FloatList, or Int64List). This flexible and compact format allows the -// storage of large amounts of typed data, but requires that the data shape -// and use be determined by the configuration files and parsers that are used to -// read and write this format. That is, the Example is mostly *not* a -// self-describing format. In TensorFlow, Examples are read in row-major -// format, so any configuration that describes data with rank-2 or above -// should keep this in mind. For example, to store an M x N matrix of Bytes, -// the BytesList must contain M*N bytes, with M rows of N contiguous values -// each. That is, the BytesList value must store the matrix as: -// .... row 0 .... .... row 1 .... // ........... // ... row M-1 .... -// -// An Example for a movie recommendation application: -// features { -// feature { -// key: "age" -// value { float_list { -// value: 29.0 -// }} -// } -// feature { -// key: "movie" -// value { bytes_list { -// value: "The Shawshank Redemption" -// value: "Fight Club" -// }} -// } -// feature { -// key: "movie_ratings" -// value { float_list { -// value: 9.0 -// value: 9.7 -// }} -// } -// feature { -// key: "suggestion" -// value { bytes_list { -// value: "Inception" -// }} -// } -// # Note that this feature exists to be used as a label in training. -// # E.g., if training a logistic regression model to predict purchase -// # probability in our learning tool we would set the label feature to -// # "suggestion_purchased". -// feature { -// key: "suggestion_purchased" -// value { float_list { -// value: 1.0 -// }} -// } -// # Similar to "suggestion_purchased" above this feature exists to be used -// # as a label in training. -// # E.g., if training a linear regression model to predict purchase -// # price in our learning tool we would set the label feature to -// # "purchase_price". -// feature { -// key: "purchase_price" -// value { float_list { -// value: 9.99 -// }} -// } -// } -// -// A conformant Example data set obeys the following conventions: -// - If a Feature K exists in one example with data type T, it must be of -// type T in all other examples when present. It may be omitted. -// - The number of instances of Feature K list data may vary across examples, -// depending on the requirements of the model. -// - If a Feature K doesn't exist in an example, a K-specific default will be -// used, if configured. -// - If a Feature K exists in an example but contains no items, the intent -// is considered to be an empty tensor and no default will be used. - -message Example { - Features features = 1; -} - -// A SequenceExample is an Example representing one or more sequences, and -// some context. The context contains features which apply to the entire -// example. The feature_lists contain a key, value map where each key is -// associated with a repeated set of Features (a FeatureList). -// A FeatureList thus represents the values of a feature identified by its key -// over time / frames. -// -// Below is a SequenceExample for a movie recommendation application recording a -// sequence of ratings by a user. The time-independent features ("locale", -// "age", "favorites") describing the user are part of the context. The sequence -// of movies the user rated are part of the feature_lists. For each movie in the -// sequence we have information on its name and actors and the user's rating. -// This information is recorded in three separate feature_list(s). -// In the example below there are only two movies. All three feature_list(s), -// namely "movie_ratings", "movie_names", and "actors" have a feature value for -// both movies. Note, that "actors" is itself a bytes_list with multiple -// strings per movie. -// -// context: { -// feature: { -// key : "locale" -// value: { -// bytes_list: { -// value: [ "pt_BR" ] -// } -// } -// } -// feature: { -// key : "age" -// value: { -// float_list: { -// value: [ 19.0 ] -// } -// } -// } -// feature: { -// key : "favorites" -// value: { -// bytes_list: { -// value: [ "Majesty Rose", "Savannah Outen", "One Direction" ] -// } -// } -// } -// } -// feature_lists: { -// feature_list: { -// key : "movie_ratings" -// value: { -// feature: { -// float_list: { -// value: [ 4.5 ] -// } -// } -// feature: { -// float_list: { -// value: [ 5.0 ] -// } -// } -// } -// } -// feature_list: { -// key : "movie_names" -// value: { -// feature: { -// bytes_list: { -// value: [ "The Shawshank Redemption" ] -// } -// } -// feature: { -// bytes_list: { -// value: [ "Fight Club" ] -// } -// } -// } -// } -// feature_list: { -// key : "actors" -// value: { -// feature: { -// bytes_list: { -// value: [ "Tim Robbins", "Morgan Freeman" ] -// } -// } -// feature: { -// bytes_list: { -// value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ] -// } -// } -// } -// } -// } -// -// A conformant SequenceExample data set obeys the following conventions: -// -// Context: -// - All conformant context features K must obey the same conventions as -// a conformant Example's features (see above). -// Feature lists: -// - A FeatureList L may be missing in an example; it is up to the -// parser configuration to determine if this is allowed or considered -// an empty list (zero length). -// - If a FeatureList L exists, it may be empty (zero length). -// - If a FeatureList L is non-empty, all features within the FeatureList -// must have the same data type T. Even across SequenceExamples, the type T -// of the FeatureList identified by the same key must be the same. An entry -// without any values may serve as an empty feature. -// - If a FeatureList L is non-empty, it is up to the parser configuration -// to determine if all features within the FeatureList must -// have the same size. The same holds for this FeatureList across multiple -// examples. -// - For sequence modeling, e.g.: -// http://colah.github.io/posts/2015-08-Understanding-LSTMs/ -// https://github.com/tensorflow/nmt -// the feature lists represent a sequence of frames. -// In this scenario, all FeatureLists in a SequenceExample have the same -// number of Feature messages, so that the ith element in each FeatureList -// is part of the ith frame (or time step). -// Examples of conformant and non-conformant examples' FeatureLists: -// -// Conformant FeatureLists: -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.5 ] } } -// feature: { float_list: { value: [ 5.0 ] } } } -// } } -// -// Non-conformant FeatureLists (mismatched types): -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.5 ] } } -// feature: { int64_list: { value: [ 5 ] } } } -// } } -// -// Conditionally conformant FeatureLists, the parser configuration determines -// if the feature sizes must match: -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.5 ] } } -// feature: { float_list: { value: [ 5.0, 6.0 ] } } } -// } } -// -// Conformant pair of SequenceExample -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.5 ] } } -// feature: { float_list: { value: [ 5.0 ] } } } -// } } -// and: -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.5 ] } } -// feature: { float_list: { value: [ 5.0 ] } } -// feature: { float_list: { value: [ 2.0 ] } } } -// } } -// -// Conformant pair of SequenceExample -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.5 ] } } -// feature: { float_list: { value: [ 5.0 ] } } } -// } } -// and: -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { } -// } } -// -// Conditionally conformant pair of SequenceExample, the parser configuration -// determines if the second feature_lists is consistent (zero-length) or -// invalid (missing "movie_ratings"): -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.5 ] } } -// feature: { float_list: { value: [ 5.0 ] } } } -// } } -// and: -// feature_lists: { } -// -// Non-conformant pair of SequenceExample (mismatched types) -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.5 ] } } -// feature: { float_list: { value: [ 5.0 ] } } } -// } } -// and: -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { int64_list: { value: [ 4 ] } } -// feature: { int64_list: { value: [ 5 ] } } -// feature: { int64_list: { value: [ 2 ] } } } -// } } -// -// Conditionally conformant pair of SequenceExample; the parser configuration -// determines if the feature sizes must match: -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.5 ] } } -// feature: { float_list: { value: [ 5.0 ] } } } -// } } -// and: -// feature_lists: { feature_list: { -// key: "movie_ratings" -// value: { feature: { float_list: { value: [ 4.0 ] } } -// feature: { float_list: { value: [ 5.0, 3.0 ] } } -// } } - -message SequenceExample { - Features context = 1; - FeatureLists feature_lists = 2; -} -// LINT.ThenChange( -// https://www.tensorflow.org/code/tensorflow/python/training/training.py) diff --git a/navi/navi/proto/tensorflow/core/example/feature.docx b/navi/navi/proto/tensorflow/core/example/feature.docx new file mode 100644 index 000000000..7735043cb Binary files /dev/null and b/navi/navi/proto/tensorflow/core/example/feature.docx differ diff --git a/navi/navi/proto/tensorflow/core/example/feature.proto b/navi/navi/proto/tensorflow/core/example/feature.proto deleted file mode 100644 index e532747f3..000000000 --- a/navi/navi/proto/tensorflow/core/example/feature.proto +++ /dev/null @@ -1,110 +0,0 @@ -// Protocol messages for describing features for machine learning model -// training or inference. -// -// There are three base Feature types: -// - bytes -// - float -// - int64 -// -// A Feature contains Lists which may hold zero or more values. These -// lists are the base values BytesList, FloatList, Int64List. -// -// Features are organized into categories by name. The Features message -// contains the mapping from name to Feature. -// -// Example Features for a movie recommendation application: -// feature { -// key: "age" -// value { float_list { -// value: 29.0 -// }} -// } -// feature { -// key: "movie" -// value { bytes_list { -// value: "The Shawshank Redemption" -// value: "Fight Club" -// }} -// } -// feature { -// key: "movie_ratings" -// value { float_list { -// value: 9.0 -// value: 9.7 -// }} -// } -// feature { -// key: "suggestion" -// value { bytes_list { -// value: "Inception" -// }} -// } -// feature { -// key: "suggestion_purchased" -// value { int64_list { -// value: 1 -// }} -// } -// feature { -// key: "purchase_price" -// value { float_list { -// value: 9.99 -// }} -// } -// - -syntax = "proto3"; - -package tensorflow; - -option cc_enable_arenas = true; -option java_outer_classname = "FeatureProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.example"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/example"; - -// LINT.IfChange -// Containers to hold repeated fundamental values. -message BytesList { - repeated bytes value = 1; -} -message FloatList { - repeated float value = 1 [packed = true]; -} -message Int64List { - repeated int64 value = 1 [packed = true]; -} - -// Containers for non-sequential data. -message Feature { - // Each feature can be exactly one kind. - oneof kind { - BytesList bytes_list = 1; - FloatList float_list = 2; - Int64List int64_list = 3; - } -} - -message Features { - // Map from feature name to feature. - map feature = 1; -} - -// Containers for sequential data. -// -// A FeatureList contains lists of Features. These may hold zero or more -// Feature values. -// -// FeatureLists are organized into categories by name. The FeatureLists message -// contains the mapping from name to FeatureList. -// -message FeatureList { - repeated Feature feature = 1; -} - -message FeatureLists { - // Map from feature name to feature list. - map feature_list = 1; -} -// LINT.ThenChange( -// https://www.tensorflow.org/code/tensorflow/python/training/training.py) diff --git a/navi/navi/proto/tensorflow/core/framework/allocation_description.docx b/navi/navi/proto/tensorflow/core/framework/allocation_description.docx new file mode 100644 index 000000000..18cb174e8 Binary files /dev/null and b/navi/navi/proto/tensorflow/core/framework/allocation_description.docx differ diff --git a/navi/navi/proto/tensorflow/core/framework/allocation_description.proto b/navi/navi/proto/tensorflow/core/framework/allocation_description.proto deleted file mode 100644 index f18caa40b..000000000 --- a/navi/navi/proto/tensorflow/core/framework/allocation_description.proto +++ /dev/null @@ -1,29 +0,0 @@ -syntax = "proto3"; - -package tensorflow; - -option cc_enable_arenas = true; -option java_outer_classname = "AllocationDescriptionProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/allocation_description_go_proto"; - -message AllocationDescription { - // Total number of bytes requested - int64 requested_bytes = 1; - - // Total number of bytes allocated if known - int64 allocated_bytes = 2; - - // Name of the allocator used - string allocator_name = 3; - - // Identifier of the allocated buffer if known - int64 allocation_id = 4; - - // Set if this tensor only has one remaining reference - bool has_single_reference = 5; - - // Address of the allocation. - uint64 ptr = 6; -} diff --git a/navi/navi/proto/tensorflow/core/framework/api_def.docx b/navi/navi/proto/tensorflow/core/framework/api_def.docx new file mode 100644 index 000000000..0bb173e3c Binary files /dev/null and b/navi/navi/proto/tensorflow/core/framework/api_def.docx differ diff --git a/navi/navi/proto/tensorflow/core/framework/api_def.proto b/navi/navi/proto/tensorflow/core/framework/api_def.proto deleted file mode 100644 index 1823ce64f..000000000 --- a/navi/navi/proto/tensorflow/core/framework/api_def.proto +++ /dev/null @@ -1,138 +0,0 @@ -// Defines the text format for including per-op API definition and -// overrides for client language op code generators. - -syntax = "proto3"; - -package tensorflow; - -import "tensorflow/core/framework/attr_value.proto"; - -option cc_enable_arenas = true; -option java_outer_classname = "ApiDefProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/api_def_go_proto"; - -// Used to specify and override the default API & behavior in the -// generated code for client languages, from what you would get from -// the OpDef alone. There will be a set of ApiDefs that are common -// to all client languages, and another set per client language. -// The per-client-language ApiDefs will inherit values from the -// common ApiDefs which it can either replace or modify. -// -// We separate the API definition from the OpDef so we can evolve the -// API while remaining backwards compatible when interpreting old -// graphs. Overrides go in an "api_def.pbtxt" file with a text-format -// ApiDefs message. -// -// WARNING: Be *very* careful changing the API for any existing op -- -// you can change the semantics of existing code. These changes may -// need to wait until a major release of TensorFlow to avoid breaking -// our compatibility promises. -message ApiDef { - // Name of the op (in the OpDef) to specify the API for. - string graph_op_name = 1; - // If this op is deprecated, set deprecation message to the message - // that should be logged when this op is used. - // The message should indicate alternative op to use, if any. - string deprecation_message = 12; - // Major version when the op will be deleted. For e.g. set this - // value to 2 if op API should be removed in TensorFlow 2.0 and - // deprecated in versions before that. - int32 deprecation_version = 13; - - enum Visibility { - // Normally this is "VISIBLE" unless you are inheriting a - // different value from another ApiDef. - DEFAULT_VISIBILITY = 0; - // Publicly visible in the API. - VISIBLE = 1; - // Do not include this op in the generated API. If visibility is - // set to 'SKIP', other fields are ignored for this op. - SKIP = 2; - // Hide this op by putting it into an internal namespace (or whatever - // is appropriate in the target language). - HIDDEN = 3; - } - Visibility visibility = 2; - - // If you specify any endpoint, this will replace all of the - // inherited endpoints. The first endpoint should be the - // "canonical" endpoint, and should not be deprecated (unless all - // endpoints are deprecated). - message Endpoint { - // Name should be either like "CamelCaseName" or - // "Package.CamelCaseName". Client-language-specific ApiDefs may - // use a snake_case convention instead of CamelCase. - string name = 1; - - // Set if this endpoint is deprecated. If set to true, a message suggesting - // to use a non-deprecated endpoint instead will be printed. If all - // endpoints are deprecated, set deprecation_message in ApiDef instead. - bool deprecated = 3; - - // Major version when an endpoint will be deleted. For e.g. set this - // value to 2 if endpoint should be removed in TensorFlow 2.0 and - // deprecated in versions before that. - int32 deprecation_version = 4; - } - repeated Endpoint endpoint = 3; - - message Arg { - string name = 1; - - // Change the name used to access this arg in the API from what - // is used in the GraphDef. Note that these names in `backticks` - // will also be replaced in the summary & description fields. - string rename_to = 2; - - // Note: this will replace any inherited arg doc. There is no - // current way of modifying arg descriptions (other than replacing - // them entirely) as can be done with op descriptions. - string description = 3; - } - repeated Arg in_arg = 4; - repeated Arg out_arg = 5; - // List of original in_arg names to specify new argument order. - // Length of arg_order should be either empty to keep current order - // or match size of in_arg. - repeated string arg_order = 11; - - // Description of the graph-construction-time configuration of this - // Op. That is to say, this describes the attr fields that will - // be specified in the NodeDef. - message Attr { - string name = 1; - - // Change the name used to access this attr in the API from what - // is used in the GraphDef. Note that these names in `backticks` - // will also be replaced in the summary & description fields. - string rename_to = 2; - - // Specify a new default value to use for this attr. This default - // will be used when creating new graphs, as opposed to the - // default in the OpDef, which will be used when interpreting old - // GraphDefs. - AttrValue default_value = 3; - - // Note: this will replace any inherited attr doc, there is no current - // way of modifying attr descriptions as can be done with op descriptions. - string description = 4; - } - repeated Attr attr = 6; - - // One-line human-readable description of what the Op does. - string summary = 7; - - // Additional, longer human-readable description of what the Op does. - string description = 8; - - // Modify an existing/inherited description by adding text to the beginning - // or end. - string description_prefix = 9; - string description_suffix = 10; -} - -message ApiDefs { - repeated ApiDef op = 1; -} diff --git a/navi/navi/proto/tensorflow/core/framework/attr_value.docx b/navi/navi/proto/tensorflow/core/framework/attr_value.docx new file mode 100644 index 000000000..60d8bb433 Binary files /dev/null and b/navi/navi/proto/tensorflow/core/framework/attr_value.docx differ diff --git a/navi/navi/proto/tensorflow/core/framework/attr_value.proto b/navi/navi/proto/tensorflow/core/framework/attr_value.proto deleted file mode 100644 index 2e913130d..000000000 --- a/navi/navi/proto/tensorflow/core/framework/attr_value.proto +++ /dev/null @@ -1,64 +0,0 @@ -syntax = "proto3"; - -package tensorflow; - -import "tensorflow/core/framework/tensor.proto"; -import "tensorflow/core/framework/tensor_shape.proto"; -import "tensorflow/core/framework/types.proto"; - -option cc_enable_arenas = true; -option java_outer_classname = "AttrValueProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/attr_value_go_proto"; - -// Protocol buffer representing the value for an attr used to configure an Op. -// Comment indicates the corresponding attr type. Only the field matching the -// attr type may be filled. -message AttrValue { - // LINT.IfChange - message ListValue { - repeated bytes s = 2; // "list(string)" - repeated int64 i = 3 [packed = true]; // "list(int)" - repeated float f = 4 [packed = true]; // "list(float)" - repeated bool b = 5 [packed = true]; // "list(bool)" - repeated DataType type = 6 [packed = true]; // "list(type)" - repeated TensorShapeProto shape = 7; // "list(shape)" - repeated TensorProto tensor = 8; // "list(tensor)" - repeated NameAttrList func = 9; // "list(attr)" - } - // LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc) - - oneof value { - bytes s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - DataType type = 6; // "type" - TensorShapeProto shape = 7; // "shape" - TensorProto tensor = 8; // "tensor" - ListValue list = 1; // any "list(...)" - - // "func" represents a function. func.name is a function's name or - // a primitive op's name. func.attr.first is the name of an attr - // defined for that function. func.attr.second is the value for - // that attr in the instantiation. - NameAttrList func = 10; - - // This is a placeholder only used in nodes defined inside a - // function. It indicates the attr value will be supplied when - // the function is instantiated. For example, let us suppose a - // node "N" in function "FN". "N" has an attr "A" with value - // placeholder = "foo". When FN is instantiated with attr "foo" - // set to "bar", the instantiated node N's attr A will have been - // given the value "bar". - string placeholder = 9; - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. -message NameAttrList { - string name = 1; - map attr = 2; -} diff --git a/navi/navi/proto/tensorflow/core/framework/cost_graph.docx b/navi/navi/proto/tensorflow/core/framework/cost_graph.docx new file mode 100644 index 000000000..e88f05577 Binary files /dev/null and b/navi/navi/proto/tensorflow/core/framework/cost_graph.docx differ diff --git a/navi/navi/proto/tensorflow/core/framework/cost_graph.proto b/navi/navi/proto/tensorflow/core/framework/cost_graph.proto deleted file mode 100644 index 42c9e23cf..000000000 --- a/navi/navi/proto/tensorflow/core/framework/cost_graph.proto +++ /dev/null @@ -1,89 +0,0 @@ -syntax = "proto3"; - -package tensorflow; - -import "tensorflow/core/framework/tensor_shape.proto"; -import "tensorflow/core/framework/types.proto"; - -option cc_enable_arenas = true; -option java_outer_classname = "CostGraphProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/cost_graph_go_proto"; - -message CostGraphDef { - message Node { - // The name of the node. Names are globally unique. - string name = 1; - - // The device of the node. Can be empty if the node is mapped to the - // default partition or partitioning hasn't been run yet. - string device = 2; - - // The id of the node. Node ids are only unique inside a partition. - int32 id = 3; - - // Inputs of this node. They must be executed before this node can be - // executed. An input is a particular output of another node, specified - // by the node id and the output index. - message InputInfo { - int32 preceding_node = 1; - int32 preceding_port = 2; - } - repeated InputInfo input_info = 4; - - // Outputs of this node. - message OutputInfo { - int64 size = 1; - // If >= 0, the output is an alias of an input. Note that an alias input - // may itself be an alias. The algorithm will therefore need to follow - // those pointers. - int64 alias_input_port = 2; - TensorShapeProto shape = 3; - DataType dtype = 4; - } - repeated OutputInfo output_info = 5; - - // Temporary memory used by this node. - int64 temporary_memory_size = 6; - - // Persistent memory used by this node. - int64 persistent_memory_size = 12; - - int64 host_temp_memory_size = 10 [deprecated = true]; - int64 device_temp_memory_size = 11 [deprecated = true]; - int64 device_persistent_memory_size = 16 [deprecated = true]; - - // Estimate of the computational cost of this node, in microseconds. - int64 compute_cost = 9; - - // Analytical estimate of the computational cost of this node, in - // microseconds. - int64 compute_time = 14; - - // Analytical estimate of the memory access cost of this node, in - // microseconds. - int64 memory_time = 15; - - // If true, the output is permanent: it can't be discarded, because this - // node is part of the "final output". Nodes may depend on final nodes. - bool is_final = 7; - - // Ids of the control inputs for this node. - repeated int32 control_input = 8; - - // Are the costs inaccurate? - bool inaccurate = 17; - } - repeated Node node = 1; - - // Total cost of this graph, typically used for balancing decisions. - message AggregatedCost { - // Aggregated cost value. - float cost = 1; - - // Aggregated cost dimension (e.g. 'memory', 'compute', 'network'). - string dimension = 2; - } - repeated AggregatedCost cost = 2; -} diff --git a/navi/navi/proto/tensorflow/core/framework/dataset_metadata.docx b/navi/navi/proto/tensorflow/core/framework/dataset_metadata.docx new file mode 100644 index 000000000..6b44c59bf Binary files /dev/null and b/navi/navi/proto/tensorflow/core/framework/dataset_metadata.docx differ diff --git a/navi/navi/proto/tensorflow/core/framework/dataset_metadata.proto b/navi/navi/proto/tensorflow/core/framework/dataset_metadata.proto deleted file mode 100644 index 0e667dd48..000000000 --- a/navi/navi/proto/tensorflow/core/framework/dataset_metadata.proto +++ /dev/null @@ -1,10 +0,0 @@ -syntax = "proto3"; - -package tensorflow.data; - -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/dataset_metadata_go_proto"; - -// next: 2 -message Metadata { - bytes name = 1; -} diff --git a/navi/navi/proto/tensorflow/core/framework/dataset_options.docx b/navi/navi/proto/tensorflow/core/framework/dataset_options.docx new file mode 100644 index 000000000..d3c18af56 Binary files /dev/null and b/navi/navi/proto/tensorflow/core/framework/dataset_options.docx differ diff --git a/navi/navi/proto/tensorflow/core/framework/dataset_options.proto b/navi/navi/proto/tensorflow/core/framework/dataset_options.proto deleted file mode 100644 index 3919d51c1..000000000 --- a/navi/navi/proto/tensorflow/core/framework/dataset_options.proto +++ /dev/null @@ -1,196 +0,0 @@ -syntax = "proto3"; - -package tensorflow.data; - -import "tensorflow/core/framework/model.proto"; - -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/dataset_options_go_proto"; - -// Represents the type of auto-sharding we enable. -enum AutoShardPolicy { - // AUTO: Attempts FILE-based sharding, falling back to DATA-based sharding. - AUTO = 0; - // FILE: Shards by input files (i.e. each worker will get a set of files to - // process). When this option is selected, make sure that there is at least as - // many files as workers. If there are fewer input files than workers, a - // runtime error will be raised. - FILE = 1; - // DATA: Shards by elements produced by the dataset. Each worker will process - // the whole dataset and discard the portion that is not for itself. Note that - // for this mode to correctly partitions the dataset elements, the dataset - // needs to produce elements in a deterministic order. - DATA = 2; - // HINT: Looks for the presence of `shard(SHARD_HINT, ...)` which is treated - // as a placeholder to replace with `shard(num_workers, worker_index)`. - HINT = 3; - // OFF: No sharding will be performed. - OFF = -1; -} - -// next: 5 -message AutotuneOptions { - // Whether to automatically tune performance knobs. - oneof optional_enabled { - bool enabled = 1; - } - // When autotuning is enabled (through autotune), determines the CPU budget to - // use. Values greater than the number of schedulable CPU cores are allowed - // but may result in CPU contention. - oneof optional_cpu_budget { - int32 cpu_budget = 2; - } - // When autotuning is enabled (through autotune), determines the RAM budget to - // use. Values greater than the available RAM in bytes may result in OOM. If - // 0, defaults to half of the available RAM in bytes. - oneof optional_ram_budget { - int64 ram_budget = 3; - } - - // When autotuning is enabled (through autotune), determines the algorithm to - // use. If not explicitly set by user, autotuning will follow HILL_CLIMB - // algorithm but has more flexibility to tune parameters more aggressively, - // in which case the behavior is implementation specific and may change over - // time. - oneof optional_autotune_algorithm { - model.AutotuneAlgorithm autotune_algorithm = 4; - } -} - -// next: 2 -message CardinalityOptions { - enum ComputeLevel { - CARDINALITY_COMPUTE_UNSPECIFIED = 0; - // Cardinality will only be computed if it can be determined in a cheap - // manner (ie. without reading from file sources). If the cardinality would - // be nontrivial to compute, Cardinality() will return UNKNOWN_CARDINALITY. - CARDINALITY_COMPUTE_LOW = 1; - // Moderate effort will be made to determine cardinality, such as reading - // index data from source files. If significant work is needed to compute - // cardinality (e.g. reading entire source file contents or executing user - // defined functions), Cardinality() will return UNKNOWN_CARDINALITY. - CARDINALITY_COMPUTE_MODERATE = 2; - } - ComputeLevel compute_level = 1; -} - -// next: 3 -message DistributeOptions { - AutoShardPolicy auto_shard_policy = 1; - // The number of devices attached to this input pipeline. - oneof optional_num_devices { - int32 num_devices = 2; - } -} - -// next: 18 -message OptimizationOptions { - // Whether to apply default graph optimizations. If False, only graph - // optimizations that have been explicitly enabled will be applied. - oneof optional_apply_default_optimizations { - bool apply_default_optimizations = 1; - } - reserved 2; - reserved 3; - reserved 4; - reserved 5; - // Whether to fuse filter transformations. - oneof optional_filter_fusion { - bool filter_fusion = 6; - } - // NOTE: field id 7 deleted in June 2021. - reserved 7; - // NOTE: field id 8 deleted in June 2021. - reserved 8; - // Whether to fuse map and batch transformations. - oneof optional_map_and_batch_fusion { - bool map_and_batch_fusion = 9; - } - // Whether to fuse map and filter transformations. - oneof optional_map_and_filter_fusion { - bool map_and_filter_fusion = 10; - } - // Whether to fuse map transformations. - oneof optional_map_fusion { - bool map_fusion = 11; - } - // Whether to parallelize stateless map transformations. - oneof optional_map_parallelization { - bool map_parallelization = 12; - } - - // NOTE: field id 13 deleted in June 2021. - reserved 13; - - // Whether to eliminate no-op transformations. - oneof optional_noop_elimination { - bool noop_elimination = 14; - } - // Whether to parallelize copying of batch elements. This optimization is - // highly experimental and can cause performance degradation (e.g. when the - // parallelization overhead exceeds the benefits of performing the data copies - // in parallel). You should only enable this optimization if a) your input - // pipeline is bottlenecked on batching and b) you have validated that this - // optimization improves performance. - oneof optional_parallel_batch { - bool parallel_batch = 15; - } - // Field id 16 was removed in 06/2021. - reserved 16; - // Whether to fuse shuffle and repeat transformations. - oneof optional_shuffle_and_repeat_fusion { - bool shuffle_and_repeat_fusion = 17; - } -} - -// next: 3 -message ThreadingOptions { - // If set, it overrides the maximum degree of intra-op parallelism. - oneof optional_max_intra_op_parallelism { - int32 max_intra_op_parallelism = 1; - } - // If set, the dataset will use a private threadpool of the given size. - oneof optional_private_threadpool_size { - int32 private_threadpool_size = 2; - } -} - -// Represents how to handle external state during serialization. -enum ExternalStatePolicy { - POLICY_WARN = 0; - POLICY_IGNORE = 1; - POLICY_FAIL = 2; -} - -// Message stored with Dataset objects to control how datasets are processed and -// optimized. -// -// next: 8 -message Options { - // Whether the outputs need to be produced in deterministic order. - oneof optional_deterministic { - bool deterministic = 1; - } - // The distribution strategy options associated with the dataset. - AutotuneOptions autotune_options = 7; - // The distribution strategy options associated with the dataset. - DistributeOptions distribute_options = 2; - // The optimization options associated with the dataset. - OptimizationOptions optimization_options = 3; - // Whether to introduce 'slack' in the last `prefetch` of the input pipeline, - // if it exists. This may reduce CPU contention with accelerator host-side - // activity at the start of a step. The slack frequency is determined by the - // number of devices attached to this input pipeline. - oneof optional_slack { - bool slack = 4; - } - // The threading options associated with the dataset. - ThreadingOptions threading_options = 5; - // This option can be used to override the default policy for how to handle - // external state when serializing a dataset or checkpointing its iterator. - // There are three settings available - IGNORE: External state is ignored - // without a warning; WARN: External state is ignored and a warning is logged; - // FAIL: External state results in an error. - oneof optional_external_state_policy { - ExternalStatePolicy external_state_policy = 6; - } -} diff --git a/navi/navi/proto/tensorflow/core/framework/device_attributes.docx b/navi/navi/proto/tensorflow/core/framework/device_attributes.docx new file mode 100644 index 000000000..373ebd3e4 Binary files /dev/null and b/navi/navi/proto/tensorflow/core/framework/device_attributes.docx differ diff --git a/navi/navi/proto/tensorflow/core/framework/device_attributes.proto b/navi/navi/proto/tensorflow/core/framework/device_attributes.proto deleted file mode 100644 index 5f568e255..000000000 --- a/navi/navi/proto/tensorflow/core/framework/device_attributes.proto +++ /dev/null @@ -1,58 +0,0 @@ -syntax = "proto3"; - -package tensorflow; - -option cc_enable_arenas = true; -option java_outer_classname = "DeviceAttributesProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/device_attributes_go_proto"; - -message InterconnectLink { - int32 device_id = 1; - string type = 2; - int32 strength = 3; -} - -message LocalLinks { - repeated InterconnectLink link = 1; -} - -message DeviceLocality { - // Optional bus locality of device. Default value of 0 means - // no specific locality. Specific localities are indexed from 1. - int32 bus_id = 1; - - // Optional NUMA locality of device. - int32 numa_node = 2; - - // Optional local interconnect links to other devices. - LocalLinks links = 3; -} - -message DeviceAttributes { - // Fully specified name of the device within a cluster. - string name = 1; - - // String representation of device_type. - string device_type = 2; - - // Memory capacity of device in bytes. - int64 memory_limit = 4; - - // Platform-specific data about device that may be useful - // for supporting efficient data transfers. - DeviceLocality locality = 5; - - // A device is assigned a global unique number each time it is - // initialized. "incarnation" should never be 0. - fixed64 incarnation = 6; - - // String representation of the physical device that this device maps to. - string physical_device_desc = 7; - - // A physical device ID for use in XLA DeviceAssignments, unique across - // clients in a multi-client setup. Set to -1 if unavailable, non-negative - // otherwise. - int64 xla_global_id = 8; -} diff --git a/navi/navi/proto/tensorflow/core/framework/full_type.docx b/navi/navi/proto/tensorflow/core/framework/full_type.docx new file mode 100644 index 000000000..a63fb58a3 Binary files /dev/null and b/navi/navi/proto/tensorflow/core/framework/full_type.docx differ diff --git a/navi/navi/proto/tensorflow/core/framework/full_type.proto b/navi/navi/proto/tensorflow/core/framework/full_type.proto deleted file mode 100644 index ddf05ec8f..000000000 --- a/navi/navi/proto/tensorflow/core/framework/full_type.proto +++ /dev/null @@ -1,276 +0,0 @@ -syntax = "proto3"; - -package tensorflow; - -option cc_enable_arenas = true; -option java_outer_classname = "FullTypeProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; -option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/full_type_go_proto"; - -// Experimental. Represents the complete type information of a TensorFlow value. -enum FullTypeId { - // The default represents an uninitialized values. - TFT_UNSET = 0; - - // Type symbols. Used to construct more complex type expressions like - // algebraic data types. - - // Type variables may serve as placeholder for any other type ID in type - // templates. - // - // Examples: - // TFT_DATASET[TFT_VAR["T"]] is a Dataset returning a type indicated by "T". - // TFT_TENSOR[TFT_VAR["T"]] is a Tensor of n element type indicated by "T". - // TFT_TENSOR[TFT_VAR["T"]], TFT_TENSOR[TFT_VAR["T"]] are two tensors of - // identical element types. - // TFT_TENSOR[TFT_VAR["P"]], TFT_TENSOR[TFT_VAR["Q"]] are two tensors of - // independent element types. - // - TFT_VAR = 1; - - // Wildcard type. Describes a parameter of unknown type. In TensorFlow, that - // can mean either a "Top" type (accepts any type), or a dynamically typed - // object whose type is unknown in context. - // Important: "unknown" does not necessarily mean undeterminable! - TFT_ANY = 2; - - // The algebraic product type. This is an algebraic type that may be used just - // for logical grouping. Not to confused with TFT_TUPLE which describes a - // concrete object of several elements. - // - // Example: - // TFT_DATASET[TFT_PRODUCT[TFT_TENSOR[TFT_INT32], TFT_TENSOR[TFT_FLOAT64]]] - // is a Dataset producing two tensors, an integer one and a float one. - // - TFT_PRODUCT = 3; - - // Represents a named field, with the name stored in the attribute. - // - // Parametrization: - // TFT_NAMED[]{} - // * is the type of the field - // * is the field name, as string (thpugh can theoretically be an int - // as well) - // - // Example: - // TFT_RECORD[ - // TFT_NAMED[TFT_TENSOR[TFT_INT32]]{'foo'}, - // TFT_NAMED[TFT_TENSOR[TFT_FLOAT32]]{'bar'}, - // ] - // is a structure with two fields, an int tensor "foo" and a float tensor - // "bar". - TFT_NAMED = 4; - - // Template definition. Expands the variables by repeating a template as - // arguments of container. - // - // Parametrization: - // TFT_FOR_EACH[,