diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/sql/user_video_tweet_fav_engagement_generation.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/sql/user_video_tweet_fav_engagement_generation.docx
new file mode 100644
index 000000000..fc15d9491
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/sql/user_video_tweet_fav_engagement_generation.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/sql/user_video_tweet_fav_engagement_generation.sql b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/sql/user_video_tweet_fav_engagement_generation.sql
deleted file mode 100644
index 56b0f73a8..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/sql/user_video_tweet_fav_engagement_generation.sql
+++ /dev/null
@@ -1,69 +0,0 @@
-WITH
-  vars AS (
-    SELECT
-      TIMESTAMP("{START_TIME}") AS start_date,
-      TIMESTAMP("{END_TIME}") AS end_date,
-  ),
-
-  -- Get raw user-tweet interaction events from UUA (We will use fav engagements here)
-  raw_engagements AS (
-    SELECT
-      userIdentifier.userId AS userId,
-      eventMetadata.sourceTimestampMs AS tsMillis,
-      CASE
-          WHEN actionType IN ({CONTRIBUTING_ACTION_TYPES_STR}) THEN {CONTRIBUTING_ACTION_TWEET_ID_COLUMN}
-          WHEN actionType IN ({UNDO_ACTION_TYPES_STR}) THEN {UNDO_ACTION_TWEET_ID_COLUMN}
-      END AS tweetId,
-      CASE
-        WHEN actionType IN ({CONTRIBUTING_ACTION_TYPES_STR}) THEN 1
-        WHEN actionType IN ({UNDO_ACTION_TYPES_STR}) THEN -1
-      END AS doOrUndo
-    FROM `twttr-bql-unified-prod.unified_user_actions_engagements.streaming_unified_user_actions_engagements`, vars
-    WHERE (DATE(dateHour) >= DATE(vars.start_date) AND DATE(dateHour) <= DATE(vars.end_date))
-      AND eventMetadata.sourceTimestampMs >= UNIX_MILLIS(vars.start_date)
-      AND eventMetadata.sourceTimestampMs <= UNIX_MILLIS(vars.end_date)
-      AND (actionType IN ({CONTRIBUTING_ACTION_TYPES_STR})
-            OR actionType IN ({UNDO_ACTION_TYPES_STR}))
-  ),
-
-  -- Get video tweet ids
-  video_tweet_ids AS (
-      WITH vars AS (
-        SELECT
-          TIMESTAMP("{START_TIME}") AS start_date,
-          TIMESTAMP("{END_TIME}") AS end_date
-      ),
-
-      -- Get raw user-tweet interaction events from UUA
-      video_view_engagements AS (
-        SELECT item.tweetInfo.actionTweetId AS tweetId
-        FROM `twttr-bql-unified-prod.unified_user_actions_engagements.streaming_unified_user_actions_engagements`, vars
-        WHERE (DATE(dateHour) >= DATE(vars.start_date) AND DATE(dateHour) <= DATE(vars.end_date))
-          AND eventMetadata.sourceTimestampMs >= UNIX_MILLIS(start_date)
-          AND eventMetadata.sourceTimestampMs <= UNIX_MILLIS(end_date)
-          AND (actionType IN ("ClientTweetVideoPlayback50")
-                OR actionType IN ("ClientTweetVideoPlayback95"))
-      )
-
-      SELECT DISTINCT(tweetId)
-      FROM video_view_engagements
-  ),
-
-  -- Join video tweet ids
-  video_tweets_engagements AS (
-      SELECT raw_engagements.*
-      FROM raw_engagements JOIN video_tweet_ids USING(tweetId)
-  ),
-
-  -- Group by userId and tweetId
-  user_tweet_engagement_pairs AS (
-    SELECT userId, tweetId, ARRAY_AGG(STRUCT(doOrUndo, tsMillis) ORDER BY tsMillis DESC LIMIT 1) AS details, COUNT(*) AS cnt
-    FROM video_tweets_engagements
-    GROUP BY userId, tweetId
-  )
-
--- Remove undo events
-SELECT userId, tweetId, CAST(dt.tsMillis  AS FLOAT64) AS tsMillis
-FROM user_tweet_engagement_pairs, vars
-CROSS JOIN UNNEST(details) AS dt
-WHERE dt.doOrUndo = 1
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/BUILD b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/BUILD
deleted file mode 100644
index 43135fdf9..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/BUILD
+++ /dev/null
@@ -1,110 +0,0 @@
-scala_library(
-    name = "bq_generation",
-    sources = [
-        "**/*.scala",
-    ],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "beam-internal/src/main/scala/com/twitter/beam/io/dal",
-        "beam-internal/src/main/scala/com/twitter/scio_internal/job",
-        "beam-internal/src/main/scala/com/twitter/scio_internal/runner/dataflow",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources:offline_tweet_recommendations_from_interested_in_20M_145K_2020-scala",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources:offline_tweet_recommendations_from_interested_in_20M_145K_2020_HL_0_EL_15-scala",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources:offline_tweet_recommendations_from_interested_in_20M_145K_2020_HL_2_EL_15-scala",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources:offline_tweet_recommendations_from_interested_in_20M_145K_2020_HL_2_EL_50-scala",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources:offline_tweet_recommendations_from_interested_in_20M_145K_2020_HL_8_EL_50-scala",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources:offline_tweet_recommendations_from_mts_consumer_embeddings-scala",
-        "src/scala/com/twitter/simclusters_v2/scio/bq_generation/common",
-        "src/scala/com/twitter/simclusters_v2/scio/bq_generation/sql",
-        "src/scala/com/twitter/wtf/beam/bq_embedding_export:bq_embedding_export_lib",
-        "tcdc/bq_blaster/src/main/scala/com/twitter/tcdc/bqblaster/beam",
-    ],
-)
-
-jvm_binary(
-    name = "iikf-tweets-ann-adhoc-job",
-    main = "com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.IIKF2020TweetsANNBQAdhocJob",
-    platform = "java8",
-    dependencies = [
-        ":bq_generation",
-    ],
-)
-
-jvm_binary(
-    name = "iikf-hl-8-el-50-tweets-ann-adhoc-job",
-    main = "com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.IIKF2020Hl8El50TweetsANNBQAdhocJob",
-    platform = "java8",
-    dependencies = [
-        ":bq_generation",
-    ],
-)
-
-jvm_binary(
-    name = "iikf-tweets-ann-batch-job",
-    main = "com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.IIKF2020TweetsANNBQBatchJob",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":bq_generation",
-    ],
-)
-
-jvm_binary(
-    name = "iikf-hl-0-el-15-tweets-ann-batch-job",
-    main = "com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.IIKF2020Hl0El15TweetsANNBQBatchJob",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":bq_generation",
-    ],
-)
-
-jvm_binary(
-    name = "iikf-hl-2-el-15-tweets-ann-batch-job",
-    main = "com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.IIKF2020Hl2El15TweetsANNBQBatchJob",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":bq_generation",
-    ],
-)
-
-jvm_binary(
-    name = "iikf-hl-2-el-50-tweets-ann-batch-job",
-    main = "com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.IIKF2020Hl2El50TweetsANNBQBatchJob",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":bq_generation",
-    ],
-)
-
-jvm_binary(
-    name = "iikf-hl-8-el-50-tweets-ann-batch-job",
-    main = "com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.IIKF2020Hl8El50TweetsANNBQBatchJob",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":bq_generation",
-    ],
-)
-
-jvm_binary(
-    name = "mts-consumer-embeddings-tweets-ann-adhoc-job",
-    main = "com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.MTSConsumerEmbeddingsTweetsANNBQAdhocJob",
-    platform = "java8",
-    dependencies = [
-        ":bq_generation",
-    ],
-)
-
-jvm_binary(
-    name = "mts-consumer-embeddings-tweets-ann-batch-job",
-    main = "com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.MTSConsumerEmbeddingsTweetsANNBQBatchJob",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":bq_generation",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/BUILD.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/BUILD.docx
new file mode 100644
index 000000000..6592db693
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/Config.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/Config.docx
new file mode 100644
index 000000000..083f5a2c4
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/Config.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/Config.scala b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/Config.scala
deleted file mode 100644
index 9046768bb..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/Config.scala
+++ /dev/null
@@ -1,33 +0,0 @@
-package com.twitter.simclusters_v2.scio.bq_generation.tweets_ann
-
-object Config {
-  /*
-   * Common root path
-   */
-  val RootMHPath: String = "manhattan_sequence_files/offline_sann/"
-  val RootThriftPath: String = "processed/offline_sann/"
-  val AdhocRootPath = "adhoc/offline_sann/"
-
-  /*
-   * Variables for MH output path
-   */
-  val IIKFANNOutputPath: String = "tweets_ann/iikf"
-  val IIKFHL0EL15ANNOutputPath: String = "tweets_ann/iikf_hl_0_el_15"
-  val IIKFHL2EL15ANNOutputPath: String = "tweets_ann/iikf_hl_2_el_15"
-  val IIKFHL2EL50ANNOutputPath: String = "tweets_ann/iikf_hl_2_el_50"
-  val IIKFHL8EL50ANNOutputPath: String = "tweets_ann/iikf_hl_8_el_50"
-  val MTSConsumerEmbeddingsANNOutputPath: String = "tweets_ann/mts_consumer_embeddings"
-
-  /*
-   * Variables for tweet embeddings generation
-   */
-  val SimClustersTweetEmbeddingsGenerationHalfLife: Int = 28800000 // 8hrs in ms
-  val SimClustersTweetEmbeddingsGenerationEmbeddingLength: Int = 15
-
-  /*
-   * Variables for ANN
-   */
-  val SimClustersANNTopNClustersPerSourceEmbedding: Int = 20
-  val SimClustersANNTopMTweetsPerCluster: Int = 50
-  val SimClustersANNTopKTweetsPerUserRequest: Int = 200
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/README b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/README
deleted file mode 100644
index 7947963af..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/README
+++ /dev/null
@@ -1,95 +0,0 @@
-To run iikf-tweets-ann-adhoc-job (adhoc):
-bin/d6w create \
-  ${GCP_PROJECT_NAME}/us-central1/iikf-tweets-ann-adhoc-job \
-  src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-adhoc-job.d6w \
-  --jar dist/iikf-tweets-ann-adhoc-job.jar \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=your_ldap \
-  --bind=profile.date="2022-03-28" \
-  --bind=profile.machine="n2-highmem-4" \
-  --bind=profile.job_name="iikf-tweets-ann-adhoc-job" --ignore-existing
-
-To run iikf-hl-8-el-50-tweets-ann-adhoc-job (adhoc):
-bin/d6w create \
-  ${GCP_PROJECT_NAME}/us-central1/iikf-hl-8-el-50-tweets-ann-adhoc-job \
-  src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-adhoc-job.d6w \
-  --jar dist/iikf-hl-8-el-50-tweets-ann-adhoc-job.jar \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=your_ldap \
-  --bind=profile.date="2022-03-28" \
-  --bind=profile.machine="n2-highmem-4" \
-  --bind=profile.job_name="iikf-hl-8-el-50-tweets-ann-adhoc-job" --ignore-existing
-
-To run mts-consumer-embeddings-tweets-ann-adhoc-job (adhoc)
-bin/d6w create \
-  ${GCP_PROJECT_NAME}/us-central1/mts-consumer-embeddings-tweets-ann-adhoc-job \
-  src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-adhoc-job.d6w \
-  --jar dist/mts-consumer-embeddings-tweets-ann-adhoc-job.jar \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=your_ldap \
-  --bind=profile.date="2022-03-28" \
-  --bind=profile.machine="n2-highmem-4" \
-  --bind=profile.job_name="mts-consumer-embeddings-tweets-ann-adhoc-job" --ignore-existing
-
-
-To schedule iikf-tweets-ann-batch-job (batch)
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/iikf-tweets-ann-batch-job \
-  src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-batch-job.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=cassowary \
-  --bind=profile.date="2022-03-26" \
-  --bind=profile.machine="n2-highmem-4" \
-  --bind=profile.job_name="iikf-tweets-ann-batch-job"
-
-To schedule iikf-hl-0-el-15-tweets-ann-batch-job (batch)
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/iikf-hl-0-el-15-tweets-ann-batch-job \
-  src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-0-el-15-tweets-ann-batch-job.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=cassowary \
-  --bind=profile.date="2022-03-26" \
-  --bind=profile.machine="n2-highmem-4" \
-  --bind=profile.job_name="iikf-hl-0-el-15-tweets-ann-batch-job"
-
-To schedule iikf-hl-2-el-15-tweets-ann-batch-job (batch)
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/iikf-hl-2-el-15-tweets-ann-batch-job \
-  src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-15-tweets-ann-batch-job.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=cassowary \
-  --bind=profile.date="2022-03-26" \
-  --bind=profile.machine="n2-highmem-4" \
-  --bind=profile.job_name="iikf-hl-2-el-15-tweets-ann-batch-job"
-
-To schedule iikf-hl-2-el-50-tweets-ann-batch-job (batch)
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/iikf-hl-2-el-50-tweets-ann-batch-job \
-  src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-50-tweets-ann-batch-job.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=cassowary \
-  --bind=profile.date="2022-03-26" \
-  --bind=profile.machine="n2-highmem-4" \
-  --bind=profile.job_name="iikf-hl-2-el-50-tweets-ann-batch-job"
-
-To schedule iikf-hl-8-el-50-tweets-ann-batch-job (batch)
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/iikf-hl-8-el-50-tweets-ann-batch-job \
-  src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-batch-job.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=cassowary \
-  --bind=profile.date="2022-03-26" \
-  --bind=profile.machine="n2-highmem-4" \
-  --bind=profile.job_name="iikf-hl-8-el-50-tweets-ann-batch-job"
-
-To schedule mts-consumer-embeddings-tweets-ann-batch-job(batch)
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/mts-consumer-embeddings-tweets-ann-batch-job \
-  src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-batch-job.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=cassowary \
-  --bind=profile.date="2022-03-26" \
-  --bind=profile.machine="n2-highmem-4" \
-  --bind=profile.job_name="mts-consumer-embeddings-tweets-ann-batch-job"
-
-
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/README.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/README.docx
new file mode 100644
index 000000000..67919c3be
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/README.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNFromBQ.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNFromBQ.docx
new file mode 100644
index 000000000..002b8808e
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNFromBQ.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNFromBQ.scala b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNFromBQ.scala
deleted file mode 100644
index 23663ab9a..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNFromBQ.scala
+++ /dev/null
@@ -1,120 +0,0 @@
-package com.twitter.simclusters_v2.scio.bq_generation
-package tweets_ann
-
-import com.spotify.scio.ScioContext
-import com.spotify.scio.values.SCollection
-import com.twitter.simclusters_v2.thriftscala.CandidateTweet
-import com.twitter.wtf.beam.bq_embedding_export.BQQueryUtils
-import org.apache.avro.generic.GenericData
-import org.apache.avro.generic.GenericRecord
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO
-import org.apache.beam.sdk.io.gcp.bigquery.SchemaAndRecord
-import org.apache.beam.sdk.transforms.SerializableFunction
-import org.joda.time.DateTime
-import scala.collection.mutable.ListBuffer
-
-object TweetsANNFromBQ {
-  // Default ANN config variables
-  val topNClustersPerSourceEmbedding = Config.SimClustersANNTopNClustersPerSourceEmbedding
-  val topMTweetsPerCluster = Config.SimClustersANNTopMTweetsPerCluster
-  val topKTweetsPerUserRequest = Config.SimClustersANNTopKTweetsPerUserRequest
-
-  // SQL file paths
-  val tweetsANNSQLPath =
-    s"/com/twitter/simclusters_v2/scio/bq_generation/sql/tweets_ann.sql"
-  val tweetsEmbeddingGenerationSQLPath =
-    s"/com/twitter/simclusters_v2/scio/bq_generation/sql/tweet_embeddings_generation.sql"
-
-  // Function that parses the GenericRecord results we read from BQ
-  val parseUserToTweetRecommendationsFunc =
-    new SerializableFunction[SchemaAndRecord, UserToTweetRecommendations] {
-      override def apply(record: SchemaAndRecord): UserToTweetRecommendations = {
-        val genericRecord: GenericRecord = record.getRecord()
-        UserToTweetRecommendations(
-          userId = genericRecord.get("userId").toString.toLong,
-          tweetCandidates = parseTweetIdColumn(genericRecord, "tweets"),
-        )
-      }
-    }
-
-  // Parse tweetId candidates column
-  def parseTweetIdColumn(
-    genericRecord: GenericRecord,
-    columnName: String
-  ): List[CandidateTweet] = {
-    val tweetIds: GenericData.Array[GenericRecord] =
-      genericRecord.get(columnName).asInstanceOf[GenericData.Array[GenericRecord]]
-    val results: ListBuffer[CandidateTweet] = new ListBuffer[CandidateTweet]()
-    tweetIds.forEach((sc: GenericRecord) => {
-      results += CandidateTweet(
-        tweetId = sc.get("tweetId").toString.toLong,
-        score = Some(sc.get("logCosineSimilarityScore").toString.toDouble)
-      )
-    })
-    results.toList
-  }
-
-  def getTweetEmbeddingsSQL(
-    queryDate: DateTime,
-    consumerEmbeddingsSQL: String,
-    tweetEmbeddingsSQLPath: String,
-    tweetEmbeddingsHalfLife: Int,
-    tweetEmbeddingsLength: Int
-  ): String = {
-    // We read one day of fav events to construct our tweet embeddings
-    val templateVariables =
-      Map(
-        "CONSUMER_EMBEDDINGS_SQL" -> consumerEmbeddingsSQL,
-        "QUERY_DATE" -> queryDate.toString(),
-        "START_TIME" -> queryDate.minusDays(1).toString(),
-        "END_TIME" -> queryDate.toString(),
-        "MIN_SCORE_THRESHOLD" -> 0.0.toString,
-        "HALF_LIFE" -> tweetEmbeddingsHalfLife.toString,
-        "TWEET_EMBEDDING_LENGTH" -> tweetEmbeddingsLength.toString,
-        "NO_OLDER_TWEETS_THAN_DATE" -> queryDate.minusDays(1).toString(),
-      )
-    BQQueryUtils.getBQQueryFromSqlFile(tweetEmbeddingsSQLPath, templateVariables)
-  }
-
-  def getTweetRecommendationsBQ(
-    sc: ScioContext,
-    queryTimestamp: DateTime,
-    consumerEmbeddingsSQL: String,
-    tweetEmbeddingsHalfLife: Int,
-    tweetEmbeddingsLength: Int
-  ): SCollection[UserToTweetRecommendations] = {
-    // Get the tweet embeddings SQL string based on the provided consumerEmbeddingsSQL
-    val tweetEmbeddingsSQL =
-      getTweetEmbeddingsSQL(
-        queryTimestamp,
-        consumerEmbeddingsSQL,
-        tweetsEmbeddingGenerationSQLPath,
-        tweetEmbeddingsHalfLife,
-        tweetEmbeddingsLength
-      )
-
-    // Define template variables which we would like to be replaced in the corresponding sql file
-    val templateVariables =
-      Map(
-        "CONSUMER_EMBEDDINGS_SQL" -> consumerEmbeddingsSQL,
-        "TWEET_EMBEDDINGS_SQL" -> tweetEmbeddingsSQL,
-        "TOP_N_CLUSTER_PER_SOURCE_EMBEDDING" -> topNClustersPerSourceEmbedding.toString,
-        "TOP_M_TWEETS_PER_CLUSTER" -> topMTweetsPerCluster.toString,
-        "TOP_K_TWEETS_PER_USER_REQUEST" -> topKTweetsPerUserRequest.toString
-      )
-    val query = BQQueryUtils.getBQQueryFromSqlFile(tweetsANNSQLPath, templateVariables)
-
-    // Run SimClusters ANN on BQ and parse the results
-    sc.customInput(
-      s"SimClusters BQ ANN",
-      BigQueryIO
-        .read(parseUserToTweetRecommendationsFunc)
-        .fromQuery(query)
-        .usingStandardSql()
-    )
-  }
-
-  case class UserToTweetRecommendations(
-    userId: Long,
-    tweetCandidates: List[CandidateTweet])
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNJob.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNJob.docx
new file mode 100644
index 000000000..39d74cceb
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNJob.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNJob.scala b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNJob.scala
deleted file mode 100644
index 81a89f3ff..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/TweetsANNJob.scala
+++ /dev/null
@@ -1,297 +0,0 @@
-package com.twitter.simclusters_v2.scio.bq_generation
-package tweets_ann
-
-import com.google.api.services.bigquery.model.TimePartitioning
-import com.spotify.scio.ScioContext
-import com.spotify.scio.coders.Coder
-import com.twitter.beam.io.dal.DAL
-import com.twitter.beam.io.fs.multiformat.PathLayout
-import com.twitter.beam.job.DateRangeOptions
-import com.twitter.conversions.DurationOps.richDurationFromInt
-import com.twitter.dal.client.dataset.KeyValDALDataset
-import com.twitter.scalding_internal.multiformat.format.keyval.KeyVal
-import com.twitter.scio_internal.coders.ThriftStructLazyBinaryScroogeCoder
-import com.twitter.scio_internal.job.ScioBeamJob
-import com.twitter.scrooge.ThriftStruct
-import com.twitter.simclusters_v2.scio.bq_generation.common.BQGenerationUtil.getMTSConsumerEmbeddingsFav90P20MSQL
-import com.twitter.simclusters_v2.scio.bq_generation.common.BQGenerationUtil.getInterestedIn2020SQL
-import com.twitter.simclusters_v2.scio.bq_generation.tweets_ann.TweetsANNFromBQ.getTweetRecommendationsBQ
-import com.twitter.simclusters_v2.hdfs_sources.OfflineTweetRecommendationsFromInterestedIn20M145K2020ScalaDataset
-import com.twitter.simclusters_v2.hdfs_sources.OfflineTweetRecommendationsFromInterestedIn20M145K2020Hl0El15ScalaDataset
-import com.twitter.simclusters_v2.hdfs_sources.OfflineTweetRecommendationsFromInterestedIn20M145K2020Hl2El15ScalaDataset
-import com.twitter.simclusters_v2.hdfs_sources.OfflineTweetRecommendationsFromInterestedIn20M145K2020Hl2El50ScalaDataset
-import com.twitter.simclusters_v2.hdfs_sources.OfflineTweetRecommendationsFromInterestedIn20M145K2020Hl8El50ScalaDataset
-import com.twitter.simclusters_v2.hdfs_sources.OfflineTweetRecommendationsFromMtsConsumerEmbeddingsScalaDataset
-import com.twitter.simclusters_v2.scio.bq_generation.common.BQTableDetails
-import com.twitter.simclusters_v2.thriftscala.CandidateTweets
-import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
-import com.twitter.tcdc.bqblaster.beam.syntax.BigQueryIOHelpers
-import com.twitter.tcdc.bqblaster.beam.BQBlasterIO.AvroConverter
-import com.twitter.tcdc.bqblaster.core.avro.TypedProjection
-import com.twitter.tcdc.bqblaster.core.transform.RootTransform
-import java.time.Instant
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO
-import org.joda.time.DateTime
-
-trait TweetsANNJob extends ScioBeamJob[DateRangeOptions] {
-  // Configs to set for different type of embeddings and jobs
-  val isAdhoc: Boolean
-  val getConsumerEmbeddingsSQLFunc: (DateTime, Int) => String
-  val outputTable: BQTableDetails
-  val keyValDatasetOutputPath: String
-  val tweetRecommentationsSnapshotDataset: KeyValDALDataset[KeyVal[Long, CandidateTweetsList]]
-  val tweetEmbeddingsGenerationHalfLife: Int = Config.SimClustersTweetEmbeddingsGenerationHalfLife
-  val tweetEmbeddingsGenerationEmbeddingLength: Int =
-    Config.SimClustersTweetEmbeddingsGenerationEmbeddingLength
-
-  // Base configs
-  val projectId = "twttr-recos-ml-prod"
-  val environment: DAL.Env = if (isAdhoc) DAL.Environment.Dev else DAL.Environment.Prod
-
-  override implicit def scroogeCoder[T <: ThriftStruct: Manifest]: Coder[T] =
-    ThriftStructLazyBinaryScroogeCoder.scroogeCoder
-
-  override def configurePipeline(sc: ScioContext, opts: DateRangeOptions): Unit = {
-    // The time when the job is scheduled
-    val queryTimestamp = opts.interval.getEnd
-
-    // Read consumer embeddings SQL
-    val consumerEmbeddingsSQL = getConsumerEmbeddingsSQLFunc(queryTimestamp, 14)
-
-    // Generate tweet embeddings and tweet ANN results
-    val tweetRecommendations =
-      getTweetRecommendationsBQ(
-        sc,
-        queryTimestamp,
-        consumerEmbeddingsSQL,
-        tweetEmbeddingsGenerationHalfLife,
-        tweetEmbeddingsGenerationEmbeddingLength
-      )
-
-    // Setup BQ writer
-    val ingestionTime = opts.getDate().value.getEnd.toDate
-    val bqFieldsTransform = RootTransform
-      .Builder()
-      .withPrependedFields("ingestionTime" -> TypedProjection.fromConstant(ingestionTime))
-    val timePartitioning = new TimePartitioning()
-      .setType("HOUR").setField("ingestionTime").setExpirationMs(3.days.inMilliseconds)
-    val bqWriter = BigQueryIO
-      .write[CandidateTweets]
-      .to(outputTable.toString)
-      .withExtendedErrorInfo()
-      .withTimePartitioning(timePartitioning)
-      .withLoadJobProjectId(projectId)
-      .withThriftSupport(bqFieldsTransform.build(), AvroConverter.Legacy)
-      .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
-      .withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND)
-
-    // Save Tweet ANN results to BQ
-    tweetRecommendations
-      .map { userToTweetRecommendations =>
-        {
-          CandidateTweets(
-            targetUserId = userToTweetRecommendations.userId,
-            recommendedTweets = userToTweetRecommendations.tweetCandidates)
-        }
-      }
-      .saveAsCustomOutput(s"WriteToBQTable - ${outputTable}", bqWriter)
-
-    // Save Tweet ANN results as KeyValSnapshotDataset
-    tweetRecommendations
-      .map { userToTweetRecommendations =>
-        KeyVal(
-          userToTweetRecommendations.userId,
-          CandidateTweetsList(userToTweetRecommendations.tweetCandidates))
-      }.saveAsCustomOutput(
-        name = "WriteTweetRecommendationsToKeyValDataset",
-        DAL.writeVersionedKeyVal(
-          tweetRecommentationsSnapshotDataset,
-          PathLayout.VersionedPath(prefix =
-            ((if (!isAdhoc)
-                Config.RootMHPath
-              else
-                Config.AdhocRootPath)
-              + keyValDatasetOutputPath)),
-          instant = Instant.ofEpochMilli(opts.interval.getEndMillis - 1L),
-          environmentOverride = environment,
-        )
-      )
-  }
-
-}
-
-/**
- * Scio job for adhoc run for tweet recommendations from IIKF 2020
- */
-object IIKF2020TweetsANNBQAdhocJob extends TweetsANNJob {
-  override val isAdhoc = true
-  override val getConsumerEmbeddingsSQLFunc = getInterestedIn2020SQL
-  override val outputTable = BQTableDetails(
-    "twttr-recos-ml-prod",
-    "multi_type_simclusters",
-    "offline_tweet_recommendations_from_interested_in_20M_145K_2020_adhoc")
-  override val keyValDatasetOutputPath = Config.IIKFANNOutputPath
-  override val tweetRecommentationsSnapshotDataset: KeyValDALDataset[
-    KeyVal[Long, CandidateTweetsList]
-  ] =
-    OfflineTweetRecommendationsFromInterestedIn20M145K2020ScalaDataset
-}
-
-/**
- * Scio job for adhoc run for tweet recommendations from IIKF 2020 with
- * - Half life = 8hrs
- * - Embedding Length = 50
- */
-object IIKF2020Hl8El50TweetsANNBQAdhocJob extends TweetsANNJob {
-  override val isAdhoc = true
-  override val getConsumerEmbeddingsSQLFunc = getInterestedIn2020SQL
-  override val outputTable = BQTableDetails(
-    "twttr-recos-ml-prod",
-    "multi_type_simclusters",
-    "offline_tweet_recommendations_from_interested_in_20M_145K_2020_HL_8_EL_50_adhoc")
-  override val keyValDatasetOutputPath = Config.IIKFHL8EL50ANNOutputPath
-  override val tweetEmbeddingsGenerationEmbeddingLength: Int = 50
-  override val tweetRecommentationsSnapshotDataset: KeyValDALDataset[
-    KeyVal[Long, CandidateTweetsList]
-  ] = {
-    OfflineTweetRecommendationsFromInterestedIn20M145K2020Hl8El50ScalaDataset
-  }
-}
-
-/**
- * Scio job for adhoc run for tweet recommendations from MTS Consumer Embeddings
- */
-object MTSConsumerEmbeddingsTweetsANNBQAdhocJob extends TweetsANNJob {
-  override val isAdhoc = true
-  override val getConsumerEmbeddingsSQLFunc = getMTSConsumerEmbeddingsFav90P20MSQL
-  override val outputTable = BQTableDetails(
-    "twttr-recos-ml-prod",
-    "multi_type_simclusters",
-    "offline_tweet_recommendations_from_mts_consumer_embeddings_adhoc")
-  override val keyValDatasetOutputPath = Config.MTSConsumerEmbeddingsANNOutputPath
-  override val tweetRecommentationsSnapshotDataset: KeyValDALDataset[
-    KeyVal[Long, CandidateTweetsList]
-  ] =
-    OfflineTweetRecommendationsFromMtsConsumerEmbeddingsScalaDataset
-}
-
-/**
-Scio job for batch run for tweet recommendations from IIKF 2020
-The schedule cmd needs to be run only if there is any change in the config
- */
-object IIKF2020TweetsANNBQBatchJob extends TweetsANNJob {
-  override val isAdhoc = false
-  override val getConsumerEmbeddingsSQLFunc = getInterestedIn2020SQL
-  override val outputTable = BQTableDetails(
-    "twttr-bq-cassowary-prod",
-    "user",
-    "offline_tweet_recommendations_from_interested_in_20M_145K_2020")
-  override val keyValDatasetOutputPath = Config.IIKFANNOutputPath
-  override val tweetRecommentationsSnapshotDataset: KeyValDALDataset[
-    KeyVal[Long, CandidateTweetsList]
-  ] =
-    OfflineTweetRecommendationsFromInterestedIn20M145K2020ScalaDataset
-}
-
-/**
-Scio job for batch run for tweet recommendations from IIKF 2020 with parameter setup:
- - Half Life: None, no decay, direct sum
- - Embedding Length: 15
-The schedule cmd needs to be run only if there is any change in the config
- */
-object IIKF2020Hl0El15TweetsANNBQBatchJob extends TweetsANNJob {
-  override val isAdhoc = false
-  override val getConsumerEmbeddingsSQLFunc = getInterestedIn2020SQL
-  override val outputTable = BQTableDetails(
-    "twttr-bq-cassowary-prod",
-    "user",
-    "offline_tweet_recommendations_from_interested_in_20M_145K_2020_HL_0_EL_15")
-  override val keyValDatasetOutputPath = Config.IIKFHL0EL15ANNOutputPath
-  override val tweetEmbeddingsGenerationHalfLife: Int = -1
-  override val tweetRecommentationsSnapshotDataset: KeyValDALDataset[
-    KeyVal[Long, CandidateTweetsList]
-  ] =
-    OfflineTweetRecommendationsFromInterestedIn20M145K2020Hl0El15ScalaDataset
-}
-
-/**
-Scio job for batch run for tweet recommendations from IIKF 2020 with parameter setup:
- - Half Life: 2hrs
- - Embedding Length: 15
-The schedule cmd needs to be run only if there is any change in the config
- */
-object IIKF2020Hl2El15TweetsANNBQBatchJob extends TweetsANNJob {
-  override val isAdhoc = false
-  override val getConsumerEmbeddingsSQLFunc = getInterestedIn2020SQL
-  override val outputTable = BQTableDetails(
-    "twttr-bq-cassowary-prod",
-    "user",
-    "offline_tweet_recommendations_from_interested_in_20M_145K_2020_HL_2_EL_15")
-  override val keyValDatasetOutputPath = Config.IIKFHL2EL15ANNOutputPath
-  override val tweetEmbeddingsGenerationHalfLife: Int = 7200000 // 2hrs in ms
-  override val tweetRecommentationsSnapshotDataset: KeyValDALDataset[
-    KeyVal[Long, CandidateTweetsList]
-  ] =
-    OfflineTweetRecommendationsFromInterestedIn20M145K2020Hl2El15ScalaDataset
-}
-
-/**
-Scio job for batch run for tweet recommendations from IIKF 2020 with parameter setup:
- - Half Life: 2hrs
- - Embedding Length: 50
-The schedule cmd needs to be run only if there is any change in the config
- */
-object IIKF2020Hl2El50TweetsANNBQBatchJob extends TweetsANNJob {
-  override val isAdhoc = false
-  override val getConsumerEmbeddingsSQLFunc = getInterestedIn2020SQL
-  override val outputTable = BQTableDetails(
-    "twttr-bq-cassowary-prod",
-    "user",
-    "offline_tweet_recommendations_from_interested_in_20M_145K_2020_HL_2_EL_50")
-  override val keyValDatasetOutputPath = Config.IIKFHL2EL50ANNOutputPath
-  override val tweetEmbeddingsGenerationHalfLife: Int = 7200000 // 2hrs in ms
-  override val tweetEmbeddingsGenerationEmbeddingLength: Int = 50
-  override val tweetRecommentationsSnapshotDataset: KeyValDALDataset[
-    KeyVal[Long, CandidateTweetsList]
-  ] =
-    OfflineTweetRecommendationsFromInterestedIn20M145K2020Hl2El50ScalaDataset
-}
-
-/**
-Scio job for batch run for tweet recommendations from IIKF 2020 with parameter setup:
- - Half Life: 8hrs
- - Embedding Length: 50
-The schedule cmd needs to be run only if there is any change in the config
- */
-object IIKF2020Hl8El50TweetsANNBQBatchJob extends TweetsANNJob {
-  override val isAdhoc = false
-  override val getConsumerEmbeddingsSQLFunc = getInterestedIn2020SQL
-  override val outputTable = BQTableDetails(
-    "twttr-bq-cassowary-prod",
-    "user",
-    "offline_tweet_recommendations_from_interested_in_20M_145K_2020_HL_8_EL_50")
-  override val keyValDatasetOutputPath = Config.IIKFHL8EL50ANNOutputPath
-  override val tweetEmbeddingsGenerationEmbeddingLength: Int = 50
-  override val tweetRecommentationsSnapshotDataset: KeyValDALDataset[
-    KeyVal[Long, CandidateTweetsList]
-  ] =
-    OfflineTweetRecommendationsFromInterestedIn20M145K2020Hl8El50ScalaDataset
-}
-
-/**
-Scio job for batch run for tweet recommendations from MTS Consumer Embeddings
-The schedule cmd needs to be run only if there is any change in the config
- */
-object MTSConsumerEmbeddingsTweetsANNBQBatchJob extends TweetsANNJob {
-  override val isAdhoc = false
-  override val getConsumerEmbeddingsSQLFunc = getMTSConsumerEmbeddingsFav90P20MSQL
-  override val outputTable = BQTableDetails(
-    "twttr-bq-cassowary-prod",
-    "user",
-    "offline_tweet_recommendations_from_mts_consumer_embeddings")
-  override val keyValDatasetOutputPath = Config.MTSConsumerEmbeddingsANNOutputPath
-  override val tweetRecommentationsSnapshotDataset: KeyValDALDataset[
-    KeyVal[Long, CandidateTweetsList]
-  ] =
-    OfflineTweetRecommendationsFromMtsConsumerEmbeddingsScalaDataset
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-0-el-15-tweets-ann-batch-job.d6w b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-0-el-15-tweets-ann-batch-job.d6w
deleted file mode 100644
index b86af2653..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-0-el-15-tweets-ann-batch-job.d6w
+++ /dev/null
@@ -1,39 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  job_name = Default(String, 'iikf-hl-0-el-15-tweets-ann-batch-job')
-  machine = Default(String, 'n2-highmem-4')
-
-job = Job(
-   name='{{profile.job_name}}',
-   project='{{profile.project}}',
-   staging_bucket='{{profile.project}}',
-   service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-   region='us-central1',
-   worker_config=WorkerConfig(
-       num_workers=2,
-       worker_machine_type='{{profile.machine}}',
-       worker_disk_type=WorkerDiskType('HDD'),
-   ),
-   extra_args={
-     "date": '{{profile.date}}'
-   },
-   service_identifier='twtr:svc:{{profile.user_name}}:{{profile.job_name}}:{{profile.environment}}:{{profile.cluster}}',
-   deployment_config=BatchDeploymentConfig(
-     role='{{profile.user_name}}',
-     environment='prod',
-     build_target='src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann:iikf-hl-0-el-15-tweets-ann-batch-job',
-     gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-     statebird_config=StatebirdConfig(
-       batch_width='PT4H',
-       first_time='{{profile.date}}',
-     ),
-     workflow_config=WorkflowConfig(
-      play=True,
-     ),
-     timeout='PT24H'
-   )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-0-el-15-tweets-ann-batch-job.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-0-el-15-tweets-ann-batch-job.docx
new file mode 100644
index 000000000..39c6d2a75
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-0-el-15-tweets-ann-batch-job.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-15-tweets-ann-batch-job.d6w b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-15-tweets-ann-batch-job.d6w
deleted file mode 100644
index 55a9b5382..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-15-tweets-ann-batch-job.d6w
+++ /dev/null
@@ -1,39 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  job_name = Default(String, 'iikf-hl-2-el-15-tweets-ann-batch-job')
-  machine = Default(String, 'n2-highmem-4')
-
-job = Job(
-   name='{{profile.job_name}}',
-   project='{{profile.project}}',
-   staging_bucket='{{profile.project}}',
-   service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-   region='us-central1',
-   worker_config=WorkerConfig(
-       num_workers=2,
-       worker_machine_type='{{profile.machine}}',
-       worker_disk_type=WorkerDiskType('HDD'),
-   ),
-   extra_args={
-     "date": '{{profile.date}}'
-   },
-   service_identifier='twtr:svc:{{profile.user_name}}:{{profile.job_name}}:{{profile.environment}}:{{profile.cluster}}',
-   deployment_config=BatchDeploymentConfig(
-     role='{{profile.user_name}}',
-     environment='prod',
-     build_target='src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann:iikf-hl-2-el-15-tweets-ann-batch-job',
-     gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-     statebird_config=StatebirdConfig(
-       batch_width='PT4H',
-       first_time='{{profile.date}}',
-     ),
-     workflow_config=WorkflowConfig(
-      play=True,
-     ),
-     timeout='PT24H'
-   )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-15-tweets-ann-batch-job.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-15-tweets-ann-batch-job.docx
new file mode 100644
index 000000000..79456d69b
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-15-tweets-ann-batch-job.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-50-tweets-ann-batch-job.d6w b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-50-tweets-ann-batch-job.d6w
deleted file mode 100644
index 6fdd1c2f2..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-50-tweets-ann-batch-job.d6w
+++ /dev/null
@@ -1,39 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  job_name = Default(String, 'iikf-hl-2-el-50-tweets-ann-batch-job')
-  machine = Default(String, 'n2-highmem-4')
-
-job = Job(
-   name='{{profile.job_name}}',
-   project='{{profile.project}}',
-   staging_bucket='{{profile.project}}',
-   service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-   region='us-central1',
-   worker_config=WorkerConfig(
-       num_workers=2,
-       worker_machine_type='{{profile.machine}}',
-       worker_disk_type=WorkerDiskType('HDD'),
-   ),
-   extra_args={
-     "date": '{{profile.date}}'
-   },
-   service_identifier='twtr:svc:{{profile.user_name}}:{{profile.job_name}}:{{profile.environment}}:{{profile.cluster}}',
-   deployment_config=BatchDeploymentConfig(
-     role='{{profile.user_name}}',
-     environment='prod',
-     build_target='src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann:iikf-hl-2-el-50-tweets-ann-batch-job',
-     gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-     statebird_config=StatebirdConfig(
-       batch_width='PT4H',
-       first_time='{{profile.date}}',
-     ),
-     workflow_config=WorkflowConfig(
-      play=True,
-     ),
-     timeout='PT24H'
-   )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-50-tweets-ann-batch-job.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-50-tweets-ann-batch-job.docx
new file mode 100644
index 000000000..0be822f56
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-2-el-50-tweets-ann-batch-job.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-adhoc-job.d6w b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-adhoc-job.d6w
deleted file mode 100644
index beb0dbc93..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-adhoc-job.d6w
+++ /dev/null
@@ -1,39 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  job_name = Default(String, 'iikf-hl-8-el-50-tweets-ann-batch-job')
-  machine = Default(String, 'n2-highmem-4')
-
-job = Job(
-   name='{{profile.job_name}}',
-   project='{{profile.project}}',
-   staging_bucket='{{profile.project}}',
-   service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-   region='us-central1',
-   worker_config=WorkerConfig(
-       num_workers=2,
-       worker_machine_type='{{profile.machine}}',
-       worker_disk_type=WorkerDiskType('HDD'),
-   ),
-   extra_args={
-     "date": '{{profile.date}}'
-   },
-   service_identifier='twtr:svc:{{profile.user_name}}:{{profile.job_name}}:{{profile.environment}}:{{profile.cluster}}',
-   deployment_config=BatchDeploymentConfig(
-     role='{{profile.user_name}}',
-     environment='prod',
-     build_target='src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann:iikf-hl-8-el-50-tweets-ann-batch-job',
-     gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-     statebird_config=StatebirdConfig(
-       batch_width='PT4H',
-       first_time='{{profile.date}}',
-     ),
-     workflow_config=WorkflowConfig(
-      play=True,
-     ),
-     timeout='PT24H'
-   )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-adhoc-job.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-adhoc-job.docx
new file mode 100644
index 000000000..34da366e9
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-adhoc-job.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-batch-job.d6w b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-batch-job.d6w
deleted file mode 100644
index beb0dbc93..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-batch-job.d6w
+++ /dev/null
@@ -1,39 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  job_name = Default(String, 'iikf-hl-8-el-50-tweets-ann-batch-job')
-  machine = Default(String, 'n2-highmem-4')
-
-job = Job(
-   name='{{profile.job_name}}',
-   project='{{profile.project}}',
-   staging_bucket='{{profile.project}}',
-   service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-   region='us-central1',
-   worker_config=WorkerConfig(
-       num_workers=2,
-       worker_machine_type='{{profile.machine}}',
-       worker_disk_type=WorkerDiskType('HDD'),
-   ),
-   extra_args={
-     "date": '{{profile.date}}'
-   },
-   service_identifier='twtr:svc:{{profile.user_name}}:{{profile.job_name}}:{{profile.environment}}:{{profile.cluster}}',
-   deployment_config=BatchDeploymentConfig(
-     role='{{profile.user_name}}',
-     environment='prod',
-     build_target='src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann:iikf-hl-8-el-50-tweets-ann-batch-job',
-     gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-     statebird_config=StatebirdConfig(
-       batch_width='PT4H',
-       first_time='{{profile.date}}',
-     ),
-     workflow_config=WorkflowConfig(
-      play=True,
-     ),
-     timeout='PT24H'
-   )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-batch-job.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-batch-job.docx
new file mode 100644
index 000000000..34da366e9
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-hl-8-el-50-tweets-ann-batch-job.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-adhoc-job.d6w b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-adhoc-job.d6w
deleted file mode 100644
index 6cc067816..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-adhoc-job.d6w
+++ /dev/null
@@ -1,34 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  job_name = Default(String, 'iikf-tweets-ann-adhoc-job')
-  machine = Default(String, 'n2-highmem-4')
-
-job = Job(
-   name='{{profile.job_name}}',
-   project='{{profile.project}}',
-   staging_bucket='{{profile.project}}',
-   service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-   region='us-central1',
-   worker_config=WorkerConfig(
-       num_workers=2,
-       worker_machine_type='{{profile.machine}}',
-       worker_disk_type=WorkerDiskType('HDD'),
-   ),
-   extra_args={
-     "date": '{{profile.date}}'
-   },
-   service_identifier='twtr:svc:{{profile.user_name}}:{{profile.job_name}}:{{profile.environment}}:{{profile.cluster}}',
-   deployment_config=BatchDeploymentConfig(
-     role='{{profile.user_name}}',
-     build_target='src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann:iikf-tweets-ann-adhoc-job',
-     gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-     statebird_config=StatebirdConfig(
-       batch_width='PT2H',
-       first_time='{{profile.date}}',
-     ),
-   )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-adhoc-job.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-adhoc-job.docx
new file mode 100644
index 000000000..91d944343
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-adhoc-job.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-batch-job.d6w b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-batch-job.d6w
deleted file mode 100644
index 065a83eec..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-batch-job.d6w
+++ /dev/null
@@ -1,39 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  job_name = Default(String, 'iikf-tweets-ann-batch-job')
-  machine = Default(String, 'n2-highmem-4')
-
-job = Job(
-   name='{{profile.job_name}}',
-   project='{{profile.project}}',
-   staging_bucket='{{profile.project}}',
-   service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-   region='us-central1',
-   worker_config=WorkerConfig(
-       num_workers=2,
-       worker_machine_type='{{profile.machine}}',
-       worker_disk_type=WorkerDiskType('HDD'),
-   ),
-   extra_args={
-     "date": '{{profile.date}}'
-   },
-   service_identifier='twtr:svc:{{profile.user_name}}:{{profile.job_name}}:{{profile.environment}}:{{profile.cluster}}',
-   deployment_config=BatchDeploymentConfig(
-     role='{{profile.user_name}}',
-     environment='prod',
-     build_target='src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann:iikf-tweets-ann-batch-job',
-     gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-     statebird_config=StatebirdConfig(
-       batch_width='PT4H',
-       first_time='{{profile.date}}',
-     ),
-     workflow_config=WorkflowConfig(
-      play=True,
-     ),
-     timeout='PT24H'
-   )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-batch-job.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-batch-job.docx
new file mode 100644
index 000000000..29c103027
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/iikf-tweets-ann-batch-job.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-adhoc-job.d6w b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-adhoc-job.d6w
deleted file mode 100644
index c7f921708..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-adhoc-job.d6w
+++ /dev/null
@@ -1,34 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  job_name = Default(String, 'mts-consumer-embeddings-tweets-ann-adhoc-job')
-  machine = Default(String, 'n2-highmem-4')
-
-job = Job(
-   name='{{profile.job_name}}',
-   project='{{profile.project}}',
-   staging_bucket='{{profile.project}}',
-   service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-   region='us-central1',
-   worker_config=WorkerConfig(
-       num_workers=2,
-       worker_machine_type='{{profile.machine}}',
-       worker_disk_type=WorkerDiskType('HDD'),
-   ),
-   extra_args={
-     "date": '{{profile.date}}'
-   },
-   service_identifier='twtr:svc:{{profile.user_name}}:{{profile.job_name}}:{{profile.environment}}:{{profile.cluster}}',
-   deployment_config=BatchDeploymentConfig(
-     role='{{profile.user_name}}',
-     build_target='src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann:mts-consumer-embeddings-tweets-ann-adhoc-job',
-     gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-     statebird_config=StatebirdConfig(
-       batch_width='PT2H',
-       first_time='{{profile.date}}',
-     ),
-   )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-adhoc-job.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-adhoc-job.docx
new file mode 100644
index 000000000..5d3384630
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-adhoc-job.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-batch-job.d6w b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-batch-job.d6w
deleted file mode 100644
index d87e68e9f..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-batch-job.d6w
+++ /dev/null
@@ -1,39 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'prod')
-  job_name = Default(String, 'mts-consumer-embeddings-tweets-ann-batch-job')
-  machine = Default(String, 'n2-highmem-4')
-
-job = Job(
-   name='{{profile.job_name}}',
-   project='{{profile.project}}',
-   staging_bucket='{{profile.project}}',
-   service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-   region='us-central1',
-   worker_config=WorkerConfig(
-       num_workers=2,
-       worker_machine_type='{{profile.machine}}',
-       worker_disk_type=WorkerDiskType('HDD'),
-   ),
-   extra_args={
-     "date": '{{profile.date}}'
-   },
-   service_identifier='twtr:svc:{{profile.user_name}}:{{profile.job_name}}:{{profile.environment}}:{{profile.cluster}}',
-   deployment_config=BatchDeploymentConfig(
-     role='{{profile.user_name}}',
-     environment='prod',
-     build_target='src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann:mts-consumer-embeddings-tweets-ann-batch-job',
-     gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-     statebird_config=StatebirdConfig(
-       batch_width='PT4H',
-       first_time='{{profile.date}}',
-     ),
-     workflow_config=WorkflowConfig(
-      play=True,
-     ),
-     timeout='PT24H'
-   )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-batch-job.docx b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-batch-job.docx
new file mode 100644
index 000000000..c199ce187
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/bq_generation/tweets_ann/mts-consumer-embeddings-tweets-ann-batch-job.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/common/BUILD b/src/scala/com/twitter/simclusters_v2/scio/common/BUILD
deleted file mode 100644
index 1ad664680..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/common/BUILD
+++ /dev/null
@@ -1,21 +0,0 @@
-scala_library(
-    sources = [
-        "*.scala",
-    ],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "beam-internal/src/main/scala/com/twitter/beam/io/dal",
-        "beam-internal/src/main/scala/com/twitter/scio_internal/runner/dataflow",
-        "flockdb-tools/datasets/flock:flock-blocks-edges-scala",
-        "flockdb-tools/datasets/flock:flock-follows-edges-scala",
-        "flockdb-tools/datasets/flock:flock-report-as-abuse-edges-scala",
-        "flockdb-tools/datasets/flock:flock-report-as-spam-edges-scala",
-        "iesource/processing/events/src/main/scala/com/twitter/iesource/processing/events/batch:server_engagements-scala",
-        "src/scala/com/twitter/simclusters_v2/scalding",
-        "src/thrift/com/twitter/twadoop/user/gen:gen-scala",
-        "tweetsource/public_tweets/src/main/scala/com/twitter/tweetsource/public_tweets:public_tweets-scala",
-        "usersource/snapshot/src/main/scala/com/twitter/usersource/snapshot/flat:usersource_flat-scala",
-        "usersource/snapshot/src/main/thrift/com/twitter/usersource/snapshot/flat:flat-scala",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/scio/common/BUILD.docx b/src/scala/com/twitter/simclusters_v2/scio/common/BUILD.docx
new file mode 100644
index 000000000..c62ea5776
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/common/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/common/ExternalDataSources.docx b/src/scala/com/twitter/simclusters_v2/scio/common/ExternalDataSources.docx
new file mode 100644
index 000000000..ed5dcfd8c
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/common/ExternalDataSources.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/common/ExternalDataSources.scala b/src/scala/com/twitter/simclusters_v2/scio/common/ExternalDataSources.scala
deleted file mode 100644
index ed9e1aa2d..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/common/ExternalDataSources.scala
+++ /dev/null
@@ -1,301 +0,0 @@
-package com.twitter.simclusters_v2.scio.common
-
-import com.spotify.scio.ScioContext
-import com.spotify.scio.values.SCollection
-import com.twitter.beam.io.dal.DAL
-import com.twitter.common.util.Clock
-import com.twitter.common_header.thriftscala.CommonHeader
-import com.twitter.common_header.thriftscala.IdType
-import com.twitter.common_header.thriftscala.VersionedCommonHeader
-import com.twitter.frigate.data_pipeline.magicrecs.magicrecs_notifications_lite.thriftscala.MagicRecsNotificationLite
-import com.twitter.frigate.data_pipeline.scalding.magicrecs.magicrecs_notification_lite.MagicrecsNotificationLite1DayLagScalaDataset
-import com.twitter.iesource.thriftscala.InteractionEvent
-import com.twitter.iesource.thriftscala.InteractionTargetType
-import com.twitter.interests_ds.jobs.interests_service.UserTopicRelationSnapshotScalaDataset
-import com.twitter.interests.thriftscala.InterestRelationType
-import com.twitter.interests.thriftscala.UserInterestsRelationSnapshot
-import com.twitter.penguin.scalding.datasets.PenguinUserLanguagesScalaDataset
-import com.twitter.search.adaptive.scribing.thriftscala.AdaptiveSearchScribeLog
-import com.twitter.simclusters_v2.hdfs_sources.UserUserFavGraphScalaDataset
-import com.twitter.simclusters_v2.scalding.embedding.common.ExternalDataSources.ValidFlockEdgeStateId
-import com.twitter.simclusters_v2.scalding.embedding.common.ExternalDataSources.getStandardLanguageCode
-import com.twitter.twadoop.user.gen.thriftscala.CombinedUser
-import flockdb_tools.datasets.flock.FlockBlocksEdgesScalaDataset
-import flockdb_tools.datasets.flock.FlockFollowsEdgesScalaDataset
-import flockdb_tools.datasets.flock.FlockReportAsAbuseEdgesScalaDataset
-import flockdb_tools.datasets.flock.FlockReportAsSpamEdgesScalaDataset
-import org.joda.time.Interval
-import com.twitter.simclusters_v2.thriftscala.EdgeWithDecayedWeights
-import com.twitter.usersource.snapshot.combined.UsersourceScalaDataset
-import com.twitter.usersource.snapshot.flat.UsersourceFlatScalaDataset
-import com.twitter.util.Duration
-import twadoop_config.configuration.log_categories.group.search.AdaptiveSearchScalaDataset
-
-object ExternalDataSources {
-  def userSource(
-    noOlderThan: Duration = Duration.fromDays(7)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[CombinedUser] = {
-    sc.customInput(
-      "ReadUserSource",
-      DAL
-        .readMostRecentSnapshotNoOlderThan(
-          UsersourceScalaDataset,
-          noOlderThan,
-          Clock.SYSTEM_CLOCK,
-          DAL.Environment.Prod
-        )
-    )
-  }
-
-  def userCountrySource(
-    noOlderThan: Duration = Duration.fromDays(7)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[(Long, String)] = {
-    sc.customInput(
-        "ReadUserCountrySource",
-        DAL
-          .readMostRecentSnapshotNoOlderThan(
-            UsersourceFlatScalaDataset,
-            noOlderThan,
-            Clock.SYSTEM_CLOCK,
-            DAL.Environment.Prod,
-          )
-      ).flatMap { flatUser =>
-        for {
-          userId <- flatUser.id
-          country <- flatUser.accountCountryCode
-        } yield {
-          (userId, country.toUpperCase)
-        }
-      }.distinct
-  }
-
-  def userUserFavSource(
-    noOlderThan: Duration = Duration.fromDays(14)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[EdgeWithDecayedWeights] = {
-    sc.customInput(
-      "ReadUserUserFavSource",
-      DAL
-        .readMostRecentSnapshotNoOlderThan(
-          UserUserFavGraphScalaDataset,
-          noOlderThan,
-          Clock.SYSTEM_CLOCK,
-          DAL.Environment.Prod
-        )
-    )
-  }
-
-  def inferredUserConsumedLanguageSource(
-    noOlderThan: Duration = Duration.fromDays(7)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[(Long, Seq[(String, Double)])] = {
-    sc.customInput(
-        "ReadInferredUserConsumedLanguageSource",
-        DAL
-          .readMostRecentSnapshotNoOlderThan(
-            PenguinUserLanguagesScalaDataset,
-            noOlderThan,
-            Clock.SYSTEM_CLOCK,
-            DAL.Environment.Prod
-          )
-      ).map { kv =>
-        val consumed = kv.value.consumed
-          .collect {
-            case scoredString if scoredString.weight > 0.001 => //throw away 5% outliers
-              (getStandardLanguageCode(scoredString.item), scoredString.weight)
-          }.collect {
-            case (Some(language), score) => (language, score)
-          }
-        (kv.key, consumed)
-      }
-  }
-
-  def flockBlockSource(
-    noOlderThan: Duration = Duration.fromDays(7)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[(Long, Long)] = {
-    sc.customInput(
-        "ReadFlockBlock",
-        DAL.readMostRecentSnapshotNoOlderThan(
-          FlockBlocksEdgesScalaDataset,
-          noOlderThan,
-          Clock.SYSTEM_CLOCK,
-          DAL.Environment.Prod))
-      .collect {
-        case edge if edge.state == ValidFlockEdgeStateId =>
-          (edge.sourceId, edge.destinationId)
-      }
-  }
-
-  def flockFollowSource(
-    noOlderThan: Duration = Duration.fromDays(7)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[(Long, Long)] = {
-    sc.customInput(
-        "ReadFlockFollow",
-        DAL
-          .readMostRecentSnapshotNoOlderThan(
-            FlockFollowsEdgesScalaDataset,
-            noOlderThan,
-            Clock.SYSTEM_CLOCK,
-            DAL.Environment.Prod))
-      .collect {
-        case edge if edge.state == ValidFlockEdgeStateId =>
-          (edge.sourceId, edge.destinationId)
-      }
-  }
-
-  def flockReportAsAbuseSource(
-    noOlderThan: Duration = Duration.fromDays(7)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[(Long, Long)] = {
-    sc.customInput(
-        "ReadFlockReportAsAbuseJava",
-        DAL
-          .readMostRecentSnapshotNoOlderThan(
-            FlockReportAsAbuseEdgesScalaDataset,
-            noOlderThan,
-            Clock.SYSTEM_CLOCK,
-            DAL.Environment.Prod)
-      )
-      .collect {
-        case edge if edge.state == ValidFlockEdgeStateId =>
-          (edge.sourceId, edge.destinationId)
-      }
-  }
-
-  def flockReportAsSpamSource(
-    noOlderThan: Duration = Duration.fromDays(7)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[(Long, Long)] = {
-    sc.customInput(
-        "ReadFlockReportAsSpam",
-        DAL
-          .readMostRecentSnapshotNoOlderThan(
-            FlockReportAsSpamEdgesScalaDataset,
-            noOlderThan,
-            Clock.SYSTEM_CLOCK,
-            DAL.Environment.Prod))
-      .collect {
-        case edge if edge.state == ValidFlockEdgeStateId =>
-          (edge.sourceId, edge.destinationId)
-      }
-  }
-
-  def ieSourceTweetEngagementsSource(
-    interval: Interval
-  )(
-    implicit sc: ScioContext
-  ): SCollection[InteractionEvent] = {
-    sc.customInput(
-        "ReadIeSourceTweetEngagementsSource",
-        DAL
-          .read(
-            com.twitter.iesource.processing.events.batch.ServerEngagementsScalaDataset,
-            interval,
-            DAL.Environment.Prod,
-          )
-      ).filter { event =>
-        // filter out logged out users because their favorites are less reliable
-        event.engagingUserId > 0L && event.targetType == InteractionTargetType.Tweet
-      }
-  }
-
-  def topicFollowGraphSource(
-    noOlderThan: Duration = Duration.fromDays(7)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[(Long, Long)] = {
-    // The implementation here is slightly different than the topicFollowGraphSource function in
-    // src/scala/com/twitter/simclusters_v2/scalding/embedding/common/ExternalDataSources.scala
-    // We don't do an additional hashJoin on uttFollowableEntitiesSource.
-    sc.customInput(
-        "ReadTopicFollowGraphSource",
-        DAL
-          .readMostRecentSnapshotNoOlderThan(
-            UserTopicRelationSnapshotScalaDataset,
-            noOlderThan,
-            Clock.SYSTEM_CLOCK,
-            DAL.Environment.Prod
-          )
-      ).collect {
-        case userInterestsRelationSnapshot: UserInterestsRelationSnapshot
-            if userInterestsRelationSnapshot.interestType == "UTT" &&
-              userInterestsRelationSnapshot.relation == InterestRelationType.Followed =>
-          (userInterestsRelationSnapshot.interestId, userInterestsRelationSnapshot.userId)
-      }
-  }
-
-  def magicRecsNotficationOpenOrClickEventsSource(
-    interval: Interval
-  )(
-    implicit sc: ScioContext
-  ): SCollection[MagicRecsNotificationLite] = {
-    sc.customInput(
-        "ReadMagicRecsNotficationOpenOrClickEventsSource",
-        DAL
-          .read(MagicrecsNotificationLite1DayLagScalaDataset, interval, DAL.Environment.Prod))
-      .filter { entry =>
-        // keep entries with a valid userId and tweetId, opened or clicked timestamp defined
-        val userIdExists = entry.targetUserId.isDefined
-        val tweetIdExists = entry.tweetId.isDefined
-        val openOrClickExists =
-          entry.openTimestampMs.isDefined || entry.ntabClickTimestampMs.isDefined
-        userIdExists && tweetIdExists && openOrClickExists
-      }
-  }
-
-  def adaptiveSearchScribeLogsSource(
-    interval: Interval
-  )(
-    implicit sc: ScioContext
-  ): SCollection[(Long, String)] = {
-    sc.customInput(
-        "ReadAdaptiveSearchScribeLogsSource",
-        DAL
-          .read(AdaptiveSearchScalaDataset, interval, DAL.Environment.Prod))
-      .flatMap({ scribeLog: AdaptiveSearchScribeLog =>
-        for {
-          userId <- userIdFromBlenderAdaptiveScribeLog(scribeLog)
-          // filter out logged out search queries
-          if userId != 0
-          queryString <- scribeLog.requestLog.flatMap(_.request).flatMap(_.rawQuery)
-        } yield {
-          (userId, Set(queryString))
-        }
-      })
-      // if a user searches for the same query multiple times, there could be duplicates.
-      // De-dup them to get the distinct queries searched by a user
-      .sumByKey
-      .flatMap {
-        case (userId, distinctQuerySet) =>
-          distinctQuerySet.map { query =>
-            (userId, query)
-          }
-      }
-  }
-
-  private def userIdFromBlenderAdaptiveScribeLog(
-    blenderAdaptiveLog: AdaptiveSearchScribeLog
-  ): Option[Long] = {
-    blenderAdaptiveLog.versionedCommonHeader match {
-      case VersionedCommonHeader.CommonHeader(CommonHeader.ServerHeader(serverHeader)) =>
-        serverHeader.requestInfo match {
-          case Some(requestInfo) => requestInfo.ids.get(IdType.UserId).map(_.toLong)
-          case _ => None
-        }
-      case _ => None
-    }
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioApp.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioApp.docx
new file mode 100644
index 000000000..d7546e65d
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioApp.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioApp.scala b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioApp.scala
deleted file mode 100644
index 34f9b5f61..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioApp.scala
+++ /dev/null
@@ -1,39 +0,0 @@
-package com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph
-
-/**
-Build:
-./bazel bundle src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph:assemble-multi-type-graph-scio-adhoc-app
-
-To kick off an adhoc run:
-bin/d6w create \
-  ${GCP_PROJECT_NAME}/us-central1/assemble-multi-type-graph-scio-adhoc-app \
-  src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-adhoc.d6w \
-  --jar dist/assemble-multi-type-graph-scio-adhoc-app.jar \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=${USER} \
-  --bind=profile.date="2021-11-04" \
-  --bind=profile.machine="n2-highmem-16"
- */
-
-object AssembleMultiTypeGraphScioAdhocApp extends AssembleMultiTypeGraphScioBaseApp {
-  override val isAdhoc: Boolean = true
-  override val rootMHPath: String = Config.AdhocRootPath
-  override val rootThriftPath: String = Config.AdhocRootPath
-}
-
-/**
-To deploy the job:
-
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/assemble-multi-type-graph-scio-batch-app \
-  src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-batch.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=recos-platform \
-  --bind=profile.date="2021-11-04" \
-  --bind=profile.machine="n2-highmem-16"
- */
-object AssembleMultiTypeGraphScioBatchApp extends AssembleMultiTypeGraphScioBaseApp {
-  override val isAdhoc: Boolean = false
-  override val rootMHPath: String = Config.RootMHPath
-  override val rootThriftPath: String = Config.RootThriftPath
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioBaseApp.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioBaseApp.docx
new file mode 100644
index 000000000..5b346ccf4
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioBaseApp.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioBaseApp.scala b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioBaseApp.scala
deleted file mode 100644
index 18325e2fc..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraphScioBaseApp.scala
+++ /dev/null
@@ -1,574 +0,0 @@
-package com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph
-
-import com.spotify.scio.ScioContext
-import com.spotify.scio.coders.Coder
-import com.spotify.scio.values.SCollection
-import com.twitter.beam.io.dal.DAL
-import com.twitter.beam.io.fs.multiformat.DiskFormat
-import com.twitter.beam.io.fs.multiformat.PathLayout
-import com.twitter.beam.job.DateRangeOptions
-import com.twitter.dal.client.dataset.KeyValDALDataset
-import com.twitter.dal.client.dataset.SnapshotDALDataset
-import com.twitter.frigate.data_pipeline.magicrecs.magicrecs_notifications_lite.thriftscala.MagicRecsNotificationLite
-import com.twitter.iesource.thriftscala.InteractionEvent
-import com.twitter.iesource.thriftscala.InteractionType
-import com.twitter.iesource.thriftscala.ReferenceTweet
-import com.twitter.scalding_internal.multiformat.format.keyval.KeyVal
-import com.twitter.scio_internal.coders.ThriftStructLazyBinaryScroogeCoder
-import com.twitter.scio_internal.job.ScioBeamJob
-import com.twitter.scrooge.ThriftStruct
-import com.twitter.simclusters_v2.common.Country
-import com.twitter.simclusters_v2.common.Language
-import com.twitter.simclusters_v2.common.TopicId
-import com.twitter.simclusters_v2.common.TweetId
-import com.twitter.simclusters_v2.common.UserId
-import com.twitter.simclusters_v2.hdfs_sources.MultiTypeGraphForTopKRightNodesThriftScioScalaDataset
-import com.twitter.simclusters_v2.hdfs_sources.TopKRightNounsScioScalaDataset
-import com.twitter.simclusters_v2.hdfs_sources.TruncatedMultiTypeGraphScioScalaDataset
-import com.twitter.simclusters_v2.scio.common.ExternalDataSources
-import com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph.Config.GlobalDefaultMinFrequencyOfRightNodeType
-import com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph.Config.HalfLifeInDaysForFavScore
-import com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph.Config.NumTopNounsForUnknownRightNodeType
-import com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph.Config.SampledEmployeeIds
-import com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph.Config.TopKConfig
-import com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph.Config.TopKRightNounsForMHDump
-import com.twitter.simclusters_v2.scio.multi_type_graph.common.MultiTypeGraphUtil
-import com.twitter.simclusters_v2.thriftscala.EdgeWithDecayedWeights
-import com.twitter.simclusters_v2.thriftscala.LeftNode
-import com.twitter.simclusters_v2.thriftscala.MultiTypeGraphEdge
-import com.twitter.simclusters_v2.thriftscala.Noun
-import com.twitter.simclusters_v2.thriftscala.NounWithFrequency
-import com.twitter.simclusters_v2.thriftscala.NounWithFrequencyList
-import com.twitter.simclusters_v2.thriftscala.RightNode
-import com.twitter.simclusters_v2.thriftscala.RightNodeType
-import com.twitter.simclusters_v2.thriftscala.RightNodeTypeStruct
-import com.twitter.simclusters_v2.thriftscala.RightNodeWithEdgeWeight
-import com.twitter.simclusters_v2.thriftscala.RightNodeWithEdgeWeightList
-import com.twitter.twadoop.user.gen.thriftscala.CombinedUser
-import com.twitter.util.Duration
-import java.time.Instant
-import org.joda.time.Interval
-
-/**
- * Scio version of
- * src/scala/com/twitter/simclusters_v2/scalding/multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraph.scala
- */
-trait AssembleMultiTypeGraphScioBaseApp extends ScioBeamJob[DateRangeOptions] {
-  // Provides an implicit binary thrift scrooge coder by default.
-  override implicit def scroogeCoder[T <: ThriftStruct: Manifest]: Coder[T] =
-    ThriftStructLazyBinaryScroogeCoder.scroogeCoder
-
-  val isAdhoc: Boolean
-  val rootMHPath: String
-  val rootThriftPath: String
-
-  val truncatedMultiTypeGraphMHOutputDir: String =
-    Config.truncatedMultiTypeGraphMHOutputDir
-  val truncatedMultiTypeGraphThriftOutputDir: String =
-    Config.truncatedMultiTypeGraphThriftOutputDir
-  val topKRightNounsMHOutputDir: String = Config.topKRightNounsMHOutputDir
-  val topKRightNounsOutputDir: String = Config.topKRightNounsOutputDir
-
-  val fullMultiTypeGraphThriftOutputDir: String =
-    Config.fullMultiTypeGraphThriftOutputDir
-  val truncatedMultiTypeGraphKeyValDataset: KeyValDALDataset[
-    KeyVal[LeftNode, RightNodeWithEdgeWeightList]
-  ] = TruncatedMultiTypeGraphScioScalaDataset
-  val topKRightNounsKeyValDataset: KeyValDALDataset[
-    KeyVal[RightNodeTypeStruct, NounWithFrequencyList]
-  ] = TopKRightNounsScioScalaDataset
-  val topKRightNounsMHKeyValDataset: KeyValDALDataset[
-    KeyVal[RightNodeTypeStruct, NounWithFrequencyList]
-  ] = TopKRightNounsMhScioScalaDataset
-  val fullMultiTypeGraphSnapshotDataset: SnapshotDALDataset[MultiTypeGraphEdge] =
-    FullMultiTypeGraphScioScalaDataset
-  val multiTypeGraphTopKForRightNodesSnapshotDataset: SnapshotDALDataset[
-    MultiTypeGraphEdge
-  ] =
-    MultiTypeGraphForTopKRightNodesThriftScioScalaDataset
-
-  def getValidUsers(
-    input: SCollection[CombinedUser]
-  ): SCollection[UserId] = {
-    input
-      .flatMap { u =>
-        for {
-          user <- u.user
-          if user.id != 0
-          safety <- user.safety
-          if !(safety.suspended || safety.deactivated)
-        } yield {
-          user.id
-        }
-      }
-  }
-
-  def filterInvalidUsers(
-    flockEdges: SCollection[(UserId, UserId)],
-    validUsers: SCollection[UserId]
-  ): SCollection[(UserId, UserId)] = {
-    val validUsersWithValues = validUsers.map(userId => (userId, ()))
-    flockEdges
-      .join(validUsersWithValues)
-      .map {
-        case (srcId, (destId, _)) =>
-          (destId, srcId)
-      }
-      .join(validUsersWithValues)
-      .map {
-        case (destId, (srcId, _)) =>
-          (srcId, destId)
-      }
-  }
-
-  def getFavEdges(
-    input: SCollection[EdgeWithDecayedWeights],
-    halfLifeInDaysForFavScore: Int,
-  ): SCollection[(Long, Long, Double)] = {
-    input
-      .flatMap { edge =>
-        if (edge.weights.halfLifeInDaysToDecayedSums.contains(halfLifeInDaysForFavScore)) {
-          Some(
-            (
-              edge.sourceId,
-              edge.destinationId,
-              edge.weights.halfLifeInDaysToDecayedSums(halfLifeInDaysForFavScore)))
-        } else {
-          None
-        }
-      }
-  }
-
-  def leftRightTuple(
-    leftNodeUserId: UserId,
-    rightNodeType: RightNodeType,
-    rightNoun: Noun,
-    weight: Double = 1.0
-  ): (LeftNode, RightNodeWithEdgeWeight) = {
-    (
-      LeftNode.UserId(leftNodeUserId),
-      RightNodeWithEdgeWeight(
-        rightNode = RightNode(rightNodeType = rightNodeType, noun = rightNoun),
-        weight = weight))
-  }
-
-  def getUserFavGraph(
-    userUserFavEdges: SCollection[(UserId, UserId, Double)]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    userUserFavEdges.map {
-      case (srcId, destId, edgeWt) =>
-        leftRightTuple(srcId, RightNodeType.FavUser, Noun.UserId(destId), edgeWt)
-    }
-  }
-
-  def getUserFollowGraph(
-    userUserFollowEdges: SCollection[(UserId, UserId)]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    userUserFollowEdges.map {
-      case (srcId, destId) =>
-        leftRightTuple(srcId, RightNodeType.FollowUser, Noun.UserId(destId), 1.0)
-    }
-  }
-
-  def getUserBlockGraph(
-    userUserBlockEdges: SCollection[(UserId, UserId)]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    userUserBlockEdges.map {
-      case (srcId, destId) =>
-        leftRightTuple(srcId, RightNodeType.BlockUser, Noun.UserId(destId), 1.0)
-    }
-  }
-
-  def getUserAbuseReportGraph(
-    userUserAbuseReportEdges: SCollection[(UserId, UserId)]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    userUserAbuseReportEdges.map {
-      case (srcId, destId) =>
-        leftRightTuple(srcId, RightNodeType.AbuseReportUser, Noun.UserId(destId), 1.0)
-    }
-  }
-
-  def getUserSpamReportGraph(
-    userUserSpamReportEdges: SCollection[(UserId, UserId)]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    userUserSpamReportEdges.map {
-      case (srcId, destId) =>
-        leftRightTuple(srcId, RightNodeType.SpamReportUser, Noun.UserId(destId), 1.0)
-    }
-  }
-
-  def getUserTopicFollowGraph(
-    topicUserFollowedByEdges: SCollection[(TopicId, UserId)]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    topicUserFollowedByEdges.map {
-      case (topicId, userId) =>
-        leftRightTuple(userId, RightNodeType.FollowTopic, Noun.TopicId(topicId), 1.0)
-    }
-  }
-
-  def getUserSignUpCountryGraph(
-    userSignUpCountryEdges: SCollection[(UserId, Country)]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    userSignUpCountryEdges.map {
-      case (userId, country) =>
-        leftRightTuple(userId, RightNodeType.SignUpCountry, Noun.Country(country), 1.0)
-    }
-  }
-
-  def getMagicRecsNotifOpenOrClickTweetsGraph(
-    userMRNotifOpenOrClickEvents: SCollection[MagicRecsNotificationLite]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    userMRNotifOpenOrClickEvents.flatMap { entry =>
-      for {
-        userId <- entry.targetUserId
-        tweetId <- entry.tweetId
-      } yield {
-        leftRightTuple(userId, RightNodeType.NotifOpenOrClickTweet, Noun.TweetId(tweetId), 1.0)
-      }
-    }
-  }
-
-  def getUserConsumedLanguagesGraph(
-    userConsumedLanguageEdges: SCollection[(UserId, Seq[(Language, Double)])]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    userConsumedLanguageEdges.flatMap {
-      case (userId, langWithWeights) =>
-        langWithWeights.map {
-          case (lang, weight) =>
-            leftRightTuple(userId, RightNodeType.ConsumedLanguage, Noun.Language(lang), 1.0)
-        }
-    }
-  }
-
-  def getSearchGraph(
-    userSearchQueryEdges: SCollection[(UserId, String)]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    userSearchQueryEdges.map {
-      case (userId, query) =>
-        leftRightTuple(userId, RightNodeType.SearchQuery, Noun.Query(query), 1.0)
-    }
-  }
-
-  def getUserTweetInteractionGraph(
-    tweetInteractionEvents: SCollection[InteractionEvent],
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    val userTweetInteractionsByType: SCollection[((UserId, TweetId), RightNodeType)] =
-      tweetInteractionEvents
-        .flatMap { event =>
-          val referenceTweet: Option[ReferenceTweet] = event.referenceTweet
-          val targetId: Long = event.targetId
-          val userId: Long = event.engagingUserId
-
-          //  To find the id of the tweet that was interacted with
-          //  For likes, this is the targetId; for retweet or reply, it is the referenceTweet's id
-          //  One thing to note is that for likes, referenceTweet is empty
-          val (tweetIdOpt, rightNodeTypeOpt) = {
-            event.interactionType match {
-              case Some(InteractionType.Favorite) =>
-                // Only allow favorites on original tweets, not retweets, to avoid double-counting
-                // because we have retweet-type tweets in the data source as well
-                (
-                  if (referenceTweet.isEmpty) {
-                    Some(targetId)
-                  } else None,
-                  Some(RightNodeType.FavTweet))
-              case Some(InteractionType.Reply) =>
-                (referenceTweet.map(_.tweetId), Some(RightNodeType.ReplyTweet))
-              case Some(InteractionType.Retweet) =>
-                (referenceTweet.map(_.tweetId), Some(RightNodeType.RetweetTweet))
-              case _ => (None, None)
-            }
-          }
-          for {
-            tweetId <- tweetIdOpt
-            rightNodeType <- rightNodeTypeOpt
-          } yield {
-            ((userId, tweetId), rightNodeType)
-          }
-        }
-
-    userTweetInteractionsByType
-      .mapValues(Set(_))
-      .sumByKey
-      .flatMap {
-        case ((userId, tweetId), rightNodeTypeSet) =>
-          rightNodeTypeSet.map { rightNodeType =>
-            leftRightTuple(userId, rightNodeType, Noun.TweetId(tweetId), 1.0)
-          }
-      }
-  }
-
-  def getTopKRightNounsWithFrequencies(
-    fullGraph: SCollection[(LeftNode, RightNodeWithEdgeWeight)],
-    topKConfig: Map[RightNodeType, Int],
-    minFrequency: Int,
-  ): SCollection[(RightNodeType, Seq[(Noun, Double)])] = {
-    val maxAcrossRightNounType: Int = topKConfig.valuesIterator.max
-
-    fullGraph
-      .map {
-        case (leftNode, rightNodeWithWeight) =>
-          (rightNodeWithWeight.rightNode, 1.0)
-      }
-      .sumByKey
-      .filter(_._2 >= minFrequency)
-      .map {
-        case (rightNode, freq) =>
-          (rightNode.rightNodeType, (rightNode.noun, freq))
-      }
-      .topByKey(maxAcrossRightNounType)(Ordering.by(_._2))
-      .map {
-        case (rightNodeType, nounsListWithFreq) =>
-          val truncatedList = nounsListWithFreq.toSeq
-            .sortBy(-_._2)
-            .take(topKConfig.getOrElse(rightNodeType, NumTopNounsForUnknownRightNodeType))
-          (rightNodeType, truncatedList)
-      }
-  }
-
-  def getTruncatedGraph(
-    fullGraph: SCollection[(LeftNode, RightNodeWithEdgeWeight)],
-    topKWithFrequency: SCollection[(RightNodeType, Seq[(Noun, Double)])]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    val topNouns = topKWithFrequency
-      .flatMap {
-        case (rightNodeType, nounsList) =>
-          nounsList
-            .map {
-              case (nounVal, aggregatedFrequency) =>
-                RightNode(rightNodeType, nounVal)
-            }
-      }.map(nouns => (nouns, ()))
-
-    fullGraph
-      .map {
-        case (leftNode, rightNodeWithWeight) =>
-          (rightNodeWithWeight.rightNode, (leftNode, rightNodeWithWeight))
-      }
-      .hashJoin(topNouns)
-      .map {
-        case (rightNode, ((left, rightNodeWithWeight), _)) =>
-          (left, rightNodeWithWeight)
-      }
-  }
-
-  def buildEmployeeGraph(
-    graph: SCollection[(LeftNode, RightNodeWithEdgeWeight)]
-  ): SCollection[(LeftNode, RightNodeWithEdgeWeight)] = {
-    val employeeIds = SampledEmployeeIds
-    graph
-      .collect {
-        case (LeftNode.UserId(userId), rightNodeWithWeight) if employeeIds.contains(userId) =>
-          (LeftNode.UserId(userId), rightNodeWithWeight)
-      }
-  }
-
-  override def configurePipeline(sc: ScioContext, opts: DateRangeOptions): Unit = {
-    // Define the implicit ScioContext to read datasets from ExternalDataSources
-    implicit def scioContext: ScioContext = sc
-
-    // DAL.Environment variable for WriteExecs
-    val dalEnv = if (isAdhoc) DAL.Environment.Dev else DAL.Environment.Prod
-
-    // Define date intervals
-    val interval_7days =
-      new Interval(opts.interval.getEnd.minusWeeks(1), opts.interval.getEnd.minusMillis(1))
-    val interval_14days =
-      new Interval(opts.interval.getEnd.minusWeeks(2), opts.interval.getEnd.minusMillis(1))
-
-    /*
-     * Dataset read operations
-     */
-    // Get list of valid UserIds - to filter out deactivated or suspended user accounts
-    val validUsers = getValidUsers(ExternalDataSources.userSource(Duration.fromDays(7)))
-
-    // ieSource tweet engagements data for tweet favs, replies, retweets - from last 14 days
-    val tweetSource = ExternalDataSources.ieSourceTweetEngagementsSource(interval_14days)
-
-    // Read TFlock datasets
-    val flockFollowSource = ExternalDataSources.flockFollowSource(Duration.fromDays(7))
-    val flockBlockSource = ExternalDataSources.flockBlockSource(Duration.fromDays(7))
-    val flockReportAsAbuseSource =
-      ExternalDataSources.flockReportAsAbuseSource(Duration.fromDays(7))
-    val flockReportAsSpamSource =
-      ExternalDataSources.flockReportAsSpamSource(Duration.fromDays(7))
-
-    // user-user fav edges
-    val userUserFavSource = ExternalDataSources.userUserFavSource(Duration.fromDays(14))
-    val userUserFavEdges = getFavEdges(userUserFavSource, HalfLifeInDaysForFavScore)
-
-    // user-user follow edges
-    val userUserFollowEdges = filterInvalidUsers(flockFollowSource, validUsers)
-
-    // user-user block edges
-    val userUserBlockEdges = filterInvalidUsers(flockBlockSource, validUsers)
-
-    // user-user abuse report edges
-    val userUserAbuseReportEdges = filterInvalidUsers(flockReportAsAbuseSource, validUsers)
-
-    // user-user spam report edges
-    val userUserSpamReportEdges = filterInvalidUsers(flockReportAsSpamSource, validUsers)
-
-    // user-signup country edges
-    val userSignUpCountryEdges = ExternalDataSources
-      .userCountrySource(Duration.fromDays(7))
-
-    // user-consumed language edges
-    val userConsumedLanguageEdges =
-      ExternalDataSources.inferredUserConsumedLanguageSource(Duration.fromDays(7))
-
-    // user-topic follow edges
-    val topicUserFollowedByEdges =
-      ExternalDataSources.topicFollowGraphSource(Duration.fromDays(7))
-
-    // user-MRNotifOpenOrClick events from last 7 days
-    val userMRNotifOpenOrClickEvents =
-      ExternalDataSources.magicRecsNotficationOpenOrClickEventsSource(interval_7days)
-
-    // user-searchQuery strings from last 7 days
-    val userSearchQueryEdges =
-      ExternalDataSources.adaptiveSearchScribeLogsSource(interval_7days)
-
-    /*
-     * Generate the full graph
-     */
-    val fullGraph =
-      getUserTweetInteractionGraph(tweetSource) ++
-        getUserFavGraph(userUserFavEdges) ++
-        getUserFollowGraph(userUserFollowEdges) ++
-        getUserBlockGraph(userUserBlockEdges) ++
-        getUserAbuseReportGraph(userUserAbuseReportEdges) ++
-        getUserSpamReportGraph(userUserSpamReportEdges) ++
-        getUserSignUpCountryGraph(userSignUpCountryEdges) ++
-        getUserConsumedLanguagesGraph(userConsumedLanguageEdges) ++
-        getUserTopicFollowGraph(topicUserFollowedByEdges) ++
-        getMagicRecsNotifOpenOrClickTweetsGraph(userMRNotifOpenOrClickEvents) ++
-        getSearchGraph(userSearchQueryEdges)
-
-    // Get Top K RightNodes
-    val topKRightNodes: SCollection[(RightNodeType, Seq[(Noun, Double)])] =
-      getTopKRightNounsWithFrequencies(
-        fullGraph,
-        TopKConfig,
-        GlobalDefaultMinFrequencyOfRightNodeType)
-
-    // key transformation - topK nouns, keyed by the RightNodeNounType
-    val topKNounsKeyedByType: SCollection[(RightNodeTypeStruct, NounWithFrequencyList)] =
-      topKRightNodes
-        .map {
-          case (rightNodeType, rightNounsWithScoresList) =>
-            val nounsListWithFrequency: Seq[NounWithFrequency] = rightNounsWithScoresList
-              .map {
-                case (noun, aggregatedFrequency) =>
-                  NounWithFrequency(noun, aggregatedFrequency)
-              }
-            (RightNodeTypeStruct(rightNodeType), NounWithFrequencyList(nounsListWithFrequency))
-        }
-
-    // Get Truncated graph based on the top K RightNodes
-    val truncatedGraph: SCollection[(LeftNode, RightNodeWithEdgeWeight)] =
-      getTruncatedGraph(fullGraph, topKRightNodes)
-
-    // key transformations - truncated graph, keyed by LeftNode
-    // Note: By wrapping and unwrapping with the LeftNode.UserId, we don't have to deal
-    // with defining our own customer ordering for LeftNode type
-    val truncatedGraphKeyedBySrc: SCollection[(LeftNode, RightNodeWithEdgeWeightList)] =
-      truncatedGraph
-        .collect {
-          case (LeftNode.UserId(userId), rightNodeWithWeight) =>
-            userId -> List(rightNodeWithWeight)
-        }
-        .sumByKey
-        .map {
-          case (userId, rightNodeWithWeightList) =>
-            (LeftNode.UserId(userId), RightNodeWithEdgeWeightList(rightNodeWithWeightList))
-        }
-
-    // WriteExecs
-    // Write TopK RightNodes to DAL - save all the top K nodes for the clustering step
-    topKNounsKeyedByType
-      .map {
-        case (engagementType, rightList) =>
-          KeyVal(engagementType, rightList)
-      }
-      .saveAsCustomOutput(
-        name = "WriteTopKNouns",
-        DAL.writeVersionedKeyVal(
-          topKRightNounsKeyValDataset,
-          PathLayout.VersionedPath(prefix =
-            rootMHPath + topKRightNounsOutputDir),
-          instant = Instant.ofEpochMilli(opts.interval.getEndMillis - 1L),
-          environmentOverride = dalEnv,
-        )
-      )
-
-    // Write TopK RightNodes to DAL - only take TopKRightNounsForMHDump RightNodes for MH dump
-    topKNounsKeyedByType
-      .map {
-        case (engagementType, rightList) =>
-          val rightListMH =
-            NounWithFrequencyList(rightList.nounWithFrequencyList.take(TopKRightNounsForMHDump))
-          KeyVal(engagementType, rightListMH)
-      }
-      .saveAsCustomOutput(
-        name = "WriteTopKNounsToMHForDebugger",
-        DAL.writeVersionedKeyVal(
-          topKRightNounsMHKeyValDataset,
-          PathLayout.VersionedPath(prefix =
-            rootMHPath + topKRightNounsMHOutputDir),
-          instant = Instant.ofEpochMilli(opts.interval.getEndMillis - 1L),
-          environmentOverride = dalEnv,
-        )
-      )
-
-    // Write truncated graph (MultiTypeGraphTopKForRightNodes) to DAL in KeyVal format
-    truncatedGraphKeyedBySrc
-      .map {
-        case (leftNode, rightNodeWithWeightList) =>
-          KeyVal(leftNode, rightNodeWithWeightList)
-      }.saveAsCustomOutput(
-        name = "WriteTruncatedMultiTypeGraph",
-        DAL.writeVersionedKeyVal(
-          truncatedMultiTypeGraphKeyValDataset,
-          PathLayout.VersionedPath(prefix =
-            rootMHPath + truncatedMultiTypeGraphMHOutputDir),
-          instant = Instant.ofEpochMilli(opts.interval.getEndMillis - 1L),
-          environmentOverride = dalEnv,
-        )
-      )
-
-    // Write truncated graph (MultiTypeGraphTopKForRightNodes) to DAL in thrift format
-    truncatedGraph
-      .map {
-        case (leftNode, rightNodeWithWeight) =>
-          MultiTypeGraphEdge(leftNode, rightNodeWithWeight)
-      }.saveAsCustomOutput(
-        name = "WriteTruncatedMultiTypeGraphThrift",
-        DAL.writeSnapshot(
-          multiTypeGraphTopKForRightNodesSnapshotDataset,
-          PathLayout.FixedPath(rootThriftPath + truncatedMultiTypeGraphThriftOutputDir),
-          Instant.ofEpochMilli(opts.interval.getEndMillis - 1L),
-          DiskFormat.Thrift(),
-          environmentOverride = dalEnv
-        )
-      )
-
-    // Write full graph to DAL
-    fullGraph
-      .map {
-        case (leftNode, rightNodeWithWeight) =>
-          MultiTypeGraphEdge(leftNode, rightNodeWithWeight)
-      }
-      .saveAsCustomOutput(
-        name = "WriteFullMultiTypeGraph",
-        DAL.writeSnapshot(
-          fullMultiTypeGraphSnapshotDataset,
-          PathLayout.FixedPath(rootThriftPath + fullMultiTypeGraphThriftOutputDir),
-          Instant.ofEpochMilli(opts.interval.getEndMillis - 1L),
-          DiskFormat.Thrift(),
-          environmentOverride = dalEnv
-        )
-      )
-
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/BUILD b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/BUILD
deleted file mode 100644
index 4ad3bfb53..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/BUILD
+++ /dev/null
@@ -1,73 +0,0 @@
-scala_library(
-    name = "assemble-multi-type-graph-scio-lib",
-    sources = [
-        "*.scala",
-    ],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":full_multi_type_graph_scio-scala",
-        ":top_k_right_nouns_mh_scio-scala",
-        "beam-internal/src/main/scala/com/twitter/beam/io/dal",
-        "beam-internal/src/main/scala/com/twitter/beam/io/manhattan",
-        "beam-internal/src/main/scala/com/twitter/beam/job",
-        "beam-internal/src/main/scala/com/twitter/beam/transform",
-        "beam-internal/src/main/scala/com/twitter/scio_internal/runner/dataflow",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources",
-        "src/scala/com/twitter/simclusters_v2/scalding/multi_type_graph/assemble_multi_type_graph",
-        "src/scala/com/twitter/simclusters_v2/scio/common",
-        "src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common",
-    ],
-)
-
-jvm_binary(
-    name = "assemble-multi-type-graph-scio-adhoc-app",
-    main = "com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph.AssembleMultiTypeGraphScioAdhocApp",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":assemble-multi-type-graph-scio-lib",
-        "beam-internal/src/main/scala/com/twitter/beam/runner/dataflow",
-    ],
-)
-
-jvm_binary(
-    name = "assemble-multi-type-graph-scio-batch-app",
-    main = "com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph.AssembleMultiTypeGraphScioBatchApp",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":assemble-multi-type-graph-scio-lib",
-        "beam-internal/src/main/scala/com/twitter/beam/runner/dataflow",
-    ],
-)
-
-create_datasets(
-    base_name = "full_multi_type_graph_scio",
-    java_schema = "com.twitter.simclusters_v2.thriftjava.MultiTypeGraphEdge",
-    platform = "java8",
-    role = "cassowary",
-    scala_schema = "com.twitter.simclusters_v2.thriftscala.MultiTypeGraphEdge",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    java_dependencies = [
-        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-java",
-    ],
-    scala_dependencies = [
-        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
-    ],
-)
-
-create_datasets(
-    base_name = "top_k_right_nouns_mh_scio",
-    key_type = "com.twitter.simclusters_v2.thriftscala.RightNodeTypeStruct",
-    platform = "java8",
-    role = "cassowary",
-    scala_schema = "com.twitter.simclusters_v2.hdfs_sources.injections.MultiTypeGraphInjections.topKRightNounListInjection",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "com.twitter.simclusters_v2.thriftscala.NounWithFrequencyList",
-    scala_dependencies = [
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources/injections",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/BUILD.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/BUILD.docx
new file mode 100644
index 000000000..19bf86785
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/Config.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/Config.docx
new file mode 100644
index 000000000..5af397f2c
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/Config.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/Config.scala b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/Config.scala
deleted file mode 100644
index 337789ca1..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/Config.scala
+++ /dev/null
@@ -1,37 +0,0 @@
-package com.twitter.simclusters_v2.scio.multi_type_graph.assemble_multi_type_graph
-
-import com.twitter.simclusters_v2.thriftscala.RightNodeType
-
-object Config {
-  val RootMHPath: String = "manhattan_sequence_files/multi_type_graph/"
-  val RootThriftPath: String = "processed/multi_type_graph/"
-  val AdhocRootPath = "adhoc/multi_type_graph/"
-  val truncatedMultiTypeGraphMHOutputDir: String = "truncated_graph_mh"
-  val truncatedMultiTypeGraphThriftOutputDir: String = "truncated_graph_thrift"
-  val topKRightNounsMHOutputDir: String = "top_k_right_nouns_mh"
-  val topKRightNounsOutputDir: String = "top_k_right_nouns"
-  val fullMultiTypeGraphThriftOutputDir: String = "full_graph_thrift"
-  val HalfLifeInDaysForFavScore = 100
-  val NumTopNounsForUnknownRightNodeType = 20
-  val GlobalDefaultMinFrequencyOfRightNodeType = 100
-  val TopKRightNounsForMHDump = 1000
-
-  // the topK most frequent nouns for each engagement type
-  val TopKConfig: Map[RightNodeType, Int] = Map(
-    RightNodeType.FollowUser -> 10000000, // 10M, current simclusters_v2 has this value set to 20M, providing this the most weight
-    RightNodeType.FavUser -> 5000000,
-    RightNodeType.BlockUser -> 1000000,
-    RightNodeType.AbuseReportUser -> 1000000,
-    RightNodeType.SpamReportUser -> 1000000,
-    RightNodeType.FollowTopic -> 5000,
-    RightNodeType.SignUpCountry -> 200,
-    RightNodeType.ConsumedLanguage -> 50,
-    RightNodeType.FavTweet -> 500000,
-    RightNodeType.ReplyTweet -> 500000,
-    RightNodeType.RetweetTweet -> 500000,
-    RightNodeType.NotifOpenOrClickTweet -> 500000,
-    RightNodeType.SearchQuery -> 500000
-  )
-  val SampledEmployeeIds: Set[Long] =
-    Set()
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/README.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/README.docx
new file mode 100644
index 000000000..7ba9ca173
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/README.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/README.md b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/README.md
deleted file mode 100644
index f258c9683..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Pre-requisites
-
-## Tutorial
-Follow the tutorial Batch Job on Dataflow Quickstart on how to run a simple batch job on Dataflow.
-
-## GCP setup
-
-Ensure `gcloud` CLI is installed and `application_default_credentials.json` has been generated.
-
-## Data access
-
-If you want to run an adhoc job with your ldap, you will need access to multiple LDAP groups to read the datasets.
-
-# Running the job
-
-### Running an adhoc job
-
-```bash
-export GCP_PROJECT_NAME='twttr-recos-ml-prod'
-
-./bazel bundle src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph:assemble-multi-type-graph-scio-adhoc-app
-
-bin/d6w create \
-  ${GCP_PROJECT_NAME}/us-central1/assemble-multi-type-graph-scio-adhoc-app \
-  src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-adhoc.d6w \
-  --jar dist/assemble-multi-type-graph-scio-adho-app.jar \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=${USER} \
-  --bind=profile.date="2021-11-04" \
-  --bind=profile.machine="n2-highmem-16"
-```
-
-### Scheduling the job on Workflow
-
-Scheduling a job will require a service account as `recos-platform`. 
-Remember this account will need permissions to read all the required dataset. 
-
-```bash
-export SERVICE_ACCOUNT='recos-platform'
-export GCP_PROJECT_NAME='twttr-recos-ml-prod'
-
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/assemble-multi-type-graph-scio-batch-app \
-  src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-batch.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name="recos-platform" \
-  --bind=profile.date="2021-11-04" \
-  --bind=profile.machine="n2-highmem-16"
-```
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-adhoc.d6w b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-adhoc.d6w
deleted file mode 100644
index 835c48e71..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-adhoc.d6w
+++ /dev/null
@@ -1,36 +0,0 @@
-# See
-# Checkout the README to see how to deploy the job
-
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  machine= Default(String, 'n2-highmem-16')
-
-job = Job(
-  name='assemble-multi-type-graph-scio-adhoc-app',
-  project='{{profile.project}}',
-  staging_bucket='{{profile.project}}',
-  service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-  region='us-central1',
-  worker_config=WorkerConfig(
-    num_workers=2,
-    worker_machine_type='{{profile.machine}}',
-    worker_disk_type=WorkerDiskType('HDD')
-  ),
-  extra_args={
-    "environment": '{{profile.environment}}',
-    "date": Quote('{{profile.date}}'),
-  },
-  deployment_config=BatchDeploymentConfig(
-    role='{{profile.user_name}}',
-    build_target='src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph:assemble-multi-type-graph-scio-adhoc-app',
-    gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-    statebird_config=StatebirdConfig(
-      batch_width='PT1H',
-      first_time='{{profile.date}}'
-    )
-  )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-adhoc.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-adhoc.docx
new file mode 100644
index 000000000..24e6c2d0f
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-adhoc.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-batch.d6w b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-batch.d6w
deleted file mode 100644
index 4734e9c0f..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-batch.d6w
+++ /dev/null
@@ -1,41 +0,0 @@
-# See
-# Checkout the README to see how to deploy the job
-
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'prod')
-  machine= Default(String, 'n2-highmem-16')
-
-job = Job(
-  name='assemble-multi-type-graph-scio-batch-app',
-  project='{{profile.project}}',
-  staging_bucket='{{profile.project}}',
-  service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-  region='us-central1',
-  worker_config=WorkerConfig(
-    num_workers=2,
-    worker_machine_type='{{profile.machine}}',
-    worker_disk_type=WorkerDiskType('HDD')
-  ),
-  extra_args={
-    "environment": '{{profile.environment}}',
-    "date": Quote('{{profile.date}}'),
-  },
-  deployment_config=BatchDeploymentConfig(
-    role='{{profile.user_name}}',
-    build_target='src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph:assemble-multi-type-graph-scio-batch-app',
-    gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-    environment='prod',
-    statebird_config=StatebirdConfig(
-      batch_width='P1W',
-      first_time='{{profile.date}}'
-    ),
-    workflow_config=WorkflowConfig(
-      play=True,
-    ),
-    timeout='PT18H'
-  )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-batch.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-batch.docx
new file mode 100644
index 000000000..b6a8bb609
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/assemble_multi_type_graph/assemble-multi-type-graph-scio-batch.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/BUILD b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/BUILD
deleted file mode 100644
index d8ca4cd90..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/BUILD
+++ /dev/null
@@ -1,13 +0,0 @@
-scala_library(
-    sources = [
-        "*.scala",
-    ],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "beam-internal/src/main/scala/com/twitter/beam/io/dal",
-        "beam-internal/src/main/scala/com/twitter/scio_internal/runner/dataflow",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources",
-        "src/scala/com/twitter/simclusters_v2/scalding",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/BUILD.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/BUILD.docx
new file mode 100644
index 000000000..4d314e6ba
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/MultiTypeGraphUtil.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/MultiTypeGraphUtil.docx
new file mode 100644
index 000000000..2860a3c21
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/MultiTypeGraphUtil.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/MultiTypeGraphUtil.scala b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/MultiTypeGraphUtil.scala
deleted file mode 100644
index 4a5cd67de..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common/MultiTypeGraphUtil.scala
+++ /dev/null
@@ -1,69 +0,0 @@
-package com.twitter.simclusters_v2.scio
-package multi_type_graph.common
-
-import com.spotify.scio.ScioContext
-import com.spotify.scio.values.SCollection
-import com.twitter.beam.io.dal.DAL
-import com.twitter.common.util.Clock
-import com.twitter.scalding_internal.job.RequiredBinaryComparators.ordSer
-import com.twitter.scalding_internal.multiformat.format.keyval.KeyVal
-import com.twitter.simclusters_v2.hdfs_sources.TruncatedMultiTypeGraphScioScalaDataset
-import com.twitter.simclusters_v2.thriftscala.LeftNode
-import com.twitter.simclusters_v2.thriftscala.Noun
-import com.twitter.simclusters_v2.thriftscala.RightNode
-import com.twitter.simclusters_v2.thriftscala.RightNodeType
-import com.twitter.util.Duration
-
-object MultiTypeGraphUtil {
-  val RootMHPath: String = "manhattan_sequence_files/multi_type_graph/"
-  val RootThriftPath: String = "processed/multi_type_graph/"
-  val AdhocRootPath = "adhoc/multi_type_graph/"
-
-  val nounOrdering: Ordering[Noun] = new Ordering[Noun] {
-    // We define an ordering for each noun type as specified in simclusters_v2/multi_type_graph.thrift
-    // Please make sure we don't remove anything here that's still a part of the union Noun thrift and
-    // vice versa, if we add a new noun type to thrift, an ordering for it needs to added here as well.
-    def nounTypeOrder(noun: Noun): Int = noun match {
-      case _: Noun.UserId => 0
-      case _: Noun.Country => 1
-      case _: Noun.Language => 2
-      case _: Noun.Query => 3
-      case _: Noun.TopicId => 4
-      case _: Noun.TweetId => 5
-    }
-
-    override def compare(x: Noun, y: Noun): Int = nounTypeOrder(x) compare nounTypeOrder(y)
-  }
-
-  val rightNodeTypeOrdering: Ordering[RightNodeType] = ordSer[RightNodeType]
-
-  val rightNodeOrdering: Ordering[RightNode] =
-    new Ordering[RightNode] {
-      override def compare(x: RightNode, y: RightNode): Int = {
-        Ordering
-          .Tuple2(rightNodeTypeOrdering, nounOrdering)
-          .compare((x.rightNodeType, x.noun), (y.rightNodeType, y.noun))
-      }
-    }
-
-  def getTruncatedMultiTypeGraph(
-    noOlderThan: Duration = Duration.fromDays(14)
-  )(
-    implicit sc: ScioContext
-  ): SCollection[(Long, RightNode, Double)] = {
-    sc.customInput(
-        "ReadTruncatedMultiTypeGraph",
-        DAL
-          .readMostRecentSnapshotNoOlderThan(
-            TruncatedMultiTypeGraphScioScalaDataset,
-            noOlderThan,
-            Clock.SYSTEM_CLOCK,
-            DAL.Environment.Prod
-          )
-      ).flatMap {
-        case KeyVal(LeftNode.UserId(userId), rightNodesList) =>
-          rightNodesList.rightNodeWithEdgeWeightList.map(rightNodeWithWeight =>
-            (userId, rightNodeWithWeight.rightNode, rightNodeWithWeight.weight))
-      }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/BUILD b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/BUILD
deleted file mode 100644
index fa06b6d7a..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/BUILD
+++ /dev/null
@@ -1,92 +0,0 @@
-scala_library(
-    name = "multi-type-graph-scio-sims-lib",
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":right_node_cosine_similarity_scio_adhoc-scala",
-        ":right_node_sim_hash_scio_adhoc-scala",
-        "3rdparty/jvm/com/twitter/bijection:scrooge",
-        "beam-internal/src/main/scala/com/twitter/beam/io/dal",
-        "beam-internal/src/main/scala/com/twitter/beam/io/manhattan",
-        "beam-internal/src/main/scala/com/twitter/beam/job",
-        "beam-internal/src/main/scala/com/twitter/beam/transform",
-        "beam-internal/src/main/scala/com/twitter/scio_internal/runner/dataflow",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources",
-        "src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/common",
-        "src/scala/com/twitter/wtf/dataflow/cosine_similarity/common",
-    ],
-)
-
-jvm_binary(
-    name = "multi-type-graph-sim-hash-scio-adhoc-app",
-    main = "com.twitter.simclusters_v2.scio.multi_type_graph.multi_type_graph_sims.RightNodeSimHashScioAdhocApp",
-    platform = "java8",
-    dependencies = [
-        ":multi-type-graph-scio-sims-lib",
-        "beam-internal/src/main/scala/com/twitter/beam/runner/dataflow",
-    ],
-)
-
-jvm_binary(
-    name = "multi-type-graph-sim-hash-scio-batch-app",
-    main = "com.twitter.simclusters_v2.scio.multi_type_graph.multi_type_graph_sims.RightNodeSimHashScioBatchApp",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":multi-type-graph-scio-sims-lib",
-        "beam-internal/src/main/scala/com/twitter/beam/runner/dataflow",
-    ],
-)
-
-jvm_binary(
-    name = "multi-type-graph-cosine-similarity-scio-adhoc-app",
-    main = "com.twitter.simclusters_v2.scio.multi_type_graph.multi_type_graph_sims.RightNodeCosineSimilarityScioAdhocApp",
-    platform = "java8",
-    dependencies = [
-        ":multi-type-graph-scio-sims-lib",
-        "beam-internal/src/main/scala/com/twitter/beam/runner/dataflow",
-    ],
-)
-
-jvm_binary(
-    name = "multi-type-graph-cosine-similarity-scio-batch-app",
-    main = "com.twitter.simclusters_v2.scio.multi_type_graph.multi_type_graph_sims.RightNodeCosineSimilarityScioBatchApp",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":multi-type-graph-scio-sims-lib",
-        "beam-internal/src/main/scala/com/twitter/beam/runner/dataflow",
-    ],
-)
-
-create_datasets(
-    base_name = "right_node_sim_hash_scio_adhoc",
-    java_schema = "com.twitter.simclusters_v2.thriftjava.RightNodeSimHashSketch",
-    platform = "java8",
-    role = "cassowary",
-    scala_schema = "com.twitter.simclusters_v2.thriftscala.RightNodeSimHashSketch",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    java_dependencies = [
-        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-java",
-    ],
-    scala_dependencies = [
-        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
-    ],
-)
-
-create_datasets(
-    base_name = "right_node_cosine_similarity_scio_adhoc",
-    key_type = "com.twitter.simclusters_v2.thriftscala.RightNode",
-    platform = "java8",
-    role = "cassowary",
-    scala_schema = "com.twitter.simclusters_v2.hdfs_sources.injections.MultiTypeGraphInjections.similarRightNodesInjection",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "com.twitter.simclusters_v2.thriftscala.SimilarRightNodes",
-    scala_dependencies = [
-        "src/scala/com/twitter/scalding_internal/multiformat/format",
-        "src/scala/com/twitter/simclusters_v2/hdfs_sources/injections",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/BUILD.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/BUILD.docx
new file mode 100644
index 000000000..33940a6d4
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/Config.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/Config.docx
new file mode 100644
index 000000000..cdb8c7d1f
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/Config.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/Config.scala b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/Config.scala
deleted file mode 100644
index de0dc39c0..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/Config.scala
+++ /dev/null
@@ -1,18 +0,0 @@
-package com.twitter.simclusters_v2.scio
-package multi_type_graph.multi_type_graph_sims
-
-object Config {
-  // Config settings for RightNodeSimHashScioBaseApp job
-  // Number of hashes to generate in the sketch
-  val numHashes: Int = 8192 // each is a bit, so this results in 1KB uncompressed sketch/user
-  // Reduce skew by letting each reducers process a limited number of followers/user
-  val maxNumNeighborsPerReducers: Int = 300000
-  val simsHashJobOutputDirectory: String = "right_node/sims/sim_hash"
-
-  // Config settings for RightNodeCosineSimilarityScioBaseApp job
-  val numSims: Int = 500
-  val minCosineSimilarityThreshold: Double = 0.01
-  val maxOutDegree: Int = 10000
-  val cosineSimJobOutputDirectory = "right_node/sims/cosine_similarity"
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioApp.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioApp.docx
new file mode 100644
index 000000000..328483e5a
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioApp.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioApp.scala b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioApp.scala
deleted file mode 100644
index 6c064be9b..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioApp.scala
+++ /dev/null
@@ -1,55 +0,0 @@
-package com.twitter.simclusters_v2.scio
-package multi_type_graph.multi_type_graph_sims
-
-import com.twitter.dal.client.dataset.KeyValDALDataset
-import com.twitter.scalding_internal.multiformat.format.keyval.KeyVal
-import com.twitter.simclusters_v2.hdfs_sources.RightNodeCosineSimilarityScioScalaDataset
-import com.twitter.simclusters_v2.thriftscala.RightNode
-import com.twitter.simclusters_v2.thriftscala.SimilarRightNodes
-import com.twitter.wtf.scalding.jobs.cosine_similarity.common.ApproximateMatrixSelfTransposeMultiplicationJob
-
-/**
-Build:
-./bazel bundle src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims:multi-type-graph-cosine-similarity-scio-adhoc-app
-
-To kick off an adhoc run:
-bin/d6w create \
-  ${GCP_PROJECT_NAME}/us-central1/multi-type-graph-cosine-similarity-scio-adhoc-app \
-  src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-adhoc.d6w \
-  --jar dist/multi-type-graph-cosine-similarity-scio-adhoc-app.jar \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=${USER} \
-  --bind=profile.date="2022-01-16" \
-  --bind=profile.machine="n2d-highmem-16" --ignore-existing
- */
-
-object RightNodeCosineSimilarityScioAdhocApp extends RightNodeCosineSimilarityScioBaseApp {
-  override val isAdhoc = true
-  override val cosineSimKeyValSnapshotDataset: KeyValDALDataset[
-    KeyVal[RightNode, SimilarRightNodes]
-  ] =
-    RightNodeCosineSimilarityScioAdhocScalaDataset
-  override val filterCandidateSimilarityPair: (Double, Double, Double) => Boolean =
-    ApproximateMatrixSelfTransposeMultiplicationJob.filterCandidateSimilarityPair
-}
-
-/**
-To deploy the job:
-
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/multi-type-graph-cosine-similarity-scio-batch-app \
-  src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-batch.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=recos-platform \
-  --bind=profile.date="2021-12-01" \
-  --bind=profile.machine="n2d-highmem-16"
- */
-object RightNodeCosineSimilarityScioBatchApp extends RightNodeCosineSimilarityScioBaseApp {
-  override val isAdhoc = false
-  override val cosineSimKeyValSnapshotDataset: KeyValDALDataset[
-    KeyVal[RightNode, SimilarRightNodes]
-  ] =
-    RightNodeCosineSimilarityScioScalaDataset
-  override val filterCandidateSimilarityPair: (Double, Double, Double) => Boolean =
-    ApproximateMatrixSelfTransposeMultiplicationJob.filterCandidateSimilarityPair
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioBaseApp.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioBaseApp.docx
new file mode 100644
index 000000000..7ef5d4b7b
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioBaseApp.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioBaseApp.scala b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioBaseApp.scala
deleted file mode 100644
index 963178f7b..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeCosineSimilarityScioBaseApp.scala
+++ /dev/null
@@ -1,96 +0,0 @@
-package com.twitter.simclusters_v2.scio
-package multi_type_graph.multi_type_graph_sims
-
-import com.spotify.scio.ScioContext
-import com.spotify.scio.coders.Coder
-import com.spotify.scio.values.SCollection
-import com.twitter.beam.io.dal.DAL
-import com.twitter.beam.io.fs.multiformat.PathLayout
-import com.twitter.beam.job.DateRangeOptions
-import com.twitter.common.util.Clock
-import com.twitter.dal.client.dataset.KeyValDALDataset
-import com.twitter.dal.client.dataset.SnapshotDALDataset
-import com.twitter.scalding_internal.multiformat.format.keyval.KeyVal
-import com.twitter.scio_internal.coders.ThriftStructLazyBinaryScroogeCoder
-import com.twitter.scio_internal.job.ScioBeamJob
-import com.twitter.scrooge.ThriftStruct
-import com.twitter.simclusters_v2.hdfs_sources.RightNodeSimHashScioScalaDataset
-import com.twitter.simclusters_v2.scio.multi_type_graph.common.MultiTypeGraphUtil
-import com.twitter.simclusters_v2.thriftscala._
-import com.twitter.util.Duration
-import com.twitter.wtf.dataflow.cosine_similarity.ApproximateMatrixSelfTransposeMultiplicationJob
-import java.time.Instant
-
-trait RightNodeCosineSimilarityScioBaseApp
-    extends ScioBeamJob[DateRangeOptions]
-    with ApproximateMatrixSelfTransposeMultiplicationJob[RightNode] {
-  override implicit def scroogeCoder[T <: ThriftStruct: Manifest]: Coder[T] =
-    ThriftStructLazyBinaryScroogeCoder.scroogeCoder
-  override val ordering: Ordering[RightNode] = MultiTypeGraphUtil.rightNodeOrdering
-
-  val isAdhoc: Boolean
-  val cosineSimKeyValSnapshotDataset: KeyValDALDataset[KeyVal[RightNode, SimilarRightNodes]]
-  val rightNodeSimHashSnapshotDataset: SnapshotDALDataset[RightNodeSimHashSketch] =
-    RightNodeSimHashScioScalaDataset
-  val cosineSimJobOutputDirectory: String = Config.cosineSimJobOutputDirectory
-
-  override def graph(
-    implicit sc: ScioContext,
-    coder: Coder[RightNode]
-  ): SCollection[(Long, RightNode, Double)] =
-    MultiTypeGraphUtil.getTruncatedMultiTypeGraph(noOlderThan = Duration.fromDays(14))
-
-  override def simHashSketches(
-    implicit sc: ScioContext,
-    coder: Coder[RightNode]
-  ): SCollection[(RightNode, Array[Byte])] = {
-    sc.customInput(
-        "ReadSimHashSketches",
-        DAL
-          .readMostRecentSnapshotNoOlderThan(
-            rightNodeSimHashSnapshotDataset,
-            Duration.fromDays(14),
-            Clock.SYSTEM_CLOCK,
-            DAL.Environment.Prod
-          )
-      ).map { sketch =>
-        sketch.rightNode -> sketch.simHashOfEngagers.toArray
-      }
-  }
-
-  override def configurePipeline(
-    sc: ScioContext,
-    opts: DateRangeOptions
-  ): Unit = {
-    implicit def scioContext: ScioContext = sc
-    // DAL.Environment variable for WriteExecs
-    val dalEnv = if (isAdhoc) DAL.Environment.Dev else DAL.Environment.Prod
-
-    val topKRightNodes: SCollection[(RightNode, SimilarRightNodes)] = topK
-      .collect {
-        case (rightNode, simRightNodes) =>
-          val sims = simRightNodes.collect {
-            case (simRightNode, score) => SimilarRightNode(simRightNode, score)
-          }
-          (rightNode, SimilarRightNodes(sims))
-      }
-
-    topKRightNodes
-      .map {
-        case (rightNode, sims) => KeyVal(rightNode, sims)
-      }.saveAsCustomOutput(
-        name = "WriteRightNodeCosineSimilarityDataset",
-        DAL.writeVersionedKeyVal(
-          cosineSimKeyValSnapshotDataset,
-          PathLayout.VersionedPath(prefix =
-            ((if (!isAdhoc)
-                MultiTypeGraphUtil.RootMHPath
-              else
-                MultiTypeGraphUtil.AdhocRootPath)
-              + Config.cosineSimJobOutputDirectory)),
-          instant = Instant.ofEpochMilli(opts.interval.getEndMillis - 1L),
-          environmentOverride = dalEnv,
-        )
-      )
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioApp.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioApp.docx
new file mode 100644
index 000000000..da9a70e5c
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioApp.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioApp.scala b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioApp.scala
deleted file mode 100644
index f485b52ce..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioApp.scala
+++ /dev/null
@@ -1,43 +0,0 @@
-package com.twitter.simclusters_v2.scio
-package multi_type_graph.multi_type_graph_sims
-
-import com.twitter.dal.client.dataset.SnapshotDALDataset
-import com.twitter.simclusters_v2.hdfs_sources.RightNodeSimHashScioScalaDataset
-import com.twitter.simclusters_v2.thriftscala.RightNodeSimHashSketch
-
-/**
-Build:
-./bazel bundle src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims:multi-type-graph-sim-hash-scio-adhoc-app
-
-To kick off an adhoc run:
-bin/d6w create \
-  ${GCP_PROJECT_NAME}/us-central1/multi-type-graph-sim-hash-scio-adhoc-app \
-  src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-adhoc.d6w \
-  --jar dist/multi-type-graph-sim-hash-scio-adhoc-app.jar \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=${USER} \
-  --bind=profile.date="2021-12-01" \
-  --bind=profile.machine="n2d-highmem-16" --ignore-existing
- */
-object RightNodeSimHashScioAdhocApp extends RightNodeSimHashScioBaseApp {
-  override val isAdhoc: Boolean = true
-  override val rightNodeSimHashSnapshotDataset: SnapshotDALDataset[RightNodeSimHashSketch] =
-    RightNodeSimHashScioAdhocScalaDataset
-}
-
-/**
-To deploy the job:
-
-bin/d6w schedule \
-  ${GCP_PROJECT_NAME}/us-central1/multi-type-graph-sim-hash-scio-batch-app \
-  src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-batch.d6w \
-  --bind=profile.project=${GCP_PROJECT_NAME} \
-  --bind=profile.user_name=recos-platform \
-  --bind=profile.date="2021-12-01" \
-  --bind=profile.machine="n2d-highmem-16"
- */
-object RightNodeSimHashScioBatchApp extends RightNodeSimHashScioBaseApp {
-  override val isAdhoc: Boolean = false
-  override val rightNodeSimHashSnapshotDataset: SnapshotDALDataset[RightNodeSimHashSketch] =
-    RightNodeSimHashScioScalaDataset
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioBaseApp.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioBaseApp.docx
new file mode 100644
index 000000000..0e768c660
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioBaseApp.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioBaseApp.scala b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioBaseApp.scala
deleted file mode 100644
index e17fe5a15..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/RightNodeSimHashScioBaseApp.scala
+++ /dev/null
@@ -1,65 +0,0 @@
-package com.twitter.simclusters_v2.scio
-package multi_type_graph.multi_type_graph_sims
-
-import com.spotify.scio.ScioContext
-import com.spotify.scio.coders.Coder
-import com.spotify.scio.values.SCollection
-import com.twitter.beam.io.dal.DAL
-import com.twitter.beam.io.fs.multiformat.DiskFormat
-import com.twitter.beam.io.fs.multiformat.PathLayout
-import com.twitter.beam.job.DateRangeOptions
-import com.twitter.dal.client.dataset.SnapshotDALDataset
-import com.twitter.scio_internal.coders.ThriftStructLazyBinaryScroogeCoder
-import com.twitter.scio_internal.job.ScioBeamJob
-import com.twitter.scrooge.ThriftStruct
-import com.twitter.simclusters_v2.scio.multi_type_graph.common.MultiTypeGraphUtil
-import com.twitter.simclusters_v2.thriftscala.RightNode
-import com.twitter.simclusters_v2.thriftscala.RightNodeSimHashSketch
-import com.twitter.util.Duration
-import com.twitter.wtf.dataflow.cosine_similarity.SimHashJob
-import java.time.Instant
-
-trait RightNodeSimHashScioBaseApp extends ScioBeamJob[DateRangeOptions] with SimHashJob[RightNode] {
-  override implicit def scroogeCoder[T <: ThriftStruct: Manifest]: Coder[T] =
-    ThriftStructLazyBinaryScroogeCoder.scroogeCoder
-  override val ordering: Ordering[RightNode] = MultiTypeGraphUtil.rightNodeOrdering
-
-  val isAdhoc: Boolean
-  val rightNodeSimHashSnapshotDataset: SnapshotDALDataset[RightNodeSimHashSketch]
-  val simsHashJobOutputDirectory: String = Config.simsHashJobOutputDirectory
-
-  override def graph(
-    implicit sc: ScioContext,
-  ): SCollection[(Long, RightNode, Double)] =
-    MultiTypeGraphUtil.getTruncatedMultiTypeGraph(noOlderThan = Duration.fromDays(14))
-
-  override def configurePipeline(sc: ScioContext, opts: DateRangeOptions): Unit = {
-    implicit def scioContext: ScioContext = sc
-
-    // DAL.Environment variable for WriteExecs
-    val dalEnv = if (isAdhoc) DAL.Environment.Dev else DAL.Environment.Prod
-
-    val sketches = computeSimHashSketchesForWeightedGraph(graph)
-      .map {
-        case (rightNode, sketch, norm) => RightNodeSimHashSketch(rightNode, sketch, norm)
-      }
-
-    // Write SimHashSketches to DAL
-    sketches
-      .saveAsCustomOutput(
-        name = "WriteSimHashSketches",
-        DAL.writeSnapshot(
-          rightNodeSimHashSnapshotDataset,
-          PathLayout.FixedPath(
-            ((if (!isAdhoc)
-                MultiTypeGraphUtil.RootThriftPath
-              else
-                MultiTypeGraphUtil.AdhocRootPath)
-              + simsHashJobOutputDirectory)),
-          Instant.ofEpochMilli(opts.interval.getEndMillis - 1L),
-          DiskFormat.Thrift(),
-          environmentOverride = dalEnv
-        )
-      )
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-adhoc.d6w b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-adhoc.d6w
deleted file mode 100644
index 2bdc591cf..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-adhoc.d6w
+++ /dev/null
@@ -1,33 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  machine = Default(String, 'n2d-highmem-16')
-
-job = Job(
-  name='multi-type-graph-cosine-similarity-scio-adhoc-app',
-  project='{{profile.project}}',
-  staging_bucket='{{profile.project}}',
-  service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-  region='us-central1',
-  worker_config=WorkerConfig(
-    num_workers=2,
-    worker_machine_type='{{profile.machine}}',
-    worker_disk_type=WorkerDiskType('HDD'),
-  ),
-  extra_args={
-    "environment": '{{profile.environment}}',
-    "date": Quote('{{profile.date}}'),
-  },
-  deployment_config=BatchDeploymentConfig(
-    role='{{profile.user_name}}',
-    build_target='src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims:multi-type-graph-cosine-similarity-scio-adhoc-app',
-    gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-    statebird_config=StatebirdConfig(
-      batch_width='PT1H',
-      first_time='{{profile.date}}'
-    )
-  )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-adhoc.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-adhoc.docx
new file mode 100644
index 000000000..8959f84d6
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-adhoc.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-batch.d6w b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-batch.d6w
deleted file mode 100644
index b88bcd094..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-batch.d6w
+++ /dev/null
@@ -1,39 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'prod')
-  machine = Default(String, 'n2d-highmem-16')
-
-job = Job(
-  name='multi-type-graph-cosine-similarity-scio-batch-app',
-  project='{{profile.project}}',
-  staging_bucket='{{profile.project}}',
-  service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-  region='us-central1',
-  worker_config=WorkerConfig(
-    num_workers=2,
-    worker_machine_type='{{profile.machine}}',
-    worker_disk_type=WorkerDiskType('HDD'),
-  ),
-  extra_args={
-    "environment": '{{profile.environment}}',
-    "date": Quote('{{profile.date}}'),
-  },
-  deployment_config=BatchDeploymentConfig(
-    role='{{profile.user_name}}',
-    build_target='src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims:multi-type-graph-cosine-similarity-scio-batch-app',
-    gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-    environment='prod',
-    statebird_config=StatebirdConfig(
-      batch_width='P1W',
-      first_time='{{profile.date}}'
-    ),
-    workflow_config=WorkflowConfig(
-      play=True,
-    ),
-    timeout='PT50H'
-  )
-)
-
-jobs=[job]
-
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-batch.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-batch.docx
new file mode 100644
index 000000000..32c5adf01
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/cosine-similarity-scio-batch.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-adhoc.d6w b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-adhoc.d6w
deleted file mode 100644
index ee653aabd..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-adhoc.d6w
+++ /dev/null
@@ -1,33 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'dev')
-  machine = Default(String, 'n2d-highmem-16')
-
-job = Job(
-  name='multi-type-graph-sim-hash-scio-adhoc-app',
-  project='{{profile.project}}',
-  staging_bucket='{{profile.project}}',
-  service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-  region='us-central1',
-  worker_config=WorkerConfig(
-    num_workers=2,
-    worker_machine_type='{{profile.machine}}',
-    worker_disk_type=WorkerDiskType('HDD'),
-  ),
-  extra_args={
-    "environment": '{{profile.environment}}',
-    "date": Quote('{{profile.date}}'),
-  },
-  deployment_config=BatchDeploymentConfig(
-    role='{{profile.user_name}}',
-    build_target='src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims:multi-type-graph-sim-hash-scio-adhoc-app',
-    gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-    statebird_config=StatebirdConfig(
-      batch_width='PT1H',
-      first_time='{{profile.date}}'
-    )
-  )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-adhoc.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-adhoc.docx
new file mode 100644
index 000000000..2f631eee5
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-adhoc.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-batch.d6w b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-batch.d6w
deleted file mode 100644
index ff6a7b84c..000000000
--- a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-batch.d6w
+++ /dev/null
@@ -1,38 +0,0 @@
-class Profile(Struct):
-  project = Required(String)
-  date = Required(String)
-  environment = Default(String, 'prod')
-  machine = Default(String, 'n2d-highmem-16')
-
-job = Job(
-  name='multi-type-graph-sim-hash-scio-batch-app',
-  project='{{profile.project}}',
-  staging_bucket='{{profile.project}}',
-  service_account='{{profile.user_name}}-shdw@twttr-dp-svc-accounts.iam.gserviceaccount.com',
-  region='us-central1',
-  worker_config=WorkerConfig(
-    num_workers=2,
-    worker_machine_type='{{profile.machine}}',
-    worker_disk_type=WorkerDiskType('HDD'),
-  ),
-  extra_args={
-    "environment": '{{profile.environment}}',
-    "date": Quote('{{profile.date}}'),
-  },
-  deployment_config=BatchDeploymentConfig(
-    role='{{profile.user_name}}',
-    build_target='src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims:multi-type-graph-sim-hash-scio-batch-app',
-    gcp_deployment_credentials='/var/lib/tss/keys/{{profile.user_name}}/cloud/gcp/dp/shadow.json',
-    environment='prod',
-    statebird_config=StatebirdConfig(
-      batch_width='P1W',
-      first_time='{{profile.date}}'
-    ),
-    workflow_config=WorkflowConfig(
-      play=True,
-    ),
-    timeout='PT20H'
-  )
-)
-
-jobs=[job]
diff --git a/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-batch.docx b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-batch.docx
new file mode 100644
index 000000000..120682b89
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/scio/multi_type_graph/multi_type_graph_sims/sim-hash-scio-batch.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/score/AggregatedScoreStore.docx b/src/scala/com/twitter/simclusters_v2/score/AggregatedScoreStore.docx
new file mode 100644
index 000000000..7e98009f7
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/score/AggregatedScoreStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/score/AggregatedScoreStore.scala b/src/scala/com/twitter/simclusters_v2/score/AggregatedScoreStore.scala
deleted file mode 100644
index 31734f226..000000000
--- a/src/scala/com/twitter/simclusters_v2/score/AggregatedScoreStore.scala
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.twitter.simclusters_v2.score
-
-import com.twitter.simclusters_v2.thriftscala.{ScoreId => ThriftScoreId, Score => ThriftScore}
-import com.twitter.storehaus.ReadableStore
-
-/**
- * A wrapper class, used to aggregate the scores calculated by other score stores. It relies on the
- * results of other ScoreStores registered in the ScoreFacadeStore.
- */
-trait AggregatedScoreStore extends ReadableStore[ThriftScoreId, ThriftScore] {
-
-  // The underlyingScoreStore relies on [[ScoreFacadeStore]] to finish the dependency injection.
-  protected var scoreFacadeStore: ReadableStore[ThriftScoreId, ThriftScore] = ReadableStore.empty
-
-  /**
-   * When registering this store in a ScoreFacadeStore, the facade store calls this function to
-   * provide references to other score stores.
-   */
-  private[score] def set(facadeStore: ReadableStore[ThriftScoreId, ThriftScore]): Unit = {
-    this.synchronized {
-      scoreFacadeStore = facadeStore
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/score/BUILD b/src/scala/com/twitter/simclusters_v2/score/BUILD
deleted file mode 100644
index 13e8c07f6..000000000
--- a/src/scala/com/twitter/simclusters_v2/score/BUILD
+++ /dev/null
@@ -1,9 +0,0 @@
-scala_library(
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "finagle/finagle-stats",
-        "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
-        "src/scala/com/twitter/simclusters_v2/stores",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/score/BUILD.docx b/src/scala/com/twitter/simclusters_v2/score/BUILD.docx
new file mode 100644
index 000000000..12ead8c79
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/score/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/score/Score.docx b/src/scala/com/twitter/simclusters_v2/score/Score.docx
new file mode 100644
index 000000000..5b50c192c
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/score/Score.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/score/Score.scala b/src/scala/com/twitter/simclusters_v2/score/Score.scala
deleted file mode 100644
index c12acf97e..000000000
--- a/src/scala/com/twitter/simclusters_v2/score/Score.scala
+++ /dev/null
@@ -1,22 +0,0 @@
-package com.twitter.simclusters_v2.score
-
-import com.twitter.simclusters_v2.thriftscala.{Score => ThriftScore}
-
-/**
- * A uniform value type for all kinds of Calculation Score.
- **/
-case class Score(score: Double) {
-
-  implicit lazy val toThrift: ThriftScore = {
-    ThriftScore(score)
-  }
-}
-
-object Score {
-
-  /**
-   * Only support Double Type Thrift score
-   */
-  implicit val fromThriftScore: ThriftScore => Score = { thriftScore => Score(thriftScore.score) }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/score/ScoreFacadeStore.docx b/src/scala/com/twitter/simclusters_v2/score/ScoreFacadeStore.docx
new file mode 100644
index 000000000..1e0c863f3
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/score/ScoreFacadeStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/score/ScoreFacadeStore.scala b/src/scala/com/twitter/simclusters_v2/score/ScoreFacadeStore.scala
deleted file mode 100644
index ac084e737..000000000
--- a/src/scala/com/twitter/simclusters_v2/score/ScoreFacadeStore.scala
+++ /dev/null
@@ -1,103 +0,0 @@
-package com.twitter.simclusters_v2.score
-
-import com.twitter.finagle.stats.BroadcastStatsReceiver
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.hermit.store.common.ObservedReadableStore
-import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm
-import com.twitter.simclusters_v2.thriftscala.{ScoreId => ThriftScoreId}
-import com.twitter.simclusters_v2.thriftscala.{Score => ThriftScore}
-import com.twitter.storehaus.ReadableStore
-import com.twitter.util.Future
-
-/**
- * Provide a uniform access layer for all kind of Score.
- * @param readableStores readable stores indexed by the ScoringAlgorithm they implement
- */
-class ScoreFacadeStore private (
-  stores: Map[ScoringAlgorithm, ReadableStore[ThriftScoreId, ThriftScore]])
-    extends ReadableStore[ThriftScoreId, ThriftScore] {
-
-  override def get(k: ThriftScoreId): Future[Option[ThriftScore]] = {
-    findStore(k).get(k)
-  }
-
-  // Override the multiGet for better batch performance.
-  override def multiGet[K1 <: ThriftScoreId](ks: Set[K1]): Map[K1, Future[Option[ThriftScore]]] = {
-    if (ks.isEmpty) {
-      Map.empty
-    } else {
-      val head = ks.head
-      val notSameType = ks.exists(k => k.algorithm != head.algorithm)
-      if (!notSameType) {
-        findStore(head).multiGet(ks)
-      } else {
-        // Generate a large amount temp objects.
-        // For better performance, avoid querying the multiGet with more than one kind of embedding
-        ks.groupBy(id => id.algorithm).flatMap {
-          case (_, ks) =>
-            findStore(ks.head).multiGet(ks)
-        }
-      }
-    }
-  }
-
-  // If not store mapping, fast return a IllegalArgumentException.
-  private def findStore(id: ThriftScoreId): ReadableStore[ThriftScoreId, ThriftScore] = {
-    stores.get(id.algorithm) match {
-      case Some(store) => store
-      case None =>
-        throw new IllegalArgumentException(s"The Scoring Algorithm ${id.algorithm} doesn't exist.")
-    }
-  }
-
-}
-
-object ScoreFacadeStore {
-  /*
-  Build a ScoreFacadeStore which exposes stats for all requests (under "all") and per scoring algorithm:
-
-    score_facade_store/all/<observed readable store metrics for all requests>
-    score_facade_store/<scoring algorithm>/<observed readable store metrics for this algorithm's requests>
-
-  Stores in aggregatedStores may reference stores in readableStores. An instance of ScoreFacadeStore
-  is passed to them after instantiation.
-   */
-  def buildWithMetrics(
-    readableStores: Map[ScoringAlgorithm, ReadableStore[ThriftScoreId, ThriftScore]],
-    aggregatedStores: Map[ScoringAlgorithm, AggregatedScoreStore],
-    statsReceiver: StatsReceiver
-  ) = {
-    val scopedStatsReceiver = statsReceiver.scope("score_facade_store")
-
-    def wrapStore(
-      scoringAlgorithm: ScoringAlgorithm,
-      store: ReadableStore[ThriftScoreId, ThriftScore]
-    ): ReadableStore[ThriftScoreId, ThriftScore] = {
-      val sr = BroadcastStatsReceiver(
-        Seq(
-          scopedStatsReceiver.scope("all"),
-          scopedStatsReceiver.scope(scoringAlgorithm.name)
-        ))
-      ObservedReadableStore(store)(sr)
-    }
-
-    val stores = (readableStores ++ aggregatedStores).map {
-      case (algo, store) => algo -> wrapStore(algo, store)
-    }
-    val store = new ScoreFacadeStore(stores = stores)
-
-    /*
-    AggregatedScores aggregate scores from multiple non-aggregated stores. They access these via the
-    ScoreFacadeStore itself, and therefore must be passed an instance of it after it has been
-    constructed.
-     */
-    assert(
-      readableStores.keySet.forall(algorithm => !aggregatedStores.keySet.contains(algorithm)),
-      "Keys for stores are disjoint")
-
-    aggregatedStores.values.foreach(_.set(store))
-
-    store
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/score/ScoreId.docx b/src/scala/com/twitter/simclusters_v2/score/ScoreId.docx
new file mode 100644
index 000000000..7dd83b6c8
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/score/ScoreId.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/score/ScoreId.scala b/src/scala/com/twitter/simclusters_v2/score/ScoreId.scala
deleted file mode 100644
index da045ecda..000000000
--- a/src/scala/com/twitter/simclusters_v2/score/ScoreId.scala
+++ /dev/null
@@ -1,129 +0,0 @@
-package com.twitter.simclusters_v2.score
-
-import com.twitter.simclusters_v2.common.SimClustersEmbeddingId._
-import com.twitter.simclusters_v2.thriftscala.{
-  InternalId,
-  ScoreInternalId,
-  ScoringAlgorithm,
-  SimClustersEmbeddingId,
-  GenericPairScoreId => ThriftGenericPairScoreId,
-  ScoreId => ThriftScoreId,
-  SimClustersEmbeddingPairScoreId => ThriftSimClustersEmbeddingPairScoreId
-}
-
-/**
- * A uniform Identifier type for all kinds of Calculation Score.
- **/
-trait ScoreId {
-
-  def algorithm: ScoringAlgorithm
-
-  /**
-   * Convert to a Thrift object. Throw a exception if the operation is not override.
-   */
-  implicit def toThrift: ThriftScoreId =
-    throw new UnsupportedOperationException(s"ScoreId $this doesn't support Thrift format")
-}
-
-object ScoreId {
-
-  implicit val fromThriftScoreId: ThriftScoreId => ScoreId = {
-    case scoreId @ ThriftScoreId(_, ScoreInternalId.GenericPairScoreId(_)) =>
-      PairScoreId.fromThriftScoreId(scoreId)
-    case scoreId @ ThriftScoreId(_, ScoreInternalId.SimClustersEmbeddingPairScoreId(_)) =>
-      SimClustersEmbeddingPairScoreId.fromThriftScoreId(scoreId)
-  }
-
-}
-
-/**
- * Generic Internal pairwise id. Support all the subtypes in InternalId, which includes TweetId,
- * UserId, EntityId and more combination ids.
- **/
-trait PairScoreId extends ScoreId {
-
-  def id1: InternalId
-  def id2: InternalId
-
-  override implicit lazy val toThrift: ThriftScoreId = {
-    ThriftScoreId(
-      algorithm,
-      ScoreInternalId.GenericPairScoreId(ThriftGenericPairScoreId(id1, id2))
-    )
-  }
-}
-
-object PairScoreId {
-
-  // The default PairScoreId assume id1 <= id2. It used to increase the cache hit rate.
-  def apply(algorithm: ScoringAlgorithm, id1: InternalId, id2: InternalId): PairScoreId = {
-    if (internalIdOrdering.lteq(id1, id2)) {
-      DefaultPairScoreId(algorithm, id1, id2)
-    } else {
-      DefaultPairScoreId(algorithm, id2, id1)
-    }
-  }
-
-  private case class DefaultPairScoreId(
-    algorithm: ScoringAlgorithm,
-    id1: InternalId,
-    id2: InternalId)
-      extends PairScoreId
-
-  implicit val fromThriftScoreId: ThriftScoreId => PairScoreId = {
-    case ThriftScoreId(algorithm, ScoreInternalId.GenericPairScoreId(pairScoreId)) =>
-      DefaultPairScoreId(algorithm, pairScoreId.id1, pairScoreId.id2)
-    case ThriftScoreId(algorithm, ScoreInternalId.SimClustersEmbeddingPairScoreId(pairScoreId)) =>
-      SimClustersEmbeddingPairScoreId(algorithm, pairScoreId.id1, pairScoreId.id2)
-  }
-
-}
-
-/**
- * ScoreId for a pair of SimClustersEmbedding.
- * Used for dot product, cosine similarity and other basic embedding operations.
- */
-trait SimClustersEmbeddingPairScoreId extends PairScoreId {
-  def embeddingId1: SimClustersEmbeddingId
-
-  def embeddingId2: SimClustersEmbeddingId
-
-  override def id1: InternalId = embeddingId1.internalId
-
-  override def id2: InternalId = embeddingId2.internalId
-
-  override implicit lazy val toThrift: ThriftScoreId = {
-    ThriftScoreId(
-      algorithm,
-      ScoreInternalId.SimClustersEmbeddingPairScoreId(
-        ThriftSimClustersEmbeddingPairScoreId(embeddingId1, embeddingId2))
-    )
-  }
-}
-
-object SimClustersEmbeddingPairScoreId {
-
-  // The default PairScoreId assume id1 <= id2. It used to increase the cache hit rate.
-  def apply(
-    algorithm: ScoringAlgorithm,
-    id1: SimClustersEmbeddingId,
-    id2: SimClustersEmbeddingId
-  ): SimClustersEmbeddingPairScoreId = {
-    if (simClustersEmbeddingIdOrdering.lteq(id1, id2)) {
-      DefaultSimClustersEmbeddingPairScoreId(algorithm, id1, id2)
-    } else {
-      DefaultSimClustersEmbeddingPairScoreId(algorithm, id2, id1)
-    }
-  }
-
-  private case class DefaultSimClustersEmbeddingPairScoreId(
-    algorithm: ScoringAlgorithm,
-    embeddingId1: SimClustersEmbeddingId,
-    embeddingId2: SimClustersEmbeddingId)
-      extends SimClustersEmbeddingPairScoreId
-
-  implicit val fromThriftScoreId: ThriftScoreId => SimClustersEmbeddingPairScoreId = {
-    case ThriftScoreId(algorithm, ScoreInternalId.SimClustersEmbeddingPairScoreId(pairScoreId)) =>
-      SimClustersEmbeddingPairScoreId(algorithm, pairScoreId.id1, pairScoreId.id2)
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/score/ScoreStore.docx b/src/scala/com/twitter/simclusters_v2/score/ScoreStore.docx
new file mode 100644
index 000000000..285dad21f
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/score/ScoreStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/score/ScoreStore.scala b/src/scala/com/twitter/simclusters_v2/score/ScoreStore.scala
deleted file mode 100644
index 3aea91e1a..000000000
--- a/src/scala/com/twitter/simclusters_v2/score/ScoreStore.scala
+++ /dev/null
@@ -1,72 +0,0 @@
-package com.twitter.simclusters_v2.score
-
-import com.twitter.simclusters_v2.thriftscala.{Score => ThriftScore, ScoreId => ThriftScoreId}
-import com.twitter.storehaus.ReadableStore
-import com.twitter.util.Future
-
-/**
- * A Score Store is a readableStore with ScoreId as Key and Score as the Value.
- * It also needs to include the algorithm type.
- * A algorithm type should only be used by one Score Store in the application.
- */
-trait ScoreStore[K <: ScoreId] extends ReadableStore[K, Score] {
-
-  def fromThriftScoreId: ThriftScoreId => K
-
-  // Convert to a Thrift version.
-  def toThriftStore: ReadableStore[ThriftScoreId, ThriftScore] = {
-    this
-      .composeKeyMapping[ThriftScoreId](fromThriftScoreId)
-      .mapValues(_.toThrift)
-  }
-}
-
-/**
- * A generic Pairwise Score store.
- * Requires provide both left and right side feature hydration.
- */
-trait PairScoreStore[K <: PairScoreId, K1, K2, V1, V2] extends ScoreStore[K] {
-
-  def compositeKey1: K => K1
-  def compositeKey2: K => K2
-
-  // Left side feature hydration
-  def underlyingStore1: ReadableStore[K1, V1]
-
-  // Right side feature hydration
-  def underlyingStore2: ReadableStore[K2, V2]
-
-  def score: (V1, V2) => Future[Option[Double]]
-
-  override def get(k: K): Future[Option[Score]] = {
-    for {
-      vs <-
-        Future.join(underlyingStore1.get(compositeKey1(k)), underlyingStore2.get(compositeKey2(k)))
-      v <- vs match {
-        case (Some(v1), Some(v2)) =>
-          score(v1, v2)
-        case _ =>
-          Future.None
-      }
-    } yield {
-      v.map(buildScore)
-    }
-  }
-
-  override def multiGet[KK <: K](ks: Set[KK]): Map[KK, Future[Option[Score]]] = {
-
-    val v1Map = underlyingStore1.multiGet(ks.map { k => compositeKey1(k) })
-    val v2Map = underlyingStore2.multiGet(ks.map { k => compositeKey2(k) })
-
-    ks.map { k =>
-      k -> Future.join(v1Map(compositeKey1(k)), v2Map(compositeKey2(k))).flatMap {
-        case (Some(v1), Some(v2)) =>
-          score(v1, v2).map(_.map(buildScore))
-        case _ =>
-          Future.value(None)
-      }
-    }.toMap
-  }
-
-  private def buildScore(v: Double): Score = Score(v)
-}
diff --git a/src/scala/com/twitter/simclusters_v2/score/SimClustersEmbeddingPairScoreStore.docx b/src/scala/com/twitter/simclusters_v2/score/SimClustersEmbeddingPairScoreStore.docx
new file mode 100644
index 000000000..19805c507
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/score/SimClustersEmbeddingPairScoreStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/score/SimClustersEmbeddingPairScoreStore.scala b/src/scala/com/twitter/simclusters_v2/score/SimClustersEmbeddingPairScoreStore.scala
deleted file mode 100644
index ef0143711..000000000
--- a/src/scala/com/twitter/simclusters_v2/score/SimClustersEmbeddingPairScoreStore.scala
+++ /dev/null
@@ -1,201 +0,0 @@
-package com.twitter.simclusters_v2.score
-
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbeddingId, ScoreId => ThriftScoreId}
-import com.twitter.storehaus.ReadableStore
-import com.twitter.util.Future
-
-object SimClustersEmbeddingPairScoreStore {
-
-  /**
-   * Internal Instance of a SimClusters Embedding based Pair Score store.
-   */
-  private case class SimClustersEmbeddingInternalPairScoreStore(
-    simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding],
-    score: (SimClustersEmbedding, SimClustersEmbedding) => Future[Option[Double]])
-      extends PairScoreStore[
-        SimClustersEmbeddingPairScoreId,
-        SimClustersEmbeddingId,
-        SimClustersEmbeddingId,
-        SimClustersEmbedding,
-        SimClustersEmbedding
-      ] {
-
-    override val compositeKey1: SimClustersEmbeddingPairScoreId => SimClustersEmbeddingId =
-      _.embeddingId1
-    override val compositeKey2: SimClustersEmbeddingPairScoreId => SimClustersEmbeddingId =
-      _.embeddingId2
-
-    override def underlyingStore1: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
-      simClustersEmbeddingStore
-
-    override def underlyingStore2: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] =
-      simClustersEmbeddingStore
-
-    override def fromThriftScoreId: ThriftScoreId => SimClustersEmbeddingPairScoreId =
-      SimClustersEmbeddingPairScoreId.fromThriftScoreId
-  }
-
-  def buildDotProductStore(
-    simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-  ): PairScoreStore[
-    SimClustersEmbeddingPairScoreId,
-    SimClustersEmbeddingId,
-    SimClustersEmbeddingId,
-    SimClustersEmbedding,
-    SimClustersEmbedding
-  ] = {
-
-    def dotProduct: (SimClustersEmbedding, SimClustersEmbedding) => Future[Option[Double]] = {
-      case (embedding1, embedding2) =>
-        Future.value(Some(embedding1.dotProduct(embedding2)))
-    }
-
-    SimClustersEmbeddingInternalPairScoreStore(
-      simClustersEmbeddingStore,
-      dotProduct
-    )
-  }
-
-  def buildCosineSimilarityStore(
-    simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-  ): PairScoreStore[
-    SimClustersEmbeddingPairScoreId,
-    SimClustersEmbeddingId,
-    SimClustersEmbeddingId,
-    SimClustersEmbedding,
-    SimClustersEmbedding
-  ] = {
-
-    def cosineSimilarity: (SimClustersEmbedding, SimClustersEmbedding) => Future[Option[Double]] = {
-      case (embedding1, embedding2) =>
-        Future.value(Some(embedding1.cosineSimilarity(embedding2)))
-    }
-
-    SimClustersEmbeddingInternalPairScoreStore(
-      simClustersEmbeddingStore,
-      cosineSimilarity
-    )
-  }
-
-  def buildLogCosineSimilarityStore(
-    simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-  ): PairScoreStore[
-    SimClustersEmbeddingPairScoreId,
-    SimClustersEmbeddingId,
-    SimClustersEmbeddingId,
-    SimClustersEmbedding,
-    SimClustersEmbedding
-  ] = {
-
-    def logNormCosineSimilarity: (
-      SimClustersEmbedding,
-      SimClustersEmbedding
-    ) => Future[Option[Double]] = {
-      case (embedding1, embedding2) =>
-        Future.value(Some(embedding1.logNormCosineSimilarity(embedding2)))
-    }
-
-    SimClustersEmbeddingInternalPairScoreStore(
-      simClustersEmbeddingStore,
-      logNormCosineSimilarity
-    )
-  }
-
-  def buildExpScaledCosineSimilarityStore(
-    simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-  ): PairScoreStore[
-    SimClustersEmbeddingPairScoreId,
-    SimClustersEmbeddingId,
-    SimClustersEmbeddingId,
-    SimClustersEmbedding,
-    SimClustersEmbedding
-  ] = {
-
-    def expScaledCosineSimilarity: (
-      SimClustersEmbedding,
-      SimClustersEmbedding
-    ) => Future[Option[Double]] = {
-      case (embedding1, embedding2) =>
-        Future.value(Some(embedding1.expScaledCosineSimilarity(embedding2)))
-    }
-
-    SimClustersEmbeddingInternalPairScoreStore(
-      simClustersEmbeddingStore,
-      expScaledCosineSimilarity
-    )
-  }
-
-  def buildJaccardSimilarityStore(
-    simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-  ): PairScoreStore[
-    SimClustersEmbeddingPairScoreId,
-    SimClustersEmbeddingId,
-    SimClustersEmbeddingId,
-    SimClustersEmbedding,
-    SimClustersEmbedding
-  ] = {
-
-    def jaccardSimilarity: (
-      SimClustersEmbedding,
-      SimClustersEmbedding
-    ) => Future[Option[Double]] = {
-      case (embedding1, embedding2) =>
-        Future.value(Some(embedding1.jaccardSimilarity(embedding2)))
-    }
-
-    SimClustersEmbeddingInternalPairScoreStore(
-      simClustersEmbeddingStore,
-      jaccardSimilarity
-    )
-  }
-
-  def buildEuclideanDistanceStore(
-    simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-  ): PairScoreStore[
-    SimClustersEmbeddingPairScoreId,
-    SimClustersEmbeddingId,
-    SimClustersEmbeddingId,
-    SimClustersEmbedding,
-    SimClustersEmbedding
-  ] = {
-
-    def euclideanDistance: (
-      SimClustersEmbedding,
-      SimClustersEmbedding
-    ) => Future[Option[Double]] = {
-      case (embedding1, embedding2) =>
-        Future.value(Some(embedding1.euclideanDistance(embedding2)))
-    }
-
-    SimClustersEmbeddingInternalPairScoreStore(
-      simClustersEmbeddingStore,
-      euclideanDistance
-    )
-  }
-
-  def buildManhattanDistanceStore(
-    simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-  ): PairScoreStore[
-    SimClustersEmbeddingPairScoreId,
-    SimClustersEmbeddingId,
-    SimClustersEmbeddingId,
-    SimClustersEmbedding,
-    SimClustersEmbedding
-  ] = {
-
-    def manhattanDistance: (
-      SimClustersEmbedding,
-      SimClustersEmbedding
-    ) => Future[Option[Double]] = {
-      case (embedding1, embedding2) =>
-        Future.value(Some(embedding1.manhattanDistance(embedding2)))
-    }
-
-    SimClustersEmbeddingInternalPairScoreStore(
-      simClustersEmbeddingStore,
-      manhattanDistance
-    )
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/score/WeightedSumAggregatedScoreStore.docx b/src/scala/com/twitter/simclusters_v2/score/WeightedSumAggregatedScoreStore.docx
new file mode 100644
index 000000000..94cb69461
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/score/WeightedSumAggregatedScoreStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/score/WeightedSumAggregatedScoreStore.scala b/src/scala/com/twitter/simclusters_v2/score/WeightedSumAggregatedScoreStore.scala
deleted file mode 100644
index 8c1552c95..000000000
--- a/src/scala/com/twitter/simclusters_v2/score/WeightedSumAggregatedScoreStore.scala
+++ /dev/null
@@ -1,84 +0,0 @@
-package com.twitter.simclusters_v2.score
-
-import com.twitter.simclusters_v2.score.WeightedSumAggregatedScoreStore.WeightedSumAggregatedScoreParameter
-import com.twitter.simclusters_v2.thriftscala.{
-  EmbeddingType,
-  GenericPairScoreId,
-  ModelVersion,
-  ScoreInternalId,
-  ScoringAlgorithm,
-  SimClustersEmbeddingId,
-  Score => ThriftScore,
-  ScoreId => ThriftScoreId,
-  SimClustersEmbeddingPairScoreId => ThriftSimClustersEmbeddingPairScoreId
-}
-import com.twitter.util.Future
-
-/**
- * A generic store wrapper to aggregate the scores of N underlying stores in a weighted fashion.
- *
- */
-case class WeightedSumAggregatedScoreStore(parameters: Seq[WeightedSumAggregatedScoreParameter])
-    extends AggregatedScoreStore {
-
-  override def get(k: ThriftScoreId): Future[Option[ThriftScore]] = {
-    val underlyingScores = parameters.map { parameter =>
-      scoreFacadeStore
-        .get(ThriftScoreId(parameter.scoreAlgorithm, parameter.idTransform(k.internalId)))
-        .map(_.map(s => parameter.scoreTransform(s.score) * parameter.weight))
-    }
-    Future.collect(underlyingScores).map { scores =>
-      if (scores.exists(_.nonEmpty)) {
-        val newScore = scores.foldLeft(0.0) {
-          case (sum, maybeScore) =>
-            sum + maybeScore.getOrElse(0.0)
-        }
-        Some(ThriftScore(score = newScore))
-      } else {
-        // Return None if all of the underlying score is None.
-        None
-      }
-    }
-  }
-}
-
-object WeightedSumAggregatedScoreStore {
-
-  /**
-   * The parameter of WeightedSumAggregatedScoreStore. Create 0 to N parameters for a WeightedSum
-   * AggregatedScore Store. Please evaluate the performance before productionization any new score.
-   *
-   * @param scoreAlgorithm the underlying score algorithm name
-   * @param weight contribution to weighted sum of this sub-score
-   * @param idTransform transform the source ScoreInternalId to underlying score InternalId.
-   * @param scoreTransform function to apply to sub-score before adding to weighted sum
-   */
-  case class WeightedSumAggregatedScoreParameter(
-    scoreAlgorithm: ScoringAlgorithm,
-    weight: Double,
-    idTransform: ScoreInternalId => ScoreInternalId,
-    scoreTransform: Double => Double = identityScoreTransform)
-
-  val SameTypeScoreInternalIdTransform: ScoreInternalId => ScoreInternalId = { id => id }
-  val identityScoreTransform: Double => Double = { score => score }
-
-  // Convert Generic Internal Id to a SimClustersEmbeddingId
-  def genericPairScoreIdToSimClustersEmbeddingPairScoreId(
-    embeddingType1: EmbeddingType,
-    embeddingType2: EmbeddingType,
-    modelVersion: ModelVersion
-  ): ScoreInternalId => ScoreInternalId = {
-    case id: ScoreInternalId.GenericPairScoreId =>
-      ScoreInternalId.SimClustersEmbeddingPairScoreId(
-        ThriftSimClustersEmbeddingPairScoreId(
-          SimClustersEmbeddingId(embeddingType1, modelVersion, id.genericPairScoreId.id1),
-          SimClustersEmbeddingId(embeddingType2, modelVersion, id.genericPairScoreId.id2)
-        ))
-  }
-
-  val simClustersEmbeddingPairScoreIdToGenericPairScoreId: ScoreInternalId => ScoreInternalId = {
-    case ScoreInternalId.SimClustersEmbeddingPairScoreId(simClustersId) =>
-      ScoreInternalId.GenericPairScoreId(
-        GenericPairScoreId(simClustersId.id1.internalId, simClustersId.id2.internalId))
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/stores/BUILD b/src/scala/com/twitter/simclusters_v2/stores/BUILD
deleted file mode 100644
index 11bc8e7e6..000000000
--- a/src/scala/com/twitter/simclusters_v2/stores/BUILD
+++ /dev/null
@@ -1,14 +0,0 @@
-scala_library(
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "3rdparty/jvm/com/twitter/storehaus:core",
-        "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
-        "src/scala/com/twitter/simclusters_v2/common",
-        "src/scala/com/twitter/storehaus_internal/manhattan",
-        "src/scala/com/twitter/storehaus_internal/util",
-        "src/scala/com/twitter/wtf/scalding/jobs/injection",
-        "src/thrift/com/twitter/recos/entities:entities-thrift-scala",
-        "storage/clients/manhattan/client/src/main/scala",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/stores/BUILD.docx b/src/scala/com/twitter/simclusters_v2/stores/BUILD.docx
new file mode 100644
index 000000000..e09e8bf74
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/stores/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/stores/LanguageFilteredLocaleEntityEmbeddingStore.docx b/src/scala/com/twitter/simclusters_v2/stores/LanguageFilteredLocaleEntityEmbeddingStore.docx
new file mode 100644
index 000000000..ca2816877
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/stores/LanguageFilteredLocaleEntityEmbeddingStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/stores/LanguageFilteredLocaleEntityEmbeddingStore.scala b/src/scala/com/twitter/simclusters_v2/stores/LanguageFilteredLocaleEntityEmbeddingStore.scala
deleted file mode 100644
index e461e1ed2..000000000
--- a/src/scala/com/twitter/simclusters_v2/stores/LanguageFilteredLocaleEntityEmbeddingStore.scala
+++ /dev/null
@@ -1,96 +0,0 @@
-package com.twitter.simclusters_v2.stores
-
-import com.twitter.simclusters_v2.common.ClusterId
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.thriftscala.ClusterDetails
-import com.twitter.simclusters_v2.thriftscala.InternalId
-import com.twitter.simclusters_v2.thriftscala.ModelVersion
-import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
-import com.twitter.storehaus.ReadableStore
-import com.twitter.util.Future
-
-/**
- * Transfer a Entity SimClustersEmbedding to a language filtered embedding.
- * The new embedding only contains clusters whose main language is the same as the language field in
- * the SimClustersEmbeddingId.
- *
- * This store is special designed for Topic Tweet and Topic Follow Prompt.
- * Only support new Ids whose internalId is LocaleEntityId.
- */
-@deprecated
-case class LanguageFilteredLocaleEntityEmbeddingStore(
-  underlyingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding],
-  clusterDetailsStore: ReadableStore[(ModelVersion, ClusterId), ClusterDetails],
-  composeKeyMapping: SimClustersEmbeddingId => SimClustersEmbeddingId)
-    extends ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] {
-
-  import LanguageFilteredLocaleEntityEmbeddingStore._
-
-  override def get(k: SimClustersEmbeddingId): Future[Option[SimClustersEmbedding]] = {
-    for {
-      maybeEmbedding <- underlyingStore.get(composeKeyMapping(k))
-      maybeFilteredEmbedding <- maybeEmbedding match {
-        case Some(embedding) =>
-          embeddingsLanguageFilter(k, embedding).map(Some(_))
-        case None =>
-          Future.None
-      }
-    } yield maybeFilteredEmbedding
-  }
-
-  private def embeddingsLanguageFilter(
-    sourceEmbeddingId: SimClustersEmbeddingId,
-    simClustersEmbedding: SimClustersEmbedding
-  ): Future[SimClustersEmbedding] = {
-    val language = getLanguage(sourceEmbeddingId)
-    val modelVersion = sourceEmbeddingId.modelVersion
-
-    val clusterDetailKeys = simClustersEmbedding.sortedClusterIds.map { clusterId =>
-      (modelVersion, clusterId)
-    }.toSet
-
-    Future
-      .collect {
-        clusterDetailsStore.multiGet(clusterDetailKeys)
-      }.map { clusterDetailsMap =>
-        simClustersEmbedding.embedding.filter {
-          case (clusterId, _) =>
-            isDominantLanguage(
-              language,
-              clusterDetailsMap.getOrElse((modelVersion, clusterId), None))
-        }
-      }.map(SimClustersEmbedding(_))
-  }
-
-  private def isDominantLanguage(
-    requestLang: String,
-    clusterDetails: Option[ClusterDetails]
-  ): Boolean =
-    clusterDetails match {
-      case Some(details) =>
-        val dominantLanguage =
-          details.languageToFractionDeviceLanguage.map { langMap =>
-            langMap.maxBy {
-              case (_, score) => score
-            }._1
-          }
-
-        dominantLanguage.exists(_.equalsIgnoreCase(requestLang))
-      case _ => true
-    }
-
-}
-
-object LanguageFilteredLocaleEntityEmbeddingStore {
-
-  def getLanguage(simClustersEmbeddingId: SimClustersEmbeddingId): String = {
-    simClustersEmbeddingId match {
-      case SimClustersEmbeddingId(_, _, InternalId.LocaleEntityId(localeEntityId)) =>
-        localeEntityId.language
-      case _ =>
-        throw new IllegalArgumentException(
-          s"The Id $simClustersEmbeddingId doesn't contain Locale info")
-    }
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/stores/MultiTypeGraphStore.docx b/src/scala/com/twitter/simclusters_v2/stores/MultiTypeGraphStore.docx
new file mode 100644
index 000000000..2a03d6cc9
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/stores/MultiTypeGraphStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/stores/MultiTypeGraphStore.scala b/src/scala/com/twitter/simclusters_v2/stores/MultiTypeGraphStore.scala
deleted file mode 100644
index 656a61696..000000000
--- a/src/scala/com/twitter/simclusters_v2/stores/MultiTypeGraphStore.scala
+++ /dev/null
@@ -1,287 +0,0 @@
-package com.twitter.simclusters_v2.stores
-import com.twitter.bijection.Bufferable
-import com.twitter.bijection.Injection
-import com.twitter.bijection.scrooge.CompactScalaCodec
-import com.twitter.simclusters_v2.common.Language
-import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
-import com.twitter.simclusters_v2.thriftscala.LeftNode
-import com.twitter.simclusters_v2.thriftscala.NounWithFrequencyList
-import com.twitter.simclusters_v2.thriftscala.RightNode
-import com.twitter.simclusters_v2.thriftscala.RightNodeTypeStruct
-import com.twitter.simclusters_v2.thriftscala.RightNodeWithEdgeWeightList
-import com.twitter.simclusters_v2.thriftscala.SimilarRightNodes
-import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus_internal.manhattan.Apollo
-import com.twitter.storehaus_internal.manhattan.ManhattanRO
-import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
-import com.twitter.storehaus_internal.util.ApplicationID
-import com.twitter.storehaus_internal.util.DatasetName
-import com.twitter.storehaus_internal.util.HDFSPath
-import com.twitter.scalding_internal.multiformat.format.keyval.KeyValInjection.Long2BigEndian
-import com.twitter.simclusters_v2.thriftscala.FullClusterId
-import com.twitter.simclusters_v2.thriftscala.TopKTweetsWithScores
-
-object MultiTypeGraphStore {
-
-  implicit val leftNodesInject: Injection[LeftNode, Array[Byte]] =
-    CompactScalaCodec(LeftNode)
-  implicit val truncatedMultiTypeGraphInject: Injection[RightNodeWithEdgeWeightList, Array[Byte]] =
-    CompactScalaCodec(RightNodeWithEdgeWeightList)
-  implicit val topKNounsListInject: Injection[NounWithFrequencyList, Array[Byte]] =
-    CompactScalaCodec(NounWithFrequencyList)
-  implicit val rightNodesStructInject: Injection[RightNodeTypeStruct, Array[Byte]] =
-    CompactScalaCodec(RightNodeTypeStruct)
-  implicit val similarRightNodesStructInject: Injection[SimilarRightNodes, Array[Byte]] =
-    CompactScalaCodec(SimilarRightNodes)
-  implicit val rightNodesInject: Injection[RightNode, Array[Byte]] =
-    CompactScalaCodec(RightNode)
-  implicit val tweetCandidatesInject: Injection[CandidateTweetsList, Array[Byte]] =
-    CompactScalaCodec(CandidateTweetsList)
-  implicit val fullClusterIdInject: Injection[FullClusterId, Array[Byte]] =
-    CompactScalaCodec(FullClusterId)
-  implicit val topKTweetsWithScoresInject: Injection[TopKTweetsWithScores, Array[Byte]] =
-    CompactScalaCodec(TopKTweetsWithScores)
-  implicit val clustersUserIsInterestedInInjection: Injection[ClustersUserIsInterestedIn, Array[
-    Byte
-  ]] =
-    CompactScalaCodec(ClustersUserIsInterestedIn)
-
-  private val appId = "multi_type_simclusters"
-
-  def getTruncatedMultiTypeGraphRightNodesForUser(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[LeftNode, RightNodeWithEdgeWeightList] = {
-    ManhattanRO.getReadableStoreWithMtls[LeftNode, RightNodeWithEdgeWeightList](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName("mts_user_truncated_graph"),
-        Apollo
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def getTopKNounsForRightNodeType(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[RightNodeTypeStruct, NounWithFrequencyList] = {
-    ManhattanRO.getReadableStoreWithMtls[RightNodeTypeStruct, NounWithFrequencyList](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName("mts_topk_frequent_nouns"),
-        Apollo
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def getTopKSimilarRightNodes(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[RightNode, SimilarRightNodes] = {
-    ManhattanRO.getReadableStoreWithMtls[RightNode, SimilarRightNodes](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName("mts_topk_similar_right_nodes_scio"),
-        Apollo
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def getOfflineTweetMTSCandidateStore(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[Long, CandidateTweetsList] = {
-    ManhattanRO.getReadableStoreWithMtls[Long, CandidateTweetsList](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName("offline_tweet_recommendations_from_mts_consumer_embeddings"),
-        Apollo
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def getOfflineTweet2020CandidateStore(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[Long, CandidateTweetsList] = {
-    ManhattanRO.getReadableStoreWithMtls[Long, CandidateTweetsList](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName("offline_tweet_recommendations_from_interestedin_2020"),
-        Apollo
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def getVideoViewBasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("video_view_based_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getRetweetBasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("retweet_based_simclusters_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getReplyBasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("reply_based_simclusters_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getPushOpenBasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("push_open_based_simclusters_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getAdsFavBasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("ads_fav_based_simclusters_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getAdsFavClickBasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("ads_fav_click_based_simclusters_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getFTRPop1000BasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("ftr_pop1000_rank_decay_1_1_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getFTRPop10000BasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("ftr_pop10000_rank_decay_1_1_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getOONFTRPop1000BasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("oon_ftr_pop1000_rnkdecay_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getOfflineLogFavBasedTweetBasedClusterTopKTweets(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("decayed_sum_cluster_to_tweet_index"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-
-  def getGlobalSimClustersLanguageEmbeddings(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[Language, ClustersUserIsInterestedIn] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[Language, ClustersUserIsInterestedIn](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(appId),
-          DatasetName("global_simclusters_language_embeddings"),
-          Apollo
-        ),
-        mhMtlsParams
-      )
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/stores/SimClustersEmbeddingStore.docx b/src/scala/com/twitter/simclusters_v2/stores/SimClustersEmbeddingStore.docx
new file mode 100644
index 000000000..057d5c38a
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/stores/SimClustersEmbeddingStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/stores/SimClustersEmbeddingStore.scala b/src/scala/com/twitter/simclusters_v2/stores/SimClustersEmbeddingStore.scala
deleted file mode 100644
index 62785e205..000000000
--- a/src/scala/com/twitter/simclusters_v2/stores/SimClustersEmbeddingStore.scala
+++ /dev/null
@@ -1,120 +0,0 @@
-package com.twitter.simclusters_v2.stores
-
-import com.twitter.decider.Decider
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.hermit.store.common.DeciderableReadableStore
-import com.twitter.servo.decider.DeciderKeyEnum
-import com.twitter.simclusters_v2.common.DeciderGateBuilderWithIdHashing
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.thriftscala.EmbeddingType
-import com.twitter.simclusters_v2.thriftscala.ModelVersion
-import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
-import com.twitter.storehaus.ReadableStore
-import com.twitter.util.Future
-
-/**
- * Facade of all SimClusters Embedding Store.
- * Provide a uniform access layer for all kind of SimClusters Embedding.
- */
-case class SimClustersEmbeddingStore(
-  stores: Map[
-    (EmbeddingType, ModelVersion),
-    ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-  ]) extends ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] {
-
-  private val lookupStores =
-    stores
-      .groupBy(_._1._1).mapValues(_.map {
-        case ((_, modelVersion), store) =>
-          modelVersion -> store
-      })
-
-  override def get(k: SimClustersEmbeddingId): Future[Option[SimClustersEmbedding]] = {
-    findStore(k) match {
-      case Some(store) => store.get(k)
-      case None => Future.None
-    }
-  }
-
-  // Override the multiGet for better batch performance.
-  override def multiGet[K1 <: SimClustersEmbeddingId](
-    ks: Set[K1]
-  ): Map[K1, Future[Option[SimClustersEmbedding]]] = {
-    if (ks.isEmpty) {
-      Map.empty
-    } else {
-      val head = ks.head
-      val notSameType =
-        ks.exists(k => k.embeddingType != head.embeddingType || k.modelVersion != head.modelVersion)
-      if (!notSameType) {
-        findStore(head) match {
-          case Some(store) => store.multiGet(ks)
-          case None => ks.map(_ -> Future.None).toMap
-        }
-      } else {
-        // Generate a large amount temp objects.
-        // For better performance, avoid querying the multiGet with more than one kind of embedding
-        ks.groupBy(id => (id.embeddingType, id.modelVersion)).flatMap {
-          case ((_, _), ks) =>
-            findStore(ks.head) match {
-              case Some(store) => store.multiGet(ks)
-              case None => ks.map(_ -> Future.None).toMap
-            }
-        }
-      }
-    }
-  }
-
-  private def findStore(
-    id: SimClustersEmbeddingId
-  ): Option[ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]] = {
-    lookupStores.get(id.embeddingType).flatMap(_.get(id.modelVersion))
-  }
-
-}
-
-object SimClustersEmbeddingStore {
-  /*
-  Build a SimClustersEmbeddingStore which wraps all stores in DeciderableReadableStore
-   */
-  def buildWithDecider(
-    underlyingStores: Map[
-      (EmbeddingType, ModelVersion),
-      ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-    ],
-    decider: Decider,
-    statsReceiver: StatsReceiver
-  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
-    // To allow for lazy adding of decider config to enable / disable stores, if a value is not found
-    // fall back on returning true (equivalent to availability of 10000)
-    // This overrides default availability of 0 when not decider value is not found
-    val deciderGateBuilder = new DeciderGateBuilderWithIdHashing(decider.orElse(Decider.True))
-
-    val deciderKeyEnum = new DeciderKeyEnum {
-      underlyingStores.keySet.map(key => Value(s"enable_${key._1.name}_${key._2.name}"))
-    }
-
-    def wrapStore(
-      embeddingType: EmbeddingType,
-      modelVersion: ModelVersion,
-      store: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding]
-    ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
-      val gate = deciderGateBuilder.idGateWithHashing[SimClustersEmbeddingId](
-        deciderKeyEnum.withName(s"enable_${embeddingType.name}_${modelVersion.name}"))
-
-      DeciderableReadableStore(
-        underlying = store,
-        gate = gate,
-        statsReceiver = statsReceiver.scope(embeddingType.name, modelVersion.name)
-      )
-    }
-
-    val stores = underlyingStores.map {
-      case ((embeddingType, modelVersion), store) =>
-        (embeddingType, modelVersion) -> wrapStore(embeddingType, modelVersion, store)
-    }
-
-    new SimClustersEmbeddingStore(stores = stores)
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/stores/SimClustersMultiEmbeddingStore.docx b/src/scala/com/twitter/simclusters_v2/stores/SimClustersMultiEmbeddingStore.docx
new file mode 100644
index 000000000..741dbca6f
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/stores/SimClustersMultiEmbeddingStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/stores/SimClustersMultiEmbeddingStore.scala b/src/scala/com/twitter/simclusters_v2/stores/SimClustersMultiEmbeddingStore.scala
deleted file mode 100644
index 0a520439e..000000000
--- a/src/scala/com/twitter/simclusters_v2/stores/SimClustersMultiEmbeddingStore.scala
+++ /dev/null
@@ -1,74 +0,0 @@
-package com.twitter.simclusters_v2.stores
-
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.common.SimClustersMultiEmbeddingId._
-import com.twitter.simclusters_v2.thriftscala.{
-  SimClustersMultiEmbedding,
-  SimClustersEmbeddingId,
-  SimClustersMultiEmbeddingId
-}
-import com.twitter.storehaus.ReadableStore
-import com.twitter.util.Future
-
-/**
- * The helper methods for SimClusters Multi-Embedding based ReadableStore
- */
-object SimClustersMultiEmbeddingStore {
-
-  /**
-   * Only support the Values based Multi-embedding transformation.
-   */
-  case class SimClustersMultiEmbeddingWrapperStore(
-    sourceStore: ReadableStore[SimClustersMultiEmbeddingId, SimClustersMultiEmbedding])
-      extends ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] {
-
-    override def get(k: SimClustersEmbeddingId): Future[Option[SimClustersEmbedding]] = {
-      sourceStore.get(toMultiEmbeddingId(k)).map(_.map(toSimClustersEmbedding(k, _)))
-    }
-
-    // Override the multiGet for better batch performance.
-    override def multiGet[K1 <: SimClustersEmbeddingId](
-      ks: Set[K1]
-    ): Map[K1, Future[Option[SimClustersEmbedding]]] = {
-      if (ks.isEmpty) {
-        Map.empty
-      } else {
-        // Aggregate multiple get requests by MultiEmbeddingId
-        val multiEmbeddingIds = ks.map { k =>
-          k -> toMultiEmbeddingId(k)
-        }.toMap
-
-        val multiEmbeddings = sourceStore.multiGet(multiEmbeddingIds.values.toSet)
-        ks.map { k =>
-          k -> multiEmbeddings(multiEmbeddingIds(k)).map(_.map(toSimClustersEmbedding(k, _)))
-        }.toMap
-      }
-    }
-
-    private def toSimClustersEmbedding(
-      id: SimClustersEmbeddingId,
-      multiEmbedding: SimClustersMultiEmbedding
-    ): SimClustersEmbedding = {
-      multiEmbedding match {
-        case SimClustersMultiEmbedding.Values(values) =>
-          val subId = toSubId(id)
-          if (subId >= values.embeddings.size) {
-            throw new IllegalArgumentException(
-              s"SimClustersMultiEmbeddingId $id is over the size of ${values.embeddings.size}")
-          } else {
-            values.embeddings(subId).embedding
-          }
-        case _ =>
-          throw new IllegalArgumentException(
-            s"Invalid SimClustersMultiEmbedding $id, $multiEmbedding")
-      }
-    }
-  }
-
-  def toSimClustersEmbeddingStore(
-    sourceStore: ReadableStore[SimClustersMultiEmbeddingId, SimClustersMultiEmbedding]
-  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
-    SimClustersMultiEmbeddingWrapperStore(sourceStore)
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/stores/TopicTopProducersStore.docx b/src/scala/com/twitter/simclusters_v2/stores/TopicTopProducersStore.docx
new file mode 100644
index 000000000..c04e616d7
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/stores/TopicTopProducersStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/stores/TopicTopProducersStore.scala b/src/scala/com/twitter/simclusters_v2/stores/TopicTopProducersStore.scala
deleted file mode 100644
index c733ed157..000000000
--- a/src/scala/com/twitter/simclusters_v2/stores/TopicTopProducersStore.scala
+++ /dev/null
@@ -1,87 +0,0 @@
-package com.twitter.simclusters_v2.stores
-
-import com.twitter.bijection.scrooge.CompactScalaCodec
-import com.twitter.recos.entities.thriftscala.{SemanticCoreEntityWithLocale, UserScoreList}
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus_internal.manhattan.{Athena, ManhattanRO, ManhattanROConfig}
-import com.twitter.storehaus_internal.util.{ApplicationID, DatasetName, HDFSPath}
-
-object TopicTopProducersStore {
-  val appIdDevel = "recos_platform_dev"
-  val v2DatasetNameDevel = "topic_producers_em"
-  val v3DatasetNameDevel = "topic_producers_agg"
-  val v4DatasetNameDevel = "topic_producers_em_erg"
-
-  val appIdProd = "simclusters_v2"
-  val v1DatasetNameProd = "top_producers_for_topic_from_topic_follow_graph"
-  val v2DatasetNameProd = "top_producers_for_topic_em"
-
-  implicit val keyInj = CompactScalaCodec(SemanticCoreEntityWithLocale)
-  implicit val valInj = CompactScalaCodec(UserScoreList)
-
-  def getTopicTopProducerStoreV1Prod(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[SemanticCoreEntityWithLocale, UserScoreList] =
-    ManhattanRO.getReadableStoreWithMtls[SemanticCoreEntityWithLocale, UserScoreList](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appIdProd),
-        DatasetName(v1DatasetNameProd),
-        Athena
-      ),
-      mhMtlsParams
-    )
-
-  def getTopicTopProducerStoreV2Devel(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[SemanticCoreEntityWithLocale, UserScoreList] =
-    ManhattanRO.getReadableStoreWithMtls[SemanticCoreEntityWithLocale, UserScoreList](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appIdDevel),
-        DatasetName(v2DatasetNameDevel),
-        Athena
-      ),
-      mhMtlsParams
-    )
-
-  def getTopicTopProducerStoreV2Prod(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[SemanticCoreEntityWithLocale, UserScoreList] =
-    ManhattanRO.getReadableStoreWithMtls[SemanticCoreEntityWithLocale, UserScoreList](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appIdProd),
-        DatasetName(v2DatasetNameProd),
-        Athena
-      ),
-      mhMtlsParams
-    )
-
-  def getTopicTopProducerStoreV3Devel(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[SemanticCoreEntityWithLocale, UserScoreList] =
-    ManhattanRO.getReadableStoreWithMtls[SemanticCoreEntityWithLocale, UserScoreList](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appIdDevel),
-        DatasetName(v3DatasetNameDevel),
-        Athena
-      ),
-      mhMtlsParams
-    )
-
-  def getTopicTopProducerStoreV4Devel(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[SemanticCoreEntityWithLocale, UserScoreList] =
-    ManhattanRO.getReadableStoreWithMtls[SemanticCoreEntityWithLocale, UserScoreList](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appIdDevel),
-        DatasetName(v4DatasetNameDevel),
-        Athena
-      ),
-      mhMtlsParams
-    )
-}
diff --git a/src/scala/com/twitter/simclusters_v2/stores/WtfMbcgStore.docx b/src/scala/com/twitter/simclusters_v2/stores/WtfMbcgStore.docx
new file mode 100644
index 000000000..7e55b3371
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/stores/WtfMbcgStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/stores/WtfMbcgStore.scala b/src/scala/com/twitter/simclusters_v2/stores/WtfMbcgStore.scala
deleted file mode 100644
index 471d4bf2b..000000000
--- a/src/scala/com/twitter/simclusters_v2/stores/WtfMbcgStore.scala
+++ /dev/null
@@ -1,34 +0,0 @@
-package com.twitter.simclusters_v2.stores
-
-import com.twitter.scalding_internal.multiformat.format.keyval.KeyValInjection.{
-  Long2BigEndian,
-  ScalaBinaryThrift
-}
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus_internal.manhattan.{Apollo, ManhattanRO, ManhattanROConfig}
-import com.twitter.storehaus_internal.util.{ApplicationID, DatasetName, HDFSPath}
-import com.twitter.wtf.candidate.thriftscala.CandidateSeq
-
-object WtfMbcgStore {
-
-  val appId = "recos_platform_apollo"
-
-  implicit val keyInj = Long2BigEndian
-  implicit val valInj = ScalaBinaryThrift(CandidateSeq)
-
-  def getWtfMbcgStore(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    datasetName: String
-  ): ReadableStore[Long, CandidateSeq] = {
-    ManhattanRO.getReadableStoreWithMtls[Long, CandidateSeq](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName(datasetName),
-        Apollo
-      ),
-      mhMtlsParams
-    )
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/BUILD b/src/scala/com/twitter/simclusters_v2/summingbird/BUILD
deleted file mode 100644
index f01857d26..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/BUILD
+++ /dev/null
@@ -1,118 +0,0 @@
-scala_library(
-    name = "common",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/scala/com/twitter/simclusters_v2/summingbird/common",
-    ],
-)
-
-scala_library(
-    name = "stores",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/scala/com/twitter/simclusters_v2/summingbird/stores",
-    ],
-)
-
-scala_library(
-    name = "webservice",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/scala/com/twitter/simclusters_v2/summingbird/webservice",
-        "twitter-server/slf4j-jdk14/src/main/scala/com/twitter/server/logging",
-    ],
-)
-
-heron_binary(
-    name = "tweet-simclusters-storm-binary",
-    main = "com.twitter.simclusters_v2.summingbird.storm.TweetJobRunner",
-    platform = "java8",
-    runtime_platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":common",
-        "3rdparty/jvm/org/slf4j:slf4j-jdk14",
-        "src/scala/com/twitter/simclusters_v2/summingbird/storm",
-    ],
-)
-
-jvm_app(
-    name = "tweet-simclusters-storm-job",
-    binary = ":tweet-simclusters-storm-binary",
-    bundles = [
-        bundle(
-            fileset = ["config/jaas.conf"],
-        ),
-    ],
-    tags = ["bazel-compatible"],
-)
-
-heron_binary(
-    name = "persistent-tweet-simclusters-storm-binary",
-    main = "com.twitter.simclusters_v2.summingbird.storm.PersistentTweetJobRunner",
-    platform = "java8",
-    runtime_platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":common",
-        "3rdparty/jvm/org/slf4j:slf4j-jdk14",
-        "src/scala/com/twitter/simclusters_v2/summingbird/storm",
-    ],
-)
-
-jvm_app(
-    name = "persistent-tweet-simclusters-storm-job",
-    binary = ":persistent-tweet-simclusters-storm-binary",
-    bundles = [
-        bundle(
-            fileset = ["config/jaas.conf"],
-        ),
-    ],
-    tags = ["bazel-compatible"],
-)
-
-heron_binary(
-    name = "multi-model-tweet-simclusters-storm-binary",
-    main = "com.twitter.simclusters_v2.summingbird.storm.MultiModelTweetJobRunner",
-    platform = "java8",
-    runtime_platform = "java8",
-    dependencies = [
-        ":common",
-        "3rdparty/jvm/org/slf4j:slf4j-jdk14",
-        "src/scala/com/twitter/simclusters_v2/summingbird/storm",
-    ],
-)
-
-jvm_app(
-    name = "multi-model-tweet-simclusters-storm-job",
-    binary = ":multi-model-tweet-simclusters-storm-binary",
-    bundles = [
-        bundle(
-            fileset = ["config/jaas.conf"],
-        ),
-    ],
-)
-
-jvm_binary(
-    name = "repl",
-    basename = "repl-simclusters_v2",
-    main = "scala.tools.nsc.MainGenericRunner",
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":common",
-        "3rdparty/jvm/org/scala-lang:scala-compiler",
-    ],
-)
-
-target(
-    dependencies = [
-        ":common",
-        ":repl",
-        ":stores",
-        ":webservice",
-        "src/scala/com/twitter/simclusters_v2/summingbird/storm",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/BUILD.docx b/src/scala/com/twitter/simclusters_v2/summingbird/BUILD.docx
new file mode 100644
index 000000000..62a51e48d
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/README.docx b/src/scala/com/twitter/simclusters_v2/summingbird/README.docx
new file mode 100644
index 000000000..88f7e4d24
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/README.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/README.md b/src/scala/com/twitter/simclusters_v2/summingbird/README.md
deleted file mode 100644
index 026df3a26..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-Simclusters v2 Online Tweet Embedding Pipeline
-==============================================
-
-The Heron jobs generate the tweet embedding and index of tweets for SimClusters, as well as persistenting the tweet embeddings from MemCache into Manhattan.
\ No newline at end of file
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/BUILD b/src/scala/com/twitter/simclusters_v2/summingbird/common/BUILD
deleted file mode 100644
index 0912b12fe..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/BUILD
+++ /dev/null
@@ -1,62 +0,0 @@
-scala_library(
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "3rdparty/jvm/com/twitter/algebird:core",
-        "3rdparty/jvm/com/twitter/algebird:util",
-        "3rdparty/jvm/com/twitter/bijection:core",
-        "3rdparty/jvm/com/twitter/bijection:util",
-        "3rdparty/jvm/com/twitter/storehaus:core",
-        "3rdparty/src/jvm/com/twitter/summingbird:client",
-        "cuad/projects/ner/client",
-        "cuad/projects/ner/thrift/src/main/thrift:thrift-scala",
-        "snowflake/src/main/scala/com/twitter/snowflake/id",
-        "src/scala/com/twitter/algebird_internal/injection",
-        "src/scala/com/twitter/simclusters_v2/common",
-        "src/scala/com/twitter/storehaus_internal/manhattan",
-        "src/scala/com/twitter/storehaus_internal/manhattan/config",
-        "src/scala/com/twitter/storehaus_internal/memcache",
-        "src/scala/com/twitter/storehaus_internal/memcache/config",
-        "src/scala/com/twitter/storehaus_internal/offline",
-        "src/scala/com/twitter/storehaus_internal/online",
-        "src/scala/com/twitter/storehaus_internal/util",
-        "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits",
-        "src/scala/com/twitter/summingbird_internal/runner/store_config",
-        "src/scala/com/twitter/taxi/util/text",
-        "src/scala/com/twitter/wtf/summingbird/sources/common",
-        "src/thrift/com/twitter/recos/entities:entities-thrift-scala",
-        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
-        "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala",
-        "src/thrift/com/twitter/tweetypie:tweet-scala",
-        "src/thrift/com/twitter/wtf/interest:interest-thrift-scala",
-        "stitch/stitch-core",
-        "stitch/stitch-storehaus/src/main/scala",
-    ],
-)
-
-## smaller build target for external usage
-scala_library(
-    name = "util",
-    sources = [
-        "Configs.scala",
-        "Implicits.scala",
-        "ModelVersionProfile.scala",
-        "Monoids.scala",
-        "ThriftDecayedValueMonoid.scala",
-    ],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "3rdparty/jvm/com/twitter/algebird:core",
-        "3rdparty/jvm/com/twitter/algebird:util",
-        "3rdparty/jvm/com/twitter/bijection:core",
-        "3rdparty/jvm/com/twitter/bijection:util",
-        "3rdparty/src/jvm/com/twitter/summingbird:batch",
-        "snowflake/src/main/scala/com/twitter/snowflake/id",
-        "src/scala/com/twitter/algebird_internal/injection",
-        "src/scala/com/twitter/simclusters_v2/common",
-        "src/thrift/com/twitter/recos/entities:entities-thrift-scala",
-        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
-        "src/thrift/com/twitter/tweetypie:tweet-scala",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/BUILD.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/BUILD.docx
new file mode 100644
index 000000000..49ae531aa
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/ClientConfigs.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/ClientConfigs.docx
new file mode 100644
index 000000000..0ca19d56a
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/ClientConfigs.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/ClientConfigs.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/ClientConfigs.scala
deleted file mode 100644
index d288ad692..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/ClientConfigs.scala
+++ /dev/null
@@ -1,81 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.conversions.DurationOps._
-import com.twitter.finagle.mtls.authentication.ServiceIdentifier
-import com.twitter.finagle.thrift.ClientId
-import com.twitter.storehaus_internal.memcache.ConnectionConfig
-import com.twitter.storehaus_internal.memcache.MemcacheConfig
-import com.twitter.storehaus_internal.util.KeyPrefix
-import com.twitter.storehaus_internal.util.TTL
-import com.twitter.strato.client.Strato
-import com.twitter.strato.client.{Client => StratoClient}
-
-object ClientConfigs {
-
-  com.twitter.server.Init() // necessary in order to use WilyNS path
-
-  final lazy val simClustersCoreAltCachePath =
-    "/srv#/prod/local/cache/simclusters_core_alt"
-
-  final lazy val simClustersCoreAltLightCachePath =
-    "/srv#/prod/local/cache/simclusters_core_alt_light"
-
-  final lazy val develSimClustersCoreCachePath =
-    "/srv#/test/local/cache/twemcache_simclusters_core"
-
-  final lazy val develSimClustersCoreLightCachePath =
-    "/srv#/test/local/cache/twemcache_simclusters_core_light"
-
-  final lazy val logFavBasedTweet20M145K2020StratoPath =
-    "recommendations/simclusters_v2/embeddings/logFavBasedTweet20M145K2020Persistent"
-
-  final lazy val logFavBasedTweet20M145K2020UncachedStratoPath =
-    "recommendations/simclusters_v2/embeddings/logFavBasedTweet20M145K2020-UNCACHED"
-
-  final lazy val develLogFavBasedTweet20M145K2020StratoPath =
-    "recommendations/simclusters_v2/embeddings/logFavBasedTweet20M145K2020Devel"
-
-  final lazy val entityClusterScoreMemcacheConfig: (String, ServiceIdentifier) => MemcacheConfig = {
-    (path: String, serviceIdentifier: ServiceIdentifier) =>
-      new MemcacheConfig {
-        val connectionConfig: ConnectionConfig = ConnectionConfig(path, serviceIdentifier = serviceIdentifier)
-        override val keyPrefix: KeyPrefix = KeyPrefix(s"ecs_")
-        override val ttl: TTL = TTL(8.hours)
-      }
-  }
-
-  // note: this should in dedicated cache for tweet
-  final lazy val tweetTopKClustersMemcacheConfig: (String, ServiceIdentifier) => MemcacheConfig = {
-    (path: String, serviceIdentifier: ServiceIdentifier) =>
-      new MemcacheConfig {
-        val connectionConfig: ConnectionConfig =
-          ConnectionConfig(path, serviceIdentifier = serviceIdentifier)
-        override val keyPrefix: KeyPrefix = KeyPrefix(s"etk_")
-        override val ttl: TTL = TTL(2.days)
-      }
-  }
-
-  // note: this should in dedicated cache for tweet
-  final lazy val clusterTopTweetsMemcacheConfig: (String, ServiceIdentifier) => MemcacheConfig = {
-    (path: String, serviceIdentifier: ServiceIdentifier) =>
-      new MemcacheConfig {
-        val connectionConfig: ConnectionConfig =
-          ConnectionConfig(path, serviceIdentifier = serviceIdentifier)
-        override val keyPrefix: KeyPrefix = KeyPrefix(s"ctkt_")
-        override val ttl: TTL = TTL(8.hours)
-      }
-  }
-
-  final lazy val stratoClient: ServiceIdentifier => StratoClient = { serviceIdentifier =>
-    Strato.client
-      .withRequestTimeout(2.seconds)
-      .withMutualTls(serviceIdentifier)
-      .build()
-  }
-
-  // thrift client id
-  private final lazy val thriftClientId: String => ClientId = { env: String =>
-    ClientId(s"simclusters_v2_summingbird.$env")
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/Configs.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/Configs.docx
new file mode 100644
index 000000000..ed74754b1
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/Configs.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/Configs.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/Configs.scala
deleted file mode 100644
index d769330f0..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/Configs.scala
+++ /dev/null
@@ -1,70 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.conversions.DurationOps._
-import com.twitter.simclusters_v2.thriftscala.ModelVersion
-import com.twitter.util.Duration
-
-object Configs {
-
-  final val role = "cassowary"
-
-  final val ZoneAtla: String = "atla"
-
-  @deprecated("Use 'common/ModelVersions'", "2019-09-04")
-  final val ModelVersion20M145KDec11: String = "20M_145K_dec11"
-  @deprecated("Use 'common/ModelVersions'", "2019-09-04")
-  final val ModelVersion20M145KUpdated: String = "20M_145K_updated"
-  final val ModelVersion20M145K2020: String = "20M_145K_2020"
-
-  @deprecated("Use 'common/ModelVersions'", "2019-09-04")
-  final val ModelVersionMap: Map[String, ModelVersion] = Map(
-    ModelVersion20M145KDec11 -> ModelVersion.Model20m145kDec11,
-    ModelVersion20M145KUpdated -> ModelVersion.Model20m145kUpdated,
-    ModelVersion20M145K2020 -> ModelVersion.Model20m145k2020
-  )
-
-  final val favScoreThresholdForUserInterest: String => Double = {
-    case ModelVersion20M145KDec11 => 0.15
-    case ModelVersion20M145KUpdated => 1.0
-    case ModelVersion20M145K2020 => 0.3
-    case modelVersionStr => throw new Exception(s"$modelVersionStr is not a valid model")
-  }
-
-  @deprecated("Use 'common/ModelVersions'", "2019-09-04")
-  final val ReversedModelVersionMap = ModelVersionMap.map(_.swap)
-
-  final val batchesToKeep: Int = 1
-
-  final val HalfLife: Duration = 8.hours
-  final val HalfLifeInMs: Long = HalfLife.inMilliseconds
-
-  final val topKTweetsPerCluster: Int = 1600
-
-  final val topKClustersPerEntity: Int = 50
-
-  // the config used in offline job only
-  final val topKClustersPerTweet: Int = 400
-
-  // minimum score to save clusterIds in entityTopKClusters cache
-  // entity includes entities other than tweetId.
-  final val scoreThresholdForEntityTopKClustersCache: Double = 0.02
-
-  // minimum score to save clusterIds in tweetTopKClusters cache
-  final val scoreThresholdForTweetTopKClustersCache: Double = 0.02
-
-  // minimum score to save tweetIds in clusterTopKTweets cache
-  final val scoreThresholdForClusterTopKTweetsCache: Double = 0.001
-
-  // minimum score to save entities in clusterTopKEntities cache
-  final val scoreThresholdForClusterTopKEntitiesCache: Double = 0.001
-
-  final val MinFavoriteCount = 8
-
-  final val OldestTweetInLightIndexInMillis = 1.hours.inMillis
-
-  final val OldestTweetFavEventTimeInMillis = 3.days.inMillis
-
-  final val FirstUpdateValue = 1
-
-  final val TempUpdateValue = -1
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/EntityUtil.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/EntityUtil.docx
new file mode 100644
index 000000000..c11ec0bfd
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/EntityUtil.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/EntityUtil.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/EntityUtil.scala
deleted file mode 100644
index 4e4bbd7e7..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/EntityUtil.scala
+++ /dev/null
@@ -1,46 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.cuad.ner.thriftscala.WholeEntityType
-import com.twitter.simclusters_v2.summingbird.common.Implicits.thriftDecayedValueMonoid
-import com.twitter.simclusters_v2.thriftscala.{Scores, SimClusterEntity, TweetTextEntity}
-import scala.collection.Map
-
-private[summingbird] object EntityUtil {
-
-  def updateScoreWithLatestTimestamp[K](
-    scoresMapOption: Option[Map[K, Scores]],
-    timeInMs: Long
-  ): Option[Map[K, Scores]] = {
-    scoresMapOption map { scoresMap =>
-      scoresMap.mapValues(score => updateScoreWithLatestTimestamp(score, timeInMs))
-    }
-  }
-
-  def updateScoreWithLatestTimestamp(score: Scores, timeInMs: Long): Scores = {
-    score.copy(
-      favClusterNormalized8HrHalfLifeScore = score.favClusterNormalized8HrHalfLifeScore.map {
-        decayedValue => thriftDecayedValueMonoid.decayToTimestamp(decayedValue, timeInMs)
-      },
-      followClusterNormalized8HrHalfLifeScore = score.followClusterNormalized8HrHalfLifeScore.map {
-        decayedValue => thriftDecayedValueMonoid.decayToTimestamp(decayedValue, timeInMs)
-      }
-    )
-  }
-
-  def entityToString(entity: SimClusterEntity): String = {
-    entity match {
-      case SimClusterEntity.TweetId(id) => s"t_id:$id"
-      case SimClusterEntity.SpaceId(id) => s"space_id:$id"
-      case SimClusterEntity.TweetEntity(textEntity) =>
-        textEntity match {
-          case TweetTextEntity.Hashtag(str) => s"$str[h_tag]"
-          case TweetTextEntity.Penguin(penguin) =>
-            s"${penguin.textEntity}[penguin]"
-          case TweetTextEntity.Ner(ner) =>
-            s"${ner.textEntity}[ner_${WholeEntityType(ner.wholeEntityType)}]"
-          case TweetTextEntity.SemanticCore(semanticCore) =>
-            s"[sc:${semanticCore.entityId}]"
-        }
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/Implicits.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/Implicits.docx
new file mode 100644
index 000000000..0291c1a9c
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/Implicits.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/Implicits.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/Implicits.scala
deleted file mode 100644
index 79235573f..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/Implicits.scala
+++ /dev/null
@@ -1,140 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.algebird.DecayedValueMonoid
-import com.twitter.algebird.Monoid
-import com.twitter.algebird_internal.injection.AlgebirdImplicits
-import com.twitter.algebird_internal.thriftscala.{DecayedValue => ThriftDecayedValue}
-import com.twitter.bijection.Bufferable
-import com.twitter.bijection.Injection
-import com.twitter.bijection.scrooge.CompactScalaCodec
-import com.twitter.simclusters_v2.summingbird.common.Monoids.ClustersWithScoresMonoid
-import com.twitter.simclusters_v2.summingbird.common.Monoids.MultiModelClustersWithScoresMonoid
-import com.twitter.simclusters_v2.summingbird.common.Monoids.MultiModelPersistentSimClustersEmbeddingLongestL2NormMonoid
-import com.twitter.simclusters_v2.summingbird.common.Monoids.MultiModelPersistentSimClustersEmbeddingMonoid
-import com.twitter.simclusters_v2.summingbird.common.Monoids.MultiModelTopKTweetsWithScoresMonoid
-import com.twitter.simclusters_v2.summingbird.common.Monoids.PersistentSimClustersEmbeddingLongestL2NormMonoid
-import com.twitter.simclusters_v2.summingbird.common.Monoids.PersistentSimClustersEmbeddingMonoid
-import com.twitter.simclusters_v2.summingbird.common.Monoids.ScoresMonoid
-import com.twitter.simclusters_v2.summingbird.common.Monoids.TopKClustersWithScoresMonoid
-import com.twitter.simclusters_v2.summingbird.common.Monoids.TopKTweetsWithScoresMonoid
-import com.twitter.simclusters_v2.thriftscala.FullClusterIdBucket
-import com.twitter.simclusters_v2.thriftscala._
-import com.twitter.summingbird.batch.Batcher
-import com.twitter.tweetypie.thriftscala.StatusCounts
-
-object Implicits {
-
-  // -------------------- Monoids -------------------- //
-  implicit val decayedValueMonoid: DecayedValueMonoid = DecayedValueMonoid(0.0)
-
-  implicit val thriftDecayedValueMonoid: ThriftDecayedValueMonoid =
-    new ThriftDecayedValueMonoid(Configs.HalfLifeInMs)(decayedValueMonoid)
-
-  implicit val scoresMonoid: ScoresMonoid = new Monoids.ScoresMonoid()
-
-  implicit val clustersWithScoreMonoid: ClustersWithScoresMonoid =
-    new Monoids.ClustersWithScoresMonoid()(scoresMonoid)
-
-  implicit val multiModelClustersWithScoresMonoid: Monoid[MultiModelClustersWithScores] =
-    new MultiModelClustersWithScoresMonoid()
-
-  implicit val topKClustersWithScoresMonoid: Monoid[TopKClustersWithScores] =
-    new TopKClustersWithScoresMonoid(
-      Configs.topKClustersPerEntity,
-      Configs.scoreThresholdForEntityTopKClustersCache
-    )(thriftDecayedValueMonoid)
-
-  implicit val topKTweetsWithScoresMonoid: Monoid[TopKTweetsWithScores] =
-    new TopKTweetsWithScoresMonoid(
-      Configs.topKTweetsPerCluster,
-      Configs.scoreThresholdForClusterTopKTweetsCache,
-      Configs.OldestTweetFavEventTimeInMillis
-    )(thriftDecayedValueMonoid)
-
-  implicit val topKTweetsWithScoresLightMonoid: Monoid[TopKTweetsWithScores] =
-    new TopKTweetsWithScoresMonoid(
-      Configs.topKTweetsPerCluster,
-      Configs.scoreThresholdForClusterTopKTweetsCache,
-      Configs.OldestTweetInLightIndexInMillis
-    )(thriftDecayedValueMonoid)
-
-  implicit val MultiModeltopKTweetsWithScoresMonoid: Monoid[MultiModelTopKTweetsWithScores] =
-    new MultiModelTopKTweetsWithScoresMonoid(
-    )(thriftDecayedValueMonoid)
-
-  implicit val persistentSimClustersEmbeddingMonoid: Monoid[PersistentSimClustersEmbedding] =
-    new PersistentSimClustersEmbeddingMonoid()
-
-  implicit val persistentSimClustersEmbeddingLongestL2NormMonoid: Monoid[
-    PersistentSimClustersEmbedding
-  ] =
-    new PersistentSimClustersEmbeddingLongestL2NormMonoid()
-
-  implicit val multiModelPersistentSimClustersEmbeddingMonoid: Monoid[
-    MultiModelPersistentSimClustersEmbedding
-  ] =
-    new MultiModelPersistentSimClustersEmbeddingMonoid()
-
-  implicit val multiModelPersistentSimClustersEmbeddingLongestL2NormMonoid: Monoid[
-    MultiModelPersistentSimClustersEmbedding
-  ] = new MultiModelPersistentSimClustersEmbeddingLongestL2NormMonoid()
-
-  // -------------------- Codecs -------------------- //
-  implicit val longIntPairCodec: Injection[(Long, Int), Array[Byte]] =
-    Bufferable.injectionOf[(Long, Int)]
-
-  implicit val simClusterEntityCodec: Injection[SimClusterEntity, Array[Byte]] =
-    CompactScalaCodec(SimClusterEntity)
-
-  implicit val fullClusterIdBucket: Injection[FullClusterIdBucket, Array[Byte]] =
-    CompactScalaCodec(FullClusterIdBucket)
-
-  implicit val clustersWithScoresCodec: Injection[ClustersWithScores, Array[Byte]] =
-    CompactScalaCodec(ClustersWithScores)
-
-  implicit val topKClustersKeyCodec: Injection[EntityWithVersion, Array[Byte]] =
-    CompactScalaCodec(EntityWithVersion)
-
-  implicit val topKClustersWithScoresCodec: Injection[TopKClustersWithScores, Array[Byte]] =
-    CompactScalaCodec(TopKClustersWithScores)
-
-  implicit val fullClusterIdCodec: Injection[FullClusterId, Array[Byte]] =
-    CompactScalaCodec(FullClusterId)
-
-  implicit val topKEntitiesWithScoresCodec: Injection[TopKEntitiesWithScores, Array[Byte]] =
-    CompactScalaCodec(TopKEntitiesWithScores)
-
-  implicit val topKTweetsWithScoresCodec: Injection[TopKTweetsWithScores, Array[Byte]] =
-    CompactScalaCodec(TopKTweetsWithScores)
-
-  implicit val pairedArrayBytesCodec: Injection[(Array[Byte], Array[Byte]), Array[Byte]] =
-    Bufferable.injectionOf[(Array[Byte], Array[Byte])]
-
-  implicit val entityWithClusterInjection: Injection[(SimClusterEntity, FullClusterIdBucket), Array[
-    Byte
-  ]] =
-    Injection
-      .connect[(SimClusterEntity, FullClusterIdBucket), (Array[Byte], Array[Byte]), Array[Byte]]
-
-  implicit val topKClustersCodec: Injection[TopKClusters, Array[Byte]] =
-    CompactScalaCodec(TopKClusters)
-
-  implicit val topKTweetsCodec: Injection[TopKTweets, Array[Byte]] =
-    CompactScalaCodec(TopKTweets)
-
-  implicit val simClustersEmbeddingCodec: Injection[SimClustersEmbedding, Array[Byte]] =
-    CompactScalaCodec(SimClustersEmbedding)
-
-  implicit val persistentSimClustersEmbeddingCodec: Injection[PersistentSimClustersEmbedding, Array[
-    Byte
-  ]] =
-    CompactScalaCodec(PersistentSimClustersEmbedding)
-
-  implicit val statusCountsCodec: Injection[StatusCounts, Array[Byte]] =
-    CompactScalaCodec(StatusCounts)
-
-  implicit val thriftDecayedValueCodec: Injection[ThriftDecayedValue, Array[Byte]] =
-    AlgebirdImplicits.decayedValueCodec
-
-  implicit val batcher: Batcher = Batcher.unit
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/ModelVersionProfile.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/ModelVersionProfile.docx
new file mode 100644
index 000000000..175c21123
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/ModelVersionProfile.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/ModelVersionProfile.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/ModelVersionProfile.scala
deleted file mode 100644
index ad2c56386..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/ModelVersionProfile.scala
+++ /dev/null
@@ -1,40 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.simclusters_v2.thriftscala.EmbeddingType
-import com.twitter.util.Duration
-import com.twitter.conversions.DurationOps._
-import com.twitter.simclusters_v2.thriftscala.ModelVersion
-
-case class ModelVersionProfile(
-  modelVersion: ModelVersion,
-  usingLogFavScore: Boolean,
-  // redundant in the current models because the above parameter does the same currently.
-  coreEmbeddingType: EmbeddingType,
-  favScoreThresholdForUserInterest: Double,
-  // these values are shared between all profiles so lets set up defaults
-  halfLife: Duration = 8.hours,
-  scoreThresholdForEntityTopKClustersCache: Double = 0.2,
-  scoreThresholdForTweetTopKClustersCache: Double = 0.02,
-  scoreThresholdForClusterTopKTweetsCache: Double = 0.001,
-  scoreThresholdForClusterTopKEntitiesCache: Double = 0.001)
-
-object ModelVersionProfiles {
-  final val ModelVersion20M145KUpdated = ModelVersionProfile(
-    ModelVersion.Model20m145kUpdated,
-    usingLogFavScore = true,
-    coreEmbeddingType = EmbeddingType.LogFavBasedTweet,
-    favScoreThresholdForUserInterest = 1.0
-  )
-
-  final val ModelVersion20M145K2020 = ModelVersionProfile(
-    ModelVersion.Model20m145k2020,
-    usingLogFavScore = true,
-    coreEmbeddingType = EmbeddingType.LogFavBasedTweet,
-    favScoreThresholdForUserInterest = 0.3
-  )
-
-  final val ModelVersionProfiles: Map[ModelVersion, ModelVersionProfile] = Map(
-    ModelVersion.Model20m145kUpdated -> ModelVersion20M145KUpdated,
-    ModelVersion.Model20m145k2020 -> ModelVersion20M145K2020
-  )
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/Monoids.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/Monoids.docx
new file mode 100644
index 000000000..c9760504d
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/Monoids.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/Monoids.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/Monoids.scala
deleted file mode 100644
index 34dd27586..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/Monoids.scala
+++ /dev/null
@@ -1,478 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.algebird.DecayedValue
-import com.twitter.algebird.Monoid
-import com.twitter.algebird.OptionMonoid
-import com.twitter.algebird.ScMapMonoid
-import com.twitter.algebird_internal.thriftscala.{DecayedValue => ThriftDecayedValue}
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.thriftscala.ClustersWithScores
-import com.twitter.simclusters_v2.thriftscala.MultiModelClustersWithScores
-import com.twitter.simclusters_v2.thriftscala.MultiModelTopKTweetsWithScores
-import com.twitter.simclusters_v2.thriftscala.ModelVersion
-import com.twitter.simclusters_v2.thriftscala.MultiModelPersistentSimClustersEmbedding
-import com.twitter.simclusters_v2.thriftscala.PersistentSimClustersEmbedding
-import com.twitter.simclusters_v2.thriftscala.Scores
-import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingMetadata
-import com.twitter.simclusters_v2.thriftscala.TopKClustersWithScores
-import com.twitter.simclusters_v2.thriftscala.TopKTweetsWithScores
-import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
-import com.twitter.snowflake.id.SnowflakeId
-import scala.collection.mutable
-
-/**
- * Contains various monoids used in the EntityJob
- */
-object Monoids {
-
-  class ScoresMonoid(implicit thriftDecayedValueMonoid: ThriftDecayedValueMonoid)
-      extends Monoid[Scores] {
-
-    private val optionalThriftDecayedValueMonoid =
-      new OptionMonoid[ThriftDecayedValue]()
-
-    override val zero: Scores = Scores()
-
-    override def plus(x: Scores, y: Scores): Scores = {
-      Scores(
-        optionalThriftDecayedValueMonoid.plus(
-          x.favClusterNormalized8HrHalfLifeScore,
-          y.favClusterNormalized8HrHalfLifeScore
-        ),
-        optionalThriftDecayedValueMonoid.plus(
-          x.followClusterNormalized8HrHalfLifeScore,
-          y.followClusterNormalized8HrHalfLifeScore
-        )
-      )
-    }
-  }
-
-  class ClustersWithScoresMonoid(implicit scoresMonoid: ScoresMonoid)
-      extends Monoid[ClustersWithScores] {
-
-    private val optionMapMonoid =
-      new OptionMonoid[collection.Map[Int, Scores]]()(new ScMapMonoid[Int, Scores]())
-
-    override val zero: ClustersWithScores = ClustersWithScores()
-
-    override def plus(x: ClustersWithScores, y: ClustersWithScores): ClustersWithScores = {
-      ClustersWithScores(
-        optionMapMonoid.plus(x.clustersToScore, y.clustersToScore)
-      )
-    }
-  }
-
-  class MultiModelClustersWithScoresMonoid(implicit scoresMonoid: ScoresMonoid)
-      extends Monoid[MultiModelClustersWithScores] {
-
-    override val zero: MultiModelClustersWithScores = MultiModelClustersWithScores()
-
-    override def plus(
-      x: MultiModelClustersWithScores,
-      y: MultiModelClustersWithScores
-    ): MultiModelClustersWithScores = {
-      // We reuse the logic from the Monoid for the Value here
-      val clustersWithScoreMonoid = Implicits.clustersWithScoreMonoid
-
-      MultiModelClustersWithScores(
-        MultiModelUtils.mergeTwoMultiModelMaps(
-          x.multiModelClustersWithScores,
-          y.multiModelClustersWithScores,
-          clustersWithScoreMonoid))
-    }
-  }
-
-  class TopKClustersWithScoresMonoid(
-    topK: Int,
-    threshold: Double
-  )(
-    implicit thriftDecayedValueMonoid: ThriftDecayedValueMonoid)
-      extends Monoid[TopKClustersWithScores] {
-
-    override val zero: TopKClustersWithScores = TopKClustersWithScores()
-
-    override def plus(
-      x: TopKClustersWithScores,
-      y: TopKClustersWithScores
-    ): TopKClustersWithScores = {
-
-      val mergedFavMap = TopKScoresUtils
-        .mergeTwoTopKMapWithDecayedValues(
-          x.topClustersByFavClusterNormalizedScore
-            .map(_.mapValues(
-              _.favClusterNormalized8HrHalfLifeScore.getOrElse(thriftDecayedValueMonoid.zero))),
-          y.topClustersByFavClusterNormalizedScore
-            .map(_.mapValues(
-              _.favClusterNormalized8HrHalfLifeScore.getOrElse(thriftDecayedValueMonoid.zero))),
-          topK,
-          threshold
-        ).map(_.mapValues(decayedValue =>
-          Scores(favClusterNormalized8HrHalfLifeScore = Some(decayedValue))))
-
-      val mergedFollowMap = TopKScoresUtils
-        .mergeTwoTopKMapWithDecayedValues(
-          x.topClustersByFollowClusterNormalizedScore
-            .map(_.mapValues(
-              _.followClusterNormalized8HrHalfLifeScore.getOrElse(thriftDecayedValueMonoid.zero))),
-          y.topClustersByFollowClusterNormalizedScore
-            .map(_.mapValues(
-              _.followClusterNormalized8HrHalfLifeScore.getOrElse(thriftDecayedValueMonoid.zero))),
-          topK,
-          threshold
-        ).map(_.mapValues(decayedValue =>
-          Scores(followClusterNormalized8HrHalfLifeScore = Some(decayedValue))))
-
-      TopKClustersWithScores(
-        mergedFavMap,
-        mergedFollowMap
-      )
-    }
-  }
-  class TopKTweetsWithScoresMonoid(
-    topK: Int,
-    threshold: Double,
-    tweetAgeThreshold: Long
-  )(
-    implicit thriftDecayedValueMonoid: ThriftDecayedValueMonoid)
-      extends Monoid[TopKTweetsWithScores] {
-
-    override val zero: TopKTweetsWithScores = TopKTweetsWithScores()
-
-    override def plus(x: TopKTweetsWithScores, y: TopKTweetsWithScores): TopKTweetsWithScores = {
-      val oldestTweetId = SnowflakeId.firstIdFor(System.currentTimeMillis() - tweetAgeThreshold)
-
-      val mergedFavMap = TopKScoresUtils
-        .mergeTwoTopKMapWithDecayedValues(
-          x.topTweetsByFavClusterNormalizedScore
-            .map(_.mapValues(
-              _.favClusterNormalized8HrHalfLifeScore.getOrElse(thriftDecayedValueMonoid.zero))),
-          y.topTweetsByFavClusterNormalizedScore
-            .map(_.mapValues(
-              _.favClusterNormalized8HrHalfLifeScore.getOrElse(thriftDecayedValueMonoid.zero))),
-          topK,
-          threshold
-        ).map(_.filter(_._1 >= oldestTweetId).mapValues(decayedValue =>
-          Scores(favClusterNormalized8HrHalfLifeScore = Some(decayedValue))))
-
-      TopKTweetsWithScores(mergedFavMap, None)
-    }
-  }
-
-  class MultiModelTopKTweetsWithScoresMonoid(
-  )(
-    implicit thriftDecayedValueMonoid: ThriftDecayedValueMonoid)
-      extends Monoid[MultiModelTopKTweetsWithScores] {
-    override val zero: MultiModelTopKTweetsWithScores = MultiModelTopKTweetsWithScores()
-
-    override def plus(
-      x: MultiModelTopKTweetsWithScores,
-      y: MultiModelTopKTweetsWithScores
-    ): MultiModelTopKTweetsWithScores = {
-      // We reuse the logic from the Monoid for the Value here
-      val topKTweetsWithScoresMonoid = Implicits.topKTweetsWithScoresMonoid
-
-      MultiModelTopKTweetsWithScores(
-        MultiModelUtils.mergeTwoMultiModelMaps(
-          x.multiModelTopKTweetsWithScores,
-          y.multiModelTopKTweetsWithScores,
-          topKTweetsWithScoresMonoid))
-    }
-
-  }
-
-  /**
-   * Merge two PersistentSimClustersEmbedding. The latest embedding overwrite the old embedding.
-   * The new count equals to the sum of the count.
-   */
-  class PersistentSimClustersEmbeddingMonoid extends Monoid[PersistentSimClustersEmbedding] {
-
-    override val zero: PersistentSimClustersEmbedding = PersistentSimClustersEmbedding(
-      ThriftSimClustersEmbedding(),
-      SimClustersEmbeddingMetadata()
-    )
-
-    private val optionLongMonoid = new OptionMonoid[Long]()
-
-    override def plus(
-      x: PersistentSimClustersEmbedding,
-      y: PersistentSimClustersEmbedding
-    ): PersistentSimClustersEmbedding = {
-      val latest =
-        if (x.metadata.updatedAtMs.getOrElse(0L) > y.metadata.updatedAtMs.getOrElse(0L)) x else y
-      latest.copy(
-        metadata = latest.metadata.copy(
-          updatedCount = optionLongMonoid.plus(x.metadata.updatedCount, y.metadata.updatedCount)))
-    }
-  }
-
-  class MultiModelPersistentSimClustersEmbeddingMonoid
-      extends Monoid[MultiModelPersistentSimClustersEmbedding] {
-
-    override val zero: MultiModelPersistentSimClustersEmbedding =
-      MultiModelPersistentSimClustersEmbedding(Map[ModelVersion, PersistentSimClustersEmbedding]())
-
-    override def plus(
-      x: MultiModelPersistentSimClustersEmbedding,
-      y: MultiModelPersistentSimClustersEmbedding
-    ): MultiModelPersistentSimClustersEmbedding = {
-      val monoid = Implicits.persistentSimClustersEmbeddingMonoid
-
-      // PersistentSimClustersEmbeddings is the only required thrift object so we need to wrap it
-      // in Some
-      MultiModelUtils.mergeTwoMultiModelMaps(
-        Some(x.multiModelPersistentSimClustersEmbedding),
-        Some(y.multiModelPersistentSimClustersEmbedding),
-        monoid) match {
-        // clean up the empty embeddings
-        case Some(res) =>
-          MultiModelPersistentSimClustersEmbedding(res.flatMap {
-            // in some cases the list of SimClustersScore is empty, so we want to remove the
-            // modelVersion from the list of Models for the embedding
-            case (modelVersion, persistentSimClustersEmbedding) =>
-              persistentSimClustersEmbedding.embedding.embedding match {
-                case embedding if embedding.nonEmpty =>
-                  Map(modelVersion -> persistentSimClustersEmbedding)
-                case _ =>
-                  None
-              }
-          })
-        case _ => zero
-      }
-    }
-  }
-
-  /**
-   * Merge two PersistentSimClustersEmbeddings. The embedding with the longest l2 norm overwrites
-   * the other embedding. The new count equals to the sum of the count.
-   */
-  class PersistentSimClustersEmbeddingLongestL2NormMonoid
-      extends Monoid[PersistentSimClustersEmbedding] {
-
-    override val zero: PersistentSimClustersEmbedding = PersistentSimClustersEmbedding(
-      ThriftSimClustersEmbedding(),
-      SimClustersEmbeddingMetadata()
-    )
-
-    override def plus(
-      x: PersistentSimClustersEmbedding,
-      y: PersistentSimClustersEmbedding
-    ): PersistentSimClustersEmbedding = {
-      if (SimClustersEmbedding(x.embedding).l2norm >= SimClustersEmbedding(y.embedding).l2norm) x
-      else y
-    }
-  }
-
-  class MultiModelPersistentSimClustersEmbeddingLongestL2NormMonoid
-      extends Monoid[MultiModelPersistentSimClustersEmbedding] {
-
-    override val zero: MultiModelPersistentSimClustersEmbedding =
-      MultiModelPersistentSimClustersEmbedding(Map[ModelVersion, PersistentSimClustersEmbedding]())
-
-    override def plus(
-      x: MultiModelPersistentSimClustersEmbedding,
-      y: MultiModelPersistentSimClustersEmbedding
-    ): MultiModelPersistentSimClustersEmbedding = {
-      val monoid = Implicits.persistentSimClustersEmbeddingLongestL2NormMonoid
-
-      MultiModelUtils.mergeTwoMultiModelMaps(
-        Some(x.multiModelPersistentSimClustersEmbedding),
-        Some(y.multiModelPersistentSimClustersEmbedding),
-        monoid) match {
-        // clean up empty embeddings
-        case Some(res) =>
-          MultiModelPersistentSimClustersEmbedding(res.flatMap {
-            case (modelVersion, persistentSimClustersEmbedding) =>
-              // in some cases the list of SimClustersScore is empty, so we want to remove the
-              // modelVersion from the list of Models for the embedding
-              persistentSimClustersEmbedding.embedding.embedding match {
-                case embedding if embedding.nonEmpty =>
-                  Map(modelVersion -> persistentSimClustersEmbedding)
-                case _ =>
-                  None
-              }
-          })
-        case _ => zero
-      }
-    }
-  }
-
-  object TopKScoresUtils {
-
-    /**
-     * Function for merging TopK scores with decayed values.
-     *
-     * This is for use with topk scores where all scores are updated at the same time (i.e. most
-     * time-decayed embedding aggregations). Rather than storing individual scores as algebird.DecayedValue
-     * and replicating time information for every key, we can store a single timestamp for the entire
-     * embedding and replicate the decay logic when processing each score.
-     *
-     * This should replicate the behaviour of `mergeTwoTopKMapWithDecayedValues`
-     *
-     * The logic is:
-     * - Determine the most recent update and build a DecayedValue for it (decayedValueForLatestTime)
-     * - For each (cluster, score), decay the score relative to the time of the most-recently updated embedding
-     *   - This is a no-op for scores from the most recently-updated embedding, and will scale scores
-     *     for the older embedding.
-     *     - Drop any (cluster, score) which are below the `threshold` score
-     *     - If both input embeddings contribute a score for the same cluster, keep the one with the largest score (after scaling)
-     *     - Sort (cluster, score) by score and keep the `topK`
-     *
-     */
-    def mergeClusterScoresWithUpdateTimes[Key](
-      x: Seq[(Key, Double)],
-      xUpdatedAtMs: Long,
-      y: Seq[(Key, Double)],
-      yUpdatedAtMs: Long,
-      halfLifeMs: Long,
-      topK: Int,
-      threshold: Double
-    ): Seq[(Key, Double)] = {
-      val latestUpdate = math.max(xUpdatedAtMs, yUpdatedAtMs)
-      val decayedValueForLatestTime = DecayedValue.build(0.0, latestUpdate, halfLifeMs)
-
-      val merged = mutable.HashMap[Key, Double]()
-
-      x.foreach {
-        case (key, score) =>
-          val decayedScore = Implicits.decayedValueMonoid
-            .plus(
-              DecayedValue.build(score, xUpdatedAtMs, halfLifeMs),
-              decayedValueForLatestTime
-            ).value
-          if (decayedScore > threshold)
-            merged += key -> decayedScore
-      }
-
-      y.foreach {
-        case (key, score) =>
-          val decayedScore = Implicits.decayedValueMonoid
-            .plus(
-              DecayedValue.build(score, yUpdatedAtMs, halfLifeMs),
-              decayedValueForLatestTime
-            ).value
-          if (decayedScore > threshold)
-            merged.get(key) match {
-              case Some(existingValue) =>
-                if (decayedScore > existingValue)
-                  merged += key -> decayedScore
-              case None =>
-                merged += key -> decayedScore
-            }
-      }
-
-      merged.toSeq
-        .sortBy(-_._2)
-        .take(topK)
-    }
-
-    /**
-     * Function for merging to TopK map with decayed values.
-     *
-     * First of all, all the values will be decayed to the latest scaled timestamp to be comparable.
-     *
-     * If the same key appears at both a and b, the one with larger scaled time (or larger value when
-     * their scaled times are same) will be taken. The values smaller than the threshold will be dropped.
-     *
-     * After merging, if the size is larger than TopK, only scores with topK largest value will be kept.
-     */
-    def mergeTwoTopKMapWithDecayedValues[T](
-      a: Option[collection.Map[T, ThriftDecayedValue]],
-      b: Option[collection.Map[T, ThriftDecayedValue]],
-      topK: Int,
-      threshold: Double
-    )(
-      implicit thriftDecayedValueMonoid: ThriftDecayedValueMonoid
-    ): Option[collection.Map[T, ThriftDecayedValue]] = {
-
-      if (a.isEmpty || a.exists(_.isEmpty)) {
-        return b
-      }
-
-      if (b.isEmpty || b.exists(_.isEmpty)) {
-        return a
-      }
-
-      val latestScaledTime = (a.get.view ++ b.get.view).map {
-        case (_, scores) =>
-          scores.scaledTime
-      }.max
-
-      val decayedValueWithLatestScaledTime = ThriftDecayedValue(0.0, latestScaledTime)
-
-      val merged = mutable.HashMap[T, ThriftDecayedValue]()
-
-      a.foreach {
-        _.foreach {
-          case (k, v) =>
-            // decay the value to latest scaled time
-            val decayedScores = thriftDecayedValueMonoid
-              .plus(v, decayedValueWithLatestScaledTime)
-
-            // only merge if the value is larger than the threshold
-            if (decayedScores.value > threshold) {
-              merged += k -> decayedScores
-            }
-        }
-      }
-
-      b.foreach {
-        _.foreach {
-          case (k, v) =>
-            val decayedScores = thriftDecayedValueMonoid
-              .plus(v, decayedValueWithLatestScaledTime)
-
-            // only merge if the value is larger than the threshold
-            if (decayedScores.value > threshold) {
-              if (!merged.contains(k)) {
-                merged += k -> decayedScores
-              } else {
-                // only update if the value is larger than the one already merged
-                if (decayedScores.value > merged(k).value) {
-                  merged.update(k, decayedScores)
-                }
-              }
-            }
-        }
-      }
-
-      // add some buffer size (~ 0.2 * topK) to avoid sorting and taking too frequently
-      if (merged.size > topK * 1.2) {
-        Some(
-          merged.toSeq
-            .sortBy { case (_, scores) => scores.value * -1 }
-            .take(topK)
-            .toMap
-        )
-      } else {
-        Some(merged)
-      }
-    }
-  }
-
-  object MultiModelUtils {
-
-    /**
-     * In order to reduce complexity we use the Monoid for the value to plus two MultiModel maps
-     */
-    def mergeTwoMultiModelMaps[T](
-      a: Option[collection.Map[ModelVersion, T]],
-      b: Option[collection.Map[ModelVersion, T]],
-      monoid: Monoid[T]
-    ): Option[collection.Map[ModelVersion, T]] = {
-      (a, b) match {
-        case (Some(_), None) => a
-        case (None, Some(_)) => b
-        case (Some(aa), Some(bb)) =>
-          val res = ModelVersionProfiles.ModelVersionProfiles.foldLeft(Map[ModelVersion, T]()) {
-            (map, model) =>
-              map + (model._1 -> monoid.plus(
-                aa.getOrElse(model._1, monoid.zero),
-                bb.getOrElse(model._1, monoid.zero)
-              ))
-          }
-          Some(res)
-        case _ => None
-      }
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersEmbeddingWithMetadataMonoid.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersEmbeddingWithMetadataMonoid.docx
new file mode 100644
index 000000000..8058ab5f9
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersEmbeddingWithMetadataMonoid.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersEmbeddingWithMetadataMonoid.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersEmbeddingWithMetadataMonoid.scala
deleted file mode 100644
index 4379eccb9..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersEmbeddingWithMetadataMonoid.scala
+++ /dev/null
@@ -1,59 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.algebird.{Monoid, OptionMonoid}
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.summingbird.common.Monoids.TopKScoresUtils
-import com.twitter.simclusters_v2.thriftscala.{
-  SimClustersEmbeddingMetadata,
-  SimClustersEmbeddingWithMetadata,
-  SimClustersEmbedding => ThriftSimClustersEmbedding
-}
-
-/**
- * Decayed aggregation of embeddings.
- *
- * When merging 2 embeddings, the older embedding's scores are scaled by time. If a cluster is
- * present in both embeddings, the highest score (after scaling) is used in the result.
- *
- * @halfLifeMs - defines how quickly a score decays
- * @topK - only the topk clusters with the highest scores are retained in the result
- * @threshold - any clusters with weights below threshold are excluded from the result
- */
-class SimClustersEmbeddingWithMetadataMonoid(
-  halfLifeMs: Long,
-  topK: Int,
-  threshold: Double)
-    extends Monoid[SimClustersEmbeddingWithMetadata] {
-
-  override val zero: SimClustersEmbeddingWithMetadata = SimClustersEmbeddingWithMetadata(
-    ThriftSimClustersEmbedding(),
-    SimClustersEmbeddingMetadata()
-  )
-
-  private val optionLongMonoid = new OptionMonoid[Long]()
-  private val optionMaxMonoid =
-    new OptionMonoid[Long]()(com.twitter.algebird.Max.maxSemigroup[Long])
-
-  override def plus(
-    x: SimClustersEmbeddingWithMetadata,
-    y: SimClustersEmbeddingWithMetadata
-  ): SimClustersEmbeddingWithMetadata = {
-
-    val mergedClusterScores = TopKScoresUtils.mergeClusterScoresWithUpdateTimes(
-      x = SimClustersEmbedding(x.embedding).embedding,
-      xUpdatedAtMs = x.metadata.updatedAtMs.getOrElse(0),
-      y = SimClustersEmbedding(y.embedding).embedding,
-      yUpdatedAtMs = y.metadata.updatedAtMs.getOrElse(0),
-      halfLifeMs = halfLifeMs,
-      topK = topK,
-      threshold = threshold
-    )
-    SimClustersEmbeddingWithMetadata(
-      embedding = SimClustersEmbedding(mergedClusterScores).toThrift,
-      metadata = SimClustersEmbeddingMetadata(
-        updatedAtMs = optionMaxMonoid.plus(x.metadata.updatedAtMs, y.metadata.updatedAtMs),
-        updatedCount = optionLongMonoid.plus(x.metadata.updatedCount, y.metadata.updatedCount)
-      )
-    )
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersHashUtil.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersHashUtil.docx
new file mode 100644
index 000000000..d3f3bb507
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersHashUtil.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersHashUtil.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersHashUtil.scala
deleted file mode 100644
index fff4bb851..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersHashUtil.scala
+++ /dev/null
@@ -1,14 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-/**
- * Provides int to int hash function. Used to batch clusterIds together.
- */
-object SimClustersHashUtil {
-  def clusterIdToBucket(clusterId: Int): Int = {
-    clusterId % numBuckets
-  }
-
-  val numBuckets: Int = 200
-
-  val getAllBuckets: Seq[Int] = 0.until(numBuckets)
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersInterestedInUtil.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersInterestedInUtil.docx
new file mode 100644
index 000000000..b3b825f0a
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersInterestedInUtil.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersInterestedInUtil.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersInterestedInUtil.scala
deleted file mode 100644
index 4cd7ff14b..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersInterestedInUtil.scala
+++ /dev/null
@@ -1,72 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.simclusters_v2.common.ClusterId
-import com.twitter.simclusters_v2.thriftscala.{
-  ClustersUserIsInterestedIn,
-  ClustersWithScores,
-  Scores
-}
-
-object SimClustersInterestedInUtil {
-
-  private final val EmptyClustersWithScores = ClustersWithScores()
-
-  case class InterestedInScores(
-    favScore: Double,
-    clusterNormalizedFavScore: Double,
-    clusterNormalizedFollowScore: Double,
-    clusterNormalizedLogFavScore: Double)
-
-  def topClustersWithScores(
-    userInterests: ClustersUserIsInterestedIn
-  ): Seq[(ClusterId, InterestedInScores)] = {
-    userInterests.clusterIdToScores.toSeq.map {
-      case (clusterId, scores) =>
-        val favScore = scores.favScore.getOrElse(0.0)
-        val normalizedFavScore = scores.favScoreClusterNormalizedOnly.getOrElse(0.0)
-        val normalizedFollowScore = scores.followScoreClusterNormalizedOnly.getOrElse(0.0)
-        val normalizedLogFavScore = scores.logFavScoreClusterNormalizedOnly.getOrElse(0.0)
-
-        (
-          clusterId,
-          InterestedInScores(
-            favScore,
-            normalizedFavScore,
-            normalizedFollowScore,
-            normalizedLogFavScore))
-    }
-  }
-
-  def buildClusterWithScores(
-    clusterScores: Seq[(ClusterId, InterestedInScores)],
-    timeInMs: Double,
-    favScoreThresholdForUserInterest: Double
-  )(
-    implicit thriftDecayedValueMonoid: ThriftDecayedValueMonoid
-  ): ClustersWithScores = {
-    val scoresMap = clusterScores.collect {
-      case (
-            clusterId,
-            InterestedInScores(
-              favScore,
-              _,
-              _,
-              clusterNormalizedLogFavScore))
-          // NOTE: the threshold is on favScore, and the computation is on normalizedFavScore
-          // This threshold reduces the number of unique keys in the cache by 80%,
-          // based on offline analysis
-          if favScore >= favScoreThresholdForUserInterest =>
-
-        val favClusterNormalized8HrHalfLifeScoreOpt =
-            Some(thriftDecayedValueMonoid.build(clusterNormalizedLogFavScore, timeInMs))
-
-        clusterId -> Scores(favClusterNormalized8HrHalfLifeScore = favClusterNormalized8HrHalfLifeScoreOpt)
-    }.toMap
-
-    if (scoresMap.nonEmpty) {
-      ClustersWithScores(Some(scoresMap))
-    } else {
-      EmptyClustersWithScores
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersProfile.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersProfile.docx
new file mode 100644
index 000000000..340fe868f
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersProfile.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersProfile.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersProfile.scala
deleted file mode 100644
index ee58bbd67..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/SimClustersProfile.scala
+++ /dev/null
@@ -1,212 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.finagle.mtls.authentication.ServiceIdentifier
-import com.twitter.simclusters_v2.common.ModelVersions._
-import com.twitter.simclusters_v2.summingbird.common.ClientConfigs._
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.AltSetting.AltSetting
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.Environment.Environment
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.JobType.JobType
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.AltSetting
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.JobType
-import com.twitter.simclusters_v2.thriftscala.EmbeddingType
-import com.twitter.simclusters_v2.thriftscala.ModelVersion
-
-sealed trait SimClustersProfile {
-  val env: Environment
-  val alt: AltSetting
-  val modelVersionStr: String
-
-  lazy val modelVersion: ModelVersion = modelVersionStr
-}
-
-sealed trait SimClustersJobProfile extends SimClustersProfile {
-
-  val jobType: JobType
-
-  final lazy val jobName: String = {
-    alt match {
-      case AltSetting.Alt =>
-        s"simclusters_v2_${jobType}_alt_job_$env"
-      case AltSetting.Esc =>
-        s"simclusters_v2_${jobType}_esc_job_$env"
-      case _ =>
-        s"simclusters_v2_${jobType}_job_$env"
-    }
-  }
-
-  // Build the serviceIdentifier by jobType, env and zone(dc)
-  final lazy val serviceIdentifier: String => ServiceIdentifier = { zone =>
-    ServiceIdentifier(Configs.role, s"summingbird_$jobName", env.toString, zone)
-  }
-
-  final lazy val favScoreThresholdForUserInterest: Double =
-    Configs.favScoreThresholdForUserInterest(modelVersionStr)
-
-  lazy val timelineEventSourceSubscriberId: String = {
-    val jobTypeStr = jobType match {
-      case JobType.MultiModelTweet => "multi_model_tweet_"
-      case JobType.PersistentTweet => "persistent_tweet_"
-      case JobType.Tweet => ""
-    }
-
-    val prefix = alt match {
-      case AltSetting.Alt =>
-        "alt_"
-      case AltSetting.Esc =>
-        "esc_"
-      case _ =>
-        ""
-    }
-
-    s"simclusters_v2_${jobTypeStr}summingbird_$prefix$env"
-  }
-
-}
-
-object SimClustersProfile {
-
-  object JobType extends Enumeration {
-    type JobType = Value
-    val Tweet: JobType = Value("tweet")
-    val PersistentTweet: JobType = Value("persistent_tweet")
-    val MultiModelTweet: JobType = Value("multimodel_tweet")
-  }
-
-  object Environment extends Enumeration {
-    type Environment = Value
-    val Prod: Environment = Value("prod")
-    val Devel: Environment = Value("devel")
-
-    def apply(setting: String): Environment = {
-      if (setting == Prod.toString) {
-        Prod
-      } else {
-        Devel
-      }
-    }
-  }
-
-  object AltSetting extends Enumeration {
-    type AltSetting = Value
-    val Normal: AltSetting = Value("normal")
-    val Alt: AltSetting = Value("alt")
-    val Esc: AltSetting = Value("esc")
-
-    def apply(setting: String): AltSetting = {
-
-      setting match {
-        case "alt" => Alt
-        case "esc" => Esc
-        case _ => Normal
-      }
-    }
-  }
-
-  case class SimClustersTweetProfile(
-    env: Environment,
-    alt: AltSetting,
-    modelVersionStr: String,
-    entityClusterScorePath: String,
-    tweetTopKClustersPath: String,
-    clusterTopKTweetsPath: String,
-    coreEmbeddingType: EmbeddingType,
-    clusterTopKTweetsLightPath: Option[String] = None)
-      extends SimClustersJobProfile {
-
-    final val jobType: JobType = JobType.Tweet
-  }
-
-  case class PersistentTweetProfile(
-    env: Environment,
-    alt: AltSetting,
-    modelVersionStr: String,
-    persistentTweetStratoPath: String,
-    coreEmbeddingType: EmbeddingType)
-      extends SimClustersJobProfile {
-    final val jobType: JobType = JobType.PersistentTweet
-  }
-
-  final val AltProdTweetJobProfile = SimClustersTweetProfile(
-    env = Environment.Prod,
-    alt = AltSetting.Alt,
-    modelVersionStr = Model20M145K2020,
-    entityClusterScorePath = simClustersCoreAltCachePath,
-    tweetTopKClustersPath = simClustersCoreAltCachePath,
-    clusterTopKTweetsPath = simClustersCoreAltCachePath,
-    clusterTopKTweetsLightPath = Some(simClustersCoreAltLightCachePath),
-    coreEmbeddingType = EmbeddingType.LogFavBasedTweet
-  )
-
-  final val AltDevelTweetJobProfile = SimClustersTweetProfile(
-    env = Environment.Devel,
-    alt = AltSetting.Alt,
-    modelVersionStr = Model20M145K2020,
-    // using the same devel cache with job
-    entityClusterScorePath = develSimClustersCoreCachePath,
-    tweetTopKClustersPath = develSimClustersCoreCachePath,
-    clusterTopKTweetsPath = develSimClustersCoreCachePath,
-    clusterTopKTweetsLightPath = Some(develSimClustersCoreLightCachePath),
-    coreEmbeddingType = EmbeddingType.LogFavBasedTweet,
-  )
-
-  final val ProdPersistentTweetProfile = PersistentTweetProfile(
-    env = Environment.Prod,
-    alt = AltSetting.Normal,
-    modelVersionStr = Model20M145K2020,
-    // This profile is used by the persistent tweet embedding job to update the embedding. We
-    // use the uncached column to avoid reading stale data
-    persistentTweetStratoPath = logFavBasedTweet20M145K2020UncachedStratoPath,
-    coreEmbeddingType = EmbeddingType.LogFavBasedTweet
-  )
-
-  final val DevelPersistentTweetProfile = PersistentTweetProfile(
-    env = Environment.Devel,
-    alt = AltSetting.Normal,
-    modelVersionStr = Model20M145K2020,
-    persistentTweetStratoPath = develLogFavBasedTweet20M145K2020StratoPath,
-    coreEmbeddingType = EmbeddingType.LogFavBasedTweet
-  )
-
-  def fetchTweetJobProfile(
-    env: Environment,
-    alt: AltSetting = AltSetting.Normal
-  ): SimClustersTweetProfile = {
-    (env, alt) match {
-      case (Environment.Prod, AltSetting.Alt) => AltProdTweetJobProfile
-      case (Environment.Devel, AltSetting.Alt) => AltDevelTweetJobProfile
-      case _ => throw new IllegalArgumentException("Invalid env or alt setting")
-    }
-  }
-
-  def fetchPersistentJobProfile(
-    env: Environment,
-    alt: AltSetting = AltSetting.Normal
-  ): PersistentTweetProfile = {
-    (env, alt) match {
-      case (Environment.Prod, AltSetting.Normal) => ProdPersistentTweetProfile
-      case (Environment.Devel, AltSetting.Normal) => DevelPersistentTweetProfile
-      case _ => throw new IllegalArgumentException("Invalid env or alt setting")
-    }
-  }
-
-  /**
-   * For short term, fav based tweet embedding and log fav based tweets embedding exists at the
-   * same time. We want to move to log fav based tweet embedding eventually.
-   * Follow based tweet embeddings exists in both environment.
-   * A uniform tweet embedding API is the future to replace the existing use case.
-   */
-  final lazy val tweetJobProfileMap: Environment => Map[
-    (EmbeddingType, String),
-    SimClustersTweetProfile
-  ] = {
-    case Environment.Prod =>
-      Map(
-        (EmbeddingType.LogFavBasedTweet, Model20M145K2020) -> AltProdTweetJobProfile
-      )
-    case Environment.Devel =>
-      Map(
-        (EmbeddingType.LogFavBasedTweet, Model20M145K2020) -> AltDevelTweetJobProfile
-      )
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/StatsUtil.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/StatsUtil.docx
new file mode 100644
index 000000000..1d9dab2d6
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/StatsUtil.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/StatsUtil.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/StatsUtil.scala
deleted file mode 100644
index 78a34fef2..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/StatsUtil.scala
+++ /dev/null
@@ -1,22 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.summingbird.{Counter, Group, Name, Platform, Producer}
-import com.twitter.summingbird.option.JobId
-
-object StatsUtil {
-
-  // for adding stats in Producer.
-  // this enables us to add new stats by just calling producer.observer("name")
-  implicit class EnrichedProducer[P <: Platform[P], T](
-    producer: Producer[P, T]
-  )(
-    implicit jobId: JobId) {
-    def observe(counter: String): Producer[P, T] = {
-      val stat = Counter(Group(jobId.get), Name(counter))
-      producer.map { v =>
-        stat.incr()
-        v
-      }
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SummerWithSumValues.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/SummerWithSumValues.docx
new file mode 100644
index 000000000..cb045d207
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/SummerWithSumValues.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/SummerWithSumValues.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/SummerWithSumValues.scala
deleted file mode 100644
index e10718162..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/SummerWithSumValues.scala
+++ /dev/null
@@ -1,40 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.algebird.Monoid
-import com.twitter.summingbird._
-
-object SummerWithSumValues {
-  /*
-  A common pattern in heron is to use .sumByKeys to aggregate a value in a store, and then continue
-  processing with the aggregated value. Unfortunately, .sumByKeys returns the existing value from the
-  store and the delta separately, leaving you to manually combine them.
-
-  Example without sumValues:
-
-   someKeyedProducer
-    .sumByKeys(score)(monoid)
-    .map {
-      case (key, (existingValueOpt, delta)) =>
-        // if you want the value that was actually written to the store, you have to combine
-        // existingValueOpt and delta yourself
-    }
-
-  Example with sumValues:
-
-   someKeyedProducer
-    .sumByKeys(score)(monoid)
-    .sumValues(monoid)
-    .map {
-      case (key, value) =>
-        // `value` is the same as what was written to the store
-    }
-   */
-  implicit class SummerWithSumValues[P <: Platform[P], K, V](
-    summer: Summer[P, K, V]) {
-    def sumValues(monoid: Monoid[V]): KeyedProducer[P, K, V] =
-      summer.mapValues {
-        case (Some(oldV), deltaV) => monoid.plus(oldV, deltaV)
-        case (None, deltaV) => deltaV
-      }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/ThriftDecayedValueMonoid.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/ThriftDecayedValueMonoid.docx
new file mode 100644
index 000000000..b39ed42d1
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/ThriftDecayedValueMonoid.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/ThriftDecayedValueMonoid.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/ThriftDecayedValueMonoid.scala
deleted file mode 100644
index af490fc9d..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/ThriftDecayedValueMonoid.scala
+++ /dev/null
@@ -1,57 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.algebird.DecayedValue
-import com.twitter.algebird.DecayedValueMonoid
-import com.twitter.algebird.Monoid
-import com.twitter.algebird_internal.injection.DecayedValueImplicits
-import com.twitter.algebird_internal.thriftscala.{DecayedValue => ThriftDecayedValue}
-
-/**
- * Monoid for ThriftDecayedValue
- */
-class ThriftDecayedValueMonoid(halfLifeInMs: Long)(implicit decayedValueMonoid: DecayedValueMonoid)
-    extends Monoid[ThriftDecayedValue] {
-
-  override val zero: ThriftDecayedValue = DecayedValueImplicits.toThrift(decayedValueMonoid.zero)
-
-  override def plus(x: ThriftDecayedValue, y: ThriftDecayedValue): ThriftDecayedValue = {
-    DecayedValueImplicits.toThrift(
-      decayedValueMonoid
-        .plus(DecayedValueImplicits.toThrift.invert(x), DecayedValueImplicits.toThrift.invert(y))
-    )
-  }
-
-  def build(value: Double, timeInMs: Double): ThriftDecayedValue = {
-    DecayedValueImplicits.toThrift(
-      DecayedValue.build(value, timeInMs, halfLifeInMs)
-    )
-  }
-
-  /**
-   * decay to a timestamp; note that timestamp should be in Ms, and do not use scaledTime!
-   */
-  def decayToTimestamp(
-    thriftDecayedValue: ThriftDecayedValue,
-    timestampInMs: Double
-  ): ThriftDecayedValue = {
-    this.plus(thriftDecayedValue, this.build(0.0, timestampInMs))
-  }
-}
-
-object ThriftDecayedValueMonoid {
-  // add the implicit class so that a decayed value can direct call .plus, .decayedValueOfTime and
-  // so on.
-  implicit class EnrichedThriftDecayedValue(
-    thriftDecayedValue: ThriftDecayedValue
-  )(
-    implicit thriftDecayedValueMonoid: ThriftDecayedValueMonoid) {
-    def plus(other: ThriftDecayedValue): ThriftDecayedValue = {
-      thriftDecayedValueMonoid.plus(thriftDecayedValue, other)
-    }
-
-    // decay to a timestamp; note that timestamp should be in Ms
-    def decayToTimestamp(timeInMs: Double): ThriftDecayedValue = {
-      thriftDecayedValueMonoid.decayToTimestamp(thriftDecayedValue, timeInMs)
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/TweetEntityExtractor.docx b/src/scala/com/twitter/simclusters_v2/summingbird/common/TweetEntityExtractor.docx
new file mode 100644
index 000000000..f0bbf210c
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/common/TweetEntityExtractor.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/common/TweetEntityExtractor.scala b/src/scala/com/twitter/simclusters_v2/summingbird/common/TweetEntityExtractor.scala
deleted file mode 100644
index bd6a81baa..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/common/TweetEntityExtractor.scala
+++ /dev/null
@@ -1,65 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.common
-
-import com.twitter.recos.entities.thriftscala.NamedEntity
-import com.twitter.simclusters_v2.thriftscala.{
-  NerKey,
-  PenguinKey,
-  SimClusterEntity,
-  TweetTextEntity
-}
-import com.twitter.taxi.util.text.{TweetFeatureExtractor, TweetTextFeatures}
-import com.twitter.tweetypie.thriftscala.Tweet
-
-object TweetEntityExtractor {
-
-  private val MaxHashtagsPerTweet: Int = 4
-
-  private val MaxNersPerTweet: Int = 4
-
-  private val MaxPenguinsPerTweet: Int = 4
-
-  private val tweetFeatureExtractor: TweetFeatureExtractor = TweetFeatureExtractor.Default
-
-  private def extractTweetTextFeatures(
-    text: String,
-    languageCode: Option[String]
-  ): TweetTextFeatures = {
-    if (languageCode.isDefined) {
-      tweetFeatureExtractor.extract(text, languageCode.get)
-    } else {
-      tweetFeatureExtractor.extract(text)
-    }
-  }
-
-  def extractEntitiesFromText(
-    tweet: Option[Tweet],
-    nerEntitiesOpt: Option[Seq[NamedEntity]]
-  ): Seq[SimClusterEntity.TweetEntity] = {
-
-    val hashtagEntities = tweet
-      .flatMap(_.hashtags.map(_.map(_.text))).getOrElse(Nil)
-      .map { hashtag => TweetTextEntity.Hashtag(hashtag.toLowerCase) }.take(MaxHashtagsPerTweet)
-
-    val nerEntities = nerEntitiesOpt
-      .getOrElse(Nil).map { namedEntity =>
-        TweetTextEntity
-          .Ner(NerKey(namedEntity.namedEntity.toLowerCase, namedEntity.entityType.getValue))
-      }.take(MaxNersPerTweet)
-
-    val nerEntitySet = nerEntities.map(_.ner.textEntity).toSet
-
-    val penguinEntities =
-      extractTweetTextFeatures(
-        tweet.flatMap(_.coreData.map(_.text)).getOrElse(""),
-        tweet.flatMap(_.language.map(_.language))
-      ).phrases
-        .map(_.normalizedOrOriginal)
-        .filter { s =>
-          s.charAt(0) != '#' && !nerEntitySet.contains(s) // not included in hashtags and NER
-        }
-        .map { penguinStr => TweetTextEntity.Penguin(PenguinKey(penguinStr.toLowerCase)) }.take(
-          MaxPenguinsPerTweet)
-
-    (hashtagEntities ++ penguinEntities ++ nerEntities).map(e => SimClusterEntity.TweetEntity(e))
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ApeTopicEmbeddingStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ApeTopicEmbeddingStore.docx
new file mode 100644
index 000000000..8460f16db
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ApeTopicEmbeddingStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ApeTopicEmbeddingStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ApeTopicEmbeddingStore.scala
deleted file mode 100644
index 0eec17b81..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ApeTopicEmbeddingStore.scala
+++ /dev/null
@@ -1,43 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.frigate.common.store.strato.StratoStore
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.common.ModelVersions
-import com.twitter.simclusters_v2.common.ModelVersions._
-import com.twitter.simclusters_v2.thriftscala.EmbeddingType
-import com.twitter.simclusters_v2.thriftscala.InternalId
-import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
-import com.twitter.simclusters_v2.thriftscala.TopicId
-import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
-import com.twitter.storehaus.ReadableStore
-import com.twitter.strato.client.Client
-
-object ApeTopicEmbeddingStore {
-
-  private val logFavBasedAPEColumn20M145K2020 =
-    "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020"
-
-  private def getStore(
-    stratoClient: Client,
-    column: String
-  ): ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] = {
-    StratoStore
-      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](stratoClient, column)
-  }
-
-  def getFavBasedLocaleEntityEmbedding2020Store(
-    stratoClient: Client,
-  ): ReadableStore[TopicId, SimClustersEmbedding] = {
-
-    getStore(stratoClient, logFavBasedAPEColumn20M145K2020)
-      .composeKeyMapping[TopicId] { topicId =>
-        SimClustersEmbeddingId(
-          EmbeddingType.LogFavBasedKgoApeTopic,
-          ModelVersions.Model20M145K2020,
-          InternalId.TopicId(topicId)
-        )
-      }
-      .mapValues(SimClustersEmbedding(_))
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/BUILD b/src/scala/com/twitter/simclusters_v2/summingbird/stores/BUILD
deleted file mode 100644
index 9e78da7c4..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/BUILD
+++ /dev/null
@@ -1,32 +0,0 @@
-scala_library(
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "3rdparty/jvm/com/twitter/algebird:core",
-        "3rdparty/jvm/com/twitter/algebird:util",
-        "3rdparty/jvm/com/twitter/bijection:core",
-        "3rdparty/jvm/com/twitter/bijection:util",
-        "3rdparty/jvm/com/twitter/storehaus:core",
-        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
-        "relevance-platform/src/main/scala/com/twitter/relevance_platform/simclustersann/multicluster",
-        "src/scala/com/twitter/algebird_internal/injection",
-        "src/scala/com/twitter/simclusters_v2/common",
-        "src/scala/com/twitter/simclusters_v2/summingbird/common",
-        "src/scala/com/twitter/storehaus_internal/manhattan",
-        "src/scala/com/twitter/storehaus_internal/manhattan/config",
-        "src/scala/com/twitter/storehaus_internal/memcache",
-        "src/scala/com/twitter/storehaus_internal/memcache/config",
-        "src/scala/com/twitter/storehaus_internal/offline",
-        "src/scala/com/twitter/storehaus_internal/online",
-        "src/scala/com/twitter/storehaus_internal/util",
-        "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits",
-        "src/scala/com/twitter/summingbird_internal/runner/store_config",
-        "src/scala/com/twitter/wtf/summingbird/sources/common",
-        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
-        "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala",
-        "src/thrift/com/twitter/wtf/interest:interest-thrift-scala",
-        "src/thrift/com/twitter/wtf/utt:utt-scala",
-        "strato/src/main/scala/com/twitter/strato/client",
-        "strato/src/main/scala/com/twitter/strato/mh",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/BUILD.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/BUILD.docx
new file mode 100644
index 000000000..d1f942939
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ClusterDetailsReadableStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ClusterDetailsReadableStore.docx
new file mode 100644
index 000000000..4f86a90f3
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ClusterDetailsReadableStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ClusterDetailsReadableStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ClusterDetailsReadableStore.scala
deleted file mode 100644
index a553e7ff8..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ClusterDetailsReadableStore.scala
+++ /dev/null
@@ -1,67 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.bijection.{Bufferable, Injection}
-import com.twitter.bijection.scrooge.CompactScalaCodec
-import com.twitter.simclusters_v2.common.ModelVersions
-import com.twitter.simclusters_v2.thriftscala.ClusterDetails
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus_internal.manhattan.{Athena, ManhattanRO, ManhattanROConfig}
-import com.twitter.storehaus_internal.util.{ApplicationID, DatasetName, HDFSPath}
-import com.twitter.util.{Future, Memoize}
-
-object ClusterDetailsReadableStore {
-
-  val modelVersionToDatasetMap: Map[String, String] = Map(
-    ModelVersions.Model20M145KDec11 -> "simclusters_v2_cluster_details",
-    ModelVersions.Model20M145KUpdated -> "simclusters_v2_cluster_details_20m_145k_updated",
-    ModelVersions.Model20M145K2020 -> "simclusters_v2_cluster_details_20m_145k_2020"
-  )
-
-  val knownModelVersions: String = modelVersionToDatasetMap.keys.mkString(",")
-
-  private val clusterDetailsStores =
-    Memoize[(ManhattanKVClientMtlsParams, String), ReadableStore[(String, Int), ClusterDetails]] {
-      case (mhMtlsParams: ManhattanKVClientMtlsParams, datasetName: String) =>
-        getForDatasetName(mhMtlsParams, datasetName)
-    }
-
-  def getForDatasetName(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    datasetName: String
-  ): ReadableStore[(String, Int), ClusterDetails] = {
-    implicit val keyInjection: Injection[(String, Int), Array[Byte]] =
-      Bufferable.injectionOf[(String, Int)]
-    implicit val valueInjection: Injection[ClusterDetails, Array[Byte]] =
-      CompactScalaCodec(ClusterDetails)
-
-    ManhattanRO.getReadableStoreWithMtls[(String, Int), ClusterDetails](
-      ManhattanROConfig(
-        HDFSPath(""), // not needed
-        ApplicationID("simclusters_v2"),
-        DatasetName(datasetName), // this should be correct
-        Athena
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def apply(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[(String, Int), ClusterDetails] = {
-    new ReadableStore[(String, Int), ClusterDetails] {
-      override def get(modelVersionAndClusterId: (String, Int)): Future[Option[ClusterDetails]] = {
-        val (modelVersion, _) = modelVersionAndClusterId
-        modelVersionToDatasetMap.get(modelVersion) match {
-          case Some(datasetName) =>
-            clusterDetailsStores((mhMtlsParams, datasetName)).get(modelVersionAndClusterId)
-          case None =>
-            Future.exception(
-              new IllegalArgumentException(
-                "Unknown model version " + modelVersion + ". Known modelVersions: " + knownModelVersions)
-            )
-        }
-      }
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/EntityClusterScoreReadableStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/EntityClusterScoreReadableStore.docx
new file mode 100644
index 000000000..0cc5f6eae
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/EntityClusterScoreReadableStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/EntityClusterScoreReadableStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/EntityClusterScoreReadableStore.scala
deleted file mode 100644
index b25687f4e..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/EntityClusterScoreReadableStore.scala
+++ /dev/null
@@ -1,62 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.finagle.mtls.authentication.ServiceIdentifier
-import com.twitter.frigate.common.store.strato.StratoStore
-import com.twitter.simclusters_v2.summingbird.common.Implicits.clustersWithScoreMonoid
-import com.twitter.simclusters_v2.summingbird.common.Implicits.clustersWithScoresCodec
-import com.twitter.storehaus.algebra.MergeableStore
-import com.twitter.simclusters_v2.summingbird.common.ClientConfigs
-import com.twitter.simclusters_v2.summingbird.common.Implicits
-import com.twitter.simclusters_v2.thriftscala.ClustersWithScores
-import com.twitter.simclusters_v2.thriftscala.FullClusterIdBucket
-import com.twitter.simclusters_v2.thriftscala.MultiModelClustersWithScores
-import com.twitter.simclusters_v2.thriftscala.SimClusterEntity
-import com.twitter.storehaus.Store
-import com.twitter.storehaus_internal.memcache.Memcache
-import com.twitter.strato.client.Client
-import com.twitter.summingbird.batch.BatchID
-import com.twitter.summingbird_internal.bijection.BatchPairImplicits
-import com.twitter.util.Future
-import com.twitter.strato.thrift.ScroogeConvImplicits._
-
-object EntityClusterScoreReadableStore {
-
-  private[simclusters_v2] final lazy val onlineMergeableStore: (
-    String,
-    ServiceIdentifier
-  ) => MergeableStore[
-    ((SimClusterEntity, FullClusterIdBucket), BatchID),
-    ClustersWithScores
-  ] = { (path: String, serviceIdentifier: ServiceIdentifier) =>
-    Memcache
-      .getMemcacheStore[((SimClusterEntity, FullClusterIdBucket), BatchID), ClustersWithScores](
-        ClientConfigs.entityClusterScoreMemcacheConfig(path, serviceIdentifier)
-      )(
-        BatchPairImplicits.keyInjection[(SimClusterEntity, FullClusterIdBucket)](
-          Implicits.entityWithClusterInjection
-        ),
-        clustersWithScoresCodec,
-        clustersWithScoreMonoid
-      )
-  }
-
-}
-
-object MultiModelEntityClusterScoreReadableStore {
-
-  private[simclusters_v2] def MultiModelEntityClusterScoreReadableStore(
-    stratoClient: Client,
-    column: String
-  ): Store[EntityClusterId, MultiModelClustersWithScores] = {
-    StratoStore
-      .withUnitView[(SimClusterEntity, Int), MultiModelClustersWithScores](stratoClient, column)
-      .composeKeyMapping(_.toTuple)
-  }
-
-  case class EntityClusterId(
-    simClusterEntity: SimClusterEntity,
-    clusterIdBucket: Int) {
-    lazy val toTuple: (SimClusterEntity, Int) =
-      (simClusterEntity, clusterIdBucket)
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ManhattanFromStratoStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ManhattanFromStratoStore.docx
new file mode 100644
index 000000000..e65b0a2cf
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ManhattanFromStratoStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ManhattanFromStratoStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ManhattanFromStratoStore.scala
deleted file mode 100644
index ba9af7f00..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ManhattanFromStratoStore.scala
+++ /dev/null
@@ -1,108 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.bijection.Injection
-import com.twitter.finagle.stats.NullStatsReceiver
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.io.Buf
-import com.twitter.scrooge.ThriftStruct
-import com.twitter.simclusters_v2.common.TweetId
-import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore.Timestamp
-import com.twitter.simclusters_v2.thriftscala.PersistentSimClustersEmbedding
-import com.twitter.storage.client.manhattan.kv.Guarantee
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClient
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpointBuilder
-import com.twitter.storage.client.manhattan.kv.impl.FullBufKey
-import com.twitter.storage.client.manhattan.kv.impl.ValueDescriptor
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus_internal.manhattan_kv.ManhattanEndpointStore
-import com.twitter.strato.catalog.Version
-import com.twitter.strato.config.MValEncoding
-import com.twitter.strato.config.NativeEncoding
-import com.twitter.strato.config.PkeyLkey2
-import com.twitter.strato.data.Conv
-import com.twitter.strato.data.Type
-import com.twitter.strato.mh.ManhattanInjections
-import com.twitter.strato.thrift.ScroogeConv
-import com.twitter.strato.thrift.ScroogeConvImplicits._
-
-object ManhattanFromStratoStore {
-  /* This enables reading from a MH store where the data is written by Strato. Strato uses a unique
-  encoding (Conv) which needs to be reconstructed for each MH store based on the type of data that
-  is written to it. Once that encoding is generated on start-up, we can read from the store like
-  any other ReadableStore.
-   */
-  def createPersistentTweetStore(
-    dataset: String,
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    statsReceiver: StatsReceiver = NullStatsReceiver
-  ): ReadableStore[(TweetId, Timestamp), PersistentSimClustersEmbedding] = {
-    val appId = "simclusters_embeddings_prod"
-    val dest = "/s/manhattan/omega.native-thrift"
-
-    val endpoint = createMhEndpoint(
-      appId = appId,
-      dest = dest,
-      mhMtlsParams = mhMtlsParams,
-      statsReceiver = statsReceiver)
-
-    val (
-      keyInj: Injection[(TweetId, Timestamp), FullBufKey],
-      valueDesc: ValueDescriptor.EmptyValue[PersistentSimClustersEmbedding]) =
-      injectionsFromPkeyLkeyValueStruct[TweetId, Timestamp, PersistentSimClustersEmbedding](
-        dataset = dataset,
-        pkType = Type.Long,
-        lkType = Type.Long)
-
-    ManhattanEndpointStore
-      .readable[(TweetId, Timestamp), PersistentSimClustersEmbedding, FullBufKey](
-        endpoint = endpoint,
-        keyDescBuilder = keyInj,
-        emptyValDesc = valueDesc)
-  }
-
-  private def createMhEndpoint(
-    appId: String,
-    dest: String,
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    statsReceiver: StatsReceiver = NullStatsReceiver
-  ) = {
-    val mhc = ManhattanKVClient.memoizedByDest(
-      appId = appId,
-      dest = dest,
-      mtlsParams = mhMtlsParams
-    )
-
-    ManhattanKVEndpointBuilder(mhc)
-      .defaultGuarantee(Guarantee.SoftDcReadMyWrites)
-      .statsReceiver(statsReceiver)
-      .build()
-  }
-
-  private def injectionsFromPkeyLkeyValueStruct[PK: Conv, LK: Conv, V <: ThriftStruct: Manifest](
-    dataset: String,
-    pkType: Type,
-    lkType: Type
-  ): (Injection[(PK, LK), FullBufKey], ValueDescriptor.EmptyValue[V]) = {
-    // Strato uses a unique encoding (Conv) so we need to rebuild that based on the pkey, lkey and
-    // value type before converting it to the Manhattan injections for key -> FullBufKey and
-    // value -> Buf
-    val valueConv: Conv[V] = ScroogeConv.fromStruct[V]
-
-    val mhEncodingMapping = PkeyLkey2(
-      pkey = pkType,
-      lkey = lkType,
-      value = valueConv.t,
-      pkeyEncoding = NativeEncoding,
-      lkeyEncoding = NativeEncoding,
-      valueEncoding = MValEncoding()
-    )
-
-    val (keyInj: Injection[(PK, LK), FullBufKey], valueInj: Injection[V, Buf], _, _) =
-      ManhattanInjections.fromPkeyLkey[PK, LK, V](mhEncodingMapping, dataset, Version.Default)
-
-    val valDesc: ValueDescriptor.EmptyValue[V] = ValueDescriptor.EmptyValue(valueInj)
-
-    (keyInj, valDesc)
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/PersistentTweetEmbeddingStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/PersistentTweetEmbeddingStore.docx
new file mode 100644
index 000000000..5a2228b9c
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/PersistentTweetEmbeddingStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/PersistentTweetEmbeddingStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/PersistentTweetEmbeddingStore.scala
deleted file mode 100644
index ab9c06240..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/PersistentTweetEmbeddingStore.scala
+++ /dev/null
@@ -1,104 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.frigate.common.store.strato.StratoFetchableStore
-import com.twitter.frigate.common.store.strato.StratoStore
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.common.SimClustersEmbedding._
-import com.twitter.simclusters_v2.common.TweetId
-import com.twitter.simclusters_v2.thriftscala.PersistentSimClustersEmbedding
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus.Store
-import com.twitter.strato.catalog.Scan.Slice
-import com.twitter.strato.client.Client
-import com.twitter.strato.thrift.ScroogeConvImplicits._
-
-object PersistentTweetEmbeddingStore {
-
-  val LogFavBasedColumn =
-    "recommendations/simclusters_v2/embeddings/logFavBasedTweet20M145KUpdatedPersistent"
-  val LogFavBasedColumn20m145k2020 =
-    "recommendations/simclusters_v2/embeddings/logFavBasedTweet20M145K2020Persistent"
-
-  val LogFavBased20m145k2020Dataset = "log_fav_based_tweet_20m_145k_2020_embeddings"
-  val LogFavBased20m145kUpdatedDataset = "log_fav_based_tweet_20m_145k_updated_embeddings"
-
-  val DefaultMaxLength = 15
-
-  def mostRecentTweetEmbeddingStore(
-    stratoClient: Client,
-    column: String,
-    maxLength: Int = DefaultMaxLength
-  ): ReadableStore[TweetId, SimClustersEmbedding] = {
-    StratoFetchableStore
-      .withUnitView[(TweetId, Timestamp), PersistentSimClustersEmbedding](stratoClient, column)
-      .composeKeyMapping[TweetId]((_, LatestEmbeddingVersion))
-      .mapValues(_.embedding.truncate(maxLength))
-  }
-
-  def longestL2NormTweetEmbeddingStore(
-    stratoClient: Client,
-    column: String
-  ): ReadableStore[TweetId, SimClustersEmbedding] =
-    StratoFetchableStore
-      .withUnitView[(TweetId, Timestamp), PersistentSimClustersEmbedding](stratoClient, column)
-      .composeKeyMapping[TweetId]((_, LongestL2EmbeddingVersion))
-      .mapValues(_.embedding)
-
-  def mostRecentTweetEmbeddingStoreManhattan(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    dataset: String,
-    statsReceiver: StatsReceiver,
-    maxLength: Int = DefaultMaxLength
-  ): ReadableStore[TweetId, SimClustersEmbedding] =
-    ManhattanFromStratoStore
-      .createPersistentTweetStore(
-        dataset = dataset,
-        mhMtlsParams = mhMtlsParams,
-        statsReceiver = statsReceiver
-      ).composeKeyMapping[TweetId]((_, LatestEmbeddingVersion))
-      .mapValues[SimClustersEmbedding](_.embedding.truncate(maxLength))
-
-  def longestL2NormTweetEmbeddingStoreManhattan(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    dataset: String,
-    statsReceiver: StatsReceiver,
-    maxLength: Int = 50
-  ): ReadableStore[TweetId, SimClustersEmbedding] =
-    ManhattanFromStratoStore
-      .createPersistentTweetStore(
-        dataset = dataset,
-        mhMtlsParams = mhMtlsParams,
-        statsReceiver = statsReceiver
-      ).composeKeyMapping[TweetId]((_, LongestL2EmbeddingVersion))
-      .mapValues[SimClustersEmbedding](_.embedding.truncate(maxLength))
-
-  /**
-   * The writeable store for Persistent Tweet embedding. Only available in SimClusters package.
-   */
-  private[simclusters_v2] def persistentTweetEmbeddingStore(
-    stratoClient: Client,
-    column: String
-  ): Store[PersistentTweetEmbeddingId, PersistentSimClustersEmbedding] = {
-    StratoStore
-      .withUnitView[(TweetId, Timestamp), PersistentSimClustersEmbedding](stratoClient, column)
-      .composeKeyMapping(_.toTuple)
-  }
-
-  type Timestamp = Long
-
-  case class PersistentTweetEmbeddingId(
-    tweetId: TweetId,
-    timestampInMs: Timestamp = LatestEmbeddingVersion) {
-    lazy val toTuple: (TweetId, Timestamp) = (tweetId, timestampInMs)
-  }
-
-  // Special version - reserved for the latest version of the embedding
-  private[summingbird] val LatestEmbeddingVersion = 0L
-  // Special version - reserved for the embedding with the longest L2 norm
-  private[summingbird] val LongestL2EmbeddingVersion = 1L
-
-  // The tweet embedding store keeps at most 20 LKeys
-  private[stores] val DefaultSlice = Slice[Long](from = None, to = None, limit = None)
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ProducerClusterEmbeddingReadableStores.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ProducerClusterEmbeddingReadableStores.docx
new file mode 100644
index 000000000..7e50b8a3d
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ProducerClusterEmbeddingReadableStores.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ProducerClusterEmbeddingReadableStores.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/ProducerClusterEmbeddingReadableStores.scala
deleted file mode 100644
index e978aa9f9..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/ProducerClusterEmbeddingReadableStores.scala
+++ /dev/null
@@ -1,101 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.bijection.Injection
-import com.twitter.bijection.scrooge.CompactScalaCodec
-import com.twitter.simclusters_v2.thriftscala.PersistedFullClusterId
-import com.twitter.simclusters_v2.thriftscala.TopProducersWithScore
-import com.twitter.simclusters_v2.thriftscala.TopSimClustersWithScore
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus_internal.manhattan.Athena
-import com.twitter.storehaus_internal.manhattan.ManhattanRO
-import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
-import com.twitter.storehaus_internal.util.ApplicationID
-import com.twitter.storehaus_internal.util.DatasetName
-import com.twitter.storehaus_internal.util.HDFSPath
-
-object ProducerClusterEmbeddingReadableStores {
-
-  implicit val longInject: Injection[Long, Array[Byte]] = Injection.long2BigEndian
-  implicit val clusterInject: Injection[TopSimClustersWithScore, Array[Byte]] =
-    CompactScalaCodec(TopSimClustersWithScore)
-  implicit val producerInject: Injection[TopProducersWithScore, Array[Byte]] =
-    CompactScalaCodec(TopProducersWithScore)
-  implicit val clusterIdInject: Injection[PersistedFullClusterId, Array[Byte]] =
-    CompactScalaCodec(PersistedFullClusterId)
-
-  private val appId = "simclusters_v2"
-
-  def getSimClusterEmbeddingTopKProducersStore(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[PersistedFullClusterId, TopProducersWithScore] = {
-    ManhattanRO.getReadableStoreWithMtls[PersistedFullClusterId, TopProducersWithScore](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName("simcluster_embedding_top_k_producers_by_fav_score_20m_145k_updated"),
-        Athena
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def getProducerTopKSimClustersEmbeddingsStore(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[Long, TopSimClustersWithScore] = {
-    val datasetName = "producer_top_k_simcluster_embeddings_by_fav_score_20m_145k_updated"
-    ManhattanRO.getReadableStoreWithMtls[Long, TopSimClustersWithScore](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName(datasetName),
-        Athena
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def getProducerTopKSimClusters2020EmbeddingsStore(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[Long, TopSimClustersWithScore] = {
-    val datasetName = "producer_top_k_simcluster_embeddings_by_fav_score_20m_145k_2020"
-    ManhattanRO.getReadableStoreWithMtls[Long, TopSimClustersWithScore](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName(datasetName),
-        Athena
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def getSimClusterEmbeddingTopKProducersByFollowStore(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[PersistedFullClusterId, TopProducersWithScore] = {
-    ManhattanRO.getReadableStoreWithMtls[PersistedFullClusterId, TopProducersWithScore](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName("simcluster_embedding_top_k_producers_by_follow_score_20m_145k_updated"),
-        Athena
-      ),
-      mhMtlsParams
-    )
-  }
-
-  def getProducerTopKSimClustersEmbeddingsByFollowStore(
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[Long, TopSimClustersWithScore] = {
-    ManhattanRO.getReadableStoreWithMtls[Long, TopSimClustersWithScore](
-      ManhattanROConfig(
-        HDFSPath(""),
-        ApplicationID(appId),
-        DatasetName("producer_top_k_simcluster_embeddings_by_follow_score_20m_145k_2020"),
-        Athena
-      ),
-      mhMtlsParams
-    )
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/SemanticCoreEntityEmbeddingStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/SemanticCoreEntityEmbeddingStore.docx
new file mode 100644
index 000000000..e0a108a71
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/SemanticCoreEntityEmbeddingStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/SemanticCoreEntityEmbeddingStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/SemanticCoreEntityEmbeddingStore.scala
deleted file mode 100644
index ccdea937c..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/SemanticCoreEntityEmbeddingStore.scala
+++ /dev/null
@@ -1,49 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.frigate.common.store.strato.StratoStore
-import com.twitter.simclusters_v2.common.ModelVersions
-import com.twitter.simclusters_v2.common.ModelVersions._
-import com.twitter.simclusters_v2.thriftscala.{
-  EmbeddingType,
-  InternalId,
-  LocaleEntityId,
-  SimClustersEmbeddingId,
-  SimClustersEmbedding => ThriftSimClustersEmbedding
-}
-import com.twitter.storehaus.ReadableStore
-import com.twitter.strato.client.Client
-import com.twitter.strato.thrift.ScroogeConvImplicits._
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-
-/**
- * entity -> List< cluster >
- */
-object SemanticCoreEntityEmbeddingStore {
-
-  private val column =
-    "recommendations/simclusters_v2/embeddings/semanticCoreEntityPerLanguageEmbeddings20M145KUpdated"
-
-  /**
-   * Default store, wrapped in generic data types. Use this if you know the underlying key struct.
-   */
-  private def getDefaultStore(
-    stratoClient: Client
-  ): ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] = {
-    StratoStore
-      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](stratoClient, column)
-  }
-
-  def getFavBasedLocaleEntityEmbeddingStore(
-    stratoClient: Client
-  ): ReadableStore[LocaleEntityId, SimClustersEmbedding] = {
-    getDefaultStore(stratoClient)
-      .composeKeyMapping[LocaleEntityId] { entityId =>
-        SimClustersEmbeddingId(
-          EmbeddingType.FavBasedSematicCoreEntity,
-          ModelVersions.Model20M145KUpdated,
-          InternalId.LocaleEntityId(entityId)
-        )
-      }
-      .mapValues(SimClustersEmbedding(_))
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/SimClustersManhattanReadableStoreForReadWriteDataset.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/SimClustersManhattanReadableStoreForReadWriteDataset.docx
new file mode 100644
index 000000000..ea71e371e
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/SimClustersManhattanReadableStoreForReadWriteDataset.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/SimClustersManhattanReadableStoreForReadWriteDataset.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/SimClustersManhattanReadableStoreForReadWriteDataset.scala
deleted file mode 100644
index 63c1e772c..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/SimClustersManhattanReadableStoreForReadWriteDataset.scala
+++ /dev/null
@@ -1,65 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
-import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClient
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpointBuilder
-import com.twitter.storage.client.manhattan.kv.impl.Component
-import com.twitter.storage.client.manhattan.kv.impl.DescriptorP1L0
-import com.twitter.storage.client.manhattan.kv.impl.KeyDescriptor
-import com.twitter.storage.client.manhattan.kv.impl.ValueDescriptor
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus_internal.manhattan.ManhattanCluster
-import com.twitter.storehaus_internal.manhattan.Adama
-import com.twitter.storage.client.manhattan.bijections.Bijections.BinaryScalaInjection
-import com.twitter.storage.client.manhattan.kv.Guarantee
-import com.twitter.conversions.DurationOps._
-import com.twitter.simclusters_v2.thriftscala.InternalId
-import com.twitter.stitch.Stitch
-import com.twitter.storage.client.manhattan.bijections.Bijections.LongInjection
-import com.twitter.util.Future
-
-/**
- * Manhattan Readable Store to fetch simcluster embedding from a read-write dataset.
- * Only read operations are allowed through this store.
- * @param appId The "application id"
- * @param datasetName The MH dataset name.
- * @param label The human readable label for the finagle thrift client
- * @param mtlsParams Client service identifier to use to authenticate with Manhattan service
- * @param manhattanCluster Manhattan RW cluster
- **/
-class SimClustersManhattanReadableStoreForReadWriteDataset(
-  appId: String,
-  datasetName: String,
-  label: String,
-  mtlsParams: ManhattanKVClientMtlsParams,
-  manhattanCluster: ManhattanCluster = Adama)
-    extends ReadableStore[SimClustersEmbeddingId, ClustersUserIsInterestedIn] {
-  /*
-  Setting up a new builder to read from Manhattan RW dataset. This is specifically required for
-  BeT project where we update the MH RW dataset (every 2 hours) using cloud shuttle service.
-   */
-  val destName = manhattanCluster.wilyName
-  val endPoint = ManhattanKVEndpointBuilder(ManhattanKVClient(appId, destName, mtlsParams, label))
-    .defaultGuarantee(Guarantee.SoftDcReadMyWrites)
-    .build()
-
-  val keyDesc = KeyDescriptor(Component(LongInjection), Component()).withDataset(datasetName)
-  val valueDesc = ValueDescriptor(BinaryScalaInjection(ClustersUserIsInterestedIn))
-
-  override def get(
-    embeddingId: SimClustersEmbeddingId
-  ): Future[Option[ClustersUserIsInterestedIn]] = {
-    embeddingId match {
-      case SimClustersEmbeddingId(theEmbeddingType, theModelVersion, InternalId.UserId(userId)) =>
-        val populatedKey: DescriptorP1L0.FullKey[Long] = keyDesc.withPkey(userId)
-        // returns result
-        val mhValue = Stitch.run(endPoint.get(populatedKey, valueDesc))
-        mhValue.map {
-          case Some(x) => Option(x.contents)
-          case _ => None
-        }
-      case _ => Future.None
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TfgTopicEmbeddingsStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TfgTopicEmbeddingsStore.docx
new file mode 100644
index 000000000..b9bf2591c
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TfgTopicEmbeddingsStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TfgTopicEmbeddingsStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TfgTopicEmbeddingsStore.scala
deleted file mode 100644
index 1332c573a..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TfgTopicEmbeddingsStore.scala
+++ /dev/null
@@ -1,46 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.frigate.common.store.strato.StratoStore
-import com.twitter.simclusters_v2.common.ModelVersions
-import com.twitter.simclusters_v2.common.ModelVersions._
-import com.twitter.simclusters_v2.thriftscala.EmbeddingType
-import com.twitter.simclusters_v2.thriftscala.InternalId
-import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
-import com.twitter.simclusters_v2.thriftscala.TopicId
-import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
-import com.twitter.storehaus.ReadableStore
-import com.twitter.strato.client.Client
-import com.twitter.strato.thrift.ScroogeConvImplicits._
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-
-/**
- * TopicId -> List< cluster>
- */
-object TfgTopicEmbeddingsStore {
-
-  private val favBasedColumn20M145K2020 =
-    "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020"
-
-  private def getStore(
-    stratoClient: Client,
-    column: String
-  ): ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] = {
-    StratoStore
-      .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding](stratoClient, column)
-  }
-
-  def getFavBasedLocaleEntityEmbedding2020Store(
-    stratoClient: Client,
-  ): ReadableStore[TopicId, SimClustersEmbedding] = {
-
-    getStore(stratoClient, favBasedColumn20M145K2020)
-      .composeKeyMapping[TopicId] { topicId =>
-        SimClustersEmbeddingId(
-          EmbeddingType.FavTfgTopic,
-          ModelVersions.Model20M145K2020,
-          InternalId.TopicId(topicId)
-        )
-      }
-      .mapValues(SimClustersEmbedding(_))
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForEntityReadableStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForEntityReadableStore.docx
new file mode 100644
index 000000000..1accdcfa0
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForEntityReadableStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForEntityReadableStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForEntityReadableStore.scala
deleted file mode 100644
index baa3fa2a1..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForEntityReadableStore.scala
+++ /dev/null
@@ -1,36 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.simclusters_v2.summingbird.common.EntityUtil
-import com.twitter.simclusters_v2.thriftscala._
-import com.twitter.storehaus.ReadableStore
-import com.twitter.util.Future
-import com.twitter.util.Time
-
-case class TopKClustersForEntityReadableStore(
-  underlyingStore: ReadableStore[EntityWithVersion, TopKClustersWithScores])
-    extends ReadableStore[EntityWithVersion, TopKClustersWithScores] {
-
-  override def multiGet[K1 <: EntityWithVersion](
-    ks: Set[K1]
-  ): Map[K1, Future[Option[TopKClustersWithScores]]] = {
-    val nowInMs = Time.now.inMilliseconds
-    underlyingStore
-      .multiGet(ks)
-      .mapValues { resFuture =>
-        resFuture.map { resOpt =>
-          resOpt.map { clustersWithScores =>
-            clustersWithScores.copy(
-              topClustersByFavClusterNormalizedScore = EntityUtil.updateScoreWithLatestTimestamp(
-                clustersWithScores.topClustersByFavClusterNormalizedScore,
-                nowInMs
-              ),
-              topClustersByFollowClusterNormalizedScore = EntityUtil.updateScoreWithLatestTimestamp(
-                clustersWithScores.topClustersByFollowClusterNormalizedScore,
-                nowInMs
-              )
-            )
-          }
-        }
-      }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForTweetReadableStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForTweetReadableStore.docx
new file mode 100644
index 000000000..765e4be18
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForTweetReadableStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForTweetReadableStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForTweetReadableStore.scala
deleted file mode 100644
index f2381a2a5..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKClustersForTweetReadableStore.scala
+++ /dev/null
@@ -1,176 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.finagle.mtls.authentication.ServiceIdentifier
-import com.twitter.simclusters_v2.common.ModelVersions
-import com.twitter.simclusters_v2.summingbird.common.Implicits.batcher
-import com.twitter.simclusters_v2.summingbird.common.Implicits.topKClustersWithScoresCodec
-import com.twitter.simclusters_v2.summingbird.common.Implicits.topKClustersWithScoresMonoid
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.Environment
-import com.twitter.simclusters_v2.summingbird.common.ClientConfigs
-import com.twitter.simclusters_v2.summingbird.common.Configs
-import com.twitter.simclusters_v2.summingbird.common.Implicits
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile
-import com.twitter.simclusters_v2.thriftscala._
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus.algebra.MergeableStore
-import com.twitter.storehaus_internal.memcache.Memcache
-import com.twitter.summingbird.batch.BatchID
-import com.twitter.summingbird.store.ClientStore
-import com.twitter.summingbird_internal.bijection.BatchPairImplicits
-import com.twitter.util.Duration
-import com.twitter.util.Future
-
-object TopKClustersForTweetReadableStore {
-
-  private[summingbird] final lazy val onlineMergeableStore: (
-    String,
-    ServiceIdentifier
-  ) => MergeableStore[(EntityWithVersion, BatchID), TopKClustersWithScores] = {
-    (storePath: String, serviceIdentifier: ServiceIdentifier) =>
-      Memcache.getMemcacheStore[(EntityWithVersion, BatchID), TopKClustersWithScores](
-        ClientConfigs.tweetTopKClustersMemcacheConfig(storePath, serviceIdentifier)
-      )(
-        BatchPairImplicits.keyInjection[EntityWithVersion](Implicits.topKClustersKeyCodec),
-        topKClustersWithScoresCodec,
-        topKClustersWithScoresMonoid
-      )
-  }
-
-  final lazy val defaultStore: (
-    String,
-    ServiceIdentifier
-  ) => ReadableStore[EntityWithVersion, TopKClustersWithScores] = {
-    (storePath: String, serviceIdentifier: ServiceIdentifier) =>
-      // note that DefaultTopKClustersForEntityReadableStore is reused here because they share the
-      // same structure
-      TopKClustersForEntityReadableStore(
-        ClientStore(this.onlineMergeableStore(storePath, serviceIdentifier), Configs.batchesToKeep))
-  }
-}
-
-case class TweetKey(
-  tweetId: Long,
-  modelVersion: String,
-  embeddingType: EmbeddingType = EmbeddingType.FavBasedTweet,
-  halfLife: Duration = Configs.HalfLife) {
-
-  lazy val modelVersionThrift: ModelVersion = ModelVersions.toModelVersion(modelVersion)
-
-  lazy val simClustersEmbeddingId: SimClustersEmbeddingId =
-    SimClustersEmbeddingId(embeddingType, modelVersionThrift, InternalId.TweetId(tweetId))
-}
-
-object TweetKey {
-
-  def apply(simClustersEmbeddingId: SimClustersEmbeddingId): TweetKey = {
-    simClustersEmbeddingId match {
-      case SimClustersEmbeddingId(embeddingType, modelVersion, InternalId.TweetId(tweetId)) =>
-        TweetKey(tweetId, ModelVersions.toKnownForModelVersion(modelVersion), embeddingType)
-      case id =>
-        throw new IllegalArgumentException(s"Invalid $id for TweetKey")
-    }
-  }
-
-}
-
-case class TopKClustersForTweetKeyReadableStore(
-  proxyMap: Map[(EmbeddingType, String), ReadableStore[EntityWithVersion, TopKClustersWithScores]],
-  halfLifeDuration: Duration,
-  topKClustersWithScoresToSeq: TopKClustersWithScores => Seq[(Int, Double)],
-  maxResult: Option[Int] = None)
-    extends ReadableStore[TweetKey, Seq[(Int, Double)]] {
-
-  private val modifiedProxyMap = proxyMap.map {
-    case ((embeddingType, modelVersion), proxy) =>
-      (embeddingType, modelVersion) -> proxy.composeKeyMapping { key: TweetKey =>
-        EntityWithVersion(
-          SimClusterEntity.TweetId(key.tweetId),
-          // Fast fail if the model version is invalid.
-          ModelVersions.toModelVersion(modelVersion))
-      }
-  }
-
-  override def multiGet[K1 <: TweetKey](
-    keys: Set[K1]
-  ): Map[K1, Future[Option[Seq[(Int, Double)]]]] = {
-    val (validKeys, invalidKeys) = keys.partition { tweetKey =>
-      proxyMap.contains((tweetKey.embeddingType, tweetKey.modelVersion)) &&
-      halfLifeDuration.inMilliseconds == Configs.HalfLifeInMs
-    }
-
-    val resultsFuture = validKeys.groupBy(key => (key.embeddingType, key.modelVersion)).flatMap {
-      case (typeModelTuple, subKeys) =>
-        modifiedProxyMap(typeModelTuple).multiGet(subKeys)
-    }
-
-    resultsFuture.mapValues { topKClustersWithScoresFut =>
-      for (topKClustersWithScoresOpt <- topKClustersWithScoresFut) yield {
-        for {
-          topKClustersWithScores <- topKClustersWithScoresOpt
-        } yield {
-          val results = topKClustersWithScoresToSeq(topKClustersWithScores)
-          maxResult match {
-            case Some(max) =>
-              results.take(max)
-            case None =>
-              results
-          }
-        }
-      }
-    } ++ invalidKeys.map { key => (key, Future.None) }.toMap
-  }
-}
-
-object TopKClustersForTweetKeyReadableStore {
-  // Use Prod cache by default
-  def defaultProxyMap(
-    serviceIdentifier: ServiceIdentifier
-  ): Map[(EmbeddingType, String), ReadableStore[EntityWithVersion, TopKClustersWithScores]] =
-    SimClustersProfile.tweetJobProfileMap(Environment.Prod).mapValues { profile =>
-      TopKClustersForTweetReadableStore
-        .defaultStore(profile.clusterTopKTweetsPath, serviceIdentifier)
-    }
-  val defaultHalfLife: Duration = Duration.fromMilliseconds(Configs.HalfLifeInMs)
-
-  def defaultStore(
-    serviceIdentifier: ServiceIdentifier
-  ): ReadableStore[TweetKey, Seq[(Int, Double)]] =
-    TopKClustersForTweetKeyReadableStore(
-      defaultProxyMap(serviceIdentifier),
-      defaultHalfLife,
-      getTopClustersWithScoresByFavClusterNormalizedScore
-    )
-
-  def overrideLimitDefaultStore(
-    maxResult: Int,
-    serviceIdentifier: ServiceIdentifier
-  ): ReadableStore[TweetKey, Seq[(Int, Double)]] = {
-    TopKClustersForTweetKeyReadableStore(
-      defaultProxyMap(serviceIdentifier),
-      defaultHalfLife,
-      getTopClustersWithScoresByFavClusterNormalizedScore,
-      Some(maxResult)
-    )
-  }
-
-  private def getTopClustersWithScoresByFavClusterNormalizedScore(
-    topKClustersWithScores: TopKClustersWithScores
-  ): Seq[(Int, Double)] = {
-    {
-      for {
-        clusterIdWIthScores <- topKClustersWithScores.topClustersByFavClusterNormalizedScore
-      } yield {
-        (
-          for {
-            (clusterId, scores) <- clusterIdWIthScores
-            favClusterNormalized8HrHalfLifeScore <- scores.favClusterNormalized8HrHalfLifeScore
-            if favClusterNormalized8HrHalfLifeScore.value > 0.0
-          } yield {
-            clusterId -> favClusterNormalized8HrHalfLifeScore.value
-          }
-        ).toSeq.sortBy(-_._2)
-      }
-    }.getOrElse(Nil)
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKTweetsForClusterReadableStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKTweetsForClusterReadableStore.docx
new file mode 100644
index 000000000..420cccb92
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKTweetsForClusterReadableStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKTweetsForClusterReadableStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKTweetsForClusterReadableStore.scala
deleted file mode 100644
index 39284424f..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TopKTweetsForClusterReadableStore.scala
+++ /dev/null
@@ -1,298 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.bijection.Injection
-import com.twitter.bijection.scrooge.CompactScalaCodec
-import com.twitter.finagle.mtls.authentication.ServiceIdentifier
-import com.twitter.frigate.common.store.strato.StratoStore
-import com.twitter.relevance_platform.simclustersann.multicluster.ClusterTweetIndexStoreConfig
-import com.twitter.simclusters_v2.common.ClusterId
-import com.twitter.simclusters_v2.common.ModelVersions
-import com.twitter.simclusters_v2.common.TweetId
-import com.twitter.simclusters_v2.summingbird.common.ClientConfigs
-import com.twitter.simclusters_v2.summingbird.common.Configs
-import com.twitter.simclusters_v2.summingbird.common.EntityUtil
-import com.twitter.simclusters_v2.summingbird.common.Implicits
-import com.twitter.simclusters_v2.summingbird.common.Implicits.batcher
-import com.twitter.simclusters_v2.summingbird.common.Implicits.topKTweetsWithScoresCodec
-import com.twitter.simclusters_v2.summingbird.common.Implicits.topKTweetsWithScoresMonoid
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.Environment
-import com.twitter.simclusters_v2.thriftscala.EmbeddingType
-import com.twitter.simclusters_v2.thriftscala.FullClusterId
-import com.twitter.simclusters_v2.thriftscala.ModelVersion
-import com.twitter.simclusters_v2.thriftscala.MultiModelTopKTweetsWithScores
-import com.twitter.simclusters_v2.thriftscala.TopKTweetsWithScores
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus.Store
-import com.twitter.storehaus.algebra.MergeableStore
-import com.twitter.storehaus_internal.manhattan.ManhattanRO
-import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
-import com.twitter.storehaus_internal.memcache.Memcache
-import com.twitter.storehaus_internal.util.ApplicationID
-import com.twitter.storehaus_internal.util.DatasetName
-import com.twitter.storehaus_internal.util.HDFSPath
-import com.twitter.strato.client.Client
-import com.twitter.strato.thrift.ScroogeConvImplicits._
-import com.twitter.summingbird.batch.BatchID
-import com.twitter.summingbird.store.ClientStore
-import com.twitter.summingbird_internal.bijection.BatchPairImplicits
-import com.twitter.util.Duration
-import com.twitter.util.Future
-import com.twitter.util.Time
-
-/**
- * Comparing to underlyingStore, this store decays all the values to current timestamp
- */
-case class TopKTweetsForClusterReadableStore(
-  underlyingStore: ReadableStore[FullClusterId, TopKTweetsWithScores])
-    extends ReadableStore[FullClusterId, TopKTweetsWithScores] {
-
-  override def multiGet[K1 <: FullClusterId](
-    ks: Set[K1]
-  ): Map[K1, Future[Option[TopKTweetsWithScores]]] = {
-    val nowInMs = Time.now.inMilliseconds
-    underlyingStore
-      .multiGet(ks)
-      .mapValues { resFuture =>
-        resFuture.map { resOpt =>
-          resOpt.map { tweetsWithScores =>
-            tweetsWithScores.copy(
-              topTweetsByFavClusterNormalizedScore = EntityUtil.updateScoreWithLatestTimestamp(
-                tweetsWithScores.topTweetsByFavClusterNormalizedScore,
-                nowInMs),
-              topTweetsByFollowClusterNormalizedScore = EntityUtil.updateScoreWithLatestTimestamp(
-                tweetsWithScores.topTweetsByFollowClusterNormalizedScore,
-                nowInMs)
-            )
-          }
-        }
-      }
-  }
-}
-
-object TopKTweetsForClusterReadableStore {
-
-  private[summingbird] final lazy val onlineMergeableStore: (
-    String,
-    ServiceIdentifier
-  ) => MergeableStore[(FullClusterId, BatchID), TopKTweetsWithScores] = {
-    (storePath: String, serviceIdentifier: ServiceIdentifier) =>
-      Memcache.getMemcacheStore[(FullClusterId, BatchID), TopKTweetsWithScores](
-        ClientConfigs.clusterTopTweetsMemcacheConfig(storePath, serviceIdentifier)
-      )(
-        BatchPairImplicits.keyInjection[FullClusterId](Implicits.fullClusterIdCodec),
-        topKTweetsWithScoresCodec,
-        topKTweetsWithScoresMonoid
-      )
-  }
-
-  final lazy val defaultStore: (
-    String,
-    ServiceIdentifier
-  ) => ReadableStore[FullClusterId, TopKTweetsWithScores] = {
-    (storePath: String, serviceIdentifier: ServiceIdentifier) =>
-      TopKTweetsForClusterReadableStore(
-        ClientStore(
-          TopKTweetsForClusterReadableStore.onlineMergeableStore(storePath, serviceIdentifier),
-          Configs.batchesToKeep
-        ))
-  }
-}
-
-object MultiModelTopKTweetsForClusterReadableStore {
-
-  private[simclusters_v2] def MultiModelTopKTweetsForClusterReadableStore(
-    stratoClient: Client,
-    column: String
-  ): Store[Int, MultiModelTopKTweetsWithScores] = {
-    StratoStore
-      .withUnitView[Int, MultiModelTopKTweetsWithScores](stratoClient, column)
-  }
-}
-
-case class ClusterKey(
-  clusterId: ClusterId,
-  modelVersion: String,
-  embeddingType: EmbeddingType = EmbeddingType.FavBasedTweet,
-  halfLife: Duration = Configs.HalfLife) {
-  lazy val modelVersionThrift: ModelVersion = ModelVersions.toModelVersion(modelVersion)
-}
-
-case class TopKTweetsForClusterKeyReadableStore(
-  proxyMap: Map[(EmbeddingType, String), ReadableStore[FullClusterId, TopKTweetsWithScores]],
-  halfLife: Duration,
-  topKTweetsWithScoresToSeq: TopKTweetsWithScores => Seq[(Long, Double)],
-  maxResult: Option[Int] = None)
-    extends ReadableStore[ClusterKey, Seq[(Long, Double)]] {
-
-  private val modifiedProxyMap = proxyMap.map {
-    case (typeModelTuple, proxy) =>
-      typeModelTuple -> proxy.composeKeyMapping { key: ClusterKey =>
-        FullClusterId(ModelVersions.toModelVersion(typeModelTuple._2), key.clusterId)
-      }
-  }
-
-  override def multiGet[K1 <: ClusterKey](
-    keys: Set[K1]
-  ): Map[K1, Future[Option[Seq[(Long, Double)]]]] = {
-    val (validKeys, invalidKeys) = keys.partition { clusterKey =>
-      proxyMap.contains(
-        (clusterKey.embeddingType, clusterKey.modelVersion)) && clusterKey.halfLife == halfLife
-    }
-
-    val resultsFuture = validKeys.groupBy(key => (key.embeddingType, key.modelVersion)).flatMap {
-      case (typeModelTuple, subKeys) =>
-        modifiedProxyMap(typeModelTuple).multiGet(subKeys)
-    }
-
-    resultsFuture.mapValues { topKTweetsWithScoresFut =>
-      for (topKTweetsWithScoresOpt <- topKTweetsWithScoresFut) yield {
-        for {
-          topKTweetsWithScores <- topKTweetsWithScoresOpt
-        } yield {
-          val results = topKTweetsWithScoresToSeq(topKTweetsWithScores)
-          maxResult match {
-            case Some(max) =>
-              results.take(max)
-            case None =>
-              results
-          }
-        }
-      }
-    } ++ invalidKeys.map { key => (key, Future.None) }.toMap
-  }
-}
-
-object TopKTweetsForClusterKeyReadableStore {
-  implicit val fullClusterIdInjection: Injection[FullClusterId, Array[Byte]] =
-    CompactScalaCodec(FullClusterId)
-
-  // Use Prod cache by default
-  def defaultProxyMap(
-    serviceIdentifier: ServiceIdentifier,
-  ): Map[(EmbeddingType, String), ReadableStore[FullClusterId, TopKTweetsWithScores]] =
-    SimClustersProfile.tweetJobProfileMap(Environment.Prod).mapValues { profile =>
-      TopKTweetsForClusterReadableStore
-        .defaultStore(profile.clusterTopKTweetsPath, serviceIdentifier)
-    }
-  val defaultHalfLife: Duration = Configs.HalfLife
-
-  def defaultStore(
-    serviceIdentifier: ServiceIdentifier
-  ): ReadableStore[ClusterKey, Seq[(Long, Double)]] =
-    TopKTweetsForClusterKeyReadableStore(
-      defaultProxyMap(serviceIdentifier),
-      defaultHalfLife,
-      getTopTweetsWithScoresByFavClusterNormalizedScore
-    )
-
-  def storeUsingFollowClusterNormalizedScore(
-    serviceIdentifier: ServiceIdentifier
-  ): ReadableStore[ClusterKey, Seq[(Long, Double)]] =
-    TopKTweetsForClusterKeyReadableStore(
-      defaultProxyMap(serviceIdentifier),
-      defaultHalfLife,
-      getTopTweetsWithScoresByFollowClusterNormalizedScore
-    )
-
-  def overrideLimitDefaultStore(
-    maxResult: Int,
-    serviceIdentifier: ServiceIdentifier,
-  ): ReadableStore[ClusterKey, Seq[(Long, Double)]] = {
-    TopKTweetsForClusterKeyReadableStore(
-      defaultProxyMap(serviceIdentifier),
-      defaultHalfLife,
-      getTopTweetsWithScoresByFavClusterNormalizedScore,
-      Some(maxResult)
-    )
-  }
-
-  private def getTopTweetsWithScoresByFavClusterNormalizedScore(
-    topKTweets: TopKTweetsWithScores
-  ): Seq[(Long, Double)] = {
-    {
-      for {
-        tweetIdWithScores <- topKTweets.topTweetsByFavClusterNormalizedScore
-      } yield {
-        (
-          for {
-            (tweetId, scores) <- tweetIdWithScores
-            favClusterNormalized8HrHalfLifeScore <- scores.favClusterNormalized8HrHalfLifeScore
-            if favClusterNormalized8HrHalfLifeScore.value > 0.0
-          } yield {
-            tweetId -> favClusterNormalized8HrHalfLifeScore.value
-          }
-        ).toSeq.sortBy(-_._2)
-      }
-    }.getOrElse(Nil)
-  }
-
-  private def getTopTweetsWithScoresByFollowClusterNormalizedScore(
-    topKTweets: TopKTweetsWithScores
-  ): Seq[(Long, Double)] = {
-    {
-      for {
-        tweetIdWithScores <- topKTweets.topTweetsByFollowClusterNormalizedScore
-      } yield {
-        (
-          for {
-            (tweetId, scores) <- tweetIdWithScores
-            followClusterNormalized8HrHalfLifeScore <-
-              scores.followClusterNormalized8HrHalfLifeScore
-            if followClusterNormalized8HrHalfLifeScore.value > 0.0
-          } yield {
-            tweetId -> followClusterNormalized8HrHalfLifeScore.value
-          }
-        ).toSeq.sortBy(-_._2)
-      }
-    }.getOrElse(Nil)
-  }
-
-  def getClusterToTopKTweetsStoreFromManhattanRO(
-    maxResults: Int,
-    manhattanConfig: ClusterTweetIndexStoreConfig.Manhattan,
-    serviceIdentifier: ServiceIdentifier,
-  ): ReadableStore[ClusterKey, Seq[(TweetId, Double)]] = {
-    ManhattanRO
-      .getReadableStoreWithMtls[FullClusterId, TopKTweetsWithScores](
-        ManhattanROConfig(
-          HDFSPath(""),
-          ApplicationID(manhattanConfig.applicationID),
-          DatasetName(manhattanConfig.datasetName),
-          manhattanConfig.manhattanCluster
-        ),
-        ManhattanKVClientMtlsParams(serviceIdentifier)
-      ).composeKeyMapping[ClusterKey] { clusterKey =>
-        FullClusterId(
-          modelVersion = ModelVersions.toModelVersion(clusterKey.modelVersion),
-          clusterId = clusterKey.clusterId
-        )
-      }.mapValues { topKTweetsWithScores =>
-        // Only return maxResults tweets for each cluster Id
-        getTopTweetsWithScoresByFavClusterNormalizedScore(topKTweetsWithScores).take(maxResults)
-      }
-  }
-
-  def getClusterToTopKTweetsStoreFromMemCache(
-    maxResults: Int,
-    memCacheConfig: ClusterTweetIndexStoreConfig.Memcached,
-    serviceIdentifier: ServiceIdentifier,
-  ): ReadableStore[ClusterKey, Seq[(TweetId, Double)]] = {
-    TopKTweetsForClusterReadableStore(
-      ClientStore(
-        TopKTweetsForClusterReadableStore
-          .onlineMergeableStore(memCacheConfig.memcachedDest, serviceIdentifier),
-        Configs.batchesToKeep
-      ))
-      .composeKeyMapping[ClusterKey] { clusterKey =>
-        FullClusterId(
-          modelVersion = ModelVersions.toModelVersion(clusterKey.modelVersion),
-          clusterId = clusterKey.clusterId
-        )
-      }.mapValues { topKTweetsWithScores =>
-        // Only return maxResults tweets for each cluster Id
-        getTopTweetsWithScoresByFavClusterNormalizedScore(topKTweetsWithScores).take(maxResults)
-      }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TweetStatusCountsStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TweetStatusCountsStore.docx
new file mode 100644
index 000000000..33ed08de5
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TweetStatusCountsStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TweetStatusCountsStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/TweetStatusCountsStore.scala
deleted file mode 100644
index ce7ee2409..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/TweetStatusCountsStore.scala
+++ /dev/null
@@ -1,29 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.frigate.common.store.strato.StratoFetchableStore
-import com.twitter.simclusters_v2.common.TweetId
-import com.twitter.storehaus.ReadableStore
-import com.twitter.strato.client.Client
-import com.twitter.strato.thrift.ScroogeConvImplicits._
-import com.twitter.tweetypie.thriftscala.{GetTweetOptions, StatusCounts, Tweet}
-
-/**
- * Factory for a store that maps a tweet id to that tweet's StatusCounts.
- */
-object TweetStatusCountsStore {
-
-  // Substituted when the fetched tweet carries no counts at all.
-  private val emptyStatusCount = StatusCounts()
-
-  // View options asking for the retweet, reply, favorite and quote counts.
-  private val getTweetOptions =
-    GetTweetOptions(
-      includeRetweetCount = true,
-      includeReplyCount = true,
-      includeFavoriteCount = true,
-      includeQuoteCount = true
-    )
-
-  /**
-   * @param stratoClient client used to fetch tweets
-   * @param column       Strato column the tweets are fetched from
-   * @return store yielding the tweet's counts, or an empty StatusCounts when the tweet has none
-   */
-  def tweetStatusCountsStore(
-    stratoClient: Client,
-    column: String
-  ): ReadableStore[TweetId, StatusCounts] = {
-    val tweetStore = StratoFetchableStore
-      .withView[TweetId, GetTweetOptions, Tweet](stratoClient, column, getTweetOptions)
-
-    tweetStore.mapValues { tweet => tweet.counts.getOrElse(emptyStatusCount) }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserInterestedInReadableStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserInterestedInReadableStore.docx
new file mode 100644
index 000000000..2e56c7774
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserInterestedInReadableStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserInterestedInReadableStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserInterestedInReadableStore.scala
deleted file mode 100644
index e318c9185..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserInterestedInReadableStore.scala
+++ /dev/null
@@ -1,263 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.bijection.Injection
-import com.twitter.bijection.scrooge.CompactScalaCodec
-import com.twitter.simclusters_v2.common.ModelVersions
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.common.UserId
-import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
-import com.twitter.simclusters_v2.thriftscala.EmbeddingType
-import com.twitter.simclusters_v2.thriftscala.InternalId
-import com.twitter.simclusters_v2.thriftscala.ModelVersion
-import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus_internal.manhattan.ManhattanCluster
-import com.twitter.storehaus_internal.manhattan.Athena
-import com.twitter.storehaus_internal.manhattan.ManhattanRO
-import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
-import com.twitter.storehaus_internal.manhattan.Nash
-import com.twitter.storehaus_internal.util.ApplicationID
-import com.twitter.storehaus_internal.util.DatasetName
-import com.twitter.storehaus_internal.util.HDFSPath
-
-object UserInterestedInReadableStore {
-
-  // Clusters whose size is greater than this will not be considered. This is how the UTEG-based
-  // experiment was run (because it could not process such clusters). The Summingbird/Memcache
-  // implementation has no such restriction, but we noticed that tweets in the big clusters are
-  // not scored correctly. The fix for this seems a little involved, so for now we simply
-  // exclude such clusters.
-  val MaxClusterSizeForUserInterestedInDataset: Int = 5e6.toInt
-
-  // Model version -> dataset name of the regular InterestedIn data.
-  val modelVersionToDatasetMap: Map[String, String] = Map(
-    ModelVersions.Model20M145KDec11 -> "simclusters_v2_interested_in",
-    ModelVersions.Model20M145KUpdated -> "simclusters_v2_interested_in_20m_145k_updated",
-    ModelVersions.Model20M145K2020 -> "simclusters_v2_interested_in_20m_145k_2020"
-  )
-
-  // Producer embedding based User InterestedIn.
-  val modelVersionToDenserDatasetMap: Map[String, String] = Map(
-    ModelVersions.Model20M145KUpdated -> "simclusters_v2_interested_in_from_producer_embeddings_model20m145kupdated"
-  )
-
-  // Model version -> dataset name used by the IIAPE stores below.
-  val modelVersionToIIAPEDatasetMap: Map[String, String] = Map(
-    ModelVersions.Model20M145K2020 -> "simclusters_v2_interested_in_from_ape_20m145k2020"
-  )
-
-  // Model version -> dataset name used by the IIKF-lite store below.
-  val modelVersionToIIKFLiteDatasetMap: Map[String, String] = Map(
-    ModelVersions.Model20M145K2020 -> "simclusters_v2_interested_in_lite_20m_145k_2020"
-  )
-
-  // Model version -> dataset name used by the next-InterestedIn store below.
-  val modelVersionToNextInterestedInDatasetMap: Map[String, String] = Map(
-    ModelVersions.Model20M145K2020 -> "bet_consumer_embedding_v2"
-  )
-
-  val defaultModelVersion: String = ModelVersions.Model20M145KUpdated
-  // Comma-joined keys of modelVersionToDatasetMap only; the other maps above hold fewer versions.
-  val knownModelVersions: String = modelVersionToDatasetMap.keys.mkString(",")
-
-  /**
-   * Opens the dataset registered for `modelVersion` in modelVersionToDatasetMap, using the
-   * "simclusters_v2" application id.
-   *
-   * @param mhMtlsParams mTLS parameters passed through to getStore
-   * @param modelVersion model version to look up; defaults to defaultModelVersion
-   * @throws IllegalArgumentException if `modelVersion` has no entry in modelVersionToDatasetMap
-   */
-  def defaultStoreWithMtls(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    modelVersion: String = defaultModelVersion
-  ): ReadableStore[UserId, ClustersUserIsInterestedIn] =
-    modelVersionToDatasetMap.get(modelVersion) match {
-      case Some(datasetName) =>
-        this.getStore("simclusters_v2", mhMtlsParams, datasetName)
-      case None =>
-        throw new IllegalArgumentException(
-          "Unknown model version: " + modelVersion + ". Known model versions: " + knownModelVersions)
-    }
-
-  /**
-   * Exposes the default InterestedIn store as a store of SimClustersEmbedding keyed by
-   * SimClustersEmbeddingId.
-   *
-   * Only user-id keys whose embedding type and model version equal the arguments are handled by
-   * the key mapping; it has no case for any other key. Values are converted with
-   * toSimClustersEmbedding, applying MaxClusterSizeForUserInterestedInDataset as the size limit.
-   */
-  def defaultSimClustersEmbeddingStoreWithMtls(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    embeddingType: EmbeddingType,
-    modelVersion: ModelVersion
-  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
-    val userStore =
-      defaultStoreWithMtls(mhMtlsParams, ModelVersions.toKnownForModelVersion(modelVersion))
-
-    userStore
-      .composeKeyMapping[SimClustersEmbeddingId] {
-        case SimClustersEmbeddingId(keyEmbeddingType, keyModelVersion, InternalId.UserId(userId))
-            if keyEmbeddingType == embeddingType && keyModelVersion == modelVersion =>
-          userId
-      }.mapValues { record =>
-        toSimClustersEmbedding(
-          record,
-          embeddingType,
-          Some(MaxClusterSizeForUserInterestedInDataset))
-      }
-  }
-
-  /**
-   * Opens the IIKF-lite dataset registered for `modelVersion` under the "simclusters_v2"
-   * application id.
-   *
-   * NOTE(review): the default `modelVersion` (defaultModelVersion) is not a key of
-   * modelVersionToIIKFLiteDatasetMap as declared in this object, so callers presumably always
-   * pass the version explicitly -- confirm before relying on the default.
-   *
-   * @param mhMtlsParams mTLS parameters passed through to getStore
-   * @param modelVersion model version to look up in modelVersionToIIKFLiteDatasetMap
-   * @throws IllegalArgumentException if `modelVersion` has no IIKF-lite dataset; the message
-   *                                  lists the versions that do
-   */
-  def defaultIIKFLiteStoreWithMtls(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    modelVersion: String = defaultModelVersion
-  ): ReadableStore[Long, ClustersUserIsInterestedIn] = {
-    // Report the keys of the map actually consulted, not the keys of modelVersionToDatasetMap.
-    val datasetName = modelVersionToIIKFLiteDatasetMap.getOrElse(
-      modelVersion,
-      throw new IllegalArgumentException(
-        "Unknown model version: " + modelVersion + ". Known model versions: " +
-          modelVersionToIIKFLiteDatasetMap.keys.mkString(","))
-    )
-    getStore("simclusters_v2", mhMtlsParams, datasetName)
-  }
-
-  /**
-   * Opens the producer-embedding based InterestedIn dataset registered for `modelVersion` under
-   * the "simclusters_v2" application id.
-   *
-   * The version is validated against modelVersionToDenserDatasetMap, the same map the dataset
-   * name is read from. Validating against modelVersionToDatasetMap instead would let versions
-   * without a denser dataset through the guard and fail later with NoSuchElementException.
-   *
-   * @param mhMtlsParams mTLS parameters passed through to getStore
-   * @param modelVersion model version to look up; defaults to defaultModelVersion
-   * @throws IllegalArgumentException if `modelVersion` has no entry in
-   *                                  modelVersionToDenserDatasetMap
-   */
-  def defaultIIPEStoreWithMtls(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    modelVersion: String = defaultModelVersion
-  ): ReadableStore[Long, ClustersUserIsInterestedIn] = {
-    val datasetName = modelVersionToDenserDatasetMap.getOrElse(
-      modelVersion,
-      throw new IllegalArgumentException(
-        "Unknown model version: " + modelVersion + ". Known model versions: " +
-          modelVersionToDenserDatasetMap.keys.mkString(","))
-    )
-    getStore("simclusters_v2", mhMtlsParams, datasetName)
-  }
-
-  /**
-   * Opens the IIAPE dataset registered for `modelVersion` under the "simclusters_v2"
-   * application id.
-   *
-   * The version is validated against modelVersionToIIAPEDatasetMap, the same map the dataset
-   * name is read from. Validating against modelVersionToDatasetMap instead would let versions
-   * without an IIAPE dataset through the guard and fail later with NoSuchElementException.
-   *
-   * NOTE(review): the default `modelVersion` (defaultModelVersion) is not a key of
-   * modelVersionToIIAPEDatasetMap as declared in this object -- confirm that callers pass the
-   * version explicitly.
-   *
-   * @param mhMtlsParams mTLS parameters passed through to getStore
-   * @param modelVersion model version to look up in modelVersionToIIAPEDatasetMap
-   * @throws IllegalArgumentException if `modelVersion` has no entry in
-   *                                  modelVersionToIIAPEDatasetMap
-   */
-  def defaultIIAPEStoreWithMtls(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    modelVersion: String = defaultModelVersion
-  ): ReadableStore[Long, ClustersUserIsInterestedIn] = {
-    val datasetName = modelVersionToIIAPEDatasetMap.getOrElse(
-      modelVersion,
-      throw new IllegalArgumentException(
-        "Unknown model version: " + modelVersion + ". Known model versions: " +
-          modelVersionToIIAPEDatasetMap.keys.mkString(","))
-    )
-    getStore("simclusters_v2", mhMtlsParams, datasetName)
-  }
-
-  /**
-   * Exposes the store returned by defaultIIPEStoreWithMtls as a store of SimClustersEmbedding
-   * keyed by SimClustersEmbeddingId.
-   *
-   * Only user-id keys whose embedding type and model version equal the arguments are handled by
-   * the key mapping; it has no case for any other key. No cluster-size limit is applied.
-   */
-  def defaultIIPESimClustersEmbeddingStoreWithMtls(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    embeddingType: EmbeddingType,
-    modelVersion: ModelVersion
-  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
-    val userStore =
-      defaultIIPEStoreWithMtls(mhMtlsParams, ModelVersions.toKnownForModelVersion(modelVersion))
-
-    userStore
-      .composeKeyMapping[SimClustersEmbeddingId] {
-        case SimClustersEmbeddingId(keyEmbeddingType, keyModelVersion, InternalId.UserId(userId))
-            if keyEmbeddingType == embeddingType && keyModelVersion == modelVersion =>
-          userId
-      }.mapValues { record => toSimClustersEmbedding(record, embeddingType) }
-  }
-
-  /**
-   * Exposes the store returned by defaultIIAPEStoreWithMtls as a store of SimClustersEmbedding
-   * keyed by SimClustersEmbeddingId.
-   *
-   * Only user-id keys whose embedding type and model version equal the arguments are handled by
-   * the key mapping; it has no case for any other key. No cluster-size limit is applied.
-   */
-  def defaultIIAPESimClustersEmbeddingStoreWithMtls(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    embeddingType: EmbeddingType,
-    modelVersion: ModelVersion
-  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
-    val userStore =
-      defaultIIAPEStoreWithMtls(mhMtlsParams, ModelVersions.toKnownForModelVersion(modelVersion))
-
-    userStore
-      .composeKeyMapping[SimClustersEmbeddingId] {
-        case SimClustersEmbeddingId(keyEmbeddingType, keyModelVersion, InternalId.UserId(userId))
-            if keyEmbeddingType == embeddingType && keyModelVersion == modelVersion =>
-          userId
-      }.mapValues { record => toSimClustersEmbedding(record, embeddingType) }
-  }
-
-  /**
-   * Opens the next-InterestedIn dataset for `modelVersion` and exposes its values as
-   * SimClustersEmbedding via toSimClustersEmbedding (no cluster-size limit).
-   *
-   * @param mhMtlsParams  mTLS parameters for the Manhattan client
-   * @param embeddingType selects which score of each cluster goes into the embedding
-   * @param modelVersion  model version; its string form is looked up in
-   *                      modelVersionToNextInterestedInDatasetMap
-   * @throws IllegalArgumentException if the version has no next-InterestedIn dataset; the
-   *                                  message lists the versions that do
-   */
-  def defaultNextInterestedInStoreWithMtls(
-    mhMtlsParams: ManhattanKVClientMtlsParams,
-    embeddingType: EmbeddingType,
-    modelVersion: ModelVersion
-  ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = {
-    // Convert once; the dataset map is keyed by the string form of the model version.
-    val modelVersionStr = ModelVersions.toKnownForModelVersion(modelVersion)
-    // Report the keys of the map actually consulted, not the keys of modelVersionToDatasetMap.
-    val datasetName = modelVersionToNextInterestedInDatasetMap.getOrElse(
-      modelVersionStr,
-      throw new IllegalArgumentException(
-        "Unknown model version: " + modelVersion + ". Known model versions: " +
-          modelVersionToNextInterestedInDatasetMap.keys.mkString(","))
-    )
-    new SimClustersManhattanReadableStoreForReadWriteDataset(
-      appId = "kafka_beam_sink_bet_consumer_embedding_prod",
-      datasetName = datasetName,
-      label = datasetName,
-      mtlsParams = mhMtlsParams,
-      manhattanCluster = Nash
-    ).mapValues(toSimClustersEmbedding(_, embeddingType))
-  }
-
-  /**
-   * Same lookup as defaultStoreWithMtls, but with a caller-supplied Manhattan application id.
-   *
-   * @param appId        Manhattan application id passed through to getStore
-   * @param mtlsParams   mTLS parameters passed through to getStore
-   * @param modelVersion model version to look up; defaults to defaultModelVersion
-   * @throws IllegalArgumentException if `modelVersion` has no entry in modelVersionToDatasetMap
-   */
-  def getWithMtls(
-    appId: String,
-    mtlsParams: ManhattanKVClientMtlsParams,
-    modelVersion: String = defaultModelVersion
-  ): ReadableStore[Long, ClustersUserIsInterestedIn] =
-    modelVersionToDatasetMap.get(modelVersion) match {
-      case Some(datasetName) =>
-        this.getStore(appId, mtlsParams, datasetName)
-      case None =>
-        throw new IllegalArgumentException(
-          "Unknown model version: " + modelVersion + ". Known model versions: " + knownModelVersions)
-    }
-
-  /**
-   * Builds a read-only Manhattan store of ClustersUserIsInterestedIn records keyed by Long.
-   *
-   * Keys are serialized with the big-endian Long injection and values with the compact
-   * Scrooge codec declared below.
-   *
-   * @param appId            Manhattan AppId
-   * @param mtlsParams       MtlsParams for s2s Authentication
-   * @param datasetName      name of the Manhattan dataset to read
-   * @param manhattanCluster Manhattan cluster to read from; defaults to Athena
-   *
-   * @return ReadableStore of user to cluster interestedIn data set
-   */
-  def getStore(
-    appId: String,
-    mtlsParams: ManhattanKVClientMtlsParams,
-    datasetName: String,
-    manhattanCluster: ManhattanCluster = Athena
-  ): ReadableStore[Long, ClustersUserIsInterestedIn] = {
-
-    // Presumably picked up implicitly by getReadableStoreWithMtls below -- neither is referenced
-    // by name in this method.
-    implicit val keyInjection: Injection[Long, Array[Byte]] = Injection.long2BigEndian
-    implicit val userInterestsCodec: Injection[ClustersUserIsInterestedIn, Array[Byte]] =
-      CompactScalaCodec(ClustersUserIsInterestedIn)
-
-    ManhattanRO.getReadableStoreWithMtls[Long, ClustersUserIsInterestedIn](
-      ManhattanROConfig(
-        HDFSPath(""), // not needed
-        ApplicationID(appId),
-        DatasetName(datasetName),
-        manhattanCluster
-      ),
-      mtlsParams
-    )
-  }
-
-  /**
-   * Converts an InterestedIn record into a SimClustersEmbedding holding one score per cluster.
-   *
-   * Which score is taken (fav, follow or logFav) depends on `embeddingType`. Clusters whose
-   * selected score is absent are left out of the embedding.
-   *
-   * @param record            ClustersUserIsInterestedIn thrift struct from the MH data set
-   * @param embeddingType     Embedding Type as defined in
-   *                          com.twitter.simclusters_v2.thriftscala.EmbeddingType
-   * @param maxClusterSizeOpt optional size limit. When set, a cluster is kept only if its
-   *                          user-count upper bound is present and below the limit; when None,
-   *                          no cluster is filtered by size
-   * @return the embedding built from the clusters that passed the filter
-   * @throws IllegalArgumentException when a cluster is scored and `embeddingType` is not one of
-   *                                  the types handled below
-   */
-  def toSimClustersEmbedding(
-    record: ClustersUserIsInterestedIn,
-    embeddingType: EmbeddingType,
-    maxClusterSizeOpt: Option[Int] = None
-  ): SimClustersEmbedding = {
-    val scoredClusters = record.clusterIdToScores
-      .collect {
-        case (clusterId, clusterScores) if maxClusterSizeOpt.forall { sizeLimit =>
-              clusterScores.numUsersInterestedInThisClusterUpperBound.exists(_ < sizeLimit)
-            } =>
-          // The match stays inside the per-cluster branch so that an unsupported embedding type
-          // only fails once there is a cluster to score.
-          val maybeScore = embeddingType match {
-            // Fav-score based embeddings.
-            case EmbeddingType.FavBasedUserInterestedIn |
-                EmbeddingType.FavBasedUserInterestedInFromPE =>
-              clusterScores.favScore
-
-            // Follow-score based embeddings.
-            case EmbeddingType.FollowBasedUserInterestedIn |
-                EmbeddingType.FollowBasedUserInterestedInFromPE |
-                EmbeddingType.FollowBasedUserInterestedInFromAPE =>
-              clusterScores.followScore
-
-            // LogFav-score based embeddings.
-            case EmbeddingType.LogFavBasedUserInterestedIn |
-                EmbeddingType.LogFavBasedUserInterestedInFromPE |
-                EmbeddingType.LogFavBasedUserInterestedInFromAPE |
-                EmbeddingType.UserNextInterestedIn |
-                EmbeddingType.LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE |
-                EmbeddingType.LogFavBasedUserInterestedAverageAddressBookFromIIAPE |
-                EmbeddingType.LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE |
-                EmbeddingType.LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE |
-                EmbeddingType.LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE |
-                EmbeddingType.LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE =>
-              clusterScores.logFavScore
-
-            case _ =>
-              throw new IllegalArgumentException(s"unknown EmbeddingType: $embeddingType")
-          }
-          maybeScore.map(score => clusterId -> score)
-      }.flatten.toMap
-
-    SimClustersEmbedding(scoredClusters)
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserKnownForReadableStore.docx b/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserKnownForReadableStore.docx
new file mode 100644
index 000000000..832fa48d2
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserKnownForReadableStore.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserKnownForReadableStore.scala b/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserKnownForReadableStore.scala
deleted file mode 100644
index 8655e605a..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/stores/UserKnownForReadableStore.scala
+++ /dev/null
@@ -1,75 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.stores
-
-import com.twitter.bijection.Injection
-import com.twitter.bijection.scrooge.CompactScalaCodec
-import com.twitter.simclusters_v2.thriftscala.{ClustersUserIsKnownFor, ModelVersion}
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.storehaus.ReadableStore
-import com.twitter.storehaus_internal.manhattan.{Athena, ManhattanRO, ManhattanROConfig}
-import com.twitter.storehaus_internal.util.{ApplicationID, DatasetName, HDFSPath}
-import com.twitter.util.Future
-
-/**
- * Factories for the multi-version KnownFor store defined by the companion case class.
- */
-object UserKnownForReadableStore {
-
-  private val dataSetNameDec11 = "simclusters_v2_known_for_20m_145k_dec11"
-  private val dataSetNameUpdated = "simclusters_v2_known_for_20m_145k_updated"
-  private val dataSetName2020 = "simclusters_v2_known_for_20m_145k_2020"
-
-  // Opens one KnownFor dataset on the Athena cluster as a read-only store keyed by Long.
-  private def buildForModelVersion(
-    appId: String,
-    storeName: String,
-    mhMtlsParams: ManhattanKVClientMtlsParams
-  ): ReadableStore[Long, ClustersUserIsKnownFor] = {
-    implicit val keyInjection: Injection[Long, Array[Byte]] = Injection.long2BigEndian
-    implicit val knownForCodec: Injection[ClustersUserIsKnownFor, Array[Byte]] =
-      CompactScalaCodec(ClustersUserIsKnownFor)
-
-    val roConfig = ManhattanROConfig(
-      HDFSPath(""), // not needed
-      ApplicationID(appId),
-      DatasetName(storeName),
-      Athena
-    )
-    ManhattanRO.getReadableStoreWithMtls[Long, ClustersUserIsKnownFor](roConfig, mhMtlsParams)
-  }
-
-  /**
-   * Builds the combined store from the three per-version datasets, all opened with the same
-   * application id and mTLS parameters.
-   */
-  def get(appId: String, mhMtlsParams: ManhattanKVClientMtlsParams): UserKnownForReadableStore = {
-    def storeFor(datasetName: String): ReadableStore[Long, ClustersUserIsKnownFor] =
-      buildForModelVersion(appId, datasetName, mhMtlsParams)
-
-    UserKnownForReadableStore(
-      storeFor(dataSetNameDec11),
-      storeFor(dataSetNameUpdated),
-      storeFor(dataSetName2020)
-    )
-  }
-
-  /** Same as `get`, with the "simclusters_v2" application id. */
-  def getDefaultStore(mhMtlsParams: ManhattanKVClientMtlsParams): UserKnownForReadableStore =
-    get("simclusters_v2", mhMtlsParams)
-
-}
-
-// Lookup key for UserKnownForReadableStore: a user id plus the model version to read from
-// (Model20m145kUpdated unless specified).
-case class Query(userId: Long, modelVersion: ModelVersion = ModelVersion.Model20m145kUpdated)
-
-/**
- * Mainly used in debuggers to fetch the top knownFor clusters across different model versions.
- *
- * Routes each query to the per-version store matching `query.modelVersion`.
- */
-case class UserKnownForReadableStore(
-  knownForStoreDec11: ReadableStore[Long, ClustersUserIsKnownFor],
-  knownForStoreUpdated: ReadableStore[Long, ClustersUserIsKnownFor],
-  knownForStore2020: ReadableStore[Long, ClustersUserIsKnownFor])
-    extends ReadableStore[Query, ClustersUserIsKnownFor] {
-
-  /**
-   * @throws IllegalArgumentException for a model version none of the three stores covers
-   */
-  override def get(query: Query): Future[Option[ClustersUserIsKnownFor]] = {
-    // Pick the backing store first, then issue the single lookup.
-    val storeForVersion = query.modelVersion match {
-      case ModelVersion.Model20m145kDec11 => knownForStoreDec11
-      case ModelVersion.Model20m145kUpdated => knownForStoreUpdated
-      case ModelVersion.Model20m145k2020 => knownForStore2020
-      case c =>
-        throw new IllegalArgumentException(
-          s"Never heard of $c before! Is this a new model version?")
-    }
-    storeForVersion.get(query.userId)
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/BUILD b/src/scala/com/twitter/simclusters_v2/summingbird/storm/BUILD
deleted file mode 100644
index 62f92f3e7..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/storm/BUILD
+++ /dev/null
@@ -1,27 +0,0 @@
-# Scala library target for the sources in this directory (no explicit name or sources are
-# given, so the build tool's defaults apply).
-# NOTE(review): storehaus:memcache is listed under both 3rdparty/jvm and 3rdparty/src/jvm --
-# confirm that both entries are required.
-scala_library(
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "3rdparty/jvm/com/twitter/algebird:core",
-        "3rdparty/jvm/com/twitter/algebird:util",
-        "3rdparty/jvm/com/twitter/bijection:core",
-        "3rdparty/jvm/com/twitter/bijection:util",
-        "3rdparty/jvm/com/twitter/storehaus:core",
-        "3rdparty/jvm/com/twitter/storehaus:memcache",
-        "3rdparty/src/jvm/com/twitter/storehaus:memcache",
-        "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
-        "src/scala/com/twitter/simclusters_v2/summingbird:common",
-        "src/scala/com/twitter/simclusters_v2/summingbird:stores",
-        "src/scala/com/twitter/storehaus_internal/memcache/config",
-        "src/scala/com/twitter/storehaus_internal/online",
-        "src/scala/com/twitter/summingbird_internal/runner/common",
-        "src/scala/com/twitter/summingbird_internal/runner/store_config",
-        "src/scala/com/twitter/summingbird_internal/runner/storm",
-        "src/scala/com/twitter/summingbird_internal/sources/common",
-        "src/scala/com/twitter/summingbird_internal/sources/common/remote:TweetEventSource",
-        "src/scala/com/twitter/summingbird_internal/sources/storm/remote:TweetEventSource",
-        "src/scala/com/twitter/tormenta_internal/spout/eventbus",
-        "src/scala/com/twitter/wtf/summingbird/sources/common",
-        "src/scala/com/twitter/wtf/summingbird/sources/storm",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/BUILD.docx b/src/scala/com/twitter/simclusters_v2/summingbird/storm/BUILD.docx
new file mode 100644
index 000000000..bd1bcc99d
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/storm/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJob.docx b/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJob.docx
new file mode 100644
index 000000000..d14e1fcdf
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJob.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJob.scala b/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJob.scala
deleted file mode 100644
index 1e0703647..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJob.scala
+++ /dev/null
@@ -1,151 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.storm
-
-import com.twitter.simclusters_v2.common.TweetId
-import com.twitter.simclusters_v2.summingbird.common.Implicits
-import com.twitter.simclusters_v2.summingbird.common.Monoids.PersistentSimClustersEmbeddingLongestL2NormMonoid
-import com.twitter.simclusters_v2.summingbird.common.StatsUtil
-import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore.{
-  LatestEmbeddingVersion,
-  LongestL2EmbeddingVersion,
-  PersistentTweetEmbeddingId
-}
-import com.twitter.simclusters_v2.thriftscala.{
-  PersistentSimClustersEmbedding,
-  SimClustersEmbedding,
-  SimClustersEmbeddingMetadata
-}
-import com.twitter.summingbird.option.JobId
-import com.twitter.summingbird.{Platform, Producer, TailProducer}
-import com.twitter.timelineservice.thriftscala.Event
-import com.twitter.tweetypie.thriftscala.StatusCounts
-
-/**
- * The job that saves qualified tweet SimClustersEmbeddings into the Strato store (backed by
- * Manhattan).
- *
- * The steps:
- * 1. Read from the Favorite stream.
- * 2. Join with the Tweet Status Count service.
- * 3. Filter out the tweets whose favorite count < 8.
- *    We consider the SimClusters embeddings of these tweets too noisy to be trusted.
- * 4. Update the SimClusters tweet embedding stored with timestamp 0L.
- *    0L is reserved for the latest tweet embedding. It's also used to maintain the tweet count.
- * 5. If the update count of the SimClusters tweet embedding reaches a power of two 2^N with
- *    N >= 3, persist the embedding with the timestamp as part of the LK.
- */
-private[storm] object PersistentTweetJob {
-  import StatsUtil._
-
-  private val MinFavoriteCount = 8
-  type Timestamp = Long
-
-  val longestL2NormMonoid = new PersistentSimClustersEmbeddingLongestL2NormMonoid()
-
-  /**
-   * Assembles the Summingbird topology of this job.
-   *
-   * Favorite events are joined with the tweet's status counts, filtered by MinFavoriteCount,
-   * joined with the tweet's SimClusters embedding, and then fed into two branches: one that
-   * maintains the "latest" embedding and emits extra persisted copies when the update count
-   * crosses a qualified boundary, and one that aggregates with longestL2NormMonoid.
-   *
-   * @param timelineEventSource     source of timeline events; only Favorite events are used
-   * @param tweetStatusCountService service resolving a tweet id to its StatusCounts
-   * @param tweetEmbeddingService   service resolving a tweet id to its SimClustersEmbedding
-   * @param persistentTweetEmbeddingStoreWithLatestAggregation
-   *                                store summed with Implicits.persistentSimClustersEmbeddingMonoid
-   * @param persistentTweetEmbeddingStoreWithLongestL2NormAggregation
-   *                                store summed with longestL2NormMonoid
-   * @param jobId                   implicit Summingbird job id
-   * @return the tail producer combining both branches
-   */
-  def generate[P <: Platform[P]](
-    timelineEventSource: Producer[P, Event],
-    tweetStatusCountService: P#Service[TweetId, StatusCounts],
-    tweetEmbeddingService: P#Service[TweetId, SimClustersEmbedding],
-    persistentTweetEmbeddingStoreWithLatestAggregation: P#Store[
-      PersistentTweetEmbeddingId,
-      PersistentSimClustersEmbedding
-    ],
-    persistentTweetEmbeddingStoreWithLongestL2NormAggregation: P#Store[
-      PersistentTweetEmbeddingId,
-      PersistentSimClustersEmbedding
-    ]
-  )(
-    implicit jobId: JobId
-  ): TailProducer[P, Any] = {
-
-    // Keep only Favorite events, reduced to (tweetId, event time).
-    val timelineEvents: Producer[P, (TweetId, Timestamp)] = timelineEventSource
-      .collect {
-        case Event.Favorite(favoriteEvent) =>
-          (favoriteEvent.tweetId, favoriteEvent.eventTimeMs)
-      }
-
-    val filteredEvents = timelineEvents
-      .leftJoin[StatusCounts](tweetStatusCountService)
-      .filter {
-        case (_, (_, Some(statusCounts))) =>
-          // Only consider tweets with at least MinFavoriteCount favorites
-          statusCounts.favoriteCount.exists(_ >= MinFavoriteCount)
-        case _ =>
-          // No status counts came back for this tweet: drop the event.
-          false
-      }
-      .leftJoin[SimClustersEmbedding](tweetEmbeddingService)
-
-    val latestAndPersistentEmbeddingProducer = filteredEvents
-      .collect {
-        // Events whose tweet has no embedding do not match and are dropped here.
-        case (tweetId, ((eventTimeMs, _), Some(tweetEmbedding))) =>
-          (
-            // This special timestamp is a reserved space for the latest tweet embedding.
-            PersistentTweetEmbeddingId(tweetId, timestampInMs = LatestEmbeddingVersion),
-            PersistentSimClustersEmbedding(
-              tweetEmbedding,
-              SimClustersEmbeddingMetadata(updatedAtMs = Some(eventTimeMs), updatedCount = Some(1))
-            ))
-      }
-      .observe("num_of_embedding_updates")
-      .sumByKey(persistentTweetEmbeddingStoreWithLatestAggregation)(
-        Implicits.persistentSimClustersEmbeddingMonoid)
-      .name("latest_embedding_producer")
-      .flatMap {
-        case (persistentTweetEmbeddingId, (maybeEmbedding, deltaEmbedding)) =>
-          // Emit an extra record only when this update crossed a qualified count boundary.
-          lastQualifiedUpdatedCount(
-            maybeEmbedding.flatMap(_.metadata.updatedCount),
-            deltaEmbedding.metadata.updatedCount
-          ).map { newUpdateCount =>
-            (
-              // Re-key the record by the update time of the delta (0L when it is absent).
-              persistentTweetEmbeddingId.copy(timestampInMs =
-                deltaEmbedding.metadata.updatedAtMs.getOrElse(0L)),
-              deltaEmbedding.copy(metadata =
-                deltaEmbedding.metadata.copy(updatedCount = Some(newUpdateCount)))
-            )
-          }
-      }
-      .observe("num_of_extra_embedding")
-      // NOTE(review): this writes into the same store as the first sumByKey above; the keys
-      // differ only in timestampInMs -- confirm this is intended.
-      .sumByKey(persistentTweetEmbeddingStoreWithLatestAggregation)(
-        Implicits.persistentSimClustersEmbeddingMonoid)
-      .name("persistent_embeddings_producer")
-
-    val longestL2NormEmbeddingProducer = filteredEvents
-      .collect {
-        case (tweetId, ((eventTimeMs, Some(statusCounts)), Some(tweetEmbedding))) =>
-          (
-            // This key uses the reserved LongestL2EmbeddingVersion timestamp rather than the
-            // latest-embedding one.
-            PersistentTweetEmbeddingId(tweetId, timestampInMs = LongestL2EmbeddingVersion),
-            PersistentSimClustersEmbedding(
-              tweetEmbedding,
-              SimClustersEmbeddingMetadata(
-                updatedAtMs = Some(eventTimeMs),
-                // We're not aggregating the existing embedding, we're replacing it. The count
-                // therefore needs to be the absolute fav count for this tweet, not the delta.
-                updatedCount = statusCounts.favoriteCount.map(_ + 1)
-              )
-            ))
-      }
-      .observe("num_longest_l2_norm_updates")
-      .sumByKey(persistentTweetEmbeddingStoreWithLongestL2NormAggregation)(longestL2NormMonoid)
-      .name("longest_l2_norm_embedding_producer")
-
-    latestAndPersistentEmbeddingProducer.also(longestL2NormEmbeddingProducer)
-  }
-
-  /*
-    If this change in counts crosses one or more powers of 2 (8,16,32...), return the last
-    (i.e. largest) boundary that was crossed. In the case where a count delta is large, it may
-    skip a power of 2, and thus we may not store embeddings for all 2^(i+3) where
-    0 <= i <= tweetFavCount.
-
-    existingUpdateCount: update count before this change (treated as 0 when absent)
-    deltaUpdateCount:    amount added by this change (treated as 0 when absent)
-    Returns None when no qualified boundary lies in (existing, existing + delta].
-   */
-  private def lastQualifiedUpdatedCount(
-    existingUpdateCount: Option[Long],
-    deltaUpdateCount: Option[Long]
-  ): Option[Int] = {
-    val existing = existingUpdateCount.getOrElse(0L)
-    val sum = existing + deltaUpdateCount.getOrElse(0L)
-    // qualifiedUpdateCounts is ascending, so the last match is the largest boundary crossed.
-    // An unordered Set here would make lastOption return an arbitrary crossed boundary.
-    qualifiedUpdateCounts.filter { i => (existing < i) && (i <= sum) }.lastOption
-  }
-
-  // Only 2 Power n while n >= 3 is qualified for Persistent. The max = 16,777,216
-  // Kept as an ascending sequence because lastQualifiedUpdatedCount relies on the order.
-  private lazy val qualifiedUpdateCounts: IndexedSeq[Int] = 3
-    .until(25).map { i => 1 << i }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJobRunner.docx b/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJobRunner.docx
new file mode 100644
index 000000000..71eadd616
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJobRunner.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJobRunner.scala b/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJobRunner.scala
deleted file mode 100644
index b7960d846..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/storm/PersistentTweetJobRunner.scala
+++ /dev/null
@@ -1,227 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.storm
-
-import com.twitter.conversions.DurationOps._
-import com.twitter.finagle.stats.NullStatsReceiver
-import com.twitter.hermit.store.common.ObservedCachedReadableStore
-import com.twitter.scalding.Args
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-import com.twitter.simclusters_v2.common.TweetId
-import com.twitter.simclusters_v2.summingbird.common.Monoids.PersistentSimClustersEmbeddingLongestL2NormMonoid
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.AltSetting
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.Environment
-import com.twitter.simclusters_v2.summingbird.common.ClientConfigs
-import com.twitter.simclusters_v2.summingbird.common.Implicits
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile
-import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore.PersistentTweetEmbeddingId
-import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore
-import com.twitter.simclusters_v2.summingbird.stores.TopKClustersForTweetKeyReadableStore
-import com.twitter.simclusters_v2.summingbird.stores.TweetKey
-import com.twitter.simclusters_v2.summingbird.stores.TweetStatusCountsStore
-import com.twitter.simclusters_v2.thriftscala.PersistentSimClustersEmbedding
-import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
-import com.twitter.storehaus.FutureCollector
-import com.twitter.summingbird.online.option._
-import com.twitter.summingbird.option._
-import com.twitter.summingbird.storm.Storm
-import com.twitter.summingbird.Options
-import com.twitter.summingbird.TailProducer
-import com.twitter.summingbird_internal.runner.common.JobName
-import com.twitter.summingbird_internal.runner.common.SBRunConfig
-import com.twitter.summingbird_internal.runner.storm.GenericRunner
-import com.twitter.summingbird_internal.runner.storm.StormConfig
-import com.twitter.tormenta_internal.spout.eventbus.SubscriberId
-import com.twitter.tweetypie.thriftscala.StatusCounts
-import com.twitter.wtf.summingbird.sources.storm.TimelineEventSource
-import java.lang
-import java.util.{HashMap => JMap}
-import org.apache.heron.api.{Config => HeronConfig}
-import org.apache.storm.{Config => BTConfig}
-
-object PersistentTweetJobRunner {
-  def main(args: Array[String]): Unit = {
-    GenericRunner(args, PersistentTweetStormJob(_))
-  }
-}
-
-object PersistentTweetStormJob {
-
-  import com.twitter.simclusters_v2.summingbird.common.Implicits._
-
-  def jLong(num: Long): lang.Long = java.lang.Long.valueOf(num)
-  def jInt(num: Int): Integer = java.lang.Integer.valueOf(num)
-  def jFloat(num: Float): lang.Float = java.lang.Float.valueOf(num)
-
-  def apply(args: Args): StormConfig = {
-
-    lazy val env: String = args.getOrElse("env", "prod")
-    lazy val zone: String = args.getOrElse("dc", "atla")
-    lazy val alt: String = args.getOrElse("alt", default = "normal")
-
-    lazy val profile =
-      SimClustersProfile.fetchPersistentJobProfile(Environment(env), AltSetting(alt))
-
-    lazy val stratoClient = ClientConfigs.stratoClient(profile.serviceIdentifier(zone))
-
-    lazy val favoriteEventSource = TimelineEventSource(
-      // Note: do not share the same subsriberId with other jobs. Apply a new one if needed
-      SubscriberId(profile.timelineEventSourceSubscriberId)
-    ).kafkaSource
-
-    lazy val persistentTweetEmbeddingStore =
-      PersistentTweetEmbeddingStore
-        .persistentTweetEmbeddingStore(stratoClient, profile.persistentTweetStratoPath)
-
-    lazy val persistentTweetEmbeddingStoreWithLatestAggregation: Storm#Store[
-      PersistentTweetEmbeddingId,
-      PersistentSimClustersEmbedding
-    ] = {
-      import com.twitter.storehaus.algebra.StoreAlgebra._
-
-      lazy val mergeableStore =
-        persistentTweetEmbeddingStore.toMergeable(
-          mon = Implicits.persistentSimClustersEmbeddingMonoid,
-          fc = implicitly[FutureCollector])
-
-      Storm.onlineOnlyStore(mergeableStore)
-    }
-
-    lazy val persistentTweetEmbeddingStoreWithLongestL2NormAggregation: Storm#Store[
-      PersistentTweetEmbeddingId,
-      PersistentSimClustersEmbedding
-    ] = {
-      import com.twitter.storehaus.algebra.StoreAlgebra._
-
-      val longestL2NormMonoid = new PersistentSimClustersEmbeddingLongestL2NormMonoid()
-      lazy val mergeableStore =
-        persistentTweetEmbeddingStore.toMergeable(
-          mon = longestL2NormMonoid,
-          fc = implicitly[FutureCollector])
-
-      Storm.onlineOnlyStore(mergeableStore)
-    }
-
-    lazy val tweetStatusCountsService: Storm#Service[TweetId, StatusCounts] =
-      Storm.service(
-        ObservedCachedReadableStore.from[TweetId, StatusCounts](
-          TweetStatusCountsStore.tweetStatusCountsStore(stratoClient, "tweetypie/core.Tweet"),
-          ttl = 1.minute,
-          maxKeys = 10000, // 10K is enough for Heron Job.
-          cacheName = "tweet_status_count",
-          windowSize = 10000L
-        )(NullStatsReceiver)
-      )
-
-    lazy val tweetEmbeddingService: Storm#Service[TweetId, ThriftSimClustersEmbedding] =
-      Storm.service(
-        TopKClustersForTweetKeyReadableStore
-          .overrideLimitDefaultStore(50, profile.serviceIdentifier(zone))
-          .composeKeyMapping { tweetId: TweetId =>
-            TweetKey(tweetId, profile.modelVersionStr, profile.coreEmbeddingType)
-          }.mapValues { value => SimClustersEmbedding(value).toThrift })
-
-    new StormConfig {
-
-      val jobName: JobName = JobName(profile.jobName)
-
-      implicit val jobID: JobId = JobId(jobName.toString)
-
-      /**
-       * Add registrars for chill serialization for user-defined types.
-       */
-      override def registrars =
-        List(
-          SBRunConfig.register[StatusCounts],
-          SBRunConfig.register[ThriftSimClustersEmbedding],
-          SBRunConfig.register[PersistentSimClustersEmbedding]
-        )
-
-      /***** Job configuration settings *****/
-      /**
-       * Use vmSettings to configure the VM
-       */
-      override def vmSettings: Seq[String] = Seq()
-
-      private val SourcePerWorker = 1
-      private val FlatMapPerWorker = 1
-      private val SummerPerWorker = 1
-
-      private val TotalWorker = 60
-
-      /**
-       * Use transformConfig to set Heron options.
-       */
-      override def transformConfig(config: Map[String, AnyRef]): Map[String, AnyRef] = {
-
-        val heronJvmOptions = new JMap[String, AnyRef]()
-
-        val MetaspaceSize = jLong(256L * 1024 * 1024)
-        val DefaultHeapSize = jLong(2L * 1024 * 1024 * 1024)
-        val HighHeapSize = jLong(4L * 1024 * 1024 * 1024)
-
-        val TotalCPU = jLong(
-          SourcePerWorker * 1 + FlatMapPerWorker * 4 + SummerPerWorker * 3 + 1
-        )
-
-        // reserve 4GB for the StreamMgr
-        val TotalRam = jLong(
-          DefaultHeapSize * (SourcePerWorker * 1 + FlatMapPerWorker * 4)
-            + HighHeapSize * SummerPerWorker * 3
-            + MetaspaceSize * 8 // Applies to all workers
-            + 4L * 1024 * 1024 * 1024)
-
-        // These settings help prevent GC issues in the most memory intensive steps of the job by
-        // dedicating more memory to the new gen heap designated by the -Xmn flag.
-        Map(
-          "Tail" -> HighHeapSize
-        ).foreach {
-          case (stage, heap) =>
-            HeronConfig.setComponentJvmOptions(
-              heronJvmOptions,
-              stage,
-              s"-Xmx$heap -Xms$heap -Xmn${heap / 2}"
-            )
-        }
-
-        super.transformConfig(config) ++ List(
-          BTConfig.TOPOLOGY_TEAM_NAME -> "cassowary",
-          BTConfig.TOPOLOGY_TEAM_EMAIL -> "no-reply@twitter.com",
-          BTConfig.TOPOLOGY_WORKERS -> jInt(TotalWorker),
-          BTConfig.TOPOLOGY_ACKER_EXECUTORS -> jInt(0),
-          BTConfig.TOPOLOGY_MESSAGE_TIMEOUT_SECS -> jInt(30),
-          BTConfig.TOPOLOGY_WORKER_CHILDOPTS -> List(
-            "-Djava.security.auth.login.config=config/jaas.conf",
-            "-Dsun.security.krb5.debug=true",
-            "-Dcom.twitter.eventbus.client.EnableKafkaSaslTls=true",
-            "-Dcom.twitter.eventbus.client.zoneName=" + zone,
-            s"-XX:MaxMetaspaceSize=$MetaspaceSize"
-          ).mkString(" "),
-          HeronConfig.TOPOLOGY_CONTAINER_CPU_REQUESTED -> TotalCPU,
-          HeronConfig.TOPOLOGY_CONTAINER_RAM_REQUESTED -> TotalRam,
-          "storm.job.uniqueId" -> jobID.get
-        )
-      }
-
-      /**
-       * Use getNamedOptions to set Summingbird runtime options
-       * The list of available options: com.twitter.summingbird.online.option
-       */
-      override def getNamedOptions: Map[String, Options] = Map(
-        "DEFAULT" -> Options()
-          .set(SummerParallelism(TotalWorker * SummerPerWorker))
-          .set(FlatMapParallelism(TotalWorker * FlatMapPerWorker))
-          .set(SourceParallelism(TotalWorker * SourcePerWorker))
-          .set(CacheSize(10000))
-          .set(FlushFrequency(30.seconds))
-      )
-
-      /** Required job generation call for your job, defined in Job.scala */
-      override def graph: TailProducer[Storm, Any] = PersistentTweetJob.generate[Storm](
-        favoriteEventSource,
-        tweetStatusCountsService,
-        tweetEmbeddingService,
-        persistentTweetEmbeddingStoreWithLatestAggregation,
-        persistentTweetEmbeddingStoreWithLongestL2NormAggregation
-      )
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJob.docx b/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJob.docx
new file mode 100644
index 000000000..5d59d8998
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJob.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJob.scala b/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJob.scala
deleted file mode 100644
index 54ac8011a..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJob.scala
+++ /dev/null
@@ -1,232 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.storm
-
-import com.twitter.simclusters_v2.common.ModelVersions._
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.SimClustersTweetProfile
-import com.twitter.simclusters_v2.summingbird.common.Configs
-import com.twitter.simclusters_v2.summingbird.common.Implicits
-import com.twitter.simclusters_v2.summingbird.common.SimClustersHashUtil
-import com.twitter.simclusters_v2.summingbird.common.SimClustersInterestedInUtil
-import com.twitter.simclusters_v2.summingbird.common.StatsUtil
-import com.twitter.simclusters_v2.thriftscala._
-import com.twitter.snowflake.id.SnowflakeId
-import com.twitter.summingbird._
-import com.twitter.summingbird.option.JobId
-import com.twitter.timelineservice.thriftscala.Event
-import com.twitter.conversions.DurationOps._
-import com.twitter.timelineservice.thriftscala.EventAliases.FavoriteAlias
-
-object TweetJob {
-
-  import Implicits._
-  import StatsUtil._
-
-  object NodeName {
-    final val TweetClusterScoreFlatMapNodeName: String = "TweetClusterScoreFlatMap"
-    final val TweetClusterUpdatedScoresFlatMapNodeName: String = "TweetClusterUpdatedScoreFlatMap"
-    final val TweetClusterScoreSummerNodeName: String = "TweetClusterScoreSummer"
-    final val TweetTopKNodeName: String = "TweetTopKSummer"
-    final val ClusterTopKTweetsNodeName: String = "ClusterTopKTweetsSummer"
-    final val ClusterTopKTweetsLightNodeName: String = "ClusterTopKTweetsLightSummer"
-  }
-
-  def generate[P <: Platform[P]](
-    profile: SimClustersTweetProfile,
-    timelineEventSource: Producer[P, Event],
-    userInterestedInService: P#Service[Long, ClustersUserIsInterestedIn],
-    tweetClusterScoreStore: P#Store[(SimClusterEntity, FullClusterIdBucket), ClustersWithScores],
-    tweetTopKClustersStore: P#Store[EntityWithVersion, TopKClustersWithScores],
-    clusterTopKTweetsStore: P#Store[FullClusterId, TopKTweetsWithScores],
-    clusterTopKTweetsLightStore: Option[P#Store[FullClusterId, TopKTweetsWithScores]]
-  )(
-    implicit jobId: JobId
-  ): TailProducer[P, Any] = {
-
-    val userInterestNonEmptyCount = Counter(Group(jobId.get), Name("num_user_interests_non_empty"))
-    val userInterestEmptyCount = Counter(Group(jobId.get), Name("num_user_interests_empty"))
-
-    val numClustersCount = Counter(Group(jobId.get), Name("num_clusters"))
-
-    val entityClusterPairCount = Counter(Group(jobId.get), Name("num_entity_cluster_pairs_emitted"))
-
-    // Fav QPS is around 6K
-    val qualifiedFavEvents = timelineEventSource
-      .collect {
-        case Event.Favorite(favEvent)
-            if favEvent.userId != favEvent.tweetUserId && !isTweetTooOld(favEvent) =>
-          (favEvent.userId, favEvent)
-      }
-      .observe("num_qualified_favorite_events")
-
-    val entityWithSimClustersProducer = qualifiedFavEvents
-      .leftJoin(userInterestedInService)
-      .map {
-        case (_, (favEvent, userInterestOpt)) =>
-          (favEvent.tweetId, (favEvent, userInterestOpt))
-      }
-      .flatMap {
-        case (_, (favEvent, Some(userInterests))) =>
-          userInterestNonEmptyCount.incr()
-
-          val timestamp = favEvent.eventTimeMs
-
-          val clustersWithScores = SimClustersInterestedInUtil.topClustersWithScores(userInterests)
-
-          // clusters.size is around 25 in average
-          numClustersCount.incrBy(clustersWithScores.size)
-
-          val simClusterScoresByHashBucket = clustersWithScores.groupBy {
-            case (clusterId, _) => SimClustersHashUtil.clusterIdToBucket(clusterId)
-          }
-
-          for {
-            (hashBucket, scores) <- simClusterScoresByHashBucket
-          } yield {
-            entityClusterPairCount.incr()
-
-            val clusterBucket = FullClusterIdBucket(userInterests.knownForModelVersion, hashBucket)
-
-            val tweetId: SimClusterEntity = SimClusterEntity.TweetId(favEvent.tweetId)
-
-            (tweetId, clusterBucket) -> SimClustersInterestedInUtil
-              .buildClusterWithScores(
-                scores,
-                timestamp,
-                profile.favScoreThresholdForUserInterest
-              )
-          }
-        case _ =>
-          userInterestEmptyCount.incr()
-          None
-      }
-      .observe("entity_cluster_delta_scores")
-      .name(NodeName.TweetClusterScoreFlatMapNodeName)
-      .sumByKey(tweetClusterScoreStore)(clustersWithScoreMonoid)
-      .name(NodeName.TweetClusterScoreSummerNodeName)
-      .map {
-        case ((simClusterEntity, clusterBucket), (oldValueOpt, deltaValue)) =>
-          val updatedClusterIds = deltaValue.clustersToScore.map(_.keySet).getOrElse(Set.empty[Int])
-
-          (simClusterEntity, clusterBucket) -> clustersWithScoreMonoid.plus(
-            oldValueOpt
-              .map { oldValue =>
-                oldValue.copy(
-                  clustersToScore =
-                    oldValue.clustersToScore.map(_.filterKeys(updatedClusterIds.contains))
-                )
-              }.getOrElse(clustersWithScoreMonoid.zero),
-            deltaValue
-          )
-      }
-      .observe("entity_cluster_updated_scores")
-      .name(NodeName.TweetClusterUpdatedScoresFlatMapNodeName)
-
-    val tweetTopK = entityWithSimClustersProducer
-      .flatMap {
-        case ((simClusterEntity, FullClusterIdBucket(modelVersion, _)), clusterWithScores)
-            if simClusterEntity.isInstanceOf[SimClusterEntity.TweetId] =>
-          clusterWithScores.clustersToScore
-            .map { clustersToScores =>
-              val topClustersWithFavScores = clustersToScores.mapValues { scores: Scores =>
-                Scores(
-                  favClusterNormalized8HrHalfLifeScore =
-                    scores.favClusterNormalized8HrHalfLifeScore.filter(
-                      _.value >= Configs.scoreThresholdForTweetTopKClustersCache
-                    )
-                )
-              }
-
-              (
-                EntityWithVersion(simClusterEntity, modelVersion),
-                TopKClustersWithScores(Some(topClustersWithFavScores), None)
-              )
-            }
-        case _ =>
-          None
-
-      }
-      .observe("tweet_topk_updates")
-      .sumByKey(tweetTopKClustersStore)(topKClustersWithScoresMonoid)
-      .name(NodeName.TweetTopKNodeName)
-
-    val clusterTopKTweets = entityWithSimClustersProducer
-      .flatMap {
-        case ((simClusterEntity, FullClusterIdBucket(modelVersion, _)), clusterWithScores) =>
-          simClusterEntity match {
-            case SimClusterEntity.TweetId(tweetId) =>
-              clusterWithScores.clustersToScore
-                .map { clustersToScores =>
-                  clustersToScores.toSeq.map {
-                    case (clusterId, scores) =>
-                      val topTweetsByFavScore = Map(
-                        tweetId -> Scores(favClusterNormalized8HrHalfLifeScore =
-                          scores.favClusterNormalized8HrHalfLifeScore.filter(_.value >=
-                            Configs.scoreThresholdForClusterTopKTweetsCache)))
-
-                      (
-                        FullClusterId(modelVersion, clusterId),
-                        TopKTweetsWithScores(Some(topTweetsByFavScore), None)
-                      )
-                  }
-                }.getOrElse(Nil)
-            case _ =>
-              Nil
-          }
-      }
-      .observe("cluster_topk_tweets_updates")
-      .sumByKey(clusterTopKTweetsStore)(topKTweetsWithScoresMonoid)
-      .name(NodeName.ClusterTopKTweetsNodeName)
-
-    val clusterTopKTweetsLight = clusterTopKTweetsLightStore.map { lightStore =>
-      entityWithSimClustersProducer
-        .flatMap {
-          case ((simClusterEntity, FullClusterIdBucket(modelVersion, _)), clusterWithScores) =>
-            simClusterEntity match {
-              case SimClusterEntity.TweetId(tweetId) if isTweetTooOldForLight(tweetId) =>
-                clusterWithScores.clustersToScore
-                  .map { clustersToScores =>
-                    clustersToScores.toSeq.map {
-                      case (clusterId, scores) =>
-                        val topTweetsByFavScore = Map(
-                          tweetId -> Scores(favClusterNormalized8HrHalfLifeScore =
-                            scores.favClusterNormalized8HrHalfLifeScore.filter(_.value >=
-                              Configs.scoreThresholdForClusterTopKTweetsCache)))
-
-                        (
-                          FullClusterId(modelVersion, clusterId),
-                          TopKTweetsWithScores(Some(topTweetsByFavScore), None)
-                        )
-                    }
-                  }.getOrElse(Nil)
-              case _ =>
-                Nil
-            }
-        }
-        .observe("cluster_topk_tweets_updates")
-        .sumByKey(lightStore)(topKTweetsWithScoresLightMonoid)
-        .name(NodeName.ClusterTopKTweetsLightNodeName)
-    }
-
-    clusterTopKTweetsLight match {
-      case Some(lightNode) =>
-        tweetTopK.also(clusterTopKTweets).also(lightNode)
-      case None =>
-        tweetTopK.also(clusterTopKTweets)
-    }
-  }
-
-  // Boolean check to see if the tweet is too old
-  private def isTweetTooOld(favEvent: FavoriteAlias): Boolean = {
-    favEvent.tweet.forall { tweet =>
-      SnowflakeId.unixTimeMillisOptFromId(tweet.id).exists { millis =>
-        System.currentTimeMillis() - millis >= Configs.OldestTweetFavEventTimeInMillis
-      }
-    }
-  }
-
-  private def isTweetTooOldForLight(tweetId: Long): Boolean = {
-    SnowflakeId.unixTimeMillisOptFromId(tweetId).exists { millis =>
-      System.currentTimeMillis() - millis >= Configs.OldestTweetInLightIndexInMillis
-    }
-  }
-
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJobRunner.docx b/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJobRunner.docx
new file mode 100644
index 000000000..a38303ca9
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJobRunner.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJobRunner.scala b/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJobRunner.scala
deleted file mode 100644
index 11a94a47b..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/storm/TweetJobRunner.scala
+++ /dev/null
@@ -1,242 +0,0 @@
-package com.twitter.simclusters_v2.summingbird.storm
-
-import com.twitter.conversions.DurationOps._
-import com.twitter.heron.util.CommonMetric
-import com.twitter.scalding.Args
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.AltSetting
-import com.twitter.simclusters_v2.summingbird.common.SimClustersProfile.Environment
-import com.twitter.simclusters_v2.summingbird.stores.EntityClusterScoreReadableStore
-import com.twitter.simclusters_v2.summingbird.stores.TopKClustersForTweetReadableStore
-import com.twitter.simclusters_v2.summingbird.stores.TopKTweetsForClusterReadableStore
-import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore
-import com.twitter.simclusters_v2.thriftscala._
-import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
-import com.twitter.summingbird.online.option._
-import com.twitter.summingbird.option._
-import com.twitter.summingbird.storm.option.FlatMapStormMetrics
-import com.twitter.summingbird.storm.option.SummerStormMetrics
-import com.twitter.summingbird.storm.Storm
-import com.twitter.summingbird.storm.StormMetric
-import com.twitter.summingbird.Options
-import com.twitter.summingbird.TailProducer
-import com.twitter.summingbird_internal.runner.common.JobName
-import com.twitter.summingbird_internal.runner.common.SBRunConfig
-import com.twitter.summingbird_internal.runner.storm.GenericRunner
-import com.twitter.summingbird_internal.runner.storm.StormConfig
-import com.twitter.tormenta_internal.spout.eventbus.SubscriberId
-import com.twitter.wtf.summingbird.sources.storm.TimelineEventSource
-import java.lang
-import org.apache.heron.api.{Config => HeronConfig}
-import org.apache.heron.common.basics.ByteAmount
-import org.apache.storm.{Config => BTConfig}
-import scala.collection.JavaConverters._
-
-object TweetJobRunner {
-  def main(args: Array[String]): Unit = {
-    GenericRunner(args, TweetStormJob(_))
-  }
-}
-
-object TweetStormJob {
-
-  import com.twitter.simclusters_v2.summingbird.common.Implicits._
-
-  def jLong(num: Long): lang.Long = java.lang.Long.valueOf(num)
-  def jInt(num: Int): Integer = java.lang.Integer.valueOf(num)
-  def apply(args: Args): StormConfig = {
-
-    lazy val env: String = args.getOrElse("env", "prod")
-    lazy val zone: String = args.getOrElse("dc", "atla")
-
-    // The only SimClusters ENV is Alt. Will clean up soon.
-    lazy val profile = SimClustersProfile.fetchTweetJobProfile(Environment(env), AltSetting.Alt)
-
-    lazy val favoriteEventSource = TimelineEventSource(
-      // Note: do not share the same subsriberId with other jobs. Apply a new one if needed
-      SubscriberId(profile.timelineEventSourceSubscriberId)
-    ).source
-
-    lazy val commonMetric =
-      StormMetric(new CommonMetric(), CommonMetric.NAME, CommonMetric.POLL_INTERVAL)
-    lazy val flatMapMetrics = FlatMapStormMetrics(Iterable(commonMetric))
-    lazy val summerMetrics = SummerStormMetrics(Iterable(commonMetric))
-
-    lazy val entityClusterScoreStore: Storm#Store[
-      (SimClusterEntity, FullClusterIdBucket),
-      ClustersWithScores
-    ] = {
-      Storm.store(
-        EntityClusterScoreReadableStore
-          .onlineMergeableStore(profile.entityClusterScorePath, profile.serviceIdentifier(zone)))
-    }
-
-    lazy val tweetTopKStore: Storm#Store[EntityWithVersion, TopKClustersWithScores] = {
-      Storm.store(
-        TopKClustersForTweetReadableStore
-          .onlineMergeableStore(profile.tweetTopKClustersPath, profile.serviceIdentifier(zone)))
-    }
-
-    lazy val clusterTopKTweetsStore: Storm#Store[FullClusterId, TopKTweetsWithScores] = {
-      Storm.store(
-        TopKTweetsForClusterReadableStore
-          .onlineMergeableStore(profile.clusterTopKTweetsPath, profile.serviceIdentifier(zone)))
-    }
-
-    lazy val clusterTopKTweetsLightStore: Option[
-      Storm#Store[FullClusterId, TopKTweetsWithScores]
-    ] = {
-      profile.clusterTopKTweetsLightPath.map { lightPath =>
-        Storm.store(
-          TopKTweetsForClusterReadableStore
-            .onlineMergeableStore(lightPath, profile.serviceIdentifier(zone)))
-      }
-    }
-
-    lazy val userInterestedInService: Storm#Service[Long, ClustersUserIsInterestedIn] = {
-      Storm.service(
-        UserInterestedInReadableStore.defaultStoreWithMtls(
-          ManhattanKVClientMtlsParams(profile.serviceIdentifier(zone)),
-          modelVersion = profile.modelVersionStr
-        ))
-    }
-
-    new StormConfig {
-
-      val jobName: JobName = JobName(profile.jobName)
-
-      implicit val jobID: JobId = JobId(jobName.toString)
-
-      /**
-       * Add registrars for chill serialization for user-defined types.
-       */
-      override def registrars =
-        List(
-          SBRunConfig.register[SimClusterEntity],
-          SBRunConfig.register[FullClusterIdBucket],
-          SBRunConfig.register[ClustersWithScores],
-          SBRunConfig.register[EntityWithVersion],
-          SBRunConfig.register[FullClusterId],
-          SBRunConfig.register[EntityWithVersion],
-          SBRunConfig.register[TopKEntitiesWithScores],
-          SBRunConfig.register[TopKClustersWithScores],
-          SBRunConfig.register[TopKTweetsWithScores]
-        )
-
-      /***** Job configuration settings *****/
-      /**
-       * Use vmSettings to configure the VM
-       */
-      override def vmSettings: Seq[String] = Seq()
-
-      private val SourcePerWorker = 1
-      private val FlatMapPerWorker = 3
-      private val SummerPerWorker = 3
-
-      private val TotalWorker = 150
-
-      /**
-       * Use transformConfig to set Heron options.
-       */
-      override def transformConfig(config: Map[String, AnyRef]): Map[String, AnyRef] = {
-        val heronConfig = new HeronConfig()
-
-        /**
-        Component names (subject to change if you add more components, make sure to update this)
-          Source: Tail-FlatMap-FlatMap-Summer-FlatMap-Source
-          FlatMap: Tail-FlatMap-FlatMap-Summer-FlatMap, Tail-FlatMap-FlatMap, Tail-FlatMap-FlatMap,
-          Tail-FlatMap
-          Summer: Tail-FlatMap-FlatMap-Summer * 2, Tail, Tail.2
-         */
-        val sourceName = "Tail-FlatMap-FlatMap-Summer-FlatMap-Source"
-        val flatMapFlatMapSummerFlatMapName = "Tail-FlatMap-FlatMap-Summer-FlatMap"
-
-        // 1 CPU per node, 1 for StreamMgr
-        // By default, numCpus per component = totalCPUs / total number of components.
-        // To add more CPUs for a specific component, use heronConfig.setComponentCpu(name, numCPUs)
-        // add 20% more CPUs to address back pressure issue
-        val TotalCPU = jLong(
-          (1.2 * (SourcePerWorker * 1 + FlatMapPerWorker * 4 + SummerPerWorker * 6 + 1)).ceil.toInt)
-        heronConfig.setContainerCpuRequested(TotalCPU.toDouble)
-
-        // RAM settings
-        val RamPerSourceGB = 8
-        val RamPerSummerFlatMap = 8
-        val RamDefaultPerComponent = 4
-
-        // The extra 4GB is not explicitly assigned to the StreamMgr, so it gets 2GB by default, and
-        // the remaining 2GB is shared among components. Keeping this configuration for now, since
-        // it seems stable
-        val TotalRamRB =
-          RamPerSourceGB * SourcePerWorker * 1 +
-            RamDefaultPerComponent * FlatMapPerWorker * 4 +
-            RamDefaultPerComponent * SummerPerWorker * 6 +
-            4 // reserve 4GB for the StreamMgr
-
-        // By default, ramGB per component = totalRAM / total number of components.
-        // To adjust RAMs for a specific component, use heronConfig.setComponentRam(name, ramGB)
-        heronConfig.setComponentRam(sourceName, ByteAmount.fromGigabytes(RamPerSourceGB))
-        heronConfig.setComponentRam(
-          flatMapFlatMapSummerFlatMapName,
-          ByteAmount.fromGigabytes(RamPerSummerFlatMap))
-        heronConfig.setContainerRamRequested(ByteAmount.fromGigabytes(TotalRamRB))
-
-        super.transformConfig(config) ++ List(
-          BTConfig.TOPOLOGY_TEAM_NAME -> "cassowary",
-          BTConfig.TOPOLOGY_TEAM_EMAIL -> "no-reply@twitter.com",
-          BTConfig.TOPOLOGY_WORKERS -> jInt(TotalWorker),
-          BTConfig.TOPOLOGY_ACKER_EXECUTORS -> jInt(0),
-          BTConfig.TOPOLOGY_MESSAGE_TIMEOUT_SECS -> jInt(30),
-          BTConfig.TOPOLOGY_WORKER_CHILDOPTS -> List(
-            "-XX:MaxMetaspaceSize=256M",
-            "-Djava.security.auth.login.config=config/jaas.conf",
-            "-Dsun.security.krb5.debug=true",
-            "-Dcom.twitter.eventbus.client.EnableKafkaSaslTls=true",
-            "-Dcom.twitter.eventbus.client.zoneName=" + zone
-          ).mkString(" "),
-          "storm.job.uniqueId" -> jobID.get
-        ) ++ heronConfig.asScala.toMap
-      }
-
-      /**
-       * Use getNamedOptions to set Summingbird runtime options
-       * The list of available options: com.twitter.summingbird.online.option
-       */
-      override def getNamedOptions: Map[String, Options] = Map(
-        "DEFAULT" -> Options()
-          .set(FlatMapParallelism(TotalWorker * FlatMapPerWorker))
-          .set(SourceParallelism(TotalWorker))
-          .set(SummerBatchMultiplier(1000))
-          .set(CacheSize(10000))
-          .set(flatMapMetrics)
-          .set(summerMetrics),
-        TweetJob.NodeName.TweetClusterUpdatedScoresFlatMapNodeName -> Options()
-          .set(FlatMapParallelism(TotalWorker * FlatMapPerWorker)),
-        TweetJob.NodeName.TweetClusterScoreSummerNodeName -> Options()
-        // Most expensive step. Double the capacity.
-          .set(SummerParallelism(TotalWorker * SummerPerWorker * 4))
-          .set(FlushFrequency(30.seconds)),
-        TweetJob.NodeName.ClusterTopKTweetsNodeName -> Options()
-          .set(SummerParallelism(TotalWorker * SummerPerWorker))
-          .set(FlushFrequency(30.seconds)),
-        TweetJob.NodeName.ClusterTopKTweetsLightNodeName -> Options()
-          .set(SummerParallelism(TotalWorker * SummerPerWorker))
-          .set(FlushFrequency(30.seconds)),
-        TweetJob.NodeName.TweetTopKNodeName -> Options()
-          .set(SummerParallelism(TotalWorker * SummerPerWorker))
-          .set(FlushFrequency(30.seconds))
-      )
-
-      /** Required job generation call for your job, defined in Job.scala */
-      override def graph: TailProducer[Storm, Any] = TweetJob.generate[Storm](
-        profile,
-        favoriteEventSource,
-        userInterestedInService,
-        entityClusterScoreStore,
-        tweetTopKStore,
-        clusterTopKTweetsStore,
-        clusterTopKTweetsLightStore
-      )
-    }
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/persistent_tweet_job_deploy.docx b/src/scala/com/twitter/simclusters_v2/summingbird/storm/persistent_tweet_job_deploy.docx
new file mode 100644
index 000000000..bbeacd45b
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/storm/persistent_tweet_job_deploy.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/persistent_tweet_job_deploy.sh b/src/scala/com/twitter/simclusters_v2/summingbird/storm/persistent_tweet_job_deploy.sh
deleted file mode 100755
index 9340c72bb..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/storm/persistent_tweet_job_deploy.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-# script to deploy simclusters persistent storm job to CI
-
-set -u -e
-
-cd "$(git rev-parse --show-toplevel)"
-
-# shellcheck source=/dev/null
-. "$(git rev-parse --show-toplevel)/devprod/source-sh-setup"
-
-function usage {
-  cat <<EOF
-    $0 --env [devel | prod] --dc [atla | pdxa]
-
-Optional:
-    --dc              atla | pdxa
-    --env             devel | prod
-
-EOF
-  if [ -n "$1" ] && [ "$1" != "noargs" ]; then
-    echo ""
-    echo "Invalid app args encountered! Expecting: $1"
-  fi
-}
-
-if [ $# -lt 1 ]; then
-  usage noargs
-  exit 1
-fi
-
-CLUSTER=
-ENV=
-USER=cassowary
-
-while [[ $# -gt 1 ]]; do
-  key="$1"
-  
-  case $key in
-    --dc)
-      CLUSTER="$2"
-      shift
-      ;;
-    --env)
-      ENV="$2"
-      shift
-      ;;
-    *)
-      # options ignored
-      ;;
-  esac
-  shift
-done
-
-echo "Bundling..."
-
-
-JAR_NAME="persistent-tweet-simclusters-storm-job.tar"
-JOB_NAME="summingbird_simclusters_v2_persistent_tweet_job_${ENV}"
-
-BASE_DIR="src/scala/com/twitter/simclusters_v2/summingbird"
-./bazel bundle --bundle-jvm-archive=tar ${BASE_DIR}:persistent-tweet-simclusters-storm-job || exit 1
-
-# initialize the aurora path for a heron job: <dc>/<role>/<env> where <env> can only be devel or prod 
-AURORA_PATH=${AURORA_PATH:="$CLUSTER/$USER/$ENV"}
-AURORA_JOB_KEY="${AURORA_PATH}/${JOB_NAME}"
-
-heron kill "$AURORA_PATH" "$JOB_NAME" || true
-
-echo "Waiting 5 seconds so heron is sure its dead"
-sleep 5
-
-echo "AURORA_JOB_KEY: $AURORA_JOB_KEY"
-
-echo "Starting your topology... for ${ENV} ${JOB_NAME}"
-#set -v
-
-heron submit "${AURORA_PATH}" "dist/${JAR_NAME}" com.twitter.simclusters_v2.summingbird.storm.PersistentTweetJobRunner --env "$ENV" --dc "$CLUSTER"
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_alt_job_deploy.docx b/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_alt_job_deploy.docx
new file mode 100644
index 000000000..ada984233
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_alt_job_deploy.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_alt_job_deploy.sh b/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_alt_job_deploy.sh
deleted file mode 100755
index 67b14d126..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_alt_job_deploy.sh
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/bin/bash
-# script to deploy simcluster storm job to CI
-
-set -u -e
-
-cd "$(git rev-parse --show-toplevel)"
-
-# shellcheck source=/dev/null
-. "$(git rev-parse --show-toplevel)/devprod/source-sh-setup"
-
-function usage {
-  cat <<EOF
-    $0 --env [devel | prod] --dc [atla | pdxa]
-
-Optional:
-    --dc              atla | pdxa
-    --env             devel | prod
-
-EOF
-  if [ -n "$1" ] && [ "$1" != "noargs" ]; then
-    echo ""
-    echo "Invalid app args encountered! Expecting: $1"
-  fi
-}
-
-if [ $# -lt 1 ]; then
-  usage noargs
-  exit 1
-fi
-
-CLUSTER=
-ENV=
-USER=cassowary
-
-while [[ $# -gt 1 ]]; do
-  key="$1"
-  
-  case $key in
-    --dc)
-      CLUSTER="$2"
-      shift
-      ;;
-    --env)
-      ENV="$2"
-      shift
-      ;;
-    *)
-      # options ignored
-      ;;
-  esac
-  shift
-done
-
-echo "Bundling..."
-
-
-JAR_NAME="tweet-simclusters-storm-job.tar"
-JOB_NAME="summingbird_simclusters_v2_tweet_alt_job_${ENV}"
-
-BASE_DIR="src/scala/com/twitter/simclusters_v2/summingbird"
-./bazel bundle --bundle-jvm-archive=tar ${BASE_DIR}:tweet-simclusters-storm-job || exit 1
-
-# initialize the aurora path for a heron job: <dc>/<role>/<env> where <env> can only be devel or prod 
-AURORA_PATH=${AURORA_PATH:="$CLUSTER/$USER/$ENV"}
-AURORA_JOB_KEY="${AURORA_PATH}/${JOB_NAME}"
-
-heron kill "$AURORA_PATH" "$JOB_NAME" || true
-
-echo "Waiting 5 seconds so heron is sure its dead"
-sleep 5
-
-echo "AURORA_JOB_KEY: $AURORA_JOB_KEY"
-
-echo "Starting your topology... for ${ENV} ${JOB_NAME}"
-#set -v
-
-heron submit "${AURORA_PATH}" "dist/${JAR_NAME}" com.twitter.simclusters_v2.summingbird.storm.TweetJobRunner --env "$ENV" --dc "$CLUSTER" --alt "alt" --usingLogFavScore
-
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_job_deploy.docx b/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_job_deploy.docx
new file mode 100644
index 000000000..3278a1cb3
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_job_deploy.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_job_deploy.sh b/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_job_deploy.sh
deleted file mode 100755
index b3e4f22d4..000000000
--- a/src/scala/com/twitter/simclusters_v2/summingbird/storm/tweet_job_deploy.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-# script to deploy simcluster storm job to CI
-
-set -u -e
-
-cd "$(git rev-parse --show-toplevel)"
-
-# shellcheck source=/dev/null
-. "$(git rev-parse --show-toplevel)/devprod/source-sh-setup"
-
-function usage {
-  cat <<EOF
-    $0 --env [devel | prod] --dc [atla | pdxa]
-
-Optional:
-    --dc              atla | pdxa
-    --env             devel | prod
-
-EOF
-  if [ -n "$1" ] && [ "$1" != "noargs" ]; then
-    echo ""
-    echo "Invalid app args encountered! Expecting: $1"
-  fi
-}
-
-if [ $# -lt 1 ]; then
-  usage noargs
-  exit 1
-fi
-
-CLUSTER=
-ENV=
-USER=cassowary
-
-while [[ $# -gt 1 ]]; do
-  key="$1"
-  
-  case $key in
-    --dc)
-      CLUSTER="$2"
-      shift
-      ;;
-    --env)
-      ENV="$2"
-      shift
-      ;;
-    *)
-      # options ignored
-      ;;
-  esac
-  shift
-done
-
-echo "Bundling..."
-
-
-JAR_NAME="tweet-simclusters-storm-job.tar"
-JOB_NAME="summingbird_simclusters_v2_tweet_job_${ENV}"
-
-BASE_DIR="src/scala/com/twitter/simclusters_v2/summingbird"
-./bazel bundle --bundle-jvm-archive=tar ${BASE_DIR}:tweet-simclusters-storm-job || exit 1
-
-# initialize the aurora path for a heron job: <dc>/<role>/<env> where <env> can only be devel or prod 
-AURORA_PATH=${AURORA_PATH:="$CLUSTER/$USER/$ENV"}
-AURORA_JOB_KEY="${AURORA_PATH}/${JOB_NAME}"
-
-heron kill "$AURORA_PATH" "$JOB_NAME" || true
-
-echo "Waiting 5 seconds so heron is sure its dead"
-sleep 5
-
-echo "AURORA_JOB_KEY: $AURORA_JOB_KEY"
-
-echo "Starting your topology... for ${ENV} ${JOB_NAME}"
-#set -v
-
-heron submit "${AURORA_PATH}" "dist/${JAR_NAME}" com.twitter.simclusters_v2.summingbird.storm.TweetJobRunner --env "$ENV" --dc "$CLUSTER"
diff --git a/src/scala/com/twitter/simclusters_v2/tweet_similarity/BUILD b/src/scala/com/twitter/simclusters_v2/tweet_similarity/BUILD
deleted file mode 100644
index 526ee6d23..000000000
--- a/src/scala/com/twitter/simclusters_v2/tweet_similarity/BUILD
+++ /dev/null
@@ -1,11 +0,0 @@
-scala_library(
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/scala/com/twitter/ml/api:api-base",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/recommendations:aggregate",
-        "src/scala/com/twitter/ml/featurestore/lib/embedding",
-        "src/scala/com/twitter/simclusters_v2/common",
-        "src/scala/com/twitter/simclusters_v2/common/ml",
-    ],
-)
diff --git a/src/scala/com/twitter/simclusters_v2/tweet_similarity/BUILD.docx b/src/scala/com/twitter/simclusters_v2/tweet_similarity/BUILD.docx
new file mode 100644
index 000000000..7e187a560
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/tweet_similarity/BUILD.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/tweet_similarity/ModelBasedTweetSimilaritySimClustersEmbeddingAdapter.docx b/src/scala/com/twitter/simclusters_v2/tweet_similarity/ModelBasedTweetSimilaritySimClustersEmbeddingAdapter.docx
new file mode 100644
index 000000000..e9cb0d509
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/tweet_similarity/ModelBasedTweetSimilaritySimClustersEmbeddingAdapter.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/tweet_similarity/ModelBasedTweetSimilaritySimClustersEmbeddingAdapter.scala b/src/scala/com/twitter/simclusters_v2/tweet_similarity/ModelBasedTweetSimilaritySimClustersEmbeddingAdapter.scala
deleted file mode 100644
index f1c3f8cc2..000000000
--- a/src/scala/com/twitter/simclusters_v2/tweet_similarity/ModelBasedTweetSimilaritySimClustersEmbeddingAdapter.scala
+++ /dev/null
@@ -1,37 +0,0 @@
-package com.twitter.simclusters_v2.tweet_similarity
-
-import com.twitter.ml.api.{DataRecord, DataRecordMerger}
-import com.twitter.simclusters_v2.common.ml.{
-  SimClustersEmbeddingAdapter,
-  NormalizedSimClustersEmbeddingAdapter
-}
-import com.twitter.simclusters_v2.common.SimClustersEmbedding
-
-object ModelBasedTweetSimilaritySimClustersEmbeddingAdapter {
-  val QueryEmbAdapter = new SimClustersEmbeddingAdapter(TweetSimilarityFeatures.QueryTweetEmbedding)
-  val CandidateEmbAdapter = new SimClustersEmbeddingAdapter(
-    TweetSimilarityFeatures.CandidateTweetEmbedding)
-
-  val NormalizedQueryEmbAdapter = new NormalizedSimClustersEmbeddingAdapter(
-    TweetSimilarityFeatures.QueryTweetEmbedding,
-    TweetSimilarityFeatures.QueryTweetEmbeddingNorm)
-  val NormalizedCandidateEmbAdapter = new NormalizedSimClustersEmbeddingAdapter(
-    TweetSimilarityFeatures.CandidateTweetEmbedding,
-    TweetSimilarityFeatures.CandidateTweetEmbeddingNorm)
-
-  def adaptEmbeddingPairToDataRecord(
-    queryEmbedding: SimClustersEmbedding,
-    candidateEmbedding: SimClustersEmbedding,
-    normalized: Boolean
-  ): DataRecord = {
-    val DataRecordMerger = new DataRecordMerger()
-    val queryAdapter = if (normalized) NormalizedQueryEmbAdapter else QueryEmbAdapter
-    val candidateAdapter = if (normalized) NormalizedCandidateEmbAdapter else CandidateEmbAdapter
-
-    val featureDataRecord = queryAdapter.adaptToDataRecord(queryEmbedding)
-    DataRecordMerger.merge(
-      featureDataRecord,
-      candidateAdapter.adaptToDataRecord(candidateEmbedding))
-    featureDataRecord
-  }
-}
diff --git a/src/scala/com/twitter/simclusters_v2/tweet_similarity/TweetSimilarityFeatures.docx b/src/scala/com/twitter/simclusters_v2/tweet_similarity/TweetSimilarityFeatures.docx
new file mode 100644
index 000000000..100c9a688
Binary files /dev/null and b/src/scala/com/twitter/simclusters_v2/tweet_similarity/TweetSimilarityFeatures.docx differ
diff --git a/src/scala/com/twitter/simclusters_v2/tweet_similarity/TweetSimilarityFeatures.scala b/src/scala/com/twitter/simclusters_v2/tweet_similarity/TweetSimilarityFeatures.scala
deleted file mode 100644
index 0d6b90c95..000000000
--- a/src/scala/com/twitter/simclusters_v2/tweet_similarity/TweetSimilarityFeatures.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-package com.twitter.simclusters_v2.tweet_similarity
-
-import com.twitter.ml.api.Feature.{Binary, Continuous, Discrete, SparseContinuous}
-import com.twitter.ml.api.util.FDsl._
-import com.twitter.ml.api.{DataRecord, FeatureContext, IRecordOneToOneAdapter}
-import com.twitter.ml.featurestore.catalog.features.recommendations.ProducerSimClustersEmbedding
-import com.twitter.ml.featurestore.lib.UserId
-import com.twitter.ml.featurestore.lib.data.{PredictionRecord, PredictionRecordAdapter}
-import com.twitter.ml.featurestore.lib.entity.Entity
-import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet
-
-object TweetSimilarityFeatures {
-  val QueryTweetId = new Discrete("query_tweet.id")
-  val CandidateTweetId = new Discrete("candidate_tweet.id")
-  val QueryTweetEmbedding = new SparseContinuous("query_tweet.simclusters_embedding")
-  val CandidateTweetEmbedding = new SparseContinuous("candidate_tweet.simclusters_embedding")
-  val QueryTweetEmbeddingNorm = new Continuous("query_tweet.embedding_norm")
-  val CandidateTweetEmbeddingNorm = new Continuous("candidate_tweet.embedding_norm")
-  val QueryTweetTimestamp = new Discrete("query_tweet.timestamp")
-  val CandidateTweetTimestamp = new Discrete("candidate_tweet.timestamp")
-  val TweetPairCount = new Discrete("popularity_count.tweet_pair")
-  val QueryTweetCount = new Discrete("popularity_count.query_tweet")
-  val CosineSimilarity = new Continuous("meta.cosine_similarity")
-  val Label = new Binary("co-engagement.label")
-
-  val FeatureContext: FeatureContext = new FeatureContext(
-    QueryTweetId,
-    CandidateTweetId,
-    QueryTweetEmbedding,
-    CandidateTweetEmbedding,
-    QueryTweetEmbeddingNorm,
-    CandidateTweetEmbeddingNorm,
-    QueryTweetTimestamp,
-    CandidateTweetTimestamp,
-    TweetPairCount,
-    QueryTweetCount,
-    CosineSimilarity,
-    Label
-  )
-
-  def isCoengaged(dataRecord: DataRecord): Boolean = {
-    dataRecord.getFeatureValue(Label)
-  }
-}
-
-class TweetSimilarityFeaturesStoreConfig(identifier: String) {
-  val bindingIdentifier: Entity[UserId] = Entity[UserId](identifier)
-
-  val featureStoreBoundFeatureSet: BoundFeatureSet = BoundFeatureSet(
-    ProducerSimClustersEmbedding.FavBasedEmbedding20m145kUpdated.bind(bindingIdentifier))
-
-  val predictionRecordAdapter: IRecordOneToOneAdapter[PredictionRecord] =
-    PredictionRecordAdapter.oneToOne(featureStoreBoundFeatureSet)
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.docx
new file mode 100644
index 000000000..5ecf5ac1e
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.scala
deleted file mode 100644
index 6adf6eaf8..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.scala
+++ /dev/null
@@ -1,68 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates
-
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.FeatureContext
-import com.twitter.ml.api.ITransform
-import com.twitter.ml.api.constant.SharedFeatures
-import java.lang.{Double => JDouble}
-
-import com.twitter.timelines.prediction.common.adapters.AdapterConsumer
-import com.twitter.timelines.prediction.common.adapters.EngagementLabelFeaturesDataRecordUtils
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.api.RichDataRecord
-import com.twitter.timelines.suggests.common.engagement.thriftscala.EngagementType
-import com.twitter.timelines.suggests.common.engagement.thriftscala.Engagement
-import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
-import com.twitter.timelines.prediction.features.common.CombinedFeatures
-
-/**
- * To transfrom BCE events UUA data records that contain only continuous dwell time to datarecords that contain corresponding binary label features
- * The UUA datarecords inputted would have USER_ID, SOURCE_TWEET_ID,TIMESTAMP and
- * 0 or one of (TWEET_DETAIL_DWELL_TIME_MS, PROFILE_DWELL_TIME_MS, FULLSCREEN_VIDEO_DWELL_TIME_MS) features.
- * We will use the different engagement TIME_MS to differentiate different engagements,
- * and then re-use the function in EngagementTypeConverte to add the binary label to the datarecord.
- **/
-
-object BCELabelTransformFromUUADataRecord extends ITransform {
-
-  val dwellTimeFeatureToEngagementMap = Map(
-    TimelinesSharedFeatures.TWEET_DETAIL_DWELL_TIME_MS -> EngagementType.TweetDetailDwell,
-    TimelinesSharedFeatures.PROFILE_DWELL_TIME_MS -> EngagementType.ProfileDwell,
-    TimelinesSharedFeatures.FULLSCREEN_VIDEO_DWELL_TIME_MS -> EngagementType.FullscreenVideoDwell
-  )
-
-  def dwellFeatureToEngagement(
-    rdr: RichDataRecord,
-    dwellTimeFeature: Feature[JDouble],
-    engagementType: EngagementType
-  ): Option[Engagement] = {
-    if (rdr.hasFeature(dwellTimeFeature)) {
-      Some(
-        Engagement(
-          engagementType = engagementType,
-          timestampMs = rdr.getFeatureValue(SharedFeatures.TIMESTAMP),
-          weight = Some(rdr.getFeatureValue(dwellTimeFeature))
-        ))
-    } else {
-      None
-    }
-  }
-  override def transformContext(featureContext: FeatureContext): FeatureContext = {
-    featureContext.addFeatures(
-      (CombinedFeatures.TweetDetailDwellEngagements ++ CombinedFeatures.ProfileDwellEngagements ++ CombinedFeatures.FullscreenVideoDwellEngagements).toSeq: _*)
-  }
-  override def transform(record: DataRecord): Unit = {
-    val rdr = new RichDataRecord(record)
-    val engagements = dwellTimeFeatureToEngagementMap
-      .map {
-        case (dwellTimeFeature, engagementType) =>
-          dwellFeatureToEngagement(rdr, dwellTimeFeature, engagementType)
-      }.flatten.toSeq
-
-    // Re-use BCE( behavior client events) label conversion in EngagementTypeConverter to align with BCE labels generation for offline training data
-    EngagementLabelFeaturesDataRecordUtils.setDwellTimeFeatures(
-      rdr,
-      Some(engagements),
-      AdapterConsumer.Combined)
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD b/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD
deleted file mode 100644
index 01c930e8e..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD
+++ /dev/null
@@ -1,353 +0,0 @@
-create_datasets(
-    base_name = "original_author_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/original_author_aggregates/1556496000000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.OriginalAuthor",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "twitter_wide_user_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/twitter_wide_user_aggregates/1556496000000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.TwitterWideUser",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "twitter_wide_user_author_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/twitter_wide_user_author_aggregates/1556323200000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.TwitterWideUserAuthor",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "user_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_aggregates/1556150400000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.User",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "user_author_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_author_aggregates/1556064000000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserAuthor",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "aggregates_canary",
-    fallback_path = "gs://user.timelines.dp.gcp.twttr.net//canaries/processed/aggregates_v2/user_aggregates/1622851200000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.User",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "user_engager_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_engager_aggregates/1556496000000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserEngager",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "user_original_author_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_original_author_aggregates/1556496000000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserOriginalAuthor",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "author_topic_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/author_topic_aggregates/1589932800000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.AuthorTopic",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "user_topic_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_topic_aggregates/1590278400000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserTopic",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "user_inferred_topic_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_inferred_topic_aggregates/1599696000000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserInferredTopic",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "user_mention_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_mention_aggregates/1556582400000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserMention",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "user_request_dow_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_request_dow_aggregates/1556236800000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserRequestDow",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-create_datasets(
-    base_name = "user_request_hour_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_request_hour_aggregates/1556150400000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserRequestHour",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-
-create_datasets(
-    base_name = "user_list_aggregates",
-    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_list_aggregates/1590624000000",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserList",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-
-create_datasets(
-    base_name = "user_media_understanding_annotation_aggregates",
-    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
-    platform = "java8",
-    role = "timelines",
-    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserMediaUnderstandingAnnotation",
-    segment_type = "snapshot",
-    tags = ["bazel-compatible"],
-    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
-    scala_dependencies = [
-        ":injections",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
-
-scala_library(
-    sources = [
-        "BCELabelTransformFromUUADataRecord.scala",
-        "FeatureSelectorConfig.scala",
-        "RecapUserFeatureAggregation.scala",
-        "RectweetUserFeatureAggregation.scala",
-        "TimelinesAggregationConfig.scala",
-        "TimelinesAggregationConfigDetails.scala",
-        "TimelinesAggregationConfigTrait.scala",
-        "TimelinesAggregationSources.scala",
-    ],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":aggregates_canary-scala",
-        ":author_topic_aggregates-scala",
-        ":original_author_aggregates-scala",
-        ":twitter_wide_user_aggregates-scala",
-        ":twitter_wide_user_author_aggregates-scala",
-        ":user_aggregates-scala",
-        ":user_author_aggregates-scala",
-        ":user_engager_aggregates-scala",
-        ":user_inferred_topic_aggregates-scala",
-        ":user_list_aggregates-scala",
-        ":user_media_understanding_annotation_aggregates-scala",
-        ":user_mention_aggregates-scala",
-        ":user_original_author_aggregates-scala",
-        ":user_request_dow_aggregates-scala",
-        ":user_request_hour_aggregates-scala",
-        ":user_topic_aggregates-scala",
-        "src/java/com/twitter/ml/api:api-base",
-        "src/java/com/twitter/ml/api/constant",
-        "src/java/com/twitter/ml/api/matcher",
-        "src/scala/com/twitter/common/text/util",
-        "src/scala/com/twitter/dal/client/dataset",
-        "src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core",
-        "src/scala/com/twitter/scalding_internal/multiformat/format",
-        "src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter",
-        "src/scala/com/twitter/timelines/prediction/features/client_log_event",
-        "src/scala/com/twitter/timelines/prediction/features/common",
-        "src/scala/com/twitter/timelines/prediction/features/engagement_features",
-        "src/scala/com/twitter/timelines/prediction/features/escherbird",
-        "src/scala/com/twitter/timelines/prediction/features/itl",
-        "src/scala/com/twitter/timelines/prediction/features/list_features",
-        "src/scala/com/twitter/timelines/prediction/features/p_home_latest",
-        "src/scala/com/twitter/timelines/prediction/features/real_graph",
-        "src/scala/com/twitter/timelines/prediction/features/recap",
-        "src/scala/com/twitter/timelines/prediction/features/request_context",
-        "src/scala/com/twitter/timelines/prediction/features/simcluster",
-        "src/scala/com/twitter/timelines/prediction/features/time_features",
-        "src/scala/com/twitter/timelines/prediction/transform/filter",
-        "src/thrift/com/twitter/timelines/suggests/common:engagement-scala",
-        "timelines/data_processing/ad_hoc/recap/data_record_preparation:recap_data_records_agg_minimal-java",
-        "util/util-core:scala",
-    ],
-)
-
-scala_library(
-    name = "injections",
-    sources = [
-        "FeatureSelectorConfig.scala",
-        "RecapUserFeatureAggregation.scala",
-        "RectweetUserFeatureAggregation.scala",
-        "TimelinesAggregationConfigDetails.scala",
-        "TimelinesAggregationConfigTrait.scala",
-        "TimelinesAggregationKeyValInjections.scala",
-        "TimelinesAggregationSources.scala",
-    ],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/java/com/twitter/ml/api/constant",
-        "src/java/com/twitter/ml/api/matcher",
-        "src/scala/com/twitter/common/text/util",
-        "src/scala/com/twitter/dal/client/dataset",
-        "src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core",
-        "src/scala/com/twitter/scalding_internal/multiformat/format",
-        "src/scala/com/twitter/timelines/prediction/features/client_log_event",
-        "src/scala/com/twitter/timelines/prediction/features/common",
-        "src/scala/com/twitter/timelines/prediction/features/engagement_features",
-        "src/scala/com/twitter/timelines/prediction/features/escherbird",
-        "src/scala/com/twitter/timelines/prediction/features/itl",
-        "src/scala/com/twitter/timelines/prediction/features/list_features",
-        "src/scala/com/twitter/timelines/prediction/features/p_home_latest",
-        "src/scala/com/twitter/timelines/prediction/features/real_graph",
-        "src/scala/com/twitter/timelines/prediction/features/recap",
-        "src/scala/com/twitter/timelines/prediction/features/request_context",
-        "src/scala/com/twitter/timelines/prediction/features/semantic_core_features",
-        "src/scala/com/twitter/timelines/prediction/features/simcluster",
-        "src/scala/com/twitter/timelines/prediction/features/time_features",
-        "src/scala/com/twitter/timelines/prediction/transform/filter",
-        "timelines/data_processing/ad_hoc/recap/data_record_preparation:recap_data_records_agg_minimal-java",
-        "util/util-core:scala",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD.docx
new file mode 100644
index 000000000..2c821dfc5
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.docx
new file mode 100644
index 000000000..ab274a3fd
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.scala
deleted file mode 100644
index 1c91ef16c..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.scala
+++ /dev/null
@@ -1,121 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates
-
-import com.twitter.ml.api.matcher.FeatureMatcher
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
-import scala.collection.JavaConverters._
-
-object FeatureSelectorConfig {
-  val BasePairsToStore = Seq(
-    ("twitter_wide_user_aggregate.pair", "*"),
-    ("twitter_wide_user_author_aggregate.pair", "*"),
-    ("user_aggregate_v5.continuous.pair", "*"),
-    ("user_aggregate_v7.pair", "*"),
-    ("user_author_aggregate_v2.pair", "recap.earlybird.*"),
-    ("user_author_aggregate_v2.pair", "recap.searchfeature.*"),
-    ("user_author_aggregate_v2.pair", "recap.tweetfeature.embeds*"),
-    ("user_author_aggregate_v2.pair", "recap.tweetfeature.link_count*"),
-    ("user_author_aggregate_v2.pair", "engagement_features.in_network.*"),
-    ("user_author_aggregate_v2.pair", "recap.tweetfeature.is_reply.*"),
-    ("user_author_aggregate_v2.pair", "recap.tweetfeature.is_retweet.*"),
-    ("user_author_aggregate_v2.pair", "recap.tweetfeature.num_mentions.*"),
-    ("user_author_aggregate_v5.pair", "*"),
-    ("user_author_aggregate_tweetsource_v1.pair", "*"),
-    ("user_engager_aggregate.pair", "*"),
-    ("user_mention_aggregate.pair", "*"),
-    ("user_request_context_aggregate.dow.pair", "*"),
-    ("user_request_context_aggregate.hour.pair", "*"),
-    ("user_aggregate_v6.pair", "*"),
-    ("user_original_author_aggregate_v1.pair", "*"),
-    ("user_original_author_aggregate_v2.pair", "*"),
-    ("original_author_aggregate_v1.pair", "*"),
-    ("original_author_aggregate_v2.pair", "*"),
-    ("author_topic_aggregate.pair", "*"),
-    ("user_list_aggregate.pair", "*"),
-    ("user_topic_aggregate.pair", "*"),
-    ("user_topic_aggregate_v2.pair", "*"),
-    ("user_inferred_topic_aggregate.pair", "*"),
-    ("user_inferred_topic_aggregate_v2.pair", "*"),
-    ("user_media_annotation_aggregate.pair", "*"),
-    ("user_media_annotation_aggregate.pair", "*"),
-    ("user_author_good_click_aggregate.pair", "*"),
-    ("user_engager_good_click_aggregate.pair", "*")
-  )
-  val PairsToStore = BasePairsToStore ++ Seq(
-    ("user_aggregate_v2.pair", "*"),
-    ("user_aggregate_v5.boolean.pair", "*"),
-    ("user_aggregate_tweetsource_v1.pair", "*"),
-  )
-
-
-  val LabelsToStore = Seq(
-    "any_label",
-    "recap.engagement.is_favorited",
-    "recap.engagement.is_retweeted",
-    "recap.engagement.is_replied",
-    "recap.engagement.is_open_linked",
-    "recap.engagement.is_profile_clicked",
-    "recap.engagement.is_clicked",
-    "recap.engagement.is_photo_expanded",
-    "recap.engagement.is_video_playback_50",
-    "recap.engagement.is_video_quality_viewed",
-    "recap.engagement.is_replied_reply_impressed_by_author",
-    "recap.engagement.is_replied_reply_favorited_by_author",
-    "recap.engagement.is_replied_reply_replied_by_author",
-    "recap.engagement.is_report_tweet_clicked",
-    "recap.engagement.is_block_clicked",
-    "recap.engagement.is_mute_clicked",
-    "recap.engagement.is_dont_like",
-    "recap.engagement.is_good_clicked_convo_desc_favorited_or_replied",
-    "recap.engagement.is_good_clicked_convo_desc_v2",
-    "itl.engagement.is_favorited",
-    "itl.engagement.is_retweeted",
-    "itl.engagement.is_replied",
-    "itl.engagement.is_open_linked",
-    "itl.engagement.is_profile_clicked",
-    "itl.engagement.is_clicked",
-    "itl.engagement.is_photo_expanded",
-    "itl.engagement.is_video_playback_50"
-  )
-
-  val PairGlobsToStore = for {
-    (prefix, suffix) <- PairsToStore
-    label <- LabelsToStore
-  } yield FeatureMatcher.glob(prefix + "." + label + "." + suffix)
-
-  val BaseAggregateV2FeatureSelector = FeatureMatcher
-    .none()
-    .or(
-      FeatureMatcher.glob("meta.user_id"),
-      FeatureMatcher.glob("meta.author_id"),
-      FeatureMatcher.glob("entities.original_author_id"),
-      FeatureMatcher.glob("entities.topic_id"),
-      FeatureMatcher
-        .glob("entities.inferred_topic_ids" + TypedAggregateGroup.SparseFeatureSuffix),
-      FeatureMatcher.glob("timelines.meta.list_id"),
-      FeatureMatcher.glob("list.id"),
-      FeatureMatcher
-        .glob("engagement_features.user_ids.public" + TypedAggregateGroup.SparseFeatureSuffix),
-      FeatureMatcher
-        .glob("entities.users.mentioned_screen_names" + TypedAggregateGroup.SparseFeatureSuffix),
-      FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_dont_like.*"),
-      FeatureMatcher.glob("user_author_aggregate_v2.pair.any_label.recap.tweetfeature.has_*"),
-      FeatureMatcher.glob("request_context.country_code"),
-      FeatureMatcher.glob("request_context.timestamp_gmt_dow"),
-      FeatureMatcher.glob("request_context.timestamp_gmt_hour"),
-      FeatureMatcher.glob(
-        "semantic_core.media_understanding.high_recall.non_sensitive.entity_ids" + TypedAggregateGroup.SparseFeatureSuffix)
-    )
-
-  val AggregatesV2ProdFeatureSelector = BaseAggregateV2FeatureSelector
-    .orList(PairGlobsToStore.asJava)
-
-  val ReducedPairGlobsToStore = (for {
-    (prefix, suffix) <- BasePairsToStore
-    label <- LabelsToStore
-  } yield FeatureMatcher.glob(prefix + "." + label + "." + suffix)) ++ Seq(
-    FeatureMatcher.glob("user_aggregate_v2.pair.any_label.*"),
-    FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_favorited.*"),
-    FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_photo_expanded.*"),
-    FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_profile_clicked.*")
-  )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/README.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/README.docx
new file mode 100644
index 000000000..a9b5bf9fd
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/README.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/README.md b/src/scala/com/twitter/timelines/prediction/common/aggregates/README.md
deleted file mode 100644
index 0bae21a14..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-## Timelines Aggregation Jobs
-
-This directory contains the specific definition of aggregate jobs that generate features used by the Heavy Ranker. 
-The primary files of interest are [`TimelinesAggregationConfigDetails.scala`](TimelinesAggregationConfigDetails.scala), which contains the defintion for the batch aggregate jobs and [`real_time/TimelinesOnlineAggregationConfigBase.scala`](real_time/TimelinesOnlineAggregationConfigBase.scala) which contains the definitions for the real time aggregate jobs. 
-
-The aggregation framework that these jobs are based on is [here](../../../../../../../../timelines/data_processing/ml_util/aggregation_framework).
\ No newline at end of file
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.docx
new file mode 100644
index 000000000..c0fd7392d
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.scala
deleted file mode 100644
index 657d5a713..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.scala
+++ /dev/null
@@ -1,415 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates
-
-import com.twitter.ml.api.Feature
-import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
-import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures
-import com.twitter.timelines.prediction.features.real_graph.RealGraphDataRecordFeatures
-import com.twitter.timelines.prediction.features.recap.RecapFeatures
-import com.twitter.timelines.prediction.features.time_features.TimeDataRecordFeatures
-
-object RecapUserFeatureAggregation {
-  val RecapFeaturesForAggregation: Set[Feature[_]] =
-    Set(
-      RecapFeatures.HAS_IMAGE,
-      RecapFeatures.HAS_VIDEO,
-      RecapFeatures.FROM_MUTUAL_FOLLOW,
-      RecapFeatures.HAS_CARD,
-      RecapFeatures.HAS_NEWS,
-      RecapFeatures.REPLY_COUNT,
-      RecapFeatures.FAV_COUNT,
-      RecapFeatures.RETWEET_COUNT,
-      RecapFeatures.BLENDER_SCORE,
-      RecapFeatures.CONVERSATIONAL_COUNT,
-      RecapFeatures.IS_BUSINESS_SCORE,
-      RecapFeatures.CONTAINS_MEDIA,
-      RecapFeatures.RETWEET_SEARCHER,
-      RecapFeatures.REPLY_SEARCHER,
-      RecapFeatures.MENTION_SEARCHER,
-      RecapFeatures.REPLY_OTHER,
-      RecapFeatures.RETWEET_OTHER,
-      RecapFeatures.MATCH_UI_LANG,
-      RecapFeatures.MATCH_SEARCHER_MAIN_LANG,
-      RecapFeatures.MATCH_SEARCHER_LANGS,
-      RecapFeatures.TWEET_COUNT_FROM_USER_IN_SNAPSHOT,
-      RecapFeatures.TEXT_SCORE,
-      RealGraphDataRecordFeatures.NUM_RETWEETS_EWMA,
-      RealGraphDataRecordFeatures.NUM_RETWEETS_NON_ZERO_DAYS,
-      RealGraphDataRecordFeatures.NUM_RETWEETS_ELAPSED_DAYS,
-      RealGraphDataRecordFeatures.NUM_RETWEETS_DAYS_SINCE_LAST,
-      RealGraphDataRecordFeatures.NUM_FAVORITES_EWMA,
-      RealGraphDataRecordFeatures.NUM_FAVORITES_NON_ZERO_DAYS,
-      RealGraphDataRecordFeatures.NUM_FAVORITES_ELAPSED_DAYS,
-      RealGraphDataRecordFeatures.NUM_FAVORITES_DAYS_SINCE_LAST,
-      RealGraphDataRecordFeatures.NUM_MENTIONS_EWMA,
-      RealGraphDataRecordFeatures.NUM_MENTIONS_NON_ZERO_DAYS,
-      RealGraphDataRecordFeatures.NUM_MENTIONS_ELAPSED_DAYS,
-      RealGraphDataRecordFeatures.NUM_MENTIONS_DAYS_SINCE_LAST,
-      RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_EWMA,
-      RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_NON_ZERO_DAYS,
-      RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_ELAPSED_DAYS,
-      RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_DAYS_SINCE_LAST,
-      RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_EWMA,
-      RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_NON_ZERO_DAYS,
-      RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_ELAPSED_DAYS,
-      RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_DAYS_SINCE_LAST,
-      RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_EWMA,
-      RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_NON_ZERO_DAYS,
-      RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_ELAPSED_DAYS,
-      RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_DAYS_SINCE_LAST,
-      RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_EWMA,
-      RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_NON_ZERO_DAYS,
-      RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_ELAPSED_DAYS,
-      RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_DAYS_SINCE_LAST
-    )
-
-  val RecapLabelsForAggregation: Set[Feature.Binary] =
-    Set(
-      RecapFeatures.IS_FAVORITED,
-      RecapFeatures.IS_RETWEETED,
-      RecapFeatures.IS_CLICKED,
-      RecapFeatures.IS_PROFILE_CLICKED,
-      RecapFeatures.IS_OPEN_LINKED
-    )
-
-  val DwellDuration: Set[Feature[_]] =
-    Set(
-      TimelinesSharedFeatures.DWELL_TIME_MS,
-    )
-
-  val UserFeaturesV2: Set[Feature[_]] = RecapFeaturesForAggregation ++ Set(
-    RecapFeatures.HAS_VINE,
-    RecapFeatures.HAS_PERISCOPE,
-    RecapFeatures.HAS_PRO_VIDEO,
-    RecapFeatures.HAS_VISIBLE_LINK,
-    RecapFeatures.BIDIRECTIONAL_FAV_COUNT,
-    RecapFeatures.UNIDIRECTIONAL_FAV_COUNT,
-    RecapFeatures.BIDIRECTIONAL_REPLY_COUNT,
-    RecapFeatures.UNIDIRECTIONAL_REPLY_COUNT,
-    RecapFeatures.BIDIRECTIONAL_RETWEET_COUNT,
-    RecapFeatures.UNIDIRECTIONAL_RETWEET_COUNT,
-    RecapFeatures.EMBEDS_URL_COUNT,
-    RecapFeatures.EMBEDS_IMPRESSION_COUNT,
-    RecapFeatures.VIDEO_VIEW_COUNT,
-    RecapFeatures.IS_RETWEET,
-    RecapFeatures.IS_REPLY,
-    RecapFeatures.IS_EXTENDED_REPLY,
-    RecapFeatures.HAS_LINK,
-    RecapFeatures.HAS_TREND,
-    RecapFeatures.LINK_LANGUAGE,
-    RecapFeatures.NUM_HASHTAGS,
-    RecapFeatures.NUM_MENTIONS,
-    RecapFeatures.IS_SENSITIVE,
-    RecapFeatures.HAS_MULTIPLE_MEDIA,
-    RecapFeatures.USER_REP,
-    RecapFeatures.FAV_COUNT_V2,
-    RecapFeatures.RETWEET_COUNT_V2,
-    RecapFeatures.REPLY_COUNT_V2,
-    RecapFeatures.LINK_COUNT,
-    EngagementDataRecordFeatures.InNetworkFavoritesCount,
-    EngagementDataRecordFeatures.InNetworkRetweetsCount,
-    EngagementDataRecordFeatures.InNetworkRepliesCount
-  )
-
-  val UserAuthorFeaturesV2: Set[Feature[_]] = Set(
-    RecapFeatures.HAS_IMAGE,
-    RecapFeatures.HAS_VINE,
-    RecapFeatures.HAS_PERISCOPE,
-    RecapFeatures.HAS_PRO_VIDEO,
-    RecapFeatures.HAS_VIDEO,
-    RecapFeatures.HAS_CARD,
-    RecapFeatures.HAS_NEWS,
-    RecapFeatures.HAS_VISIBLE_LINK,
-    RecapFeatures.REPLY_COUNT,
-    RecapFeatures.FAV_COUNT,
-    RecapFeatures.RETWEET_COUNT,
-    RecapFeatures.BLENDER_SCORE,
-    RecapFeatures.CONVERSATIONAL_COUNT,
-    RecapFeatures.IS_BUSINESS_SCORE,
-    RecapFeatures.CONTAINS_MEDIA,
-    RecapFeatures.RETWEET_SEARCHER,
-    RecapFeatures.REPLY_SEARCHER,
-    RecapFeatures.MENTION_SEARCHER,
-    RecapFeatures.REPLY_OTHER,
-    RecapFeatures.RETWEET_OTHER,
-    RecapFeatures.MATCH_UI_LANG,
-    RecapFeatures.MATCH_SEARCHER_MAIN_LANG,
-    RecapFeatures.MATCH_SEARCHER_LANGS,
-    RecapFeatures.TWEET_COUNT_FROM_USER_IN_SNAPSHOT,
-    RecapFeatures.TEXT_SCORE,
-    RecapFeatures.BIDIRECTIONAL_FAV_COUNT,
-    RecapFeatures.UNIDIRECTIONAL_FAV_COUNT,
-    RecapFeatures.BIDIRECTIONAL_REPLY_COUNT,
-    RecapFeatures.UNIDIRECTIONAL_REPLY_COUNT,
-    RecapFeatures.BIDIRECTIONAL_RETWEET_COUNT,
-    RecapFeatures.UNIDIRECTIONAL_RETWEET_COUNT,
-    RecapFeatures.EMBEDS_URL_COUNT,
-    RecapFeatures.EMBEDS_IMPRESSION_COUNT,
-    RecapFeatures.VIDEO_VIEW_COUNT,
-    RecapFeatures.IS_RETWEET,
-    RecapFeatures.IS_REPLY,
-    RecapFeatures.HAS_LINK,
-    RecapFeatures.HAS_TREND,
-    RecapFeatures.LINK_LANGUAGE,
-    RecapFeatures.NUM_HASHTAGS,
-    RecapFeatures.NUM_MENTIONS,
-    RecapFeatures.IS_SENSITIVE,
-    RecapFeatures.HAS_MULTIPLE_MEDIA,
-    RecapFeatures.FAV_COUNT_V2,
-    RecapFeatures.RETWEET_COUNT_V2,
-    RecapFeatures.REPLY_COUNT_V2,
-    RecapFeatures.LINK_COUNT,
-    EngagementDataRecordFeatures.InNetworkFavoritesCount,
-    EngagementDataRecordFeatures.InNetworkRetweetsCount,
-    EngagementDataRecordFeatures.InNetworkRepliesCount
-  )
-
-  val UserAuthorFeaturesV2Count: Set[Feature[_]] = Set(
-    RecapFeatures.HAS_IMAGE,
-    RecapFeatures.HAS_VINE,
-    RecapFeatures.HAS_PERISCOPE,
-    RecapFeatures.HAS_PRO_VIDEO,
-    RecapFeatures.HAS_VIDEO,
-    RecapFeatures.HAS_CARD,
-    RecapFeatures.HAS_NEWS,
-    RecapFeatures.HAS_VISIBLE_LINK,
-    RecapFeatures.FAV_COUNT,
-    RecapFeatures.CONTAINS_MEDIA,
-    RecapFeatures.RETWEET_SEARCHER,
-    RecapFeatures.REPLY_SEARCHER,
-    RecapFeatures.MENTION_SEARCHER,
-    RecapFeatures.REPLY_OTHER,
-    RecapFeatures.RETWEET_OTHER,
-    RecapFeatures.MATCH_UI_LANG,
-    RecapFeatures.MATCH_SEARCHER_MAIN_LANG,
-    RecapFeatures.MATCH_SEARCHER_LANGS,
-    RecapFeatures.IS_RETWEET,
-    RecapFeatures.IS_REPLY,
-    RecapFeatures.HAS_LINK,
-    RecapFeatures.HAS_TREND,
-    RecapFeatures.IS_SENSITIVE,
-    RecapFeatures.HAS_MULTIPLE_MEDIA,
-    EngagementDataRecordFeatures.InNetworkFavoritesCount
-  )
-
-  val UserTopicFeaturesV2Count: Set[Feature[_]] = Set(
-    RecapFeatures.HAS_IMAGE,
-    RecapFeatures.HAS_VIDEO,
-    RecapFeatures.HAS_CARD,
-    RecapFeatures.HAS_NEWS,
-    RecapFeatures.FAV_COUNT,
-    RecapFeatures.CONTAINS_MEDIA,
-    RecapFeatures.RETWEET_SEARCHER,
-    RecapFeatures.REPLY_SEARCHER,
-    RecapFeatures.MENTION_SEARCHER,
-    RecapFeatures.REPLY_OTHER,
-    RecapFeatures.RETWEET_OTHER,
-    RecapFeatures.MATCH_UI_LANG,
-    RecapFeatures.MATCH_SEARCHER_MAIN_LANG,
-    RecapFeatures.MATCH_SEARCHER_LANGS,
-    RecapFeatures.IS_RETWEET,
-    RecapFeatures.IS_REPLY,
-    RecapFeatures.HAS_LINK,
-    RecapFeatures.HAS_TREND,
-    RecapFeatures.IS_SENSITIVE,
-    EngagementDataRecordFeatures.InNetworkFavoritesCount,
-    EngagementDataRecordFeatures.InNetworkRetweetsCount,
-    TimelinesSharedFeatures.NUM_CAPS,
-    TimelinesSharedFeatures.ASPECT_RATIO_DEN,
-    TimelinesSharedFeatures.NUM_NEWLINES,
-    TimelinesSharedFeatures.IS_360,
-    TimelinesSharedFeatures.IS_MANAGED,
-    TimelinesSharedFeatures.IS_MONETIZABLE,
-    TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE,
-    TimelinesSharedFeatures.HAS_TITLE,
-    TimelinesSharedFeatures.HAS_DESCRIPTION,
-    TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION,
-    TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION
-  )
-
-  val UserFeaturesV5Continuous: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.QUOTE_COUNT,
-    TimelinesSharedFeatures.VISIBLE_TOKEN_RATIO,
-    TimelinesSharedFeatures.WEIGHTED_FAV_COUNT,
-    TimelinesSharedFeatures.WEIGHTED_RETWEET_COUNT,
-    TimelinesSharedFeatures.WEIGHTED_REPLY_COUNT,
-    TimelinesSharedFeatures.WEIGHTED_QUOTE_COUNT,
-    TimelinesSharedFeatures.EMBEDS_IMPRESSION_COUNT_V2,
-    TimelinesSharedFeatures.EMBEDS_URL_COUNT_V2,
-    TimelinesSharedFeatures.DECAYED_FAVORITE_COUNT,
-    TimelinesSharedFeatures.DECAYED_RETWEET_COUNT,
-    TimelinesSharedFeatures.DECAYED_REPLY_COUNT,
-    TimelinesSharedFeatures.DECAYED_QUOTE_COUNT,
-    TimelinesSharedFeatures.FAKE_FAVORITE_COUNT,
-    TimelinesSharedFeatures.FAKE_RETWEET_COUNT,
-    TimelinesSharedFeatures.FAKE_REPLY_COUNT,
-    TimelinesSharedFeatures.FAKE_QUOTE_COUNT,
-    TimeDataRecordFeatures.LAST_FAVORITE_SINCE_CREATION_HRS,
-    TimeDataRecordFeatures.LAST_RETWEET_SINCE_CREATION_HRS,
-    TimeDataRecordFeatures.LAST_REPLY_SINCE_CREATION_HRS,
-    TimeDataRecordFeatures.LAST_QUOTE_SINCE_CREATION_HRS,
-    TimeDataRecordFeatures.TIME_SINCE_LAST_FAVORITE_HRS,
-    TimeDataRecordFeatures.TIME_SINCE_LAST_RETWEET_HRS,
-    TimeDataRecordFeatures.TIME_SINCE_LAST_REPLY_HRS,
-    TimeDataRecordFeatures.TIME_SINCE_LAST_QUOTE_HRS
-  )
-
-  val UserFeaturesV5Boolean: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.LABEL_ABUSIVE_FLAG,
-    TimelinesSharedFeatures.LABEL_ABUSIVE_HI_RCL_FLAG,
-    TimelinesSharedFeatures.LABEL_DUP_CONTENT_FLAG,
-    TimelinesSharedFeatures.LABEL_NSFW_HI_PRC_FLAG,
-    TimelinesSharedFeatures.LABEL_NSFW_HI_RCL_FLAG,
-    TimelinesSharedFeatures.LABEL_SPAM_FLAG,
-    TimelinesSharedFeatures.LABEL_SPAM_HI_RCL_FLAG,
-    TimelinesSharedFeatures.PERISCOPE_EXISTS,
-    TimelinesSharedFeatures.PERISCOPE_IS_LIVE,
-    TimelinesSharedFeatures.PERISCOPE_HAS_BEEN_FEATURED,
-    TimelinesSharedFeatures.PERISCOPE_IS_CURRENTLY_FEATURED,
-    TimelinesSharedFeatures.PERISCOPE_IS_FROM_QUALITY_SOURCE,
-    TimelinesSharedFeatures.HAS_QUOTE
-  )
-
-  val UserAuthorFeaturesV5: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.HAS_QUOTE,
-    TimelinesSharedFeatures.LABEL_ABUSIVE_FLAG,
-    TimelinesSharedFeatures.LABEL_ABUSIVE_HI_RCL_FLAG,
-    TimelinesSharedFeatures.LABEL_DUP_CONTENT_FLAG,
-    TimelinesSharedFeatures.LABEL_NSFW_HI_PRC_FLAG,
-    TimelinesSharedFeatures.LABEL_NSFW_HI_RCL_FLAG,
-    TimelinesSharedFeatures.LABEL_SPAM_FLAG,
-    TimelinesSharedFeatures.LABEL_SPAM_HI_RCL_FLAG
-  )
-
-  val UserTweetSourceFeaturesV1Continuous: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.NUM_CAPS,
-    TimelinesSharedFeatures.NUM_WHITESPACES,
-    TimelinesSharedFeatures.TWEET_LENGTH,
-    TimelinesSharedFeatures.ASPECT_RATIO_DEN,
-    TimelinesSharedFeatures.ASPECT_RATIO_NUM,
-    TimelinesSharedFeatures.BIT_RATE,
-    TimelinesSharedFeatures.HEIGHT_1,
-    TimelinesSharedFeatures.HEIGHT_2,
-    TimelinesSharedFeatures.HEIGHT_3,
-    TimelinesSharedFeatures.HEIGHT_4,
-    TimelinesSharedFeatures.VIDEO_DURATION,
-    TimelinesSharedFeatures.WIDTH_1,
-    TimelinesSharedFeatures.WIDTH_2,
-    TimelinesSharedFeatures.WIDTH_3,
-    TimelinesSharedFeatures.WIDTH_4,
-    TimelinesSharedFeatures.NUM_MEDIA_TAGS
-  )
-
-  val UserTweetSourceFeaturesV1Boolean: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.HAS_QUESTION,
-    TimelinesSharedFeatures.RESIZE_METHOD_1,
-    TimelinesSharedFeatures.RESIZE_METHOD_2,
-    TimelinesSharedFeatures.RESIZE_METHOD_3,
-    TimelinesSharedFeatures.RESIZE_METHOD_4
-  )
-
-  val UserTweetSourceFeaturesV2Continuous: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.NUM_EMOJIS,
-    TimelinesSharedFeatures.NUM_EMOTICONS,
-    TimelinesSharedFeatures.NUM_NEWLINES,
-    TimelinesSharedFeatures.NUM_STICKERS,
-    TimelinesSharedFeatures.NUM_FACES,
-    TimelinesSharedFeatures.NUM_COLOR_PALLETTE_ITEMS,
-    TimelinesSharedFeatures.VIEW_COUNT,
-    TimelinesSharedFeatures.TWEET_LENGTH_TYPE
-  )
-
-  val UserTweetSourceFeaturesV2Boolean: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.IS_360,
-    TimelinesSharedFeatures.IS_MANAGED,
-    TimelinesSharedFeatures.IS_MONETIZABLE,
-    TimelinesSharedFeatures.IS_EMBEDDABLE,
-    TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE,
-    TimelinesSharedFeatures.HAS_TITLE,
-    TimelinesSharedFeatures.HAS_DESCRIPTION,
-    TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION,
-    TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION
-  )
-
-  val UserAuthorTweetSourceFeaturesV1: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.HAS_QUESTION,
-    TimelinesSharedFeatures.TWEET_LENGTH,
-    TimelinesSharedFeatures.VIDEO_DURATION,
-    TimelinesSharedFeatures.NUM_MEDIA_TAGS
-  )
-
-  val UserAuthorTweetSourceFeaturesV2: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.NUM_CAPS,
-    TimelinesSharedFeatures.NUM_WHITESPACES,
-    TimelinesSharedFeatures.ASPECT_RATIO_DEN,
-    TimelinesSharedFeatures.ASPECT_RATIO_NUM,
-    TimelinesSharedFeatures.BIT_RATE,
-    TimelinesSharedFeatures.TWEET_LENGTH_TYPE,
-    TimelinesSharedFeatures.NUM_EMOJIS,
-    TimelinesSharedFeatures.NUM_EMOTICONS,
-    TimelinesSharedFeatures.NUM_NEWLINES,
-    TimelinesSharedFeatures.NUM_STICKERS,
-    TimelinesSharedFeatures.NUM_FACES,
-    TimelinesSharedFeatures.IS_360,
-    TimelinesSharedFeatures.IS_MANAGED,
-    TimelinesSharedFeatures.IS_MONETIZABLE,
-    TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE,
-    TimelinesSharedFeatures.HAS_TITLE,
-    TimelinesSharedFeatures.HAS_DESCRIPTION,
-    TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION,
-    TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION
-  )
-
-  val UserAuthorTweetSourceFeaturesV2Count: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.NUM_CAPS,
-    TimelinesSharedFeatures.ASPECT_RATIO_DEN,
-    TimelinesSharedFeatures.NUM_NEWLINES,
-    TimelinesSharedFeatures.IS_360,
-    TimelinesSharedFeatures.IS_MANAGED,
-    TimelinesSharedFeatures.IS_MONETIZABLE,
-    TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE,
-    TimelinesSharedFeatures.HAS_TITLE,
-    TimelinesSharedFeatures.HAS_DESCRIPTION,
-    TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION,
-    TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION
-  )
-
-  val LabelsV2: Set[Feature.Binary] = RecapLabelsForAggregation ++ Set(
-    RecapFeatures.IS_REPLIED,
-    RecapFeatures.IS_PHOTO_EXPANDED,
-    RecapFeatures.IS_VIDEO_PLAYBACK_50
-  )
-
-  val TwitterWideFeatures: Set[Feature[_]] = Set(
-    RecapFeatures.IS_REPLY,
-    TimelinesSharedFeatures.HAS_QUOTE,
-    RecapFeatures.HAS_MENTION,
-    RecapFeatures.HAS_HASHTAG,
-    RecapFeatures.HAS_LINK,
-    RecapFeatures.HAS_CARD,
-    RecapFeatures.CONTAINS_MEDIA
-  )
-
-  val TwitterWideLabels: Set[Feature.Binary] = Set(
-    RecapFeatures.IS_FAVORITED,
-    RecapFeatures.IS_RETWEETED,
-    RecapFeatures.IS_REPLIED
-  )
-
-  val ReciprocalLabels: Set[Feature.Binary] = Set(
-    RecapFeatures.IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR,
-    RecapFeatures.IS_REPLIED_REPLY_REPLIED_BY_AUTHOR,
-    RecapFeatures.IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR
-  )
-
-  val NegativeEngagementLabels: Set[Feature.Binary] = Set(
-    RecapFeatures.IS_REPORT_TWEET_CLICKED,
-    RecapFeatures.IS_BLOCK_CLICKED,
-    RecapFeatures.IS_MUTE_CLICKED,
-    RecapFeatures.IS_DONT_LIKE
-  )
-
-  val GoodClickLabels: Set[Feature.Binary] = Set(
-    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V1,
-    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V2,
-  )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.docx
new file mode 100644
index 000000000..9e9299e6a
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.scala
deleted file mode 100644
index 12835ef1f..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.scala
+++ /dev/null
@@ -1,52 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates
-
-import com.twitter.ml.api.Feature
-import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures
-import com.twitter.timelines.prediction.features.itl.ITLFeatures
-
-object RectweetUserFeatureAggregation {
-  val RectweetLabelsForAggregation: Set[Feature.Binary] =
-    Set(
-      ITLFeatures.IS_FAVORITED,
-      ITLFeatures.IS_RETWEETED,
-      ITLFeatures.IS_REPLIED,
-      ITLFeatures.IS_CLICKED,
-      ITLFeatures.IS_PROFILE_CLICKED,
-      ITLFeatures.IS_OPEN_LINKED,
-      ITLFeatures.IS_PHOTO_EXPANDED,
-      ITLFeatures.IS_VIDEO_PLAYBACK_50
-    )
-
-  val TweetFeatures: Set[Feature[_]] = Set(
-    ITLFeatures.HAS_IMAGE,
-    ITLFeatures.HAS_CARD,
-    ITLFeatures.HAS_NEWS,
-    ITLFeatures.REPLY_COUNT,
-    ITLFeatures.FAV_COUNT,
-    ITLFeatures.REPLY_COUNT,
-    ITLFeatures.RETWEET_COUNT,
-    ITLFeatures.MATCHES_UI_LANG,
-    ITLFeatures.MATCHES_SEARCHER_MAIN_LANG,
-    ITLFeatures.MATCHES_SEARCHER_LANGS,
-    ITLFeatures.TEXT_SCORE,
-    ITLFeatures.LINK_LANGUAGE,
-    ITLFeatures.NUM_HASHTAGS,
-    ITLFeatures.NUM_MENTIONS,
-    ITLFeatures.IS_SENSITIVE,
-    ITLFeatures.HAS_VIDEO,
-    ITLFeatures.HAS_LINK,
-    ITLFeatures.HAS_VISIBLE_LINK,
-    EngagementDataRecordFeatures.InNetworkFavoritesCount
-    // nice to have, but currently not hydrated in the RecommendedTweet payload
-    //EngagementDataRecordFeatures.InNetworkRetweetsCount,
-    //EngagementDataRecordFeatures.InNetworkRepliesCount
-  )
-
-  val ReciprocalLabels: Set[Feature.Binary] = Set(
-    ITLFeatures.IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR,
-    ITLFeatures.IS_REPLIED_REPLY_REPLIED_BY_AUTHOR,
-    ITLFeatures.IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR,
-    ITLFeatures.IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR,
-    ITLFeatures.IS_REPLIED_REPLY_QUOTED_BY_AUTHOR
-  )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.docx
new file mode 100644
index 000000000..a8e2690cc
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.scala
deleted file mode 100644
index e6581e32e..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.scala
+++ /dev/null
@@ -1,80 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates
-
-import com.twitter.dal.client.dataset.KeyValDALDataset
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.api.FeatureContext
-import com.twitter.scalding_internal.multiformat.format.keyval
-import com.twitter.summingbird.batch.BatchID
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion.CombineCountsPolicy
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateStore
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.OfflineAggregateDataRecordStore
-import scala.collection.JavaConverters._
-
-object TimelinesAggregationConfig extends TimelinesAggregationConfigTrait {
-  override def outputHdfsPath: String = "/user/timelines/processed/aggregates_v2"
-
-  def storeToDatasetMap: Map[String, KeyValDALDataset[
-    keyval.KeyVal[AggregationKey, (BatchID, DataRecord)]
-  ]] = Map(
-    AuthorTopicAggregateStore -> AuthorTopicAggregatesScalaDataset,
-    UserTopicAggregateStore -> UserTopicAggregatesScalaDataset,
-    UserInferredTopicAggregateStore -> UserInferredTopicAggregatesScalaDataset,
-    UserAggregateStore -> UserAggregatesScalaDataset,
-    UserAuthorAggregateStore -> UserAuthorAggregatesScalaDataset,
-    UserOriginalAuthorAggregateStore -> UserOriginalAuthorAggregatesScalaDataset,
-    OriginalAuthorAggregateStore -> OriginalAuthorAggregatesScalaDataset,
-    UserEngagerAggregateStore -> UserEngagerAggregatesScalaDataset,
-    UserMentionAggregateStore -> UserMentionAggregatesScalaDataset,
-    TwitterWideUserAggregateStore -> TwitterWideUserAggregatesScalaDataset,
-    TwitterWideUserAuthorAggregateStore -> TwitterWideUserAuthorAggregatesScalaDataset,
-    UserRequestHourAggregateStore -> UserRequestHourAggregatesScalaDataset,
-    UserRequestDowAggregateStore -> UserRequestDowAggregatesScalaDataset,
-    UserListAggregateStore -> UserListAggregatesScalaDataset,
-    UserMediaUnderstandingAnnotationAggregateStore -> UserMediaUnderstandingAnnotationAggregatesScalaDataset,
-  )
-
-  override def mkPhysicalStore(store: AggregateStore): AggregateStore = store match {
-    case s: OfflineAggregateDataRecordStore =>
-      s.toOfflineAggregateDataRecordStoreWithDAL(storeToDatasetMap(s.name))
-    case _ => throw new IllegalArgumentException("Unsupported logical dataset type.")
-  }
-
-  object CombineCountPolicies {
-    val EngagerCountsPolicy: CombineCountsPolicy = mkCountsPolicy("user_engager_aggregate")
-    val EngagerGoodClickCountsPolicy: CombineCountsPolicy = mkCountsPolicy(
-      "user_engager_good_click_aggregate")
-    val RectweetEngagerCountsPolicy: CombineCountsPolicy =
-      mkCountsPolicy("rectweet_user_engager_aggregate")
-    val MentionCountsPolicy: CombineCountsPolicy = mkCountsPolicy("user_mention_aggregate")
-    val RectweetSimclustersTweetCountsPolicy: CombineCountsPolicy =
-      mkCountsPolicy("rectweet_user_simcluster_tweet_aggregate")
-    val UserInferredTopicCountsPolicy: CombineCountsPolicy =
-      mkCountsPolicy("user_inferred_topic_aggregate")
-    val UserInferredTopicV2CountsPolicy: CombineCountsPolicy =
-      mkCountsPolicy("user_inferred_topic_aggregate_v2")
-    val UserMediaUnderstandingAnnotationCountsPolicy: CombineCountsPolicy =
-      mkCountsPolicy("user_media_annotation_aggregate")
-
-    private[this] def mkCountsPolicy(prefix: String): CombineCountsPolicy = {
-      val features = TimelinesAggregationConfig.aggregatesToCompute
-        .filter(_.aggregatePrefix == prefix)
-        .flatMap(_.allOutputFeatures)
-      CombineCountsPolicy(
-        topK = 2,
-        aggregateContextToPrecompute = new FeatureContext(features.asJava),
-        hardLimit = Some(20)
-      )
-    }
-  }
-}
-
-object TimelinesAggregationCanaryConfig extends TimelinesAggregationConfigTrait {
-  override def outputHdfsPath: String = "/user/timelines/canaries/processed/aggregates_v2"
-
-  override def mkPhysicalStore(store: AggregateStore): AggregateStore = store match {
-    case s: OfflineAggregateDataRecordStore =>
-      s.toOfflineAggregateDataRecordStoreWithDAL(dalDataset = AggregatesCanaryScalaDataset)
-    case _ => throw new IllegalArgumentException("Unsupported logical dataset type.")
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.docx
new file mode 100644
index 000000000..1e37e61cf
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.scala
deleted file mode 100644
index aa439deda..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.scala
+++ /dev/null
@@ -1,579 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates
-
-import com.twitter.conversions.DurationOps._
-import com.twitter.ml.api.constant.SharedFeatures.AUTHOR_ID
-import com.twitter.ml.api.constant.SharedFeatures.USER_ID
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework._
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics._
-import com.twitter.timelines.data_processing.ml_util.transforms.DownsampleTransform
-import com.twitter.timelines.data_processing.ml_util.transforms.RichRemoveAuthorIdZero
-import com.twitter.timelines.data_processing.ml_util.transforms.RichRemoveUserIdZero
-import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
-import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures
-import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures.RichUnifyPublicEngagersTransform
-import com.twitter.timelines.prediction.features.list_features.ListFeatures
-import com.twitter.timelines.prediction.features.recap.RecapFeatures
-import com.twitter.timelines.prediction.features.request_context.RequestContextFeatures
-import com.twitter.timelines.prediction.features.semantic_core_features.SemanticCoreFeatures
-import com.twitter.timelines.prediction.transform.filter.FilterInNetworkTransform
-import com.twitter.timelines.prediction.transform.filter.FilterImageTweetTransform
-import com.twitter.timelines.prediction.transform.filter.FilterVideoTweetTransform
-import com.twitter.timelines.prediction.transform.filter.FilterOutImageVideoTweetTransform
-import com.twitter.util.Duration
-
-trait TimelinesAggregationConfigDetails extends Serializable {
-
-  import TimelinesAggregationSources._
-
-  def outputHdfsPath: String
-
-  /**
-   * Converts the given logical store to a physical store. The reason we do not specify the
-   * physical store directly with the [[AggregateGroup]] is because of a cyclic dependency when
-   * create physical stores that are DalDataset with PersonalDataType annotations derived from
-   * the [[AggregateGroup]].
-   *
-   */
-  def mkPhysicalStore(store: AggregateStore): AggregateStore
-
-  def defaultMaxKvSourceFailures: Int = 100
-
-  val timelinesOfflineAggregateSink = new OfflineStoreCommonConfig {
-    override def apply(startDate: String) = OfflineAggregateStoreCommonConfig(
-      outputHdfsPathPrefix = outputHdfsPath,
-      dummyAppId = "timelines_aggregates_v2_ro",
-      dummyDatasetPrefix = "timelines_aggregates_v2_ro",
-      startDate = startDate
-    )
-  }
-
-  val UserAggregateStore = "user_aggregates"
-  val UserAuthorAggregateStore = "user_author_aggregates"
-  val UserOriginalAuthorAggregateStore = "user_original_author_aggregates"
-  val OriginalAuthorAggregateStore = "original_author_aggregates"
-  val UserEngagerAggregateStore = "user_engager_aggregates"
-  val UserMentionAggregateStore = "user_mention_aggregates"
-  val TwitterWideUserAggregateStore = "twitter_wide_user_aggregates"
-  val TwitterWideUserAuthorAggregateStore = "twitter_wide_user_author_aggregates"
-  val UserRequestHourAggregateStore = "user_request_hour_aggregates"
-  val UserRequestDowAggregateStore = "user_request_dow_aggregates"
-  val UserListAggregateStore = "user_list_aggregates"
-  val AuthorTopicAggregateStore = "author_topic_aggregates"
-  val UserTopicAggregateStore = "user_topic_aggregates"
-  val UserInferredTopicAggregateStore = "user_inferred_topic_aggregates"
-  val UserMediaUnderstandingAnnotationAggregateStore =
-    "user_media_understanding_annotation_aggregates"
-  val AuthorCountryCodeAggregateStore = "author_country_code_aggregates"
-  val OriginalAuthorCountryCodeAggregateStore = "original_author_country_code_aggregates"
-
-  /**
-   * Step 3: Configure all aggregates to compute.
-   * Note that different subsets of aggregates in this list
-   * can be launched by different summingbird job instances.
-   * Any given job can be responsible for a set of AggregateGroup
-   * configs whose outputStores share the same exact startDate.
-   * AggregateGroups that do not share the same inputSource,
-   * outputStore or startDate MUST be launched using different
-   * summingbird jobs and passed in a different --start-time argument
-   * See science/scalding/mesos/timelines/prod.yaml for an example
-   * of how to configure your own job.
-   */
-  val negativeDownsampleTransform =
-    DownsampleTransform(
-      negativeSamplingRate = 0.03,
-      keepLabels = RecapUserFeatureAggregation.LabelsV2)
-  val negativeRecTweetDownsampleTransform = DownsampleTransform(
-    negativeSamplingRate = 0.03,
-    keepLabels = RectweetUserFeatureAggregation.RectweetLabelsForAggregation
-  )
-
-  val userAggregatesV2: AggregateGroup =
-    AggregateGroup(
-      inputSource = timelinesDailyRecapMinimalSource,
-      aggregatePrefix = "user_aggregate_v2",
-      preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */
-      keys = Set(USER_ID),
-      features = RecapUserFeatureAggregation.UserFeaturesV2,
-      labels = RecapUserFeatureAggregation.LabelsV2,
-      metrics = Set(CountMetric, SumMetric),
-      halfLives = Set(50.days),
-      outputStore = mkPhysicalStore(
-        OfflineAggregateDataRecordStore(
-          name = UserAggregateStore,
-          startDate = "2016-07-15 00:00",
-          commonConfig = timelinesOfflineAggregateSink,
-          maxKvSourceFailures = defaultMaxKvSourceFailures
-        ))
-    )
-
-  val userAuthorAggregatesV2: Set[AggregateGroup] = {
-
-    /**
-     * NOTE: We need to remove records from out-of-network authors from the recap input
-     * records (which now include out-of-network records as well after merging recap and
-     * rectweet models) that are used to compute user-author aggregates. This is necessary
-     * to limit the growth rate of user-author aggregates.
-     */
-    val allFeatureAggregates = Set(
-      AggregateGroup(
-        inputSource = timelinesDailyRecapMinimalSource,
-        aggregatePrefix = "user_author_aggregate_v2",
-        preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
-        keys = Set(USER_ID, AUTHOR_ID),
-        features = RecapUserFeatureAggregation.UserAuthorFeaturesV2,
-        labels = RecapUserFeatureAggregation.LabelsV2,
-        metrics = Set(SumMetric),
-        halfLives = Set(50.days),
-        outputStore = mkPhysicalStore(
-          OfflineAggregateDataRecordStore(
-            name = UserAuthorAggregateStore,
-            startDate = "2016-07-15 00:00",
-            commonConfig = timelinesOfflineAggregateSink,
-            maxKvSourceFailures = defaultMaxKvSourceFailures
-          ))
-      )
-    )
-
-    val countAggregates: Set[AggregateGroup] = Set(
-      AggregateGroup(
-        inputSource = timelinesDailyRecapMinimalSource,
-        aggregatePrefix = "user_author_aggregate_v2",
-        preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
-        keys = Set(USER_ID, AUTHOR_ID),
-        features = RecapUserFeatureAggregation.UserAuthorFeaturesV2Count,
-        labels = RecapUserFeatureAggregation.LabelsV2,
-        metrics = Set(CountMetric),
-        halfLives = Set(50.days),
-        outputStore = mkPhysicalStore(
-          OfflineAggregateDataRecordStore(
-            name = UserAuthorAggregateStore,
-            startDate = "2016-07-15 00:00",
-            commonConfig = timelinesOfflineAggregateSink,
-            maxKvSourceFailures = defaultMaxKvSourceFailures
-          ))
-      )
-    )
-
-    allFeatureAggregates ++ countAggregates
-  }
-
-  val userAggregatesV5Continuous: AggregateGroup =
-    AggregateGroup(
-      inputSource = timelinesDailyRecapMinimalSource,
-      aggregatePrefix = "user_aggregate_v5.continuous",
-      preTransforms = Seq(RichRemoveUserIdZero),
-      keys = Set(USER_ID),
-      features = RecapUserFeatureAggregation.UserFeaturesV5Continuous,
-      labels = RecapUserFeatureAggregation.LabelsV2,
-      metrics = Set(CountMetric, SumMetric, SumSqMetric),
-      halfLives = Set(50.days),
-      outputStore = mkPhysicalStore(
-        OfflineAggregateDataRecordStore(
-          name = UserAggregateStore,
-          startDate = "2016-07-15 00:00",
-          commonConfig = timelinesOfflineAggregateSink,
-          maxKvSourceFailures = defaultMaxKvSourceFailures
-        ))
-    )
-
-  val userAuthorAggregatesV5: AggregateGroup =
-    AggregateGroup(
-      inputSource = timelinesDailyRecapMinimalSource,
-      aggregatePrefix = "user_author_aggregate_v5",
-      preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
-      keys = Set(USER_ID, AUTHOR_ID),
-      features = RecapUserFeatureAggregation.UserAuthorFeaturesV5,
-      labels = RecapUserFeatureAggregation.LabelsV2,
-      metrics = Set(CountMetric),
-      halfLives = Set(50.days),
-      outputStore = mkPhysicalStore(
-        OfflineAggregateDataRecordStore(
-          name = UserAuthorAggregateStore,
-          startDate = "2016-07-15 00:00",
-          commonConfig = timelinesOfflineAggregateSink,
-          maxKvSourceFailures = defaultMaxKvSourceFailures
-        ))
-    )
-
-  val tweetSourceUserAuthorAggregatesV1: AggregateGroup =
-    AggregateGroup(
-      inputSource = timelinesDailyRecapMinimalSource,
-      aggregatePrefix = "user_author_aggregate_tweetsource_v1",
-      preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
-      keys = Set(USER_ID, AUTHOR_ID),
-      features = RecapUserFeatureAggregation.UserAuthorTweetSourceFeaturesV1,
-      labels = RecapUserFeatureAggregation.LabelsV2,
-      metrics = Set(CountMetric, SumMetric),
-      halfLives = Set(50.days),
-      outputStore = mkPhysicalStore(
-        OfflineAggregateDataRecordStore(
-          name = UserAuthorAggregateStore,
-          startDate = "2016-07-15 00:00",
-          commonConfig = timelinesOfflineAggregateSink,
-          maxKvSourceFailures = defaultMaxKvSourceFailures
-        ))
-    )
-
-  val userEngagerAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_engager_aggregate",
-    keys = Set(USER_ID, EngagementDataRecordFeatures.PublicEngagementUserIds),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserEngagerAggregateStore,
-        startDate = "2016-09-02 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      )),
-    preTransforms = Seq(
-      RichRemoveUserIdZero,
-      RichUnifyPublicEngagersTransform
-    )
-  )
-
-  val userMentionAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */
-    aggregatePrefix = "user_mention_aggregate",
-    keys = Set(USER_ID, RecapFeatures.MENTIONED_SCREEN_NAMES),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserMentionAggregateStore,
-        startDate = "2017-03-01 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      )),
-    includeAnyLabel = false
-  )
-
-  val twitterWideUserAggregates = AggregateGroup(
-    inputSource = timelinesDailyTwitterWideSource,
-    preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */
-    aggregatePrefix = "twitter_wide_user_aggregate",
-    keys = Set(USER_ID),
-    features = RecapUserFeatureAggregation.TwitterWideFeatures,
-    labels = RecapUserFeatureAggregation.TwitterWideLabels,
-    metrics = Set(CountMetric, SumMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = TwitterWideUserAggregateStore,
-        startDate = "2016-12-28 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      ))
-  )
-
-  val twitterWideUserAuthorAggregates = AggregateGroup(
-    inputSource = timelinesDailyTwitterWideSource,
-    preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */
-    aggregatePrefix = "twitter_wide_user_author_aggregate",
-    keys = Set(USER_ID, AUTHOR_ID),
-    features = RecapUserFeatureAggregation.TwitterWideFeatures,
-    labels = RecapUserFeatureAggregation.TwitterWideLabels,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = TwitterWideUserAuthorAggregateStore,
-        startDate = "2016-12-28 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      )),
-    includeAnyLabel = false
-  )
-
-  /**
-   * User-HourOfDay and User-DayOfWeek aggregations, both for recap and rectweet
-   */
-  val userRequestHourAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_request_context_aggregate.hour",
-    preTransforms = Seq(RichRemoveUserIdZero, negativeDownsampleTransform),
-    keys = Set(USER_ID, RequestContextFeatures.TIMESTAMP_GMT_HOUR),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserRequestHourAggregateStore,
-        startDate = "2017-08-01 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      ))
-  )
-
-  val userRequestDowAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_request_context_aggregate.dow",
-    preTransforms = Seq(RichRemoveUserIdZero, negativeDownsampleTransform),
-    keys = Set(USER_ID, RequestContextFeatures.TIMESTAMP_GMT_DOW),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserRequestDowAggregateStore,
-        startDate = "2017-08-01 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      ))
-  )
-
-  val authorTopicAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "author_topic_aggregate",
-    preTransforms = Seq(RichRemoveUserIdZero),
-    keys = Set(AUTHOR_ID, TimelinesSharedFeatures.TOPIC_ID),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = AuthorTopicAggregateStore,
-        startDate = "2020-05-19 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      ))
-  )
-
-  val userTopicAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_topic_aggregate",
-    preTransforms = Seq(RichRemoveUserIdZero),
-    keys = Set(USER_ID, TimelinesSharedFeatures.TOPIC_ID),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserTopicAggregateStore,
-        startDate = "2020-05-23 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      ))
-  )
-
-  val userTopicAggregatesV2 = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_topic_aggregate_v2",
-    preTransforms = Seq(RichRemoveUserIdZero),
-    keys = Set(USER_ID, TimelinesSharedFeatures.TOPIC_ID),
-    features = RecapUserFeatureAggregation.UserTopicFeaturesV2Count,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    includeAnyFeature = false,
-    includeAnyLabel = false,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserTopicAggregateStore,
-        startDate = "2020-05-23 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      ))
-  )
-
-  val userInferredTopicAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_inferred_topic_aggregate",
-    preTransforms = Seq(RichRemoveUserIdZero),
-    keys = Set(USER_ID, TimelinesSharedFeatures.INFERRED_TOPIC_IDS),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserInferredTopicAggregateStore,
-        startDate = "2020-09-09 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      ))
-  )
-
-  val userInferredTopicAggregatesV2 = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_inferred_topic_aggregate_v2",
-    preTransforms = Seq(RichRemoveUserIdZero),
-    keys = Set(USER_ID, TimelinesSharedFeatures.INFERRED_TOPIC_IDS),
-    features = RecapUserFeatureAggregation.UserTopicFeaturesV2Count,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    includeAnyFeature = false,
-    includeAnyLabel = false,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserInferredTopicAggregateStore,
-        startDate = "2020-09-09 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      ))
-  )
-
-  val userReciprocalEngagementAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_aggregate_v6",
-    preTransforms = Seq(RichRemoveUserIdZero),
-    keys = Set(USER_ID),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.ReciprocalLabels,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserAggregateStore,
-        startDate = "2016-07-15 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      )),
-    includeAnyLabel = false
-  )
-
-  val userOriginalAuthorReciprocalEngagementAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_original_author_aggregate_v1",
-    preTransforms = Seq(RichRemoveUserIdZero, RichRemoveAuthorIdZero),
-    keys = Set(USER_ID, TimelinesSharedFeatures.ORIGINAL_AUTHOR_ID),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.ReciprocalLabels,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserOriginalAuthorAggregateStore,
-        startDate = "2018-12-26 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      )),
-    includeAnyLabel = false
-  )
-
-  val originalAuthorReciprocalEngagementAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "original_author_aggregate_v1",
-    preTransforms = Seq(RichRemoveUserIdZero, RichRemoveAuthorIdZero),
-    keys = Set(TimelinesSharedFeatures.ORIGINAL_AUTHOR_ID),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.ReciprocalLabels,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = OriginalAuthorAggregateStore,
-        startDate = "2023-02-25 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      )),
-    includeAnyLabel = false
-  )
-
-  val originalAuthorNegativeEngagementAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "original_author_aggregate_v2",
-    preTransforms = Seq(RichRemoveUserIdZero, RichRemoveAuthorIdZero),
-    keys = Set(TimelinesSharedFeatures.ORIGINAL_AUTHOR_ID),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.NegativeEngagementLabels,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = OriginalAuthorAggregateStore,
-        startDate = "2023-02-25 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      )),
-    includeAnyLabel = false
-  )
-
-  val userListAggregates: AggregateGroup =
-    AggregateGroup(
-      inputSource = timelinesDailyRecapMinimalSource,
-      aggregatePrefix = "user_list_aggregate",
-      keys = Set(USER_ID, ListFeatures.LIST_ID),
-      features = Set.empty,
-      labels = RecapUserFeatureAggregation.LabelsV2,
-      metrics = Set(CountMetric),
-      halfLives = Set(50.days),
-      outputStore = mkPhysicalStore(
-        OfflineAggregateDataRecordStore(
-          name = UserListAggregateStore,
-          startDate = "2020-05-28 00:00",
-          commonConfig = timelinesOfflineAggregateSink,
-          maxKvSourceFailures = defaultMaxKvSourceFailures
-        )),
-      preTransforms = Seq(RichRemoveUserIdZero)
-    )
-
-  val userMediaUnderstandingAnnotationAggregates: AggregateGroup = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_media_annotation_aggregate",
-    preTransforms = Seq(RichRemoveUserIdZero),
-    keys =
-      Set(USER_ID, SemanticCoreFeatures.mediaUnderstandingHighRecallNonSensitiveEntityIdsFeature),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.LabelsV2,
-    metrics = Set(CountMetric),
-    halfLives = Set(50.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserMediaUnderstandingAnnotationAggregateStore,
-        startDate = "2021-03-20 00:00",
-        commonConfig = timelinesOfflineAggregateSink
-      ))
-  )
-
-  val userAuthorGoodClickAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_author_good_click_aggregate",
-    preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
-    keys = Set(USER_ID, AUTHOR_ID),
-    features = RecapUserFeatureAggregation.UserAuthorFeaturesV2,
-    labels = RecapUserFeatureAggregation.GoodClickLabels,
-    metrics = Set(SumMetric),
-    halfLives = Set(14.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserAuthorAggregateStore,
-        startDate = "2016-07-15 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      ))
-  )
-
-  val userEngagerGoodClickAggregates = AggregateGroup(
-    inputSource = timelinesDailyRecapMinimalSource,
-    aggregatePrefix = "user_engager_good_click_aggregate",
-    keys = Set(USER_ID, EngagementDataRecordFeatures.PublicEngagementUserIds),
-    features = Set.empty,
-    labels = RecapUserFeatureAggregation.GoodClickLabels,
-    metrics = Set(CountMetric),
-    halfLives = Set(14.days),
-    outputStore = mkPhysicalStore(
-      OfflineAggregateDataRecordStore(
-        name = UserEngagerAggregateStore,
-        startDate = "2016-09-02 00:00",
-        commonConfig = timelinesOfflineAggregateSink,
-        maxKvSourceFailures = defaultMaxKvSourceFailures
-      )),
-    preTransforms = Seq(
-      RichRemoveUserIdZero,
-      RichUnifyPublicEngagersTransform
-    )
-  )
-
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.docx
new file mode 100644
index 000000000..1389d8c20
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.scala
deleted file mode 100644
index 6fb2e07b7..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.scala
+++ /dev/null
@@ -1,50 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates
-
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationConfig
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
-
-trait TimelinesAggregationConfigTrait
-    extends TimelinesAggregationConfigDetails
-    with AggregationConfig {
-  private val aggregateGroups = Set(
-    authorTopicAggregates,
-    userTopicAggregates,
-    userTopicAggregatesV2,
-    userInferredTopicAggregates,
-    userInferredTopicAggregatesV2,
-    userAggregatesV2,
-    userAggregatesV5Continuous,
-    userReciprocalEngagementAggregates,
-    userAuthorAggregatesV5,
-    userOriginalAuthorReciprocalEngagementAggregates,
-    originalAuthorReciprocalEngagementAggregates,
-    tweetSourceUserAuthorAggregatesV1,
-    userEngagerAggregates,
-    userMentionAggregates,
-    twitterWideUserAggregates,
-    twitterWideUserAuthorAggregates,
-    userRequestHourAggregates,
-    userRequestDowAggregates,
-    userListAggregates,
-    userMediaUnderstandingAnnotationAggregates,
-  ) ++ userAuthorAggregatesV2
-
-  val aggregatesToComputeList: Set[List[TypedAggregateGroup[_]]] =
-    aggregateGroups.map(_.buildTypedAggregateGroups())
-
-  override val aggregatesToCompute: Set[TypedAggregateGroup[_]] = aggregatesToComputeList.flatten
-
-  /*
-   * Feature selection config to save storage space and manhattan query bandwidth.
-   * Only the most important features found using offline RCE simulations are used
-   * when actually training and serving. This selector is used by
-   * [[com.twitter.timelines.data_processing.jobs.timeline_ranking_user_features.TimelineRankingAggregatesV2FeaturesProdJob]]
-   * but defined here to keep it in sync with the config that computes the aggregates.
-   */
-  val AggregatesV2FeatureSelector = FeatureSelectorConfig.AggregatesV2ProdFeatureSelector
-
-  def filterAggregatesGroups(storeNames: Set[String]): Set[AggregateGroup] = {
-    aggregateGroups.filter(aggregateGroup => storeNames.contains(aggregateGroup.outputStore.name))
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.docx
new file mode 100644
index 000000000..5960168d4
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.scala
deleted file mode 100644
index 1f2433b53..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates
-
-import com.twitter.ml.api.DataRecord
-import com.twitter.scalding_internal.multiformat.format.keyval.KeyValInjection
-import com.twitter.summingbird.batch.BatchID
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.{
-  AggregateStore,
-  AggregationKey,
-  OfflineAggregateInjections,
-  TypedAggregateGroup
-}
-
-object TimelinesAggregationKeyValInjections extends TimelinesAggregationConfigTrait {
-
-  import OfflineAggregateInjections.getInjection
-
-  type KVInjection = KeyValInjection[AggregationKey, (BatchID, DataRecord)]
-
-  val AuthorTopic: KVInjection = getInjection(filter(AuthorTopicAggregateStore))
-  val UserTopic: KVInjection = getInjection(filter(UserTopicAggregateStore))
-  val UserInferredTopic: KVInjection = getInjection(filter(UserInferredTopicAggregateStore))
-  val User: KVInjection = getInjection(filter(UserAggregateStore))
-  val UserAuthor: KVInjection = getInjection(filter(UserAuthorAggregateStore))
-  val UserOriginalAuthor: KVInjection = getInjection(filter(UserOriginalAuthorAggregateStore))
-  val OriginalAuthor: KVInjection = getInjection(filter(OriginalAuthorAggregateStore))
-  val UserEngager: KVInjection = getInjection(filter(UserEngagerAggregateStore))
-  val UserMention: KVInjection = getInjection(filter(UserMentionAggregateStore))
-  val TwitterWideUser: KVInjection = getInjection(filter(TwitterWideUserAggregateStore))
-  val TwitterWideUserAuthor: KVInjection = getInjection(filter(TwitterWideUserAuthorAggregateStore))
-  val UserRequestHour: KVInjection = getInjection(filter(UserRequestHourAggregateStore))
-  val UserRequestDow: KVInjection = getInjection(filter(UserRequestDowAggregateStore))
-  val UserList: KVInjection = getInjection(filter(UserListAggregateStore))
-  val UserMediaUnderstandingAnnotation: KVInjection = getInjection(
-    filter(UserMediaUnderstandingAnnotationAggregateStore))
-
-  private def filter(storeName: String): Set[TypedAggregateGroup[_]] = {
-    val groups = aggregatesToCompute.filter(_.outputStore.name == storeName)
-    require(groups.nonEmpty)
-    groups
-  }
-
-  override def outputHdfsPath: String = "/user/timelines/processed/aggregates_v2"
-
-  // Since this object is not used to execute any online or offline aggregates job, but is meant
-  // to store all PDT enabled KeyValInjections, we do not need to construct a physical store.
-  // We use the identity operation as a default.
-  override def mkPhysicalStore(store: AggregateStore): AggregateStore = store
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.docx
new file mode 100644
index 000000000..71fd8ba05
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.scala
deleted file mode 100644
index c799f22fa..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.scala
+++ /dev/null
@@ -1,45 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates
-
-import com.twitter.ml.api.constant.SharedFeatures.TIMESTAMP
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.OfflineAggregateSource
-import com.twitter.timelines.prediction.features.p_home_latest.HomeLatestUserAggregatesFeatures
-import timelines.data_processing.ad_hoc.recap.data_record_preparation.RecapDataRecordsAggMinimalJavaDataset
-
-/**
- * Any update here should be in sync with [[TimelinesFeatureGroups]] and [[AggMinimalDataRecordGeneratorJob]].
- */
-object TimelinesAggregationSources {
-
-  /**
-   * This is the recap data records after post-processing in [[GenerateRecapAggMinimalDataRecordsJob]]
-   */
-  val timelinesDailyRecapMinimalSource = OfflineAggregateSource(
-    name = "timelines_daily_recap",
-    timestampFeature = TIMESTAMP,
-    dalDataSet = Some(RecapDataRecordsAggMinimalJavaDataset),
-    scaldingSuffixType = Some("dal"),
-    withValidation = true
-  )
-  val timelinesDailyTwitterWideSource = OfflineAggregateSource(
-    name = "timelines_daily_twitter_wide",
-    timestampFeature = TIMESTAMP,
-    scaldingHdfsPath = Some("/user/timelines/processed/suggests/recap/twitter_wide_data_records"),
-    scaldingSuffixType = Some("daily"),
-    withValidation = true
-  )
-
-  val timelinesDailyListTimelineSource = OfflineAggregateSource(
-    name = "timelines_daily_list_timeline",
-    timestampFeature = TIMESTAMP,
-    scaldingHdfsPath = Some("/user/timelines/processed/suggests/recap/all_features/list"),
-    scaldingSuffixType = Some("hourly"),
-    withValidation = true
-  )
-
-  val timelinesDailyHomeLatestSource = OfflineAggregateSource(
-    name = "timelines_daily_home_latest",
-    timestampFeature = HomeLatestUserAggregatesFeatures.AGGREGATE_TIMESTAMP_MS,
-    scaldingHdfsPath = Some("/user/timelines/processed/p_home_latest/user_aggregates"),
-    scaldingSuffixType = Some("daily")
-  )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.docx
new file mode 100644
index 000000000..f50b427be
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.scala
deleted file mode 100644
index 7cefc67b9..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.scala
+++ /dev/null
@@ -1,70 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType.UserState
-import com.twitter.ml.api.Feature.Binary
-import com.twitter.ml.api.{DataRecord, Feature, FeatureContext, RichDataRecord}
-import com.twitter.ml.featurestore.catalog.entities.core.Author
-import com.twitter.ml.featurestore.catalog.features.magicrecs.UserActivity
-import com.twitter.ml.featurestore.lib.data.PredictionRecord
-import com.twitter.ml.featurestore.lib.feature.{BoundFeature, BoundFeatureSet}
-import com.twitter.ml.featurestore.lib.{UserId, Discrete => FSDiscrete}
-import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
-import java.lang.{Boolean => JBoolean}
-import java.util
-import scala.collection.JavaConverters._
-
-object AuthorFeaturesAdapter extends TimelinesAdapterBase[PredictionRecord] {
-  val UserStateBoundFeature: BoundFeature[UserId, FSDiscrete] = UserActivity.UserState.bind(Author)
-  val UserFeaturesSet: BoundFeatureSet = BoundFeatureSet(UserStateBoundFeature)
-
-  /**
-   * Boolean features about viewer's user state. 
-   * enum UserState {
-   *   NEW = 0,
-   *   NEAR_ZERO = 1,
-   *   VERY_LIGHT = 2,
-   *   LIGHT = 3,
-   *   MEDIUM_TWEETER = 4,
-   *   MEDIUM_NON_TWEETER = 5,
-   *   HEAVY_NON_TWEETER = 6,
-   *   HEAVY_TWEETER = 7
-   * }(persisted='true')
-   */
-  val IS_USER_NEW = new Binary("timelines.author.user_state.is_user_new", Set(UserState).asJava)
-  val IS_USER_LIGHT = new Binary("timelines.author.user_state.is_user_light", Set(UserState).asJava)
-  val IS_USER_MEDIUM_TWEETER =
-    new Binary("timelines.author.user_state.is_user_medium_tweeter", Set(UserState).asJava)
-  val IS_USER_MEDIUM_NON_TWEETER =
-    new Binary("timelines.author.user_state.is_user_medium_non_tweeter", Set(UserState).asJava)
-  val IS_USER_HEAVY_NON_TWEETER =
-    new Binary("timelines.author.user_state.is_user_heavy_non_tweeter", Set(UserState).asJava)
-  val IS_USER_HEAVY_TWEETER =
-    new Binary("timelines.author.user_state.is_user_heavy_tweeter", Set(UserState).asJava)
-  val userStateToFeatureMap: Map[Long, Binary] = Map(
-    0L -> IS_USER_NEW,
-    1L -> IS_USER_LIGHT,
-    2L -> IS_USER_LIGHT,
-    3L -> IS_USER_LIGHT,
-    4L -> IS_USER_MEDIUM_TWEETER,
-    5L -> IS_USER_MEDIUM_NON_TWEETER,
-    6L -> IS_USER_HEAVY_NON_TWEETER,
-    7L -> IS_USER_HEAVY_TWEETER
-  )
-
-  val UserStateBooleanFeatures: Set[Feature[_]] = userStateToFeatureMap.values.toSet
-
-  private val allFeatures: Seq[Feature[_]] = UserStateBooleanFeatures.toSeq
-  override def getFeatureContext: FeatureContext = new FeatureContext(allFeatures: _*)
-  override def commonFeatures: Set[Feature[_]] = Set.empty
-
-  override def adaptToDataRecords(record: PredictionRecord): util.List[DataRecord] = {
-    val newRecord = new RichDataRecord(new DataRecord)
-    record
-      .getFeatureValue(UserStateBoundFeature)
-      .flatMap { userState => userStateToFeatureMap.get(userState.value) }.foreach {
-        booleanFeature => newRecord.setFeatureValue[JBoolean](booleanFeature, true)
-      }
-
-    List(newRecord.getRecord).asJava
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD
deleted file mode 100644
index 93f39405d..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD
+++ /dev/null
@@ -1,199 +0,0 @@
-heron_binary(
-    name = "heron-without-jass",
-    main = "com.twitter.timelines.prediction.common.aggregates.real_time.TypeSafeRunner",
-    oss = True,
-    platform = "java8",
-    runtime_platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":real_time",
-        "3rdparty/jvm/org/slf4j:slf4j-jdk14",
-    ],
-)
-
-jvm_app(
-    name = "rta_heron",
-    binary = ":heron-without-jass",
-    bundles = [
-        bundle(
-            fileset = ["resources/jaas.conf"],
-        ),
-    ],
-    tags = [
-        "bazel-compatible",
-        "bazel-only",
-    ],
-)
-
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    strict_deps = False,
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":online-configs",
-        "3rdparty/src/jvm/com/twitter/summingbird:storm",
-        "src/java/com/twitter/heron/util",
-        "src/java/com/twitter/ml/api:api-base",
-        "src/java/com/twitter/ml/api/constant",
-        "src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core:core-features",
-        "src/scala/com/twitter/ml/api/util",
-        "src/scala/com/twitter/storehaus_internal/memcache",
-        "src/scala/com/twitter/storehaus_internal/util",
-        "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits",
-        "src/scala/com/twitter/summingbird_internal/runner/store_config",
-        "src/scala/com/twitter/summingbird_internal/runner/storm",
-        "src/scala/com/twitter/summingbird_internal/sources/storm/remote:ClientEventSourceScrooge2",
-        "src/scala/com/twitter/timelines/prediction/adapters/client_log_event",
-        "src/scala/com/twitter/timelines/prediction/adapters/client_log_event_mr",
-        "src/scala/com/twitter/timelines/prediction/features/client_log_event",
-        "src/scala/com/twitter/timelines/prediction/features/common",
-        "src/scala/com/twitter/timelines/prediction/features/list_features",
-        "src/scala/com/twitter/timelines/prediction/features/recap",
-        "src/scala/com/twitter/timelines/prediction/features/user_health",
-        "src/thrift/com/twitter/ml/api:data-java",
-        "src/thrift/com/twitter/timelines/suggests/common:record-scala",
-        "timelinemixer/common/src/main/scala/com/twitter/timelinemixer/clients/served_features_cache",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-        "timelines/data_processing/ml_util/aggregation_framework/heron",
-        "timelines/data_processing/ml_util/aggregation_framework/job",
-        "timelines/data_processing/ml_util/aggregation_framework/metrics",
-        "timelines/data_processing/ml_util/transforms",
-        "timelines/src/main/scala/com/twitter/timelines/clients/memcache_common",
-        "util/util-core:scala",
-    ],
-)
-
-scala_library(
-    name = "online-configs",
-    sources = [
-        "AuthorFeaturesAdapter.scala",
-        "Event.scala",
-        "FeatureStoreUtils.scala",
-        "StormAggregateSourceUtils.scala",
-        "TimelinesOnlineAggregationConfig.scala",
-        "TimelinesOnlineAggregationConfigBase.scala",
-        "TimelinesOnlineAggregationSources.scala",
-        "TimelinesStormAggregateSource.scala",
-        "TweetFeaturesReadableStore.scala",
-        "UserFeaturesAdapter.scala",
-        "UserFeaturesReadableStore.scala",
-    ],
-    platform = "java8",
-    strict_deps = True,
-    tags = ["bazel-compatible"],
-    dependencies = [
-        ":base-config",
-        "3rdparty/src/jvm/com/twitter/scalding:db",
-        "3rdparty/src/jvm/com/twitter/storehaus:core",
-        "3rdparty/src/jvm/com/twitter/summingbird:core",
-        "3rdparty/src/jvm/com/twitter/summingbird:online",
-        "3rdparty/src/jvm/com/twitter/summingbird:storm",
-        "abuse/detection/src/main/thrift/com/twitter/abuse/detection/mention_interactions:thrift-scala",
-        "snowflake/src/main/scala/com/twitter/snowflake/id",
-        "snowflake/src/main/thrift:thrift-scala",
-        "src/java/com/twitter/ml/api:api-base",
-        "src/java/com/twitter/ml/api/constant",
-        "src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core:core-features",
-        "src/scala/com/twitter/ml/api/util:datarecord",
-        "src/scala/com/twitter/ml/featurestore/catalog/datasets/geo:geo-user-location",
-        "src/scala/com/twitter/ml/featurestore/catalog/datasets/magicrecs:user-features",
-        "src/scala/com/twitter/ml/featurestore/catalog/entities/core",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/core:user",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/geo",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-activity",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-info",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/trends:tweet_trends_scores",
-        "src/scala/com/twitter/ml/featurestore/lib/data",
-        "src/scala/com/twitter/ml/featurestore/lib/dataset/offline",
-        "src/scala/com/twitter/ml/featurestore/lib/export/strato:app-names",
-        "src/scala/com/twitter/ml/featurestore/lib/feature",
-        "src/scala/com/twitter/ml/featurestore/lib/online",
-        "src/scala/com/twitter/ml/featurestore/lib/params",
-        "src/scala/com/twitter/storehaus_internal/util",
-        "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits",
-        "src/scala/com/twitter/summingbird_internal/runner/store_config",
-        "src/scala/com/twitter/summingbird_internal/runner/storm",
-        "src/scala/com/twitter/summingbird_internal/sources/common",
-        "src/scala/com/twitter/summingbird_internal/sources/common/remote:ClientEventSourceScrooge",
-        "src/scala/com/twitter/summingbird_internal/sources/storm/remote:ClientEventSourceScrooge2",
-        "src/scala/com/twitter/timelines/prediction/adapters/client_log_event",
-        "src/scala/com/twitter/timelines/prediction/adapters/client_log_event_mr",
-        "src/scala/com/twitter/timelines/prediction/common/adapters:base",
-        "src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter",
-        "src/scala/com/twitter/timelines/prediction/common/aggregates",
-        "src/scala/com/twitter/timelines/prediction/features/client_log_event",
-        "src/scala/com/twitter/timelines/prediction/features/common",
-        "src/scala/com/twitter/timelines/prediction/features/list_features",
-        "src/scala/com/twitter/timelines/prediction/features/recap",
-        "src/scala/com/twitter/timelines/prediction/features/user_health",
-        "src/thrift/com/twitter/clientapp/gen:clientapp-scala",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/ml/api:data-java",
-        "src/thrift/com/twitter/timelines/suggests/common:engagement-java",
-        "src/thrift/com/twitter/timelines/suggests/common:engagement-scala",
-        "src/thrift/com/twitter/timelines/suggests/common:record-scala",
-        "src/thrift/com/twitter/timelineservice/injection:thrift-scala",
-        "src/thrift/com/twitter/timelineservice/server/suggests/logging:thrift-scala",
-        "strato/src/main/scala/com/twitter/strato/client",
-        "timelinemixer/common/src/main/scala/com/twitter/timelinemixer/clients/served_features_cache",
-        "timelines/data_processing/ad_hoc/suggests/common:raw_training_data_creator",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-        "timelines/data_processing/ml_util/aggregation_framework/heron:configs",
-        "timelines/data_processing/ml_util/aggregation_framework/metrics",
-        "timelines/data_processing/ml_util/transforms",
-        "timelines/data_processing/util:rich-request",
-        "tweetsource/common/src/main/thrift:thrift-scala",
-        "twitter-server-internal/src/main/scala",
-        "unified_user_actions/client/src/main/scala/com/twitter/unified_user_actions/client/config",
-        "unified_user_actions/client/src/main/scala/com/twitter/unified_user_actions/client/summingbird",
-        "unified_user_actions/thrift/src/main/thrift/com/twitter/unified_user_actions:unified_user_actions-scala",
-        "util/util-core:scala",
-        "util/util-stats/src/main/scala/com/twitter/finagle/stats",
-    ],
-)
-
-scala_library(
-    name = "base-config",
-    sources = [
-        "AuthorFeaturesAdapter.scala",
-        "TimelinesOnlineAggregationConfigBase.scala",
-        "TweetFeaturesAdapter.scala",
-        "UserFeaturesAdapter.scala",
-    ],
-    platform = "java8",
-    strict_deps = True,
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/java/com/twitter/ml/api/constant",
-        "src/resources/com/twitter/timelines/prediction/common/aggregates/real_time",
-        "src/scala/com/twitter/ml/api/util:datarecord",
-        "src/scala/com/twitter/ml/featurestore/catalog/datasets/magicrecs:user-features",
-        "src/scala/com/twitter/ml/featurestore/catalog/entities/core",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/core:user",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/geo",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-activity",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-info",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/trends:tweet_trends_scores",
-        "src/scala/com/twitter/ml/featurestore/lib/data",
-        "src/scala/com/twitter/ml/featurestore/lib/feature",
-        "src/scala/com/twitter/timelines/prediction/common/adapters:base",
-        "src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter",
-        "src/scala/com/twitter/timelines/prediction/common/aggregates",
-        "src/scala/com/twitter/timelines/prediction/features/client_log_event",
-        "src/scala/com/twitter/timelines/prediction/features/common",
-        "src/scala/com/twitter/timelines/prediction/features/list_features",
-        "src/scala/com/twitter/timelines/prediction/features/recap",
-        "src/scala/com/twitter/timelines/prediction/features/user_health",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/ml/api:feature_context-java",
-        "src/thrift/com/twitter/timelines/suggests/common:engagement-scala",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-        "timelines/data_processing/ml_util/aggregation_framework/heron:base-config",
-        "timelines/data_processing/ml_util/aggregation_framework/metrics",
-        "timelines/data_processing/ml_util/transforms",
-        "util/util-core:scala",
-        "util/util-core:util-core-util",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD.docx
new file mode 100644
index 000000000..3ebae7619
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.docx
new file mode 100644
index 000000000..ed42b61d0
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.scala
deleted file mode 100644
index 1bd697d0d..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.scala
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-private[real_time] sealed trait Event[T] { def event: T }
-
-private[real_time] case class HomeEvent[T](override val event: T) extends Event[T]
-
-private[real_time] case class ProfileEvent[T](override val event: T) extends Event[T]
-
-private[real_time] case class SearchEvent[T](override val event: T) extends Event[T]
-
-private[real_time] case class UuaEvent[T](override val event: T) extends Event[T]
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.docx
new file mode 100644
index 000000000..13250c1c9
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.scala
deleted file mode 100644
index 156d9d35f..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.finagle.mtls.authentication.ServiceIdentifier
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.ml.featurestore.catalog.datasets.magicrecs.UserFeaturesDataset
-import com.twitter.ml.featurestore.catalog.datasets.geo.GeoUserLocationDataset
-import com.twitter.ml.featurestore.lib.dataset.DatasetParams
-import com.twitter.ml.featurestore.lib.export.strato.FeatureStoreAppNames
-import com.twitter.ml.featurestore.lib.online.FeatureStoreClient
-import com.twitter.ml.featurestore.lib.params.FeatureStoreParams
-import com.twitter.strato.client.{Client, Strato}
-import com.twitter.strato.opcontext.Attribution.ManhattanAppId
-import com.twitter.util.Duration
-
-private[real_time] object FeatureStoreUtils {
-  private def mkStratoClient(serviceIdentifier: ServiceIdentifier): Client =
-    Strato.client
-      .withMutualTls(serviceIdentifier)
-      .withRequestTimeout(Duration.fromMilliseconds(50))
-      .build()
-
-  private val featureStoreParams: FeatureStoreParams =
-    FeatureStoreParams(
-      perDataset = Map(
-        UserFeaturesDataset.id ->
-          DatasetParams(
-            stratoSuffix = Some(FeatureStoreAppNames.Timelines),
-            attributions = Seq(ManhattanAppId("athena", "timelines_aggregates_v2_features_by_user"))
-          ),
-        GeoUserLocationDataset.id ->
-          DatasetParams(
-            attributions = Seq(ManhattanAppId("starbuck", "timelines_geo_features_by_user"))
-          )
-      )
-    )
-
-  def mkFeatureStoreClient(
-    serviceIdentifier: ServiceIdentifier,
-    statsReceiver: StatsReceiver
-  ): FeatureStoreClient = {
-    com.twitter.server.Init() // necessary in order to use WilyNS path
-
-    val stratoClient: Client = mkStratoClient(serviceIdentifier)
-    val featureStoreClient: FeatureStoreClient = FeatureStoreClient(
-      featureSet =
-        UserFeaturesAdapter.UserFeaturesSet ++ AuthorFeaturesAdapter.UserFeaturesSet ++ TweetFeaturesAdapter.TweetFeaturesSet,
-      client = stratoClient,
-      statsReceiver = statsReceiver,
-      featureStoreParams = featureStoreParams
-    )
-    featureStoreClient
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.docx
new file mode 100644
index 000000000..09768903c
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.scala
deleted file mode 100644
index 42f86fa4f..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.scala
+++ /dev/null
@@ -1,79 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.conversions.DurationOps._
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.storehaus.ReplicatedReadableStore
-import com.twitter.storehaus.Store
-import com.twitter.timelines.clients.memcache_common._
-import com.twitter.timelines.util.FailOpenHandler
-import com.twitter.util.Future
-
-object ServedFeaturesMemcacheConfigBuilder {
-  def getTwCacheDestination(cluster: String, isProd: Boolean = false): String =
-    if (!isProd) {
-      s"/srv#/test/$cluster/cache//twemcache_timelines_served_features_cache"
-    } else {
-      s"/srv#/prod/$cluster/cache/timelines_served_features"
-    }
-
-  /**
-   * @cluster The DC of the cache that this client will send requests to. This
-   *   can be different to the DC where the summingbird job is running in.
-   * @isProd  Define if this client is part of a production summingbird job as
-   *   different accesspoints will need to be chosen.
-   */
-  def build(cluster: String, isProd: Boolean = false): StorehausMemcacheConfig =
-    StorehausMemcacheConfig(
-      destName = getTwCacheDestination(cluster, isProd),
-      keyPrefix = "",
-      requestTimeout = 200.milliseconds,
-      numTries = 2,
-      globalTimeout = 400.milliseconds,
-      tcpConnectTimeout = 200.milliseconds,
-      connectionAcquisitionTimeout = 200.milliseconds,
-      numPendingRequests = 1000,
-      isReadOnly = false
-    )
-}
-
-/**
- * If lookup key does not exist locally, make a call to the replicated store(s).
- * If value exists remotely, write the first returned value to the local store
- * and return it. Map any exceptions to None so that the subsequent operations
- * may proceed.
- */
-class LocallyReplicatedStore[-K, V](
-  localStore: Store[K, V],
-  remoteStore: ReplicatedReadableStore[K, V],
-  scopedStatsReceiver: StatsReceiver)
-    extends Store[K, V] {
-  private[this] val failOpenHandler = new FailOpenHandler(scopedStatsReceiver.scope("failOpen"))
-  private[this] val localFailsCounter = scopedStatsReceiver.counter("localFails")
-  private[this] val localWritesCounter = scopedStatsReceiver.counter("localWrites")
-  private[this] val remoteFailsCounter = scopedStatsReceiver.counter("remoteFails")
-
-  override def get(k: K): Future[Option[V]] =
-    failOpenHandler {
-      localStore
-        .get(k)
-        .flatMap {
-          case Some(v) => Future.value(Some(v))
-          case _ => {
-            localFailsCounter.incr()
-            val replicatedOptFu = remoteStore.get(k)
-            // async write if result is not empty
-            replicatedOptFu.onSuccess {
-              case Some(v) => {
-                localWritesCounter.incr()
-                localStore.put((k, Some(v)))
-              }
-              case _ => {
-                remoteFailsCounter.incr()
-                Unit
-              }
-            }
-            replicatedOptFu
-          }
-        }
-    } { _: Throwable => Future.None }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.docx
new file mode 100644
index 000000000..cf9cd7b7f
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.scala
deleted file mode 100644
index e72d3392b..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.scala
+++ /dev/null
@@ -1,254 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.finagle.stats.Counter
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.ml.api.constant.SharedFeatures
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.api.DataRecordMerger
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.RichDataRecord
-import com.twitter.ml.featurestore.catalog.entities.core.Author
-import com.twitter.ml.featurestore.catalog.entities.core.Tweet
-import com.twitter.ml.featurestore.catalog.entities.core.User
-import com.twitter.ml.featurestore.lib.online.FeatureStoreClient
-import com.twitter.summingbird.Producer
-import com.twitter.summingbird.storm.Storm
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.RealTimeAggregatesJobConfig
-import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
-import java.lang.{Long => JLong}
-
-import com.twitter.unified_user_actions.thriftscala.ActionType
-import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction
-
-private[real_time] object StormAggregateSourceUtils {
-  type UserId = Long
-  type AuthorId = Long
-  type TweetId = Long
-
-  /**
-   * Attaches a [[FeatureStoreClient]] to the underyling [[Producer]]. The FeatureStoreClient
-   * hydrates additional user features.
-   *
-   * @param underlyingProducer converts a stream of [[com.twitter.clientapp.thriftscala.LogEvent]]
-   *                           to a stream of [[DataRecord]].
-   */
-  def wrapByFeatureStoreClient(
-    underlyingProducer: Producer[Storm, Event[DataRecord]],
-    jobConfig: RealTimeAggregatesJobConfig,
-    scopedStatsReceiver: StatsReceiver
-  ): Producer[Storm, Event[DataRecord]] = {
-    lazy val keyDataRecordCounter = scopedStatsReceiver.counter("keyDataRecord")
-    lazy val keyFeatureCounter = scopedStatsReceiver.counter("keyFeature")
-    lazy val leftDataRecordCounter = scopedStatsReceiver.counter("leftDataRecord")
-    lazy val rightDataRecordCounter = scopedStatsReceiver.counter("rightDataRecord")
-    lazy val mergeNumFeaturesCounter = scopedStatsReceiver.counter("mergeNumFeatures")
-    lazy val authorKeyDataRecordCounter = scopedStatsReceiver.counter("authorKeyDataRecord")
-    lazy val authorKeyFeatureCounter = scopedStatsReceiver.counter("authorKeyFeature")
-    lazy val authorLeftDataRecordCounter = scopedStatsReceiver.counter("authorLeftDataRecord")
-    lazy val authorRightDataRecordCounter = scopedStatsReceiver.counter("authorRightDataRecord")
-    lazy val authorMergeNumFeaturesCounter = scopedStatsReceiver.counter("authorMergeNumFeatures")
-    lazy val tweetKeyDataRecordCounter =
-      scopedStatsReceiver.counter("tweetKeyDataRecord")
-    lazy val tweetKeyFeatureCounter = scopedStatsReceiver.counter("tweetKeyFeature")
-    lazy val tweetLeftDataRecordCounter =
-      scopedStatsReceiver.counter("tweetLeftDataRecord")
-    lazy val tweetRightDataRecordCounter =
-      scopedStatsReceiver.counter("tweetRightDataRecord")
-    lazy val tweetMergeNumFeaturesCounter =
-      scopedStatsReceiver.counter("tweetMergeNumFeatures")
-
-    @transient lazy val featureStoreClient: FeatureStoreClient =
-      FeatureStoreUtils.mkFeatureStoreClient(
-        serviceIdentifier = jobConfig.serviceIdentifier,
-        statsReceiver = scopedStatsReceiver
-      )
-
-    lazy val joinUserFeaturesDataRecordProducer =
-      if (jobConfig.keyedByUserEnabled) {
-        lazy val keyedByUserFeaturesStormService: Storm#Service[Set[UserId], DataRecord] =
-          Storm.service(
-            new UserFeaturesReadableStore(
-              featureStoreClient = featureStoreClient,
-              userEntity = User,
-              userFeaturesAdapter = UserFeaturesAdapter
-            )
-          )
-
-        leftJoinDataRecordProducer(
-          keyFeature = SharedFeatures.USER_ID,
-          leftDataRecordProducer = underlyingProducer,
-          rightStormService = keyedByUserFeaturesStormService,
-          keyDataRecordCounter = keyDataRecordCounter,
-          keyFeatureCounter = keyFeatureCounter,
-          leftDataRecordCounter = leftDataRecordCounter,
-          rightDataRecordCounter = rightDataRecordCounter,
-          mergeNumFeaturesCounter = mergeNumFeaturesCounter
-        )
-      } else {
-        underlyingProducer
-      }
-
-    lazy val joinAuthorFeaturesDataRecordProducer =
-      if (jobConfig.keyedByAuthorEnabled) {
-        lazy val keyedByAuthorFeaturesStormService: Storm#Service[Set[AuthorId], DataRecord] =
-          Storm.service(
-            new UserFeaturesReadableStore(
-              featureStoreClient = featureStoreClient,
-              userEntity = Author,
-              userFeaturesAdapter = AuthorFeaturesAdapter
-            )
-          )
-
-        leftJoinDataRecordProducer(
-          keyFeature = TimelinesSharedFeatures.SOURCE_AUTHOR_ID,
-          leftDataRecordProducer = joinUserFeaturesDataRecordProducer,
-          rightStormService = keyedByAuthorFeaturesStormService,
-          keyDataRecordCounter = authorKeyDataRecordCounter,
-          keyFeatureCounter = authorKeyFeatureCounter,
-          leftDataRecordCounter = authorLeftDataRecordCounter,
-          rightDataRecordCounter = authorRightDataRecordCounter,
-          mergeNumFeaturesCounter = authorMergeNumFeaturesCounter
-        )
-      } else {
-        joinUserFeaturesDataRecordProducer
-      }
-
-    lazy val joinTweetFeaturesDataRecordProducer = {
-      if (jobConfig.keyedByTweetEnabled) {
-        lazy val keyedByTweetFeaturesStormService: Storm#Service[Set[TweetId], DataRecord] =
-          Storm.service(
-            new TweetFeaturesReadableStore(
-              featureStoreClient = featureStoreClient,
-              tweetEntity = Tweet,
-              tweetFeaturesAdapter = TweetFeaturesAdapter
-            )
-          )
-
-        leftJoinDataRecordProducer(
-          keyFeature = TimelinesSharedFeatures.SOURCE_TWEET_ID,
-          leftDataRecordProducer = joinAuthorFeaturesDataRecordProducer,
-          rightStormService = keyedByTweetFeaturesStormService,
-          keyDataRecordCounter = tweetKeyDataRecordCounter,
-          keyFeatureCounter = tweetKeyFeatureCounter,
-          leftDataRecordCounter = tweetLeftDataRecordCounter,
-          rightDataRecordCounter = tweetRightDataRecordCounter,
-          mergeNumFeaturesCounter = tweetMergeNumFeaturesCounter
-        )
-      } else {
-        joinAuthorFeaturesDataRecordProducer
-      }
-    }
-
-    joinTweetFeaturesDataRecordProducer
-  }
-
-  private[this] lazy val DataRecordMerger = new DataRecordMerger
-
-  /**
-   * Make join key from the client event data record and return both.
-   * @param keyFeature Feature to extract join key value: USER_ID, SOURCE_TWEET_ID, etc.
-   * @param record DataRecord containing client engagement and basic tweet-side features
-   * @return The return type is a tuple of this key and original data record which will be used
-   *         in the subsequent leftJoin operation.
-   */
-  private[this] def mkKey(
-    keyFeature: Feature[JLong],
-    record: DataRecord,
-    keyDataRecordCounter: Counter,
-    keyFeatureCounter: Counter
-  ): Set[Long] = {
-    keyDataRecordCounter.incr()
-    val richRecord = new RichDataRecord(record)
-    if (richRecord.hasFeature(keyFeature)) {
-      keyFeatureCounter.incr()
-      val key: Long = richRecord.getFeatureValue(keyFeature).toLong
-      Set(key)
-    } else {
-      Set.empty[Long]
-    }
-  }
-
-  /**
-   * After the leftJoin, merge the client event data record and the joined data record
-   * into a single data record used for further aggregation.
-   */
-  private[this] def mergeDataRecord(
-    leftRecord: Event[DataRecord],
-    rightRecordOpt: Option[DataRecord],
-    leftDataRecordCounter: Counter,
-    rightDataRecordCounter: Counter,
-    mergeNumFeaturesCounter: Counter
-  ): Event[DataRecord] = {
-    leftDataRecordCounter.incr()
-    rightRecordOpt.foreach { rightRecord =>
-      rightDataRecordCounter.incr()
-      DataRecordMerger.merge(leftRecord.event, rightRecord)
-      mergeNumFeaturesCounter.incr(new RichDataRecord(leftRecord.event).numFeatures())
-    }
-    leftRecord
-  }
-
-  private[this] def leftJoinDataRecordProducer(
-    keyFeature: Feature[JLong],
-    leftDataRecordProducer: Producer[Storm, Event[DataRecord]],
-    rightStormService: Storm#Service[Set[Long], DataRecord],
-    keyDataRecordCounter: => Counter,
-    keyFeatureCounter: => Counter,
-    leftDataRecordCounter: => Counter,
-    rightDataRecordCounter: => Counter,
-    mergeNumFeaturesCounter: => Counter
-  ): Producer[Storm, Event[DataRecord]] = {
-    val keyedLeftDataRecordProducer: Producer[Storm, (Set[Long], Event[DataRecord])] =
-      leftDataRecordProducer.map {
-        case dataRecord: HomeEvent[DataRecord] =>
-          val key = mkKey(
-            keyFeature = keyFeature,
-            record = dataRecord.event,
-            keyDataRecordCounter = keyDataRecordCounter,
-            keyFeatureCounter = keyFeatureCounter
-          )
-          (key, dataRecord)
-        case dataRecord: ProfileEvent[DataRecord] =>
-          val key = Set.empty[Long]
-          (key, dataRecord)
-        case dataRecord: SearchEvent[DataRecord] =>
-          val key = Set.empty[Long]
-          (key, dataRecord)
-        case dataRecord: UuaEvent[DataRecord] =>
-          val key = Set.empty[Long]
-          (key, dataRecord)
-      }
-
-    keyedLeftDataRecordProducer
-      .leftJoin(rightStormService)
-      .map {
-        case (_, (leftRecord, rightRecordOpt)) =>
-          mergeDataRecord(
-            leftRecord = leftRecord,
-            rightRecordOpt = rightRecordOpt,
-            leftDataRecordCounter = leftDataRecordCounter,
-            rightDataRecordCounter = rightDataRecordCounter,
-            mergeNumFeaturesCounter = mergeNumFeaturesCounter
-          )
-      }
-  }
-
-  /**
-   * Filter Unified User Actions events to include only actions that has home timeline visit prior to landing on the page
-   */
-  def isUuaBCEEventsFromHome(event: UnifiedUserAction): Boolean = {
-    def breadcrumbViewsContain(view: String): Boolean =
-      event.eventMetadata.breadcrumbViews.map(_.contains(view)).getOrElse(false)
-
-    (event.actionType) match {
-      case ActionType.ClientTweetV2Impression if breadcrumbViewsContain("home") =>
-        true
-      case ActionType.ClientTweetVideoFullscreenV2Impression
-          if (breadcrumbViewsContain("home") & breadcrumbViewsContain("video")) =>
-        true
-      case ActionType.ClientProfileV2Impression if breadcrumbViewsContain("home") =>
-        true
-      case _ => false
-    }
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.docx
new file mode 100644
index 000000000..6cde9194d
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.scala
deleted file mode 100644
index 8d7a41d21..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.scala
+++ /dev/null
@@ -1,34 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.conversions.DurationOps._
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.{
-  OnlineAggregationStoresTrait,
-  RealTimeAggregateStore
-}
-
-object TimelinesOnlineAggregationConfig
-    extends TimelinesOnlineAggregationDefinitionsTrait
-    with OnlineAggregationStoresTrait {
-
-  import TimelinesOnlineAggregationSources._
-
-  override lazy val ProductionStore = RealTimeAggregateStore(
-    memcacheDataSet = "timelines_real_time_aggregates",
-    isProd = true,
-    cacheTTL = 5.days
-  )
-
-  override lazy val StagingStore = RealTimeAggregateStore(
-    memcacheDataSet = "twemcache_timelines_real_time_aggregates",
-    isProd = false,
-    cacheTTL = 5.days
-  )
-
-  override lazy val inputSource = timelinesOnlineAggregateSource
-
-  /**
-   * AggregateToCompute: This defines the complete set of aggregates to be
-   *    computed by the aggregation job and to be stored in memcache.
-   */
-  override lazy val AggregatesToCompute = ProdAggregates ++ StagingAggregates
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.docx
new file mode 100644
index 000000000..31d6abb91
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.scala
deleted file mode 100644
index 0d7c072e2..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.scala
+++ /dev/null
@@ -1,1112 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.conversions.DurationOps._
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.constant.SharedFeatures
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateSource
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateStore
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.OnlineAggregationConfigTrait
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.CountMetric
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.SumMetric
-import com.twitter.timelines.data_processing.ml_util.transforms.BinaryUnion
-import com.twitter.timelines.data_processing.ml_util.transforms.DownsampleTransform
-import com.twitter.timelines.data_processing.ml_util.transforms.IsNewUserTransform
-import com.twitter.timelines.data_processing.ml_util.transforms.IsPositionTransform
-import com.twitter.timelines.data_processing.ml_util.transforms.LogTransform
-import com.twitter.timelines.data_processing.ml_util.transforms.PositionCase
-import com.twitter.timelines.data_processing.ml_util.transforms.RichITransform
-import com.twitter.timelines.data_processing.ml_util.transforms.RichRemoveUnverifiedUserTransform
-import com.twitter.timelines.prediction.features.client_log_event.ClientLogEventDataRecordFeatures
-import com.twitter.timelines.prediction.features.common.CombinedFeatures
-import com.twitter.timelines.prediction.features.common.CombinedFeatures._
-import com.twitter.timelines.prediction.features.common.ProfileLabelFeatures
-import com.twitter.timelines.prediction.features.common.SearchLabelFeatures
-import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
-import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures.IS_TOP_FIVE
-import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures.IS_TOP_ONE
-import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures.IS_TOP_TEN
-import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures.LOG_POSITION
-import com.twitter.timelines.prediction.features.list_features.ListFeatures
-import com.twitter.timelines.prediction.features.recap.RecapFeatures
-import com.twitter.util.Duration
-import java.lang.{Boolean => JBoolean}
-import java.lang.{Long => JLong}
-import scala.io.Source
-
-object TimelinesOnlineAggregationUtils {
-  val TweetLabels: Set[Feature[JBoolean]] = CombinedFeatures.EngagementsRealTime
-  val TweetCoreLabels: Set[Feature[JBoolean]] = CombinedFeatures.CoreEngagements
-  val TweetDwellLabels: Set[Feature[JBoolean]] = CombinedFeatures.DwellEngagements
-  val TweetCoreAndDwellLabels: Set[Feature[JBoolean]] = TweetCoreLabels ++ TweetDwellLabels
-  val PrivateEngagementLabelsV2: Set[Feature[JBoolean]] = CombinedFeatures.PrivateEngagementsV2
-  val ProfileCoreLabels: Set[Feature[JBoolean]] = ProfileLabelFeatures.CoreEngagements
-  val ProfileNegativeEngagementLabels: Set[Feature[JBoolean]] =
-    ProfileLabelFeatures.NegativeEngagements
-  val ProfileNegativeEngagementUnionLabels: Set[Feature[JBoolean]] = Set(
-    ProfileLabelFeatures.IS_NEGATIVE_FEEDBACK_UNION)
-  val SearchCoreLabels: Set[Feature[JBoolean]] = SearchLabelFeatures.CoreEngagements
-  val TweetNegativeEngagementLabels: Set[Feature[JBoolean]] =
-    CombinedFeatures.NegativeEngagementsRealTime
-  val TweetNegativeEngagementDontLikeLabels: Set[Feature[JBoolean]] =
-    CombinedFeatures.NegativeEngagementsRealTimeDontLike
-  val TweetNegativeEngagementSecondaryLabels: Set[Feature[JBoolean]] =
-    CombinedFeatures.NegativeEngagementsSecondary
-  val AllTweetNegativeEngagementLabels: Set[Feature[JBoolean]] =
-    TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels ++ TweetNegativeEngagementSecondaryLabels
-  val UserAuthorEngagementLabels: Set[Feature[JBoolean]] = CombinedFeatures.UserAuthorEngagements
-  val ShareEngagementLabels: Set[Feature[JBoolean]] = CombinedFeatures.ShareEngagements
-  val BookmarkEngagementLabels: Set[Feature[JBoolean]] = CombinedFeatures.BookmarkEngagements
-  val AllBCEDwellLabels: Set[Feature[JBoolean]] =
-    CombinedFeatures.TweetDetailDwellEngagements ++ CombinedFeatures.ProfileDwellEngagements ++ CombinedFeatures.FullscreenVideoDwellEngagements
-  val AllTweetUnionLabels: Set[Feature[JBoolean]] = Set(
-    CombinedFeatures.IS_IMPLICIT_POSITIVE_FEEDBACK_UNION,
-    CombinedFeatures.IS_EXPLICIT_POSITIVE_FEEDBACK_UNION,
-    CombinedFeatures.IS_ALL_NEGATIVE_FEEDBACK_UNION
-  )
-  val AllTweetLabels: Set[Feature[JBoolean]] =
-    TweetLabels ++ TweetCoreAndDwellLabels ++ AllTweetNegativeEngagementLabels ++ ProfileCoreLabels ++ ProfileNegativeEngagementLabels ++ ProfileNegativeEngagementUnionLabels ++ UserAuthorEngagementLabels ++ SearchCoreLabels ++ ShareEngagementLabels ++ BookmarkEngagementLabels ++ PrivateEngagementLabelsV2 ++ AllBCEDwellLabels ++ AllTweetUnionLabels
-
-  def addFeatureFilterFromResource(
-    prodGroup: AggregateGroup,
-    aggRemovalPath: String
-  ): AggregateGroup = {
-    val resource = Some(Source.fromResource(aggRemovalPath))
-    val lines = resource.map(_.getLines.toSeq)
-    lines match {
-      case Some(value) => prodGroup.copy(aggExclusionRegex = value)
-      case _ => prodGroup
-    }
-  }
-}
-
-trait TimelinesOnlineAggregationDefinitionsTrait extends OnlineAggregationConfigTrait {
-  import TimelinesOnlineAggregationUtils._
-
-  def inputSource: AggregateSource
-  def ProductionStore: AggregateStore
-  def StagingStore: AggregateStore
-
-  val TweetFeatures: Set[Feature[_]] = Set(
-    ClientLogEventDataRecordFeatures.HasConsumerVideo,
-    ClientLogEventDataRecordFeatures.PhotoCount
-  )
-  val CandidateTweetSourceFeatures: Set[Feature[_]] = Set(
-    ClientLogEventDataRecordFeatures.FromRecap,
-    ClientLogEventDataRecordFeatures.FromRecycled,
-    ClientLogEventDataRecordFeatures.FromActivity,
-    ClientLogEventDataRecordFeatures.FromSimcluster,
-    ClientLogEventDataRecordFeatures.FromErg,
-    ClientLogEventDataRecordFeatures.FromCroon,
-    ClientLogEventDataRecordFeatures.FromList,
-    ClientLogEventDataRecordFeatures.FromRecTopic
-  )
-
-  def createStagingGroup(prodGroup: AggregateGroup): AggregateGroup =
-    prodGroup.copy(
-      outputStore = StagingStore
-    )
-
-  // Aggregate user engagements/features by tweet Id.
-  val tweetEngagement30MinuteCountsProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  // Aggregate user engagements/features by tweet Id.
-  val tweetVerifiedDontLikeEngagementRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v6",
-      preTransforms = Seq(RichRemoveUnverifiedUserTransform),
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val tweetNegativeEngagement6HourCounts =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v2",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = TweetNegativeEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val tweetVerifiedNegativeEngagementCounts =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v7",
-      preTransforms = Seq(RichRemoveUnverifiedUserTransform),
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = TweetNegativeEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val promotedTweetEngagementRealTimeCounts =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v3.is_promoted",
-      preTransforms = Seq(
-        DownsampleTransform(
-          negativeSamplingRate = 0.0,
-          keepLabels = Set(ClientLogEventDataRecordFeatures.IsPromoted))),
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = TweetCoreAndDwellLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(2.hours, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate total engagement counts by tweet Id for non-public
-   * engagements. Similar to EB's public engagement counts.
-   */
-  val tweetEngagementTotalCountsProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val tweetNegativeEngagementTotalCounts =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v2",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = TweetNegativeEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet features grouped by viewer's user id.
-   */
-  val userEngagementRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_aggregates_v1",
-      keys = Set(SharedFeatures.USER_ID),
-      features = TweetFeatures,
-      labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet features grouped by viewer's user id.
-   */
-  val userEngagementRealTimeAggregatesV2 =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_aggregates_v2",
-      keys = Set(SharedFeatures.USER_ID),
-      features = ClientLogEventDataRecordFeatures.TweetFeaturesV2,
-      labels = TweetCoreAndDwellLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate author's user state features grouped by viewer's user id.
-   */
-  val userEngagementAuthorUserStateRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_aggregates_v3",
-      preTransforms = Seq.empty,
-      keys = Set(SharedFeatures.USER_ID),
-      features = AuthorFeaturesAdapter.UserStateBooleanFeatures,
-      labels = TweetCoreAndDwellLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate author's user state features grouped by viewer's user id.
-   */
-  val userNegativeEngagementAuthorUserStateRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_aggregates_v4",
-      preTransforms = Seq.empty,
-      keys = Set(SharedFeatures.USER_ID),
-      features = AuthorFeaturesAdapter.UserStateBooleanFeatures,
-      labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet features grouped by viewer's user id, with 48 hour halfLife.
-   */
-  val userEngagement48HourRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_aggregates_v5",
-      keys = Set(SharedFeatures.USER_ID),
-      features = TweetFeatures,
-      labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(48.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate author's user state features grouped by viewer's user id.
-   */
-  val userNegativeEngagementAuthorUserState72HourRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_aggregates_v6",
-      preTransforms = Seq.empty,
-      keys = Set(SharedFeatures.USER_ID),
-      features = AuthorFeaturesAdapter.UserStateBooleanFeatures,
-      labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(72.hours),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate features grouped by source author id: for each author, aggregate features are created
-   * to quantify engagements (fav, reply, etc.) which tweets of the author has received.
-   */
-  val authorEngagementRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_author_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate features grouped by source author id: for each author, aggregate features are created
-   * to quantify negative engagements (mute, block, etc.) which tweets of the author has received.
-   *
-   * This aggregate group is not used in Home, but it is used in Follow Recommendation Service so need to keep it for now.
-   *
-   */
-  val authorNegativeEngagementRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_author_aggregates_v2",
-      keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = TweetNegativeEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate features grouped by source author id: for each author, aggregate features are created
-   * to quantify negative engagements (don't like) which tweets of the author has received from
-   * verified users.
-   */
-  val authorVerifiedNegativeEngagementRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_author_aggregates_v3",
-      preTransforms = Seq(RichRemoveUnverifiedUserTransform),
-      keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet features grouped by topic id.
-   */
-  val topicEngagementRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_topic_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.TOPIC_ID),
-      features = Set.empty,
-      labels = TweetLabels ++ AllTweetNegativeEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate user engagements / user state by topic id.
-   */
-  val topicEngagementUserStateRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_topic_aggregates_v2",
-      keys = Set(TimelinesSharedFeatures.TOPIC_ID),
-      features = UserFeaturesAdapter.UserStateBooleanFeatures,
-      labels = TweetCoreAndDwellLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate user negative engagements / user state by topic id.
-   */
-  val topicNegativeEngagementUserStateRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_topic_aggregates_v3",
-      keys = Set(TimelinesSharedFeatures.TOPIC_ID),
-      features = UserFeaturesAdapter.UserStateBooleanFeatures,
-      labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet features grouped by topic id like real_time_topic_aggregates_v1 but 24hour halfLife
-   */
-  val topicEngagement24HourRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_topic_aggregates_v4",
-      keys = Set(TimelinesSharedFeatures.TOPIC_ID),
-      features = Set.empty,
-      labels = TweetLabels ++ AllTweetNegativeEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  // Aggregate user engagements / user state by tweet Id.
-  val tweetEngagementUserStateRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v3",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = UserFeaturesAdapter.UserStateBooleanFeatures,
-      labels = TweetCoreAndDwellLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  // Aggregate user engagements / user gender by tweet Id.
-  val tweetEngagementGenderRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v4",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = UserFeaturesAdapter.GenderBooleanFeatures,
-      labels =
-        TweetCoreAndDwellLabels ++ TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  // Aggregate user negative engagements / user state by tweet Id.
-  val tweetNegativeEngagementUserStateRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v5",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = UserFeaturesAdapter.UserStateBooleanFeatures,
-      labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  // Aggregate user negative engagements / user state by tweet Id.
-  val tweetVerifiedNegativeEngagementUserStateRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_aggregates_v8",
-      preTransforms = Seq(RichRemoveUnverifiedUserTransform),
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = UserFeaturesAdapter.UserStateBooleanFeatures,
-      labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet engagement labels and candidate tweet source features grouped by user id.
-   */
-  val userCandidateTweetSourceEngagementRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_candidate_tweet_source_aggregates_v1",
-      keys = Set(SharedFeatures.USER_ID),
-      features = CandidateTweetSourceFeatures,
-      labels = TweetCoreAndDwellLabels ++ NegativeEngagementsRealTimeDontLike,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet engagement labels and candidate tweet source features grouped by user id.
-   */
-  val userCandidateTweetSourceEngagement48HourRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_candidate_tweet_source_aggregates_v2",
-      keys = Set(SharedFeatures.USER_ID),
-      features = CandidateTweetSourceFeatures,
-      labels = TweetCoreAndDwellLabels ++ NegativeEngagementsRealTimeDontLike,
-      metrics = Set(CountMetric),
-      halfLives = Set(48.hours),
-      outputStore = ProductionStore,
-      includeAnyFeature = false,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet features grouped by viewer's user id on Profile engagements
-   */
-  val userProfileEngagementRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "profile_real_time_user_aggregates_v1",
-      preTransforms = Seq(IsNewUserTransform),
-      keys = Set(SharedFeatures.USER_ID),
-      features = TweetFeatures,
-      labels = ProfileCoreLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = true,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val NegativeEngagementsUnionTransform = RichITransform(
-    BinaryUnion(
-      featuresToUnify = ProfileNegativeEngagementLabels,
-      outputFeature = ProfileLabelFeatures.IS_NEGATIVE_FEEDBACK_UNION
-    ))
-
-  /**
-   * Aggregate tweet features grouped by viewer's user id on Profile negative engagements.
-   */
-  val userProfileNegativeEngagementRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "profile_negative_engagement_real_time_user_aggregates_v1",
-      preTransforms = Seq(NegativeEngagementsUnionTransform),
-      keys = Set(SharedFeatures.USER_ID),
-      features = Set.empty,
-      labels = ProfileNegativeEngagementLabels ++ ProfileNegativeEngagementUnionLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 72.hours, 14.day),
-      outputStore = ProductionStore,
-      includeAnyFeature = true,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet features grouped by viewer's and author's user ids and on Profile engagements
-   */
-  val userAuthorProfileEngagementRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "user_author_profile_real_time_aggregates_v1",
-      keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = ProfileCoreLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours, 72.hours),
-      outputStore = ProductionStore,
-      includeAnyFeature = true,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate tweet features grouped by viewer's and author's user ids and on negative Profile engagements
-   */
-  val userAuthorProfileNegativeEngagementRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "user_author_profile_negative_engagement_real_time_aggregates_v1",
-      preTransforms = Seq(NegativeEngagementsUnionTransform),
-      keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = ProfileNegativeEngagementUnionLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 72.hours, 14.day),
-      outputStore = ProductionStore,
-      includeAnyFeature = true,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val newUserAuthorEngagementRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_new_user_author_aggregates_v1",
-      preTransforms = Seq(IsNewUserTransform),
-      keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = TweetCoreAndDwellLabels ++ Set(
-        IS_CLICKED,
-        IS_PROFILE_CLICKED,
-        IS_PHOTO_EXPANDED
-      ),
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyFeature = true,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val userAuthorEngagementRealTimeAggregatesProd = {
-    // Computing user-author real-time aggregates is very expensive so we
-    // take the union of all major negative feedback engagements to create
-    // a single negtive label for aggregation. We also include a number of
-    // core positive engagements.
-    val BinaryUnionNegativeEngagements =
-      BinaryUnion(
-        featuresToUnify = AllTweetNegativeEngagementLabels,
-        outputFeature = IS_NEGATIVE_FEEDBACK_UNION
-      )
-    val BinaryUnionNegativeEngagementsTransform = RichITransform(BinaryUnionNegativeEngagements)
-
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_author_aggregates_v1",
-      preTransforms = Seq(BinaryUnionNegativeEngagementsTransform),
-      keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = UserAuthorEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 1.day),
-      outputStore = ProductionStore,
-      includeAnyFeature = true,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-  }
-
-  /**
-   * Aggregate tweet features grouped by list id.
-   */
-  val listEngagementRealTimeAggregatesProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_list_aggregates_v1",
-      keys = Set(ListFeatures.LIST_ID),
-      features = Set.empty,
-      labels =
-        TweetCoreAndDwellLabels ++ TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  // Aggregate features grouped by topic of tweet and country from user's location
-  val topicCountryRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_topic_country_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.TOPIC_ID, UserFeaturesAdapter.USER_COUNTRY_ID),
-      features = Set.empty,
-      labels =
-        TweetCoreAndDwellLabels ++ AllTweetNegativeEngagementLabels ++ PrivateEngagementLabelsV2 ++ ShareEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 72.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  // Aggregate features grouped by TweetId_Country from user's location
-  val tweetCountryRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_country_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID, UserFeaturesAdapter.USER_COUNTRY_ID),
-      features = Set.empty,
-      labels = TweetCoreAndDwellLabels ++ AllTweetNegativeEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = true,
-      includeTimestampFeature = false,
-    )
-
-  // Additional aggregate features grouped by TweetId_Country from user's location
-  val tweetCountryPrivateEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_country_aggregates_v2",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID, UserFeaturesAdapter.USER_COUNTRY_ID),
-      features = Set.empty,
-      labels = PrivateEngagementLabelsV2 ++ ShareEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 72.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  // Aggregate features grouped by TweetId_Country from user's location
-  val tweetCountryVerifiedNegativeEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_country_aggregates_v3",
-      preTransforms = Seq(RichRemoveUnverifiedUserTransform),
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID, UserFeaturesAdapter.USER_COUNTRY_ID),
-      features = Set.empty,
-      labels = AllTweetNegativeEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, Duration.Top),
-      outputStore = ProductionStore,
-      includeAnyLabel = true,
-      includeTimestampFeature = false,
-    )
-
-  object positionTranforms extends IsPositionTransform {
-    override val isInPositionRangeFeature: Seq[PositionCase] =
-      Seq(PositionCase(1, IS_TOP_ONE), PositionCase(5, IS_TOP_FIVE), PositionCase(10, IS_TOP_TEN))
-    override val decodedPositionFeature: Feature.Discrete =
-      ClientLogEventDataRecordFeatures.InjectedPosition
-  }
-
-  val userPositionEngagementsCountsProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_position_based_user_aggregates_v1",
-      keys = Set(SharedFeatures.USER_ID),
-      features = Set(IS_TOP_ONE, IS_TOP_FIVE, IS_TOP_TEN),
-      labels = TweetCoreAndDwellLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      preTransforms = Seq(positionTranforms),
-      includeAnyLabel = false,
-      includeAnyFeature = false,
-      includeTimestampFeature = false,
-    )
-
-  val userPositionEngagementsSumProd =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_position_based_user_sum_aggregates_v2",
-      keys = Set(SharedFeatures.USER_ID),
-      features = Set(LOG_POSITION),
-      labels = TweetCoreAndDwellLabels,
-      metrics = Set(SumMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      preTransforms =
-        Seq(new LogTransform(ClientLogEventDataRecordFeatures.InjectedPosition, LOG_POSITION)),
-      includeAnyLabel = false,
-      includeAnyFeature = false,
-      includeTimestampFeature = false,
-    )
-
-  // Aggregates for share engagements
-  val tweetShareEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_share_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = ShareEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val userShareEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_share_aggregates_v1",
-      keys = Set(SharedFeatures.USER_ID),
-      features = Set.empty,
-      labels = ShareEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val userAuthorShareEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_author_share_aggregates_v1",
-      keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = ShareEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyFeature = true,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val topicShareEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_topic_share_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.TOPIC_ID),
-      features = Set.empty,
-      labels = ShareEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val authorShareEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_author_share_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = ShareEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  // Bookmark RTAs
-  val tweetBookmarkEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_bookmark_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = BookmarkEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val userBookmarkEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_bookmark_aggregates_v1",
-      keys = Set(SharedFeatures.USER_ID),
-      features = Set.empty,
-      labels = BookmarkEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val userAuthorBookmarkEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_author_bookmark_aggregates_v1",
-      keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = BookmarkEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyFeature = true,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val authorBookmarkEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_author_bookmark_aggregates_v1",
-      keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features = Set.empty,
-      labels = BookmarkEngagementLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate on user level dwell labels from BCE
-   */
-  val userBCEDwellEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_user_bce_dwell_aggregates",
-      keys = Set(SharedFeatures.USER_ID),
-      features = Set.empty,
-      labels = AllBCEDwellLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  /**
-   * Aggregate on tweet level dwell labels from BCE
-   */
-  val tweetBCEDwellEngagementsRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_tweet_bce_dwell_aggregates",
-      keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID),
-      features = Set.empty,
-      labels = AllBCEDwellLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(30.minutes, 24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeTimestampFeature = false,
-    )
-
-  val ImplicitPositiveEngagementsUnionTransform = RichITransform(
-    BinaryUnion(
-      featuresToUnify = CombinedFeatures.ImplicitPositiveEngagements,
-      outputFeature = CombinedFeatures.IS_IMPLICIT_POSITIVE_FEEDBACK_UNION
-    )
-  )
-
-  val ExplicitPositiveEngagementsUnionTransform = RichITransform(
-    BinaryUnion(
-      featuresToUnify = CombinedFeatures.ExplicitPositiveEngagements,
-      outputFeature = CombinedFeatures.IS_EXPLICIT_POSITIVE_FEEDBACK_UNION
-    )
-  )
-
-  val AllNegativeEngagementsUnionTransform = RichITransform(
-    BinaryUnion(
-      featuresToUnify = CombinedFeatures.AllNegativeEngagements,
-      outputFeature = CombinedFeatures.IS_ALL_NEGATIVE_FEEDBACK_UNION
-    )
-  )
-
-  /**
-   * Aggregate features for author content preference
-   */
-  val authorContentPreferenceRealTimeAggregates =
-    AggregateGroup(
-      inputSource = inputSource,
-      aggregatePrefix = "real_time_author_content_preference_aggregates",
-      preTransforms = Seq(
-        ImplicitPositiveEngagementsUnionTransform,
-        ExplicitPositiveEngagementsUnionTransform,
-        AllNegativeEngagementsUnionTransform),
-      keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID),
-      features =
-        ClientLogEventDataRecordFeatures.AuthorContentPreferenceTweetTypeFeatures ++ AuthorFeaturesAdapter.UserStateBooleanFeatures,
-      labels = AllTweetUnionLabels,
-      metrics = Set(CountMetric),
-      halfLives = Set(24.hours),
-      outputStore = ProductionStore,
-      includeAnyLabel = false,
-      includeAnyFeature = false,
-    )
-
-  val FeaturesGeneratedByPreTransforms = Set(LOG_POSITION, IS_TOP_TEN, IS_TOP_FIVE, IS_TOP_ONE)
-
-  val ProdAggregateGroups = Set(
-    tweetEngagement30MinuteCountsProd,
-    tweetEngagementTotalCountsProd,
-    tweetNegativeEngagement6HourCounts,
-    tweetNegativeEngagementTotalCounts,
-    userEngagementRealTimeAggregatesProd,
-    userEngagement48HourRealTimeAggregatesProd,
-    userNegativeEngagementAuthorUserStateRealTimeAggregates,
-    userNegativeEngagementAuthorUserState72HourRealTimeAggregates,
-    authorEngagementRealTimeAggregatesProd,
-    topicEngagementRealTimeAggregatesProd,
-    topicEngagement24HourRealTimeAggregatesProd,
-    tweetEngagementUserStateRealTimeAggregatesProd,
-    tweetNegativeEngagementUserStateRealTimeAggregates,
-    userProfileEngagementRealTimeAggregates,
-    newUserAuthorEngagementRealTimeAggregatesProd,
-    userAuthorEngagementRealTimeAggregatesProd,
-    listEngagementRealTimeAggregatesProd,
-    tweetCountryRealTimeAggregates,
-    tweetShareEngagementsRealTimeAggregates,
-    userShareEngagementsRealTimeAggregates,
-    userAuthorShareEngagementsRealTimeAggregates,
-    topicShareEngagementsRealTimeAggregates,
-    authorShareEngagementsRealTimeAggregates,
-    tweetBookmarkEngagementsRealTimeAggregates,
-    userBookmarkEngagementsRealTimeAggregates,
-    userAuthorBookmarkEngagementsRealTimeAggregates,
-    authorBookmarkEngagementsRealTimeAggregates,
-    topicCountryRealTimeAggregates,
-    tweetCountryPrivateEngagementsRealTimeAggregates,
-    userBCEDwellEngagementsRealTimeAggregates,
-    tweetBCEDwellEngagementsRealTimeAggregates,
-    authorContentPreferenceRealTimeAggregates,
-    authorVerifiedNegativeEngagementRealTimeAggregatesProd,
-    tweetVerifiedDontLikeEngagementRealTimeAggregatesProd,
-    tweetVerifiedNegativeEngagementCounts,
-    tweetVerifiedNegativeEngagementUserStateRealTimeAggregates,
-    tweetCountryVerifiedNegativeEngagementsRealTimeAggregates
-  ).map(
-    addFeatureFilterFromResource(
-      _,
-      "com/twitter/timelines/prediction/common/aggregates/real_time/aggregates_to_drop.txt"))
-
-  val StagingAggregateGroups = ProdAggregateGroups.map(createStagingGroup)
-
-  /**
-   * Contains the fully typed aggregate groups from which important
-   * values can be derived e.g. the features to be computed, halflives etc.
-   */
-  override val ProdAggregates = ProdAggregateGroups.flatMap(_.buildTypedAggregateGroups())
-
-  override val StagingAggregates = StagingAggregateGroups.flatMap(_.buildTypedAggregateGroups())
-
-
-  override val ProdCommonAggregates = ProdAggregates
-    .filter(_.keysToAggregate == Set(SharedFeatures.USER_ID))
-
-  /**
-   * This defines the set of selected features from a candidate
-   * that we'd like to send to the served features cache by TLM.
-   * These should include  interesting and necessary features that
-   * cannot be extracted from LogEvents only by the real-time aggregates
-   * job. If you are adding new AggregateGroups requiring TLM-side
-   * candidate features, make sure to add them here.
-   */
-  val candidateFeaturesToCache: Set[Feature[_]] = Set(
-    TimelinesSharedFeatures.SOURCE_AUTHOR_ID,
-    RecapFeatures.HASHTAGS,
-    RecapFeatures.MENTIONED_SCREEN_NAMES,
-    RecapFeatures.URL_DOMAINS
-  )
-}
-
-/**
- * This config should only be used to access the aggregate features constructed by the
- * aggregation config, and not for implementing an online real-time aggregates job.
- */
-object TimelinesOnlineAggregationFeaturesOnlyConfig
-    extends TimelinesOnlineAggregationDefinitionsTrait {
-
-  private[real_time] case class DummyAggregateSource(name: String, timestampFeature: Feature[JLong])
-      extends AggregateSource
-
-  private[real_time] case class DummyAggregateStore(name: String) extends AggregateStore
-
-  override lazy val inputSource = DummyAggregateSource(
-    name = "timelines_rta",
-    timestampFeature = SharedFeatures.TIMESTAMP
-  )
-  override lazy val ProductionStore = DummyAggregateStore("timelines_rta")
-  override lazy val StagingStore = DummyAggregateStore("timelines_rta")
-
-  override lazy val AggregatesToCompute = ProdAggregates ++ StagingAggregates
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.docx
new file mode 100644
index 000000000..1bad79931
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.scala
deleted file mode 100644
index 71e97a1b1..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.scala
+++ /dev/null
@@ -1,5 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-object TimelinesOnlineAggregationSources {
-  val timelinesOnlineAggregateSource = new TimelinesStormAggregateSource
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.docx
new file mode 100644
index 000000000..edca2ce66
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.scala
deleted file mode 100644
index e386d4da1..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.scala
+++ /dev/null
@@ -1,182 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.conversions.DurationOps._
-import com.twitter.finagle.stats.DefaultStatsReceiver
-import com.twitter.summingbird.Options
-import com.twitter.summingbird.online.option.FlatMapParallelism
-import com.twitter.summingbird.online.option.SourceParallelism
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron._
-import com.twitter.timelines.data_processing.ml_util.transforms.DownsampleTransform
-import com.twitter.timelines.data_processing.ml_util.transforms.RichITransform
-import com.twitter.timelines.data_processing.ml_util.transforms.UserDownsampleTransform
-
-import com.twitter.timelines.prediction.common.aggregates.BCELabelTransformFromUUADataRecord
-
-/**
- * Sets up relevant topology parameters. Our primary goal is to handle the
- * LogEvent stream and aggregate (sum) on the parsed DataRecords without falling
- * behind. Our constraint is the resulting write (and read) QPS to the backing
- * memcache store.
- *
- * If the job is falling behind, add more flatMappers and/or Summers after
- * inspecting the viz panels for the respective job (go/heron-ui). An increase in
- * Summers (and/or aggregation keys and features in the config) results in an
- * increase in memcache QPS (go/cb and search for our cache). Adjust with CacheSize
- * settings until QPS is well-controlled.
- *
- */
-object TimelinesRealTimeAggregatesJobConfigs extends RealTimeAggregatesJobConfigs {
-  import TimelinesOnlineAggregationUtils._
-
-  /**
-   * We remove input records that do not contain a label/engagement as defined in AllTweetLabels, which includes
-   * explicit user engagements including public, private and impression events. By avoiding ingesting records without
-   * engagemnts, we guarantee that no distribution shifts occur in computed aggregate features when we add a new spout
-   * to input aggregate sources. Counterfactual signal is still available since we aggregate on explicit dwell
-   * engagements.
-   */
-  val NegativeDownsampleTransform =
-    DownsampleTransform(
-      negativeSamplingRate = 0.0,
-      keepLabels = AllTweetLabels,
-      positiveSamplingRate = 1.0)
-
-  /**
-   * We downsample positive engagements for devel topology to reduce traffic, aiming for equivalent of 10% of prod traffic.
-   * First apply consistent downsampling to 10% of users, and then apply downsampling to remove records without
-   * explicit labels. We apply user-consistent sampling to more closely approximate prod query patterns.
-   */
-  val StagingUserBasedDownsampleTransform =
-    UserDownsampleTransform(
-      availability = 1000,
-      featureName = "rta_devel"
-    )
-
-  override val Prod = RealTimeAggregatesJobConfig(
-    appId = "summingbird_timelines_rta",
-    topologyWorkers = 1450,
-    sourceCount = 120,
-    flatMapCount = 1800,
-    summerCount = 3850,
-    cacheSize = 200,
-    containerRamGigaBytes = 54,
-    name = "timelines_real_time_aggregates",
-    teamName = "timelines",
-    teamEmail = "",
-    // If one component is hitting GC limit at prod, tune componentToMetaSpaceSizeMap.
-    // Except for Source bolts. Tune componentToRamGigaBytesMap for Source bolts instead.
-    componentToMetaSpaceSizeMap = Map(
-      "Tail-FlatMap" -> "-XX:MaxMetaspaceSize=1024M -XX:MetaspaceSize=1024M",
-      "Tail" -> "-XX:MaxMetaspaceSize=2560M -XX:MetaspaceSize=2560M"
-    ),
-    // If either component is hitting memory limit at prod
-    // its memory need to increase: either increase total memory of container (containerRamGigaBytes),
-    // or allocate more memory for one component while keeping total memory unchanged.
-    componentToRamGigaBytesMap = Map(
-      "Tail-FlatMap-Source" -> 3, // Home source
-      "Tail-FlatMap-Source.2" -> 3, // Profile source
-      "Tail-FlatMap-Source.3" -> 3, // Search source
-      "Tail-FlatMap-Source.4" -> 3, // UUA source
-      "Tail-FlatMap" -> 8
-      // Tail will use the leftover memory in the container.
-      // Make sure to tune topologyWorkers and containerRamGigaBytes such that this is greater than 10 GB.
-    ),
-    topologyNamedOptions = Map(
-      "TL_EVENTS_SOURCE" -> Options()
-        .set(SourceParallelism(120)),
-      "PROFILE_EVENTS_SOURCE" -> Options()
-        .set(SourceParallelism(30)),
-      "SEARCH_EVENTS_SOURCE" -> Options()
-        .set(SourceParallelism(10)),
-      "UUA_EVENTS_SOURCE" -> Options()
-        .set(SourceParallelism(10)),
-      "COMBINED_PRODUCER" -> Options()
-        .set(FlatMapParallelism(1800))
-    ),
-    // The UUA datarecord for BCE events inputted will not have binary labels populated.
-    // BCELabelTransform will set the datarecord with binary BCE dwell labels features based on the corresponding dwell_time_ms.
-    // It's important to have the BCELabelTransformFromUUADataRecord before ProdNegativeDownsampleTransform
-    // because ProdNegativeDownsampleTransform will remove datarecord that contains no features from AllTweetLabels.
-    onlinePreTransforms =
-      Seq(RichITransform(BCELabelTransformFromUUADataRecord), NegativeDownsampleTransform)
-  )
-
-  /**
-   * we downsample 10% computation of devel RTA based on [[StagingNegativeDownsampleTransform]].
-   * To better test scalability of topology, we reduce computing resource of components "Tail-FlatMap"
-   * and "Tail" to be 10% of prod but keep computing resource of component "Tail-FlatMap-Source" unchanged.
-   * hence flatMapCount=110, summerCount=105 and sourceCount=100. Hence topologyWorkers =(110+105+100)/5 = 63.
-   */
-  override val Devel = RealTimeAggregatesJobConfig(
-    appId = "summingbird_timelines_rta_devel",
-    topologyWorkers = 120,
-    sourceCount = 120,
-    flatMapCount = 150,
-    summerCount = 300,
-    cacheSize = 200,
-    containerRamGigaBytes = 54,
-    name = "timelines_real_time_aggregates_devel",
-    teamName = "timelines",
-    teamEmail = "",
-    // If one component is hitting GC limit at prod, tune componentToMetaSpaceSizeMap
-    // Except for Source bolts. Tune componentToRamGigaBytesMap for Source bolts instead.
-    componentToMetaSpaceSizeMap = Map(
-      "Tail-FlatMap" -> "-XX:MaxMetaspaceSize=1024M -XX:MetaspaceSize=1024M",
-      "Tail" -> "-XX:MaxMetaspaceSize=2560M -XX:MetaspaceSize=2560M"
-    ),
-    // If either component is hitting memory limit at prod
-    // its memory need to increase: either increase total memory of container (containerRamGigaBytes),
-    // or allocate more memory for one component while keeping total memory unchanged.
-    componentToRamGigaBytesMap = Map(
-      "Tail-FlatMap-Source" -> 3, // Home source
-      "Tail-FlatMap-Source.2" -> 3, // Profile source
-      "Tail-FlatMap-Source.3" -> 3, // Search source
-      "Tail-FlatMap-Source.4" -> 3, // UUA source
-      "Tail-FlatMap" -> 8
-      // Tail will use the leftover memory in the container.
-      // Make sure to tune topologyWorkers and containerRamGigaBytes such that this is greater than 10 GB.
-    ),
-    topologyNamedOptions = Map(
-      "TL_EVENTS_SOURCE" -> Options()
-        .set(SourceParallelism(120)),
-      "PROFILE_EVENTS_SOURCE" -> Options()
-        .set(SourceParallelism(30)),
-      "SEARCH_EVENTS_SOURCE" -> Options()
-        .set(SourceParallelism(10)),
-      "UUA_EVENTS_SOURCE" -> Options()
-        .set(SourceParallelism(10)),
-      "COMBINED_PRODUCER" -> Options()
-        .set(FlatMapParallelism(150))
-    ),
-    // It's important to have the BCELabelTransformFromUUADataRecord before ProdNegativeDownsampleTransform
-    onlinePreTransforms = Seq(
-      StagingUserBasedDownsampleTransform,
-      RichITransform(BCELabelTransformFromUUADataRecord),
-      NegativeDownsampleTransform),
-    enableUserReindexingNighthawkBtreeStore = true,
-    enableUserReindexingNighthawkHashStore = true,
-    userReindexingNighthawkBtreeStoreConfig = NighthawkUnderlyingStoreConfig(
-      serversetPath =
-        "/twitter/service/cache-user/test/nighthawk_timelines_real_time_aggregates_btree_test_api",
-      // NOTE: table names are prefixed to every pkey so keep it short
-      tableName = "u_r_v1", // (u)ser_(r)eindexing_v1
-      // keep ttl <= 1 day because it's keyed on user, and we will have limited hit rates beyond 1 day
-      cacheTTL = 1.day
-    ),
-    userReindexingNighthawkHashStoreConfig = NighthawkUnderlyingStoreConfig(
-      // For prod: "/s/cache-user/nighthawk_timelines_real_time_aggregates_hash_api",
-      serversetPath =
-        "/twitter/service/cache-user/test/nighthawk_timelines_real_time_aggregates_hash_test_api",
-      // NOTE: table names are prefixed to every pkey so keep it short
-      tableName = "u_r_v1", // (u)ser_(r)eindexing_v1
-      // keep ttl <= 1 day because it's keyed on user, and we will have limited hit rates beyond 1 day
-      cacheTTL = 1.day
-    )
-  )
-}
-
-object TimelinesRealTimeAggregatesJob extends RealTimeAggregatesJobBase {
-  override lazy val statsReceiver = DefaultStatsReceiver.scope("timelines_real_time_aggregates")
-  override lazy val jobConfigs = TimelinesRealTimeAggregatesJobConfigs
-  override lazy val aggregatesToCompute = TimelinesOnlineAggregationConfig.AggregatesToCompute
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.docx
new file mode 100644
index 000000000..ec9eaa180
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.scala
deleted file mode 100644
index 2e096dc07..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.scala
+++ /dev/null
@@ -1,185 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.clientapp.thriftscala.LogEvent
-import com.twitter.conversions.DurationOps._
-import com.twitter.finagle.stats.Counter
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.api.constant.SharedFeatures
-import com.twitter.snowflake.id.SnowflakeId
-import com.twitter.summingbird._
-import com.twitter.summingbird.storm.Storm
-import com.twitter.summingbird_internal.sources.AppId
-import com.twitter.summingbird_internal.sources.storm.remote.ClientEventSourceScrooge2
-import com.twitter.timelines.data_processing.ad_hoc.suggests.common.AllScribeProcessor
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.RealTimeAggregatesJobConfig
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.StormAggregateSource
-import com.twitter.timelines.prediction.adapters.client_log_event.ClientLogEventAdapter
-import com.twitter.timelines.prediction.adapters.client_log_event.ProfileClientLogEventAdapter
-import com.twitter.timelines.prediction.adapters.client_log_event.SearchClientLogEventAdapter
-import com.twitter.timelines.prediction.adapters.client_log_event.UuaEventAdapter
-import com.twitter.unified_user_actions.client.config.KafkaConfigs
-import com.twitter.unified_user_actions.client.summingbird.UnifiedUserActionsSourceScrooge
-import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction
-import scala.collection.JavaConverters._
-
-/**
- * Storm Producer for client events generated on Home, Profile, and Search
- */
-class TimelinesStormAggregateSource extends StormAggregateSource {
-
-  override val name = "timelines_rta"
-  override val timestampFeature = SharedFeatures.TIMESTAMP
-
-  private lazy val TimelinesClientEventSourceName = "TL_EVENTS_SOURCE"
-  private lazy val ProfileClientEventSourceName = "PROFILE_EVENTS_SOURCE"
-  private lazy val SearchClientEventSourceName = "SEARCH_EVENTS_SOURCE"
-  private lazy val UuaEventSourceName = "UUA_EVENTS_SOURCE"
-  private lazy val CombinedProducerName = "COMBINED_PRODUCER"
-  private lazy val FeatureStoreProducerName = "FEATURE_STORE_PRODUCER"
-
-  private def isNewUserEvent(event: LogEvent): Boolean = {
-    event.logBase.flatMap(_.userId).flatMap(SnowflakeId.timeFromIdOpt).exists(_.untilNow < 30.days)
-  }
-
-  private def mkDataRecords(event: LogEvent, dataRecordCounter: Counter): Seq[DataRecord] = {
-    val dataRecords: Seq[DataRecord] =
-      if (AllScribeProcessor.isValidSuggestTweetEvent(event)) {
-        ClientLogEventAdapter.adaptToDataRecords(event).asScala
-      } else {
-        Seq.empty[DataRecord]
-      }
-    dataRecordCounter.incr(dataRecords.size)
-    dataRecords
-  }
-
-  private def mkProfileDataRecords(
-    event: LogEvent,
-    dataRecordCounter: Counter
-  ): Seq[DataRecord] = {
-    val dataRecords: Seq[DataRecord] =
-      ProfileClientLogEventAdapter.adaptToDataRecords(event).asScala
-    dataRecordCounter.incr(dataRecords.size)
-    dataRecords
-  }
-
-  private def mkSearchDataRecords(
-    event: LogEvent,
-    dataRecordCounter: Counter
-  ): Seq[DataRecord] = {
-    val dataRecords: Seq[DataRecord] =
-      SearchClientLogEventAdapter.adaptToDataRecords(event).asScala
-    dataRecordCounter.incr(dataRecords.size)
-    dataRecords
-  }
-
-  private def mkUuaDataRecords(
-    event: UnifiedUserAction,
-    dataRecordCounter: Counter
-  ): Seq[DataRecord] = {
-    val dataRecords: Seq[DataRecord] =
-      UuaEventAdapter.adaptToDataRecords(event).asScala
-    dataRecordCounter.incr(dataRecords.size)
-    dataRecords
-  }
-
-  override def build(
-    statsReceiver: StatsReceiver,
-    jobConfig: RealTimeAggregatesJobConfig
-  ): Producer[Storm, DataRecord] = {
-    lazy val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName)
-    lazy val dataRecordCounter = scopedStatsReceiver.counter("dataRecord")
-
-    // Home Timeline Engagements
-    // Step 1: => LogEvent
-    lazy val clientEventProducer: Producer[Storm, HomeEvent[LogEvent]] =
-      ClientEventSourceScrooge2(
-        appId = AppId(jobConfig.appId),
-        topic = "julep_client_event_suggests",
-        resumeAtLastReadOffset = false,
-        enableTls = true
-      ).source.map(HomeEvent[LogEvent]).name(TimelinesClientEventSourceName)
-
-    // Profile Engagements
-    // Step 1: => LogEvent
-    lazy val profileClientEventProducer: Producer[Storm, ProfileEvent[LogEvent]] =
-      ClientEventSourceScrooge2(
-        appId = AppId(jobConfig.appId),
-        topic = "julep_client_event_profile_real_time_engagement_metrics",
-        resumeAtLastReadOffset = false,
-        enableTls = true
-      ).source
-        .map(ProfileEvent[LogEvent])
-        .name(ProfileClientEventSourceName)
-
-    // Search Engagements
-    // Step 1: => LogEvent
-    // Only process events for all users to save resource
-    lazy val searchClientEventProducer: Producer[Storm, SearchEvent[LogEvent]] =
-      ClientEventSourceScrooge2(
-        appId = AppId(jobConfig.appId),
-        topic = "julep_client_event_search_real_time_engagement_metrics",
-        resumeAtLastReadOffset = false,
-        enableTls = true
-      ).source
-        .map(SearchEvent[LogEvent])
-        .name(SearchClientEventSourceName)
-
-    // Unified User Actions (includes Home and other product surfaces)
-    lazy val uuaEventProducer: Producer[Storm, UuaEvent[UnifiedUserAction]] =
-      UnifiedUserActionsSourceScrooge(
-        appId = AppId(jobConfig.appId),
-        parallelism = 10,
-        kafkaConfig = KafkaConfigs.ProdUnifiedUserActionsEngagementOnly
-      ).source
-        .filter(StormAggregateSourceUtils.isUuaBCEEventsFromHome(_))
-        .map(UuaEvent[UnifiedUserAction])
-        .name(UuaEventSourceName)
-
-    // Combined
-    // Step 2:
-    // (a) Combine
-    // (b) Transform LogEvent => Seq[DataRecord]
-    // (c) Apply sampler
-    lazy val combinedClientEventDataRecordProducer: Producer[Storm, Event[DataRecord]] =
-      profileClientEventProducer // This becomes the bottom branch
-        .merge(clientEventProducer) // This becomes the middle branch
-        .merge(searchClientEventProducer)
-        .merge(uuaEventProducer) // This becomes the top
-        .flatMap { // LogEvent => Seq[DataRecord]
-          case e: HomeEvent[LogEvent] =>
-            mkDataRecords(e.event, dataRecordCounter).map(HomeEvent[DataRecord])
-          case e: ProfileEvent[LogEvent] =>
-            mkProfileDataRecords(e.event, dataRecordCounter).map(ProfileEvent[DataRecord])
-          case e: SearchEvent[LogEvent] =>
-            mkSearchDataRecords(e.event, dataRecordCounter).map(SearchEvent[DataRecord])
-          case e: UuaEvent[UnifiedUserAction] =>
-            mkUuaDataRecords(
-              e.event,
-              dataRecordCounter
-            ).map(UuaEvent[DataRecord])
-        }
-        .flatMap { // Apply sampler
-          case e: HomeEvent[DataRecord] =>
-            jobConfig.sequentiallyTransform(e.event).map(HomeEvent[DataRecord])
-          case e: ProfileEvent[DataRecord] =>
-            jobConfig.sequentiallyTransform(e.event).map(ProfileEvent[DataRecord])
-          case e: SearchEvent[DataRecord] =>
-            jobConfig.sequentiallyTransform(e.event).map(SearchEvent[DataRecord])
-          case e: UuaEvent[DataRecord] =>
-            jobConfig.sequentiallyTransform(e.event).map(UuaEvent[DataRecord])
-        }
-        .name(CombinedProducerName)
-
-    // Step 3: Join with Feature Store features
-    lazy val featureStoreDataRecordProducer: Producer[Storm, DataRecord] =
-      StormAggregateSourceUtils
-        .wrapByFeatureStoreClient(
-          underlyingProducer = combinedClientEventDataRecordProducer,
-          jobConfig = jobConfig,
-          scopedStatsReceiver = scopedStatsReceiver
-        ).map(_.event).name(FeatureStoreProducerName)
-
-    featureStoreDataRecordProducer
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.docx
new file mode 100644
index 000000000..576bcd985
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.scala
deleted file mode 100644
index 0d5c06d7c..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.scala
+++ /dev/null
@@ -1,35 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.FeatureContext
-import com.twitter.ml.featurestore.catalog.entities.core.Tweet
-import com.twitter.ml.featurestore.catalog.features.trends.TweetTrendsScores
-import com.twitter.ml.featurestore.lib.TweetId
-import com.twitter.ml.featurestore.lib.data.PredictionRecord
-import com.twitter.ml.featurestore.lib.data.PredictionRecordAdapter
-import com.twitter.ml.featurestore.lib.feature.BoundFeature
-import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet
-import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
-import java.util
-import scala.collection.JavaConverters._
-
-object TweetFeaturesAdapter extends TimelinesAdapterBase[PredictionRecord] {
-
-  private val ContinuousFeatureMap: Map[BoundFeature[TweetId, Double], Feature.Continuous] = Map()
-
-  val TweetFeaturesSet: BoundFeatureSet = new BoundFeatureSet(ContinuousFeatureMap.keys.toSet)
-
-  val AllFeatures: Seq[Feature[_]] =
-    ContinuousFeatureMap.values.toSeq
-
-  private val adapter = PredictionRecordAdapter.oneToOne(TweetFeaturesSet)
-
-  override def getFeatureContext: FeatureContext = new FeatureContext(AllFeatures: _*)
-
-  override def commonFeatures: Set[Feature[_]] = Set.empty
-
-  override def adaptToDataRecords(record: PredictionRecord): util.List[DataRecord] = {
-    List(adapter.adaptToDataRecord(record)).asJava
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.docx
new file mode 100644
index 000000000..065940779
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.scala
deleted file mode 100644
index b461e179a..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.featurestore.lib.TweetId
-import com.twitter.ml.featurestore.lib.data.PredictionRecord
-import com.twitter.ml.featurestore.lib.entity.Entity
-import com.twitter.ml.featurestore.lib.online.{FeatureStoreClient, FeatureStoreRequest}
-import com.twitter.storehaus.ReadableStore
-import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
-import com.twitter.util.Future
-import scala.collection.JavaConverters._
-
-class TweetFeaturesReadableStore(
-  featureStoreClient: FeatureStoreClient,
-  tweetEntity: Entity[TweetId],
-  tweetFeaturesAdapter: TimelinesAdapterBase[PredictionRecord])
-    extends ReadableStore[Set[Long], DataRecord] {
-
-  override def multiGet[K <: Set[Long]](keys: Set[K]): Map[K, Future[Option[DataRecord]]] = {
-    val orderedKeys: Seq[K] = keys.toSeq
-    val featureStoreRequests: Seq[FeatureStoreRequest] = getFeatureStoreRequests(orderedKeys)
-    val predictionRecordsFut: Future[Seq[PredictionRecord]] = featureStoreClient(
-      featureStoreRequests)
-
-    getDataRecordMap(orderedKeys, predictionRecordsFut)
-  }
-
-  private def getFeatureStoreRequests[K <: Set[Long]](
-    orderedKeys: Seq[K]
-  ): Seq[FeatureStoreRequest] = {
-    orderedKeys.map { key: Set[Long] =>
-      FeatureStoreRequest(
-        entityIds = key.map { tweetId => tweetEntity.withId(TweetId(tweetId)) }.toSeq
-      )
-    }
-  }
-
-  private def getDataRecordMap[K <: Set[Long]](
-    orderedKeys: Seq[K],
-    predictionRecordsFut: Future[Seq[PredictionRecord]]
-  ): Map[K, Future[Option[DataRecord]]] = {
-    orderedKeys.zipWithIndex.map {
-      case (tweetIdSet, index) =>
-        val dataRecordFutOpt: Future[Option[DataRecord]] = predictionRecordsFut.map {
-          predictionRecords =>
-            predictionRecords.lift(index).flatMap { predictionRecordAtIndex: PredictionRecord =>
-              tweetFeaturesAdapter.adaptToDataRecords(predictionRecordAtIndex).asScala.headOption
-            }
-        }
-        (tweetIdSet, dataRecordFutOpt)
-    }.toMap
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.docx
new file mode 100644
index 000000000..8357fa66a
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.scala
deleted file mode 100644
index 92b6618e4..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.scala
+++ /dev/null
@@ -1,7 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.summingbird_internal.runner.storm.GenericRunner
-
-object TypeSafeRunner {
-  def main(args: Array[String]): Unit = GenericRunner(args, TimelinesRealTimeAggregatesJob(_))
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.docx
new file mode 100644
index 000000000..29ec43b5a
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.scala
deleted file mode 100644
index 8ff39938c..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.scala
+++ /dev/null
@@ -1,108 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType.InferredGender
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType.UserState
-import com.twitter.ml.api.Feature.Binary
-import com.twitter.ml.api.Feature.Text
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.FeatureContext
-import com.twitter.ml.api.RichDataRecord
-import com.twitter.ml.featurestore.catalog.entities.core.User
-import com.twitter.ml.featurestore.catalog.features.core.UserAccount
-import com.twitter.ml.featurestore.catalog.features.geo.UserLocation
-import com.twitter.ml.featurestore.catalog.features.magicrecs.UserActivity
-import com.twitter.ml.featurestore.lib.EntityId
-import com.twitter.ml.featurestore.lib.data.PredictionRecord
-import com.twitter.ml.featurestore.lib.feature.BoundFeature
-import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet
-import com.twitter.ml.featurestore.lib.UserId
-import com.twitter.ml.featurestore.lib.{Discrete => FSDiscrete}
-import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
-import com.twitter.timelines.prediction.features.user_health.UserHealthFeatures
-import java.lang.{Boolean => JBoolean}
-import java.lang.{String => JString}
-import java.util
-import scala.collection.JavaConverters._
-
-object UserFeaturesAdapter extends TimelinesAdapterBase[PredictionRecord] {
-  val UserStateBoundFeature: BoundFeature[UserId, FSDiscrete] = UserActivity.UserState.bind(User)
-
-  /**
-   * Boolean features about viewer's user state. 
-   * enum UserState {
-   *   NEW = 0,
-   *   NEAR_ZERO = 1,
-   *   VERY_LIGHT = 2,
-   *   LIGHT = 3,
-   *   MEDIUM_TWEETER = 4,
-   *   MEDIUM_NON_TWEETER = 5,
-   *   HEAVY_NON_TWEETER = 6,
-   *   HEAVY_TWEETER = 7
-   * }(persisted='true')
-   */
-  val IS_USER_NEW = new Binary("timelines.user_state.is_user_new", Set(UserState).asJava)
-  val IS_USER_LIGHT = new Binary("timelines.user_state.is_user_light", Set(UserState).asJava)
-  val IS_USER_MEDIUM_TWEETER =
-    new Binary("timelines.user_state.is_user_medium_tweeter", Set(UserState).asJava)
-  val IS_USER_MEDIUM_NON_TWEETER =
-    new Binary("timelines.user_state.is_user_medium_non_tweeter", Set(UserState).asJava)
-  val IS_USER_HEAVY_NON_TWEETER =
-    new Binary("timelines.user_state.is_user_heavy_non_tweeter", Set(UserState).asJava)
-  val IS_USER_HEAVY_TWEETER =
-    new Binary("timelines.user_state.is_user_heavy_tweeter", Set(UserState).asJava)
-  val userStateToFeatureMap: Map[Long, Binary] = Map(
-    0L -> IS_USER_NEW,
-    1L -> IS_USER_LIGHT,
-    2L -> IS_USER_LIGHT,
-    3L -> IS_USER_LIGHT,
-    4L -> IS_USER_MEDIUM_TWEETER,
-    5L -> IS_USER_MEDIUM_NON_TWEETER,
-    6L -> IS_USER_HEAVY_NON_TWEETER,
-    7L -> IS_USER_HEAVY_TWEETER
-  )
-
-  val UserStateBooleanFeatures: Set[Feature[_]] = userStateToFeatureMap.values.toSet
-
-
-  val USER_COUNTRY_ID = new Text("geo.user_location.country_code")
-  val UserCountryCodeFeature: BoundFeature[UserId, String] =
-    UserLocation.CountryCodeAlpha2.bind(User)
-  val UserLocationFeatures: Set[Feature[_]] = Set(USER_COUNTRY_ID)
-
-  private val UserVerifiedFeaturesSet = Set(
-    UserAccount.IsUserVerified.bind(User),
-    UserAccount.IsUserBlueVerified.bind(User),
-    UserAccount.IsUserGoldVerified.bind(User),
-    UserAccount.IsUserGrayVerified.bind(User)
-  )
-
-  val UserFeaturesSet: BoundFeatureSet =
-    BoundFeatureSet(UserStateBoundFeature, UserCountryCodeFeature) ++
-      BoundFeatureSet(UserVerifiedFeaturesSet.asInstanceOf[Set[BoundFeature[_ <: EntityId, _]]])
-
-  private val allFeatures: Seq[Feature[_]] =
-    UserStateBooleanFeatures.toSeq ++ GenderBooleanFeatures.toSeq ++
-      UserLocationFeatures.toSeq ++ Seq(UserHealthFeatures.IsUserVerifiedUnion)
-
-  override def getFeatureContext: FeatureContext = new FeatureContext(allFeatures: _*)
-  override def commonFeatures: Set[Feature[_]] = Set.empty
-
-  override def adaptToDataRecords(record: PredictionRecord): util.List[DataRecord] = {
-    val newRecord = new RichDataRecord(new DataRecord)
-    record
-      .getFeatureValue(UserStateBoundFeature)
-      .flatMap { userState => userStateToFeatureMap.get(userState.value) }.foreach {
-        booleanFeature => newRecord.setFeatureValue[JBoolean](booleanFeature, true)
-      }
-    record.getFeatureValue(UserCountryCodeFeature).foreach { countryCodeFeatureValue =>
-      newRecord.setFeatureValue[JString](USER_COUNTRY_ID, countryCodeFeatureValue)
-    }
-
-    val isUserVerifiedUnion =
-      UserVerifiedFeaturesSet.exists(feature => record.getFeatureValue(feature).getOrElse(false))
-    newRecord.setFeatureValue[JBoolean](UserHealthFeatures.IsUserVerifiedUnion, isUserVerifiedUnion)
-
-    List(newRecord.getRecord).asJava
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.docx b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.docx
new file mode 100644
index 000000000..3769b54e5
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.scala
deleted file mode 100644
index c1931c32b..000000000
--- a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.scala
+++ /dev/null
@@ -1,37 +0,0 @@
-package com.twitter.timelines.prediction.common.aggregates.real_time
-
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.featurestore.lib.UserId
-import com.twitter.ml.featurestore.lib.data.PredictionRecord
-import com.twitter.ml.featurestore.lib.entity.Entity
-import com.twitter.ml.featurestore.lib.online.{FeatureStoreClient, FeatureStoreRequest}
-import com.twitter.storehaus.ReadableStore
-import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
-import com.twitter.util.Future
-import scala.collection.JavaConverters._
-
-class UserFeaturesReadableStore(
-  featureStoreClient: FeatureStoreClient,
-  userEntity: Entity[UserId],
-  userFeaturesAdapter: TimelinesAdapterBase[PredictionRecord])
-    extends ReadableStore[Set[Long], DataRecord] {
-
-  override def multiGet[K <: Set[Long]](keys: Set[K]): Map[K, Future[Option[DataRecord]]] = {
-    val orderedKeys = keys.toSeq
-    val featureStoreRequests: Seq[FeatureStoreRequest] = orderedKeys.map { key: Set[Long] =>
-      FeatureStoreRequest(
-        entityIds = key.map(userId => userEntity.withId(UserId(userId))).toSeq
-      )
-    }
-    val predictionRecordsFut: Future[Seq[PredictionRecord]] = featureStoreClient(
-      featureStoreRequests)
-
-    orderedKeys.zipWithIndex.map {
-      case (userId, index) =>
-        val dataRecordFutOpt = predictionRecordsFut.map { predictionRecords =>
-          userFeaturesAdapter.adaptToDataRecords(predictionRecords(index)).asScala.headOption
-        }
-        (userId, dataRecordFutOpt)
-    }.toMap
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/README.docx b/src/scala/com/twitter/timelines/prediction/features/README.docx
new file mode 100644
index 000000000..7db5ccc23
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/README.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/README.md b/src/scala/com/twitter/timelines/prediction/features/README.md
deleted file mode 100644
index d42639a77..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/README.md
+++ /dev/null
@@ -1,6 +0,0 @@
-## Prediction Features
-
-This directory contains a collection of `Features` (`com.twitter.ml.api.Feature`) which are definitions of feature names and datatypes which allow the features to be efficiently processed and passed to the different ranking models. 
-By predefining the features with their names and datatypes, when features are being generated, scribed or used to score they can be identified with only a hash of their name. 
-
-Not all of these features are used in the model, many are experimental or deprecated. 
\ No newline at end of file
diff --git a/src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD b/src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD
deleted file mode 100644
index 3d3c34092..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD
+++ /dev/null
@@ -1,11 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/scala/com/twitter/suggests/controller_data",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/timelineservice/server/suggests/logging:thrift-scala",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD.docx
new file mode 100644
index 000000000..b0654e23a
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.docx
new file mode 100644
index 000000000..8202740c3
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.scala
deleted file mode 100644
index cccb99998..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.scala
+++ /dev/null
@@ -1,169 +0,0 @@
-package com.twitter.timelines.prediction.features.client_log_event
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.Feature.Binary
-import com.twitter.ml.api.Feature.Continuous
-import com.twitter.ml.api.Feature.Discrete
-import scala.collection.JavaConverters._
-import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.thriftscala.CandidateTweetSourceId
-
-object ClientLogEventDataRecordFeatures {
-  val HasConsumerVideo = new Binary(
-    "client_log_event.tweet.has_consumer_video",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val PhotoCount = new Continuous(
-    "client_log_event.tweet.photo_count",
-    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
-  val HasImage = new Binary(
-    "client_log_event.tweet.has_image",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val IsReply =
-    new Binary("client_log_event.tweet.is_reply", Set(PublicReplies, PrivateReplies).asJava)
-  val IsRetweet =
-    new Binary("client_log_event.tweet.is_retweet", Set(PublicRetweets, PrivateRetweets).asJava)
-  val IsPromoted =
-    new Binary(
-      "client_log_event.tweet.is_promoted",
-      Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HasVisibleLink = new Binary(
-    "client_log_event.tweet.has_visible_link",
-    Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HasHashtag = new Binary(
-    "client_log_event.tweet.has_hashtag",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val FromMutualFollow = new Binary("client_log_event.tweet.from_mutual_follow")
-  val IsInNetwork = new Binary("client_log_event.tweet.is_in_network")
-  val IsNotInNetwork = new Binary("client_log_event.tweet.is_not_in_network")
-  val FromRecap = new Binary("client_log_event.tweet.from_recap")
-  val FromRecycled = new Binary("client_log_event.tweet.from_recycled")
-  val FromActivity = new Binary("client_log_event.tweet.from_activity")
-  val FromSimcluster = new Binary("client_log_event.tweet.from_simcluster")
-  val FromErg = new Binary("client_log_event.tweet.from_erg")
-  val FromCroon = new Binary("client_log_event.tweet.from_croon")
-  val FromList = new Binary("client_log_event.tweet.from_list")
-  val FromRecTopic = new Binary("client_log_event.tweet.from_rec_topic")
-  val InjectedPosition = new Discrete("client_log_event.tweet.injectedPosition")
-  val TextOnly = new Binary("client_log_event.tweet.text_only")
-  val HasLikedBySocialContext = new Binary("client_log_event.tweet.has_liked_by_social_context")
-  val HasFollowedBySocialContext = new Binary(
-    "client_log_event.tweet.has_followed_by_social_context")
-  val HasTopicSocialContext = new Binary("client_log_event.tweet.has_topic_social_context")
-  val IsFollowedTopicTweet = new Binary("client_log_event.tweet.is_followed_topic_tweet")
-  val IsRecommendedTopicTweet = new Binary("client_log_event.tweet.is_recommended_topic_tweet")
-  val IsTweetAgeLessThan15Seconds = new Binary(
-    "client_log_event.tweet.tweet_age_less_than_15_seconds")
-  val IsTweetAgeLessThanOrEqualTo30Minutes = new Binary(
-    "client_log_event.tweet.tweet_age_lte_30_minutes")
-  val IsTweetAgeLessThanOrEqualTo1Hour = new Binary("client_log_event.tweet.tweet_age_lte_1_hour")
-  val IsTweetAgeLessThanOrEqualTo6Hours = new Binary("client_log_event.tweet.tweet_age_lte_6_hours")
-  val IsTweetAgeLessThanOrEqualTo12Hours = new Binary(
-    "client_log_event.tweet.tweet_age_lte_12_hours")
-  val IsTweetAgeGreaterThanOrEqualTo24Hours = new Binary(
-    "client_log_event.tweet.tweet_age_gte_24_hours")
-  val HasGreaterThanOrEqualTo100Favs = new Binary("client_log_event.tweet.has_gte_100_favs")
-  val HasGreaterThanOrEqualTo1KFavs = new Binary("client_log_event.tweet.has_gte_1k_favs")
-  val HasGreaterThanOrEqualTo10KFavs = new Binary("client_log_event.tweet.has_gte_10k_favs")
-  val HasGreaterThanOrEqualTo100KFavs = new Binary("client_log_event.tweet.has_gte_100k_favs")
-  val HasGreaterThanOrEqualTo10Retweets = new Binary("client_log_event.tweet.has_gte_10_retweets")
-  val HasGreaterThanOrEqualTo100Retweets = new Binary("client_log_event.tweet.has_gte_100_retweets")
-  val HasGreaterThanOrEqualTo1KRetweets = new Binary("client_log_event.tweet.has_gte_1k_retweets")
-
-  val TweetTypeToFeatureMap: Map[String, Binary] = Map(
-    "link" -> HasVisibleLink,
-    "hashtag" -> HasHashtag,
-    "mutual_follow" -> FromMutualFollow,
-    "in_network" -> IsInNetwork,
-    "text_only" -> TextOnly,
-    "has_liked_by_social_context" -> HasLikedBySocialContext,
-    "has_followed_by_social_context" -> HasFollowedBySocialContext,
-    "has_topic_social_context" -> HasTopicSocialContext,
-    "is_followed_topic_tweet" -> IsFollowedTopicTweet,
-    "is_recommended_topic_tweet" -> IsRecommendedTopicTweet,
-    "tweet_age_less_than_15_seconds" -> IsTweetAgeLessThan15Seconds,
-    "tweet_age_lte_30_minutes" -> IsTweetAgeLessThanOrEqualTo30Minutes,
-    "tweet_age_lte_1_hour" -> IsTweetAgeLessThanOrEqualTo1Hour,
-    "tweet_age_lte_6_hours" -> IsTweetAgeLessThanOrEqualTo6Hours,
-    "tweet_age_lte_12_hours" -> IsTweetAgeLessThanOrEqualTo12Hours,
-    "tweet_age_gte_24_hours" -> IsTweetAgeGreaterThanOrEqualTo24Hours,
-    "has_gte_100_favs" -> HasGreaterThanOrEqualTo100Favs,
-    "has_gte_1k_favs" -> HasGreaterThanOrEqualTo1KFavs,
-    "has_gte_10k_favs" -> HasGreaterThanOrEqualTo10KFavs,
-    "has_gte_100k_favs" -> HasGreaterThanOrEqualTo100KFavs,
-    "has_gte_10_retweets" -> HasGreaterThanOrEqualTo10Retweets,
-    "has_gte_100_retweets" -> HasGreaterThanOrEqualTo100Retweets,
-    "has_gte_1k_retweets" -> HasGreaterThanOrEqualTo1KRetweets
-  )
-
-  val CandidateTweetSourceIdFeatureMap: Map[Int, Binary] = Map(
-    CandidateTweetSourceId.RecapTweet.value -> FromRecap,
-    CandidateTweetSourceId.RecycledTweet.value -> FromRecycled,
-    CandidateTweetSourceId.RecommendedTweet.value -> FromActivity,
-    CandidateTweetSourceId.Simcluster.value -> FromSimcluster,
-    CandidateTweetSourceId.ErgTweet.value -> FromErg,
-    CandidateTweetSourceId.CroonTopicTweet.value -> FromCroon,
-    CandidateTweetSourceId.CroonTweet.value -> FromCroon,
-    CandidateTweetSourceId.ListTweet.value -> FromList,
-    CandidateTweetSourceId.RecommendedTopicTweet.value -> FromRecTopic
-  )
-
-  val TweetFeaturesV2: Set[Feature[_]] = Set(
-    HasImage,
-    IsReply,
-    IsRetweet,
-    HasVisibleLink,
-    HasHashtag,
-    FromMutualFollow,
-    IsInNetwork
-  )
-
-  val ContentTweetTypeFeatures: Set[Feature[_]] = Set(
-    HasImage,
-    HasVisibleLink,
-    HasHashtag,
-    TextOnly,
-    HasVisibleLink
-  )
-
-  val FreshnessTweetTypeFeatures: Set[Feature[_]] = Set(
-    IsTweetAgeLessThan15Seconds,
-    IsTweetAgeLessThanOrEqualTo30Minutes,
-    IsTweetAgeLessThanOrEqualTo1Hour,
-    IsTweetAgeLessThanOrEqualTo6Hours,
-    IsTweetAgeLessThanOrEqualTo12Hours,
-    IsTweetAgeGreaterThanOrEqualTo24Hours
-  )
-
-  val SocialProofTweetTypeFeatures: Set[Feature[_]] = Set(
-    HasLikedBySocialContext,
-    HasFollowedBySocialContext,
-    HasTopicSocialContext
-  )
-
-  val TopicTweetPreferenceTweetTypeFeatures: Set[Feature[_]] = Set(
-    IsFollowedTopicTweet,
-    IsRecommendedTopicTweet
-  )
-
-  val TweetPopularityTweetTypeFeatures: Set[Feature[_]] = Set(
-    HasGreaterThanOrEqualTo100Favs,
-    HasGreaterThanOrEqualTo1KFavs,
-    HasGreaterThanOrEqualTo10KFavs,
-    HasGreaterThanOrEqualTo100KFavs,
-    HasGreaterThanOrEqualTo10Retweets,
-    HasGreaterThanOrEqualTo100Retweets,
-    HasGreaterThanOrEqualTo1KRetweets
-  )
-
-  val UserGraphInteractionTweetTypeFeatures: Set[Feature[_]] = Set(
-    IsInNetwork,
-    FromMutualFollow,
-    IsNotInNetwork,
-    IsPromoted
-  )
-
-  val UserContentPreferenceTweetTypeFeatures: Set[Feature[_]] =
-    ContentTweetTypeFeatures ++ FreshnessTweetTypeFeatures ++ SocialProofTweetTypeFeatures ++ TopicTweetPreferenceTweetTypeFeatures ++ TweetPopularityTweetTypeFeatures ++ UserGraphInteractionTweetTypeFeatures
-  val AuthorContentPreferenceTweetTypeFeatures: Set[Feature[_]] =
-    Set(IsInNetwork, FromMutualFollow, IsNotInNetwork) ++ ContentTweetTypeFeatures
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/BUILD b/src/scala/com/twitter/timelines/prediction/features/common/BUILD
deleted file mode 100644
index bfbe764c7..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/common/BUILD
+++ /dev/null
@@ -1,11 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/ml/api:data-java",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/common/BUILD.docx
new file mode 100644
index 000000000..833ed8363
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/common/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.docx
new file mode 100644
index 000000000..284804225
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.scala
deleted file mode 100644
index d995fe2b0..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.scala
+++ /dev/null
@@ -1,536 +0,0 @@
-package com.twitter.timelines.prediction.features.common
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.FeatureType
-import com.twitter.ml.api.Feature.Binary
-import java.lang.{Boolean => JBoolean}
-import scala.collection.JavaConverters._
-
-object CombinedFeatures {
-  val IS_CLICKED =
-    new Binary("timelines.engagement.is_clicked", Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_DWELLED =
-    new Binary("timelines.engagement.is_dwelled", Set(TweetsViewed, EngagementsPrivate).asJava)
-  val IS_DWELLED_IN_BOUNDS_V1 = new Binary(
-    "timelines.engagement.is_dwelled_in_bounds_v1",
-    Set(TweetsViewed, EngagementsPrivate).asJava)
-  val IS_FAVORITED = new Binary(
-    "timelines.engagement.is_favorited",
-    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_FOLLOWED = new Binary(
-    "timelines.engagement.is_followed",
-    Set(EngagementsPrivate, EngagementsPublic, Follow).asJava)
-  val IS_IMPRESSED =
-    new Binary("timelines.engagement.is_impressed", Set(TweetsViewed, EngagementsPrivate).asJava)
-  val IS_OPEN_LINKED = new Binary(
-    "timelines.engagement.is_open_linked",
-    Set(EngagementsPrivate, LinksClickedOn).asJava)
-  val IS_PHOTO_EXPANDED = new Binary(
-    "timelines.engagement.is_photo_expanded",
-    Set(MediaEngagementActivities, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED = new Binary(
-    "timelines.engagement.is_profile_clicked",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_QUOTED = new Binary(
-    "timelines.engagement.is_quoted",
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED = new Binary(
-    "timelines.engagement.is_replied",
-    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_RETWEETED = new Binary(
-    "timelines.engagement.is_retweeted",
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_RETWEETED_WITHOUT_QUOTE = new Binary(
-    "timelines.enagagement.is_retweeted_without_quote",
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_SHARE_DM_CLICKED =
-    new Binary("timelines.engagement.is_tweet_share_dm_clicked", Set(EngagementsPrivate).asJava)
-  val IS_SHARE_DM_SENT =
-    new Binary("timelines.engagement.is_tweet_share_dm_sent", Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_25 = new Binary(
-    "timelines.engagement.is_video_playback_25",
-    Set(MediaEngagementActivities, EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_50 = new Binary(
-    "timelines.engagement.is_video_playback_50",
-    Set(MediaEngagementActivities, EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_75 = new Binary(
-    "timelines.engagement.is_video_playback_75",
-    Set(MediaEngagementActivities, EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_95 = new Binary(
-    "timelines.engagement.is_video_playback_95",
-    Set(MediaEngagementActivities, EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_COMPLETE = new Binary(
-    "timelines.engagement.is_video_playback_complete",
-    Set(MediaEngagementActivities, EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_START = new Binary(
-    "timelines.engagement.is_video_playback_start",
-    Set(MediaEngagementActivities, EngagementsPrivate).asJava)
-  val IS_VIDEO_VIEWED = new Binary(
-    "timelines.engagement.is_video_viewed",
-    Set(MediaEngagementActivities, EngagementsPrivate).asJava)
-  val IS_VIDEO_QUALITY_VIEWED = new Binary(
-    "timelines.engagement.is_video_quality_viewed",
-    Set(MediaEngagementActivities, EngagementsPrivate).asJava
-  ) 
-  // v1: post click engagements: fav, reply
-  val IS_GOOD_CLICKED_CONVO_DESC_V1 = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_favorited_or_replied",
-    Set(
-      TweetsClicked,
-      PublicLikes,
-      PrivateLikes,
-      PublicReplies,
-      PrivateReplies,
-      EngagementsPrivate,
-      EngagementsPublic).asJava)
-  // v2: post click engagements: click
-  val IS_GOOD_CLICKED_CONVO_DESC_V2 = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_v2",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_favorited_or_replied_or_dwell_sum_gte_60_secs",
-    Set(
-      TweetsClicked,
-      PublicLikes,
-      PrivateLikes,
-      PublicReplies,
-      PrivateReplies,
-      EngagementsPrivate,
-      EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_FAVORITED = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_favorited",
-    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_REPLIED = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_replied",
-    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_RETWEETED = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_retweeted",
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_CLICKED = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_clicked",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_FOLLOWED = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_followed",
-    Set(EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_SHARE_DM_CLICKED = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_share_dm_clicked",
-    Set(EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_PROFILE_CLICKED = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_profile_clicked",
-    Set(EngagementsPrivate).asJava)
-
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_0 = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_uam_gt_0",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_1 = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_uam_gt_1",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_2 = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_uam_gt_2",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_3 = new Binary(
-    "timelines.engagement.is_good_clicked_convo_desc_uam_gt_3",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-
-  val IS_TWEET_DETAIL_DWELLED = new Binary(
-    "timelines.engagement.is_tweet_detail_dwelled",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_8_SEC = new Binary(
-    "timelines.engagement.is_tweet_detail_dwelled_8_sec",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_15_SEC = new Binary(
-    "timelines.engagement.is_tweet_detail_dwelled_15_sec",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_25_SEC = new Binary(
-    "timelines.engagement.is_tweet_detail_dwelled_25_sec",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_30_SEC = new Binary(
-    "timelines.engagement.is_tweet_detail_dwelled_30_sec",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-
-  val IS_PROFILE_DWELLED = new Binary(
-    "timelines.engagement.is_profile_dwelled",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_DWELLED_10_SEC = new Binary(
-    "timelines.engagement.is_profile_dwelled_10_sec",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_DWELLED_20_SEC = new Binary(
-    "timelines.engagement.is_profile_dwelled_20_sec",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_DWELLED_30_SEC = new Binary(
-    "timelines.engagement.is_profile_dwelled_30_sec",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED = new Binary(
-    "timelines.engagement.is_fullscreen_video_dwelled",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Binary(
-    "timelines.engagement.is_fullscreen_video_dwelled_5_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Binary(
-    "timelines.engagement.is_fullscreen_video_dwelled_10_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Binary(
-    "timelines.engagement.is_fullscreen_video_dwelled_20_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Binary(
-    "timelines.engagement.is_fullscreen_video_dwelled_30_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_LINK_DWELLED_15_SEC = new Binary(
-    "timelines.engagement.is_link_dwelled_15_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_LINK_DWELLED_30_SEC = new Binary(
-    "timelines.engagement.is_link_dwelled_30_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_LINK_DWELLED_60_SEC = new Binary(
-    "timelines.engagement.is_link_dwelled_60_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_HOME_LATEST_VISITED =
-    new Binary("timelines.engagement.is_home_latest_visited", Set(EngagementsPrivate).asJava)
-
-  val IS_BOOKMARKED =
-    new Binary("timelines.engagement.is_bookmarked", Set(EngagementsPrivate).asJava)
-  val IS_SHARED =
-    new Binary("timelines.engagement.is_shared", Set(EngagementsPrivate).asJava)
-  val IS_SHARE_MENU_CLICKED =
-    new Binary("timelines.engagement.is_share_menu_clicked", Set(EngagementsPrivate).asJava)
-
-  // Negative engagements
-  val IS_DONT_LIKE = new Binary("timelines.engagement.is_dont_like", Set(EngagementsPrivate).asJava)
-  val IS_BLOCK_CLICKED = new Binary(
-    "timelines.engagement.is_block_clicked",
-    Set(Blocks, TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_BLOCK_DIALOG_BLOCKED = new Binary(
-    "timelines.engagement.is_block_dialog_blocked",
-    Set(Blocks, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_MUTE_CLICKED = new Binary(
-    "timelines.engagement.is_mute_clicked",
-    Set(Mutes, TweetsClicked, EngagementsPrivate).asJava)
-  val IS_MUTE_DIALOG_MUTED =
-    new Binary("timelines.engagement.is_mute_dialog_muted", Set(Mutes, EngagementsPrivate).asJava)
-  val IS_REPORT_TWEET_CLICKED = new Binary(
-    "timelines.engagement.is_report_tweet_clicked",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_CARET_CLICKED =
-    new Binary("timelines.engagement.is_caret_clicked", Set(EngagementsPrivate).asJava)
-  val IS_NOT_ABOUT_TOPIC =
-    new Binary("timelines.engagement.is_not_about_topic", Set(EngagementsPrivate).asJava)
-  val IS_NOT_RECENT =
-    new Binary("timelines.engagement.is_not_recent", Set(EngagementsPrivate).asJava)
-  val IS_NOT_RELEVANT =
-    new Binary("timelines.engagement.is_not_relevant", Set(EngagementsPrivate).asJava)
-  val IS_SEE_FEWER =
-    new Binary("timelines.engagement.is_see_fewer", Set(EngagementsPrivate).asJava)
-  val IS_UNFOLLOW_TOPIC =
-    new Binary("timelines.engagement.is_unfollow_topic", Set(EngagementsPrivate).asJava)
-  val IS_FOLLOW_TOPIC =
-    new Binary("timelines.engagement.is_follow_topic", Set(EngagementsPrivate).asJava)
-  val IS_NOT_INTERESTED_IN_TOPIC =
-    new Binary("timelines.engagement.is_not_interested_in_topic", Set(EngagementsPrivate).asJava)
-  val IS_NEGATIVE_FEEDBACK =
-    new Binary("timelines.engagement.is_negative_feedback", Set(EngagementsPrivate).asJava)
-  val IS_IMPLICIT_POSITIVE_FEEDBACK_UNION =
-    new Binary(
-      "timelines.engagement.is_implicit_positive_feedback_union",
-      Set(EngagementsPrivate).asJava)
-  val IS_EXPLICIT_POSITIVE_FEEDBACK_UNION =
-    new Binary(
-      "timelines.engagement.is_explicit_positive_feedback_union",
-      Set(EngagementsPrivate).asJava)
-  val IS_ALL_NEGATIVE_FEEDBACK_UNION =
-    new Binary(
-      "timelines.engagement.is_all_negative_feedback_union",
-      Set(EngagementsPrivate).asJava)
-  // Reciprocal engagements for reply forward engagement
-  val IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_impressed_by_author",
-    Set(EngagementsPrivate).asJava)
-  val IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_favorited_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava)
-  val IS_REPLIED_REPLY_QUOTED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_quoted_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava)
-  val IS_REPLIED_REPLY_REPLIED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_replied_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava)
-  val IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_retweeted_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava)
-  val IS_REPLIED_REPLY_BLOCKED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_blocked_by_author",
-    Set(Blocks, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_followed_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, Follow).asJava)
-  val IS_REPLIED_REPLY_UNFOLLOWED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_unfollowed_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_MUTED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_muted_by_author",
-    Set(Mutes, EngagementsPrivate).asJava)
-  val IS_REPLIED_REPLY_REPORTED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_replied_reply_reported_by_author",
-    Set(EngagementsPrivate).asJava)
-
-  // Reciprocal engagements for fav forward engagement
-  val IS_FAVORITED_FAV_FAVORITED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_favorited_fav_favorited_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava
-  )
-  val IS_FAVORITED_FAV_REPLIED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_favorited_fav_replied_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava
-  )
-  val IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_favorited_fav_retweeted_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava
-  )
-  val IS_FAVORITED_FAV_FOLLOWED_BY_AUTHOR = new Binary(
-    "timelines.engagement.is_favorited_fav_followed_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava
-  )
-
-  // define good profile click by considering following engagements (follow, fav, reply, retweet, etc.) at profile page
-  val IS_PROFILE_CLICKED_AND_PROFILE_FOLLOW = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_follow",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, Follow).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_FAV = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_fav",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateLikes, PublicLikes).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_REPLY = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_reply",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateReplies, PublicReplies).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_RETWEET = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_retweet",
-    Set(
-      ProfilesViewed,
-      ProfilesClicked,
-      EngagementsPrivate,
-      PrivateRetweets,
-      PublicRetweets).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_CLICK = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_tweet_click",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, TweetsClicked).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_SHARE_DM_CLICK = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_share_dm_click",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // This derived label is the union of all binary features above
-  val IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_engaged",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, EngagementsPublic).asJava)
-
-  // define bad profile click by considering following engagements (user report, tweet report, mute, block, etc) at profile page
-  val IS_PROFILE_CLICKED_AND_PROFILE_USER_REPORT_CLICK = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_user_report_click",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_REPORT_CLICK = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_tweet_report_click",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_MUTE = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_mute",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_BLOCK = new Binary(
-    "timelines.engagement.is_profile_clicked_and_profile_block",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // This derived label is the union of bad profile click engagements and existing negative feedback
-  val IS_NEGATIVE_FEEDBACK_V2 = new Binary(
-    "timelines.engagement.is_negative_feedback_v2",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_NEGATIVE_FEEDBACK_UNION = new Binary(
-    "timelines.engagement.is_negative_feedback_union",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // don't like, mute or profile page -> mute
-  val IS_WEAK_NEGATIVE_FEEDBACK = new Binary(
-    "timelines.engagement.is_weak_negative_feedback",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // report, block or profile page -> report, block
-  val IS_STRONG_NEGATIVE_FEEDBACK = new Binary(
-    "timelines.engagement.is_strong_negative_feedback",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // engagement for following user from any surface area
-  val IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Binary(
-    "timelines.engagement.is_followed_from_any_surface_area",
-    Set(EngagementsPublic, EngagementsPrivate).asJava)
-  val IS_RELEVANCE_PROMPT_YES_CLICKED = new Binary(
-    "timelines.engagement.is_relevance_prompt_yes_clicked",
-    Set(EngagementsPublic, EngagementsPrivate).asJava)
-
-  // Reply downvote engagements
-  val IS_REPLY_DOWNVOTED =
-    new Binary("timelines.engagement.is_reply_downvoted", Set(EngagementsPrivate).asJava)
-  val IS_REPLY_DOWNVOTE_REMOVED =
-    new Binary("timelines.engagement.is_reply_downvote_removed", Set(EngagementsPrivate).asJava)
-
-  /**
-   * Contains all engagements that are used/consumed by real-time
-   * aggregates summingbird jobs. These engagements need to be
-   * extractable from [[ClientEvent]].
-   */
-  val EngagementsRealTime: Set[Feature[JBoolean]] = Set(
-    IS_CLICKED,
-    IS_DWELLED,
-    IS_FAVORITED,
-    IS_FOLLOWED,
-    IS_OPEN_LINKED,
-    IS_PHOTO_EXPANDED,
-    IS_PROFILE_CLICKED,
-    IS_QUOTED,
-    IS_REPLIED,
-    IS_RETWEETED,
-    IS_RETWEETED_WITHOUT_QUOTE,
-    IS_SHARE_DM_CLICKED,
-    IS_SHARE_DM_SENT,
-    IS_VIDEO_PLAYBACK_50,
-    IS_VIDEO_VIEWED,
-    IS_VIDEO_QUALITY_VIEWED
-  )
-
-  val NegativeEngagementsRealTime: Set[Feature[JBoolean]] = Set(
-    IS_REPORT_TWEET_CLICKED,
-    IS_BLOCK_CLICKED,
-    IS_MUTE_CLICKED
-  )
-
-  val NegativeEngagementsRealTimeDontLike: Set[Feature[JBoolean]] = Set(
-    IS_DONT_LIKE
-  )
-
-  val NegativeEngagementsSecondary: Set[Feature[JBoolean]] = Set(
-    IS_NOT_INTERESTED_IN_TOPIC,
-    IS_NOT_ABOUT_TOPIC,
-    IS_NOT_RECENT,
-    IS_NOT_RELEVANT,
-    IS_SEE_FEWER,
-    IS_UNFOLLOW_TOPIC
-  )
-
-  val PrivateEngagements: Set[Feature[JBoolean]] = Set(
-    IS_CLICKED,
-    IS_DWELLED,
-    IS_OPEN_LINKED,
-    IS_PHOTO_EXPANDED,
-    IS_PROFILE_CLICKED,
-    IS_QUOTED,
-    IS_VIDEO_PLAYBACK_50,
-    IS_VIDEO_QUALITY_VIEWED
-  )
-
-  val ImpressedEngagements: Set[Feature[JBoolean]] = Set(
-    IS_IMPRESSED
-  )
-
-  val PrivateEngagementsV2: Set[Feature[JBoolean]] = Set(
-    IS_CLICKED,
-    IS_OPEN_LINKED,
-    IS_PHOTO_EXPANDED,
-    IS_PROFILE_CLICKED,
-    IS_VIDEO_PLAYBACK_50,
-    IS_VIDEO_QUALITY_VIEWED
-  ) ++ ImpressedEngagements
-
-  val CoreEngagements: Set[Feature[JBoolean]] = Set(
-    IS_FAVORITED,
-    IS_REPLIED,
-    IS_RETWEETED
-  )
-
-  val DwellEngagements: Set[Feature[JBoolean]] = Set(
-    IS_DWELLED
-  )
-
-  val PrivateCoreEngagements: Set[Feature[JBoolean]] = Set(
-    IS_CLICKED,
-    IS_OPEN_LINKED,
-    IS_PHOTO_EXPANDED,
-    IS_VIDEO_PLAYBACK_50,
-    IS_VIDEO_QUALITY_VIEWED
-  )
-
-  val ConditionalEngagements: Set[Feature[JBoolean]] = Set(
-    IS_GOOD_CLICKED_CONVO_DESC_V1,
-    IS_GOOD_CLICKED_CONVO_DESC_V2,
-    IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S
-  )
-
-  val ShareEngagements: Set[Feature[JBoolean]] = Set(
-    IS_SHARED,
-    IS_SHARE_MENU_CLICKED
-  )
-
-  val BookmarkEngagements: Set[Feature[JBoolean]] = Set(
-    IS_BOOKMARKED
-  )
-
-  val TweetDetailDwellEngagements: Set[Feature[JBoolean]] = Set(
-    IS_TWEET_DETAIL_DWELLED,
-    IS_TWEET_DETAIL_DWELLED_8_SEC,
-    IS_TWEET_DETAIL_DWELLED_15_SEC,
-    IS_TWEET_DETAIL_DWELLED_25_SEC,
-    IS_TWEET_DETAIL_DWELLED_30_SEC
-  )
-
-  val ProfileDwellEngagements: Set[Feature[JBoolean]] = Set(
-    IS_PROFILE_DWELLED,
-    IS_PROFILE_DWELLED_10_SEC,
-    IS_PROFILE_DWELLED_20_SEC,
-    IS_PROFILE_DWELLED_30_SEC
-  )
-
-  val FullscreenVideoDwellEngagements: Set[Feature[JBoolean]] = Set(
-    IS_FULLSCREEN_VIDEO_DWELLED,
-    IS_FULLSCREEN_VIDEO_DWELLED_5_SEC,
-    IS_FULLSCREEN_VIDEO_DWELLED_10_SEC,
-    IS_FULLSCREEN_VIDEO_DWELLED_20_SEC,
-    IS_FULLSCREEN_VIDEO_DWELLED_30_SEC
-  )
-
-  // Please do not add new engagements here until having estimated the impact
-  // to capacity requirements. User-author real-time aggregates have a very
-  // large key space.
-  val UserAuthorEngagements: Set[Feature[JBoolean]] = CoreEngagements ++ DwellEngagements ++ Set(
-    IS_CLICKED,
-    IS_PROFILE_CLICKED,
-    IS_PHOTO_EXPANDED,
-    IS_VIDEO_PLAYBACK_50,
-    IS_NEGATIVE_FEEDBACK_UNION
-  )
-
-  val ImplicitPositiveEngagements: Set[Feature[JBoolean]] = Set(
-    IS_CLICKED,
-    IS_DWELLED,
-    IS_OPEN_LINKED,
-    IS_PROFILE_CLICKED,
-    IS_QUOTED,
-    IS_VIDEO_PLAYBACK_50,
-    IS_VIDEO_QUALITY_VIEWED,
-    IS_TWEET_DETAIL_DWELLED,
-    IS_GOOD_CLICKED_CONVO_DESC_V1,
-    IS_GOOD_CLICKED_CONVO_DESC_V2,
-    IS_SHARED,
-    IS_SHARE_MENU_CLICKED,
-    IS_SHARE_DM_SENT,
-    IS_SHARE_DM_CLICKED
-  )
-
-  val ExplicitPositiveEngagements: Set[Feature[JBoolean]] = CoreEngagements ++ Set(
-    IS_FOLLOWED,
-    IS_QUOTED
-  )
-
-  val AllNegativeEngagements: Set[Feature[JBoolean]] =
-    NegativeEngagementsRealTime ++ NegativeEngagementsRealTimeDontLike ++ Set(
-      IS_NOT_RECENT,
-      IS_NOT_RELEVANT,
-      IS_SEE_FEWER
-    )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.docx
new file mode 100644
index 000000000..ac439d03c
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.scala
deleted file mode 100644
index 369b48b39..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.scala
+++ /dev/null
@@ -1,97 +0,0 @@
-package com.twitter.timelines.prediction.features.common
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.Feature.Binary
-import java.lang.{Boolean => JBoolean}
-import scala.collection.JavaConverters._
-
-object ProfileLabelFeatures {
-  private val prefix = "profile"
-
-  val IS_CLICKED =
-    new Binary(s"${prefix}.engagement.is_clicked", Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_DWELLED =
-    new Binary(s"${prefix}.engagement.is_dwelled", Set(TweetsViewed, EngagementsPrivate).asJava)
-  val IS_FAVORITED = new Binary(
-    s"${prefix}.engagement.is_favorited",
-    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED = new Binary(
-    s"${prefix}.engagement.is_replied",
-    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_RETWEETED = new Binary(
-    s"${prefix}.engagement.is_retweeted",
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-
-  // Negative engagements
-  val IS_DONT_LIKE =
-    new Binary(s"${prefix}.engagement.is_dont_like", Set(EngagementsPrivate).asJava)
-  val IS_BLOCK_CLICKED = new Binary(
-    s"${prefix}.engagement.is_block_clicked",
-    Set(Blocks, TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_MUTE_CLICKED = new Binary(
-    s"${prefix}.engagement.is_mute_clicked",
-    Set(Mutes, TweetsClicked, EngagementsPrivate).asJava)
-  val IS_REPORT_TWEET_CLICKED = new Binary(
-    s"${prefix}.engagement.is_report_tweet_clicked",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-
-  val IS_NEGATIVE_FEEDBACK_UNION = new Binary(
-    s"${prefix}.engagement.is_negative_feedback_union",
-    Set(EngagementsPrivate, Blocks, Mutes, TweetsClicked, EngagementsPublic).asJava)
-
-  val CoreEngagements: Set[Feature[JBoolean]] = Set(
-    IS_CLICKED,
-    IS_DWELLED,
-    IS_FAVORITED,
-    IS_REPLIED,
-    IS_RETWEETED
-  )
-
-  val NegativeEngagements: Set[Feature[JBoolean]] = Set(
-    IS_DONT_LIKE,
-    IS_BLOCK_CLICKED,
-    IS_MUTE_CLICKED,
-    IS_REPORT_TWEET_CLICKED
-  )
-
-}
-
-object SearchLabelFeatures {
-  private val prefix = "search"
-
-  val IS_CLICKED =
-    new Binary(s"${prefix}.engagement.is_clicked", Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_DWELLED =
-    new Binary(s"${prefix}.engagement.is_dwelled", Set(TweetsViewed, EngagementsPrivate).asJava)
-  val IS_FAVORITED = new Binary(
-    s"${prefix}.engagement.is_favorited",
-    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED = new Binary(
-    s"${prefix}.engagement.is_replied",
-    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_RETWEETED = new Binary(
-    s"${prefix}.engagement.is_retweeted",
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_PROFILE_CLICKED_SEARCH_RESULT_USER = new Binary(
-    s"${prefix}.engagement.is_profile_clicked_search_result_user",
-    Set(ProfilesClicked, ProfilesViewed, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_SEARCH_RESULT_TWEET = new Binary(
-    s"${prefix}.engagement.is_profile_clicked_search_result_tweet",
-    Set(ProfilesClicked, ProfilesViewed, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_TYPEAHEAD_USER = new Binary(
-    s"${prefix}.engagement.is_profile_clicked_typeahead_user",
-    Set(ProfilesClicked, ProfilesViewed, EngagementsPrivate).asJava)
-
-  val CoreEngagements: Set[Feature[JBoolean]] = Set(
-    IS_CLICKED,
-    IS_DWELLED,
-    IS_FAVORITED,
-    IS_REPLIED,
-    IS_RETWEETED,
-    IS_PROFILE_CLICKED_SEARCH_RESULT_USER,
-    IS_PROFILE_CLICKED_SEARCH_RESULT_TWEET,
-    IS_PROFILE_CLICKED_TYPEAHEAD_USER
-  )
-}
-// Add Tweet Detail labels later
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.docx
new file mode 100644
index 000000000..f8b64c569
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.scala
deleted file mode 100644
index 99698530f..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.scala
+++ /dev/null
@@ -1,759 +0,0 @@
-package com.twitter.timelines.prediction.features.common
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.ml.api.Feature.Binary
-import com.twitter.ml.api.Feature.Continuous
-import com.twitter.ml.api.Feature.Discrete
-import com.twitter.ml.api.Feature.SparseBinary
-import com.twitter.ml.api.Feature.SparseContinuous
-import com.twitter.ml.api.Feature.Text
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
-import scala.collection.JavaConverters._
-
-object TimelinesSharedFeatures extends TimelinesSharedFeatures("")
-object InReplyToTweetTimelinesSharedFeatures extends TimelinesSharedFeatures("in_reply_to_tweet")
-
-/**
- * Defines shared features
- */
-class TimelinesSharedFeatures(prefix: String) {
-  private def name(featureName: String): String = {
-    if (prefix.nonEmpty) {
-      s"$prefix.$featureName"
-    } else {
-      featureName
-    }
-  }
-
-  // meta
-  val EXPERIMENT_META = new SparseBinary(
-    name("timelines.meta.experiment_meta"),
-    Set(ExperimentId, ExperimentName).asJava)
-
-  // historically used in the "combined models" to distinguish in-network and out of network tweets.
-  // now the feature denotes which adapter (recap or rectweet) was used to generate the datarecords.
-  // and is used by the data collection pipeline to split the training data.
-  val INJECTION_TYPE = new Discrete(name("timelines.meta.injection_type"))
-
-  // Used to indicate which injection module is this
-  val INJECTION_MODULE_NAME = new Text(name("timelines.meta.injection_module_name"))
-
-  val LIST_ID = new Discrete(name("timelines.meta.list_id"))
-  val LIST_IS_PINNED = new Binary(name("timelines.meta.list_is_pinned"))
-
-  // internal id per each PS request. mainly to join back commomn features and candidate features later
-  val PREDICTION_REQUEST_ID = new Discrete(name("timelines.meta.prediction_request_id"))
-  // internal id per each TLM request. mainly to deduplicate re-served cached tweets in logging
-  val SERVED_REQUEST_ID = new Discrete(name("timelines.meta.served_request_id"))
-  // internal id used for join key in kafka logging, equal to servedRequestId if tweet is cached,
-  // else equal to predictionRequestId
-  val SERVED_ID = new Discrete(name("timelines.meta.served_id"))
-  val REQUEST_JOIN_ID = new Discrete(name("timelines.meta.request_join_id"))
-
-  // Internal boolean flag per tweet, whether the tweet is served from RankedTweetsCache: TQ-14050
-  // this feature should not be trained on, blacklisted in feature_config: D838346
-  val IS_READ_FROM_CACHE = new Binary(name("timelines.meta.is_read_from_cache"))
-
-  // model score discounts
-  val PHOTO_DISCOUNT = new Continuous(name("timelines.score_discounts.photo"))
-  val VIDEO_DISCOUNT = new Continuous(name("timelines.score_discounts.video"))
-  val TWEET_HEIGHT_DISCOUNT = new Continuous(name("timelines.score_discounts.tweet_height"))
-  val TOXICITY_DISCOUNT = new Continuous(name("timelines.score_discounts.toxicity"))
-
-  // engagements
-  val ENGAGEMENT_TYPE = new Discrete(name("timelines.engagement.type"))
-  val PREDICTED_IS_FAVORITED =
-    new Continuous(name("timelines.engagement_predicted.is_favorited"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_RETWEETED =
-    new Continuous(name("timelines.engagement_predicted.is_retweeted"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_QUOTED =
-    new Continuous(name("timelines.engagement_predicted.is_quoted"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_REPLIED =
-    new Continuous(name("timelines.engagement_predicted.is_replied"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_OPEN_LINKED = new Continuous(
-    name("timelines.engagement_predicted.is_open_linked"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_GOOD_OPEN_LINK = new Continuous(
-    name("timelines.engagement_predicted.is_good_open_link"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_PROFILE_CLICKED = new Continuous(
-    name("timelines.engagement_predicted.is_profile_clicked"),
-    Set(EngagementScore).asJava
-  )
-  val PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Continuous(
-    name("timelines.engagement_predicted.is_profile_clicked_and_profile_engaged"),
-    Set(EngagementScore).asJava
-  )
-  val PREDICTED_IS_CLICKED =
-    new Continuous(name("timelines.engagement_predicted.is_clicked"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_PHOTO_EXPANDED = new Continuous(
-    name("timelines.engagement_predicted.is_photo_expanded"),
-    Set(EngagementScore).asJava
-  )
-  val PREDICTED_IS_FOLLOWED =
-    new Continuous(name("timelines.engagement_predicted.is_followed"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DONT_LIKE =
-    new Continuous(name("timelines.engagement_predicted.is_dont_like"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_VIDEO_PLAYBACK_50 = new Continuous(
-    name("timelines.engagement_predicted.is_video_playback_50"),
-    Set(EngagementScore).asJava
-  )
-  val PREDICTED_IS_VIDEO_QUALITY_VIEWED = new Continuous(
-    name("timelines.engagement_predicted.is_video_quality_viewed"),
-    Set(EngagementScore).asJava
-  )
-  val PREDICTED_IS_GOOD_CLICKED_V1 = new Continuous(
-    name("timelines.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_GOOD_CLICKED_V2 = new Continuous(
-    name("timelines.engagement_predicted.is_good_clicked_convo_desc_v2"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_TWEET_DETAIL_DWELLED_8_SEC = new Continuous(
-    name("timelines.engagement_predicted.is_tweet_detail_dwelled_8_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_TWEET_DETAIL_DWELLED_15_SEC = new Continuous(
-    name("timelines.engagement_predicted.is_tweet_detail_dwelled_15_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_TWEET_DETAIL_DWELLED_25_SEC = new Continuous(
-    name("timelines.engagement_predicted.is_tweet_detail_dwelled_25_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_TWEET_DETAIL_DWELLED_30_SEC = new Continuous(
-    name("timelines.engagement_predicted.is_tweet_detail_dwelled_30_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S = new Continuous(
-    name(
-      "timelines.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied_or_dwell_sum_gte_60_secs"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Continuous(
-    name("timelines.engagement_predicted.is_favorited_fav_engaged_by_author"),
-    Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_REPORT_TWEET_CLICKED =
-    new Continuous(
-      name("timelines.engagement_predicted.is_report_tweet_clicked"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_IS_NEGATIVE_FEEDBACK = new Continuous(
-    name("timelines.engagement_predicted.is_negative_feedback"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_NEGATIVE_FEEDBACK_V2 = new Continuous(
-    name("timelines.engagement_predicted.is_negative_feedback_v2"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_WEAK_NEGATIVE_FEEDBACK = new Continuous(
-    name("timelines.engagement_predicted.is_weak_negative_feedback"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_STRONG_NEGATIVE_FEEDBACK = new Continuous(
-    name("timelines.engagement_predicted.is_strong_negative_feedback"),
-    Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_DWELLED_IN_BOUNDS_V1 = new Continuous(
-    name("timelines.engagement_predicted.is_dwelled_in_bounds_v1"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_DWELL_NORMALIZED_OVERALL = new Continuous(
-    name("timelines.engagement_predicted.dwell_normalized_overall"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_DWELL_CDF =
-    new Continuous(name("timelines.engagement_predicted.dwell_cdf"), Set(EngagementScore).asJava)
-  val PREDICTED_DWELL_CDF_OVERALL = new Continuous(
-    name("timelines.engagement_predicted.dwell_cdf_overall"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED =
-    new Continuous(name("timelines.engagement_predicted.is_dwelled"), Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_HOME_LATEST_VISITED = new Continuous(
-    name("timelines.engagement_predicted.is_home_latest_visited"),
-    Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_BOOKMARKED = new Continuous(
-    name("timelines.engagement_predicted.is_bookmarked"),
-    Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_SHARED =
-    new Continuous(name("timelines.engagement_predicted.is_shared"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SHARE_MENU_CLICKED = new Continuous(
-    name("timelines.engagement_predicted.is_share_menu_clicked"),
-    Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_PROFILE_DWELLED_20_SEC = new Continuous(
-    name("timelines.engagement_predicted.is_profile_dwelled_20_sec"),
-    Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Continuous(
-    name("timelines.engagement_predicted.is_fullscreen_video_dwelled_5_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Continuous(
-    name("timelines.engagement_predicted.is_fullscreen_video_dwelled_10_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Continuous(
-    name("timelines.engagement_predicted.is_fullscreen_video_dwelled_20_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Continuous(
-    name("timelines.engagement_predicted.is_fullscreen_video_dwelled_30_sec"),
-    Set(EngagementScore).asJava)
-
-  // Please use this timestamp, not the `meta.timestamp`, for the actual served timestamp.
-  val SERVED_TIMESTAMP =
-    new Discrete("timelines.meta.timestamp.served", Set(PrivateTimestamp).asJava)
-
-  // timestamp when the engagement has occurred. do not train on these features
-  val TIMESTAMP_FAVORITED =
-    new Discrete("timelines.meta.timestamp.engagement.favorited", Set(PublicTimestamp).asJava)
-  val TIMESTAMP_RETWEETED =
-    new Discrete("timelines.meta.timestamp.engagement.retweeted", Set(PublicTimestamp).asJava)
-  val TIMESTAMP_REPLIED =
-    new Discrete("timelines.meta.timestamp.engagement.replied", Set(PublicTimestamp).asJava)
-  val TIMESTAMP_PROFILE_CLICKED = new Discrete(
-    "timelines.meta.timestamp.engagement.profile_clicked",
-    Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_CLICKED =
-    new Discrete("timelines.meta.timestamp.engagement.clicked", Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_PHOTO_EXPANDED =
-    new Discrete("timelines.meta.timestamp.engagement.photo_expanded", Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_DWELLED =
-    new Discrete("timelines.meta.timestamp.engagement.dwelled", Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_VIDEO_PLAYBACK_50 = new Discrete(
-    "timelines.meta.timestamp.engagement.video_playback_50",
-    Set(PrivateTimestamp).asJava)
-  // reply engaged by author
-  val TIMESTAMP_REPLY_FAVORITED_BY_AUTHOR = new Discrete(
-    "timelines.meta.timestamp.engagement.reply_favorited_by_author",
-    Set(PublicTimestamp).asJava)
-  val TIMESTAMP_REPLY_REPLIED_BY_AUTHOR = new Discrete(
-    "timelines.meta.timestamp.engagement.reply_replied_by_author",
-    Set(PublicTimestamp).asJava)
-  val TIMESTAMP_REPLY_RETWEETED_BY_AUTHOR = new Discrete(
-    "timelines.meta.timestamp.engagement.reply_retweeted_by_author",
-    Set(PublicTimestamp).asJava)
-  // fav engaged by author
-  val TIMESTAMP_FAV_FAVORITED_BY_AUTHOR = new Discrete(
-    "timelines.meta.timestamp.engagement.fav_favorited_by_author",
-    Set(PublicTimestamp).asJava)
-  val TIMESTAMP_FAV_REPLIED_BY_AUTHOR = new Discrete(
-    "timelines.meta.timestamp.engagement.fav_replied_by_author",
-    Set(PublicTimestamp).asJava)
-  val TIMESTAMP_FAV_RETWEETED_BY_AUTHOR = new Discrete(
-    "timelines.meta.timestamp.engagement.fav_retweeted_by_author",
-    Set(PublicTimestamp).asJava)
-  val TIMESTAMP_FAV_FOLLOWED_BY_AUTHOR = new Discrete(
-    "timelines.meta.timestamp.engagement.fav_followed_by_author",
-    Set(PublicTimestamp).asJava)
-  // good click
-  val TIMESTAMP_GOOD_CLICK_CONVO_DESC_FAVORITED = new Discrete(
-    "timelines.meta.timestamp.engagement.good_click_convo_desc_favorited",
-    Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_GOOD_CLICK_CONVO_DESC_REPLIIED = new Discrete(
-    "timelines.meta.timestamp.engagement.good_click_convo_desc_replied",
-    Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_GOOD_CLICK_CONVO_DESC_PROFILE_CLICKED = new Discrete(
-    "timelines.meta.timestamp.engagement.good_click_convo_desc_profiile_clicked",
-    Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_NEGATIVE_FEEDBACK = new Discrete(
-    "timelines.meta.timestamp.engagement.negative_feedback",
-    Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_REPORT_TWEET_CLICK =
-    new Discrete(
-      "timelines.meta.timestamp.engagement.report_tweet_click",
-      Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_IMPRESSED =
-    new Discrete("timelines.meta.timestamp.engagement.impressed", Set(PublicTimestamp).asJava)
-  val TIMESTAMP_TWEET_DETAIL_DWELLED =
-    new Discrete(
-      "timelines.meta.timestamp.engagement.tweet_detail_dwelled",
-      Set(PublicTimestamp).asJava)
-  val TIMESTAMP_PROFILE_DWELLED =
-    new Discrete("timelines.meta.timestamp.engagement.profile_dwelled", Set(PublicTimestamp).asJava)
-  val TIMESTAMP_FULLSCREEN_VIDEO_DWELLED =
-    new Discrete(
-      "timelines.meta.timestamp.engagement.fullscreen_video_dwelled",
-      Set(PublicTimestamp).asJava)
-  val TIMESTAMP_LINK_DWELLED =
-    new Discrete("timelines.meta.timestamp.engagement.link_dwelled", Set(PublicTimestamp).asJava)
-
-  // these are used to dup and split the negative instances during streaming processing (kafka)
-  val TRAINING_FOR_FAVORITED =
-    new Binary("timelines.meta.training_data.for_favorited", Set(EngagementId).asJava)
-  val TRAINING_FOR_RETWEETED =
-    new Binary("timelines.meta.training_data.for_retweeted", Set(EngagementId).asJava)
-  val TRAINING_FOR_REPLIED =
-    new Binary("timelines.meta.training_data.for_replied", Set(EngagementId).asJava)
-  val TRAINING_FOR_PROFILE_CLICKED =
-    new Binary("timelines.meta.training_data.for_profile_clicked", Set(EngagementId).asJava)
-  val TRAINING_FOR_CLICKED =
-    new Binary("timelines.meta.training_data.for_clicked", Set(EngagementId).asJava)
-  val TRAINING_FOR_PHOTO_EXPANDED =
-    new Binary("timelines.meta.training_data.for_photo_expanded", Set(EngagementId).asJava)
-  val TRAINING_FOR_VIDEO_PLAYBACK_50 =
-    new Binary("timelines.meta.training_data.for_video_playback_50", Set(EngagementId).asJava)
-  val TRAINING_FOR_NEGATIVE_FEEDBACK =
-    new Binary("timelines.meta.training_data.for_negative_feedback", Set(EngagementId).asJava)
-  val TRAINING_FOR_REPORTED =
-    new Binary("timelines.meta.training_data.for_reported", Set(EngagementId).asJava)
-  val TRAINING_FOR_DWELLED =
-    new Binary("timelines.meta.training_data.for_dwelled", Set(EngagementId).asJava)
-  val TRAINING_FOR_SHARED =
-    new Binary("timelines.meta.training_data.for_shared", Set(EngagementId).asJava)
-  val TRAINING_FOR_SHARE_MENU_CLICKED =
-    new Binary("timelines.meta.training_data.for_share_menu_clicked", Set(EngagementId).asJava)
-
-  // Warning: do not train on these features
-  val PREDICTED_SCORE = new Continuous(name("timelines.score"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_FAV = new Continuous(name("timelines.score.fav"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_RETWEET =
-    new Continuous(name("timelines.score.retweet"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_REPLY =
-    new Continuous(name("timelines.score.reply"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_OPEN_LINK =
-    new Continuous(name("timelines.score.open_link"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_GOOD_OPEN_LINK =
-    new Continuous(name("timelines.score.good_open_link"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_PROFILE_CLICK =
-    new Continuous(name("timelines.score.profile_click"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_DETAIL_EXPAND =
-    new Continuous(name("timelines.score.detail_expand"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_PHOTO_EXPAND =
-    new Continuous(name("timelines.score.photo_expand"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_PLAYBACK_50 =
-    new Continuous(name("timelines.score.playback_50"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_VIDEO_QUALITY_VIEW =
-    new Continuous(name("timelines.score.video_quality_view"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_DONT_LIKE =
-    new Continuous(name("timelines.score.dont_like"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_PROFILE_CLICKED_AND_PROFILE_ENGAGED =
-    new Continuous(
-      name("timelines.score.profile_clicked_and_profile_engaged"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_GOOD_CLICKED_V1 =
-    new Continuous(name("timelines.score.good_clicked_v1"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_GOOD_CLICKED_V2 =
-    new Continuous(name("timelines.score.good_clicked_v2"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_DWELL =
-    new Continuous(name("timelines.score.dwell"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_DWELL_CDF =
-    new Continuous(name("timelines.score.dwell_cfd"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_DWELL_CDF_OVERALL =
-    new Continuous(name("timelines.score.dwell_cfd_overall"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_DWELL_NORMALIZED_OVERALL =
-    new Continuous(name("timelines.score.dwell_normalized_overall"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_NEGATIVE_FEEDBACK =
-    new Continuous(name("timelines.score.negative_feedback"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_NEGATIVE_FEEDBACK_V2 =
-    new Continuous(name("timelines.score.negative_feedback_v2"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_WEAK_NEGATIVE_FEEDBACK =
-    new Continuous(name("timelines.score.weak_negative_feedback"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_STRONG_NEGATIVE_FEEDBACK =
-    new Continuous(name("timelines.score.strong_negative_feedback"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_REPORT_TWEET_CLICKED =
-    new Continuous(name("timelines.score.report_tweet_clicked"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_UNFOLLOW_TOPIC =
-    new Continuous(name("timelines.score.unfollow_topic"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_FOLLOW =
-    new Continuous(name("timelines.score.follow"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_RELEVANCE_PROMPT_YES_CLICKED =
-    new Continuous(
-      name("timelines.score.relevance_prompt_yes_clicked"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_BOOKMARK =
-    new Continuous(name("timelines.score.bookmark"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_SHARE =
-    new Continuous(name("timelines.score.share"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_SHARE_MENU_CLICK =
-    new Continuous(name("timelines.score.share_menu_click"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_PROFILE_DWELLED =
-    new Continuous(name("timelines.score.good_profile_dwelled"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_TWEET_DETAIL_DWELLED =
-    new Continuous(name("timelines.score.tweet_detail_dwelled"), Set(EngagementScore).asJava)
-  val PREDICTED_SCORE_FULLSCREEN_VIDEO_DWELL =
-    new Continuous(name("timelines.score.fullscreen_video_dwell"), Set(EngagementScore).asJava)
-
-  // hydrated in TimelinesSharedFeaturesAdapter that recap adapter calls
-  val ORIGINAL_AUTHOR_ID = new Discrete(name("entities.original_author_id"), Set(UserId).asJava)
-  val SOURCE_AUTHOR_ID = new Discrete(name("entities.source_author_id"), Set(UserId).asJava)
-  val SOURCE_TWEET_ID = new Discrete(name("entities.source_tweet_id"), Set(TweetId).asJava)
-  val TOPIC_ID = new Discrete(name("entities.topic_id"), Set(SemanticcoreClassification).asJava)
-  val INFERRED_TOPIC_IDS =
-    new SparseBinary(name("entities.inferred_topic_ids"), Set(SemanticcoreClassification).asJava)
-  val INFERRED_TOPIC_ID = TypedAggregateGroup.sparseFeature(INFERRED_TOPIC_IDS)
-
-  val WEIGHTED_FAV_COUNT = new Continuous(
-    name("timelines.earlybird.weighted_fav_count"),
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val WEIGHTED_RETWEET_COUNT = new Continuous(
-    name("timelines.earlybird.weighted_retweet_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val WEIGHTED_REPLY_COUNT = new Continuous(
-    name("timelines.earlybird.weighted_reply_count"),
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-  val WEIGHTED_QUOTE_COUNT = new Continuous(
-    name("timelines.earlybird.weighted_quote_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val EMBEDS_IMPRESSION_COUNT_V2 = new Continuous(
-    name("timelines.earlybird.embeds_impression_count_v2"),
-    Set(CountOfImpression).asJava)
-  val EMBEDS_URL_COUNT_V2 = new Continuous(
-    name("timelines.earlybird.embeds_url_count_v2"),
-    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
-  val DECAYED_FAVORITE_COUNT = new Continuous(
-    name("timelines.earlybird.decayed_favorite_count"),
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val DECAYED_RETWEET_COUNT = new Continuous(
-    name("timelines.earlybird.decayed_retweet_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val DECAYED_REPLY_COUNT = new Continuous(
-    name("timelines.earlybird.decayed_reply_count"),
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-  val DECAYED_QUOTE_COUNT = new Continuous(
-    name("timelines.earlybird.decayed_quote_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val FAKE_FAVORITE_COUNT = new Continuous(
-    name("timelines.earlybird.fake_favorite_count"),
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val FAKE_RETWEET_COUNT = new Continuous(
-    name("timelines.earlybird.fake_retweet_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val FAKE_REPLY_COUNT = new Continuous(
-    name("timelines.earlybird.fake_reply_count"),
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-  val FAKE_QUOTE_COUNT = new Continuous(
-    name("timelines.earlybird.fake_quote_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val QUOTE_COUNT = new Continuous(
-    name("timelines.earlybird.quote_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-
-  // Safety features
-  val LABEL_ABUSIVE_FLAG =
-    new Binary(name("timelines.earlybird.label_abusive_flag"), Set(TweetSafetyLabels).asJava)
-  val LABEL_ABUSIVE_HI_RCL_FLAG =
-    new Binary(name("timelines.earlybird.label_abusive_hi_rcl_flag"), Set(TweetSafetyLabels).asJava)
-  val LABEL_DUP_CONTENT_FLAG =
-    new Binary(name("timelines.earlybird.label_dup_content_flag"), Set(TweetSafetyLabels).asJava)
-  val LABEL_NSFW_HI_PRC_FLAG =
-    new Binary(name("timelines.earlybird.label_nsfw_hi_prc_flag"), Set(TweetSafetyLabels).asJava)
-  val LABEL_NSFW_HI_RCL_FLAG =
-    new Binary(name("timelines.earlybird.label_nsfw_hi_rcl_flag"), Set(TweetSafetyLabels).asJava)
-  val LABEL_SPAM_FLAG =
-    new Binary(name("timelines.earlybird.label_spam_flag"), Set(TweetSafetyLabels).asJava)
-  val LABEL_SPAM_HI_RCL_FLAG =
-    new Binary(name("timelines.earlybird.label_spam_hi_rcl_flag"), Set(TweetSafetyLabels).asJava)
-
-  // Periscope features
-  val PERISCOPE_EXISTS = new Binary(
-    name("timelines.earlybird.periscope_exists"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val PERISCOPE_IS_LIVE = new Binary(
-    name("timelines.earlybird.periscope_is_live"),
-    Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava)
-  val PERISCOPE_HAS_BEEN_FEATURED = new Binary(
-    name("timelines.earlybird.periscope_has_been_featured"),
-    Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava)
-  val PERISCOPE_IS_CURRENTLY_FEATURED = new Binary(
-    name("timelines.earlybird.periscope_is_currently_featured"),
-    Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava
-  )
-  val PERISCOPE_IS_FROM_QUALITY_SOURCE = new Binary(
-    name("timelines.earlybird.periscope_is_from_quality_source"),
-    Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava
-  )
-
-  val VISIBLE_TOKEN_RATIO = new Continuous(name("timelines.earlybird.visible_token_ratio"))
-  val HAS_QUOTE = new Binary(
-    name("timelines.earlybird.has_quote"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val IS_COMPOSER_SOURCE_CAMERA = new Binary(
-    name("timelines.earlybird.is_composer_source_camera"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-
-  val EARLYBIRD_SCORE = new Continuous(
-    name("timelines.earlybird_score"),
-    Set(EngagementScore).asJava
-  ) // separating from the rest of "timelines.earlybird." namespace
-
-  val DWELL_TIME_MS = new Continuous(
-    name("timelines.engagement.dwell_time_ms"),
-    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava)
-
-  val TWEET_DETAIL_DWELL_TIME_MS = new Continuous(
-    name("timelines.engagement.tweet_detail_dwell_time_ms"),
-    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava)
-
-  val PROFILE_DWELL_TIME_MS = new Continuous(
-    name("timelines.engagement.profile_dwell_time_ms"),
-    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava)
-
-  val FULLSCREEN_VIDEO_DWELL_TIME_MS = new Continuous(
-    name("timelines.engagement.fullscreen_video_dwell_time_ms"),
-    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava)
-
-  val LINK_DWELL_TIME_MS = new Continuous(
-    name("timelines.engagement.link_dwell_time_ms"),
-    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava)
-
-  val ASPECT_RATIO_DEN = new Continuous(
-    name("tweetsource.tweet.media.aspect_ratio_den"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val ASPECT_RATIO_NUM = new Continuous(
-    name("tweetsource.tweet.media.aspect_ratio_num"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val BIT_RATE = new Continuous(
-    name("tweetsource.tweet.media.bit_rate"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HEIGHT_2 = new Continuous(
-    name("tweetsource.tweet.media.height_2"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HEIGHT_1 = new Continuous(
-    name("tweetsource.tweet.media.height_1"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HEIGHT_3 = new Continuous(
-    name("tweetsource.tweet.media.height_3"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HEIGHT_4 = new Continuous(
-    name("tweetsource.tweet.media.height_4"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val RESIZE_METHOD_1 = new Discrete(
-    name("tweetsource.tweet.media.resize_method_1"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val RESIZE_METHOD_2 = new Discrete(
-    name("tweetsource.tweet.media.resize_method_2"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val RESIZE_METHOD_3 = new Discrete(
-    name("tweetsource.tweet.media.resize_method_3"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val RESIZE_METHOD_4 = new Discrete(
-    name("tweetsource.tweet.media.resize_method_4"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val VIDEO_DURATION = new Continuous(
-    name("tweetsource.tweet.media.video_duration"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val WIDTH_1 = new Continuous(
-    name("tweetsource.tweet.media.width_1"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val WIDTH_2 = new Continuous(
-    name("tweetsource.tweet.media.width_2"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val WIDTH_3 = new Continuous(
-    name("tweetsource.tweet.media.width_3"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val WIDTH_4 = new Continuous(
-    name("tweetsource.tweet.media.width_4"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val NUM_MEDIA_TAGS = new Continuous(
-    name("tweetsource.tweet.media.num_tags"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val MEDIA_TAG_SCREEN_NAMES = new SparseBinary(
-    name("tweetsource.tweet.media.tag_screen_names"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val STICKER_IDS = new SparseBinary(
-    name("tweetsource.tweet.media.sticker_ids"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-
-  val NUM_COLOR_PALLETTE_ITEMS = new Continuous(
-    name("tweetsource.v2.tweet.media.num_color_pallette_items"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val COLOR_1_RED = new Continuous(
-    name("tweetsource.v2.tweet.media.color_1_red"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val COLOR_1_BLUE = new Continuous(
-    name("tweetsource.v2.tweet.media.color_1_blue"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val COLOR_1_GREEN = new Continuous(
-    name("tweetsource.v2.tweet.media.color_1_green"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val COLOR_1_PERCENTAGE = new Continuous(
-    name("tweetsource.v2.tweet.media.color_1_percentage"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val MEDIA_PROVIDERS = new SparseBinary(
-    name("tweetsource.v2.tweet.media.providers"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val IS_360 = new Binary(
-    name("tweetsource.v2.tweet.media.is_360"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val VIEW_COUNT =
-    new Continuous(name("tweetsource.v2.tweet.media.view_count"), Set(MediaContentMetrics).asJava)
-  val IS_MANAGED = new Binary(
-    name("tweetsource.v2.tweet.media.is_managed"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val IS_MONETIZABLE = new Binary(
-    name("tweetsource.v2.tweet.media.is_monetizable"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val IS_EMBEDDABLE = new Binary(
-    name("tweetsource.v2.tweet.media.is_embeddable"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val CLASSIFICATION_LABELS = new SparseContinuous(
-    name("tweetsource.v2.tweet.media.classification_labels"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-
-  val NUM_STICKERS = new Continuous(
-    name("tweetsource.v2.tweet.media.num_stickers"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val NUM_FACES = new Continuous(
-    name("tweetsource.v2.tweet.media.num_faces"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val FACE_AREAS = new Continuous(
-    name("tweetsource.v2.tweet.media.face_areas"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HAS_SELECTED_PREVIEW_IMAGE = new Binary(
-    name("tweetsource.v2.tweet.media.has_selected_preview_image"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HAS_TITLE = new Binary(
-    name("tweetsource.v2.tweet.media.has_title"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HAS_DESCRIPTION = new Binary(
-    name("tweetsource.v2.tweet.media.has_description"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HAS_VISIT_SITE_CALL_TO_ACTION = new Binary(
-    name("tweetsource.v2.tweet.media.has_visit_site_call_to_action"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HAS_APP_INSTALL_CALL_TO_ACTION = new Binary(
-    name("tweetsource.v2.tweet.media.has_app_install_call_to_action"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-  val HAS_WATCH_NOW_CALL_TO_ACTION = new Binary(
-    name("tweetsource.v2.tweet.media.has_watch_now_call_to_action"),
-    Set(MediaFile, MediaProcessingInformation).asJava)
-
-  val NUM_CAPS =
-    new Continuous(name("tweetsource.tweet.text.num_caps"), Set(PublicTweets, PrivateTweets).asJava)
-  val TWEET_LENGTH =
-    new Continuous(name("tweetsource.tweet.text.length"), Set(PublicTweets, PrivateTweets).asJava)
-  val TWEET_LENGTH_TYPE = new Discrete(
-    name("tweetsource.tweet.text.length_type"),
-    Set(PublicTweets, PrivateTweets).asJava)
-  val NUM_WHITESPACES = new Continuous(
-    name("tweetsource.tweet.text.num_whitespaces"),
-    Set(PublicTweets, PrivateTweets).asJava)
-  val HAS_QUESTION =
-    new Binary(name("tweetsource.tweet.text.has_question"), Set(PublicTweets, PrivateTweets).asJava)
-  val NUM_NEWLINES = new Continuous(
-    name("tweetsource.tweet.text.num_newlines"),
-    Set(PublicTweets, PrivateTweets).asJava)
-  val EMOJI_TOKENS = new SparseBinary(
-    name("tweetsource.v3.tweet.text.emoji_tokens"),
-    Set(PublicTweets, PrivateTweets).asJava)
-  val EMOTICON_TOKENS = new SparseBinary(
-    name("tweetsource.v3.tweet.text.emoticon_tokens"),
-    Set(PublicTweets, PrivateTweets).asJava)
-  val NUM_EMOJIS = new Continuous(
-    name("tweetsource.v3.tweet.text.num_emojis"),
-    Set(PublicTweets, PrivateTweets).asJava)
-  val NUM_EMOTICONS = new Continuous(
-    name("tweetsource.v3.tweet.text.num_emoticons"),
-    Set(PublicTweets, PrivateTweets).asJava)
-  val POS_UNIGRAMS = new SparseBinary(
-    name("tweetsource.v3.tweet.text.pos_unigrams"),
-    Set(PublicTweets, PrivateTweets).asJava)
-  val POS_BIGRAMS = new SparseBinary(
-    name("tweetsource.v3.tweet.text.pos_bigrams"),
-    Set(PublicTweets, PrivateTweets).asJava)
-  val TEXT_TOKENS = new SparseBinary(
-    name("tweetsource.v4.tweet.text.tokens"),
-    Set(PublicTweets, PrivateTweets).asJava)
-
-  // Health features model scores (see go/toxicity, go/pblock, go/pspammytweet)
-  val PBLOCK_SCORE =
-    new Continuous(name("timelines.earlybird.pblock_score"), Set(TweetSafetyScores).asJava)
-  val TOXICITY_SCORE =
-    new Continuous(name("timelines.earlybird.toxicity_score"), Set(TweetSafetyScores).asJava)
-  val EXPERIMENTAL_HEALTH_MODEL_SCORE_1 =
-    new Continuous(
-      name("timelines.earlybird.experimental_health_model_score_1"),
-      Set(TweetSafetyScores).asJava)
-  val EXPERIMENTAL_HEALTH_MODEL_SCORE_2 =
-    new Continuous(
-      name("timelines.earlybird.experimental_health_model_score_2"),
-      Set(TweetSafetyScores).asJava)
-  val EXPERIMENTAL_HEALTH_MODEL_SCORE_3 =
-    new Continuous(
-      name("timelines.earlybird.experimental_health_model_score_3"),
-      Set(TweetSafetyScores).asJava)
-  val EXPERIMENTAL_HEALTH_MODEL_SCORE_4 =
-    new Continuous(
-      name("timelines.earlybird.experimental_health_model_score_4"),
-      Set(TweetSafetyScores).asJava)
-  val PSPAMMY_TWEET_SCORE =
-    new Continuous(name("timelines.earlybird.pspammy_tweet_score"), Set(TweetSafetyScores).asJava)
-  val PREPORTED_TWEET_SCORE =
-    new Continuous(name("timelines.earlybird.preported_tweet_score"), Set(TweetSafetyScores).asJava)
-
-  // where record was displayed e.g. recap vs ranked timeline vs recycled
-  // (do NOT use for training in prediction, since this is set post-scoring)
-  // This differs from TimelinesSharedFeatures.INJECTION_TYPE, which is only
-  // set to Recap or Rectweet, and is available pre-scoring.
-  // This also differs from TimeFeatures.IS_TWEET_RECYCLED, which is set
-  // pre-scoring and indicates if a tweet is being considered for recycling.
-  // In contrast, DISPLAY_SUGGEST_TYPE == RecycledTweet means the tweet
-  // was actually served in a recycled tweet module. The two should currently
-  // have the same value, but need not in future, so please only use
-  // IS_TWEET_RECYCLED/CANDIDATE_TWEET_SOURCE_ID for training models and
-  // only use DISPLAY_SUGGEST_TYPE for offline analysis of tweets actually
-  // served in recycled modules.
-  val DISPLAY_SUGGEST_TYPE = new Discrete(name("recap.display.suggest_type"))
-
-  // Candidate tweet source id - related to DISPLAY_SUGGEST_TYPE above, but this is a
-  // property of the candidate rather than display location so is safe to use
-  // in model training, unlike DISPLAY_SUGGEST_TYPE.
-  val CANDIDATE_TWEET_SOURCE_ID =
-    new Discrete(name("timelines.meta.candidate_tweet_source_id"), Set(TweetId).asJava)
-
-  // Was at least 50% of this tweet in the user's viewport for at least 500 ms,
-  // OR did the user engage with the tweet publicly or privately
-  val IS_LINGER_IMPRESSION =
-    new Binary(name("timelines.engagement.is_linger_impression"), Set(EngagementsPrivate).asJava)
-
-  // Features to create rollups
-  val LANGUAGE_GROUP = new Discrete(name("timelines.tweet.text.language_group"))
-
-  // The final position index of the tweet being trained on in the timeline
-  // served from TLM (could still change later in TLS-API), as recorded by
-  // PositionIndexLoggingEnvelopeTransform.
-  val FINAL_POSITION_INDEX = new Discrete(name("timelines.display.final_position_index"))
-
-  // The traceId of the timeline request, can be used to group tweets in the same response.
-  val TRACE_ID = new Discrete(name("timelines.display.trace_id"), Set(TfeTransactionId).asJava)
-
-  // Whether this tweet was randomly injected into the timeline or not, for exploration purposes
-  val IS_RANDOM_TWEET = new Binary(name("timelines.display.is_random_tweet"))
-
-  //  Whether this tweet was reordered with softmax ranking for explore/exploit, and needs to
-  //  be excluded from exploit only holdback
-  val IS_SOFTMAX_RANKING_TWEET = new Binary(name("timelines.display.is_softmax_ranking_tweet"))
-
-  // Whether the user viewing the tweet has disabled ranked timeline.
-  val IS_RANKED_TIMELINE_DISABLER = new Binary(
-    name("timelines.user_features.is_ranked_timeline_disabler"),
-    Set(AnnotationValue, GeneralSettings).asJava)
-
-  // Whether the user viewing the tweet was one of those released from DDG 4205 control
-  // as part of http://go/shrink-4205 process to shrink the quality features holdback.
-  val IS_USER_RELEASED_FROM_QUALITY_HOLDBACK = new Binary(
-    name("timelines.user_features.is_released_from_quality_holdback"),
-    Set(ExperimentId, ExperimentName).asJava)
-
-  val INITIAL_PREDICTION_FAV =
-    new Continuous(name("timelines.initial_prediction.fav"), Set(EngagementScore).asJava)
-  val INITIAL_PREDICTION_RETWEET =
-    new Continuous(name("timelines.initial_prediction.retweet"), Set(EngagementScore).asJava)
-  val INITIAL_PREDICTION_REPLY =
-    new Continuous(name("timelines.initial_prediction.reply"), Set(EngagementScore).asJava)
-  val INITIAL_PREDICTION_OPEN_LINK =
-    new Continuous(name("timelines.initial_prediction.open_link"), Set(EngagementScore).asJava)
-  val INITIAL_PREDICTION_PROFILE_CLICK =
-    new Continuous(name("timelines.initial_prediction.profile_click"), Set(EngagementScore).asJava)
-  val INITIAL_PREDICTION_VIDEO_PLAYBACK_50 = new Continuous(
-    name("timelines.initial_prediction.video_playback_50"),
-    Set(EngagementScore).asJava)
-  val INITIAL_PREDICTION_DETAIL_EXPAND =
-    new Continuous(name("timelines.initial_prediction.detail_expand"), Set(EngagementScore).asJava)
-  val INITIAL_PREDICTION_PHOTO_EXPAND =
-    new Continuous(name("timelines.initial_prediction.photo_expand"), Set(EngagementScore).asJava)
-
-  val VIEWER_FOLLOWS_ORIGINAL_AUTHOR =
-    new Binary(name("timelines.viewer_follows_original_author"), Set(Follow).asJava)
-
-  val IS_TOP_ONE = new Binary(name("timelines.position.is_top_one"))
-  val IS_TOP_FIVE =
-    new Binary(name(featureName = "timelines.position.is_top_five"))
-  val IS_TOP_TEN =
-    new Binary(name(featureName = "timelines.position.is_top_ten"))
-
-  val LOG_POSITION =
-    new Continuous(name(featureName = "timelines.position.log_10"))
-
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD b/src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD
deleted file mode 100644
index f6caadea0..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD
+++ /dev/null
@@ -1,12 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/timelineservice/server/suggests/features/engagement_features:thrift-scala",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-        "timelines/data_processing/ml_util/transforms",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD.docx
new file mode 100644
index 000000000..b1514452c
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.docx
new file mode 100644
index 000000000..d53ff9087
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.scala
deleted file mode 100644
index e65c9db20..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.scala
+++ /dev/null
@@ -1,246 +0,0 @@
-package com.twitter.timelines.prediction.features.engagement_features
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.logging.Logger
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.Feature.Continuous
-import com.twitter.ml.api.Feature.SparseBinary
-import com.twitter.timelines.data_processing.ml_util.transforms.OneToSomeTransform
-import com.twitter.timelines.data_processing.ml_util.transforms.RichITransform
-import com.twitter.timelines.data_processing.ml_util.transforms.SparseBinaryUnion
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
-import com.twitter.timelineservice.suggests.features.engagement_features.thriftscala.{
-  EngagementFeatures => ThriftEngagementFeatures
-}
-import com.twitter.timelineservice.suggests.features.engagement_features.v1.thriftscala.{
-  EngagementFeatures => ThriftEngagementFeaturesV1
-}
-import scala.collection.JavaConverters._
-
-object EngagementFeatures {
-  private[this] val logger = Logger.get(getClass.getSimpleName)
-
-  sealed trait EngagementFeature
-  case object Count extends EngagementFeature
-  case object RealGraphWeightAverage extends EngagementFeature
-  case object RealGraphWeightMax extends EngagementFeature
-  case object RealGraphWeightMin extends EngagementFeature
-  case object RealGraphWeightMissing extends EngagementFeature
-  case object RealGraphWeightVariance extends EngagementFeature
-  case object UserIds extends EngagementFeature
-
-  def fromThrift(thriftEngagementFeatures: ThriftEngagementFeatures): Option[EngagementFeatures] = {
-    thriftEngagementFeatures match {
-      case thriftEngagementFeaturesV1: ThriftEngagementFeatures.V1 =>
-        Some(
-          EngagementFeatures(
-            favoritedBy = thriftEngagementFeaturesV1.v1.favoritedBy,
-            retweetedBy = thriftEngagementFeaturesV1.v1.retweetedBy,
-            repliedBy = thriftEngagementFeaturesV1.v1.repliedBy,
-          )
-        )
-      case _ => {
-        logger.error("Unexpected EngagementFeatures version found.")
-        None
-      }
-    }
-  }
-
-  val empty: EngagementFeatures = EngagementFeatures()
-}
-
-/**
- * Contains user IDs who have engaged with a target entity, such as a Tweet,
- * and any additional data needed for derived features.
- */
-case class EngagementFeatures(
-  favoritedBy: Seq[Long] = Nil,
-  retweetedBy: Seq[Long] = Nil,
-  repliedBy: Seq[Long] = Nil,
-  realGraphWeightByUser: Map[Long, Double] = Map.empty) {
-  def isEmpty: Boolean = favoritedBy.isEmpty && retweetedBy.isEmpty && repliedBy.isEmpty
-  def nonEmpty: Boolean = !isEmpty
-  def toLogThrift: ThriftEngagementFeatures.V1 =
-    ThriftEngagementFeatures.V1(
-      ThriftEngagementFeaturesV1(
-        favoritedBy = favoritedBy,
-        retweetedBy = retweetedBy,
-        repliedBy = repliedBy
-      )
-    )
-}
-
-/**
- * Represents engagement features derived from the Real Graph weight.
- *
- * These features are from the perspective of the source user, who is viewing their
- * timeline, to the destination users (or user), who created engagements.
- *
- * @param count number of engagements present
- * @param max max score of the engaging users
- * @param mean average score of the engaging users
- * @param min minimum score of the engaging users
- * @param missing for engagements present, how many Real Graph scores were missing
- * @param variance variance of scores of the engaging users
- */
-case class RealGraphDerivedEngagementFeatures(
-  count: Int,
-  max: Double,
-  mean: Double,
-  min: Double,
-  missing: Int,
-  variance: Double)
-
-object EngagementDataRecordFeatures {
-  import EngagementFeatures._
-
-  val FavoritedByUserIds = new SparseBinary(
-    "engagement_features.user_ids.favorited_by",
-    Set(UserId, PrivateLikes, PublicLikes).asJava)
-  val RetweetedByUserIds = new SparseBinary(
-    "engagement_features.user_ids.retweeted_by",
-    Set(UserId, PrivateRetweets, PublicRetweets).asJava)
-  val RepliedByUserIds = new SparseBinary(
-    "engagement_features.user_ids.replied_by",
-    Set(UserId, PrivateReplies, PublicReplies).asJava)
-
-  val InNetworkFavoritesCount = new Continuous(
-    "engagement_features.in_network.favorites.count",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val InNetworkRetweetsCount = new Continuous(
-    "engagement_features.in_network.retweets.count",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val InNetworkRepliesCount = new Continuous(
-    "engagement_features.in_network.replies.count",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-
-  // real graph derived features
-  val InNetworkFavoritesAvgRealGraphWeight = new Continuous(
-    "engagement_features.real_graph.favorites.avg_weight",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-  val InNetworkFavoritesMaxRealGraphWeight = new Continuous(
-    "engagement_features.real_graph.favorites.max_weight",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-  val InNetworkFavoritesMinRealGraphWeight = new Continuous(
-    "engagement_features.real_graph.favorites.min_weight",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-  val InNetworkFavoritesRealGraphWeightMissing = new Continuous(
-    "engagement_features.real_graph.favorites.missing"
-  )
-  val InNetworkFavoritesRealGraphWeightVariance = new Continuous(
-    "engagement_features.real_graph.favorites.weight_variance"
-  )
-
-  val InNetworkRetweetsMaxRealGraphWeight = new Continuous(
-    "engagement_features.real_graph.retweets.max_weight",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val InNetworkRetweetsMinRealGraphWeight = new Continuous(
-    "engagement_features.real_graph.retweets.min_weight",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val InNetworkRetweetsAvgRealGraphWeight = new Continuous(
-    "engagement_features.real_graph.retweets.avg_weight",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val InNetworkRetweetsRealGraphWeightMissing = new Continuous(
-    "engagement_features.real_graph.retweets.missing"
-  )
-  val InNetworkRetweetsRealGraphWeightVariance = new Continuous(
-    "engagement_features.real_graph.retweets.weight_variance"
-  )
-
-  val InNetworkRepliesMaxRealGraphWeight = new Continuous(
-    "engagement_features.real_graph.replies.max_weight",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-  val InNetworkRepliesMinRealGraphWeight = new Continuous(
-    "engagement_features.real_graph.replies.min_weight",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-  val InNetworkRepliesAvgRealGraphWeight = new Continuous(
-    "engagement_features.real_graph.replies.avg_weight",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-  val InNetworkRepliesRealGraphWeightMissing = new Continuous(
-    "engagement_features.real_graph.replies.missing"
-  )
-  val InNetworkRepliesRealGraphWeightVariance = new Continuous(
-    "engagement_features.real_graph.replies.weight_variance"
-  )
-
-  sealed trait FeatureGroup {
-    def continuousFeatures: Map[EngagementFeature, Continuous]
-    def sparseBinaryFeatures: Map[EngagementFeature, SparseBinary]
-    def allFeatures: Seq[Feature[_]] =
-      (continuousFeatures.values ++ sparseBinaryFeatures.values).toSeq
-  }
-
-  case object Favorites extends FeatureGroup {
-    override val continuousFeatures: Map[EngagementFeature, Continuous] =
-      Map(
-        Count -> InNetworkFavoritesCount,
-        RealGraphWeightAverage -> InNetworkFavoritesAvgRealGraphWeight,
-        RealGraphWeightMax -> InNetworkFavoritesMaxRealGraphWeight,
-        RealGraphWeightMin -> InNetworkFavoritesMinRealGraphWeight,
-        RealGraphWeightMissing -> InNetworkFavoritesRealGraphWeightMissing,
-        RealGraphWeightVariance -> InNetworkFavoritesRealGraphWeightVariance
-      )
-
-    override val sparseBinaryFeatures: Map[EngagementFeature, SparseBinary] =
-      Map(UserIds -> FavoritedByUserIds)
-  }
-
-  case object Retweets extends FeatureGroup {
-    override val continuousFeatures: Map[EngagementFeature, Continuous] =
-      Map(
-        Count -> InNetworkRetweetsCount,
-        RealGraphWeightAverage -> InNetworkRetweetsAvgRealGraphWeight,
-        RealGraphWeightMax -> InNetworkRetweetsMaxRealGraphWeight,
-        RealGraphWeightMin -> InNetworkRetweetsMinRealGraphWeight,
-        RealGraphWeightMissing -> InNetworkRetweetsRealGraphWeightMissing,
-        RealGraphWeightVariance -> InNetworkRetweetsRealGraphWeightVariance
-      )
-
-    override val sparseBinaryFeatures: Map[EngagementFeature, SparseBinary] =
-      Map(UserIds -> RetweetedByUserIds)
-  }
-
-  case object Replies extends FeatureGroup {
-    override val continuousFeatures: Map[EngagementFeature, Continuous] =
-      Map(
-        Count -> InNetworkRepliesCount,
-        RealGraphWeightAverage -> InNetworkRepliesAvgRealGraphWeight,
-        RealGraphWeightMax -> InNetworkRepliesMaxRealGraphWeight,
-        RealGraphWeightMin -> InNetworkRepliesMinRealGraphWeight,
-        RealGraphWeightMissing -> InNetworkRepliesRealGraphWeightMissing,
-        RealGraphWeightVariance -> InNetworkRepliesRealGraphWeightVariance
-      )
-
-    override val sparseBinaryFeatures: Map[EngagementFeature, SparseBinary] =
-      Map(UserIds -> RepliedByUserIds)
-  }
-
-  val PublicEngagerSets = Set(FavoritedByUserIds, RetweetedByUserIds, RepliedByUserIds)
-  val PublicEngagementUserIds = new SparseBinary(
-    "engagement_features.user_ids.public",
-    Set(UserId, EngagementsPublic).asJava
-  )
-  val ENGAGER_ID = TypedAggregateGroup.sparseFeature(PublicEngagementUserIds)
-
-  val UnifyPublicEngagersTransform = SparseBinaryUnion(
-    featuresToUnify = PublicEngagerSets,
-    outputFeature = PublicEngagementUserIds
-  )
-
-  object RichUnifyPublicEngagersTransform extends OneToSomeTransform {
-    override def apply(dataRecord: DataRecord): Option[DataRecord] =
-      RichITransform(EngagementDataRecordFeatures.UnifyPublicEngagersTransform)(dataRecord)
-    override def featuresToTransform: Set[Feature[_]] =
-      EngagementDataRecordFeatures.UnifyPublicEngagersTransform.featuresToUnify.toSet
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD b/src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD
deleted file mode 100644
index c28786b77..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD
+++ /dev/null
@@ -1,19 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/tweetypie:tweet-scala",
-    ],
-)
-
-scala_library(
-    name = "escherbird-features",
-    sources = ["EscherbirdFeatures.scala"],
-    tags = ["bazel-only"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD.docx
new file mode 100644
index 000000000..1e0d74f5f
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.docx
new file mode 100644
index 000000000..4c5e192bc
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.scala
deleted file mode 100644
index 3aaf9b856..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.scala
+++ /dev/null
@@ -1,19 +0,0 @@
-package com.twitter.timelines.prediction.features.escherbird
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.ml.api.Feature
-import java.util.{Set => JSet}
-import scala.collection.JavaConverters._
-
-object EscherbirdFeatures {
-  val TweetGroupIds = new Feature.SparseBinary("escherbird.tweet_group_ids")
-  val TweetDomainIds = new Feature.SparseBinary("escherbird.tweet_domain_ids", Set(DomainId).asJava)
-  val TweetEntityIds =
-    new Feature.SparseBinary("escherbird.tweet_entity_ids", Set(SemanticcoreClassification).asJava)
-}
-
-case class EscherbirdFeatures(
-  tweetId: Long,
-  tweetGroupIds: JSet[String],
-  tweetDomainIds: JSet[String],
-  tweetEntityIds: JSet[String])
diff --git a/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.docx b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.docx
new file mode 100644
index 000000000..ed3ea3ceb
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.scala b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.scala
deleted file mode 100644
index bd3333a03..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.scala
+++ /dev/null
@@ -1,19 +0,0 @@
-package com.twitter.timelines.prediction.features.escherbird
-
-import com.twitter.tweetypie.thriftscala.Tweet
-import scala.collection.JavaConverters._
-
-object EscherbirdFeaturesConverter {
-  val DeprecatedOrTestDomains = Set(1L, 5L, 7L, 9L, 14L, 19L, 20L, 31L)
-
-  def fromTweet(tweet: Tweet): Option[EscherbirdFeatures] = tweet.escherbirdEntityAnnotations.map {
-    escherbirdEntityAnnotations =>
-      val annotations = escherbirdEntityAnnotations.entityAnnotations
-        .filterNot(annotation => DeprecatedOrTestDomains.contains(annotation.domainId))
-      val tweetGroupIds = annotations.map(_.groupId.toString).toSet.asJava
-      val tweetDomainIds = annotations.map(_.domainId.toString).toSet.asJava
-      // An entity is only unique within a given domain
-      val tweetEntityIds = annotations.map(a => s"${a.domainId}.${a.entityId}").toSet.asJava
-      EscherbirdFeatures(tweet.id, tweetGroupIds, tweetDomainIds, tweetEntityIds)
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.bazel b/src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.bazel
deleted file mode 100644
index 0ee33acdb..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.bazel
+++ /dev/null
@@ -1,7 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.docx
new file mode 100644
index 000000000..4cf9ff82a
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.docx
new file mode 100644
index 000000000..288af8c6a
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.scala
deleted file mode 100644
index 012103b14..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.scala
+++ /dev/null
@@ -1,53 +0,0 @@
-package com.twitter.timelines.prediction.features.followsource
-
-import com.twitter.ml.api.Feature
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import scala.collection.JavaConverters._
-
-object FollowSourceFeatures {
-
-  // Corresponds to an algorithm constant from com.twitter.hermit.profile.HermitProfileConstants
-  val FollowSourceAlgorithm = new Feature.Text("follow_source.algorithm")
-
-  // Type of follow action: one of "unfollow", "follow", "follow_back", "follow_many", "follow_all"
-  val FollowAction = new Feature.Text(
-    "follow_source.action",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-
-  // Millisecond timestamp when follow occurred
-  val FollowTimestamp =
-    new Feature.Discrete("follow_source.follow_timestamp", Set(Follow, PrivateTimestamp).asJava)
-
-  // Age of follow (in minutes)
-  val FollowAgeMinutes =
-    new Feature.Continuous("follow_source.follow_age_minutes", Set(Follow).asJava)
-
-  // Tweet ID of tweet details page from where follow happened (if applicable)
-  val FollowCauseTweetId = new Feature.Discrete("follow_source.cause_tweet_id", Set(TweetId).asJava)
-
-  // String representation of follow client (android, web, iphone, etc). Derived from "client"
-  // portion of client event namespace.
-  val FollowClientId = new Feature.Text("follow_source.client_id", Set(ClientType).asJava)
-
-  // If the follow happens via a profile's Following or Followers,
-  // the id of the profile owner is recorded here.
-  val FollowAssociationId =
-    new Feature.Discrete("follow_source.association_id", Set(Follow, UserId).asJava)
-
-  // The "friendly name" here is computed using FollowSourceUtil.getSource. It represents
-  // a grouping on a few client events that reflect where the event occurred. For example,
-  // events on the tweet details page are grouped using "tweetDetails":
-  //   case (Some("web"), Some("permalink"), _, _, _) => "tweetDetails"
-  //   case (Some("iphone"), Some("tweet"), _, _, _) => "tweetDetails"
-  //   case (Some("android"), Some("tweet"), _, _, _) => "tweetDetails"
-  val FollowSourceFriendlyName = new Feature.Text("follow_source.friendly_name", Set(Follow).asJava)
-
-  // Up to two sources and actions that preceded the follow (for example, a profile visit
-  // through a mention click, which itself was on a tweet detail page reached through a tweet
-  // click in the Home tab). See go/followsource for more details and examples.
-  // The "source" here is computed using FollowSourceUtil.getSource
-  val PreFollowAction1 = new Feature.Text("follow_source.pre_follow_action_1", Set(Follow).asJava)
-  val PreFollowAction2 = new Feature.Text("follow_source.pre_follow_action_2", Set(Follow).asJava)
-  val PreFollowSource1 = new Feature.Text("follow_source.pre_follow_source_1", Set(Follow).asJava)
-  val PreFollowSource2 = new Feature.Text("follow_source.pre_follow_source_2", Set(Follow).asJava)
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/itl/BUILD b/src/scala/com/twitter/timelines/prediction/features/itl/BUILD
deleted file mode 100644
index 6fc497bf3..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/itl/BUILD
+++ /dev/null
@@ -1,9 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/itl/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/itl/BUILD.docx
new file mode 100644
index 000000000..7e4b99410
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/itl/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.docx
new file mode 100644
index 000000000..693437c80
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.scala
deleted file mode 100644
index 3351e5c11..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.scala
+++ /dev/null
@@ -1,575 +0,0 @@
-package com.twitter.timelines.prediction.features.itl
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.ml.api.Feature.Binary
-import com.twitter.ml.api.Feature.Continuous
-import com.twitter.ml.api.Feature.Discrete
-import com.twitter.ml.api.Feature.SparseBinary
-import scala.collection.JavaConverters._
-
-object ITLFeatures {
-  // engagement
-  val IS_RETWEETED =
-    new Binary("itl.engagement.is_retweeted", Set(PublicRetweets, PrivateRetweets).asJava)
-  val IS_FAVORITED =
-    new Binary("itl.engagement.is_favorited", Set(PublicLikes, PrivateLikes).asJava)
-  val IS_REPLIED =
-    new Binary("itl.engagement.is_replied", Set(PublicReplies, PrivateReplies).asJava)
-  // v1: post click engagements: fav, reply
-  val IS_GOOD_CLICKED_CONVO_DESC_V1 = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_favorited_or_replied",
-    Set(
-      PublicLikes,
-      PrivateLikes,
-      PublicReplies,
-      PrivateReplies,
-      EngagementsPrivate,
-      EngagementsPublic).asJava)
-  // v2: post click engagements: click
-  val IS_GOOD_CLICKED_CONVO_DESC_V2 = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_v2",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-
-  val IS_GOOD_CLICKED_CONVO_DESC_FAVORITED = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_favorited",
-    Set(PublicLikes, PrivateLikes).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_REPLIED = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_replied",
-    Set(PublicReplies, PrivateReplies).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_RETWEETED = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_retweeted",
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_CLICKED = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_clicked",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_FOLLOWED =
-    new Binary("itl.engagement.is_good_clicked_convo_desc_followed", Set(EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_SHARE_DM_CLICKED = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_share_dm_clicked",
-    Set(EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_PROFILE_CLICKED = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_profile_clicked",
-    Set(EngagementsPrivate).asJava)
-
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_0 = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_uam_gt_0",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_1 = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_uam_gt_1",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_2 = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_uam_gt_2",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_3 = new Binary(
-    "itl.engagement.is_good_clicked_convo_desc_uam_gt_3",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-
-  val IS_TWEET_DETAIL_DWELLED = new Binary(
-    "itl.engagement.is_tweet_detail_dwelled",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-
-  val IS_TWEET_DETAIL_DWELLED_8_SEC = new Binary(
-    "itl.engagement.is_tweet_detail_dwelled_8_sec",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_15_SEC = new Binary(
-    "itl.engagement.is_tweet_detail_dwelled_15_sec",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_25_SEC = new Binary(
-    "itl.engagement.is_tweet_detail_dwelled_25_sec",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_30_SEC = new Binary(
-    "itl.engagement.is_tweet_detail_dwelled_30_sec",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-
-  val IS_PROFILE_DWELLED = new Binary(
-    "itl.engagement.is_profile_dwelled",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_DWELLED_10_SEC = new Binary(
-    "itl.engagement.is_profile_dwelled_10_sec",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_DWELLED_20_SEC = new Binary(
-    "itl.engagement.is_profile_dwelled_20_sec",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_DWELLED_30_SEC = new Binary(
-    "itl.engagement.is_profile_dwelled_30_sec",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED = new Binary(
-    "itl.engagement.is_fullscreen_video_dwelled",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Binary(
-    "itl.engagement.is_fullscreen_video_dwelled_5_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Binary(
-    "itl.engagement.is_fullscreen_video_dwelled_10_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Binary(
-    "itl.engagement.is_fullscreen_video_dwelled_20_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Binary(
-    "itl.engagement.is_fullscreen_video_dwelled_30_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_LINK_DWELLED_15_SEC = new Binary(
-    "itl.engagement.is_link_dwelled_15_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_LINK_DWELLED_30_SEC = new Binary(
-    "itl.engagement.is_link_dwelled_30_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_LINK_DWELLED_60_SEC = new Binary(
-    "itl.engagement.is_link_dwelled_60_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_QUOTED =
-    new Binary("itl.engagement.is_quoted", Set(PublicRetweets, PrivateRetweets).asJava)
-  val IS_RETWEETED_WITHOUT_QUOTE = new Binary(
-    "itl.engagement.is_retweeted_without_quote",
-    Set(PublicRetweets, PrivateRetweets).asJava)
-  val IS_CLICKED = new Binary(
-    "itl.engagement.is_clicked",
-    Set(EngagementsPrivate, TweetsClicked, LinksClickedOn).asJava)
-  val IS_PROFILE_CLICKED = new Binary(
-    "itl.engagement.is_profile_clicked",
-    Set(EngagementsPrivate, TweetsClicked, ProfilesViewed, ProfilesClicked).asJava)
-  val IS_DWELLED = new Binary("itl.engagement.is_dwelled", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_IN_BOUNDS_V1 =
-    new Binary("itl.engagement.is_dwelled_in_bounds_v1", Set(EngagementsPrivate).asJava)
-  val DWELL_NORMALIZED_OVERALL =
-    new Continuous("itl.engagement.dwell_normalized_overall", Set(EngagementsPrivate).asJava)
-  val DWELL_CDF_OVERALL =
-    new Continuous("itl.engagement.dwell_cdf_overall", Set(EngagementsPrivate).asJava)
-  val DWELL_CDF = new Continuous("itl.engagement.dwell_cdf", Set(EngagementsPrivate).asJava)
-
-  val IS_DWELLED_1S = new Binary("itl.engagement.is_dwelled_1s", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_2S = new Binary("itl.engagement.is_dwelled_2s", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_3S = new Binary("itl.engagement.is_dwelled_3s", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_4S = new Binary("itl.engagement.is_dwelled_4s", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_5S = new Binary("itl.engagement.is_dwelled_5s", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_6S = new Binary("itl.engagement.is_dwelled_6s", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_7S = new Binary("itl.engagement.is_dwelled_7s", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_8S = new Binary("itl.engagement.is_dwelled_8s", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_9S = new Binary("itl.engagement.is_dwelled_9s", Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_10S = new Binary("itl.engagement.is_dwelled_10s", Set(EngagementsPrivate).asJava)
-
-  val IS_SKIPPED_1S = new Binary("itl.engagement.is_skipped_1s", Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_2S = new Binary("itl.engagement.is_skipped_2s", Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_3S = new Binary("itl.engagement.is_skipped_3s", Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_4S = new Binary("itl.engagement.is_skipped_4s", Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_5S = new Binary("itl.engagement.is_skipped_5s", Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_6S = new Binary("itl.engagement.is_skipped_6s", Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_7S = new Binary("itl.engagement.is_skipped_7s", Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_8S = new Binary("itl.engagement.is_skipped_8s", Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_9S = new Binary("itl.engagement.is_skipped_9s", Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_10S = new Binary("itl.engagement.is_skipped_10s", Set(EngagementsPrivate).asJava)
-
-  val IS_FOLLOWED =
-    new Binary("itl.engagement.is_followed", Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_IMPRESSED = new Binary("itl.engagement.is_impressed", Set(EngagementsPrivate).asJava)
-  val IS_OPEN_LINKED =
-    new Binary("itl.engagement.is_open_linked", Set(EngagementsPrivate, LinksClickedOn).asJava)
-  val IS_PHOTO_EXPANDED = new Binary(
-    "itl.engagement.is_photo_expanded",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_VIDEO_VIEWED =
-    new Binary("itl.engagement.is_video_viewed", Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_VIDEO_PLAYBACK_50 = new Binary(
-    "itl.engagement.is_video_playback_50",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_VIDEO_QUALITY_VIEWED = new Binary(
-    "itl.engagement.is_video_quality_viewed",
-    Set(EngagementsPrivate, EngagementsPublic).asJava
-  ) 
-  val IS_BOOKMARKED =
-    new Binary("itl.engagement.is_bookmarked", Set(EngagementsPrivate).asJava)
-  val IS_SHARED =
-    new Binary("itl.engagement.is_shared", Set(EngagementsPrivate).asJava)
-  val IS_SHARE_MENU_CLICKED =
-    new Binary("itl.engagement.is_share_menu_clicked", Set(EngagementsPrivate).asJava)
-
-  // Negative engagements
-  val IS_DONT_LIKE =
-    new Binary("itl.engagement.is_dont_like", Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_BLOCK_CLICKED = new Binary(
-    "itl.engagement.is_block_clicked",
-    Set(TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_BLOCK_DIALOG_BLOCKED = new Binary(
-    "itl.engagement.is_block_dialog_blocked",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_MUTE_CLICKED =
-    new Binary("itl.engagement.is_mute_clicked", Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_MUTE_DIALOG_MUTED =
-    new Binary("itl.engagement.is_mute_dialog_muted", Set(EngagementsPrivate).asJava)
-  val IS_REPORT_TWEET_CLICKED = new Binary(
-    "itl.engagement.is_report_tweet_clicked",
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_CARET_CLICKED =
-    new Binary("itl.engagement.is_caret_clicked", Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_NOT_ABOUT_TOPIC =
-    new Binary("itl.engagement.is_not_about_topic", Set(EngagementsPrivate).asJava)
-  val IS_NOT_RECENT =
-    new Binary("itl.engagement.is_not_recent", Set(EngagementsPrivate).asJava)
-  val IS_NOT_RELEVANT =
-    new Binary("itl.engagement.is_not_relevant", Set(EngagementsPrivate).asJava)
-  val IS_SEE_FEWER =
-    new Binary("itl.engagement.is_see_fewer", Set(EngagementsPrivate).asJava)
-  val IS_UNFOLLOW_TOPIC =
-    new Binary("itl.engagement.is_unfollow_topic", Set(EngagementsPrivate).asJava)
-  val IS_FOLLOW_TOPIC =
-    new Binary("itl.engagement.is_follow_topic", Set(EngagementsPrivate).asJava)
-  val IS_NOT_INTERESTED_IN_TOPIC =
-    new Binary("itl.engagement.is_not_interested_in_topic", Set(EngagementsPrivate).asJava)
-  val IS_HOME_LATEST_VISITED =
-    new Binary("itl.engagement.is_home_latest_visited", Set(EngagementsPrivate).asJava)
-
-  // This derived label is the logical OR of IS_DONT_LIKE, IS_BLOCK_CLICKED, IS_MUTE_CLICKED and IS_REPORT_TWEET_CLICKED
-  val IS_NEGATIVE_FEEDBACK =
-    new Binary("itl.engagement.is_negative_feedback", Set(EngagementsPrivate).asJava)
-
-  // Reciprocal engagements for reply forward engagement
-  val IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_impressed_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_favorited_by_author",
-    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_QUOTED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_quoted_by_author",
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_REPLIED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_replied_by_author",
-    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_retweeted_by_author",
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_BLOCKED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_blocked_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_followed_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_UNFOLLOWED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_unfollowed_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_MUTED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_muted_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_REPORTED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_reported_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-
-  // This derived label is the logical OR of REPLY_REPLIED, REPLY_FAVORITED, REPLY_RETWEETED
-  val IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_replied_reply_engaged_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-
-  // Reciprocal engagements for fav forward engagement
-  val IS_FAVORITED_FAV_FAVORITED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_favorited_fav_favorited_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava
-  )
-  val IS_FAVORITED_FAV_REPLIED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_favorited_fav_replied_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava
-  )
-  val IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_favorited_fav_retweeted_by_author",
-    Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava
-  )
-  val IS_FAVORITED_FAV_FOLLOWED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_favorited_fav_followed_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava
-  )
-  // This derived label is the logical OR of FAV_REPLIED, FAV_FAVORITED, FAV_RETWEETED, FAV_FOLLOWED
-  val IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Binary(
-    "itl.engagement.is_favorited_fav_engaged_by_author",
-    Set(EngagementsPrivate, EngagementsPublic).asJava
-  )
-
-  // define good profile click by considering following engagements (follow, fav, reply, retweet, etc.) at profile page
-  val IS_PROFILE_CLICKED_AND_PROFILE_FOLLOW = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_follow",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, Follow).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_FAV = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_fav",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateLikes, PublicLikes).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_REPLY = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_reply",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateReplies, PublicReplies).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_RETWEET = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_retweet",
-    Set(
-      ProfilesViewed,
-      ProfilesClicked,
-      EngagementsPrivate,
-      PrivateRetweets,
-      PublicRetweets).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_CLICK = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_tweet_click",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, TweetsClicked).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_SHARE_DM_CLICK = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_share_dm_click",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // This derived label is the union of all binary features above
-  val IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_engaged",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, EngagementsPublic).asJava)
-
-  // define bad profile click by considering following engagements (user report, tweet report, mute, block, etc) at profile page
-  val IS_PROFILE_CLICKED_AND_PROFILE_USER_REPORT_CLICK = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_user_report_click",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_REPORT_CLICK = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_tweet_report_click",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_MUTE = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_mute",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_BLOCK = new Binary(
-    "itl.engagement.is_profile_clicked_and_profile_block",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // This derived label is the union of bad profile click engagements and existing negative feedback
-  val IS_NEGATIVE_FEEDBACK_V2 = new Binary(
-    "itl.engagement.is_negative_feedback_v2",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // engagement for following user from any surface area
-  val IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Binary(
-    "itl.engagement.is_followed_from_any_surface_area",
-    Set(EngagementsPublic, EngagementsPrivate).asJava)
-
-  // Relevance prompt tweet engagements
-  val IS_RELEVANCE_PROMPT_YES_CLICKED =
-    new Binary("itl.engagement.is_relevance_prompt_yes_clicked", Set(EngagementsPrivate).asJava)
-
-  // Reply downvote engagements
-  val IS_REPLY_DOWNVOTED =
-    new Binary("itl.engagement.is_reply_downvoted", Set(EngagementsPrivate).asJava)
-  val IS_REPLY_DOWNVOTE_REMOVED =
-    new Binary("itl.engagement.is_reply_downvote_removed", Set(EngagementsPrivate).asJava)
-
-  // features from RecommendedTweet
-  val RECTWEET_SCORE = new Continuous("itl.recommended_tweet_features.rectweet_score")
-  val NUM_FAVORITING_USERS = new Continuous("itl.recommended_tweet_features.num_favoriting_users")
-  val NUM_FOLLOWING_USERS = new Continuous("itl.recommended_tweet_features.num_following_users")
-  val CONTENT_SOURCE_TYPE = new Discrete("itl.recommended_tweet_features.content_source_type")
-
-  val RECOS_SCORE = new Continuous(
-    "itl.recommended_tweet_features.recos_score",
-    Set(EngagementScore, UsersRealGraphScore, UsersSalsaScore).asJava)
-  val AUTHOR_REALGRAPH_SCORE = new Continuous(
-    "itl.recommended_tweet_features.realgraph_score",
-    Set(UsersRealGraphScore).asJava)
-  val AUTHOR_SARUS_SCORE = new Continuous(
-    "itl.recommended_tweet_features.sarus_score",
-    Set(EngagementScore, UsersSalsaScore).asJava)
-
-  val NUM_INTERACTING_USERS = new Continuous(
-    "itl.recommended_tweet_features.num_interacting_users",
-    Set(EngagementScore).asJava
-  )
-  val MAX_REALGRAPH_SCORE_OF_INTERACTING_USERS = new Continuous(
-    "itl.recommended_tweet_features.max_realgraph_score_of_interacting_users",
-    Set(UsersRealGraphScore, EngagementScore).asJava
-  )
-  val SUM_REALGRAPH_SCORE_OF_INTERACTING_USERS = new Continuous(
-    "itl.recommended_tweet_features.sum_realgraph_score_of_interacting_users",
-    Set(UsersRealGraphScore, EngagementScore).asJava
-  )
-  val AVG_REALGRAPH_SCORE_OF_INTERACTING_USERS = new Continuous(
-    "itl.recommended_tweet_features.avg_realgraph_score_of_interacting_users",
-    Set(UsersRealGraphScore, EngagementScore).asJava
-  )
-  val MAX_SARUS_SCORE_OF_INTERACTING_USERS = new Continuous(
-    "itl.recommended_tweet_features.max_sarus_score_of_interacting_users",
-    Set(EngagementScore, UsersSalsaScore).asJava
-  )
-  val SUM_SARUS_SCORE_OF_INTERACTING_USERS = new Continuous(
-    "itl.recommended_tweet_features.sum_sarus_score_of_interacting_users",
-    Set(EngagementScore, UsersSalsaScore).asJava
-  )
-  val AVG_SARUS_SCORE_OF_INTERACTING_USERS = new Continuous(
-    "itl.recommended_tweet_features.avg_sarus_score_of_interacting_users",
-    Set(EngagementScore, UsersSalsaScore).asJava
-  )
-
-  val NUM_INTERACTING_FOLLOWINGS = new Continuous(
-    "itl.recommended_tweet_features.num_interacting_followings",
-    Set(EngagementScore).asJava
-  )
-
-  // features from HydratedTweetFeatures
-  val REAL_GRAPH_WEIGHT =
-    new Continuous("itl.hydrated_tweet_features.real_graph_weight", Set(UsersRealGraphScore).asJava)
-  val SARUS_GRAPH_WEIGHT = new Continuous("itl.hydrated_tweet_features.sarus_graph_weight")
-  val FROM_TOP_ENGAGED_USER = new Binary("itl.hydrated_tweet_features.from_top_engaged_user")
-  val FROM_TOP_INFLUENCER = new Binary("itl.hydrated_tweet_features.from_top_influencer")
-  val TOPIC_SIM_SEARCHER_INTERSTED_IN_AUTHOR_KNOWN_FOR = new Continuous(
-    "itl.hydrated_tweet_features.topic_sim_searcher_interested_in_author_known_for"
-  )
-  val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_INTERESTED_IN = new Continuous(
-    "itl.hydrated_tweet_features.topic_sim_searcher_author_both_interested_in"
-  )
-  val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_KNOWN_FOR = new Continuous(
-    "itl.hydrated_tweet_features.topic_sim_searcher_author_both_known_for"
-  )
-  val USER_REP = new Continuous("itl.hydrated_tweet_features.user_rep")
-  val NORMALIZED_PARUS_SCORE = new Continuous("itl.hydrated_tweet_features.normalized_parus_score")
-  val CONTAINS_MEDIA = new Binary("itl.hydrated_tweet_features.contains_media")
-  val FROM_NEARBY = new Binary("itl.hydrated_tweet_features.from_nearby")
-  val TOPIC_SIM_SEARCHER_INTERESTED_IN_TWEET = new Continuous(
-    "itl.hydrated_tweet_features.topic_sim_searcher_interested_in_tweet"
-  )
-  val MATCHES_UI_LANG = new Binary(
-    "itl.hydrated_tweet_features.matches_ui_lang",
-    Set(ProvidedLanguage, InferredLanguage).asJava)
-  val MATCHES_SEARCHER_MAIN_LANG = new Binary(
-    "itl.hydrated_tweet_features.matches_searcher_main_lang",
-    Set(ProvidedLanguage, InferredLanguage).asJava
-  )
-  val MATCHES_SEARCHER_LANGS = new Binary(
-    "itl.hydrated_tweet_features.matches_searcher_langs",
-    Set(ProvidedLanguage, InferredLanguage).asJava)
-  val HAS_CARD = new Binary(
-    "itl.hydrated_tweet_features.has_card",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_IMAGE = new Binary(
-    "itl.hydrated_tweet_features.has_image",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_NATIVE_IMAGE = new Binary(
-    "itl.hydrated_tweet_features.has_native_image",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_VIDEO = new Binary("itl.hydrated_tweet_features.has_video")
-  val HAS_CONSUMER_VIDEO = new Binary(
-    "itl.hydrated_tweet_features.has_consumer_video",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_PRO_VIDEO = new Binary(
-    "itl.hydrated_tweet_features.has_pro_video",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_PERISCOPE = new Binary(
-    "itl.hydrated_tweet_features.has_periscope",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_VINE = new Binary(
-    "itl.hydrated_tweet_features.has_vine",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_NATIVE_VIDEO = new Binary(
-    "itl.hydrated_tweet_features.has_native_video",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_LINK = new Binary(
-    "itl.hydrated_tweet_features.has_link",
-    Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val LINK_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.link_count",
-    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
-  val URL_DOMAINS = new SparseBinary(
-    "itl.hydrated_tweet_features.url_domains",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_VISIBLE_LINK = new Binary(
-    "itl.hydrated_tweet_features.has_visible_link",
-    Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_NEWS = new Binary(
-    "itl.hydrated_tweet_features.has_news",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_TREND = new Binary(
-    "itl.hydrated_tweet_features.has_trend",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val BLENDER_SCORE =
-    new Continuous("itl.hydrated_tweet_features.blender_score", Set(EngagementScore).asJava)
-  val PARUS_SCORE =
-    new Continuous("itl.hydrated_tweet_features.parus_score", Set(EngagementScore).asJava)
-  val TEXT_SCORE =
-    new Continuous("itl.hydrated_tweet_features.text_score", Set(EngagementScore).asJava)
-  val BIDIRECTIONAL_REPLY_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.bidirectional_reply_count",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-  val UNIDIRECTIONAL_REPLY_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.unidirectional_reply_count",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-  val BIDIRECTIONAL_RETWEET_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.bidirectional_retweet_count",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val UNIDIRECTIONAL_RETWEET_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.unidirectional_retweet_count",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val BIDIRECTIONAL_FAV_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.bidirectional_fav_count",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-  val UNIDIRECTIONAL_FAV_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.unidirectional_fav_count",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-  val CONVERSATION_COUNT = new Continuous("itl.hydrated_tweet_features.conversation_count")
-  val FAV_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.fav_count",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val REPLY_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.reply_count",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-  val RETWEET_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.retweet_count",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val PREV_USER_TWEET_ENGAGEMENT = new Continuous(
-    "itl.hydrated_tweet_features.prev_user_tweet_enagagement",
-    Set(EngagementScore, EngagementsPrivate, EngagementsPublic).asJava
-  )
-  val IS_SENSITIVE = new Binary("itl.hydrated_tweet_features.is_sensitive")
-  val HAS_MULTIPLE_MEDIA = new Binary(
-    "itl.hydrated_tweet_features.has_multiple_media",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_MULTIPLE_HASHTAGS_OR_TRENDS = new Binary(
-    "itl.hydrated_tweet_features.has_multiple_hashtag_or_trend",
-    Set(
-      UserVisibleFlag,
-      CountOfPrivateTweetEntitiesAndMetadata,
-      CountOfPublicTweetEntitiesAndMetadata).asJava)
-  val IS_AUTHOR_PROFILE_EGG =
-    new Binary("itl.hydrated_tweet_features.is_author_profile_egg", Set(ProfileImage).asJava)
-  val IS_AUTHOR_NEW =
-    new Binary("itl.hydrated_tweet_features.is_author_new", Set(UserType, UserState).asJava)
-  val NUM_MENTIONS = new Continuous(
-    "itl.hydrated_tweet_features.num_mentions",
-    Set(
-      UserVisibleFlag,
-      CountOfPrivateTweetEntitiesAndMetadata,
-      CountOfPublicTweetEntitiesAndMetadata).asJava)
-  val NUM_HASHTAGS = new Continuous(
-    "itl.hydrated_tweet_features.num_hashtags",
-    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
-  val LANGUAGE = new Discrete(
-    "itl.hydrated_tweet_features.language",
-    Set(ProvidedLanguage, InferredLanguage).asJava)
-  val LINK_LANGUAGE = new Continuous(
-    "itl.hydrated_tweet_features.link_language",
-    Set(ProvidedLanguage, InferredLanguage).asJava)
-  val IS_AUTHOR_NSFW =
-    new Binary("itl.hydrated_tweet_features.is_author_nsfw", Set(UserType).asJava)
-  val IS_AUTHOR_SPAM =
-    new Binary("itl.hydrated_tweet_features.is_author_spam", Set(UserType).asJava)
-  val IS_AUTHOR_BOT = new Binary("itl.hydrated_tweet_features.is_author_bot", Set(UserType).asJava)
-  val IS_OFFENSIVE = new Binary("itl.hydrated_tweet_features.is_offensive")
-  val FROM_VERIFIED_ACCOUNT =
-    new Binary("itl.hydrated_tweet_features.from_verified_account", Set(UserVerifiedFlag).asJava)
-  val EMBEDS_IMPRESSION_COUNT = new Continuous(
-    "itl.hydrated_tweet_features.embeds_impression_count",
-    Set(CountOfImpression).asJava)
-  val EMBEDS_URL_COUNT =
-    new Continuous("itl.hydrated_tweet_features.embeds_url_count", Set(UrlFoundFlag).asJava)
-  val FAV_COUNT_V2 = new Continuous(
-    "recap.earlybird.fav_count_v2",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val RETWEET_COUNT_V2 = new Continuous(
-    "recap.earlybird.retweet_count_v2",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val REPLY_COUNT_V2 = new Continuous(
-    "recap.earlybird.reply_count_v2",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/list_features/BUILD b/src/scala/com/twitter/timelines/prediction/features/list_features/BUILD
deleted file mode 100644
index 6fc497bf3..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/list_features/BUILD
+++ /dev/null
@@ -1,9 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/list_features/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/list_features/BUILD.docx
new file mode 100644
index 000000000..7e4b99410
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/list_features/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.docx
new file mode 100644
index 000000000..20b5c2485
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.scala
deleted file mode 100644
index ffb00d1f6..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.scala
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.twitter.timelines.prediction.features.list_features
-
-import com.twitter.ml.api.Feature.{Binary, Discrete}
-import com.twitter.ml.api.FeatureContext
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import scala.collection.JavaConverters._
-
-object ListFeatures {
-
-  // list.id is used for list tweet injections in home. timelines.meta.list_id is used for list tweets in list timeline.
-  val LIST_ID = new Discrete("list.id")
-
-  val VIEWER_IS_OWNER =
-    new Binary("list.viewer.is_owner", Set(ListsNonpublicList, ListsPublicList).asJava)
-  val VIEWER_IS_SUBSCRIBER = new Binary("list.viewer.is_subscriber")
-  val IS_PINNED_LIST = new Binary("list.is_pinned")
-
-  val featureContext = new FeatureContext(
-    LIST_ID,
-    VIEWER_IS_OWNER,
-    VIEWER_IS_SUBSCRIBER,
-    IS_PINNED_LIST
-  )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD
deleted file mode 100644
index 6fc497bf3..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD
+++ /dev/null
@@ -1,9 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD.docx
new file mode 100644
index 000000000..7e4b99410
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.docx
new file mode 100644
index 000000000..40c9badda
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.scala
deleted file mode 100644
index 65d721a05..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.scala
+++ /dev/null
@@ -1,49 +0,0 @@
-package com.twitter.timelines.prediction.features.p_home_latest
-
-import com.twitter.ml.api.Feature.{Continuous, Discrete}
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import scala.collection.JavaConverters._
-
-object HomeLatestUserFeatures {
-  val LAST_LOGIN_TIMESTAMP_MS =
-    new Discrete("home_latest.user_feature.last_login_timestamp_ms", Set(PrivateTimestamp).asJava)
-}
-
-object HomeLatestUserAggregatesFeatures {
-
-  /**
-   * Used as `timestampFeature` in `OfflineAggregateSource` required by feature aggregations, set to
-   * the `dateRange` end timestamp by default
-   */
-  val AGGREGATE_TIMESTAMP_MS =
-    new Discrete("home_latest.user_feature.aggregate_timestamp_ms", Set(PrivateTimestamp).asJava)
-  val HOME_TOP_IMPRESSIONS =
-    new Continuous("home_latest.user_feature.home_top_impressions", Set(CountOfImpression).asJava)
-  val HOME_LATEST_IMPRESSIONS =
-    new Continuous(
-      "home_latest.user_feature.home_latest_impressions",
-      Set(CountOfImpression).asJava)
-  val HOME_TOP_LAST_LOGIN_TIMESTAMP_MS =
-    new Discrete(
-      "home_latest.user_feature.home_top_last_login_timestamp_ms",
-      Set(PrivateTimestamp).asJava)
-  val HOME_LATEST_LAST_LOGIN_TIMESTAMP_MS =
-    new Discrete(
-      "home_latest.user_feature.home_latest_last_login_timestamp_ms",
-      Set(PrivateTimestamp).asJava)
-  val HOME_LATEST_MOST_RECENT_CLICK_TIMESTAMP_MS =
-    new Discrete(
-      "home_latest.user_feature.home_latest_most_recent_click_timestamp_ms",
-      Set(PrivateTimestamp).asJava)
-}
-
-case class HomeLatestUserFeatures(userId: Long, lastLoginTimestampMs: Long)
-
-case class HomeLatestUserAggregatesFeatures(
-  userId: Long,
-  aggregateTimestampMs: Long,
-  homeTopImpressions: Option[Double],
-  homeLatestImpressions: Option[Double],
-  homeTopLastLoginTimestampMs: Option[Long],
-  homeLatestLastLoginTimestampMs: Option[Long],
-  homeLatestMostRecentClickTimestampMs: Option[Long])
diff --git a/src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD b/src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD
deleted file mode 100644
index babba31bb..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD
+++ /dev/null
@@ -1,8 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD.docx
new file mode 100644
index 000000000..50c6d7101
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.docx
new file mode 100644
index 000000000..4750e81f2
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.scala
deleted file mode 100644
index 7e6d1dea8..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.scala
+++ /dev/null
@@ -1,7 +0,0 @@
-package com.twitter.timelines.prediction.features.ppmi
-
-import com.twitter.ml.api.Feature.Continuous
-
-object PpmiDataRecordFeatures {
-  val PPMI_SCORE = new Continuous("ppmi.source_author.score")
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD b/src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD
deleted file mode 100644
index 868acec21..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD
+++ /dev/null
@@ -1,15 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/scala/com/twitter/ml/featurestore/catalog/entities/core",
-        "src/scala/com/twitter/ml/featurestore/catalog/entities/timelines",
-        "src/scala/com/twitter/ml/featurestore/catalog/features/timelines:realgraph",
-        "src/scala/com/twitter/ml/featurestore/lib/entity",
-        "src/scala/com/twitter/ml/featurestore/lib/feature",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/timelines/real_graph:real_graph-scala",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD.docx
new file mode 100644
index 000000000..202cce6a7
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.docx
new file mode 100644
index 000000000..683df0113
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.scala
deleted file mode 100644
index 7c52349aa..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.scala
+++ /dev/null
@@ -1,232 +0,0 @@
-package com.twitter.timelines.prediction.features.real_graph
-
-import com.twitter.ml.featurestore.catalog.entities.core.UserAuthor
-import com.twitter.ml.featurestore.catalog.features.timelines.RealGraph
-import com.twitter.ml.featurestore.lib.EdgeEntityId
-import com.twitter.ml.featurestore.lib.UserId
-import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet
-import com.twitter.ml.featurestore.lib.feature.Feature
-import com.twitter.ml.featurestore.lib.feature.FeatureSet
-
-object RealGraphDataRecordFeatureStoreFeatures {
-  val boundUserAuthorfeatureSet: BoundFeatureSet = FeatureSet(
-    RealGraph.DestId,
-    RealGraph.AddressBookEmail.DaysSinceLast,
-    RealGraph.AddressBookEmail.ElapsedDays,
-    RealGraph.AddressBookEmail.Ewma,
-    RealGraph.AddressBookEmail.IsMissing,
-    RealGraph.AddressBookEmail.Mean,
-    RealGraph.AddressBookEmail.NonZeroDays,
-    RealGraph.AddressBookEmail.Variance,
-    RealGraph.AddressBookInBoth.DaysSinceLast,
-    RealGraph.AddressBookInBoth.ElapsedDays,
-    RealGraph.AddressBookInBoth.Ewma,
-    RealGraph.AddressBookInBoth.IsMissing,
-    RealGraph.AddressBookInBoth.Mean,
-    RealGraph.AddressBookInBoth.NonZeroDays,
-    RealGraph.AddressBookInBoth.Variance,
-    RealGraph.AddressBookMutualEdgeEmail.DaysSinceLast,
-    RealGraph.AddressBookMutualEdgeEmail.ElapsedDays,
-    RealGraph.AddressBookMutualEdgeEmail.Ewma,
-    RealGraph.AddressBookMutualEdgeEmail.IsMissing,
-    RealGraph.AddressBookMutualEdgeEmail.Mean,
-    RealGraph.AddressBookMutualEdgeEmail.NonZeroDays,
-    RealGraph.AddressBookMutualEdgeEmail.Variance,
-    RealGraph.AddressBookMutualEdgeInBoth.DaysSinceLast,
-    RealGraph.AddressBookMutualEdgeInBoth.ElapsedDays,
-    RealGraph.AddressBookMutualEdgeInBoth.Ewma,
-    RealGraph.AddressBookMutualEdgeInBoth.IsMissing,
-    RealGraph.AddressBookMutualEdgeInBoth.Mean,
-    RealGraph.AddressBookMutualEdgeInBoth.NonZeroDays,
-    RealGraph.AddressBookMutualEdgeInBoth.Variance,
-    RealGraph.AddressBookMutualEdgePhone.DaysSinceLast,
-    RealGraph.AddressBookMutualEdgePhone.ElapsedDays,
-    RealGraph.AddressBookMutualEdgePhone.Ewma,
-    RealGraph.AddressBookMutualEdgePhone.IsMissing,
-    RealGraph.AddressBookMutualEdgePhone.Mean,
-    RealGraph.AddressBookMutualEdgePhone.NonZeroDays,
-    RealGraph.AddressBookMutualEdgePhone.Variance,
-    RealGraph.AddressBookPhone.DaysSinceLast,
-    RealGraph.AddressBookPhone.ElapsedDays,
-    RealGraph.AddressBookPhone.Ewma,
-    RealGraph.AddressBookPhone.IsMissing,
-    RealGraph.AddressBookPhone.Mean,
-    RealGraph.AddressBookPhone.NonZeroDays,
-    RealGraph.AddressBookPhone.Variance,
-    RealGraph.DirectMessages.DaysSinceLast,
-    RealGraph.DirectMessages.ElapsedDays,
-    RealGraph.DirectMessages.Ewma,
-    RealGraph.DirectMessages.IsMissing,
-    RealGraph.DirectMessages.Mean,
-    RealGraph.DirectMessages.NonZeroDays,
-    RealGraph.DirectMessages.Variance,
-    RealGraph.DwellTime.DaysSinceLast,
-    RealGraph.DwellTime.ElapsedDays,
-    RealGraph.DwellTime.Ewma,
-    RealGraph.DwellTime.IsMissing,
-    RealGraph.DwellTime.Mean,
-    RealGraph.DwellTime.NonZeroDays,
-    RealGraph.DwellTime.Variance,
-    RealGraph.Follow.DaysSinceLast,
-    RealGraph.Follow.ElapsedDays,
-    RealGraph.Follow.Ewma,
-    RealGraph.Follow.IsMissing,
-    RealGraph.Follow.Mean,
-    RealGraph.Follow.NonZeroDays,
-    RealGraph.Follow.Variance,
-    RealGraph.InspectedStatuses.DaysSinceLast,
-    RealGraph.InspectedStatuses.ElapsedDays,
-    RealGraph.InspectedStatuses.Ewma,
-    RealGraph.InspectedStatuses.IsMissing,
-    RealGraph.InspectedStatuses.Mean,
-    RealGraph.InspectedStatuses.NonZeroDays,
-    RealGraph.InspectedStatuses.Variance,
-    RealGraph.Likes.DaysSinceLast,
-    RealGraph.Likes.ElapsedDays,
-    RealGraph.Likes.Ewma,
-    RealGraph.Likes.IsMissing,
-    RealGraph.Likes.Mean,
-    RealGraph.Likes.NonZeroDays,
-    RealGraph.Likes.Variance,
-    RealGraph.LinkClicks.DaysSinceLast,
-    RealGraph.LinkClicks.ElapsedDays,
-    RealGraph.LinkClicks.Ewma,
-    RealGraph.LinkClicks.IsMissing,
-    RealGraph.LinkClicks.Mean,
-    RealGraph.LinkClicks.NonZeroDays,
-    RealGraph.LinkClicks.Variance,
-    RealGraph.Mentions.DaysSinceLast,
-    RealGraph.Mentions.ElapsedDays,
-    RealGraph.Mentions.Ewma,
-    RealGraph.Mentions.IsMissing,
-    RealGraph.Mentions.Mean,
-    RealGraph.Mentions.NonZeroDays,
-    RealGraph.Mentions.Variance,
-    RealGraph.MutualFollow.DaysSinceLast,
-    RealGraph.MutualFollow.ElapsedDays,
-    RealGraph.MutualFollow.Ewma,
-    RealGraph.MutualFollow.IsMissing,
-    RealGraph.MutualFollow.Mean,
-    RealGraph.MutualFollow.NonZeroDays,
-    RealGraph.MutualFollow.Variance,
-    RealGraph.NumTweetQuotes.DaysSinceLast,
-    RealGraph.NumTweetQuotes.ElapsedDays,
-    RealGraph.NumTweetQuotes.Ewma,
-    RealGraph.NumTweetQuotes.IsMissing,
-    RealGraph.NumTweetQuotes.Mean,
-    RealGraph.NumTweetQuotes.NonZeroDays,
-    RealGraph.NumTweetQuotes.Variance,
-    RealGraph.PhotoTags.DaysSinceLast,
-    RealGraph.PhotoTags.ElapsedDays,
-    RealGraph.PhotoTags.Ewma,
-    RealGraph.PhotoTags.IsMissing,
-    RealGraph.PhotoTags.Mean,
-    RealGraph.PhotoTags.NonZeroDays,
-    RealGraph.PhotoTags.Variance,
-    RealGraph.ProfileViews.DaysSinceLast,
-    RealGraph.ProfileViews.ElapsedDays,
-    RealGraph.ProfileViews.Ewma,
-    RealGraph.ProfileViews.IsMissing,
-    RealGraph.ProfileViews.Mean,
-    RealGraph.ProfileViews.NonZeroDays,
-    RealGraph.ProfileViews.Variance,
-    RealGraph.Retweets.DaysSinceLast,
-    RealGraph.Retweets.ElapsedDays,
-    RealGraph.Retweets.Ewma,
-    RealGraph.Retweets.IsMissing,
-    RealGraph.Retweets.Mean,
-    RealGraph.Retweets.NonZeroDays,
-    RealGraph.Retweets.Variance,
-    RealGraph.SmsFollow.DaysSinceLast,
-    RealGraph.SmsFollow.ElapsedDays,
-    RealGraph.SmsFollow.Ewma,
-    RealGraph.SmsFollow.IsMissing,
-    RealGraph.SmsFollow.Mean,
-    RealGraph.SmsFollow.NonZeroDays,
-    RealGraph.SmsFollow.Variance,
-    RealGraph.TweetClicks.DaysSinceLast,
-    RealGraph.TweetClicks.ElapsedDays,
-    RealGraph.TweetClicks.Ewma,
-    RealGraph.TweetClicks.IsMissing,
-    RealGraph.TweetClicks.Mean,
-    RealGraph.TweetClicks.NonZeroDays,
-    RealGraph.TweetClicks.Variance,
-    RealGraph.Weight
-  ).bind(UserAuthor)
-
-  private[this] val edgeFeatures: Seq[RealGraph.EdgeFeature] = Seq(
-    RealGraph.AddressBookEmail,
-    RealGraph.AddressBookInBoth,
-    RealGraph.AddressBookMutualEdgeEmail,
-    RealGraph.AddressBookMutualEdgeInBoth,
-    RealGraph.AddressBookMutualEdgePhone,
-    RealGraph.AddressBookPhone,
-    RealGraph.DirectMessages,
-    RealGraph.DwellTime,
-    RealGraph.Follow,
-    RealGraph.InspectedStatuses,
-    RealGraph.Likes,
-    RealGraph.LinkClicks,
-    RealGraph.Mentions,
-    RealGraph.MutualFollow,
-    RealGraph.PhotoTags,
-    RealGraph.ProfileViews,
-    RealGraph.Retweets,
-    RealGraph.SmsFollow,
-    RealGraph.TweetClicks
-  )
-
-  val htlDoubleFeatures: Set[Feature[EdgeEntityId[UserId, UserId], Double]] = {
-    val features = edgeFeatures.flatMap { ef =>
-      Seq(ef.Ewma, ef.Mean, ef.Variance)
-    } ++ Seq(RealGraph.Weight)
-    features.toSet
-  }
-
-  val htlLongFeatures: Set[Feature[EdgeEntityId[UserId, UserId], Long]] = {
-    val features = edgeFeatures.flatMap { ef =>
-      Seq(ef.DaysSinceLast, ef.ElapsedDays, ef.NonZeroDays)
-    }
-    features.toSet
-  }
-
-  private val edgeFeatureToLegacyName = Map(
-    RealGraph.AddressBookEmail -> "num_address_book_email",
-    RealGraph.AddressBookInBoth -> "num_address_book_in_both",
-    RealGraph.AddressBookMutualEdgeEmail -> "num_address_book_mutual_edge_email",
-    RealGraph.AddressBookMutualEdgeInBoth -> "num_address_book_mutual_edge_in_both",
-    RealGraph.AddressBookMutualEdgePhone -> "num_address_book_mutual_edge_phone",
-    RealGraph.AddressBookPhone -> "num_address_book_phone",
-    RealGraph.DirectMessages -> "direct_messages",
-    RealGraph.DwellTime -> "total_dwell_time",
-    RealGraph.Follow -> "num_follow",
-    RealGraph.InspectedStatuses -> "num_inspected_tweets",
-    RealGraph.Likes -> "num_favorites",
-    RealGraph.LinkClicks -> "num_link_clicks",
-    RealGraph.Mentions -> "num_mentions",
-    RealGraph.MutualFollow -> "num_mutual_follow",
-    RealGraph.PhotoTags -> "num_photo_tags",
-    RealGraph.ProfileViews -> "num_profile_views",
-    RealGraph.Retweets -> "num_retweets",
-    RealGraph.SmsFollow -> "num_sms_follow",
-    RealGraph.TweetClicks -> "num_tweet_clicks",
-  )
-
-  def convertFeatureToLegacyName(
-    prefix: String,
-    variance: String = "variance"
-  ): Map[Feature[EdgeEntityId[UserId, UserId], _ >: Long with Double <: AnyVal], String] =
-    edgeFeatureToLegacyName.flatMap {
-      case (k, v) =>
-        Seq(
-          k.NonZeroDays -> s"${prefix}.${v}.non_zero_days",
-          k.DaysSinceLast -> s"${prefix}.${v}.days_since_last",
-          k.ElapsedDays -> s"${prefix}.${v}.elapsed_days",
-          k.Ewma -> s"${prefix}.${v}.ewma",
-          k.Mean -> s"${prefix}.${v}.mean",
-          k.Variance -> s"${prefix}.${v}.${variance}",
-        )
-    } ++ Map(
-      RealGraph.Weight -> (prefix + ".weight")
-    )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.docx
new file mode 100644
index 000000000..bf20c172c
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.scala
deleted file mode 100644
index 4c1915944..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.scala
+++ /dev/null
@@ -1,534 +0,0 @@
-package com.twitter.timelines.prediction.features.real_graph
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.ml.api.Feature._
-import com.twitter.timelines.real_graph.v1.thriftscala.RealGraphEdgeFeature
-import scala.collection.JavaConverters._
-
-
-object RealGraphDataRecordFeatures {
-  // the source user id
-  val SRC_ID = new Discrete("realgraph.src_id", Set(UserId).asJava)
-  // the destination user id
-  val DST_ID = new Discrete("realgraph.dst_id", Set(UserId).asJava)
-  // real graph weight
-  val WEIGHT = new Continuous("realgraph.weight", Set(UsersRealGraphScore).asJava)
-  // the number of retweets that the source user sent to the destination user
-  val NUM_RETWEETS_MEAN =
-    new Continuous("realgraph.num_retweets.mean", Set(PrivateRetweets, PublicRetweets).asJava)
-  val NUM_RETWEETS_EWMA =
-    new Continuous("realgraph.num_retweets.ewma", Set(PrivateRetweets, PublicRetweets).asJava)
-  val NUM_RETWEETS_VARIANCE =
-    new Continuous("realgraph.num_retweets.variance", Set(PrivateRetweets, PublicRetweets).asJava)
-  val NUM_RETWEETS_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_retweets.non_zero_days",
-    Set(PrivateRetweets, PublicRetweets).asJava)
-  val NUM_RETWEETS_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_retweets.elapsed_days",
-    Set(PrivateRetweets, PublicRetweets).asJava)
-  val NUM_RETWEETS_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_retweets.days_since_last",
-    Set(PrivateRetweets, PublicRetweets).asJava)
-  val NUM_RETWEETS_IS_MISSING =
-    new Binary("realgraph.num_retweets.is_missing", Set(PrivateRetweets, PublicRetweets).asJava)
-  // the number of favories that the source user sent to the destination user
-  val NUM_FAVORITES_MEAN =
-    new Continuous("realgraph.num_favorites.mean", Set(PublicLikes, PrivateLikes).asJava)
-  val NUM_FAVORITES_EWMA =
-    new Continuous("realgraph.num_favorites.ewma", Set(PublicLikes, PrivateLikes).asJava)
-  val NUM_FAVORITES_VARIANCE =
-    new Continuous("realgraph.num_favorites.variance", Set(PublicLikes, PrivateLikes).asJava)
-  val NUM_FAVORITES_NON_ZERO_DAYS =
-    new Continuous("realgraph.num_favorites.non_zero_days", Set(PublicLikes, PrivateLikes).asJava)
-  val NUM_FAVORITES_ELAPSED_DAYS =
-    new Continuous("realgraph.num_favorites.elapsed_days", Set(PublicLikes, PrivateLikes).asJava)
-  val NUM_FAVORITES_DAYS_SINCE_LAST =
-    new Continuous("realgraph.num_favorites.days_since_last", Set(PublicLikes, PrivateLikes).asJava)
-  val NUM_FAVORITES_IS_MISSING =
-    new Binary("realgraph.num_favorites.is_missing", Set(PublicLikes, PrivateLikes).asJava)
-  // the number of mentions that the source user sent to the destination user
-  val NUM_MENTIONS_MEAN =
-    new Continuous("realgraph.num_mentions.mean", Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_MENTIONS_EWMA =
-    new Continuous("realgraph.num_mentions.ewma", Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_MENTIONS_VARIANCE = new Continuous(
-    "realgraph.num_mentions.variance",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_MENTIONS_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_mentions.non_zero_days",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_MENTIONS_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_mentions.elapsed_days",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_MENTIONS_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_mentions.days_since_last",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_MENTIONS_IS_MISSING = new Binary(
-    "realgraph.num_mentions.is_missing",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  // the number of direct messages that the source user sent to the destination user
-  val NUM_DIRECT_MESSAGES_MEAN = new Continuous(
-    "realgraph.num_direct_messages.mean",
-    Set(DmEntitiesAndMetadata, CountOfDms).asJava)
-  val NUM_DIRECT_MESSAGES_EWMA = new Continuous(
-    "realgraph.num_direct_messages.ewma",
-    Set(DmEntitiesAndMetadata, CountOfDms).asJava)
-  val NUM_DIRECT_MESSAGES_VARIANCE = new Continuous(
-    "realgraph.num_direct_messages.variance",
-    Set(DmEntitiesAndMetadata, CountOfDms).asJava)
-  val NUM_DIRECT_MESSAGES_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_direct_messages.non_zero_days",
-    Set(DmEntitiesAndMetadata, CountOfDms).asJava
-  )
-  val NUM_DIRECT_MESSAGES_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_direct_messages.elapsed_days",
-    Set(DmEntitiesAndMetadata, CountOfDms).asJava
-  )
-  val NUM_DIRECT_MESSAGES_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_direct_messages.days_since_last",
-    Set(DmEntitiesAndMetadata, CountOfDms).asJava
-  )
-  val NUM_DIRECT_MESSAGES_IS_MISSING = new Binary(
-    "realgraph.num_direct_messages.is_missing",
-    Set(DmEntitiesAndMetadata, CountOfDms).asJava)
-  // the number of tweet clicks that the source user sent to the destination user
-  val NUM_TWEET_CLICKS_MEAN =
-    new Continuous("realgraph.num_tweet_clicks.mean", Set(TweetsClicked).asJava)
-  val NUM_TWEET_CLICKS_EWMA =
-    new Continuous("realgraph.num_tweet_clicks.ewma", Set(TweetsClicked).asJava)
-  val NUM_TWEET_CLICKS_VARIANCE =
-    new Continuous("realgraph.num_tweet_clicks.variance", Set(TweetsClicked).asJava)
-  val NUM_TWEET_CLICKS_NON_ZERO_DAYS =
-    new Continuous("realgraph.num_tweet_clicks.non_zero_days", Set(TweetsClicked).asJava)
-  val NUM_TWEET_CLICKS_ELAPSED_DAYS =
-    new Continuous("realgraph.num_tweet_clicks.elapsed_days", Set(TweetsClicked).asJava)
-  val NUM_TWEET_CLICKS_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_tweet_clicks.days_since_last",
-    Set(TweetsClicked).asJava
-  )
-  val NUM_TWEET_CLICKS_IS_MISSING =
-    new Binary("realgraph.num_tweet_clicks.is_missing", Set(TweetsClicked).asJava)
-  // the number of link clicks that the source user sent to the destination user
-  val NUM_LINK_CLICKS_MEAN =
-    new Continuous("realgraph.num_link_clicks.mean", Set(CountOfTweetEntitiesClicked).asJava)
-  val NUM_LINK_CLICKS_EWMA =
-    new Continuous("realgraph.num_link_clicks.ewma", Set(CountOfTweetEntitiesClicked).asJava)
-  val NUM_LINK_CLICKS_VARIANCE =
-    new Continuous("realgraph.num_link_clicks.variance", Set(CountOfTweetEntitiesClicked).asJava)
-  val NUM_LINK_CLICKS_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_link_clicks.non_zero_days",
-    Set(CountOfTweetEntitiesClicked).asJava)
-  val NUM_LINK_CLICKS_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_link_clicks.elapsed_days",
-    Set(CountOfTweetEntitiesClicked).asJava)
-  val NUM_LINK_CLICKS_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_link_clicks.days_since_last",
-    Set(CountOfTweetEntitiesClicked).asJava)
-  val NUM_LINK_CLICKS_IS_MISSING =
-    new Binary("realgraph.num_link_clicks.is_missing", Set(CountOfTweetEntitiesClicked).asJava)
-  // the number of profile views that the source user sent to the destination user
-  val NUM_PROFILE_VIEWS_MEAN =
-    new Continuous("realgraph.num_profile_views.mean", Set(ProfilesViewed).asJava)
-  val NUM_PROFILE_VIEWS_EWMA =
-    new Continuous("realgraph.num_profile_views.ewma", Set(ProfilesViewed).asJava)
-  val NUM_PROFILE_VIEWS_VARIANCE =
-    new Continuous("realgraph.num_profile_views.variance", Set(ProfilesViewed).asJava)
-  val NUM_PROFILE_VIEWS_NON_ZERO_DAYS =
-    new Continuous("realgraph.num_profile_views.non_zero_days", Set(ProfilesViewed).asJava)
-  val NUM_PROFILE_VIEWS_ELAPSED_DAYS =
-    new Continuous("realgraph.num_profile_views.elapsed_days", Set(ProfilesViewed).asJava)
-  val NUM_PROFILE_VIEWS_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_profile_views.days_since_last",
-    Set(ProfilesViewed).asJava
-  )
-  val NUM_PROFILE_VIEWS_IS_MISSING =
-    new Binary("realgraph.num_profile_views.is_missing", Set(ProfilesViewed).asJava)
-  // the total dwell time the source user spends on the target user's tweets
-  val TOTAL_DWELL_TIME_MEAN =
-    new Continuous("realgraph.total_dwell_time.mean", Set(CountOfImpression).asJava)
-  val TOTAL_DWELL_TIME_EWMA =
-    new Continuous("realgraph.total_dwell_time.ewma", Set(CountOfImpression).asJava)
-  val TOTAL_DWELL_TIME_VARIANCE =
-    new Continuous("realgraph.total_dwell_time.variance", Set(CountOfImpression).asJava)
-  val TOTAL_DWELL_TIME_NON_ZERO_DAYS =
-    new Continuous("realgraph.total_dwell_time.non_zero_days", Set(CountOfImpression).asJava)
-  val TOTAL_DWELL_TIME_ELAPSED_DAYS =
-    new Continuous("realgraph.total_dwell_time.elapsed_days", Set(CountOfImpression).asJava)
-  val TOTAL_DWELL_TIME_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.total_dwell_time.days_since_last",
-    Set(CountOfImpression).asJava
-  )
-  val TOTAL_DWELL_TIME_IS_MISSING =
-    new Binary("realgraph.total_dwell_time.is_missing", Set(CountOfImpression).asJava)
-  // the number of the target user's tweets that the source user has inspected
-  val NUM_INSPECTED_TWEETS_MEAN =
-    new Continuous("realgraph.num_inspected_tweets.mean", Set(CountOfImpression).asJava)
-  val NUM_INSPECTED_TWEETS_EWMA =
-    new Continuous("realgraph.num_inspected_tweets.ewma", Set(CountOfImpression).asJava)
-  val NUM_INSPECTED_TWEETS_VARIANCE =
-    new Continuous("realgraph.num_inspected_tweets.variance", Set(CountOfImpression).asJava)
-  val NUM_INSPECTED_TWEETS_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_inspected_tweets.non_zero_days",
-    Set(CountOfImpression).asJava
-  )
-  val NUM_INSPECTED_TWEETS_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_inspected_tweets.elapsed_days",
-    Set(CountOfImpression).asJava
-  )
-  val NUM_INSPECTED_TWEETS_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_inspected_tweets.days_since_last",
-    Set(CountOfImpression).asJava
-  )
-  val NUM_INSPECTED_TWEETS_IS_MISSING =
-    new Binary("realgraph.num_inspected_tweets.is_missing", Set(CountOfImpression).asJava)
-  // the number of photos in which the source user has tagged the target user
-  val NUM_PHOTO_TAGS_MEAN = new Continuous(
-    "realgraph.num_photo_tags.mean",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_PHOTO_TAGS_EWMA = new Continuous(
-    "realgraph.num_photo_tags.ewma",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_PHOTO_TAGS_VARIANCE = new Continuous(
-    "realgraph.num_photo_tags.variance",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_PHOTO_TAGS_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_photo_tags.non_zero_days",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_PHOTO_TAGS_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_photo_tags.elapsed_days",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_PHOTO_TAGS_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_photo_tags.days_since_last",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val NUM_PHOTO_TAGS_IS_MISSING = new Binary(
-    "realgraph.num_photo_tags.is_missing",
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-
-  val NUM_FOLLOW_MEAN = new Continuous(
-    "realgraph.num_follow.mean",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_FOLLOW_EWMA = new Continuous(
-    "realgraph.num_follow.ewma",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_FOLLOW_VARIANCE = new Continuous(
-    "realgraph.num_follow.variance",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_FOLLOW_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_follow.non_zero_days",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_FOLLOW_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_follow.elapsed_days",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_FOLLOW_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_follow.days_since_last",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_FOLLOW_IS_MISSING = new Binary(
-    "realgraph.num_follow.is_missing",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  // the number of blocks that the source user sent to the destination user
-  val NUM_BLOCKS_MEAN =
-    new Continuous("realgraph.num_blocks.mean", Set(CountOfBlocks).asJava)
-  val NUM_BLOCKS_EWMA =
-    new Continuous("realgraph.num_blocks.ewma", Set(CountOfBlocks).asJava)
-  val NUM_BLOCKS_VARIANCE =
-    new Continuous("realgraph.num_blocks.variance", Set(CountOfBlocks).asJava)
-  val NUM_BLOCKS_NON_ZERO_DAYS =
-    new Continuous("realgraph.num_blocks.non_zero_days", Set(CountOfBlocks).asJava)
-  val NUM_BLOCKS_ELAPSED_DAYS =
-    new Continuous("realgraph.num_blocks.elapsed_days", Set(CountOfBlocks).asJava)
-  val NUM_BLOCKS_DAYS_SINCE_LAST =
-    new Continuous("realgraph.num_blocks.days_since_last", Set(CountOfBlocks).asJava)
-  val NUM_BLOCKS_IS_MISSING =
-    new Binary("realgraph.num_blocks.is_missing", Set(CountOfBlocks).asJava)
-  // the number of mutes that the source user sent to the destination user
-  val NUM_MUTES_MEAN =
-    new Continuous("realgraph.num_mutes.mean", Set(CountOfMutes).asJava)
-  val NUM_MUTES_EWMA =
-    new Continuous("realgraph.num_mutes.ewma", Set(CountOfMutes).asJava)
-  val NUM_MUTES_VARIANCE =
-    new Continuous("realgraph.num_mutes.variance", Set(CountOfMutes).asJava)
-  val NUM_MUTES_NON_ZERO_DAYS =
-    new Continuous("realgraph.num_mutes.non_zero_days", Set(CountOfMutes).asJava)
-  val NUM_MUTES_ELAPSED_DAYS =
-    new Continuous("realgraph.num_mutes.elapsed_days", Set(CountOfMutes).asJava)
-  val NUM_MUTES_DAYS_SINCE_LAST =
-    new Continuous("realgraph.num_mutes.days_since_last", Set(CountOfMutes).asJava)
-  val NUM_MUTES_IS_MISSING =
-    new Binary("realgraph.num_mutes.is_missing", Set(CountOfMutes).asJava)
-  // the number of report as abuses that the source user sent to the destination user
-  val NUM_REPORTS_AS_ABUSES_MEAN =
-    new Continuous("realgraph.num_report_as_abuses.mean", Set(CountOfAbuseReports).asJava)
-  val NUM_REPORTS_AS_ABUSES_EWMA =
-    new Continuous("realgraph.num_report_as_abuses.ewma", Set(CountOfAbuseReports).asJava)
-  val NUM_REPORTS_AS_ABUSES_VARIANCE =
-    new Continuous("realgraph.num_report_as_abuses.variance", Set(CountOfAbuseReports).asJava)
-  val NUM_REPORTS_AS_ABUSES_NON_ZERO_DAYS =
-    new Continuous("realgraph.num_report_as_abuses.non_zero_days", Set(CountOfAbuseReports).asJava)
-  val NUM_REPORTS_AS_ABUSES_ELAPSED_DAYS =
-    new Continuous("realgraph.num_report_as_abuses.elapsed_days", Set(CountOfAbuseReports).asJava)
-  val NUM_REPORTS_AS_ABUSES_DAYS_SINCE_LAST =
-    new Continuous(
-      "realgraph.num_report_as_abuses.days_since_last",
-      Set(CountOfAbuseReports).asJava)
-  val NUM_REPORTS_AS_ABUSES_IS_MISSING =
-    new Binary("realgraph.num_report_as_abuses.is_missing", Set(CountOfAbuseReports).asJava)
-  // the number of report as spams that the source user sent to the destination user
-  val NUM_REPORTS_AS_SPAMS_MEAN =
-    new Continuous(
-      "realgraph.num_report_as_spams.mean",
-      Set(CountOfAbuseReports, SafetyRelationships).asJava)
-  val NUM_REPORTS_AS_SPAMS_EWMA =
-    new Continuous(
-      "realgraph.num_report_as_spams.ewma",
-      Set(CountOfAbuseReports, SafetyRelationships).asJava)
-  val NUM_REPORTS_AS_SPAMS_VARIANCE =
-    new Continuous(
-      "realgraph.num_report_as_spams.variance",
-      Set(CountOfAbuseReports, SafetyRelationships).asJava)
-  val NUM_REPORTS_AS_SPAMS_NON_ZERO_DAYS =
-    new Continuous(
-      "realgraph.num_report_as_spams.non_zero_days",
-      Set(CountOfAbuseReports, SafetyRelationships).asJava)
-  val NUM_REPORTS_AS_SPAMS_ELAPSED_DAYS =
-    new Continuous(
-      "realgraph.num_report_as_spams.elapsed_days",
-      Set(CountOfAbuseReports, SafetyRelationships).asJava)
-  val NUM_REPORTS_AS_SPAMS_DAYS_SINCE_LAST =
-    new Continuous(
-      "realgraph.num_report_as_spams.days_since_last",
-      Set(CountOfAbuseReports, SafetyRelationships).asJava)
-  val NUM_REPORTS_AS_SPAMS_IS_MISSING =
-    new Binary(
-      "realgraph.num_report_as_spams.is_missing",
-      Set(CountOfAbuseReports, SafetyRelationships).asJava)
-
-  val NUM_MUTUAL_FOLLOW_MEAN = new Continuous(
-    "realgraph.num_mutual_follow.mean",
-    Set(
-      Follow,
-      PrivateAccountsFollowedBy,
-      PublicAccountsFollowedBy,
-      PrivateAccountsFollowing,
-      PublicAccountsFollowing).asJava
-  )
-  val NUM_MUTUAL_FOLLOW_EWMA = new Continuous(
-    "realgraph.num_mutual_follow.ewma",
-    Set(
-      Follow,
-      PrivateAccountsFollowedBy,
-      PublicAccountsFollowedBy,
-      PrivateAccountsFollowing,
-      PublicAccountsFollowing).asJava
-  )
-  val NUM_MUTUAL_FOLLOW_VARIANCE = new Continuous(
-    "realgraph.num_mutual_follow.variance",
-    Set(
-      Follow,
-      PrivateAccountsFollowedBy,
-      PublicAccountsFollowedBy,
-      PrivateAccountsFollowing,
-      PublicAccountsFollowing).asJava
-  )
-  val NUM_MUTUAL_FOLLOW_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_mutual_follow.non_zero_days",
-    Set(
-      Follow,
-      PrivateAccountsFollowedBy,
-      PublicAccountsFollowedBy,
-      PrivateAccountsFollowing,
-      PublicAccountsFollowing).asJava
-  )
-  val NUM_MUTUAL_FOLLOW_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_mutual_follow.elapsed_days",
-    Set(
-      Follow,
-      PrivateAccountsFollowedBy,
-      PublicAccountsFollowedBy,
-      PrivateAccountsFollowing,
-      PublicAccountsFollowing).asJava
-  )
-  val NUM_MUTUAL_FOLLOW_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_mutual_follow.days_since_last",
-    Set(
-      Follow,
-      PrivateAccountsFollowedBy,
-      PublicAccountsFollowedBy,
-      PrivateAccountsFollowing,
-      PublicAccountsFollowing).asJava
-  )
-  val NUM_MUTUAL_FOLLOW_IS_MISSING = new Binary(
-    "realgraph.num_mutual_follow.is_missing",
-    Set(
-      Follow,
-      PrivateAccountsFollowedBy,
-      PublicAccountsFollowedBy,
-      PrivateAccountsFollowing,
-      PublicAccountsFollowing).asJava
-  )
-
-  val NUM_SMS_FOLLOW_MEAN = new Continuous(
-    "realgraph.num_sms_follow.mean",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_SMS_FOLLOW_EWMA = new Continuous(
-    "realgraph.num_sms_follow.ewma",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_SMS_FOLLOW_VARIANCE = new Continuous(
-    "realgraph.num_sms_follow.variance",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_SMS_FOLLOW_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_sms_follow.non_zero_days",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_SMS_FOLLOW_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_sms_follow.elapsed_days",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_SMS_FOLLOW_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_sms_follow.days_since_last",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-  val NUM_SMS_FOLLOW_IS_MISSING = new Binary(
-    "realgraph.num_sms_follow.is_missing",
-    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
-
-  val NUM_ADDRESS_BOOK_EMAIL_MEAN =
-    new Continuous("realgraph.num_address_book_email.mean", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_EMAIL_EWMA =
-    new Continuous("realgraph.num_address_book_email.ewma", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_EMAIL_VARIANCE =
-    new Continuous("realgraph.num_address_book_email.variance", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_EMAIL_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_address_book_email.non_zero_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_EMAIL_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_address_book_email.elapsed_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_EMAIL_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_address_book_email.days_since_last",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_EMAIL_IS_MISSING =
-    new Binary("realgraph.num_address_book_email.is_missing", Set(AddressBook).asJava)
-
-  val NUM_ADDRESS_BOOK_IN_BOTH_MEAN =
-    new Continuous("realgraph.num_address_book_in_both.mean", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_IN_BOTH_EWMA =
-    new Continuous("realgraph.num_address_book_in_both.ewma", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_IN_BOTH_VARIANCE = new Continuous(
-    "realgraph.num_address_book_in_both.variance",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_IN_BOTH_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_address_book_in_both.non_zero_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_IN_BOTH_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_address_book_in_both.elapsed_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_IN_BOTH_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_address_book_in_both.days_since_last",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_IN_BOTH_IS_MISSING = new Binary(
-    "realgraph.num_address_book_in_both.is_missing",
-    Set(AddressBook).asJava
-  )
-
-  val NUM_ADDRESS_BOOK_PHONE_MEAN =
-    new Continuous("realgraph.num_address_book_phone.mean", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_PHONE_EWMA =
-    new Continuous("realgraph.num_address_book_phone.ewma", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_PHONE_VARIANCE =
-    new Continuous("realgraph.num_address_book_phone.variance", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_PHONE_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_address_book_phone.non_zero_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_PHONE_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_address_book_phone.elapsed_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_PHONE_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_address_book_phone.days_since_last",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_PHONE_IS_MISSING =
-    new Binary("realgraph.num_address_book_phone.is_missing", Set(AddressBook).asJava)
-
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_MEAN =
-    new Continuous("realgraph.num_address_book_mutual_edge_email.mean", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_EWMA =
-    new Continuous("realgraph.num_address_book_mutual_edge_email.ewma", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_VARIANCE =
-    new Continuous("realgraph.num_address_book_mutual_edge_email.variance", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_address_book_mutual_edge_email.non_zero_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_address_book_mutual_edge_email.elapsed_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_address_book_mutual_edge_email.days_since_last",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_IS_MISSING =
-    new Binary("realgraph.num_address_book_mutual_edge_email.is_missing", Set(AddressBook).asJava)
-
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_MEAN =
-    new Continuous("realgraph.num_address_book_mutual_edge_in_both.mean", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_EWMA =
-    new Continuous("realgraph.num_address_book_mutual_edge_in_both.ewma", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_VARIANCE = new Continuous(
-    "realgraph.num_address_book_mutual_edge_in_both.variance",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_address_book_mutual_edge_in_both.non_zero_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_address_book_mutual_edge_in_both.elapsed_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_address_book_mutual_edge_in_both.days_since_last",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_IS_MISSING = new Binary(
-    "realgraph.num_address_book_mutual_edge_in_both.is_missing",
-    Set(AddressBook).asJava
-  )
-
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_MEAN =
-    new Continuous("realgraph.num_address_book_mutual_edge_phone.mean", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_EWMA =
-    new Continuous("realgraph.num_address_book_mutual_edge_phone.ewma", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_VARIANCE =
-    new Continuous("realgraph.num_address_book_mutual_edge_phone.variance", Set(AddressBook).asJava)
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_NON_ZERO_DAYS = new Continuous(
-    "realgraph.num_address_book_mutual_edge_phone.non_zero_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_ELAPSED_DAYS = new Continuous(
-    "realgraph.num_address_book_mutual_edge_phone.elapsed_days",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_DAYS_SINCE_LAST = new Continuous(
-    "realgraph.num_address_book_mutual_edge_phone.days_since_last",
-    Set(AddressBook).asJava
-  )
-  val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_IS_MISSING =
-    new Binary("realgraph.num_address_book_mutual_edge_phone.is_missing", Set(AddressBook).asJava)
-}
-
-case class RealGraphEdgeDataRecordFeatures(
-  edgeFeatureOpt: Option[RealGraphEdgeFeature],
-  meanFeature: Continuous,
-  ewmaFeature: Continuous,
-  varianceFeature: Continuous,
-  nonZeroDaysFeature: Continuous,
-  elapsedDaysFeature: Continuous,
-  daysSinceLastFeature: Continuous,
-  isMissingFeature: Binary)
diff --git a/src/scala/com/twitter/timelines/prediction/features/recap/BUILD b/src/scala/com/twitter/timelines/prediction/features/recap/BUILD
deleted file mode 100644
index 6fc497bf3..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/recap/BUILD
+++ /dev/null
@@ -1,9 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/recap/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/recap/BUILD.docx
new file mode 100644
index 000000000..7e4b99410
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/recap/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.docx
new file mode 100644
index 000000000..48a7767d6
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.scala
deleted file mode 100644
index c8ee6da7d..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.scala
+++ /dev/null
@@ -1,967 +0,0 @@
-package com.twitter.timelines.prediction.features.recap
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.ml.api.Feature.Binary
-import com.twitter.ml.api.Feature.Continuous
-import com.twitter.ml.api.Feature.Discrete
-import com.twitter.ml.api.Feature.SparseBinary
-import com.twitter.ml.api.Feature.Text
-import scala.collection.JavaConverters._
-
-object RecapFeatures extends RecapFeatures("")
-object InReplyToRecapFeatures extends RecapFeatures("in_reply_to_tweet")
-
-class RecapFeatures(prefix: String) {
-  private def name(featureName: String): String = {
-    if (prefix.nonEmpty) {
-      s"$prefix.$featureName"
-    } else {
-      featureName
-    }
-  }
-
-  val IS_IPAD_CLIENT = new Binary(name("recap.client.is_ipad"), Set(ClientType).asJava)
-  val IS_WEB_CLIENT = new Binary(name("recap.client.is_web"), Set(ClientType).asJava)
-  val IS_IPHONE_CLIENT = new Binary(name("recap.client.is_phone"), Set(ClientType).asJava)
-  val IS_ANDROID_CLIENT = new Binary(name("recap.client.is_android"), Set(ClientType).asJava)
-  val IS_ANDROID_TABLET_CLIENT =
-    new Binary(name("recap.client.is_android_tablet"), Set(ClientType).asJava)
-
-  // features from userAgent
-  val CLIENT_NAME = new Text(name("recap.user_agent.client_name"), Set(ClientType).asJava)
-  val CLIENT_SOURCE = new Discrete(name("recap.user_agent.client_source"), Set(ClientType).asJava)
-  val CLIENT_VERSION = new Text(name("recap.user_agent.client_version"), Set(ClientVersion).asJava)
-  val CLIENT_VERSION_CODE =
-    new Text(name("recap.user_agent.client_version_code"), Set(ClientVersion).asJava)
-  val DEVICE = new Text(name("recap.user_agent.device"), Set(DeviceType).asJava)
-  val FROM_DOG_FOOD = new Binary(name("recap.meta.from_dog_food"), Set(UserAgent).asJava)
-  val FROM_TWITTER_CLIENT =
-    new Binary(name("recap.user_agent.from_twitter_client"), Set(UserAgent).asJava)
-  val MANUFACTURER = new Text(name("recap.user_agent.manufacturer"), Set(UserAgent).asJava)
-  val MODEL = new Text(name("recap.user_agent.model"), Set(UserAgent).asJava)
-  val NETWORK_CONNECTION =
-    new Discrete(name("recap.user_agent.network_connection"), Set(UserAgent).asJava)
-  val SDK_VERSION = new Text(name("recap.user_agent.sdk_version"), Set(AppId, UserAgent).asJava)
-
-  // engagement
-  val IS_RETWEETED = new Binary(
-    name("recap.engagement.is_retweeted"),
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_FAVORITED = new Binary(
-    name("recap.engagement.is_favorited"),
-    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED = new Binary(
-    name("recap.engagement.is_replied"),
-    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava)
-  // v1: post click engagements: fav, reply
-  val IS_GOOD_CLICKED_CONVO_DESC_V1 = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_favorited_or_replied"),
-    Set(
-      PublicLikes,
-      PrivateLikes,
-      PublicReplies,
-      PrivateReplies,
-      EngagementsPrivate,
-      EngagementsPublic).asJava)
-  // v2: post click engagements: click
-  val IS_GOOD_CLICKED_CONVO_DESC_V2 = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_v2"),
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-
-  val IS_GOOD_CLICKED_CONVO_DESC_FAVORITED = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_favorited"),
-    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_REPLIED = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_replied"),
-    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_RETWEETED = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_retweeted"),
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_CLICKED = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_clicked"),
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_FOLLOWED = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_followed"),
-    Set(EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_SHARE_DM_CLICKED = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_share_dm_clicked"),
-    Set(EngagementsPrivate).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_PROFILE_CLICKED = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_profile_clicked"),
-    Set(EngagementsPrivate).asJava)
-
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_0 = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_uam_gt_0"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_1 = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_uam_gt_1"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_2 = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_uam_gt_2"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_3 = new Binary(
-    name("recap.engagement.is_good_clicked_convo_desc_uam_gt_3"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-
-  val IS_TWEET_DETAIL_DWELLED = new Binary(
-    name("recap.engagement.is_tweet_detail_dwelled"),
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_8_SEC = new Binary(
-    name("recap.engagement.is_tweet_detail_dwelled_8_sec"),
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_15_SEC = new Binary(
-    name("recap.engagement.is_tweet_detail_dwelled_15_sec"),
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_25_SEC = new Binary(
-    name("recap.engagement.is_tweet_detail_dwelled_25_sec"),
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_TWEET_DETAIL_DWELLED_30_SEC = new Binary(
-    name("recap.engagement.is_tweet_detail_dwelled_30_sec"),
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-
-  val IS_PROFILE_DWELLED = new Binary(
-    "recap.engagement.is_profile_dwelled",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_DWELLED_10_SEC = new Binary(
-    "recap.engagement.is_profile_dwelled_10_sec",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_DWELLED_20_SEC = new Binary(
-    "recap.engagement.is_profile_dwelled_20_sec",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_DWELLED_30_SEC = new Binary(
-    "recap.engagement.is_profile_dwelled_30_sec",
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED = new Binary(
-    "recap.engagement.is_fullscreen_video_dwelled",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Binary(
-    "recap.engagement.is_fullscreen_video_dwelled_5_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Binary(
-    "recap.engagement.is_fullscreen_video_dwelled_10_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Binary(
-    "recap.engagement.is_fullscreen_video_dwelled_20_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Binary(
-    "recap.engagement.is_fullscreen_video_dwelled_30_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_LINK_DWELLED_15_SEC = new Binary(
-    "recap.engagement.is_link_dwelled_15_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_LINK_DWELLED_30_SEC = new Binary(
-    "recap.engagement.is_link_dwelled_30_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_LINK_DWELLED_60_SEC = new Binary(
-    "recap.engagement.is_link_dwelled_60_sec",
-    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
-
-  val IS_QUOTED = new Binary(
-    name("recap.engagement.is_quoted"),
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_RETWEETED_WITHOUT_QUOTE = new Binary(
-    name("recap.engagement.is_retweeted_without_quote"),
-    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_CLICKED =
-    new Binary(name("recap.engagement.is_clicked"), Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_DWELLED = new Binary(name("recap.engagement.is_dwelled"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_IN_BOUNDS_V1 =
-    new Binary(name("recap.engagement.is_dwelled_in_bounds_v1"), Set(EngagementsPrivate).asJava)
-  val DWELL_NORMALIZED_OVERALL = new Continuous(
-    name("recap.engagement.dwell_normalized_overall"),
-    Set(EngagementsPrivate).asJava)
-  val DWELL_CDF_OVERALL =
-    new Continuous(name("recap.engagement.dwell_cdf_overall"), Set(EngagementsPrivate).asJava)
-  val DWELL_CDF = new Continuous(name("recap.engagement.dwell_cdf"), Set(EngagementsPrivate).asJava)
-
-  val IS_DWELLED_1S =
-    new Binary(name("recap.engagement.is_dwelled_1s"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_2S =
-    new Binary(name("recap.engagement.is_dwelled_2s"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_3S =
-    new Binary(name("recap.engagement.is_dwelled_3s"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_4S =
-    new Binary(name("recap.engagement.is_dwelled_4s"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_5S =
-    new Binary(name("recap.engagement.is_dwelled_5s"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_6S =
-    new Binary(name("recap.engagement.is_dwelled_6s"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_7S =
-    new Binary(name("recap.engagement.is_dwelled_7s"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_8S =
-    new Binary(name("recap.engagement.is_dwelled_8s"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_9S =
-    new Binary(name("recap.engagement.is_dwelled_9s"), Set(EngagementsPrivate).asJava)
-  val IS_DWELLED_10S =
-    new Binary(name("recap.engagement.is_dwelled_10s"), Set(EngagementsPrivate).asJava)
-
-  val IS_SKIPPED_1S =
-    new Binary(name("recap.engagement.is_skipped_1s"), Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_2S =
-    new Binary(name("recap.engagement.is_skipped_2s"), Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_3S =
-    new Binary(name("recap.engagement.is_skipped_3s"), Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_4S =
-    new Binary(name("recap.engagement.is_skipped_4s"), Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_5S =
-    new Binary(name("recap.engagement.is_skipped_5s"), Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_6S =
-    new Binary(name("recap.engagement.is_skipped_6s"), Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_7S =
-    new Binary(name("recap.engagement.is_skipped_7s"), Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_8S =
-    new Binary(name("recap.engagement.is_skipped_8s"), Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_9S =
-    new Binary(name("recap.engagement.is_skipped_9s"), Set(EngagementsPrivate).asJava)
-  val IS_SKIPPED_10S =
-    new Binary(name("recap.engagement.is_skipped_10s"), Set(EngagementsPrivate).asJava)
-
-  val IS_IMPRESSED =
-    new Binary(name("recap.engagement.is_impressed"), Set(EngagementsPrivate).asJava)
-  val IS_FOLLOWED =
-    new Binary("recap.engagement.is_followed", Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_PROFILE_CLICKED = new Binary(
-    name("recap.engagement.is_profile_clicked"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_OPEN_LINKED = new Binary(
-    name("recap.engagement.is_open_linked"),
-    Set(EngagementsPrivate, LinksClickedOn).asJava)
-  val IS_PHOTO_EXPANDED =
-    new Binary(name("recap.engagement.is_photo_expanded"), Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_VIEWED =
-    new Binary(name("recap.engagement.is_video_viewed"), Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_START =
-    new Binary(name("recap.engagement.is_video_playback_start"), Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_25 =
-    new Binary(name("recap.engagement.is_video_playback_25"), Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_50 =
-    new Binary(name("recap.engagement.is_video_playback_50"), Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_75 =
-    new Binary(name("recap.engagement.is_video_playback_75"), Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_95 =
-    new Binary(name("recap.engagement.is_video_playback_95"), Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_PLAYBACK_COMPLETE =
-    new Binary(name("recap.engagement.is_video_playback_complete"), Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_VIEWED_AND_PLAYBACK_50 = new Binary(
-    name("recap.engagement.is_video_viewed_and_playback_50"),
-    Set(EngagementsPrivate).asJava)
-  val IS_VIDEO_QUALITY_VIEWED = new Binary(
-    name("recap.engagement.is_video_quality_viewed"),
-    Set(EngagementsPrivate).asJava
-  ) 
-  val IS_TWEET_SHARE_DM_CLICKED =
-    new Binary(name("recap.engagement.is_tweet_share_dm_clicked"), Set(EngagementsPrivate).asJava)
-  val IS_TWEET_SHARE_DM_SENT =
-    new Binary(name("recap.engagement.is_tweet_share_dm_sent"), Set(EngagementsPrivate).asJava)
-  val IS_BOOKMARKED =
-    new Binary(name("recap.engagement.is_bookmarked"), Set(EngagementsPrivate).asJava)
-  val IS_SHARED =
-    new Binary(name("recap.engagement.is_shared"), Set(EngagementsPrivate).asJava)
-  val IS_SHARE_MENU_CLICKED =
-    new Binary(name("recap.engagement.is_share_menu_clicked"), Set(EngagementsPrivate).asJava)
-
-  // Negative engagements
-  val IS_DONT_LIKE =
-    new Binary(name("recap.engagement.is_dont_like"), Set(EngagementsPrivate).asJava)
-  val IS_BLOCK_CLICKED = new Binary(
-    name("recap.engagement.is_block_clicked"),
-    Set(TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_BLOCK_DIALOG_BLOCKED = new Binary(
-    name("recap.engagement.is_block_dialog_blocked"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_MUTE_CLICKED = new Binary(
-    name("recap.engagement.is_mute_clicked"),
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_MUTE_DIALOG_MUTED =
-    new Binary(name("recap.engagement.is_mute_dialog_muted"), Set(EngagementsPrivate).asJava)
-  val IS_REPORT_TWEET_CLICKED = new Binary(
-    name("recap.engagement.is_report_tweet_clicked"),
-    Set(TweetsClicked, EngagementsPrivate).asJava)
-  val IS_NEGATIVE_FEEDBACK =
-    new Binary("recap.engagement.is_negative_feedback", Set(EngagementsPrivate).asJava)
-  val IS_NOT_ABOUT_TOPIC =
-    new Binary(name("recap.engagement.is_not_about_topic"), Set(EngagementsPrivate).asJava)
-  val IS_NOT_RECENT =
-    new Binary(name("recap.engagement.is_not_recent"), Set(EngagementsPrivate).asJava)
-  val IS_NOT_RELEVANT =
-    new Binary(name("recap.engagement.is_not_relevant"), Set(EngagementsPrivate).asJava)
-  val IS_SEE_FEWER =
-    new Binary(name("recap.engagement.is_see_fewer"), Set(EngagementsPrivate).asJava)
-  val IS_TOPIC_SPEC_NEG_ENGAGEMENT =
-    new Binary("recap.engagement.is_topic_spec_neg_engagement", Set(EngagementsPrivate).asJava)
-  val IS_UNFOLLOW_TOPIC =
-    new Binary("recap.engagement.is_unfollow_topic", Set(EngagementsPrivate).asJava)
-  val IS_UNFOLLOW_TOPIC_EXPLICIT_POSITIVE_LABEL =
-    new Binary(
-      "recap.engagement.is_unfollow_topic_explicit_positive_label",
-      Set(EngagementsPrivate).asJava)
-  val IS_UNFOLLOW_TOPIC_IMPLICIT_POSITIVE_LABEL =
-    new Binary(
-      "recap.engagement.is_unfollow_topic_implicit_positive_label",
-      Set(EngagementsPrivate).asJava)
-  val IS_UNFOLLOW_TOPIC_STRONG_EXPLICIT_NEGATIVE_LABEL =
-    new Binary(
-      "recap.engagement.is_unfollow_topic_strong_explicit_negative_label",
-      Set(EngagementsPrivate).asJava)
-  val IS_UNFOLLOW_TOPIC_EXPLICIT_NEGATIVE_LABEL =
-    new Binary(
-      "recap.engagement.is_unfollow_topic_explicit_negative_label",
-      Set(EngagementsPrivate).asJava)
-  val IS_NOT_INTERESTED_IN =
-    new Binary("recap.engagement.is_not_interested_in", Set(EngagementsPrivate).asJava)
-  val IS_NOT_INTERESTED_IN_EXPLICIT_POSITIVE_LABEL =
-    new Binary(
-      "recap.engagement.is_not_interested_in_explicit_positive_label",
-      Set(EngagementsPrivate).asJava)
-  val IS_NOT_INTERESTED_IN_EXPLICIT_NEGATIVE_LABEL =
-    new Binary(
-      "recap.engagement.is_not_interested_in_explicit_negative_label",
-      Set(EngagementsPrivate).asJava)
-  val IS_CARET_CLICKED =
-    new Binary(name("recap.engagement.is_caret_clicked"), Set(EngagementsPrivate).asJava)
-  val IS_FOLLOW_TOPIC =
-    new Binary("recap.engagement.is_follow_topic", Set(EngagementsPrivate).asJava)
-  val IS_NOT_INTERESTED_IN_TOPIC =
-    new Binary("recap.engagement.is_not_interested_in_topic", Set(EngagementsPrivate).asJava)
-  val IS_HOME_LATEST_VISITED =
-    new Binary(name("recap.engagement.is_home_latest_visited"), Set(EngagementsPrivate).asJava)
-
-  // Relevance prompt tweet engagements
-  val IS_RELEVANCE_PROMPT_YES_CLICKED = new Binary(
-    name("recap.engagement.is_relevance_prompt_yes_clicked"),
-    Set(EngagementsPrivate).asJava)
-  val IS_RELEVANCE_PROMPT_NO_CLICKED = new Binary(
-    name("recap.engagement.is_relevance_prompt_no_clicked"),
-    Set(EngagementsPrivate).asJava)
-  val IS_RELEVANCE_PROMPT_IMPRESSED = new Binary(
-    name("recap.engagement.is_relevance_prompt_impressed"),
-    Set(EngagementsPrivate).asJava)
-
-  // Reciprocal engagements for reply forward engagement
-  val IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_impressed_by_author"),
-    Set(EngagementsPrivate).asJava)
-  val IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_favorited_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava)
-  val IS_REPLIED_REPLY_QUOTED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_quoted_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava)
-  val IS_REPLIED_REPLY_REPLIED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_replied_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava)
-  val IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_retweeted_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava)
-  val IS_REPLIED_REPLY_BLOCKED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_blocked_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_followed_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic, Follow).asJava)
-  val IS_REPLIED_REPLY_UNFOLLOWED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_unfollowed_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_REPLIED_REPLY_MUTED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_muted_by_author"),
-    Set(EngagementsPrivate).asJava)
-  val IS_REPLIED_REPLY_REPORTED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_reported_by_author"),
-    Set(EngagementsPrivate).asJava)
-
-  // This derived label is the logical OR of REPLY_REPLIED, REPLY_FAVORITED, REPLY_RETWEETED
-  val IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_replied_reply_engaged_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-
-  // Reciprocal engagements for fav forward engagement
-  val IS_FAVORITED_FAV_FAVORITED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_favorited_fav_favorited_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava
-  )
-  val IS_FAVORITED_FAV_REPLIED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_favorited_fav_replied_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava
-  )
-  val IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_favorited_fav_retweeted_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava
-  )
-  val IS_FAVORITED_FAV_FOLLOWED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_favorited_fav_followed_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava
-  )
-  // This derived label is the logical OR of FAV_REPLIED, FAV_FAVORITED, FAV_RETWEETED, FAV_FOLLOWED
-  val IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Binary(
-    name("recap.engagement.is_favorited_fav_engaged_by_author"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava)
-
-  // define good profile click by considering following engagements (follow, fav, reply, retweet, etc.) at profile page
-  val IS_PROFILE_CLICKED_AND_PROFILE_FOLLOW = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_follow"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, Follow).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_FAV = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_fav"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateLikes, PublicLikes).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_REPLY = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_reply"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateReplies, PublicReplies).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_RETWEET = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_retweet"),
-    Set(
-      ProfilesViewed,
-      ProfilesClicked,
-      EngagementsPrivate,
-      PrivateRetweets,
-      PublicRetweets).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_CLICK = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_tweet_click"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, TweetsClicked).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_SHARE_DM_CLICK = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_share_dm_click"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // This derived label is the union of all binary features above
-  val IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_engaged"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, EngagementsPublic).asJava)
-
-  // define bad profile click by considering following engagements (user report, tweet report, mute, block, etc) at profile page
-  val IS_PROFILE_CLICKED_AND_PROFILE_USER_REPORT_CLICK = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_user_report_click"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_REPORT_CLICK = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_tweet_report_click"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_MUTE = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_mute"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_PROFILE_CLICKED_AND_PROFILE_BLOCK = new Binary(
-    name("recap.engagement.is_profile_clicked_and_profile_block"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // This derived label is the union of bad profile click engagements and existing negative feedback
-  val IS_NEGATIVE_FEEDBACK_V2 = new Binary(
-    name("recap.engagement.is_negative_feedback_v2"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_STRONG_NEGATIVE_FEEDBACK = new Binary(
-    name("recap.engagement.is_strong_negative_feedback"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  val IS_WEAK_NEGATIVE_FEEDBACK = new Binary(
-    name("recap.engagement.is_weak_negative_feedback"),
-    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
-  // engagement for following user from any surface area
-  val IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Binary(
-    "recap.engagement.is_followed_from_any_surface_area",
-    Set(EngagementsPublic, EngagementsPrivate).asJava)
-
-  // Reply downvote engagements
-  val IS_REPLY_DOWNVOTED =
-    new Binary(name("recap.engagement.is_reply_downvoted"), Set(EngagementsPrivate).asJava)
-  val IS_REPLY_DOWNVOTE_REMOVED =
-    new Binary(name("recap.engagement.is_reply_downvote_removed"), Set(EngagementsPrivate).asJava)
-
-  // Other engagements
-  val IS_GOOD_OPEN_LINK = new Binary(
-    name("recap.engagement.is_good_open_link"),
-    Set(EngagementsPrivate, LinksClickedOn).asJava)
-  val IS_ENGAGED = new Binary(
-    name("recap.engagement.any"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava
-  ) // Deprecated - to be removed shortly
-  val IS_EARLYBIRD_UNIFIED_ENGAGEMENT = new Binary(
-    name("recap.engagement.is_unified_engagement"),
-    Set(EngagementsPrivate, EngagementsPublic).asJava
-  ) // A subset of IS_ENGAGED specifically intended for use in earlybird models
-
-  // features from ThriftTweetFeatures
-  val PREV_USER_TWEET_ENGAGEMENT = new Continuous(
-    name("recap.tweetfeature.prev_user_tweet_enagagement"),
-    Set(EngagementScore, EngagementsPrivate, EngagementsPublic).asJava)
-  val IS_SENSITIVE = new Binary(name("recap.tweetfeature.is_sensitive"))
-  val HAS_MULTIPLE_MEDIA = new Binary(
-    name("recap.tweetfeature.has_multiple_media"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val IS_AUTHOR_PROFILE_EGG = new Binary(name("recap.tweetfeature.is_author_profile_egg"))
-  val IS_AUTHOR_NEW =
-    new Binary(name("recap.tweetfeature.is_author_new"), Set(UserState, UserType).asJava)
-  val NUM_MENTIONS = new Continuous(
-    name("recap.tweetfeature.num_mentions"),
-    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
-  val HAS_MENTION = new Binary(name("recap.tweetfeature.has_mention"), Set(UserVisibleFlag).asJava)
-  val NUM_HASHTAGS = new Continuous(
-    name("recap.tweetfeature.num_hashtags"),
-    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
-  val HAS_HASHTAG = new Binary(
-    name("recap.tweetfeature.has_hashtag"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val LINK_LANGUAGE = new Continuous(
-    name("recap.tweetfeature.link_language"),
-    Set(ProvidedLanguage, InferredLanguage).asJava)
-  val IS_AUTHOR_NSFW =
-    new Binary(name("recap.tweetfeature.is_author_nsfw"), Set(UserSafetyLabels, UserType).asJava)
-  val IS_AUTHOR_SPAM =
-    new Binary(name("recap.tweetfeature.is_author_spam"), Set(UserSafetyLabels, UserType).asJava)
-  val IS_AUTHOR_BOT =
-    new Binary(name("recap.tweetfeature.is_author_bot"), Set(UserSafetyLabels, UserType).asJava)
-  val SIGNATURE =
-    new Discrete(name("recap.tweetfeature.signature"), Set(DigitalSignatureNonrepudiation).asJava)
-  val LANGUAGE = new Discrete(
-    name("recap.tweetfeature.language"),
-    Set(ProvidedLanguage, InferredLanguage).asJava)
-  val FROM_INACTIVE_USER =
-    new Binary(name("recap.tweetfeature.from_inactive_user"), Set(UserActiveFlag).asJava)
-  val PROBABLY_FROM_FOLLOWED_AUTHOR = new Binary(name("recap.v3.tweetfeature.probably_from_follow"))
-  val FROM_MUTUAL_FOLLOW = new Binary(name("recap.tweetfeature.from_mutual_follow"))
-  val USER_REP = new Continuous(name("recap.tweetfeature.user_rep"))
-  val FROM_VERIFIED_ACCOUNT =
-    new Binary(name("recap.tweetfeature.from_verified_account"), Set(UserVerifiedFlag).asJava)
-  val IS_BUSINESS_SCORE = new Continuous(name("recap.tweetfeature.is_business_score"))
-  val HAS_CONSUMER_VIDEO = new Binary(
-    name("recap.tweetfeature.has_consumer_video"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_PRO_VIDEO = new Binary(
-    name("recap.tweetfeature.has_pro_video"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_VINE = new Binary(
-    name("recap.tweetfeature.has_vine"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_PERISCOPE = new Binary(
-    name("recap.tweetfeature.has_periscope"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_NATIVE_VIDEO = new Binary(
-    name("recap.tweetfeature.has_native_video"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_NATIVE_IMAGE = new Binary(
-    name("recap.tweetfeature.has_native_image"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_CARD = new Binary(
-    name("recap.tweetfeature.has_card"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_IMAGE = new Binary(
-    name("recap.tweetfeature.has_image"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_NEWS = new Binary(
-    name("recap.tweetfeature.has_news"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_VIDEO = new Binary(
-    name("recap.tweetfeature.has_video"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_VISIBLE_LINK = new Binary(
-    name("recap.tweetfeature.has_visible_link"),
-    Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val LINK_COUNT = new Continuous(
-    name("recap.tweetfeature.link_count"),
-    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
-  val HAS_LINK = new Binary(
-    name("recap.tweetfeature.has_link"),
-    Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val IS_OFFENSIVE = new Binary(name("recap.tweetfeature.is_offensive"))
-  val HAS_TREND = new Binary(
-    name("recap.tweetfeature.has_trend"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val HAS_MULTIPLE_HASHTAGS_OR_TRENDS = new Binary(
-    name("recap.tweetfeature.has_multiple_hashtag_or_trend"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val URL_DOMAINS = new SparseBinary(
-    name("recap.tweetfeature.url_domains"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val CONTAINS_MEDIA = new Binary(
-    name("recap.tweetfeature.contains_media"),
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val RETWEET_SEARCHER = new Binary(name("recap.tweetfeature.retweet_searcher"))
-  val REPLY_SEARCHER = new Binary(name("recap.tweetfeature.reply_searcher"))
-  val MENTION_SEARCHER =
-    new Binary(name("recap.tweetfeature.mention_searcher"), Set(UserVisibleFlag).asJava)
-  val REPLY_OTHER =
-    new Binary(name("recap.tweetfeature.reply_other"), Set(PublicReplies, PrivateReplies).asJava)
-  val RETWEET_OTHER = new Binary(
-    name("recap.tweetfeature.retweet_other"),
-    Set(PublicRetweets, PrivateRetweets).asJava)
-  val IS_REPLY =
-    new Binary(name("recap.tweetfeature.is_reply"), Set(PublicReplies, PrivateReplies).asJava)
-  val IS_RETWEET =
-    new Binary(name("recap.tweetfeature.is_retweet"), Set(PublicRetweets, PrivateRetweets).asJava)
-  val IS_EXTENDED_REPLY = new Binary(
-    name("recap.tweetfeature.is_extended_reply"),
-    Set(PublicReplies, PrivateReplies).asJava)
-  val MATCH_UI_LANG = new Binary(
-    name("recap.tweetfeature.match_ui_lang"),
-    Set(ProvidedLanguage, InferredLanguage).asJava)
-  val MATCH_SEARCHER_MAIN_LANG = new Binary(
-    name("recap.tweetfeature.match_searcher_main_lang"),
-    Set(ProvidedLanguage, InferredLanguage).asJava)
-  val MATCH_SEARCHER_LANGS = new Binary(
-    name("recap.tweetfeature.match_searcher_langs"),
-    Set(ProvidedLanguage, InferredLanguage).asJava)
-  val BIDIRECTIONAL_REPLY_COUNT = new Continuous(
-    name("recap.tweetfeature.bidirectional_reply_count"),
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-  val UNIDIRECTIONAL_REPLY_COUNT = new Continuous(
-    name("recap.tweetfeature.unidirectional_reply_count"),
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-  val BIDIRECTIONAL_RETWEET_COUNT = new Continuous(
-    name("recap.tweetfeature.bidirectional_retweet_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val UNIDIRECTIONAL_RETWEET_COUNT = new Continuous(
-    name("recap.tweetfeature.unidirectional_retweet_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val BIDIRECTIONAL_FAV_COUNT = new Continuous(
-    name("recap.tweetfeature.bidirectional_fav_count"),
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val UNIDIRECTIONAL_FAV_COUNT = new Continuous(
-    name("recap.tweetfeature.unidirectiona_fav_count"),
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val CONVERSATIONAL_COUNT = new Continuous(
-    name("recap.tweetfeature.conversational_count"),
-    Set(CountOfPrivateTweets, CountOfPublicTweets).asJava)
-  // tweet impressions on an embedded tweet
-  val EMBEDS_IMPRESSION_COUNT = new Continuous(
-    name("recap.tweetfeature.embeds_impression_count"),
-    Set(CountOfImpression).asJava)
-  // number of URLs that embed the tweet
-  val EMBEDS_URL_COUNT = new Continuous(
-    name("recap.tweetfeature.embeds_url_count"),
-    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
-  // currently only counts views on Snappy and Amplify pro videos. Counts for other videos forthcoming
-  val VIDEO_VIEW_COUNT = new Continuous(
-    name("recap.tweetfeature.video_view_count"),
-    Set(
-      CountOfTweetEntitiesClicked,
-      CountOfPrivateTweetEntitiesAndMetadata,
-      CountOfPublicTweetEntitiesAndMetadata,
-      EngagementsPrivate,
-      EngagementsPublic).asJava
-  )
-  val TWEET_COUNT_FROM_USER_IN_SNAPSHOT = new Continuous(
-    name("recap.tweetfeature.tweet_count_from_user_in_snapshot"),
-    Set(CountOfPrivateTweets, CountOfPublicTweets).asJava)
-  val NORMALIZED_PARUS_SCORE =
-    new Continuous("recap.tweetfeature.normalized_parus_score", Set(EngagementScore).asJava)
-  val PARUS_SCORE = new Continuous("recap.tweetfeature.parus_score", Set(EngagementScore).asJava)
-  val REAL_GRAPH_WEIGHT =
-    new Continuous("recap.tweetfeature.real_graph_weight", Set(UsersRealGraphScore).asJava)
-  val SARUS_GRAPH_WEIGHT = new Continuous("recap.tweetfeature.sarus_graph_weight")
-  val TOPIC_SIM_SEARCHER_INTERSTED_IN_AUTHOR_KNOWN_FOR = new Continuous(
-    "recap.tweetfeature.topic_sim_searcher_interested_in_author_known_for")
-  val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_INTERESTED_IN = new Continuous(
-    "recap.tweetfeature.topic_sim_searcher_author_both_interested_in")
-  val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_KNOWN_FOR = new Continuous(
-    "recap.tweetfeature.topic_sim_searcher_author_both_known_for")
-  val TOPIC_SIM_SEARCHER_INTERESTED_IN_TWEET = new Continuous(
-    "recap.tweetfeature.topic_sim_searcher_interested_in_tweet")
-  val IS_RETWEETER_PROFILE_EGG =
-    new Binary(name("recap.v2.tweetfeature.is_retweeter_profile_egg"), Set(UserType).asJava)
-  val IS_RETWEETER_NEW =
-    new Binary(name("recap.v2.tweetfeature.is_retweeter_new"), Set(UserType, UserState).asJava)
-  val IS_RETWEETER_BOT =
-    new Binary(
-      name("recap.v2.tweetfeature.is_retweeter_bot"),
-      Set(UserType, UserSafetyLabels).asJava)
-  val IS_RETWEETER_NSFW =
-    new Binary(
-      name("recap.v2.tweetfeature.is_retweeter_nsfw"),
-      Set(UserType, UserSafetyLabels).asJava)
-  val IS_RETWEETER_SPAM =
-    new Binary(
-      name("recap.v2.tweetfeature.is_retweeter_spam"),
-      Set(UserType, UserSafetyLabels).asJava)
-  val RETWEET_OF_MUTUAL_FOLLOW = new Binary(
-    name("recap.v2.tweetfeature.retweet_of_mutual_follow"),
-    Set(PublicRetweets, PrivateRetweets).asJava)
-  val SOURCE_AUTHOR_REP = new Continuous(name("recap.v2.tweetfeature.source_author_rep"))
-  val IS_RETWEET_OF_REPLY = new Binary(
-    name("recap.v2.tweetfeature.is_retweet_of_reply"),
-    Set(PublicRetweets, PrivateRetweets).asJava)
-  val RETWEET_DIRECTED_AT_USER_IN_FIRST_DEGREE = new Binary(
-    name("recap.v2.tweetfeature.is_retweet_directed_at_user_in_first_degree"),
-    Set(PublicRetweets, PrivateRetweets, Follow).asJava)
-  val MENTIONED_SCREEN_NAMES = new SparseBinary(
-    "entities.users.mentioned_screen_names",
-    Set(DisplayName, UserVisibleFlag).asJava)
-  val MENTIONED_SCREEN_NAME = new Text(
-    "entities.users.mentioned_screen_names.member",
-    Set(DisplayName, UserVisibleFlag).asJava)
-  val HASHTAGS = new SparseBinary(
-    "entities.hashtags",
-    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
-  val URL_SLUGS = new SparseBinary(name("recap.linkfeature.url_slugs"), Set(UrlFoundFlag).asJava)
-
-  // features from ThriftSearchResultMetadata
-  val REPLY_COUNT = new Continuous(
-    name("recap.searchfeature.reply_count"),
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-  val RETWEET_COUNT = new Continuous(
-    name("recap.searchfeature.retweet_count"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val FAV_COUNT = new Continuous(
-    name("recap.searchfeature.fav_count"),
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val BLENDER_SCORE = new Continuous(name("recap.searchfeature.blender_score"))
-  val TEXT_SCORE = new Continuous(name("recap.searchfeature.text_score"))
-
-  // features related to content source
-  val SOURCE_TYPE = new Discrete(name("recap.source.type"))
-
-  // features from addressbook
-  // the author is in the user's email addressbook
-  val USER_TO_AUTHOR_EMAIL_REACHABLE =
-    new Binary(name("recap.addressbook.user_to_author_email_reachable"), Set(AddressBook).asJava)
-  // the author is in the user's phone addressbook
-  val USER_TO_AUTHOR_PHONE_REACHABLE =
-    new Binary(name("recap.addressbook.user_to_author_phone_reachable"), Set(AddressBook).asJava)
-  // the user is in the author's email addressbook
-  val AUTHOR_TO_USER_EMAIL_REACHABLE =
-    new Binary(name("recap.addressbook.author_to_user_email_reachable"), Set(AddressBook).asJava)
-  // the user is in the user's phone addressbook
-  val AUTHOR_TO_USER_PHONE_REACHABLE =
-    new Binary(name("recap.addressbook.author_to_user_phone_reachable"), Set(AddressBook).asJava)
-
-  // predicted engagement (these features are used by prediction service to return the predicted engagement probability)
-  // these should match the names in engagement_to_score_feature_mapping
-  val PREDICTED_IS_FAVORITED =
-    new Continuous(name("recap.engagement_predicted.is_favorited"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_RETWEETED =
-    new Continuous(name("recap.engagement_predicted.is_retweeted"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_QUOTED =
-    new Continuous(name("recap.engagement_predicted.is_quoted"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_REPLIED =
-    new Continuous(name("recap.engagement_predicted.is_replied"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_GOOD_OPEN_LINK = new Continuous(
-    name("recap.engagement_predicted.is_good_open_link"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_PROFILE_CLICKED = new Continuous(
-    name("recap.engagement_predicted.is_profile_clicked"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Continuous(
-    name("recap.engagement_predicted.is_profile_clicked_and_profile_engaged"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_CLICKED =
-    new Continuous(name("recap.engagement_predicted.is_clicked"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_PHOTO_EXPANDED = new Continuous(
-    name("recap.engagement_predicted.is_photo_expanded"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_DONT_LIKE =
-    new Continuous(name("recap.engagement_predicted.is_dont_like"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_VIDEO_PLAYBACK_50 = new Continuous(
-    name("recap.engagement_predicted.is_video_playback_50"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_VIDEO_QUALITY_VIEWED = new Continuous(
-    name("recap.engagement_predicted.is_video_quality_viewed"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_BOOKMARKED =
-    new Continuous(name("recap.engagement_predicted.is_bookmarked"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SHARED =
-    new Continuous(name("recap.engagement_predicted.is_shared"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SHARE_MENU_CLICKED =
-    new Continuous(
-      name("recap.engagement_predicted.is_share_menu_clicked"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_IS_PROFILE_DWELLED_20_SEC = new Continuous(
-    name("recap.engagement_predicted.is_profile_dwelled_20_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Continuous(
-    name("recap.engagement_predicted.is_fullscreen_video_dwelled_5_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Continuous(
-    name("recap.engagement_predicted.is_fullscreen_video_dwelled_10_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Continuous(
-    name("recap.engagement_predicted.is_fullscreen_video_dwelled_20_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Continuous(
-    name("recap.engagement_predicted.is_fullscreen_video_dwelled_30_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_UNIFIED_ENGAGEMENT = new Continuous(
-    name("recap.engagement_predicted.is_unified_engagement"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_COMPOSE_TRIGGERED = new Continuous(
-    name("recap.engagement_predicted.is_compose_triggered"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Continuous(
-    name("recap.engagement_predicted.is_replied_reply_impressed_by_author"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR = new Continuous(
-    name("recap.engagement_predicted.is_replied_reply_engaged_by_author"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_GOOD_CLICKED_V1 = new Continuous(
-    name("recap.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_GOOD_CLICKED_V2 = new Continuous(
-    name("recap.engagement_predicted.is_good_clicked_convo_desc_v2"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_TWEET_DETAIL_DWELLED_8_SEC = new Continuous(
-    name("recap.engagement_predicted.is_tweet_detail_dwelled_8_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_TWEET_DETAIL_DWELLED_15_SEC = new Continuous(
-    name("recap.engagement_predicted.is_tweet_detail_dwelled_15_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_TWEET_DETAIL_DWELLED_25_SEC = new Continuous(
-    name("recap.engagement_predicted.is_tweet_detail_dwelled_25_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_TWEET_DETAIL_DWELLED_30_SEC = new Continuous(
-    name("recap.engagement_predicted.is_tweet_detail_dwelled_30_sec"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Continuous(
-    name("recap.engagement_predicted.is_favorited_fav_engaged_by_author"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S = new Continuous(
-    name(
-      "recap.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied_or_dwell_sum_gte_60_secs"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_IN_BOUNDS_V1 = new Continuous(
-    name("recap.engagement_predicted.is_dwelled_in_bounds_v1"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_DWELL_NORMALIZED_OVERALL = new Continuous(
-    name("recap.engagement_predicted.dwell_normalized_overall"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_DWELL_CDF =
-    new Continuous(name("recap.engagement_predicted.dwell_cdf"), Set(EngagementScore).asJava)
-  val PREDICTED_DWELL_CDF_OVERALL = new Continuous(
-    name("recap.engagement_predicted.dwell_cdf_overall"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED =
-    new Continuous(name("recap.engagement_predicted.is_dwelled"), Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_DWELLED_1S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_1s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_2S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_2s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_3S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_3s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_4S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_4s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_5S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_5s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_6S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_6s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_7S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_7s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_8S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_8s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_9S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_9s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_DWELLED_10S =
-    new Continuous(name("recap.engagement_predicted.is_dwelled_10s"), Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_SKIPPED_1S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_1s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SKIPPED_2S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_2s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SKIPPED_3S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_3s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SKIPPED_4S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_4s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SKIPPED_5S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_5s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SKIPPED_6S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_6s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SKIPPED_7S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_7s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SKIPPED_8S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_8s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SKIPPED_9S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_9s"), Set(EngagementScore).asJava)
-  val PREDICTED_IS_SKIPPED_10S =
-    new Continuous(name("recap.engagement_predicted.is_skipped_10s"), Set(EngagementScore).asJava)
-
-  val PREDICTED_IS_HOME_LATEST_VISITED = new Continuous(
-    name("recap.engagement_predicted.is_home_latest_visited"),
-    Set(EngagementScore).asJava)
-  val PREDICTED_IS_NEGATIVE_FEEDBACK =
-    new Continuous(
-      name("recap.engagement_predicted.is_negative_feedback"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_IS_NEGATIVE_FEEDBACK_V2 =
-    new Continuous(
-      name("recap.engagement_predicted.is_negative_feedback_v2"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_IS_WEAK_NEGATIVE_FEEDBACK =
-    new Continuous(
-      name("recap.engagement_predicted.is_weak_negative_feedback"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_IS_STRONG_NEGATIVE_FEEDBACK =
-    new Continuous(
-      name("recap.engagement_predicted.is_strong_negative_feedback"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_IS_REPORT_TWEET_CLICKED =
-    new Continuous(
-      name("recap.engagement_predicted.is_report_tweet_clicked"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_IS_UNFOLLOW_TOPIC =
-    new Continuous(
-      name("recap.engagement_predicted.is_unfollow_topic"),
-      Set(EngagementScore).asJava)
-  val PREDICTED_IS_RELEVANCE_PROMPT_YES_CLICKED = new Continuous(
-    name("recap.engagement_predicted.is_relevance_prompt_yes_clicked"),
-    Set(EngagementScore).asJava)
-
-  // engagement for following user from any surface area
-  val PREDICTED_IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Continuous(
-    "recap.engagement_predicted.is_followed_from_any_surface_area",
-    Set(EngagementScore).asJava)
-
-  
-  // These are global engagement counts for the Tweets.
-  val FAV_COUNT_V2 = new Continuous(
-    name("recap.earlybird.fav_count_v2"),
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
-  val RETWEET_COUNT_V2 = new Continuous(
-    name("recap.earlybird.retweet_count_v2"),
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
-  val REPLY_COUNT_V2 = new Continuous(
-    name("recap.earlybird.reply_count_v2"),
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
-
-  val HAS_US_POLITICAL_ANNOTATION = new Binary(
-    name("recap.has_us_political_annotation"),
-    Set(SemanticcoreClassification).asJava
-  )
-
-  val HAS_US_POLITICAL_ALL_GROUPS_ANNOTATION = new Binary(
-    name("recap.has_us_political_all_groups_annotation"),
-    Set(SemanticcoreClassification).asJava
-  )
-
-  val HAS_US_POLITICAL_ANNOTATION_HIGH_RECALL = new Binary(
-    name("recap.has_us_political_annotation_high_recall"),
-    Set(SemanticcoreClassification).asJava
-  )
-
-  val HAS_US_POLITICAL_ANNOTATION_HIGH_RECALL_V2 = new Binary(
-    name("recap.has_us_political_annotation_high_recall_v2"),
-    Set(SemanticcoreClassification).asJava
-  )
-
-  val HAS_US_POLITICAL_ANNOTATION_HIGH_PRECISION_V0 = new Binary(
-    name("recap.has_us_political_annotation_high_precision_v0"),
-    Set(SemanticcoreClassification).asJava
-  )
-
-  val HAS_US_POLITICAL_ANNOTATION_BALANCED_PRECISION_RECALL_V0 = new Binary(
-    name("recap.has_us_political_annotation_balanced_precision_recall_v0"),
-    Set(SemanticcoreClassification).asJava
-  )
-
-  val HAS_US_POLITICAL_ANNOTATION_HIGH_RECALL_V3 = new Binary(
-    name("recap.has_us_political_annotation_high_recall_v3"),
-    Set(SemanticcoreClassification).asJava
-  )
-
-  val HAS_US_POLITICAL_ANNOTATION_HIGH_PRECISION_V3 = new Binary(
-    name("recap.has_us_political_annotation_high_precision_v3"),
-    Set(SemanticcoreClassification).asJava
-  )
-
-  val HAS_US_POLITICAL_ANNOTATION_BALANCED_V3 = new Binary(
-    name("recap.has_us_political_annotation_balanced_v3"),
-    Set(SemanticcoreClassification).asJava
-  )
-
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.docx b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.docx
new file mode 100644
index 000000000..993fcf1c0
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.scala b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.scala
deleted file mode 100644
index edf152cda..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.scala
+++ /dev/null
@@ -1,29 +0,0 @@
-package com.twitter.timelines.prediction.features.recap
-
-object RecapFeaturesUtils {
-  // This needs to be updated if an engagement model is added or removed from prediction service.
-  val scoreFeatureIdsMap: Map[String, Long] = Map(
-    RecapFeatures.IS_FAVORITED.getFeatureName -> RecapFeatures.PREDICTED_IS_FAVORITED.getFeatureId,
-    RecapFeatures.IS_REPLIED.getFeatureName -> RecapFeatures.PREDICTED_IS_REPLIED.getFeatureId,
-    RecapFeatures.IS_RETWEETED.getFeatureName -> RecapFeatures.PREDICTED_IS_RETWEETED.getFeatureId,
-    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V1.getFeatureName -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V1.getFeatureId,
-    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V2.getFeatureName -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V2.getFeatureId,
-//    RecapFeatures.IS_NEGATIVE_FEEDBACK_V2.getFeatureName -> RecapFeatures.PREDICTED_IS_NEGATIVE_FEEDBACK_V2.getFeatureId,
-    RecapFeatures.IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED.getFeatureName -> RecapFeatures.PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED.getFeatureId,
-    RecapFeatures.IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR.getFeatureName -> RecapFeatures.PREDICTED_IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR.getFeatureId
-  )
-
-  // This needs to be updated if an engagement model is added or removed from prediction service.
-  val labelFeatureIdToScoreFeatureIdsMap: Map[Long, Long] = Map(
-    RecapFeatures.IS_FAVORITED.getFeatureId -> RecapFeatures.PREDICTED_IS_FAVORITED.getFeatureId,
-    RecapFeatures.IS_REPLIED.getFeatureId -> RecapFeatures.PREDICTED_IS_REPLIED.getFeatureId,
-    RecapFeatures.IS_RETWEETED.getFeatureId -> RecapFeatures.PREDICTED_IS_RETWEETED.getFeatureId,
-    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V1.getFeatureId -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V1.getFeatureId,
-    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V2.getFeatureId -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V2.getFeatureId,
-    //    RecapFeatures.IS_NEGATIVE_FEEDBACK_V2.getFeatureName -> RecapFeatures.PREDICTED_IS_NEGATIVE_FEEDBACK_V2.getFeatureId,
-    RecapFeatures.IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED.getFeatureId -> RecapFeatures.PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED.getFeatureId,
-    RecapFeatures.IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR.getFeatureId -> RecapFeatures.PREDICTED_IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR.getFeatureId
-  )
-
-  val labelFeatureNames: Seq[String] = scoreFeatureIdsMap.keys.toSeq
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/request_context/BUILD b/src/scala/com/twitter/timelines/prediction/features/request_context/BUILD
deleted file mode 100644
index 6fc497bf3..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/request_context/BUILD
+++ /dev/null
@@ -1,9 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/request_context/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/request_context/BUILD.docx
new file mode 100644
index 000000000..07d639e5e
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/request_context/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.docx
new file mode 100644
index 000000000..a7fad92aa
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.scala
deleted file mode 100644
index a7dd28852..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.scala
+++ /dev/null
@@ -1,57 +0,0 @@
-package com.twitter.timelines.prediction.features.request_context
-
-import com.twitter.ml.api.FeatureContext
-import com.twitter.ml.api.Feature._
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import scala.collection.JavaConverters._
-
-object RequestContextFeatures {
-  val COUNTRY_CODE =
-    new Text("request_context.country_code", Set(PrivateCountryOrRegion, InferredCountry).asJava)
-  val LANGUAGE_CODE = new Text(
-    "request_context.language_code",
-    Set(GeneralSettings, ProvidedLanguage, InferredLanguage).asJava)
-  val REQUEST_PROVENANCE = new Text("request_context.request_provenance", Set(AppUsage).asJava)
-  val DISPLAY_WIDTH = new Continuous("request_context.display_width", Set(OtherDeviceInfo).asJava)
-  val DISPLAY_HEIGHT = new Continuous("request_context.display_height", Set(OtherDeviceInfo).asJava)
-  val DISPLAY_DPI = new Continuous("request_context.display_dpi", Set(OtherDeviceInfo).asJava)
-
-  // the following features are not Continuous Features because for e.g. continuity between
-  // 23 and 0 hours cannot be handled that way. instead, we will treat each slice of hours/days
-  // independently, like a set of sparse binary features.
-  val TIMESTAMP_GMT_HOUR =
-    new Discrete("request_context.timestamp_gmt_hour", Set(PrivateTimestamp).asJava)
-  val TIMESTAMP_GMT_DOW =
-    new Discrete("request_context.timestamp_gmt_dow", Set(PrivateTimestamp).asJava)
-
-  val IS_GET_INITIAL = new Binary("request_context.is_get_initial")
-  val IS_GET_MIDDLE = new Binary("request_context.is_get_middle")
-  val IS_GET_NEWER = new Binary("request_context.is_get_newer")
-  val IS_GET_OLDER = new Binary("request_context.is_get_older")
-
-  // the following features are not Binary Features because the source field is Option[Boolean],
-  // and we want to distinguish Some(false) from None. None will be converted to -1.
-  val IS_POLLING = new Discrete("request_context.is_polling")
-  val IS_SESSION_START = new Discrete("request_context.is_session_start")
-
-  // Helps distinguish requests from "home" vs "home_latest" (reverse chron home view).
-  val TIMELINE_KIND = new Text("request_context.timeline_kind")
-
-  val featureContext = new FeatureContext(
-    COUNTRY_CODE,
-    LANGUAGE_CODE,
-    REQUEST_PROVENANCE,
-    DISPLAY_WIDTH,
-    DISPLAY_HEIGHT,
-    DISPLAY_DPI,
-    TIMESTAMP_GMT_HOUR,
-    TIMESTAMP_GMT_DOW,
-    IS_GET_INITIAL,
-    IS_GET_MIDDLE,
-    IS_GET_NEWER,
-    IS_GET_OLDER,
-    IS_POLLING,
-    IS_SESSION_START,
-    TIMELINE_KIND
-  )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD b/src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD
deleted file mode 100644
index ec194353b..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD
+++ /dev/null
@@ -1,13 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
-        "src/thrift/com/twitter/timelines/suggests/common:record-scala",
-        "timelines/data_processing/ml_util/aggregation_framework:common_types",
-        "timelines/data_processing/ml_util/aggregation_framework/conversion:for-timelines",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD.docx
new file mode 100644
index 000000000..3b4f7db56
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.docx
new file mode 100644
index 000000000..6032098fb
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.scala
deleted file mode 100644
index 4d2b4db81..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.scala
+++ /dev/null
@@ -1,61 +0,0 @@
-package com.twitter.timelines.prediction.features.simcluster
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.ml.api.Feature._
-import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
-import scala.collection.JavaConverters._
-
-class SimclusterFeaturesHelper(statsReceiver: StatsReceiver) {
-  import SimclusterFeatures._
-
-  private[this] val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName)
-  private[this] val invalidSimclusterModelVersion = scopedStatsReceiver
-    .counter("invalidSimclusterModelVersion")
-
-  def fromUserClusterInterestsPair(
-    userInterestClustersPair: (Long, ClustersUserIsInterestedIn)
-  ): Option[SimclusterFeatures] = {
-    val (userId, userInterestClusters) = userInterestClustersPair
-    if (userInterestClusters.knownForModelVersion == SIMCLUSTER_MODEL_VERSION) {
-      val userInterestClustersFavScores = for {
-        (clusterId, scores) <- userInterestClusters.clusterIdToScores
-        favScore <- scores.favScore
-      } yield (clusterId.toString, favScore)
-      Some(
-        SimclusterFeatures(
-          userId,
-          userInterestClusters.knownForModelVersion,
-          userInterestClustersFavScores.toMap
-        )
-      )
-    } else {
-      // We maintain this counter to make sure that the hardcoded modelVersion we are using is correct.
-      invalidSimclusterModelVersion.incr
-      None
-    }
-  }
-}
-
-object SimclusterFeatures {
-  // Check http://go/simclustersv2runbook for production versions
-  // Our models are trained for this specific model version only.
-  val SIMCLUSTER_MODEL_VERSION = "20M_145K_dec11"
-  val prefix = s"simcluster.v2.$SIMCLUSTER_MODEL_VERSION"
-
-  val SIMCLUSTER_USER_INTEREST_CLUSTER_SCORES = new SparseContinuous(
-    s"$prefix.user_interest_cluster_scores",
-    Set(EngagementScore, InferredInterests).asJava
-  )
-  val SIMCLUSTER_USER_INTEREST_CLUSTER_IDS = new SparseBinary(
-    s"$prefix.user_interest_cluster_ids",
-    Set(InferredInterests).asJava
-  )
-  val SIMCLUSTER_MODEL_VERSION_METADATA = new Text("meta.simcluster_version")
-}
-
-case class SimclusterFeatures(
-  userId: Long,
-  modelVersion: String,
-  interestClusterScoresMap: Map[String, Double])
diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.docx
new file mode 100644
index 000000000..2e9c6a434
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.scala
deleted file mode 100644
index 355a89c22..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.scala
+++ /dev/null
@@ -1,150 +0,0 @@
-package com.twitter.timelines.prediction.features.simcluster
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.finagle.stats.StatsReceiver
-import com.twitter.ml.api.{Feature, FeatureContext}
-import com.twitter.ml.api.Feature.{Continuous, SparseBinary, SparseContinuous}
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion._
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
-import com.twitter.timelines.suggests.common.record.thriftscala.SuggestionRecord
-import scala.collection.JavaConverters._
-
-class SimclusterTweetFeatures(statsReceiver: StatsReceiver) extends CombineCountsBase {
-  import SimclusterTweetFeatures._
-
-  private[this] val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName)
-  private[this] val invalidSimclusterModelVersion = scopedStatsReceiver
-    .counter("invalidSimclusterModelVersion")
-  private[this] val getFeaturesFromOverlappingSimclusterIdsCount = scopedStatsReceiver
-    .counter("getFeaturesFromOverlappingSimclusterIdsCount")
-  private[this] val emptySimclusterMaps = scopedStatsReceiver
-    .counter("emptySimclusterMaps")
-  private[this] val nonOverlappingSimclusterMaps = scopedStatsReceiver
-    .counter("nonOverlappingSimclusterMaps")
-
-  // Parameters required by CombineCountsBase
-  override val topK: Int = 5
-  override val hardLimit: Option[Int] = None
-  override val precomputedCountFeatures: Seq[Feature[_]] = Seq(
-    SIMCLUSTER_TWEET_TOPK_SORT_BY_TWEET_SCORE,
-    SIMCLUSTER_TWEET_TOPK_SORT_BY_COMBINED_SCORE
-  )
-
-  private def getFeaturesFromOverlappingSimclusterIds(
-    userSimclustersInterestedInMap: Map[String, Double],
-    tweetSimclustersTopKMap: Map[String, Double]
-  ): Map[Feature[_], List[Double]] = {
-    getFeaturesFromOverlappingSimclusterIdsCount.incr
-    if (userSimclustersInterestedInMap.isEmpty || tweetSimclustersTopKMap.isEmpty) {
-      emptySimclusterMaps.incr
-      Map.empty
-    } else {
-      val overlappingSimclusterIds =
-        userSimclustersInterestedInMap.keySet intersect tweetSimclustersTopKMap.keySet
-      if (overlappingSimclusterIds.isEmpty) {
-        nonOverlappingSimclusterMaps.incr
-        Map.empty
-      } else {
-        val (combinedScores, tweetScores) = overlappingSimclusterIds.map { id =>
-          val tweetScore = tweetSimclustersTopKMap.getOrElse(id, 0.0)
-          val combinedScore = userSimclustersInterestedInMap.getOrElse(id, 0.0) * tweetScore
-          (combinedScore, tweetScore)
-        }.unzip
-        Map(
-          SIMCLUSTER_TWEET_TOPK_SORT_BY_COMBINED_SCORE -> combinedScores.toList,
-          SIMCLUSTER_TWEET_TOPK_SORT_BY_TWEET_SCORE -> tweetScores.toList
-        )
-      }
-    }
-  }
-
-  def getCountFeaturesValuesMap(
-    suggestionRecord: SuggestionRecord,
-    simclustersTweetTopKMap: Map[String, Double]
-  ): Map[Feature[_], List[Double]] = {
-    val userSimclustersInterestedInMap = formatUserSimclustersInterestedIn(suggestionRecord)
-
-    val tweetSimclustersTopKMap = formatTweetSimclustersTopK(simclustersTweetTopKMap)
-
-    getFeaturesFromOverlappingSimclusterIds(userSimclustersInterestedInMap, tweetSimclustersTopKMap)
-  }
-
-  def filterByModelVersion(
-    simclustersMapOpt: Option[Map[String, Double]]
-  ): Option[Map[String, Double]] = {
-    simclustersMapOpt.flatMap { simclustersMap =>
-      val filteredSimclustersMap = simclustersMap.filter {
-        case (clusterId, score) =>
-          // The clusterId format is ModelVersion.IntegerClusterId.ScoreType as specified at
-          // com.twitter.ml.featurestore.catalog.features.recommendations.SimClustersV2TweetTopClusters
-          clusterId.contains(SimclusterFeatures.SIMCLUSTER_MODEL_VERSION)
-      }
-
-      // The assumption is that the simclustersMap will contain clusterIds with the same modelVersion.
-      // We maintain this counter to make sure that the hardcoded modelVersion we are using is correct.
-      if (simclustersMap.size > filteredSimclustersMap.size) {
-        invalidSimclusterModelVersion.incr
-      }
-
-      if (filteredSimclustersMap.nonEmpty) Some(filteredSimclustersMap) else None
-    }
-  }
-
-  val allFeatures: Seq[Feature[_]] = outputFeaturesPostMerge.toSeq ++ Seq(
-    SIMCLUSTER_TWEET_TOPK_CLUSTER_IDS,
-    SIMCLUSTER_TWEET_TOPK_CLUSTER_SCORES)
-  val featureContext = new FeatureContext(allFeatures: _*)
-}
-
-object SimclusterTweetFeatures {
-  val SIMCLUSTER_TWEET_TOPK_CLUSTER_IDS = new SparseBinary(
-    s"${SimclusterFeatures.prefix}.tweet_topk_cluster_ids",
-    Set(InferredInterests).asJava
-  )
-  val SIMCLUSTER_TWEET_TOPK_CLUSTER_SCORES = new SparseContinuous(
-    s"${SimclusterFeatures.prefix}.tweet_topk_cluster_scores",
-    Set(EngagementScore, InferredInterests).asJava
-  )
-
-  val SIMCLUSTER_TWEET_TOPK_CLUSTER_ID =
-    TypedAggregateGroup.sparseFeature(SIMCLUSTER_TWEET_TOPK_CLUSTER_IDS)
-
-  val SIMCLUSTER_TWEET_TOPK_SORT_BY_TWEET_SCORE = new Continuous(
-    s"${SimclusterFeatures.prefix}.tweet_topk_sort_by_tweet_score",
-    Set(EngagementScore, InferredInterests).asJava
-  )
-
-  val SIMCLUSTER_TWEET_TOPK_SORT_BY_COMBINED_SCORE = new Continuous(
-    s"${SimclusterFeatures.prefix}.tweet_topk_sort_by_combined_score",
-    Set(EngagementScore, InferredInterests).asJava
-  )
-
-  def formatUserSimclustersInterestedIn(suggestionRecord: SuggestionRecord): Map[String, Double] = {
-    suggestionRecord.userSimclustersInterestedIn
-      .map { clustersUserIsInterestedIn =>
-        if (clustersUserIsInterestedIn.knownForModelVersion == SimclusterFeatures.SIMCLUSTER_MODEL_VERSION) {
-          clustersUserIsInterestedIn.clusterIdToScores.collect {
-            case (clusterId, scores) if scores.favScore.isDefined =>
-              (clusterId.toString, scores.favScore.get)
-          }
-        } else Map.empty[String, Double]
-      }.getOrElse(Map.empty[String, Double])
-      .toMap
-  }
-
-  def formatTweetSimclustersTopK(
-    simclustersTweetTopKMap: Map[String, Double]
-  ): Map[String, Double] = {
-    simclustersTweetTopKMap.collect {
-      case (clusterId, score) =>
-        // The clusterId format is <ModelVersion.IntegerClusterId.ScoreType> as specified at
-        // com.twitter.ml.featurestore.catalog.features.recommendations.SimClustersV2TweetTopClusters
-        // and we want to extract the IntegerClusterId.
-        // The split function takes a regex; therefore, we need to escape . and we also need to escape
-        // \ since they are both special characters. Hence, the double \\.
-        val clusterIdSplit = clusterId.split("\\.")
-        val integerClusterId = clusterIdSplit(1) // The IntegerClusterId is at position 1.
-        (integerClusterId, score)
-    }
-  }
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.docx
new file mode 100644
index 000000000..6ab4aaa45
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.scala
deleted file mode 100644
index 0629636c0..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.scala
+++ /dev/null
@@ -1,43 +0,0 @@
-package com.twitter.timelines.prediction.features.simcluster
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType.SemanticcoreClassification
-import com.twitter.ml.api.Feature
-import com.twitter.ml.api.Feature.Continuous
-import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion.CombineCountsBase
-import scala.collection.JavaConverters._
-
-object SimclustersScoresFeatures extends CombineCountsBase {
-  override def topK: Int = 2
-
-  override def hardLimit: Option[Int] = Some(20)
-
-  val prefix = s"recommendations.sim_clusters_scores"
-  val TOPIC_CONSUMER_TWEET_EMBEDDING_Cs = new Continuous(
-    s"$prefix.localized_topic_consumer_tweet_embedding_cosine_similarity",
-    Set(SemanticcoreClassification).asJava)
-  val TOPIC_PRODUCER_TWEET_EMBEDDING_Cs = new Continuous(
-    s"$prefix.topic_producer_tweet_embedding_cosine_similarity",
-    Set(SemanticcoreClassification).asJava)
-  val USER_TOPIC_CONSUMER_TWEET_EMBEDDING_COSINE_SIM = new Continuous(
-    s"$prefix.user_interested_in_localized_topic_consumer_embedding_cosine_similarity",
-    Set(SemanticcoreClassification).asJava)
-  val USER_TOPIC_CONSUMER_TWEET_EMBEDDING_DOT_PRODUCT = new Continuous(
-    s"$prefix.user_interested_in_localized_topic_consumer_embedding_dot_product",
-    Set(SemanticcoreClassification).asJava)
-  val USER_TOPIC_PRODUCER_TWEET_EMBEDDING_COSINE_SIM = new Continuous(
-    s"$prefix.user_interested_in_localized_topic_producer_embedding_cosine_similarity",
-    Set(SemanticcoreClassification).asJava)
-  val USER_TOPIC_PRODUCER_TWEET_EMBEDDING_DOT_PRODUCT = new Continuous(
-    s"$prefix.user_interested_in_localized_topic_producer_embedding_dot_product",
-    Set(SemanticcoreClassification).asJava)
-
-  override def precomputedCountFeatures: Seq[Feature[_]] =
-    Seq(
-      TOPIC_CONSUMER_TWEET_EMBEDDING_Cs,
-      TOPIC_PRODUCER_TWEET_EMBEDDING_Cs,
-      USER_TOPIC_CONSUMER_TWEET_EMBEDDING_COSINE_SIM,
-      USER_TOPIC_CONSUMER_TWEET_EMBEDDING_DOT_PRODUCT,
-      USER_TOPIC_PRODUCER_TWEET_EMBEDDING_COSINE_SIM,
-      USER_TOPIC_PRODUCER_TWEET_EMBEDDING_DOT_PRODUCT
-    )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD b/src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD
deleted file mode 100644
index 0c00b1e5b..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD
+++ /dev/null
@@ -1,15 +0,0 @@
-scala_library(
-    name = "socialproof_features",
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "3rdparty/jvm/com/ibm/icu:icu4j",
-        "src/java/com/twitter/ml/api:api-base",
-        "src/scala/com/twitter/ml/api/util",
-        "src/scala/com/twitter/timelines/util",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/ml/api:data-java",
-        "src/thrift/com/twitter/timelines/socialproof:socialproof-scala",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD.docx
new file mode 100644
index 000000000..47e5d42e0
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.docx
new file mode 100644
index 000000000..ea3ebf53f
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.scala
deleted file mode 100644
index 163ba7efa..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.scala
+++ /dev/null
@@ -1,172 +0,0 @@
-package com.twitter.timelines.prediction.features.socialproof
-
-import com.twitter.ml.api.DataRecord
-import com.twitter.ml.api.Feature.Binary
-import com.twitter.ml.api.Feature.Continuous
-import com.twitter.ml.api.Feature.SparseBinary
-import com.twitter.ml.api.util.FDsl._
-import com.twitter.timelines.prediction.features.socialproof.SocialProofDataRecordFeatures._
-import com.twitter.timelines.socialproof.thriftscala.SocialProof
-import com.twitter.timelines.socialproof.v1.thriftscala.SocialProofType
-import com.twitter.timelines.util.CommonTypes.UserId
-import scala.collection.JavaConverters._
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-
-abstract class SocialProofUserGroundTruth(userIds: Seq[UserId], count: Int) {
-  require(
-    count >= userIds.size,
-    "count must be equal to or greater than the number of entries in userIds"
-  )
-  // Using Double as the return type to make it more convenient for these values to be used as
-  // ML feature values.
-  val displayedUserCount: Double = userIds.size.toDouble
-  val undisplayedUserCount: Double = count - userIds.size.toDouble
-  val totalCount: Double = count.toDouble
-
-  def featureDisplayedUsers: SparseBinary
-  def featureDisplayedUserCount: Continuous
-  def featureUndisplayedUserCount: Continuous
-  def featureTotalUserCount: Continuous
-
-  def setFeatures(rec: DataRecord): Unit = {
-    rec.setFeatureValue(featureDisplayedUsers, toStringSet(userIds))
-    rec.setFeatureValue(featureDisplayedUserCount, displayedUserCount)
-    rec.setFeatureValue(featureUndisplayedUserCount, undisplayedUserCount)
-    rec.setFeatureValue(featureTotalUserCount, totalCount)
-  }
-  protected def toStringSet(value: Seq[Long]): Set[String] = {
-    value.map(_.toString).toSet
-  }
-}
-
-case class FavoritedBySocialProofUserGroundTruth(userIds: Seq[UserId] = Seq.empty, count: Int = 0)
-    extends SocialProofUserGroundTruth(userIds, count) {
-
-  override val featureDisplayedUsers = SocialProofDisplayedFavoritedByUsers
-  override val featureDisplayedUserCount = SocialProofDisplayedFavoritedByUserCount
-  override val featureUndisplayedUserCount = SocialProofUndisplayedFavoritedByUserCount
-  override val featureTotalUserCount = SocialProofTotalFavoritedByUserCount
-}
-
-case class RetweetedBySocialProofUserGroundTruth(userIds: Seq[UserId] = Seq.empty, count: Int = 0)
-    extends SocialProofUserGroundTruth(userIds, count) {
-
-  override val featureDisplayedUsers = SocialProofDisplayedRetweetedByUsers
-  override val featureDisplayedUserCount = SocialProofDisplayedRetweetedByUserCount
-  override val featureUndisplayedUserCount = SocialProofUndisplayedRetweetedByUserCount
-  override val featureTotalUserCount = SocialProofTotalRetweetedByUserCount
-}
-
-case class RepliedBySocialProofUserGroundTruth(userIds: Seq[UserId] = Seq.empty, count: Int = 0)
-    extends SocialProofUserGroundTruth(userIds, count) {
-
-  override val featureDisplayedUsers = SocialProofDisplayedRepliedByUsers
-  override val featureDisplayedUserCount = SocialProofDisplayedRepliedByUserCount
-  override val featureUndisplayedUserCount = SocialProofUndisplayedRepliedByUserCount
-  override val featureTotalUserCount = SocialProofTotalRepliedByUserCount
-}
-
-case class SocialProofFeatures(
-  hasSocialProof: Boolean,
-  favoritedBy: FavoritedBySocialProofUserGroundTruth = FavoritedBySocialProofUserGroundTruth(),
-  retweetedBy: RetweetedBySocialProofUserGroundTruth = RetweetedBySocialProofUserGroundTruth(),
-  repliedBy: RepliedBySocialProofUserGroundTruth = RepliedBySocialProofUserGroundTruth()) {
-
-  def setFeatures(dataRecord: DataRecord): Unit =
-    if (hasSocialProof) {
-      dataRecord.setFeatureValue(HasSocialProof, hasSocialProof)
-      favoritedBy.setFeatures(dataRecord)
-      retweetedBy.setFeatures(dataRecord)
-      repliedBy.setFeatures(dataRecord)
-    }
-}
-
-object SocialProofFeatures {
-  def apply(socialProofs: Seq[SocialProof]): SocialProofFeatures =
-    socialProofs.foldLeft(SocialProofFeatures(hasSocialProof = socialProofs.nonEmpty))(
-      (prevFeatures, socialProof) => {
-        val userIds = socialProof.v1.userIds
-        val count = socialProof.v1.count
-        socialProof.v1.socialProofType match {
-          case SocialProofType.FavoritedBy =>
-            prevFeatures.copy(favoritedBy = FavoritedBySocialProofUserGroundTruth(userIds, count))
-          case SocialProofType.RetweetedBy =>
-            prevFeatures.copy(retweetedBy = RetweetedBySocialProofUserGroundTruth(userIds, count))
-          case SocialProofType.RepliedBy =>
-            prevFeatures.copy(repliedBy = RepliedBySocialProofUserGroundTruth(userIds, count))
-          case _ =>
-            prevFeatures // skip silently instead of breaking jobs, since this isn't used yet
-        }
-      })
-}
-
-object SocialProofDataRecordFeatures {
-  val HasSocialProof = new Binary("recap.social_proof.has_social_proof")
-
-  val SocialProofDisplayedFavoritedByUsers = new SparseBinary(
-    "recap.social_proof.list.displayed.favorited_by",
-    Set(UserId, PublicLikes, PrivateLikes).asJava
-  )
-  val SocialProofDisplayedFavoritedByUserCount = new Continuous(
-    "recap.social_proof.count.displayed.favorited_by",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-  val SocialProofUndisplayedFavoritedByUserCount = new Continuous(
-    "recap.social_proof.count.undisplayed.favorited_by",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-  val SocialProofTotalFavoritedByUserCount = new Continuous(
-    "recap.social_proof.count.total.favorited_by",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-
-  val SocialProofDisplayedRetweetedByUsers = new SparseBinary(
-    "recap.social_proof.list.displayed.retweeted_by",
-    Set(UserId, PublicRetweets, PrivateRetweets).asJava
-  )
-  val SocialProofDisplayedRetweetedByUserCount = new Continuous(
-    "recap.social_proof.count.displayed.retweeted_by",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val SocialProofUndisplayedRetweetedByUserCount = new Continuous(
-    "recap.social_proof.count.undisplayed.retweeted_by",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val SocialProofTotalRetweetedByUserCount = new Continuous(
-    "recap.social_proof.count.total.retweeted_by",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-
-  val SocialProofDisplayedRepliedByUsers = new SparseBinary(
-    "recap.social_proof.list.displayed.replied_by",
-    Set(UserId, PublicReplies, PrivateReplies).asJava
-  )
-  val SocialProofDisplayedRepliedByUserCount = new Continuous(
-    "recap.social_proof.count.displayed.replied_by",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-  val SocialProofUndisplayedRepliedByUserCount = new Continuous(
-    "recap.social_proof.count.undisplayed.replied_by",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-  val SocialProofTotalRepliedByUserCount = new Continuous(
-    "recap.social_proof.count.total.replied_by",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-
-  val AllFeatures = Seq(
-    HasSocialProof,
-    SocialProofDisplayedFavoritedByUsers,
-    SocialProofDisplayedFavoritedByUserCount,
-    SocialProofUndisplayedFavoritedByUserCount,
-    SocialProofTotalFavoritedByUserCount,
-    SocialProofDisplayedRetweetedByUsers,
-    SocialProofDisplayedRetweetedByUserCount,
-    SocialProofUndisplayedRetweetedByUserCount,
-    SocialProofTotalRetweetedByUserCount,
-    SocialProofDisplayedRepliedByUsers,
-    SocialProofDisplayedRepliedByUserCount,
-    SocialProofUndisplayedRepliedByUserCount,
-    SocialProofTotalRepliedByUserCount
-  )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/time_features/BUILD b/src/scala/com/twitter/timelines/prediction/features/time_features/BUILD
deleted file mode 100644
index b5c49af36..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/time_features/BUILD
+++ /dev/null
@@ -1,10 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/timelines/time_features:time_features-scala",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/time_features/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/time_features/BUILD.docx
new file mode 100644
index 000000000..2b2bb7cb2
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/time_features/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.docx
new file mode 100644
index 000000000..205e95a36
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.scala
deleted file mode 100644
index b398203c3..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.scala
+++ /dev/null
@@ -1,111 +0,0 @@
-package com.twitter.timelines.prediction.features.time_features
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import com.twitter.ml.api.Feature._
-import scala.collection.JavaConverters._
-import com.twitter.util.Duration
-import com.twitter.conversions.DurationOps._
-
-object TimeDataRecordFeatures {
-  val TIME_BETWEEN_NON_POLLING_REQUESTS_AVG = new Continuous(
-    "time_features.time_between_non_polling_requests_avg",
-    Set(PrivateTimestamp).asJava
-  )
-  val TIME_SINCE_TWEET_CREATION = new Continuous("time_features.time_since_tweet_creation")
-  val TIME_SINCE_SOURCE_TWEET_CREATION = new Continuous(
-    "time_features.time_since_source_tweet_creation"
-  )
-  val TIME_SINCE_LAST_NON_POLLING_REQUEST = new Continuous(
-    "time_features.time_since_last_non_polling_request",
-    Set(PrivateTimestamp).asJava
-  )
-  val NON_POLLING_REQUESTS_SINCE_TWEET_CREATION = new Continuous(
-    "time_features.non_polling_requests_since_tweet_creation",
-    Set(PrivateTimestamp).asJava
-  )
-  val TWEET_AGE_RATIO = new Continuous("time_features.tweet_age_ratio")
-  val IS_TWEET_RECYCLED = new Binary("time_features.is_tweet_recycled")
-  // Last Engagement features
-  val LAST_FAVORITE_SINCE_CREATION_HRS = new Continuous(
-    "time_features.earlybird.last_favorite_since_creation_hrs",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-  val LAST_RETWEET_SINCE_CREATION_HRS = new Continuous(
-    "time_features.earlybird.last_retweet_since_creation_hrs",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val LAST_REPLY_SINCE_CREATION_HRS = new Continuous(
-    "time_features.earlybird.last_reply_since_creation_hrs",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-  val LAST_QUOTE_SINCE_CREATION_HRS = new Continuous(
-    "time_features.earlybird.last_quote_since_creation_hrs",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val TIME_SINCE_LAST_FAVORITE_HRS = new Continuous(
-    "time_features.earlybird.time_since_last_favorite",
-    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
-  )
-  val TIME_SINCE_LAST_RETWEET_HRS = new Continuous(
-    "time_features.earlybird.time_since_last_retweet",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-  val TIME_SINCE_LAST_REPLY_HRS = new Continuous(
-    "time_features.earlybird.time_since_last_reply",
-    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
-  )
-  val TIME_SINCE_LAST_QUOTE_HRS = new Continuous(
-    "time_features.earlybird.time_since_last_quote",
-    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
-  )
-
-  val TIME_SINCE_VIEWER_ACCOUNT_CREATION_SECS =
-    new Continuous(
-      "time_features.time_since_viewer_account_creation_secs",
-      Set(AccountCreationTime, AgeOfAccount).asJava)
-
-  val USER_ID_IS_SNOWFLAKE_ID =
-    new Binary("time_features.time_user_id_is_snowflake_id", Set(UserType).asJava)
-
-  val IS_30_DAY_NEW_USER =
-    new Binary("time_features.is_day_30_new_user", Set(AccountCreationTime, AgeOfAccount).asJava)
-  val IS_12_MONTH_NEW_USER =
-    new Binary("time_features.is_month_12_new_user", Set(AccountCreationTime, AgeOfAccount).asJava)
-  val ACCOUNT_AGE_INTERVAL =
-    new Discrete("time_features.account_age_interval", Set(AgeOfAccount).asJava)
-}
-
-object AccountAgeInterval extends Enumeration {
-  val LTE_1_DAY, GT_1_DAY_LTE_5_DAY, GT_5_DAY_LTE_14_DAY, GT_14_DAY_LTE_30_DAY = Value
-
-  def fromDuration(accountAge: Duration): Option[AccountAgeInterval.Value] = {
-    accountAge match {
-      case a if (a <= 1.day) => Some(LTE_1_DAY)
-      case a if (1.day < a && a <= 5.days) => Some(GT_1_DAY_LTE_5_DAY)
-      case a if (5.days < a && a <= 14.days) => Some(GT_5_DAY_LTE_14_DAY)
-      case a if (14.days < a && a <= 30.days) => Some(GT_14_DAY_LTE_30_DAY)
-      case _ => None
-    }
-  }
-}
-
-case class TimeFeatures(
-  isTweetRecycled: Boolean,
-  timeSinceTweetCreation: Double,
-  isDay30NewUser: Boolean,
-  isMonth12NewUser: Boolean,
-  timeSinceSourceTweetCreation: Double, // same as timeSinceTweetCreation for non-retweets
-  timeSinceViewerAccountCreationSecs: Option[Double],
-  timeBetweenNonPollingRequestsAvg: Option[Double] = None,
-  timeSinceLastNonPollingRequest: Option[Double] = None,
-  nonPollingRequestsSinceTweetCreation: Option[Double] = None,
-  tweetAgeRatio: Option[Double] = None,
-  lastFavSinceCreationHrs: Option[Double] = None,
-  lastRetweetSinceCreationHrs: Option[Double] = None,
-  lastReplySinceCreationHrs: Option[Double] = None,
-  lastQuoteSinceCreationHrs: Option[Double] = None,
-  timeSinceLastFavoriteHrs: Option[Double] = None,
-  timeSinceLastRetweetHrs: Option[Double] = None,
-  timeSinceLastReplyHrs: Option[Double] = None,
-  timeSinceLastQuoteHrs: Option[Double] = None,
-  accountAgeInterval: Option[AccountAgeInterval.Value] = None)
diff --git a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD
deleted file mode 100644
index a4ad0eabf..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD
+++ /dev/null
@@ -1,10 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "graph-feature-service/src/main/thrift/com/twitter/graph_feature_service:graph_feature_service_thrift-scala",
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD.docx
new file mode 100644
index 000000000..ddee60e97
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.docx
new file mode 100644
index 000000000..eee752f33
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.scala
deleted file mode 100644
index 03a112578..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.scala
+++ /dev/null
@@ -1,93 +0,0 @@
-package com.twitter.timelines.prediction.features.two_hop_features
-
-import com.twitter.graph_feature_service.thriftscala.EdgeType
-import com.twitter.ml.api.Feature._
-import scala.collection.JavaConverters._
-import TwoHopFeaturesConfig.personalDataTypesMap
-
-object TwoHopFeaturesDescriptor {
-  val prefix = "two_hop"
-  val normalizedPostfix = "normalized"
-  val leftNodeDegreePostfix = "left_degree"
-  val rightNodeDegreePostfix = "right_degree"
-
-  type TwoHopFeatureMap = Map[(EdgeType, EdgeType), Continuous]
-  type TwoHopFeatureNodeDegreeMap = Map[EdgeType, Continuous]
-
-  def apply(edgeTypePairs: Seq[(EdgeType, EdgeType)]): TwoHopFeaturesDescriptor = {
-    new TwoHopFeaturesDescriptor(edgeTypePairs)
-  }
-}
-
-class TwoHopFeaturesDescriptor(edgeTypePairs: Seq[(EdgeType, EdgeType)]) {
-  import TwoHopFeaturesDescriptor._
-
-  def getLeftEdge(edgeTypePair: (EdgeType, EdgeType)): EdgeType = {
-    edgeTypePair._1
-  }
-
-  def getLeftEdgeName(edgeTypePair: (EdgeType, EdgeType)): String = {
-    getLeftEdge(edgeTypePair).originalName.toLowerCase
-  }
-
-  def getRightEdge(edgeTypePair: (EdgeType, EdgeType)): EdgeType = {
-    edgeTypePair._2
-  }
-
-  def getRightEdgeName(edgeTypePair: (EdgeType, EdgeType)): String = {
-    getRightEdge(edgeTypePair).originalName.toLowerCase
-  }
-
-  val rawFeaturesMap: TwoHopFeatureMap = edgeTypePairs.map(edgeTypePair => {
-    val leftEdgeType = getLeftEdge(edgeTypePair)
-    val leftEdgeName = getLeftEdgeName(edgeTypePair)
-    val rightEdgeType = getRightEdge(edgeTypePair)
-    val rightEdgeName = getRightEdgeName(edgeTypePair)
-    val personalDataTypes = (
-      personalDataTypesMap.getOrElse(leftEdgeType, Set.empty) ++
-        personalDataTypesMap.getOrElse(rightEdgeType, Set.empty)
-    ).asJava
-    val rawFeature = new Continuous(s"$prefix.$leftEdgeName.$rightEdgeName", personalDataTypes)
-    edgeTypePair -> rawFeature
-  })(collection.breakOut)
-
-  val leftNodeDegreeFeaturesMap: TwoHopFeatureNodeDegreeMap = edgeTypePairs.map(edgeTypePair => {
-    val leftEdgeType = getLeftEdge(edgeTypePair)
-    val leftEdgeName = getLeftEdgeName(edgeTypePair)
-    val personalDataTypes = personalDataTypesMap.getOrElse(leftEdgeType, Set.empty).asJava
-    val leftNodeDegreeFeature =
-      new Continuous(s"$prefix.$leftEdgeName.$leftNodeDegreePostfix", personalDataTypes)
-    leftEdgeType -> leftNodeDegreeFeature
-  })(collection.breakOut)
-
-  val rightNodeDegreeFeaturesMap: TwoHopFeatureNodeDegreeMap = edgeTypePairs.map(edgeTypePair => {
-    val rightEdgeType = getRightEdge(edgeTypePair)
-    val rightEdgeName = getRightEdgeName(edgeTypePair)
-    val personalDataTypes = personalDataTypesMap.getOrElse(rightEdgeType, Set.empty).asJava
-    val rightNodeDegreeFeature =
-      new Continuous(s"$prefix.$rightEdgeName.$rightNodeDegreePostfix", personalDataTypes)
-    rightEdgeType -> rightNodeDegreeFeature
-  })(collection.breakOut)
-
-  val normalizedFeaturesMap: TwoHopFeatureMap = edgeTypePairs.map(edgeTypePair => {
-    val leftEdgeType = getLeftEdge(edgeTypePair)
-    val leftEdgeName = getLeftEdgeName(edgeTypePair)
-    val rightEdgeType = getRightEdge(edgeTypePair)
-    val rightEdgeName = getRightEdgeName(edgeTypePair)
-    val personalDataTypes = (
-      personalDataTypesMap.getOrElse(leftEdgeType, Set.empty) ++
-        personalDataTypesMap.getOrElse(rightEdgeType, Set.empty)
-    ).asJava
-    val normalizedFeature =
-      new Continuous(s"$prefix.$leftEdgeName.$rightEdgeName.$normalizedPostfix", personalDataTypes)
-    edgeTypePair -> normalizedFeature
-  })(collection.breakOut)
-
-  private val rawFeaturesSeq: Seq[Continuous] = rawFeaturesMap.values.toSeq
-  private val leftNodeDegreeFeaturesSeq: Seq[Continuous] = leftNodeDegreeFeaturesMap.values.toSeq
-  private val rightNodeDegreeFeaturesSeq: Seq[Continuous] = rightNodeDegreeFeaturesMap.values.toSeq
-  private val normalizedFeaturesSeq: Seq[Continuous] = normalizedFeaturesMap.values.toSeq
-
-  val featuresSeq: Seq[Continuous] =
-    rawFeaturesSeq ++ leftNodeDegreeFeaturesSeq ++ rightNodeDegreeFeaturesSeq ++ normalizedFeaturesSeq
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.docx b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.docx
new file mode 100644
index 000000000..4168010af
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.scala b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.scala
deleted file mode 100644
index ece502e30..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.scala
+++ /dev/null
@@ -1,30 +0,0 @@
-package com.twitter.timelines.prediction.features.two_hop_features
-
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType
-import com.twitter.graph_feature_service.thriftscala.{EdgeType, FeatureType}
-
-object TwoHopFeaturesConfig {
-  val leftEdgeTypes = Seq(EdgeType.Following, EdgeType.Favorite, EdgeType.MutualFollow)
-  val rightEdgeTypes = Seq(
-    EdgeType.FollowedBy,
-    EdgeType.FavoritedBy,
-    EdgeType.RetweetedBy,
-    EdgeType.MentionedBy,
-    EdgeType.MutualFollow)
-
-  val edgeTypePairs: Seq[(EdgeType, EdgeType)] = {
-    for (leftEdgeType <- leftEdgeTypes; rightEdgeType <- rightEdgeTypes)
-      yield (leftEdgeType, rightEdgeType)
-  }
-
-  val featureTypes: Seq[FeatureType] = edgeTypePairs.map(pair => FeatureType(pair._1, pair._2))
-
-  val personalDataTypesMap: Map[EdgeType, Set[PersonalDataType]] = Map(
-    EdgeType.Following -> Set(PersonalDataType.CountOfFollowersAndFollowees),
-    EdgeType.Favorite -> Set(
-      PersonalDataType.CountOfPrivateLikes,
-      PersonalDataType.CountOfPublicLikes),
-    EdgeType.MutualFollow -> Set(PersonalDataType.CountOfFollowersAndFollowees),
-    EdgeType.FollowedBy -> Set(PersonalDataType.CountOfFollowersAndFollowees)
-  )
-}
diff --git a/src/scala/com/twitter/timelines/prediction/features/user_health/BUILD b/src/scala/com/twitter/timelines/prediction/features/user_health/BUILD
deleted file mode 100644
index 598e0c066..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/user_health/BUILD
+++ /dev/null
@@ -1,10 +0,0 @@
-scala_library(
-    sources = ["*.scala"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependencies = [
-        "src/java/com/twitter/ml/api:api-base",
-        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
-        "src/thrift/com/twitter/timelines/author_features/user_health:thrift-scala",
-    ],
-)
diff --git a/src/scala/com/twitter/timelines/prediction/features/user_health/BUILD.docx b/src/scala/com/twitter/timelines/prediction/features/user_health/BUILD.docx
new file mode 100644
index 000000000..69560da7f
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/user_health/BUILD.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.docx b/src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.docx
new file mode 100644
index 000000000..3b390d55c
Binary files /dev/null and b/src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.docx differ
diff --git a/src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.scala
deleted file mode 100644
index 7c8c7f8b1..000000000
--- a/src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-package com.twitter.timelines.prediction.features.user_health
-
-import com.twitter.ml.api.Feature
-import com.twitter.timelines.author_features.user_health.thriftscala.UserState
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType.{UserState => UserStatePDT}
-import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
-import scala.collection.JavaConverters._
-
-object UserHealthFeatures {
-  val UserState = new Feature.Discrete("user_health.user_state", Set(UserStatePDT, UserType).asJava)
-  val IsLightMinusUser =
-    new Feature.Binary("user_health.is_light_minus_user", Set(UserStatePDT, UserType).asJava)
-  val AuthorState =
-    new Feature.Discrete("user_health.author_state", Set(UserStatePDT, UserType).asJava)
-  val NumAuthorFollowers =
-    new Feature.Continuous("author_health.num_followers", Set(CountOfFollowersAndFollowees).asJava)
-  val NumAuthorConnectDays = new Feature.Continuous("author_health.num_connect_days")
-  val NumAuthorConnect = new Feature.Continuous("author_health.num_connect")
-
-  val IsUserVerifiedUnion = new Feature.Binary("user_account.is_user_verified_union")
-}
-
-case class UserHealthFeatures(id: Long, userStateOpt: Option[UserState])
diff --git a/src/thrift/com/twitter/interaction_graph/BUILD b/src/thrift/com/twitter/interaction_graph/BUILD
deleted file mode 100644
index 500c73d77..000000000
--- a/src/thrift/com/twitter/interaction_graph/BUILD
+++ /dev/null
@@ -1,15 +0,0 @@
-create_thrift_libraries(
-    base_name = "interaction_graph",
-    sources = ["*.thrift"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependency_roots = [
-    ],
-    generate_languages = [
-        "java",
-        "scala",
-        "strato",
-    ],
-    provides_java_name = "interaction_graph-thrift-java",
-    provides_scala_name = "interaction_graph-thrift-scala",
-)
diff --git a/src/thrift/com/twitter/interaction_graph/BUILD.docx b/src/thrift/com/twitter/interaction_graph/BUILD.docx
new file mode 100644
index 000000000..ce119296a
Binary files /dev/null and b/src/thrift/com/twitter/interaction_graph/BUILD.docx differ
diff --git a/src/thrift/com/twitter/interaction_graph/interaction_graph.docx b/src/thrift/com/twitter/interaction_graph/interaction_graph.docx
new file mode 100644
index 000000000..ec4639bf8
Binary files /dev/null and b/src/thrift/com/twitter/interaction_graph/interaction_graph.docx differ
diff --git a/src/thrift/com/twitter/interaction_graph/interaction_graph.thrift b/src/thrift/com/twitter/interaction_graph/interaction_graph.thrift
deleted file mode 100644
index d90df54cf..000000000
--- a/src/thrift/com/twitter/interaction_graph/interaction_graph.thrift
+++ /dev/null
@@ -1,98 +0,0 @@
-namespace java com.twitter.interaction_graph.thriftjava
-#@namespace scala com.twitter.interaction_graph.thriftscala
-#@namespace strato com.twitter.interaction_graph
-
-// These could be either a Vertex or an edge feature name
-// when you add a new feature, update VertexFeatureCombiner.java and EdgeFeatureCombiner.java.
-enum FeatureName {
-  num_retweets = 1
-  num_favorites = 2
-  num_mentions = 3
-  num_direct_messages = 4
-  num_tweet_clicks = 5
-  num_link_clicks = 6
-  num_profile_views = 7
-  num_follows = 8
-  num_unfollows = 9
-  num_mutual_follows = 10
-  address_book_email = 11
-  address_book_phone = 12
-  address_book_in_both = 13
-  address_book_mutual_edge_email = 14
-  address_book_mutual_edge_phone = 15
-  address_book_mutual_edge_in_both = 16
-  total_dwell_time = 17
-  num_inspected_statuses = 18
-  num_photo_tags = 19
-  num_blocks = 20 
-  num_mutes = 21 
-  num_report_as_abuses = 22
-  num_report_as_spams = 23
-  num_tweet_quotes = 24
-  num_push_opens = 25
-  num_ntab_clicks = 26,
-  num_rt_favories = 27,
-  num_rt_replies = 28,
-  num_rt_tweet_quotes = 29,
-  num_rt_retweets = 30,
-  num_rt_mentions = 31,
-  num_rt_tweet_clicks = 32,
-  num_rt_link_clicks = 33
-  num_shares = 34,
-  num_email_click = 35,
-  num_email_open = 36,
-  num_ntab_dislike_7_days = 37,
-  num_push_dismiss = 38,
-  num_push_report_tweet_click = 39,
-  num_push_report_user_click = 40,
-  num_replies = 41,
-  // vertex features after 128
-  num_create_tweets = 129,
-}
-// do remember to update the tests in InteractionGraphAggregationJobTest when adding new features but not updating agg_all
-
-struct TimeSeriesStatistics {
-  1: required double mean;
-  // For computing variance online: http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
-  2: required double m2_for_variance;
-  3: required double ewma; // Exponentially weighted moving average: ewma_t = \alpha x_t + (1-\alpha) ewma_{t-1}
-  4: required i32 num_elapsed_days; // Total number of days since we started counting this feature
-  5: required i32 num_non_zero_days; // Number of days when the interaction was non-zero (used to compute mean/variance)
-  6: optional i32 num_days_since_last; // Number of days since the latest interaction happen
-}(persisted="true", hasPersonalData = 'false') 
-
-struct VertexFeature {
-  1: required FeatureName name;
-  2: required bool outgoing; // direction e.g. true is num_retweets_by_user, and false is num_retweets_for_user
-  3: required TimeSeriesStatistics tss;
-}(persisted="true", hasPersonalData = 'false')
-
-struct Vertex {
-  1: required i64 user_id(personalDataType = 'UserId');
-  2: optional double weight;
-  3: list<VertexFeature> features;
-}(persisted="true", hasPersonalData = 'true')
-
-/*
- * These features are for an edge (a->b). Examples:
- * (i) follow is whether a follows b
- * (ii) num_retweets is number of b's tweets retweet by a
- */
-struct EdgeFeature {
-  1: required FeatureName name;
-  2: required TimeSeriesStatistics tss;
-}(persisted="true", hasPersonalData = 'false')
-
-struct Edge {
-  1: required i64 source_id(personalDataType = 'UserId');
-  2: required i64 destination_id(personalDataType = 'UserId');
-  3: optional double weight;
-  4: list<EdgeFeature> features;
-}(persisted="true", hasPersonalData = 'true')
-
-// these structs below are used by our ml pipeline
-struct EdgeLabel {
-  1: required i64 source_id(personalDataType = 'UserId');
-  2: required i64 destination_id(personalDataType = 'UserId');
-  3: required set<FeatureName> labels(personalDataType = 'AggregateImpressionEngagementData');
-}(persisted="true", hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/recos/recos.docx b/src/thrift/com/twitter/recos/recos.docx
new file mode 100644
index 000000000..ffa8efc41
Binary files /dev/null and b/src/thrift/com/twitter/recos/recos.docx differ
diff --git a/src/thrift/com/twitter/recos/recos.thrift b/src/thrift/com/twitter/recos/recos.thrift
deleted file mode 100644
index a0c6c8f03..000000000
--- a/src/thrift/com/twitter/recos/recos.thrift
+++ /dev/null
@@ -1,176 +0,0 @@
-namespace java com.twitter.recos.thriftjava
-#@namespace scala com.twitter.recos.thriftscala
-namespace rb Recos
-
-include "com/twitter/recos/features/tweet.thrift"
-
-enum RecommendTweetDisplayLocation {
-  HomeTimeline       = 0
-  Peek               = 1
-  WelcomeFlow        = 2
-  NetworkDigest      = 3
-  BackfillDigest     = 4
-  NetworkDigestExp1  = 5
-  NetworkDigestExp2  = 6 // deprecated
-  NetworkDigestExp3  = 7 // deprecated
-  HttpEndpoint       = 8
-  HomeTimeline1      = 9
-  HomeTimeline2      = 10
-  HomeTimeline3      = 11
-  HomeTimeline4      = 12
-  Poptart            = 13
-  NetworkDigestExp4  = 14
-  NetworkDigestExp5  = 15
-  NetworkDigestExp6  = 16
-  NetworkDigestExp7  = 17
-  NetworkDigestExp8  = 18
-  NetworkDigestExp9  = 19
-  InstantTimeline1   = 20 // AB1 + whitelist
-  InstantTimeline2   = 21 // AB1 + !whitelist
-  InstantTimeline3   = 22 // AB2 + whitelist
-  InstantTimeline4   = 23 // AB2 + !whitelist
-  BackfillDigestActive  = 24 // deprecated
-  BackfillDigestDormant = 25 // deprecated
-  ExploreUS             = 26 // deprecated
-  ExploreBR             = 27 // deprecated
-  ExploreIN             = 28 // deprecated
-  ExploreES             = 29 // deprecated
-  ExploreJP             = 30 // deprecated
-  MagicRecs             = 31
-  MagicRecs1            = 32
-  MagicRecs2            = 33
-  MagicRecs3            = 34
-  SMSDiscover           = 35
-  FastFollower          = 36
-  InstantTimeline5      = 37 // for instant timeline experiment
-  InstantTimeline6      = 38 // for instant timeline experiment
-  InstantTimeline7      = 39 // for instant timeline experiment
-  InstantTimeline8      = 40 // for instant timeline experiment
-  LoggedOutProfile      = 41
-  LoggedOutPermalink    = 42
-  Poptart2              = 43
-}
-
-enum RelatedTweetDisplayLocation {
-  Permalink       = 0
-  Permalink1      = 1
-  MobilePermalink = 2
-  Permalink3      = 3
-  Permalink4      = 4
-  RelatedTweets   = 5
-  RelatedTweets1  = 6
-  RelatedTweets2  = 7
-  RelatedTweets3  = 8
-  RelatedTweets4  = 9
-  LoggedOutProfile = 10
-  LoggedOutPermalink = 11
-}
-
-enum DDGBucket {
-  Control           = 0
-  Treatment         = 1
-  None              = 2
-}
-
-struct RecommendTweetRequest {
-  1: required i64                                   requesterId           // user id of the requesting user
-  2: required RecommendTweetDisplayLocation         displayLocation       // display location from the client
-  3: optional i64                                   clientId              // twitter api client id
-  4: optional i32                                   maxResults            // number of suggested results to return
-  5: optional list<i64>                             excludedTweetIds      // list of tweet ids to exclude from response
-  6: optional list<i64>                             excludedAuthorIds     // list of author ids to exclude from response
-  7: optional i64                                   guestId               // guestId
-  8: optional string                                languageCode          // Language code
-  9: optional string                                countryCode           // Country code
-  10: optional string                               ipAddress             // ip address of the user
-  11: optional string                               deviceId              // udid/uuid of device
-  12: optional bool                                 populateTweetFeatures // whether to populate tweet features. RecommendedTweet.tweetFeatures in the response will only be populated if this is set.
-}
-
-struct Bucket {
-  1: required string                                experimentName        // name of experiment (or not). experiment could be production or whatever fits
-  2: required string                                bucket                // name of bucket (may or may not be a DDG bucket, e.g., production)
-}
-
-struct RelatedTweetRequest {
-  1: required i64                                   tweetId               // original tweet id
-  2: required RelatedTweetDisplayLocation           displayLocation       // display location from the client
-  3: optional i64                                   clientId              // twitter api client id
-  4: optional i64                                   requesterId           // user id of the requesting user
-  5: optional i32                                   maxResults            // number of suggested results to return
-  6: optional list<i64>                             excludeTweetIds       // list of tweet ids to exclude from response
-  7: optional list<i64>                             excludedAuthorIds     // list of author ids to exclude from response
-  8: optional i64                                   guestId               // guestId
-  9: optional string                                languageCode          // Language code
-  10: optional string                               countryCode           // Country code
-  11: optional string                               ipAddress             // ip address of the user
-  12: optional string                               deviceId              // udid/uuid of device
-  13: optional string                               userAgent             // userAgent of the requesting user
-}
-
-enum SocialProofType {
-  FollowedBy = 1,
-  FavoritedBy = 2,
-  RetweetedBy = 3,
-  SimilarTo = 4,
-  RESERVED_2 = 5,
-  RESERVED_3 = 6,
-  RESERVED_4 = 7,
-  RESERVED_5 = 8,
-  RESERVED_6 = 9,
-  RESERVED_7 = 10
-}
-
-enum Algorithm {
-  Salsa = 1,
-  PastEmailClicks = 2,
-  SimilarToEmailClicks = 3,
-  PastClientEventClicks = 4,
-  VitNews = 5,
-  StrongTieScoring = 6,
-  PollsFromGraph = 7,
-  PollsBasedOnGeo = 8,
-  RESERVED_9 = 9,
-  RESERVED_10 = 10,
-  RESERVED_11 = 11,
-}
-
-struct RecommendedTweet {
-  1: required i64                            tweetId
-  2: required i64                            authorId
-  3: required list<i64>                      socialProof
-  4: required string                         feedbackToken
-  5: optional list<i64>                      favBy          // optionally provide a list of users who fav'ed the tweet if exist
-  6: optional tweet.RecommendedTweetFeatures tweetFeatures  // the features of a recommended tweet
-  7: optional SocialProofType                socialProofType // type of social proof. favBy should be deprecated soon
-  8: optional string                         socialProofOverride // should be set only for DDGs, for en-only experiments. SocialProofType is ignored when this field is set
-  9: optional Algorithm                      algorithm // algorithm used 
-  10: optional double                        score     // score
-  11: optional bool                          isFollowingAuthor // true if the target user follows the author of the tweet 
-}
-
-struct RelatedTweet {
-  1: required i64                  tweetId
-  2: required i64                  authorId
-  3: required double               score
-  4: required string               feedbackToken
-}
-
-struct RecommendTweetResponse {
-  1: required list<RecommendedTweet> tweets
-  2: optional DDGBucket              bucket                // deprecated
-  3: optional Bucket                 assignedBucket        // for client-side experimentation
-}
-
-struct RelatedTweetResponse {
-  1: required list<RelatedTweet>   tweets                                 // a list of related tweets
-  2: optional Bucket               assignedBucket                         // the bucket used for treatment
-}
-
-/**
- * The main interface-definition for Recos.
- */
-service Recos {
-  RecommendTweetResponse recommendTweets  (RecommendTweetRequest request)
-  RelatedTweetResponse relatedTweets  (RelatedTweetRequest request)
-}
diff --git a/src/thrift/com/twitter/recos/recos_common.docx b/src/thrift/com/twitter/recos/recos_common.docx
new file mode 100644
index 000000000..739f8a659
Binary files /dev/null and b/src/thrift/com/twitter/recos/recos_common.docx differ
diff --git a/src/thrift/com/twitter/recos/recos_common.thrift b/src/thrift/com/twitter/recos/recos_common.thrift
deleted file mode 100644
index ece39b8df..000000000
--- a/src/thrift/com/twitter/recos/recos_common.thrift
+++ /dev/null
@@ -1,54 +0,0 @@
-namespace java com.twitter.recos.recos_common.thriftjava
-namespace py gen.twitter.recos.recos_common
-#@namespace scala com.twitter.recos.recos_common.thriftscala
-#@namespace strato com.twitter.recos.recos_common
-namespace rb Recos
-
-// Social proof types for user moment recommendations
-enum MomentSocialProofType {
-  PUBLISH         = 0
-  LIKE            = 1
-  CAPSULE_OPEN    = 2
-}
-
-// Social proof types for tweet/entity recommendations
-enum SocialProofType {
-  CLICK           = 0
-  FAVORITE        = 1
-  RETWEET         = 2
-  REPLY           = 3
-  TWEET           = 4
-  IS_MENTIONED    = 5
-  IS_MEDIATAGGED  = 6
-  QUOTE           = 7
-}
-
-struct SocialProof {
-  1: required i64 userId
-  2: optional i64 metadata
-}
-
-// Social proof types for user recommendations
-enum UserSocialProofType {
-  FOLLOW     = 0
-  MENTION    = 1
-  MEDIATAG   = 2
-}
-
-struct GetRecentEdgesRequest {
-  1: required i64                          requestId        // the node to query from
-  2: optional i32                          maxNumEdges      // the max number of recent edges
-}
-
-struct RecentEdge {
-  1: required i64                          nodeId           // the connecting node id
-  2: required SocialProofType              engagementType   // the engagement type of the edge
-}
-
-struct GetRecentEdgesResponse {
-  1: required list<RecentEdge>             edges            // the _ most recent edges from the query node
-}
-
-struct NodeInfo {
-  1: required list<i64> edges
-}
diff --git a/src/thrift/com/twitter/recos/recos_injector.docx b/src/thrift/com/twitter/recos/recos_injector.docx
new file mode 100644
index 000000000..ed76c0544
Binary files /dev/null and b/src/thrift/com/twitter/recos/recos_injector.docx differ
diff --git a/src/thrift/com/twitter/recos/recos_injector.thrift b/src/thrift/com/twitter/recos/recos_injector.thrift
deleted file mode 100644
index b11bc5c09..000000000
--- a/src/thrift/com/twitter/recos/recos_injector.thrift
+++ /dev/null
@@ -1,22 +0,0 @@
-namespace java com.twitter.recos.recos_injector.thriftjava
-namespace py gen.twitter.recos.recos_injector
-#@namespace scala com.twitter.recos.recos_injector.thriftscala
-namespace rb RecosInjector
-
-####### FOR RECOS INTERNAL USE ONLY -- please do NOT use this in client code  ########
-
-struct UserTweetAuthorGraphMessage {
-  1: required i64 leftId
-  2: required i64 rightId
-  3: required i8 action
-  4: optional i8 card
-  5: optional i64 authorId
-  6: optional Features features
-}
-
-struct Features {
-  1: optional bool hasPhoto
-  2: optional bool hasVideo
-  3: optional bool hasUrl
-  4: optional bool hasHashtag
-}
diff --git a/src/thrift/com/twitter/recos/user_tweet_entity_graph/BUILD b/src/thrift/com/twitter/recos/user_tweet_entity_graph/BUILD
deleted file mode 100644
index ffd17d734..000000000
--- a/src/thrift/com/twitter/recos/user_tweet_entity_graph/BUILD
+++ /dev/null
@@ -1,19 +0,0 @@
-RECOSGRAPH_SOURCES = ["user_tweet_entity_graph.thrift"]
-
-create_thrift_libraries(
-    base_name = "user_tweet_entity_graph",
-    sources = RECOSGRAPH_SOURCES,
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependency_roots = [
-        "src/thrift/com/twitter/recos:recos-common",
-        "src/thrift/com/twitter/recos/features:tweet",
-    ],
-    generate_languages = [
-        "java",
-        "scala",
-        "strato",
-    ],
-    provides_java_name = "user_tweet_entity_graph-java",
-    provides_scala_name = "user_tweet_entity_graph-scala",
-)
diff --git a/src/thrift/com/twitter/recos/user_tweet_entity_graph/BUILD.docx b/src/thrift/com/twitter/recos/user_tweet_entity_graph/BUILD.docx
new file mode 100644
index 000000000..9bbc81829
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_tweet_entity_graph/BUILD.docx differ
diff --git a/src/thrift/com/twitter/recos/user_tweet_entity_graph/CONFIG.docx b/src/thrift/com/twitter/recos/user_tweet_entity_graph/CONFIG.docx
new file mode 100644
index 000000000..4c77b68b2
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_tweet_entity_graph/CONFIG.docx differ
diff --git a/src/thrift/com/twitter/recos/user_tweet_entity_graph/CONFIG.ini b/src/thrift/com/twitter/recos/user_tweet_entity_graph/CONFIG.ini
deleted file mode 100644
index eae222a68..000000000
--- a/src/thrift/com/twitter/recos/user_tweet_entity_graph/CONFIG.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-; See http://go/CONFIG.ini
-
-[jira]
-project: SD
-
-[kite]
-project: recos
diff --git a/src/thrift/com/twitter/recos/user_tweet_entity_graph/user_tweet_entity_graph.docx b/src/thrift/com/twitter/recos/user_tweet_entity_graph/user_tweet_entity_graph.docx
new file mode 100644
index 000000000..fa337b877
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_tweet_entity_graph/user_tweet_entity_graph.docx differ
diff --git a/src/thrift/com/twitter/recos/user_tweet_entity_graph/user_tweet_entity_graph.thrift b/src/thrift/com/twitter/recos/user_tweet_entity_graph/user_tweet_entity_graph.thrift
deleted file mode 100644
index 961fd2bc5..000000000
--- a/src/thrift/com/twitter/recos/user_tweet_entity_graph/user_tweet_entity_graph.thrift
+++ /dev/null
@@ -1,187 +0,0 @@
-namespace java com.twitter.recos.user_tweet_entity_graph.thriftjava
-namespace py gen.twitter.recos.user_tweet_entity_graph
-#@namespace scala com.twitter.recos.user_tweet_entity_graph.thriftscala
-#@namespace strato com.twitter.recos.user_tweet_entity_graph
-namespace rb UserTweetEntityGraph
-
-include "com/twitter/recos/features/tweet.thrift"
-include "com/twitter/recos/recos_common.thrift"
-
-enum TweetType {
-  Summary    = 0
-  Photo      = 1
-  Player     = 2
-  Promote    = 3
-  Regular    = 4
-}
-
-enum RecommendationType {
-  Tweet      = 0
-  Hashtag    = 1 // Entity type
-  Url        = 2 // Entity type
-}
-
-enum TweetEntityDisplayLocation {
-  MagicRecs                 = 0
-  HomeTimeline              = 1
-  HighlightsEmailUrlRecs    = 2
-  Highlights                = 3
-  Email                     = 4
-  MagicRecsF1               = 5
-  GuideVideo                = 6
-  MagicRecsRareTweet        = 7
-  TopArticles               = 8 // Twitter Blue most shared articles page
-  ContentRecommender        = 9
-  FrigateNTab               = 10
-}
-
-struct RecommendTweetEntityRequest {
-  // user id of the requesting user
-  1: required i64                                        requesterId
-
-  // display location from the client
-  2: required TweetEntityDisplayLocation                 displayLocation
-
-  // the recommendation entity types to return
-  3: required list<RecommendationType>                   recommendationTypes
-
-  // seed ids and weights used in left hand side
-  4: required map<i64,double>                            seedsWithWeights
-
-  // number of suggested results per recommendation entity type
-  5: optional map<RecommendationType, i32>               maxResultsByType
-
-  // the tweet age threshold in milliseconds
-  6: optional i64                                        maxTweetAgeInMillis
-
-  // list of tweet ids to exclude from response
-  7: optional list<i64>                                  excludedTweetIds
-
-  // max user social proof size per engagement type
-  8: optional i32                                        maxUserSocialProofSize
-
-  // max tweet social proof size per user
-  9: optional i32                                        maxTweetSocialProofSize
-
-  // min user social proof size per each recommendation entity type
-  10: optional map<RecommendationType, i32>              minUserSocialProofSizes
-
-  // summary, photo, player, promote, regular
-  11: optional list<TweetType>                           tweetTypes
-
-  // the list of social proof types to return
-  12: optional list<recos_common.SocialProofType>        socialProofTypes
-
-  // set of groups of social proof types allowed to be combined for comparison against minUserSocialProofSizes.
-  // e.g. if the input is set<list<Tweet, Favorite>>, then the union of those two social proofs
-  // will be compared against the minUserSocialProofSize of Tweet RecommendationType.
-  13: optional set<list<recos_common.SocialProofType>>   socialProofTypeUnions
-
-  // the recommendations returned in the response are authored by the following users
-  14: optional set<i64>                                  tweetAuthors
-
-  // the tweet engagement age threshold in milliseconds
-  15: optional i64                                       maxEngagementAgeInMillis
-
-  // the recommendations will not return any tweet authored by the following users
-  16: optional set<i64>                                  excludedTweetAuthors
-}
-
-struct TweetRecommendation {
-  // tweet id
-  1: required i64                                                               tweetId
-  // sum of weights of seed users who engaged with the tweet.
-  // If a user engaged with the same tweet twice, liked it and retweeted it, then his/her weight was counted twice.
-  2: required double                                                            score
-    // user social proofs per engagement type
-  3: required map<recos_common.SocialProofType, list<i64>>                      socialProofByType
-  // user social proofs along with edge metadata per engagement type. The value of the map is a list of SocialProofs.
-  4: optional map<recos_common.SocialProofType, list<recos_common.SocialProof>> socialProofs
-}
-
-struct HashtagRecommendation {
-  1: required i32                                       id                   // integer hashtag id, which will be converted to hashtag string by client library.
-  2: required double                                    score
-  // sum of weights of seed users who engaged with the hashtag.
-  // If a user engaged with the same hashtag twice, liked it and retweeted it, then his/her weight was counted twice.
-  3: required map<recos_common.SocialProofType, map<i64, list<i64>>> socialProofByType
-  // user and tweet social proofs per engagement type. The key of inner map is user id, and the value of inner map is
-  // a list of tweet ids that the user engaged with.
-}
-
-struct UrlRecommendation {
-  1: required i32                                       id                   // integer url id, which will be converted to url string by client library.
-  2: required double                                    score
-  // sum of weights of seed users who engaged with the url.
-  // If a user engaged with the same url twice, liked it and retweeted it, then his/her weight was counted twice.
-  3: required map<recos_common.SocialProofType, map<i64, list<i64>>> socialProofByType
-  // user and tweet social proofs per engagement type. The key of inner map is user id, and the value of inner map is
-  // a list of tweet ids that the user engaged with.
-}
-
-union UserTweetEntityRecommendationUnion {
-  1: TweetRecommendation tweetRec
-  2: HashtagRecommendation hashtagRec
-  3: UrlRecommendation urlRec
-}
-
-struct RecommendTweetEntityResponse {
-  1: required list<UserTweetEntityRecommendationUnion> recommendations
-}
-
-struct SocialProofRequest {
-  1: required list<i64>                                  inputTweets             // Only for some tweets we need requst its social proofs.
-  2: required map<i64, double>                           seedsWithWeights        // a set of seed users with weights
-  3: optional i64                                        requesterId             // id of the requesting user
-  4: optional list<recos_common.SocialProofType>         socialProofTypes        // the list of social proof types to return
-}
-
-struct SocialProofResponse {
-  1: required list<TweetRecommendation> socialProofResults
-}
-
-struct RecommendationSocialProofRequest {
-  /**
-   * Clients can request social proof from multiple recommendation types in a single request.
-   * NOTE: Avoid mixing tweet social proof requests with entity social proof requests as the
-   * underlying library call retrieves these differently.
-   */
-  1: required map<RecommendationType, set<i64>>           recommendationIdsForSocialProof
-  // These will be the only valid LHS nodes used to fetch social proof.
-  2: required map<i64, double>                            seedsWithWeights
-  3: optional i64                                         requesterId
-  // The list of valid social proof types to return, e.g. we may only want Favorite and Tweet proofs.
-  4: optional list<recos_common.SocialProofType>          socialProofTypes
-}
-
-struct RecommendationSocialProofResponse {
-  1: required list<UserTweetEntityRecommendationUnion> socialProofResults
-}
-
-/**
- * The main interface-definition for UserTweetEntityGraph.
- */
-service UserTweetEntityGraph {
-  RecommendTweetEntityResponse recommendTweets (RecommendTweetEntityRequest request)
-
-  /**
-   * Given a query user, its seed users, and a set of input tweets, return the social proofs of
-   * input tweets if any.
-   *
-   * Currently this supports clients such as Email Recommendations, MagicRecs, and HomeTimeline.
-   * In order to avoid heavy migration work, we are retaining this endpoint.
-   */
-  SocialProofResponse findTweetSocialProofs(SocialProofRequest request)
-
-  /**
-   * Find social proof for the specified RecommendationType given a set of input ids of that type.
-   * Only find social proofs from the specified seed users with the specified social proof types.
-   *
-   * Currently this supports url social proof generation for Guide.
-   *
-   * This endpoint is flexible enough to support social proof generation for all recommendation
-   * types, and should be used for all future clients of this service.
-   */
-  RecommendationSocialProofResponse findRecommendationSocialProofs(RecommendationSocialProofRequest request)
-}
-
diff --git a/src/thrift/com/twitter/recos/user_tweet_graph/BUILD b/src/thrift/com/twitter/recos/user_tweet_graph/BUILD
deleted file mode 100644
index 5f9f68eb3..000000000
--- a/src/thrift/com/twitter/recos/user_tweet_graph/BUILD
+++ /dev/null
@@ -1,22 +0,0 @@
-RECOSGRAPH_SOURCES = ["user_tweet_graph.thrift"]
-
-create_thrift_libraries(
-    base_name = "user_tweet_graph",
-    sources = RECOSGRAPH_SOURCES,
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependency_roots = [
-        "src/thrift/com/twitter/recos:recos-common",
-        "src/thrift/com/twitter/recos/features:tweet",
-    ],
-    export_roots = [
-        "src/thrift/com/twitter/recos/features:tweet",
-    ],
-    generate_languages = [
-        "java",
-        "scala",
-        "strato",
-    ],
-    provides_java_name = "user_tweet_graph-java",
-    provides_scala_name = "user_tweet_graph-scala",
-)
diff --git a/src/thrift/com/twitter/recos/user_tweet_graph/BUILD.docx b/src/thrift/com/twitter/recos/user_tweet_graph/BUILD.docx
new file mode 100644
index 000000000..8facd45cb
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_tweet_graph/BUILD.docx differ
diff --git a/src/thrift/com/twitter/recos/user_tweet_graph/CONFIG.docx b/src/thrift/com/twitter/recos/user_tweet_graph/CONFIG.docx
new file mode 100644
index 000000000..1aa36df18
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_tweet_graph/CONFIG.docx differ
diff --git a/src/thrift/com/twitter/recos/user_tweet_graph/CONFIG.ini b/src/thrift/com/twitter/recos/user_tweet_graph/CONFIG.ini
deleted file mode 100644
index eae222a68..000000000
--- a/src/thrift/com/twitter/recos/user_tweet_graph/CONFIG.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-; See http://go/CONFIG.ini
-
-[jira]
-project: SD
-
-[kite]
-project: recos
diff --git a/src/thrift/com/twitter/recos/user_tweet_graph/user_tweet_graph.docx b/src/thrift/com/twitter/recos/user_tweet_graph/user_tweet_graph.docx
new file mode 100644
index 000000000..4609f8f17
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_tweet_graph/user_tweet_graph.docx differ
diff --git a/src/thrift/com/twitter/recos/user_tweet_graph/user_tweet_graph.thrift b/src/thrift/com/twitter/recos/user_tweet_graph/user_tweet_graph.thrift
deleted file mode 100644
index 43f294eb1..000000000
--- a/src/thrift/com/twitter/recos/user_tweet_graph/user_tweet_graph.thrift
+++ /dev/null
@@ -1,172 +0,0 @@
-namespace java com.twitter.recos.user_tweet_graph.thriftjava
-namespace py gen.twitter.recos.user_tweet_graph
-#@namespace scala com.twitter.recos.user_tweet_graph.thriftscala
-#@namespace strato com.twitter.recos.user_tweet_graph
-namespace rb UserTweetGraph
-
-include "com/twitter/recos/features/tweet.thrift"
-include "com/twitter/recos/recos_common.thrift"
-
-enum TweetType {
-  Summary    = 0
-  Photo      = 1
-  Player     = 2
-  Promote    = 3
-  Regular    = 4
-}
-
-enum Algorithm {
-  Salsa              = 0
-  SubGraphSalsa      = 1
-}
-
-enum RecommendTweetDisplayLocation {
-  HomeTimeline       = 0
-  WelcomeFlow        = 1
-  NetworkDigest      = 2
-  BackfillDigest     = 3
-  HttpEndpoint       = 4
-  Poptart            = 5
-  InstantTimeline    = 6
-  Explore            = 7
-  MagicRecs          = 8
-  LoggedOutProfile   = 9
-  LoggedOutPermalink = 10
-  VideoHome          = 11
-}
-
-struct RecommendTweetRequest {
-  1: required i64                                      requesterId              // user id of the requesting user
-  2: required RecommendTweetDisplayLocation            displayLocation          // display location from the client
-  3: required i32                                      maxResults               // number of suggested results to return
-  4: required list<i64>                                excludedTweetIds         // list of tweet ids to exclude from response
-  5: required map<i64,double>                          seeds                    // seeds used in salsa random walk
-  6: required i64                                      tweetRecency             // the tweet recency threshold
-  7: required i32                                      minInteraction           // minimum interaction threshold
-  8: required list<TweetType>                          includeTweetTypes        // summary, photo, player, promote, other
-  9: required double                                   resetProbability         // reset probability to query node
-  10: required double                                  queryNodeWeightFraction  // the percentage of weights assigned to query node in seeding
-  11: required i32                                     numRandomWalks           // number of random walks
-  12: required i32                                     maxRandomWalkLength      // max random walk length
-  13: required i32                                     maxSocialProofSize       // max social proof size
-  14: required Algorithm                               algorithm                // algorithm type
-  15: optional list<recos_common.SocialProofType>      socialProofTypes         // the list of social proof types to return
-}
-
-struct RecommendedTweet {
-  1: required i64                                                tweetId
-  2: required double                                             score
-  3: optional list<i64>                                          socialProof              // social proof in aggregate
-  4: optional map<recos_common.SocialProofType, list<i64>>       socialProofPerType       // social proofs per engagement type
-}
-
-struct RecommendTweetResponse {
-  1: required list<RecommendedTweet> tweets
-}
-
-enum RelatedTweetDisplayLocation {
-  Permalink       = 0
-  Permalink1      = 1
-  MobilePermalink = 2
-  Permalink3      = 3
-  Permalink4      = 4
-  RelatedTweets   = 5
-  RelatedTweets1  = 6
-  RelatedTweets2  = 7
-  RelatedTweets3  = 8
-  RelatedTweets4  = 9
-  LoggedOutProfile = 10
-  LoggedOutPermalink = 11
-}
-
-struct UserTweetFeatureResponse {
-  1: optional double                                favAdamicAdarAvg
-  2: optional double                                favAdamicAdarMax 
-  3: optional double                                favLogCosineAvg
-  4: optional double                                favLogCosineMax
-  5: optional double                                retweetAdamicAdarAvg
-  6: optional double                                retweetAdamicAdarMax 
-  7: optional double                                retweetLogCosineAvg
-  8: optional double                                retweetLogCosineMax
-}
-
-struct RelatedTweetRequest {
-  1: required i64                                   tweetId               // original tweet id
-  2: required RelatedTweetDisplayLocation           displayLocation       // display location from the client
-  3: optional string                                algorithm             // additional parameter that the system can interpret
-  4: optional i64                                   requesterId           // user id of the requesting user
-  5: optional i32                                   maxResults            // number of suggested results to return
-  6: optional list<i64>                             excludeTweetIds       // list of tweet ids to exclude from response
-  7: optional i32                                   maxNumNeighbors
-  8: optional i32                                   minNeighborDegree
-  9: optional i32                                   maxNumSamplesPerNeighbor
-  10: optional i32                                  minCooccurrence
-  11: optional i32                                  minQueryDegree
-  12: optional double                               maxLowerMultiplicativeDeviation
-  13: optional double                               maxUpperMultiplicativeDeviation
-  14: optional bool                                 populateTweetFeatures // whether to populate graph features
-  15: optional i32                                  minResultDegree
-  16: optional list<i64>                            additionalTweetIds
-  17: optional double                               minScore
-  18: optional i32                                  maxTweetAgeInHours
-}
-
-struct TweetBasedRelatedTweetRequest {
-  1: required i64                                   tweetId               // query tweet id
-  2: optional i32                                   maxResults            // number of suggested results to return
-  3: optional list<i64>                             excludeTweetIds       // list of tweet ids to exclude from response
-  4: optional i32                                   minQueryDegree        // min degree of query tweet
-  5: optional i32                                   maxNumSamplesPerNeighbor // max number of sampled users who engaged with the query tweet
-  6: optional i32                                   minCooccurrence       // min co-occurrence of related tweet candidate 
-  7: optional i32                                   minResultDegree       // min degree of related tweet candidate 
-  8: optional double                                minScore              // min score of related tweet candidate
-  9: optional i32                                   maxTweetAgeInHours    // max tweet age in hours of related tweet candidate 
-}
-
-struct ProducerBasedRelatedTweetRequest {
-  1: required i64                                   producerId            // query producer id
-  2: optional i32                                   maxResults            // number of suggested results to return
-  3: optional list<i64>                             excludeTweetIds       // list of tweet ids to exclude from response
-  4: optional i32                                   minQueryDegree        // min degree of query producer, e.g. number of followers
-  5: optional i32                                   maxNumFollowers       // max number of sampled users who follow the query producer 
-  6: optional i32                                   minCooccurrence       // min co-occurrence of related tweet candidate 
-  7: optional i32                                   minResultDegree       // min degree of related tweet candidate 
-  8: optional double                                minScore              // min score of related tweet candidate
-  9: optional i32                                   maxTweetAgeInHours    // max tweet age in hours of related tweet candidate 
-}
-
-struct ConsumersBasedRelatedTweetRequest {
-  1: required list<i64>                             consumerSeedSet       // query consumer userId set 
-  2: optional i32                                   maxResults            // number of suggested results to return
-  3: optional list<i64>                             excludeTweetIds       // list of tweet ids to exclude from response 
-  4: optional i32                                   minCooccurrence       // min co-occurrence of related tweet candidate 
-  5: optional i32                                   minResultDegree       // min degree of related tweet candidate 
-  6: optional double                                minScore              // min score of related tweet candidate
-  7: optional i32                                   maxTweetAgeInHours    // max tweet age in hours of related tweet candidate 
-}
-
-struct RelatedTweet {
-  1: required i64                          tweetId
-  2: required double                       score
-  3: optional tweet.GraphFeaturesForTweet  relatedTweetGraphFeatures
-}
-
-struct RelatedTweetResponse {
-  1: required list<RelatedTweet>           tweets
-  2: optional tweet.GraphFeaturesForQuery  queryTweetGraphFeatures
-}
-
-/**
- * The main interface-definition for UserTweetGraph.
- */
-service UserTweetGraph {
-  RecommendTweetResponse recommendTweets (RecommendTweetRequest request)
-  recos_common.GetRecentEdgesResponse getLeftNodeEdges (recos_common.GetRecentEdgesRequest request)
-  recos_common.NodeInfo getRightNode (i64 node)
-  RelatedTweetResponse relatedTweets (RelatedTweetRequest request)
-  RelatedTweetResponse tweetBasedRelatedTweets (TweetBasedRelatedTweetRequest request)
-  RelatedTweetResponse producerBasedRelatedTweets (ProducerBasedRelatedTweetRequest request)
-  RelatedTweetResponse consumersBasedRelatedTweets (ConsumersBasedRelatedTweetRequest request)
-  UserTweetFeatureResponse userTweetFeatures (1: required i64 userId, 2: required i64 tweetId)
-}
-
diff --git a/src/thrift/com/twitter/recos/user_user_graph/BUILD b/src/thrift/com/twitter/recos/user_user_graph/BUILD
deleted file mode 100644
index ef53f847a..000000000
--- a/src/thrift/com/twitter/recos/user_user_graph/BUILD
+++ /dev/null
@@ -1,19 +0,0 @@
-RECOSGRAPH_SOURCES = ["user_user_graph.thrift"]
-
-create_thrift_libraries(
-    base_name = "user_user_graph",
-    sources = RECOSGRAPH_SOURCES,
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependency_roots = [
-        "src/thrift/com/twitter/recos:recos-common",
-        "src/thrift/com/twitter/recos/features:tweet",
-    ],
-    generate_languages = [
-        "java",
-        "scala",
-        "strato",
-    ],
-    provides_java_name = "user_user_graph-java",
-    provides_scala_name = "user_user_graph-scala",
-)
diff --git a/src/thrift/com/twitter/recos/user_user_graph/BUILD.docx b/src/thrift/com/twitter/recos/user_user_graph/BUILD.docx
new file mode 100644
index 000000000..79bb31e78
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_user_graph/BUILD.docx differ
diff --git a/src/thrift/com/twitter/recos/user_user_graph/CONFIG.docx b/src/thrift/com/twitter/recos/user_user_graph/CONFIG.docx
new file mode 100644
index 000000000..1aa36df18
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_user_graph/CONFIG.docx differ
diff --git a/src/thrift/com/twitter/recos/user_user_graph/CONFIG.ini b/src/thrift/com/twitter/recos/user_user_graph/CONFIG.ini
deleted file mode 100644
index eae222a68..000000000
--- a/src/thrift/com/twitter/recos/user_user_graph/CONFIG.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-; See http://go/CONFIG.ini
-
-[jira]
-project: SD
-
-[kite]
-project: recos
diff --git a/src/thrift/com/twitter/recos/user_user_graph/user_user_graph.docx b/src/thrift/com/twitter/recos/user_user_graph/user_user_graph.docx
new file mode 100644
index 000000000..ba8385d35
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_user_graph/user_user_graph.docx differ
diff --git a/src/thrift/com/twitter/recos/user_user_graph/user_user_graph.thrift b/src/thrift/com/twitter/recos/user_user_graph/user_user_graph.thrift
deleted file mode 100644
index 10115c8d9..000000000
--- a/src/thrift/com/twitter/recos/user_user_graph/user_user_graph.thrift
+++ /dev/null
@@ -1,45 +0,0 @@
-namespace java com.twitter.recos.user_user_graph.thriftjava
-namespace py gen.twitter.recos.user_user_graph
-#@namespace scala com.twitter.recos.user_user_graph.thriftscala
-#@namespace strato com.twitter.recos.user_user_graph
-namespace rb UserUserGraph
-
-include "com/twitter/recos/recos_common.thrift"
-
-enum RecommendUserDisplayLocation {
-  MagicRecs                 = 0
-  HomeTimeLine              = 1
-  ConnectTab                = 2
-}
-
-struct RecommendUserRequest {
-  1: required i64                                           requesterId                  // user id of the requesting user
-  2: required RecommendUserDisplayLocation                  displayLocation              // display location from the client
-  3: required map<i64,double>                               seedsWithWeights             // seed ids and weights used in left hand side
-  4: optional list<i64>                                     excludedUserIds              // list of users to exclude from response
-  5: optional i32                                           maxNumResults                // number of results to return
-  6: optional i32                                           maxNumSocialProofs           // number of social proofs per recommendation
-  7: optional map<recos_common.UserSocialProofType, i32>    minUserPerSocialProof        // minimum number of users for each social proof type
-  8: optional list<recos_common.UserSocialProofType>        socialProofTypes             // list of required social proof types. Any recommended user
-                                                                                         // must at least have all of these social proof types
-  9: optional i64                                           maxEdgeEngagementAgeInMillis // only events created during this period are counted
-}
-
-struct RecommendedUser {
-  1: required i64                                               userId             // user id of recommended user
-  2: required double                                            score              // weight of the recommended user
-  3: required map<recos_common.UserSocialProofType, list<i64>>  socialProofs       // the social proofs of the recommended user
-}
-
-struct RecommendUserResponse {
-  1: required list<RecommendedUser>                             recommendedUsers         // list of recommended users
-}
-
-/**
- * The main interface-definition for UserUserGraph.
- */
-service UserUserGraph {
-  // Given a request for recommendations for a specific user,
-  // return a list of candidate users along with their social proofs
-  RecommendUserResponse recommendUsers (RecommendUserRequest request)
-}
diff --git a/src/thrift/com/twitter/recos/user_video_graph/BUILD b/src/thrift/com/twitter/recos/user_video_graph/BUILD
deleted file mode 100644
index f9dcbb8b1..000000000
--- a/src/thrift/com/twitter/recos/user_video_graph/BUILD
+++ /dev/null
@@ -1,22 +0,0 @@
-RECOSGRAPH_SOURCES = ["user_video_graph.thrift"]
-
-create_thrift_libraries(
-    base_name = "user_video_graph",
-    sources = RECOSGRAPH_SOURCES,
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependency_roots = [
-        "src/thrift/com/twitter/recos:recos-common",
-        "src/thrift/com/twitter/recos/features:tweet",
-    ],
-    export_roots = [
-        "src/thrift/com/twitter/recos/features:tweet",
-    ],
-    generate_languages = [
-        "java",
-        "scala",
-        "strato",
-    ],
-    provides_java_name = "user_video_graph-java",
-    provides_scala_name = "user_video_graph-scala",
-)
diff --git a/src/thrift/com/twitter/recos/user_video_graph/BUILD.docx b/src/thrift/com/twitter/recos/user_video_graph/BUILD.docx
new file mode 100644
index 000000000..f7d426e30
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_video_graph/BUILD.docx differ
diff --git a/src/thrift/com/twitter/recos/user_video_graph/CONFIG.docx b/src/thrift/com/twitter/recos/user_video_graph/CONFIG.docx
new file mode 100644
index 000000000..1aa36df18
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_video_graph/CONFIG.docx differ
diff --git a/src/thrift/com/twitter/recos/user_video_graph/CONFIG.ini b/src/thrift/com/twitter/recos/user_video_graph/CONFIG.ini
deleted file mode 100644
index eae222a68..000000000
--- a/src/thrift/com/twitter/recos/user_video_graph/CONFIG.ini
+++ /dev/null
@@ -1,7 +0,0 @@
-; See http://go/CONFIG.ini
-
-[jira]
-project: SD
-
-[kite]
-project: recos
diff --git a/src/thrift/com/twitter/recos/user_video_graph/user_video_graph.docx b/src/thrift/com/twitter/recos/user_video_graph/user_video_graph.docx
new file mode 100644
index 000000000..6c159d77d
Binary files /dev/null and b/src/thrift/com/twitter/recos/user_video_graph/user_video_graph.docx differ
diff --git a/src/thrift/com/twitter/recos/user_video_graph/user_video_graph.thrift b/src/thrift/com/twitter/recos/user_video_graph/user_video_graph.thrift
deleted file mode 100644
index a5d83c1d6..000000000
--- a/src/thrift/com/twitter/recos/user_video_graph/user_video_graph.thrift
+++ /dev/null
@@ -1,64 +0,0 @@
-namespace java com.twitter.recos.user_video_graph.thriftjava
-namespace py gen.twitter.recos.user_video_graph
-#@namespace scala com.twitter.recos.user_video_graph.thriftscala
-#@namespace strato com.twitter.recos.user_video_graph
-namespace rb UserVideoGraph
-
-include "com/twitter/recos/features/tweet.thrift"
-include "com/twitter/recos/recos_common.thrift"
-
-
-struct TweetBasedRelatedTweetRequest {
-  1: required i64                                   tweetId               // query tweet id
-  2: optional i32                                   maxResults            // number of suggested results to return
-  3: optional list<i64>                             excludeTweetIds       // list of tweet ids to exclude from response
-  4: optional i32                                   minQueryDegree        // min degree of query tweet
-  5: optional i32                                   maxNumSamplesPerNeighbor // max number of sampled users who engaged with the query tweet
-  6: optional i32                                   minCooccurrence       // min co-occurrence of related tweet candidate 
-  7: optional i32                                   minResultDegree       // min degree of related tweet candidate 
-  8: optional double                                minScore              // min score of related tweet candidate
-  9: optional i32                                   maxTweetAgeInHours    // max tweet age in hours of related tweet candidate 
-}
-
-struct ProducerBasedRelatedTweetRequest {
-  1: required i64                                   producerId            // query producer id
-  2: optional i32                                   maxResults            // number of suggested results to return
-  3: optional list<i64>                             excludeTweetIds       // list of tweet ids to exclude from response
-  4: optional i32                                   minQueryDegree        // min degree of query producer, e.g. number of followers
-  5: optional i32                                   maxNumFollowers       // max number of sampled users who follow the query producer 
-  6: optional i32                                   minCooccurrence       // min co-occurrence of related tweet candidate 
-  7: optional i32                                   minResultDegree       // min degree of related tweet candidate 
-  8: optional double                                minScore              // min score of related tweet candidate
-  9: optional i32                                   maxTweetAgeInHours    // max tweet age in hours of related tweet candidate 
-}
-
-struct ConsumersBasedRelatedTweetRequest {
-  1: required list<i64>                             consumerSeedSet       // query consumer userId set
-  2: optional i32                                   maxResults            // number of suggested results to return
-  3: optional list<i64>                             excludeTweetIds       // list of tweet ids to exclude from response
-  4: optional i32                                   minCooccurrence       // min co-occurrence of related tweet candidate 
-  5: optional i32                                   minResultDegree       // min degree of related tweet candidate  
-  6: optional double                                minScore              // min score of related tweet candidate
-  7: optional i32                                   maxTweetAgeInHours    // max tweet age in hours of related tweet candidate
-}
-
-struct RelatedTweet {
-  1: required i64                          tweetId
-  2: required double                       score
-  3: optional tweet.GraphFeaturesForTweet  relatedTweetGraphFeatures
-}
-
-struct RelatedTweetResponse {
-  1: required list<RelatedTweet>           tweets
-  2: optional tweet.GraphFeaturesForQuery  queryTweetGraphFeatures
-}
-
-/**
- * The main interface-definition for UserVideoGraph.
- */
-service UserVideoGraph {
-  RelatedTweetResponse tweetBasedRelatedTweets (TweetBasedRelatedTweetRequest request)
-  RelatedTweetResponse producerBasedRelatedTweets (ProducerBasedRelatedTweetRequest request)
-  RelatedTweetResponse consumersBasedRelatedTweets (ConsumersBasedRelatedTweetRequest request)
-}
-
diff --git a/src/thrift/com/twitter/search/common/ranking/ranking.docx b/src/thrift/com/twitter/search/common/ranking/ranking.docx
new file mode 100644
index 000000000..29aff4223
Binary files /dev/null and b/src/thrift/com/twitter/search/common/ranking/ranking.docx differ
diff --git a/src/thrift/com/twitter/search/common/ranking/ranking.thrift b/src/thrift/com/twitter/search/common/ranking/ranking.thrift
deleted file mode 100644
index bd1cff929..000000000
--- a/src/thrift/com/twitter/search/common/ranking/ranking.thrift
+++ /dev/null
@@ -1,366 +0,0 @@
-namespace java com.twitter.search.common.ranking.thriftjava
-#@namespace scala com.twitter.search.common.ranking.thriftscala
-#@namespace strato com.twitter.search.common.ranking
-namespace py gen.twitter.search.common.ranking.ranking
-
-struct ThriftLinearFeatureRankingParams {
-  // values below this will set the score to the minimal one
-  1: optional double min = -1e+100
-  // values above this will set the score to the minimal one
-  2: optional double max = 1e+100
-  3: optional double weight = 0
-}(persisted='true')
-
-struct ThriftAgeDecayRankingParams {
-  // the rate in which the score of older tweets decreases
-  1: optional double slope = 0.003
-  // the age, in minutes, where the age score of a tweet is half of the latest tweet
-  2: optional double halflife = 360.0
-  // the minimal age decay score a tweet will have
-  3: optional double base = 0.6
-}(persisted='true')
-
-enum ThriftScoringFunctionType {
-  LINEAR = 1,
-  MODEL_BASED = 4,
-  TENSORFLOW_BASED = 5,
-
-  // deprecated
-  TOPTWEETS = 2,
-  EXPERIMENTAL = 3,
-}
-
-// The struct to define a class that is to be dynamically loaded in earlybird for
-// experimentation.
-struct ThriftExperimentClass {
-  // the fully qualified class name.
-  1: required string name
-  // data source location (class/jar file) for this dynamic class on HDFS
-  2: optional string location
-  // parameters in key-value pairs for this experimental class
-  3: optional map<string, double> params
-}(persisted='true')
-
-// Deprecated!!
-struct ThriftQueryEngagementParams {
-  // Rate Boosts: given a rate (usually a small fraction), the score will be multiplied by
-  //   (1 + rate) ^ boost
-  // 0 mean no boost, negative numbers are dampens
-  1: optional double retweetRateBoost = 0
-  2: optional double replyRateBoost = 0
-  3: optional double faveRateBoost = 0
-}(persisted='true')
-
-struct ThriftHostQualityParams {
-  // Multiplier applied to host score, for tweets that have links.
-  // A multiplier of 0 means that this boost is not applied
-  1: optional double multiplier = 0.0
-
-  // Do not apply the multiplier to hosts with score above this level.
-  // If 0, the multiplier will be applied to any host.
-  2: optional double maxScoreToModify = 0.0
-
-  // Do not apply the multiplier to hosts with score below this level.
-  // If 0, the multiplier will be applied to any host.
-  3: optional double minScoreToModify = 0.0
-
-  // If true, score modification will be applied to hosts that have unknown scores.
-  // The host-score used will be lower than the score of any known host.
-  4: optional bool applyToUnknownHosts = 0
-}(persisted='true')
-
-struct ThriftCardRankingParams {
-  1: optional double hasCardBoost          = 1.0
-  2: optional double domainMatchBoost      = 1.0
-  3: optional double authorMatchBoost      = 1.0
-  4: optional double titleMatchBoost       = 1.0
-  5: optional double descriptionMatchBoost = 1.0
-}(persisted='true')
-
-# The ids are assigned in 'blocks'. For adding a new field, find an unused id in the appropriate
-# block. Be sure to mention explicitly which ids have been removed so that they are not used again.
-struct ThriftRankingParams {
-  1: optional ThriftScoringFunctionType type
-
-  // Dynamically loaded scorer and collector for quick experimentation.
-  40: optional ThriftExperimentClass expScorer
-  41: optional ThriftExperimentClass expCollector
-
-  // we must set it to a value that fits into a float: otherwise
-  // some earlybird classes that convert it to float will interpret
-  // it as Float.NEGATIVE_INFINITY, and some comparisons will fail
-  2: optional double minScore = -1e+30
-
-  10: optional ThriftLinearFeatureRankingParams parusScoreParams
-  11: optional ThriftLinearFeatureRankingParams retweetCountParams
-  12: optional ThriftLinearFeatureRankingParams replyCountParams
-  15: optional ThriftLinearFeatureRankingParams reputationParams
-  16: optional ThriftLinearFeatureRankingParams luceneScoreParams
-  18: optional ThriftLinearFeatureRankingParams textScoreParams
-  19: optional ThriftLinearFeatureRankingParams urlParams
-  20: optional ThriftLinearFeatureRankingParams isReplyParams
-  21: optional ThriftLinearFeatureRankingParams directFollowRetweetCountParams
-  22: optional ThriftLinearFeatureRankingParams trustedCircleRetweetCountParams
-  23: optional ThriftLinearFeatureRankingParams favCountParams
-  24: optional ThriftLinearFeatureRankingParams multipleReplyCountParams
-  27: optional ThriftLinearFeatureRankingParams embedsImpressionCountParams
-  28: optional ThriftLinearFeatureRankingParams embedsUrlCountParams
-  29: optional ThriftLinearFeatureRankingParams videoViewCountParams
-  66: optional ThriftLinearFeatureRankingParams quotedCountParams
-
-  // A map from MutableFeatureType to linear ranking params
-  25: optional map<byte, ThriftLinearFeatureRankingParams> offlineExperimentalFeatureRankingParams
-
-  // if min/max for score or ThriftLinearFeatureRankingParams should always be
-  // applied or only to non-follows, non-self, non-verified
-  26: optional bool applyFiltersAlways = 0
-
-  // Whether to apply promotion/demotion at all for FeatureBasedScoringFunction
-  70: optional bool applyBoosts = 1
-
-  // UI language is english, tweet language is not
-  30: optional double langEnglishUIBoost = 0.3
-  // tweet language is english, UI language is not
-  31: optional double langEnglishTweetBoost = 0.7
-  // user language differs from tweet language, and neither is english
-  32: optional double langDefaultBoost = 0.1
-  // user that produced tweet is marked as spammer by metastore
-  33: optional double spamUserBoost = 1.0
-  // user that produced tweet is marked as nsfw by metastore
-  34: optional double nsfwUserBoost = 1.0
-  // user that produced tweet is marked as bot (self similarity) by metastore
-  35: optional double botUserBoost = 1.0
-
-  // An alternative way of using lucene score in the ranking function.
-  38: optional bool useLuceneScoreAsBoost = 0
-  39: optional double maxLuceneScoreBoost = 1.2
-
-  // Use user's consumed and produced languages for scoring
-  42: optional bool useUserLanguageInfo = 0
-
-  // Boost (demotion) if the tweet language is not one of user's understandable languages,
-  // nor interface language.
-  43: optional double unknownLanguageBoost = 0.01
-
-  // Use topic ids for scoring.
-  // Deprecated in SEARCH-8616.
-  44: optional bool deprecated_useTopicIDsBoost = 0
-  // Parameters for topic id scoring.  See TopicIDsBoostScorer (and its test) for details.
-  46: optional double deprecated_maxTopicIDsBoost = 3.0
-  47: optional double deprecated_topicIDsBoostExponent = 2.0;
-  48: optional double deprecated_topicIDsBoostSlope = 2.0;
-
-  // Hit Attribute Demotion
-  60: optional bool enableHitDemotion = 0
-  61: optional double noTextHitDemotion = 1.0
-  62: optional double urlOnlyHitDemotion = 1.0
-  63: optional double nameOnlyHitDemotion = 1.0
-  64: optional double separateTextAndNameHitDemotion = 1.0
-  65: optional double separateTextAndUrlHitDemotion = 1.0
-
-  // multiplicative score boost for results deemed offensive
-  100: optional double offensiveBoost = 1
-  // multiplicative score boost for results in the searcher's social circle
-  101: optional double inTrustedCircleBoost = 1
-  // multiplicative score dampen for results with more than one hash tag
-  102: optional double multipleHashtagsOrTrendsBoost = 1
-  // multiplicative score boost for results in the searcher's direct follows
-  103: optional double inDirectFollowBoost = 1
-  // multiplicative score boost for results that has trends
-  104: optional double tweetHasTrendBoost = 1
-  // is tweet from verified account?
-  106: optional double tweetFromVerifiedAccountBoost = 1
-  // is tweet authored by the searcher? (boost is in addition to social boost)
-  107: optional double selfTweetBoost = 1
-  // multiplicative score boost for a tweet that has image url.
-  108: optional double tweetHasImageUrlBoost = 1
-  // multiplicative score boost for a tweet that has video url.
-  109: optional double tweetHasVideoUrlBoost = 1
-  // multiplicative score boost for a tweet that has news url.
-  110: optional double tweetHasNewsUrlBoost = 1
-  // is tweet from a blue-verified account?
-  111: optional double tweetFromBlueVerifiedAccountBoost = 1 (personalDataType = 'UserVerifiedFlag')
-
-  // subtractive penalty applied after boosts for out-of-network replies.
-  120: optional double outOfNetworkReplyPenalty = 10.0
-
-  150: optional ThriftQueryEngagementParams deprecatedQueryEngagementParams
-
-  160: optional ThriftHostQualityParams deprecatedHostQualityParams
-
-  // age decay params for regular tweets
-  203: optional ThriftAgeDecayRankingParams ageDecayParams
-
-  // for card ranking: map between card name ordinal (defined in com.twitter.search.common.constants.CardConstants)
-  // to ranking params
-  400: optional map<byte, ThriftCardRankingParams> cardRankingParams
-
-  // A map from tweet IDs to the score adjustment for that tweet. These are score
-  // adjustments that include one or more features that can depend on the query
-  // string. These features aren't indexed by Earlybird, and so their total contribution
-  // to the scoring function is passed in directly as part of the request. If present,
-  // the score adjustment for a tweet is directly added to the linear component of the
-  // scoring function. Since this signal can be made up of multiple features, any
-  // reweighting or combination of these features is assumed to be done by the caller
-  // (hence there is no need for a weight parameter -- the weights of the features
-  // included in this signal have already been incorporated by the caller).
-  151: optional map<i64, double> querySpecificScoreAdjustments
-
-  // A map from user ID to the score adjustment for tweets from that author.
-  // This field provides a way for adjusting the tweets of a specific set of users with a score
-  // that is not present in the Earlybird features but has to be passed from the clients, such as
-  // real graph weights or a combination of multiple features.
-  // This field should be used mainly for experimentation since it increases the size of the thrift
-  // requests.
-  154: optional map<i64, double> authorSpecificScoreAdjustments
-
-  // -------- Parameters for ThriftScoringFunctionType.MODEL_BASED --------
-  // Selected models along with their weights for the linear combination
-  152: optional map<string, double> selectedModels
-  153: optional bool useLogitScore = false
-
-  // -------- Parameters for ThriftScoringFunctionType.TENSORFLOW_BASED --------
-  // Selected tensorflow model
-  303: optional string selectedTensorflowModel
-
-  // -------- Deprecated Fields --------
-  // ID 303 has been used in the past. Resume additional deprecated fields from 304
-  105: optional double deprecatedTweetHasTrendInTrendingQueryBoost = 1
-  200: optional double deprecatedAgeDecaySlope = 0.003
-  201: optional double deprecatedAgeDecayHalflife = 360.0
-  202: optional double deprecatedAgeDecayBase = 0.6
-  204: optional ThriftAgeDecayRankingParams deprecatedAgeDecayForTrendsParams
-  301: optional double deprecatedNameQueryConfidence = 0.0
-  302: optional double deprecatedHashtagQueryConfidence = 0.0
-  // Whether to use old-style engagement features (normalized by LogNormalizer)
-  // or new ones (normalized by SingleBytePositiveFloatNormalizer)
-  50: optional bool useGranularEngagementFeatures = 0  // DEPRECATED!
-}(persisted='true')
-
-// This sorting mode is used by earlybird to retrieve the top-n facets that
-// are returned to blender
-enum ThriftFacetEarlybirdSortingMode {
-  SORT_BY_SIMPLE_COUNT = 0,
-  SORT_BY_WEIGHTED_COUNT = 1,
-}
-
-// This is the final sort order used by blender after all results from
-// the earlybirds are merged
-enum ThriftFacetFinalSortOrder {
-  // using the created_at date of the first tweet that contained the facet
-  SCORE = 0,
-  SIMPLE_COUNT = 1,
-  WEIGHTED_COUNT = 2,
-  CREATED_AT = 3
-}
-
-struct ThriftFacetRankingOptions {
-  // next available field ID = 38
-
-  // ======================================================================
-  // EARLYBIRD SETTINGS
-  //
-  // These parameters primarily affect how earlybird creates the top-k
-  // candidate list to be re-ranked by blender
-  // ======================================================================
-  // Dynamically loaded scorer and collector for quick experimentation.
-  26: optional ThriftExperimentClass expScorer
-  27: optional ThriftExperimentClass expCollector
-
-  // It should be less than or equal to reputationParams.min, and all
-  // tweepcreds between the two get a score of 1.0.
-  21: optional i32 minTweepcredFilterThreshold
-
-  // the maximum score a single tweet can contribute to the weightedCount
-  22: optional i32 maxScorePerTweet
-
-  15: optional ThriftFacetEarlybirdSortingMode sortingMode
-  // The number of top candidates earlybird returns to blender
-  16: optional i32 numCandidatesFromEarlybird = 100
-
-  // when to early terminate for facet search, overrides the setting in ThriftSearchQuery
-  34: optional i32 maxHitsToProcess = 1000
-
-  // for anti-gaming we want to limit the maximum amount of hits the same user can
-  // contribute.  Set to -1 to disable the anti-gaming filter. Overrides the setting in
-  // ThriftSearchQuery
-  35: optional i32 maxHitsPerUser = 3
-
-  // if the tweepcred of the user is bigger than this value it will not be excluded
-  // by the anti-gaming filter. Overrides the setting in ThriftSearchQuery
-  36: optional i32 maxTweepcredForAntiGaming = 65
-
-  // these settings affect how earlybird computes the weightedCount
-   2: optional ThriftLinearFeatureRankingParams parusScoreParams
-   3: optional ThriftLinearFeatureRankingParams reputationParams
-  17: optional ThriftLinearFeatureRankingParams favoritesParams
-  33: optional ThriftLinearFeatureRankingParams repliesParams
-  37: optional map<byte, ThriftLinearFeatureRankingParams> rankingExpScoreParams
-
-  // penalty counter settings
-  6: optional i32 offensiveTweetPenalty  // set to -1 to disable the offensive filter
-  7: optional i32 antigamingPenalty // set to -1 to disable antigaming filtering
-  // weight of penalty counts from all tweets containing a facet, not just the tweets
-  // matching the query
-  9: optional double queryIndependentPenaltyWeight  // set to 0 to not use query independent penalty weights
-  // penalty for keyword stuffing
-  60: optional i32 multipleHashtagsOrTrendsPenalty
-
-  // Language related boosts, similar to those in relevance ranking options. By default they are
-  // all 1.0 (no-boost).
-  // When the user language is english, facet language is not
-  11: optional double langEnglishUIBoost = 1.0
-  // When the facet language is english, user language is not
-  12: optional double langEnglishFacetBoost = 1.0
-  // When the user language differs from facet/tweet language, and neither is english
-  13: optional double langDefaultBoost = 1.0
-
-  // ======================================================================
-  // BLENDER SETTINGS
-  //
-  // Settings for the facet relevance scoring happening in blender
-  // ======================================================================
-
-  // This block of parameters are only used in the FacetsFutureManager.
-  // limits to discard facets
-  // if a facet has a higher penalty count, it will not be returned
-  5: optional i32 maxPenaltyCount
-  // if a facet has a lower simple count, it will not be returned
-  28: optional i32 minSimpleCount
-  // if a facet has a lower weighted count, it will not be returned
-  8: optional i32 minCount
-  // the maximum allowed value for offensiveCount/facetCount a facet can have in order to be returned
-  10: optional double maxPenaltyCountRatio
-  // if set to true, then facets with offensive display tweets are excluded from the resultset
-  29: optional bool excludePossiblySensitiveFacets
-  // if set to true, then only facets that have a display tweet in their ThriftFacetCountMetadata object
-  // will be returned to the caller
-  30: optional bool onlyReturnFacetsWithDisplayTweet
-
-  // parameters for scoring force-inserted media items
-  // Please check FacetReRanker.java computeScoreForInserted() for their usage.
-  38: optional double forceInsertedBackgroundExp = 0.3
-  39: optional double forceInsertedMinBackgroundCount = 2
-  40: optional double forceInsertedMultiplier = 0.01
-
-  // -----------------------------------------------------
-  // weights for the facet ranking formula
-  18: optional double simpleCountWeight_DEPRECATED
-  19: optional double weightedCountWeight_DEPRECATED
-  20: optional double backgroundModelBoost_DEPRECATED
-
-  // -----------------------------------------------------
-  // Following parameters are used in the FacetsReRanker
-  // age decay params
-  14: optional ThriftAgeDecayRankingParams ageDecayParams
-
-  // used in the facets reranker
-  23: optional double maxNormBoost = 5.0
-  24: optional double globalCountExponent = 3.0
-  25: optional double simpleCountExponent = 3.0
-
-  31: optional ThriftFacetFinalSortOrder finalSortOrder
-
-  // Run facets search as if they happen at this specific time (ms since epoch).
-  32: optional i64 fakeCurrentTimeMs  // not really used anywhere, remove?
-}(persisted='true')
diff --git a/src/thrift/com/twitter/search/earlybird/thrift/earlybird.docx b/src/thrift/com/twitter/search/earlybird/thrift/earlybird.docx
new file mode 100644
index 000000000..56c191ae1
Binary files /dev/null and b/src/thrift/com/twitter/search/earlybird/thrift/earlybird.docx differ
diff --git a/src/thrift/com/twitter/search/earlybird/thrift/earlybird.thrift b/src/thrift/com/twitter/search/earlybird/thrift/earlybird.thrift
deleted file mode 100644
index 0d4547264..000000000
--- a/src/thrift/com/twitter/search/earlybird/thrift/earlybird.thrift
+++ /dev/null
@@ -1,1416 +0,0 @@
-namespace java com.twitter.search.earlybird.thrift
-#@namespace scala com.twitter.search.earlybird.thriftscala
-#@namespace strato com.twitter.search.earlybird
-namespace py gen.twitter.search.earlybird
-
-include "com/twitter/ads/adserver/adserver_common.thrift"
-include "com/twitter/search/common/caching/caching.thrift"
-include "com/twitter/search/common/constants/query.thrift"
-include "com/twitter/search/common/constants/search_language.thrift"
-include "com/twitter/search/common/conversation/conversation.thrift"
-include "com/twitter/search/common/features/features.thrift"
-include "com/twitter/search/common/indexing/status.thrift"
-include "com/twitter/search/common/query/search.thrift"
-include "com/twitter/search/common/ranking/ranking.thrift"
-include "com/twitter/search/common/results/expansions.thrift"
-include "com/twitter/search/common/results/highlight.thrift"
-include "com/twitter/search/common/results/hit_attribution.thrift"
-include "com/twitter/search/common/results/hits.thrift"
-include "com/twitter/search/common/results/social.thrift"
-include "com/twitter/service/spiderduck/gen/metadata_store.thrift"
-include "com/twitter/tweetypie/deprecated.thrift"
-include "com/twitter/tweetypie/tweet.thrift"
-include "com/twitter/escherbird/tweet_annotation.thrift"
-
-enum ThriftSearchRankingMode {
-  // good old realtime search mode
-  RECENCY = 0,
-  // new super fancy relevance ranking
-  RELEVANCE = 1,
-  DEPRECATED_DISCOVERY = 2,
-  // top tweets ranking mode
-  TOPTWEETS = 3,
-  // results from accounts followed by the searcher
-  FOLLOWS = 4,
-
-  PLACE_HOLDER5 = 5,
-  PLACE_HOLDER6 = 6,
-}
-
-enum ThriftSearchResultType {
-  // it's a time-ordered result.
-  RECENCY = 0,
-  // it's a highly relevant tweet (aka top tweet).
-  RELEVANCE = 1,
-  // top tweet result type
-  POPULAR = 2,
-  // promoted tweets (ads)
-  PROMOTED = 3,
-  // relevance-ordered (as opposed to time-ordered) tweets generated from a variety of candidates
-  RELEVANCE_ORDERED = 4,
-
-  PLACE_HOLDER5 = 5,
-  PLACE_HOLDER6 = 6,
-}
-
-enum ThriftSocialFilterType {
-  // filter only users that the searcher is directly following.
-  FOLLOWS = 0,
-  // filter only users that are in searcher's social circle of trust.
-  TRUSTED = 1,
-  // filter both follows and trusted.
-  ALL = 2,
-
-  PLACE_HOLDER3 = 3,
-  PLACE_HOLDER4 = 4,
-
-}
-
-enum ThriftTweetSource {
-  ///// enums set by Earlybird
-  REALTIME_CLUSTER = 1,
-  FULL_ARCHIVE_CLUSTER = 2,
-  REALTIME_PROTECTED_CLUSTER = 4,
-
-  ///// enums set inside Blender
-  ADSERVER = 0,
-  // from top news search, only used in universal search
-  TOP_NEWS = 3,
-  // special tweets included just for EventParrot.
-  FORCE_INCLUDED = 5,
-  // from Content Recommender
-  // from topic to Tweet path
-  CONTENT_RECS_TOPIC_TO_TWEET = 6,
-  // used for hydrating QIG Tweets (go/qig)
-  QIG = 8,
-  // used for TOPTWEETS ranking mode
-  TOP_TWEET = 9,
-  // used for experimental candidate sources
-  EXPERIMENTAL = 7,
-  // from Scanr service
-  SCANR = 10,
-
-  PLACE_HOLDER11 = 11,
-  PLACE_HOLDER12 = 12
-}
-
-enum NamedEntitySource {
-  TEXT = 0,
-  URL = 1,
-
-  PLACE_HOLDER2 = 2,
-  PLACE_HOLDER3 = 3,
-  PLACE_HOLDER4 = 4,
-}
-
-enum ExperimentCluster {
-  EXP0 = 0, // Send requests to the earlybird-realtime-exp0 cluster
-  PLACE_HOLDER1 = 1,
-  PLACE_HOLDER2 = 2,
-}
-
-enum AudioSpaceState {
-   RUNNING = 0,
-   ENDED = 1,
-
-   PLACE_HOLDER2 = 2,
-   PLACE_HOLDER3 = 3,
-   PLACE_HOLDER4 = 4,
-   PLACE_HOLDER5 = 5,
-}
-
-// Contains all scoring and relevance-filtering related controls and options for Earlybird.
-struct ThriftSearchRelevanceOptions {
-  // Next available field ID: 31 and note that 45 and 50 have been used already
-
-  2: optional bool filterDups = 0         // filter out duplicate search results
-  26: optional bool keepDupWithHigherScore = 1 // keep the duplicate tweet with the higher score
-
-  3: optional bool proximityScoring = 0   // whether to do proximity scoring or not
-  4: optional i32 maxConsecutiveSameUser  // filter consecutive results from the same user
-  5: optional ranking.ThriftRankingParams rankingParams  // composed by blender
-  // deprecated in favor of the maxHitsToProcess in CollectorParams
-  6: optional i32 maxHitsToProcess // when to early-terminate for relevance
-  7: optional string experimentName      // what relevance experiment is running
-  8: optional string experimentBucket    // what bucket the user is in; DDG defaults to hard-coded 'control'
-  9: optional bool interpretSinceId = 1   // whether to interpret since_id operator
-
-  24: optional i32 maxHitsPerUser // Overrides ThriftSearchQuery.maxHitsPerUser
-
-  // only used by discovery for capping direct follow tweets
-  10: optional i32 maxConsecutiveDirectFollows
-
-  // Note - the orderByRelevance flag is critical to understanding how merging
-  // and trimming works in relevance mode in the search root.
-  //
-  // When orderByRelevance is true, results are trimmed in score-order.  This means the
-  // client will get the top results from (maxHitsToProcess * numHashPartitions) hits,
-  // ordered by score.
-  //
-  // When orderByRelevance is false, results are trimmed in id-order.  This means the
-  // client will get the top results from an approximation of maxHitsToProcess hits
-  // (across the entire corpus).  These results ordered by ID.
-  14: optional bool orderByRelevance = 0
-
-  // Max blending count for results returned due to from:user rewrites
-  16: optional i32 maxUserBlendCount
-
-  // The weight for proximity phrases generated while translating the serialized query to the
-  // lucene query.
-  19: optional double proximityPhraseWeight = 1.0
-  20: optional i32 proximityPhraseSlop = 255
-
-  // Override the weights of searchable fields.
-  // Negative weight means the the field is not enabled for search by default,
-  // but if it is (e.g., by annotation), the absolute value of the weight shall be
-  // used (if the annotation does not specify a weight).
-  21: optional map<string, double> fieldWeightMapOverride
-
-  // whether disable the coordination in the rewritten disjunction query, term query and phrase query
-  // the details can be found in LuceneVisitor
-  22: optional bool deprecated_disableCoord = 0
-
-  // Root only. Returns all results seen by root to the client without trimming
-  // if set to true.
-  23: optional bool returnAllResults
-
-  // DEPRECATED: All v2 counters will be used explicitly in the scoring function and
-  // returned in their own field (in either metadata or feature map in response).
-  25: optional bool useEngagementCountersV2 = 0
-
-  // -------- PERSONALIZATION-RELATED RELEVANCE OPTIONS --------
-  // Take special care with these options when reasoning about caching.
-
-  // Deprecated in SEARCH-8616.
-  45: optional map<i32, double> deprecated_topicIDWeights
-
-  // Collect hit attribution on queries and likedByUserIDFilter64-enhanced queries to
-  // get likedByUserIds list in metadata field.
-  // NOTE: this flag has no affect on fromUserIDFilter64.
-  50: optional bool collectFieldHitAttributions = 0
-
-  // Whether to collect all hits regardless of their score with RelevanceAllCollector.
-  27: optional bool useRelevanceAllCollector = 0
-
-  // Override features of specific tweets before the tweets are scored.  
-  28: optional map<i64, features.ThriftSearchResultFeatures> perTweetFeaturesOverride
-
-  // Override features of all tweets from specific users before the tweets are scored. 
-  29: optional map<i64, features.ThriftSearchResultFeatures> perUserFeaturesOverride
-
-  // Override features of all tweets before the tweets are scored.
-  30: optional features.ThriftSearchResultFeatures globalFeaturesOverride
-}(persisted='true')
-
-// Facets types that may have different ranking parameters.
-enum ThriftFacetType {
-  DEFAULT = 0,
-  MENTIONS_FACET = 1,
-  HASHTAGS_FACET = 2,
-  // Deprecated in SEARCH-13708
-  DEPRECATED_NAMED_ENTITIES_FACET = 3,
-  STOCKS_FACET = 4,
-  VIDEOS_FACET = 5,
-  IMAGES_FACET = 6,
-  NEWS_FACET = 7,
-  LANGUAGES_FACET = 8,
-  SOURCES_FACET = 9,
-  TWIMG_FACET = 10,
-  FROM_USER_ID_FACET = 11,
-  DEPRECATED_TOPIC_IDS_FACET = 12,
-  RETWEETS_FACET = 13,
-  LINKS_FACET = 14,
-
-  PLACE_HOLDER15 = 15,
-  PLACE_HOLDER16 = 16,
-}
-
-struct ThriftSearchDebugOptions {
-  // Make earlybird only score and return tweets (specified by tweet id) here, regardless
-  // if they have a hit for the current query or not.
-  1: optional set<i64> statusIds;
-
-  // Assorted structures to pass in debug options.
-  2: optional map<string, string> stringMap;
-  3: optional map<string, double> valueMap;
-  4: optional list<double> valueList;
-}(persisted='true')
-
-// These options control what metadata will be returned by earlybird for each search result
-// in the ThriftSearchResultMetadata struct.  These options are currently mostly supported by
-// AbstractRelevanceCollector and partially in SearchResultsCollector.  Most are true by default to
-// preserve backwards compatibility, but can be disabled as necessary to optimize searches returning
-// many results (such as discover).
-struct ThriftSearchResultMetadataOptions {
-  // If true, fills in the tweetUrls field in ThriftSearchResultMetadata.
-  // Populated by AbstractRelevanceCollector.
-  1: optional bool getTweetUrls = 1
-
-  // If true, fills in the resultLocation field in ThriftSearchResultMetadata.
-  // Populated by AbstractRelevanceCollector.
-  2: optional bool getResultLocation = 1
-  
-  // Deprecated in SEARCH-8616.
-  3: optional bool deprecated_getTopicIDs = 1
-
-  // If true, fills in the luceneScore field in ThriftSearchResultMetadata.
-  // Populated by LinearScoringFunction.
-  4: optional bool getLuceneScore = 0
-
-  // Deprecated but used to be for Offline feature values for static index
-  5: optional bool deprecated_getExpFeatureValues = 0
-
-  // If true, will omit all features derivable from packedFeatures, and set packedFeatures
-  // instead.
-  6: optional bool deprecated_usePackedFeatures = 0
-
-  // If true, fills sharedStatusId. For replies this is the in-reply-to status id and for
-  // retweets this is the retweet source status id.
-  // Also fills in the the isRetweet and isReply flags.
-  7: optional bool getInReplyToStatusId = 0
-
-  // If true, fills referencedTweetAuthorId. Also fills in the the isRetweet and isReply flags.
-  8: optional bool getReferencedTweetAuthorId = 0
-
-  // If true, fills media bits (video/vine/periscope/etc.)
-  9: optional bool getMediaBits = 0
-
-  // If true, will return all defined features in the packed features.  This flag does not cover
-  // the above defined features.
-  10: optional bool getAllFeatures = 0
-
-  // If true, will return all features as ThriftSearchResultFeatures format.
-  11: optional bool returnSearchResultFeatures = 0
-
-  // If the client caches some features schemas, client can indicate its cache schemas through
-  // this field based on (version, checksum).
-  12: optional list<features.ThriftSearchFeatureSchemaSpecifier> featureSchemasAvailableInClient
-
-  // Specific feature IDs to return for recency requests. Populated in SearchResultFeatures.
-  // Values must be IDs of CSF fields from EarlybirdFieldConstants.
-  13: optional list<i32> requestedFeatureIDs
-
-  // If true, fills in the namedEntities field in ThriftSearchResultExtraMetadata
-  14: optional bool getNamedEntities = 0
-
-  // If true, fills in the entityAnnotations field in ThriftSearchResultExtraMetadata
-  15: optional bool getEntityAnnotations = 0
-
-  // If true, fills in the fromUserId field in the ThriftSearchResultExtraMetadata
-  16: optional bool getFromUserId = 0
-
-  // If true, fills in the spaces field in the ThriftSearchResultExtraMetadata
-  17: optional bool getSpaces = 0
-
-  18: optional bool getExclusiveConversationAuthorId = 0
-}(persisted='true')
-
-
-// ThriftSearchQuery describes an earlybird search request, which typically consists
-// of these parts:
-//  - a query to retrieve hits
-//  - relevance options to score hits
-//  - a collector to collect hits and process into search results
-// Note that this struct is used in both ThriftBlenderRequest and EarlybirdRequest.
-// Most fields are not set when this struct is embedded in ThriftBlenderRequest, and
-// are filled in by the blender before sending to earlybird.
-struct ThriftSearchQuery {
-  // Next available field ID: 42
-
-  // -------- SECTION ZERO: THINGS USED ONLY BY THE BLENDER --------
-  // See SEARCHQUAL-2398
-  // These fields are used by the blender and clients of the blender, but not by earlybird.
-
-  // blender use only
-  // The raw un-parsed user search query.
-  6: optional string rawQuery(personalDataType = 'SearchQuery')
-
-  // blender use only
-  // Language of the rawQuery.
-  18: optional string queryLang(personalDataType = 'InferredLanguage')
-
-  // blender use only
-  // What page of results to return, indexed from 1.
-  7: optional i32 page = 1
-
-  // blender use only
-  // Number of results to skip (for pagination).  Indexed from 0.
-  2: optional i32 deprecated_resultOffset = 0
-
-
-  // -------- SECTION ONE: RETRIEVAL OPTIONS --------
-  // These options control the query that will be used to retrieve documents / hits.
-
-  // The parsed query tree, serialized to a string.  Restricts the search results to
-  // tweets matching this query.
-  1: optional string serializedQuery(personalDataType = 'SearchQuery')
-
-  // Restricts the search results to tweets having this minimum tweep cred, out of 100.
-  5: optional i32 minTweepCredFilter = -1
-
-  // Restricts the search results to tweets from these users.
-  34: optional list<i64> fromUserIDFilter64(personalDataType = 'PrivateAccountsFollowing, PublicAccountsFollowing')
-  // Restricts the search results to tweets liked by these users.
-  40: optional list<i64> likedByUserIDFilter64(personalDataType = 'PrivateAccountsFollowing, PublicAccountsFollowing')
-
-  // If searchStatusIds are present, earlybird will ignore the serializedQuery completely
-  // and simply score each of searchStatusIds, also bypassing features like duplicate
-  // filtering and early termination.
-  // IMPORTANT: this means that it is possible to get scores equal to ScoringFunction.SKIP_HIT,
-  // for results skipped by the scoring function.
-  31: optional set<i64> searchStatusIds
-
-  35: optional set<i64> deprecated_eventClusterIdsFilter
-
-  41: optional map<string, list<i64>> namedDisjunctionMap
-
-  // -------- SECTION TWO: HIT COLLECTOR OPTIONS --------
-  // These options control what hits will be collected by the hit collector.
-  // Whether we want to collect and return per-field hit attributions is set in RelevanceOptions.
-  // See SEARCH-2784
-  // Number of results to return (after offset/page correction).
-  // This is ignored when searchStatusIds is set.
-  3: required i32 numResults
-
-  // Maximum number of hits to process by the collector.
-  // deprecated in favor of the maxHitsToProcess in CollectorParams
-  4: optional i32 maxHitsToProcess = 1000
-
-  // Collect hit counts for these time periods (in milliseconds).
-  30: optional list<i64> hitCountBuckets
-
-  // If set, earlybird will also return the facet labels of the specified facet fields
-  // in result tweets.
-  33: optional list<string> facetFieldNames
-
-  // Options controlling which search result metadata is returned.
-  36: optional ThriftSearchResultMetadataOptions resultMetadataOptions
-
-  // Collection related Params
-  38: optional search.CollectorParams collectorParams
-
-  // Whether to collect conversation IDs
-  39: optional bool collectConversationId = 0
-
-  // -------- SECTION THREE: RELEVANCE OPTIONS --------
-  // These options control relevance scoring and anti-gaming.
-
-  // Ranking mode (RECENCY means time-ordered ranking with no relevance).
-  8: optional ThriftSearchRankingMode rankingMode = ThriftSearchRankingMode.RECENCY
-
-  // Relevance scoring options.
-  9: optional ThriftSearchRelevanceOptions relevanceOptions
-
-  // Limits the number of hits that can be contributed by the same user, for anti-gaming.
-  // Set to -1 to disable the anti-gaming filter.  This is ignored when searchStatusIds
-  // is set.
-  11: optional i32 maxHitsPerUser = 3
-
-  // Disables anti-gaming filter checks for any tweets that exceed this tweepcred.
-  12: optional i32 maxTweepcredForAntiGaming = 65
-
-  // -------- PERSONALIZATION-RELATED RELEVANCE OPTIONS --------
-  // Take special care with these options when reasoning about caching.  All of these
-  // options, if set, will bypass the cache with the exception of uiLang which is the
-  // only form of personalization allowed for caching.
-
-  // User ID of searcher.  This is used for relevance, and will be used for retrieval
-  // by the protected tweets index.  If set, query will not be cached.
-  20: optional i64 searcherId(personalDataType = 'UserId')
-
-  // Bloom filter containing trusted user IDs.  If set, query will not be cached.
-  10: optional binary trustedFilter(personalDataType = 'UserId')
-
-  // Bloom filter containing direct follow user IDs.  If set, query will not be cached.
-  16: optional binary directFollowFilter(personalDataType = 'UserId, PrivateAccountsFollowing, PublicAccountsFollowing')
-
-  // UI language from the searcher's profile settings.
-  14: optional string uiLang(personalDataType = 'GeneralSettings')
-
-  // Confidence of the understandability of different languages for this user.
-  // uiLang field above is treated as a userlang with a confidence of 1.0.
-  28: optional map<search_language.ThriftLanguage, double> userLangs(personalDataTypeKey = 'InferredLanguage')
-
-  // An alternative to fromUserIDFilter64 that relies on the relevance bloom filters
-  // for user filtering.  Not currently used in production.  Only supported for realtime
-  // searches.
-  // If set, earlybird expects both trustedFilter and directFollowFilter to also be set.
-  17: optional ThriftSocialFilterType socialFilterType
-
-  // -------- SECTION FOUR: DEBUG OPTIONS, FORGOTTEN FEATURES --------
-
-  // Earlybird search debug options.
-  19: optional ThriftSearchDebugOptions debugOptions
-
-  // Overrides the query time for debugging.
-  29: optional i64 timestampMsecs = 0
-
-  // Support for this feature has been removed and this field is left for backwards compatibility
-  // (and to detect improper usage by clients when it is set).
-  25: optional list<string> deprecated_iterativeQueries
-
-  // Specifies a lucene query that will only be used if serializedQuery is not set,
-  // for debugging.  Not currently used in production.
-  27: optional string luceneQuery(personalDataType = 'SearchQuery')
-
-  // This field is deprecated and is not used by earlybirds when processing the query.
-  21: optional i32 deprecated_minDocsToProcess = 0
-}(persisted='true', hasPersonalData = 'true')
-
-
-struct ThriftFacetLabel {
-  1: required string fieldName
-  2: required string label
-  // the number of times this facet has shown up in tweets with offensive words.
-  3: optional i32 offensiveCount = 0
-
-  // only filled for TWIMG facets
-  4: optional string nativePhotoUrl
-}(persisted='true')
-
-struct ThriftSearchResultGeoLocation {
-  1: optional double latitude(personalDataType = 'GpsCoordinates')
-  2: optional double longitude(personalDataType = 'GpsCoordinates')
-  3: optional double distanceKm
-}(persisted='true', hasPersonalData = 'true')
-
-// Contains an expanded url and media type from the URL facet fields in earlybird.
-// Note: thrift copied from status.thrift with unused fields renamed.
-struct ThriftSearchResultUrl {
-  // Next available field ID: 6.  Fields 2-4 removed.
-
-  // Note: this is actually the expanded url.  Rename after deprecated fields are removed.
-  1: required string originalUrl
-
-  // Media type of the url.
-  5: optional metadata_store.MediaTypes mediaType
-}(persisted='true')
-
-struct ThriftSearchResultNamedEntity {
-  1: required string canonicalName
-  2: required string entityType
-  3: required NamedEntitySource source
-}(persisted='true')
-
-struct ThriftSearchResultAudioSpace {
-  1: required string id
-  2: required AudioSpaceState state
-}(persisted='true')
-
-// Even more metadata about a search result. New per-result metadata fields are added here
-// rather than to ThriftSearchResultMetadata, which must not grow any further.
-struct ThriftSearchResultExtraMetadata {
-  // Next available field ID: 49
-
-  1: optional double userLangScore
-  2: optional bool hasDifferentLang
-  3: optional bool hasEnglishTweetAndDifferentUILang
-  4: optional bool hasEnglishUIAndDifferentTweetLang
-  5: optional i32 quotedCount
-  6: optional double querySpecificScore
-  7: optional bool hasQuote
-  29: optional i64 quotedTweetId
-  30: optional i64 quotedUserId
-  31: optional search_language.ThriftLanguage cardLang
-  8: optional i64 conversationId
-  9: optional bool isSensitiveContent
-  10: optional bool hasMultipleMediaFlag
-  11: optional bool profileIsEggFlag
-  12: optional bool isUserNewFlag
-  26: optional double authorSpecificScore
-  28: optional bool isComposerSourceCamera
-
-  // Temporary V2 engagement counters. The original ones in ThriftSearchResultMetadata have
-  // log() applied to them and are then converted to int in Thrift, which is effectively a
-  // premature discretization. It doesn't affect the scoring inside Earlybird, but for scoring
-  // and ML training outside Earlybird they were bad. These newly added ones store a proper
-  // value of these counts. This also provides an easier transition to the v2 counters when
-  // Earlybird is eventually ready to consume them from DL.
-  // See SEARCHQUAL-9536, SEARCH-11181
-  18: optional i32 retweetCountV2
-  19: optional i32 favCountV2
-  20: optional i32 replyCountV2
-  // Tweepcred weighted version of various engagement counts
-  22: optional i32 weightedRetweetCount
-  23: optional i32 weightedReplyCount
-  24: optional i32 weightedFavCount
-  25: optional i32 weightedQuoteCount
-
-  // 2 bits - 0, 1, 2, 3+
-  13: optional i32 numMentions
-  14: optional i32 numHashtags
-
-  // 1 byte - 256 possible languages
-  15: optional i32 linkLanguage
-  // 6 bits - 64 possible values
-  16: optional i32 prevUserTweetEngagement
-
-  17: optional features.ThriftSearchResultFeatures features
-
-  // If the ThriftSearchQuery.likedByUserIdFilter64 and
-  // ThriftSearchRelevanceOptions.collectFieldHitAttributions fields are set, then this field
-  // will contain the list of all users in the query that liked this tweet.
-  // Otherwise, this field is not set.
-  27: optional list<i64> likedByUserIds
-
-
-  // Deprecated. See SEARCHQUAL-10321
-  21: optional double dopamineNonPersonalizedScore
-
-  32: optional list<ThriftSearchResultNamedEntity> namedEntities
-  33: optional list<tweet_annotation.TweetEntityAnnotation> entityAnnotations
-
-  // Health model scores from HML
-  34: optional double toxicityScore // (go/toxicity)
-  35: optional double pBlockScore // (go/pblock)
-  36: optional double experimentalHealthModelScore1
-  37: optional double experimentalHealthModelScore2
-  38: optional double experimentalHealthModelScore3
-  39: optional double experimentalHealthModelScore4
-
-  40: optional i64 directedAtUserId
-
-  // Health model scores from HML (cont.)
-  41: optional double pSpammyTweetScore // (go/pspammytweet)
-  42: optional double pReportedTweetScore // (go/preportedtweet)
-  43: optional double spammyTweetContentScore // (go/spammy-tweet-content)
-  // Populated by looking up the user table; only available in archive earlybird responses.
-  44: optional bool isUserProtected
-  45: optional list<ThriftSearchResultAudioSpace> spaces
-
-  46: optional i64 exclusiveConversationAuthorId
-  47: optional string cardUri
-  48: optional bool fromBlueVerifiedAccount(personalDataType = 'UserVerifiedFlag')
-}(persisted='true')
-
-// Some basic metadata about a search result.  Useful for re-sorting, filtering, etc.
-//
-// NOTE: DO NOT ADD NEW FIELD!!
-// Stop adding new fields to this struct, all new fields should go to
-// ThriftSearchResultExtraMetadata (VM-1897), or there will be performance issues in production.
-struct ThriftSearchResultMetadata {
-  // Next available field ID: 87 (86 is taken by extraMetadata below). Per the note above,
-  // new fields should not be added to this struct anyway.
-
-  // -------- BASIC SCORING METADATA --------
-
-  // When resultType is RECENCY most scoring metadata will not be available.
-  1: required ThriftSearchResultType resultType
-
-  // Relevance score computed for this result.
-  3: optional double score
-
-  // True if the result was skipped by the scoring function.  Only set when the collect-all
-  // results collector was used - in other cases skipped results are not returned.
-  // The score will be ScoringFunction.SKIP_HIT when skipped is true.
-  43: optional bool skipped
-
-  // optionally a Lucene-style explanation for this result
-  5: optional string explanation
-
-
-  // -------- NETWORK-BASED SCORING METADATA --------
-
-  // Found the tweet in the trusted circle.
-  6: optional bool isTrusted
-
-  // Found the tweet in the direct follows.
-  8: optional bool isFollow
-
-  // True if the fromUserId of this tweet was whitelisted by the dup / antigaming filter.
-  // This typically indicates the result was from a tweet that matched a fromUserId query.
-  9: optional bool dontFilterUser
-
-
-  // -------- COMMON DOCUMENT METADATA --------
-
-  // User ID of the author.  When isRetweet is true, this is the user ID of the retweeter
-  // and NOT that of the original tweet.
-  7: optional i64 fromUserId = 0
-
-  // When isRetweet (or packed features equivalent) is true, this is the status id of the
-  // original tweet. When isReply and getReplySource are true, this is the status id of the
-  // original tweet. In all other circumstances this is 0.
-  40: optional i64 sharedStatusId = 0
-
-  // When hasCard (or packed features equivalent) is true, this is one of SearchCardType.
-  49: optional i8 cardType = 0
-
-  // -------- EXTENDED DOCUMENT METADATA --------
-  // This is additional metadata from facet fields and column stride fields.
-  // Return of these fields is controlled by ThriftSearchResultMetadataOptions to
-  // allow for fine-grained control over when these fields are returned, as an
-  // optimization for searches returning a large quantity of results.
-
-  // Lucene component of the relevance score.  Only returned when
-  // ThriftSearchResultMetadataOptions.getLuceneScore is true.
-  31: optional double luceneScore = 0.0
-
-  // Urls found in the tweet.  Only returned when
-  // ThriftSearchResultMetadataOptions.getTweetUrls is true.
-  18: optional list<ThriftSearchResultUrl> tweetUrls
-
-  // Deprecated in SEARCH-8616.
-  36: optional list<i32> deprecated_topicIDs
-
-  // Facets available in this tweet, this will only be filled if
-  // ThriftSearchQuery.facetFieldNames is set in the request.
-  22: optional list<ThriftFacetLabel> facetLabels
-
-  // The location of the result, and the distance to it from the center of the query
-  // location.  Only returned when ThriftSearchResultMetadataOptions.getResultLocation is true.
-  35: optional ThriftSearchResultGeoLocation resultLocation
-
-  // Per field hit attribution.
-  55: optional hit_attribution.FieldHitAttribution fieldHitAttribution
-
-  // whether this has geolocation_type:geotag hit
-  57: optional bool geotagHit = 0
-
-  // the user id of the author of the source/referenced tweet (the tweet one replied
-  // to, retweeted and possibly quoted, etc.) (SEARCH-8561)
-  // Only returned when ThriftSearchResultMetadataOptions.getReferencedTweetAuthorId is true.
-  60: optional i64 referencedTweetAuthorId = 0
-
-  // Whether this tweet has certain types of media.
-  // Only returned when ThriftSearchResultMetadataOptions.getMediaBits is true.
-  // "Native video" is either consumer, pro, vine, or periscope.
-  // "Native image" is an image hosted on pic.twitter.com.
-  62: optional bool hasConsumerVideo
-  63: optional bool hasProVideo
-  64: optional bool hasVine
-  65: optional bool hasPeriscope
-  66: optional bool hasNativeVideo
-  67: optional bool hasNativeImage
-
-  // Packed features for this result. This field is never populated.
-  50: optional status.PackedFeatures deprecated_packedFeatures
-
-  // The features stored in earlybird
-
-  // From integer 0 from EarlybirdFeatureConfiguration:
-  16: optional bool isRetweet
-  71: optional bool isSelfTweet
-  10: optional bool isOffensive
-  11: optional bool hasLink
-  12: optional bool hasTrend
-  13: optional bool isReply
-  14: optional bool hasMultipleHashtagsOrTrends
-  23: optional bool fromVerifiedAccount
-  // Static text quality score.  This is actually an int between 0 and 100.
-  30: optional double textScore
-  51: optional search_language.ThriftLanguage language
-
-  // From integer 1 from EarlybirdFeatureConfiguration:
-  52: optional bool hasImage
-  53: optional bool hasVideo
-  28: optional bool hasNews
-  48: optional bool hasCard
-  61: optional bool hasVisibleLink
-  // Tweep cred aka user rep.  This is actually an int between 0 and 100.
-  32: optional double userRep
-  24: optional bool isUserSpam
-  25: optional bool isUserNSFW
-  26: optional bool isUserBot
-  54: optional bool isUserAntiSocial
-
-  // From integer 2 from EarlybirdFeatureConfiguration:
-
-  // Retweet, fav, reply, embeds counts, and video view counts are APPROXIMATE ONLY.
-  // Note that retweetCount, favCount and replyCount are not the original unnormalized values:
-  // for historical reasons they are stored after a log2() function, which loses some
-  // granularity.
-  // For more accurate counts, use {retweet, fav, reply}CountV2 in extraMetadata.
-  2: optional i32 retweetCount
-  33: optional i32 favCount
-  34: optional i32 replyCount
-  58: optional i32 embedsImpressionCount
-  59: optional i32 embedsUrlCount
-  68: optional i32 videoViewCount
-
-  // Parus score.  This is actually an int between 0 and 100.
-  29: optional double parusScore
-
-  // Extra feature data, all new feature fields you want to return from Earlybird should go into
-  // this one, the outer one is always reaching its limit of the number of fields JVM can
-  // comfortably support!!
-  86: optional ThriftSearchResultExtraMetadata extraMetadata
-
-  // Integer 3 is omitted, see expFeatureValues above for more details.
-  // NOTE(review): no expFeatureValues field is declared in this struct; the reference may be
-  // stale or may point elsewhere in the file - confirm.
-
-  // From integer 4 from EarlybirdFeatureConfiguration:
-  // Signature, for duplicate detection and removal.
-  4: optional i32 signature
-
-  // -------- THINGS USED ONLY BY THE BLENDER --------
-
-  // Social proof of the tweet, for network discovery.
-  // Do not use these fields outside of network discovery.
-  41: optional list<i64> retweetedUserIDs64
-  42: optional list<i64> replyUserIDs64
-
-  // Social connection between the search user and this result.
-  19: optional social.ThriftSocialContext socialContext
-
-  // used by RelevanceTimelineSearchWorkflow, whether a tweet should be highlighted or not
-  46: optional bool highlightResult
-
-  // used by RelevanceTimelineSearchWorkflow, the highlight context of the highlighted tweet
-  47: optional highlight.ThriftHighlightContext highlightContext
-
-  // the penguin version used to tokenize the tweets by the serving earlybird index as defined
-  // in com.twitter.common.text.version.PenguinVersion
-  56: optional i8 penguinVersion
-
-  69: optional bool isNullcast
-
-  // This is the normalized ratio(0.00 to 1.00) of nth token(starting before 140) divided by
-  // numTokens and then normalized into 16 positions(4 bits) but on a scale of 0 to 100% as
-  // we unnormalize it for you
-  70: optional double tokenAt140DividedByNumTokensBucket
-
-}(persisted='true')
-
-// Query level result stats: counters describing how many documents were scored or skipped
-// for the query, plus counts for various result features. Every counter defaults to 0.
-// Next id: 20
-struct ThriftSearchResultsRelevanceStats {
-  1: optional i32 numScored = 0
-  // Skipped documents count. These documents were also scored, but their scores got ignored
-  // (skipped). Note that this is different from numResultsSkipped in ThriftSearchResults.
-  2: optional i32 numSkipped = 0
-  3: optional i32 numSkippedForAntiGaming = 0
-  4: optional i32 numSkippedForLowReputation = 0
-  5: optional i32 numSkippedForLowTextScore = 0
-  6: optional i32 numSkippedForSocialFilter = 0
-  7: optional i32 numSkippedForLowFinalScore = 0
-  8: optional i32 oldestScoredTweetAgeInSeconds = 0
-
-  // More counters for various features.
-  9:  optional i32 numFromDirectFollows = 0
-  10: optional i32 numFromTrustedCircle = 0
-  11: optional i32 numReplies = 0
-  12: optional i32 numRepliesTrusted = 0
-  13: optional i32 numRepliesOutOfNetwork = 0
-  14: optional i32 numSelfTweets = 0
-  15: optional i32 numWithMedia = 0
-  16: optional i32 numWithNews = 0
-  17: optional i32 numSpamUser = 0
-  18: optional i32 numOffensive = 0
-  19: optional i32 numBot = 0
-}(persisted='true')
-
-// Per result debug info: hostname, cluster name, partition id and tier name. All fields are
-// optional. Carried on ThriftSearchResult.debugInfo and persisted across merges.
-struct ThriftSearchResultDebugInfo {
-  1: optional string hostname
-  2: optional string clusterName
-  3: optional i32 partitionId
-  4: optional string tiername
-}(persisted='true')
-
-// A single search result: the result status id, plus optional TweetyPie data, metadata,
-// hit highlights, expansions, ad impression, tweet source, tweet features, conversation
-// context and debug info.
-struct ThriftSearchResult {
-  // Next available field ID: 22
-
-  // Result status id.
-  1: required i64 id
-
-  // TweetyPie status of the search result
-  7: optional deprecated.Status tweetypieStatus
-  19: optional tweet.Tweet tweetypieTweet  // v2 struct
-
-  // If the search result is a retweet, this field contains the source TweetyPie status.
-  10: optional deprecated.Status sourceTweetypieStatus
-  20: optional tweet.Tweet sourceTweetypieTweet  // v2 struct
-
-  // If the search result is a quote tweet, this field contains the quoted TweetyPie status.
-  17: optional deprecated.Status quotedTweetypieStatus
-  21: optional tweet.Tweet quotedTweetypieTweet  // v2 struct
-
-  // Additional metadata about a search result.
-  5: optional ThriftSearchResultMetadata metadata
-
-  // Hit highlights for various parts of this tweet
-  // for tweet text
-  6: optional list<hits.ThriftHits> hitHighlights
-  // for the title and description in the card expando.
-  12: optional list<hits.ThriftHits> cardTitleHitHighlights
-  13: optional list<hits.ThriftHits> cardDescriptionHitHighlights
-
-  // Expansion types. If expandResult is false, the expansions set should be ignored.
-  8: optional bool expandResult = 0
-  9: optional set<expansions.ThriftTweetExpansionType> expansions
-
-  // Only set if this is a promoted tweet
-  11: optional adserver_common.AdImpression adImpression
-
-  // Where this tweet is from.
-  // Since ThriftSearchResult is used not only as an Earlybird response, but also as an
-  // internal data transfer object of Blender, the value of this field is mutable in Blender
-  // and does not necessarily reflect the Earlybird response.
-  14: optional ThriftTweetSource tweetSource
-
-  // the features of a tweet used for relevance timeline
-  // this field is populated by blender in RelevanceTimelineSearchWorkflow
-  15: optional features.ThriftTweetFeatures tweetFeatures
-
-  // the conversation context of a tweet
-  16: optional conversation.ThriftConversationContext conversationContext
-
-  // per-result debugging info that's persisted across merges.
-  18: optional ThriftSearchResultDebugInfo debugInfo
-}(persisted='true')
-
-// Ranking mode for facets; used as the type of ThriftFacetFieldRequest.rankingMode, whose
-// default is COUNT.
-enum ThriftFacetRankingMode {
-  COUNT = 0,
-  FILTER_WITH_TERM_STATISTICS = 1,
-}
-
-// Facet request for a single field: the field name and the number of results wanted
-// (numResults defaults to 5).
-struct ThriftFacetFieldRequest {
-  // next available field ID: 4
-  1: required string fieldName
-  2: optional i32 numResults = 5
-
-  // use facetRankingOptions in ThriftFacetRequest instead
-  3: optional ThriftFacetRankingMode rankingMode = ThriftFacetRankingMode.COUNT
-}(persisted='true')
-
-// Options for a facet counting request (see EarlybirdRequest.facetRequest): the per-field
-// requests, the ranking options, and a usingQueryCache flag that defaults to 0.
-struct ThriftFacetRequest {
-  // Next available field ID: 7
-  1: optional list<ThriftFacetFieldRequest> facetFields
-  5: optional ranking.ThriftFacetRankingOptions facetRankingOptions
-  6: optional bool usingQueryCache = 0
-}(persisted='true')
-
-// A single term in a term statistics request, given as a field name (defaults to "text")
-// and the term itself. Also used as the key of ThriftTermStatisticsResults.termResults.
-struct ThriftTermRequest {
-  1: optional string fieldName = "text"
-  2: required string term
-}(persisted='true')
-
-// Granularity of the histogram bins (see ThriftHistogramSettings.granularity). With CUSTOM,
-// the bin size is taken from ThriftHistogramSettings.binSizeInSeconds.
-enum ThriftHistogramGranularityType {
-  MINUTES = 0
-  HOURS = 1,
-  DAYS = 2,
-  CUSTOM = 3,
-
-  PLACE_HOLDER4 = 4,
-  PLACE_HOLDER5 = 5,
-}
-
-// Histogram settings for term statistics: bin granularity, number of bins (default 60),
-// sampling rate (default 1), and an explicit bin size for CUSTOM granularity.
-struct ThriftHistogramSettings {
-  1: required ThriftHistogramGranularityType granularity
-  2: optional i32 numBins = 60
-  3: optional i32 samplingRate = 1
-  4: optional i32 binSizeInSeconds   // the bin size, only used if granularity is set to CUSTOM.
-}(persisted='true')
-
-// Options for term statistics requests (see EarlybirdRequest.termStatisticsRequest).
-// next id is 5
-struct ThriftTermStatisticsRequest {
-  1: optional list<ThriftTermRequest> termRequests
-  2: optional ThriftHistogramSettings histogramSettings
-  // If this is set to true, then even if there are no termRequests above, so long as
-  // histogramSettings is set, Earlybird will return a null->ThriftTermResults entry in the
-  // termResults map, containing the global tweet count histogram for the current query, which
-  // is the number of tweets matching this query in different minutes/hours/days.
-  3: optional bool includeGlobalCounts = 0
-  // When this is set, the background facets call does another search in order to find the best
-  // representative tweet for a given term request. The representative tweet is stored in the
-  // metadata of the termstats result.
-  4: optional bool scoreTweetsForRepresentatives = 0
-}(persisted='true')
-
-// Metadata attached to a facet count or term result: the first tweet that contained the
-// facet, its author, language and creation time, plus filtering and debug information.
-// Next id is 12
-struct ThriftFacetCountMetadata {
-  // this is the id of the first tweet in the index that contained this facet
-  1: optional i64 statusId = -1
-
-  // whether the tweet with the above statusId is NSFW, from an antisocial user,
-  // marked as sensitive content, etc.
-  10: optional bool statusPossiblySensitive
-
-  // the id of the user who sent the tweet above - only returned if
-  // statusId is returned too
-  // NOTE: for native photos we may not be able to determine the user,
-  // even though the statusId can be returned. This is because the statusId
-  // can be determined from the url, but the user can't and the tweet may
-  // not be in the index anymore. In this case statusId would be set but
-  // twitterUserId would not.
-  2: optional i64 twitterUserId = -1
-
-  // the language of the tweet above.
-  8: optional search_language.ThriftLanguage statusLanguage
-
-  // optionally whitelist the fromUserId from dup/twitterUserId filtering
-  3: optional bool dontFilterUser = 0;
-
-  // if this facet is a native photo we return for convenience the
-  // twimg url
-  4: optional string nativePhotoUrl
-
-  // optionally returns some debug information about this facet
-  5: optional string explanation
-
-  // the created_at value for the tweet from statusId - only returned
-  // if statusId is returned too
-  6: optional i64 created_at
-
-  // the maximum tweepcred of the hits that contained this facet
-  7: optional i32 maxTweepCred
-
-  // Whether this facet result is force inserted, instead of organically returned from search.
-  // This field is only used in Blender to mark the force-inserted facet results
-  // (from recent tweets, etc).
-  11: optional bool forceInserted = 0
-}(persisted='true')
-
-// Result for a single term in a term statistics response: the total count, and optionally
-// the per-bin histogram counts and facet count metadata.
-struct ThriftTermResults {
-  1: required i32 totalCount
-  2: optional list<i32> histogramBins
-  3: optional ThriftFacetCountMetadata metadata
-}(persisted='true')
-
-// Term statistics response: a map from each requested term to its results, together with
-// the histogram settings and bin ids the histogram counts refer to.
-struct ThriftTermStatisticsResults {
-  1: required map<ThriftTermRequest,ThriftTermResults> termResults
-  2: optional ThriftHistogramSettings histogramSettings
-  // If histogramSettings are set, this will have a list of ThriftHistogramSettings.numBins binIds,
-  // that the corresponding histogramBins in ThriftTermResults will have counts for.
-  // The binIds will correspond to the times of the hits matching the driving search query for this
-  // term statistics request.
-  // If there were no hits matching the search query, numBins binIds will be returned, but the
-  // values of the binIds will not meaningfully correspond to anything related to the query, and
-  // should not be used. Such cases can be identified by ThriftSearchResults.numHitsProcessed being
-  // set to 0 in the response, and the response not being early terminated.
-  3: optional list<i32> binIds
-  // If set, this is the id of the minimum (oldest) bin that has been completely searched,
-  // even if the query was early terminated. If not set, no bin was searched fully, or no
-  // histogram was requested.
-  // Note that if e.g. a query only matches a bin partially (due to e.g. a since operator) the bin
-  // is still considered fully searched if the query did not early terminate.
-  4: optional i32 minCompleteBinId
-}(persisted='true')
-
-// Count information for a single facet: its label, simple and weighted counts, optional
-// background count, score and penalty information, and optional metadata.
-struct ThriftFacetCount {
-  // the text of the facet
-  1: required string facetLabel
-
-  // deprecated; currently matches weightedCount for backwards-compatibility reasons
-  2: optional i32 facetCount
-
-  // the simple count of tweets that contained this facet, without any
-  // weighting applied
-  7: optional i32 simpleCount
-
-  // a weighted version of the count, using signals like tweepcred, parus, etc.
-  8: optional i32 weightedCount
-
-  // the number of times this facet occurred in tweets matching the background query
-  // using the term statistics API - only set if FILTER_WITH_TERM_STATISTICS was used
-  3: optional i32 backgroundCount
-
-  // the relevance score that was computed for this facet if FILTER_WITH_TERM_STATISTICS
-  // was used
-  4: optional double score
-
-  // a counter for how often this facet was penalized
-  5: optional i32 penaltyCount
-
-  6: optional ThriftFacetCountMetadata metadata
-}(persisted='true')
-
-// Results for a single facet field: the list of top facets (labels and counts), the total
-// count for this field, and a quality score for this field.
-struct ThriftFacetFieldResults {
-  1: required list<ThriftFacetCount> topFacets
-  2: required i32 totalCount
-  3: optional double scoreQuality
-  4: optional i32 totalScore
-  5: optional i32 totalPenalty
-
-  // The ratio of each tweet language among the tweets with this facet field, as a map from the
-  // language to a number in (0.0, 1.0]. Only languages with a ratio higher than 0.1 are included.
-  6: optional map<search_language.ThriftLanguage, double> languageHistogram
-}
-
-// Facet response: per-field facet results keyed by a string (presumably the facet field
-// name - confirm), plus optional background hit count and user id whitelist.
-struct ThriftFacetResults {
-  1: required map<string, ThriftFacetFieldResults> facetFields
-  2: optional i32 backgroundNumHits
-  // Optionally returns a list of user ids that should not get filtered
-  // out by things like antigaming filters, because these users were explicitly
-  // queried for.
-  // Note that ThriftFacetCountMetadata already returns dontFilterUser
-  // for facet requests, in which case this list is not needed. However, it
-  // is needed for subsequent term statistics queries, where user id lookups
-  // are performed, but a different background query is used.
-  3: optional set<i64> userIDWhitelist
-}
-
-// The search results for a query, together with query-level statistics: searched id and
-// time ranges, relevance stats, histograms, query cost, early termination info, the
-// feature schema and scoring time.
-struct ThriftSearchResults {
-  // Next available field ID: 23
-  1: required list<ThriftSearchResult> results = []
-
-  // (SEARCH-11950): Now resultOffset is deprecated, so there is no use in numResultsSkipped too.
-  9: optional i32 deprecated_numResultsSkipped
-
-  // Number of docs that matched the query and were processed.
-  7: optional i32 numHitsProcessed
-
-  // Range of status IDs searched, from max ID to min ID (both inclusive).
-  // These may be unset in case that the search query contained ID or time
-  // operators that were completely out of range for the given index.
-  10: optional i64 maxSearchedStatusID
-  11: optional i64 minSearchedStatusID
-
-  // Time range that was searched (both inclusive).
-  19: optional i32 maxSearchedTimeSinceEpoch
-  20: optional i32 minSearchedTimeSinceEpoch
-
-  12: optional ThriftSearchResultsRelevanceStats relevanceStats
-
-  // Overall quality of this search result set
-  13: optional double score = -1.0
-  18: optional double nsfwRatio = 0.0
-
-  // The count of hit documents in each language.
-  14: optional map<search_language.ThriftLanguage, i32> languageHistogram
-
-  // Hit counts per time period:
-  // The key is a time cutoff in milliseconds (e.g. 60000 msecs ago).
-  // The value is the number of hits that are more recent than the cutoff.
-  15: optional map<i64, i32> hitCounts
-
-  // the total cost for this query
-  16: optional double queryCost
-
-  // Set to non-0 if this query was terminated early (either due to a timeout, or exceeded query cost)
-  // When getting this response from a single earlybird, this will be set to 1, if the query
-  // terminated early.
-  // When getting this response from a search root, this should be set to the number of individual
-  // earlybird requests that were terminated early.
-  17: optional i32 numPartitionsEarlyTerminated
-
-  // If ThriftSearchResults returns features in features.ThriftSearchResultFeature format, this
-  // field would define the schema of the features.
-  // If the earlybird schema is already in the client cached schemas indicated in the request, then
-  // searchFeatureSchema would only have (version, checksum) information.
-  //
-  // Notice that earlybird root only sends one schema back to the superroot even though earlybird
-  // root might receive multiple version of schemas.
-  //
-  // Earlybird roots' schema merge/choose logic when returning results to superroot:
-  // . pick the most occurred versioned schema and return the schema to the superroot
-  // . if the superroot already caches the schema, only send the version information back
-  //
-  // Superroots' schema merge/choose logic when returning results to clients:
-  // . pick the schema based on the order of: realtime > protected > archive
-  // . because of the above ordering, it is possible that archive earlybird schema with a new flush
-  //   version (with new bit features) might be lost to older realtime earlybird schema; this is
-  //   considered to be rare and acceptable because one realtime earlybird deploy would fix it
-  21: optional features.ThriftSearchFeatureSchema featureSchema
-
-  // How long it took to score the results in earlybird (in nanoseconds). The number of results
-  // that were scored should be set in numHitsProcessed.
-  // Expected to only be set for requests that actually do scoring (i.e. Relevance and TopTweets).
-  22: optional i64 scoringTimeNanos
-
-  8: optional i32 deprecated_numDocsProcessed
-}
-
-// Specifies what kind of status object to return, if any (see
-// EarlybirdRequest.returnStatusType).
-// Note: Earlybird no longer respects this field, as it does not contain statuses.
-// Blender should respect it.
-enum EarlybirdReturnStatusType {
-  NO_STATUS = 0
-  // deprecated
-  DEPRECATED_BASIC_STATUS = 1,
-  // deprecated
-  DEPRECATED_SEARCH_STATUS = 2,
-  TWEETYPIE_STATUS = 3,
-
-  PLACE_HOLDER4 = 4,
-  PLACE_HOLDER5 = 5,
-}
-
-// Adjusted values for selected EarlybirdRequest parameters. Used by
-// EarlybirdRequest.adjustedProtectedRequestParams and adjustedFullArchiveRequestParams.
-struct AdjustedRequestParams {
-  // Next available field ID: 4
-
-  // Adjusted value for EarlybirdRequest.searchQuery.numResults.
-  1: optional i32 numResults
-
-  // Adjusted value for EarlybirdRequest.searchQuery.maxHitsToProcess and
-  // EarlybirdRequest.searchQuery.relevanceOptions.maxHitsToProcess.
-  2: optional i32 maxHitsToProcess
-
-  // Adjusted value for EarlybirdRequest.searchQuery.relevanceOptions.returnAllResults
-  3: optional bool returnAllResults
-}
-
-// A request to an earlybird or earlybird root. Holds the search query, common RPC and client
-// information, request-type specific options (facets, term statistics), debug options, and
-// options that are only used by the blender or by the roots.
-struct EarlybirdRequest {
-  // Next available field ID: 37
-
-  // -------- COMMON REQUEST OPTIONS --------
-  // These fields contain options respected by all kinds of earlybird requests.
-
-  // Search query containing general earlybird retrieval and hit collection options.
-  // Also contains the options specific to search requests.
-  1: required ThriftSearchQuery searchQuery
-
-  // Common RPC information - client hostname and request ID.
-  12: optional string clientHost
-  13: optional string clientRequestID
-
-  // A string identifying the client that initiated the request.
-  // Ex: macaw-search.prod, webforall.prod, webforall.staging.
-  // The intention is to track the load we get from each client, and eventually enforce
-  // per-client QPS quotas, but this field could also be used to allow access to certain features
-  // only to certain clients, etc.
-  21: optional string clientId
-
-  // The time (in millis since epoch) when the earlybird client issued this request.
-  // Can be used to estimate request timeout time, capturing in-transit time for the request.
-  23: optional i64 clientRequestTimeMs
-
-  // Caching parameters used by earlybird roots.
-  24: optional caching.CachingParams cachingParams
-
-  // Deprecated. See SEARCH-2784
-  // Earlybird requests will be early terminated in a best-effort way to prevent them from
-  // exceeding the given timeout.  If timeout is <= 0 this early termination criteria is
-  // disabled.
-  17: optional i32 timeoutMs = -1
-
-  // Deprecated. See SEARCH-2784
-  // Earlybird requests will be early terminated in a best-effort way to prevent them from
-  // exceeding the given query cost.  If maxQueryCost <= 0 this early termination criteria
-  // is disabled.
-  20: optional double maxQueryCost = -1
-
-
-  // -------- REQUEST-TYPE SPECIFIC OPTIONS --------
-  // These fields contain options for one specific kind of request.  If one of these options
-  // is set the request will be considered to be the appropriate type of request.
-
-  // Options for facet counting requests.
-  11: optional ThriftFacetRequest facetRequest
-
-  // Options for term statistics requests.
-  14: optional ThriftTermStatisticsRequest termStatisticsRequest
-
-
-  // -------- DEBUG OPTIONS --------
-  // Used for debugging only.
-
-  // Debug mode, 0 for no debug information.
-  15: optional i8 debugMode = 0
-
-  // Can be used to pass extra debug arguments to earlybird.
-  34: optional EarlybirdDebugOptions debugOptions
-
-  // Searches a specific segment by time slice id if set and segment id is > 0.
-  22: optional i64 searchSegmentId
-
-  // -------- THINGS USED ONLY BY THE BLENDER --------
-  // These fields are used by the blender and clients of the blender, but not by earlybird.
-
-  // Specifies what kind of status object to return, if any.
-  7: optional EarlybirdReturnStatusType returnStatusType
-
-
-  // -------- THINGS USED BY THE ROOTS --------
-  // These fields are not in use by earlybirds themselves, but are in use by earlybird roots
-  // (and their clients).
-  // These fields live here since we currently reuse the same thrift request and response structs
-  // for both earlybirds and earlybird roots, and could potentially be moved out if we were to
-  // introduce separate request / response structs specifically for the roots.
-
-  // We have a threshold for how many hash partition requests need to succeed at the root level
-  // in order for the earlybird root request to be considered successful.
-  // Each type of earlybird queries (e.g. relevance, or term statistics) has a predefined default
-  // threshold value (e.g. 90% of hash partitions need to succeed for a recency query).
-  // The client can optionally set the threshold value to be something other than the default,
-  // by setting this field to a value in the range of 0 (exclusive) to 1 (inclusive).
-  // If this value is set outside of the (0, 1] range, a CLIENT_ERROR EarlybirdResponseCode will
-  // be returned.
-  25: optional double successfulResponseThreshold
-
-  // Where does the query come from?
-  26: optional query.ThriftQuerySource querySource
-
-  // Whether to get archive results. This flag is advisory. A request may still be restricted
-  // from getting results from the archive based on the requesting client, query source,
-  // requested time/id range, etc.
-  27: optional bool getOlderResults
-
-  // The list of users followed by the current user.
-  // Used to restrict the values in the fromUserIDFilter64 field when sending a request
-  // to the protected cluster.
-  28: optional list<i64> followedUserIds
-
-  // The adjusted parameters for the protected request.
-  29: optional AdjustedRequestParams adjustedProtectedRequestParams
-
-  // The adjusted parameters for the full archive request.
-  30: optional AdjustedRequestParams adjustedFullArchiveRequestParams
-
-  // Return only the protected tweets. This flag is used by the SuperRoot to return relevance
-  // results that contain only protected tweets.
-  31: optional bool getProtectedTweetsOnly
-
-  // Tokenize serialized queries with the appropriate Penguin version(s).
-  // Only has an effect on superroot.
-  32: optional bool retokenizeSerializedQuery
-
-  // Flag to ignore tweets that are very recent and could be incompletely indexed.
-  // If false, will allow queries to see results that may violate implicit streaming
-  // guarantees and will search Tweets that have been partially indexed.
-  // See go/indexing-latency for more details. When enabled, prevents seeing tweets
-  // that are less than 15 seconds old (or a similarly configured threshold).
-  // May be set to false unless explicitly set to true.
-  // NOTE(review): the declared default below is 1 (true), which seems at odds with the
-  // previous sentence - confirm the intended wording.
-  33: optional bool skipVeryRecentTweets = 1
-
-  // Setting an experimental cluster will reroute traffic at the realtime root layer to an experimental
-  // Earlybird cluster. This will have no impact if set on requests to anywhere other than realtime root.
-  35: optional ExperimentCluster experimentClusterToUse
-
-  // Caps number of results returned by roots after merging results from different earlybird
-  // partitions/clusters.
-  // If not set, ThriftSearchQuery.numResults or CollectorParams.numResultsToReturn will be used
-  // to cap results.
-  // This parameter will be ignored if ThriftRelevanceOptions.returnAllResults is set to true.
-  36: optional i32 numResultsToReturnAtRoot
-}
-
-// Response codes for earlybird requests; also used for per-tier responses (see
-// TierResponse.tierResponseCode).
-enum EarlybirdResponseCode {
-  SUCCESS = 0,
-  PARTITION_NOT_FOUND = 1,
-  PARTITION_DISABLED = 2,
-  TRANSIENT_ERROR = 3,
-  PERSISTENT_ERROR = 4,
-  CLIENT_ERROR = 5,
-  PARTITION_SKIPPED = 6,
-  // Request was queued up on the server for so long that it timed out, and was not
-  // executed at all.
-  SERVER_TIMEOUT_ERROR = 7,
-  TIER_SKIPPED = 8,
-  // Not enough partitions returned a successful response. The merged response will have partition
-  // counts and early termination info set, but will not have search results.
-  TOO_MANY_PARTITIONS_FAILED_ERROR = 9,
-  // Client went over its quota, and the request was throttled.
-  QUOTA_EXCEEDED_ERROR = 10,
-  // Client's request is blocked based on Search Infra's policy. Search Infra can block client's
-  // requests based on the query source of the request.
-  REQUEST_BLOCKED_ERROR = 11,
-
-  CLIENT_CANCEL_ERROR = 12,
-
-  CLIENT_BLOCKED_BY_TIER_ERROR = 13,
-
-  PLACE_HOLDER_2015_09_21 = 14,
-}
-
-// A recorded request and response.
-struct EarlybirdRequestResponse {
-  // Where did we send this request to.
-  1: optional string sentTo;
-  2: optional EarlybirdRequest request;
-  // This can't be an EarlybirdResponse, because the thrift compiler for Python
-  // doesn't allow cyclic references and we have some Python utilities that will fail.
-  3: optional string response;
-}
-
-struct EarlybirdDebugInfo {
-  1: optional string host
-  2: optional string parsedQuery
-  3: optional string luceneQuery
-  // Requests sent to dependent services. For example, superroot sends to realtime root,
-  // archive root, etc.
-  4: optional list<EarlybirdRequestResponse> sentRequests;
-  // segment level debug info (eg. hitsPerSegment, max/minSearchedTime etc.)
-  5: optional list<string> collectorDebugInfo
-  6: optional list<string> termStatisticsDebugInfo
-}
-
-struct EarlybirdDebugOptions {
-  1: optional bool includeCollectorDebugInfo
-}
-
-struct TierResponse {
-  1: optional EarlybirdResponseCode tierResponseCode
-  2: optional i32 numPartitions
-  3: optional i32 numSuccessfulPartitions
-}
-
-struct EarlybirdServerStats {
-  // The hostname of the Earlybird that processed this request.
-  1: optional string hostname
-
-  // The partition to which this earlybird belongs.
-  2: optional i32 partition
-
-  // Current Earlybird QPS.
-  // Earlybirds should set this field at the end of a request (not at the start). This would give
-  // roots a more up-to-date view of the load on the earlybirds.
-  3: optional i64 currentQps
-
-  // The time the request waited in the queue before Earlybird started processing it.
-  // This does not include the time spent in the finagle queue: it's the time between the moment
-  // earlybird received the request, and the moment it started processing the request.
-  4: optional i64 queueTimeMillis
-
-  // The average request time in the queue before Earlybird started processing it.
-  // This does not include the time that requests spent in the finagle queue: it's the average time
-  // between the moment earlybird received its requests, and the moment it started processing them.
-  5: optional i64 averageQueueTimeMillis
-
-  // Current average per-request latency as perceived by Earlybird.
-  6: optional i64 averageLatencyMicros
-
-  // The tier to which this earlybird belongs.
-  7: optional string tierName
-}
-
-struct EarlybirdResponse {
-  // Next available field ID: 17
-  1: optional ThriftSearchResults searchResults
-  5: optional ThriftFacetResults facetResults
-  6: optional ThriftTermStatisticsResults termStatisticsResults
-  2: required EarlybirdResponseCode responseCode
-  3: required i64 responseTime
-  7: optional i64 responseTimeMicros
-  // fields below will only be returned if debug > 1 in the request.
-  4: optional string debugString
-  8: optional EarlybirdDebugInfo debugInfo
-
-  // Only exists for merged earlybird response.
-  10: optional i32 numPartitions
-  11: optional i32 numSuccessfulPartitions
-  // Only exists for merged earlybird response from multiple tiers.
-  13: optional list<TierResponse> perTierResponse
-
-  // Total number of segments that were searched. Partially searched segments are fully counted.
-  // e.g. if we searched 1 segment fully, and early terminated half way through the second
-  // segment, this field should be set to 2.
-  15: optional i32 numSearchedSegments
-
-  // Whether the request early terminated, if so, the termination reason.
-  12: optional search.EarlyTerminationInfo earlyTerminationInfo
-
-  // Whether this response is from cache.
-  14: optional bool cacheHit
-
-  // Stats used by roots to determine if we should go into degraded mode.
-  16: optional EarlybirdServerStats earlybirdServerStats
-}
-
-enum EarlybirdStatusCode {
-  STARTING = 0,
-  CURRENT = 1,
-  STOPPING = 2,
-  UNHEALTHY = 3,
-  BLACKLISTED = 4,
-
-  PLACE_HOLDER5 = 5,
-  PLACE_HOLDER6 = 6,
-}
-
-struct EarlybirdStatusResponse {
-  1: required EarlybirdStatusCode code
-  2: required i64 aliveSince
-  3: optional string message
-}
-
-service EarlybirdService {
-  string getName(),
-  EarlybirdStatusResponse getStatus(),
-  EarlybirdResponse search( 1: EarlybirdRequest request )
-}
diff --git a/src/thrift/com/twitter/simclusters_v2/BUILD b/src/thrift/com/twitter/simclusters_v2/BUILD
deleted file mode 100644
index 221cc9184..000000000
--- a/src/thrift/com/twitter/simclusters_v2/BUILD
+++ /dev/null
@@ -1,23 +0,0 @@
-create_thrift_libraries(
-    base_name = "simclusters_v2-thrift",
-    sources = ["*.thrift"],
-    platform = "java8",
-    tags = ["bazel-compatible"],
-    dependency_roots = [
-        "src/thrift/com/twitter/algebird_internal",
-    ],
-    export_roots = [
-        "src/thrift/com/twitter/algebird_internal:algebird_internal",
-    ],
-    generate_languages = [
-        "go",
-        "java",
-        "lua",
-        "python",
-        "ruby",
-        "scala",
-        "strato",
-    ],
-    provides_java_name = "simclusters_v2-thrift-java",
-    provides_scala_name = "simclusters_v2-thrift-scala",
-)
diff --git a/src/thrift/com/twitter/simclusters_v2/BUILD.docx b/src/thrift/com/twitter/simclusters_v2/BUILD.docx
new file mode 100644
index 000000000..3bdfdf2cb
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/BUILD.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/abuse.docx b/src/thrift/com/twitter/simclusters_v2/abuse.docx
new file mode 100644
index 000000000..29efae70a
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/abuse.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/abuse.thrift b/src/thrift/com/twitter/simclusters_v2/abuse.thrift
deleted file mode 100644
index 60043244b..000000000
--- a/src/thrift/com/twitter/simclusters_v2/abuse.thrift
+++ /dev/null
@@ -1,53 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "embedding.thrift"
-include "simclusters_presto.thrift"
-
-/**
- * Struct that associates a user with simcluster scores for different
- * interaction types. This is meant to be used as a feature to predict abuse.
- *
- * This thrift struct is meant for exploration purposes. It does not have any
- * assumptions about what type of interactions we use or what types of scores
- * we are keeping track of.
- **/ 
-struct AdhocSingleSideClusterScores {
-  1: required i64 userId(personalDataType = 'UserId')
-  // We can make the interaction types have arbitrary names. In the production
-  // version of this dataset. We should have a different field per interaction
-  // type so that API of what is included is more clear.
-  2: required map<string, embedding.SimClustersEmbedding> interactionScores
-}(persisted="true", hasPersonalData = 'true')
-
-/**
-* This is a prod version of the single side features. It is meant to be used as a value in a key
-* value store. The pair of healthy and unhealthy scores will be different depending on the use case.
-* We will use different stores for different user cases. For instance, the first instance that
-* we implement will use search abuse reports and impressions. We can build stores for new values
-* in the future.
-*
-* The consumer creates the interactions which the author receives.  For instance, the consumer
-* creates an abuse report for an author. The consumer scores are related to the interaction creation
-* behavior of the consumer. The author scores are related to the whether the author receives these
-* interactions.
-*
-**/
-struct SingleSideUserScores {
-  1: required i64 userId(personalDataType = 'UserId')
-  2: required double consumerUnhealthyScore(personalDataType = 'EngagementScore')
-  3: required double consumerHealthyScore(personalDataType = 'EngagementScore')
-  4: required double authorUnhealthyScore(personalDataType = 'EngagementScore')
-  5: required double authorHealthyScore(personalDataType = 'EngagementScore')
-}(persisted="true", hasPersonalData = 'true')
-
-/**
-* Struct that associates a cluster-cluster interaction scores for different
-* interaction types.
-**/
-struct AdhocCrossSimClusterInteractionScores {
-  1: required i64 clusterId
-  2: required list<simclusters_presto.ClustersScore> clusterScores
-}(persisted="true")
diff --git a/src/thrift/com/twitter/simclusters_v2/clustering.docx b/src/thrift/com/twitter/simclusters_v2/clustering.docx
new file mode 100644
index 000000000..96164124c
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/clustering.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/clustering.thrift b/src/thrift/com/twitter/simclusters_v2/clustering.thrift
deleted file mode 100644
index 81b8567cb..000000000
--- a/src/thrift/com/twitter/simclusters_v2/clustering.thrift
+++ /dev/null
@@ -1,18 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.clustering
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-/**
- * Struct that represents an ordered list of producer clusters.
- * The list is meant to be ordered by decreasing cluster size.
- **/
-struct OrderedClustersAndMembers {
-  1: required list<set<i64>> orderedClustersAndMembers (personalDataType = 'UserId')
-  // work around BQ not supporting nested struct such as list<set>
-  2: optional list<ClusterMembers> orderedClustersAndMembersStruct (personalDataType = 'UserId')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct ClusterMembers {
-  1: required set<i64> clusterMembers (personalDataType = 'UserId')
-}(persisted = 'true', hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/embedding.docx b/src/thrift/com/twitter/simclusters_v2/embedding.docx
new file mode 100644
index 000000000..2835d50ca
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/embedding.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/embedding.thrift b/src/thrift/com/twitter/simclusters_v2/embedding.thrift
deleted file mode 100644
index 110da0c65..000000000
--- a/src/thrift/com/twitter/simclusters_v2/embedding.thrift
+++ /dev/null
@@ -1,137 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.embedding
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "com/twitter/simclusters_v2/identifier.thrift"
-include "com/twitter/simclusters_v2/online_store.thrift"
-
-struct SimClusterWithScore {
-  1: required i32 clusterId(personalDataType = 'InferredInterests')
-  2: required double score(personalDataType = 'EngagementScore')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct TopSimClustersWithScore {
-  1: required list<SimClusterWithScore> topClusters
-  2: required online_store.ModelVersion modelVersion
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct InternalIdWithScore {
-  1: required identifier.InternalId internalId
-  2: required double score(personalDataType = 'EngagementScore')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct InternalIdEmbedding {
-  1: required list<InternalIdWithScore> embedding
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct SemanticCoreEntityWithScore {
-  1: required i64 entityId(personalDataType = 'SemanticcoreClassification')
-  2: required double score(personalDataType = 'EngagementScore')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct TopSemanticCoreEntitiesWithScore {
-  1: required list<SemanticCoreEntityWithScore> topEntities
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct PersistedFullClusterId {
-  1: required online_store.ModelVersion modelVersion
-  2: required i32 clusterId(personalDataType = 'InferredInterests')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct DayPartitionedClusterId {
-  1: required i32 clusterId(personalDataType = 'InferredInterests')
-  2: required string dayPartition // format: yyyy-MM-dd
-}
-
-struct TopProducerWithScore {
-  1: required i64 userId(personalDataType = 'UserId')
-  2: required double score(personalDataType = 'EngagementScore')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct TopProducersWithScore {
-  1: required list<TopProducerWithScore> topProducers
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct TweetWithScore {
-  1: required i64 tweetId(personalDataType = 'TweetId')
-  2: required double score(personalDataType = 'EngagementScore')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct TweetsWithScore {
-  1: required list<TweetWithScore> tweets
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct TweetTopKTweetsWithScore {
-  1: required i64 tweetId(personalDataType = 'TweetId')
-  2: required TweetsWithScore topkTweetsWithScore
-}(persisted = 'true', hasPersonalData = 'true')
-
-/**
-  * The generic SimClustersEmbedding for online long-term storage and real-time calculation.
-  * Use SimClustersEmbeddingId as the only identifier.
-  * Warning: Doesn't include model version and embedding type in the value struct.
-  **/
-struct SimClustersEmbedding {
-  1: required list<SimClusterWithScore> embedding
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct SimClustersEmbeddingWithScore {
-  1: required SimClustersEmbedding embedding
-  2: required double score
-}(persisted = 'true', hasPersonalData = 'false')
-
-/**
-  * This is the recommended structure for aggregating embeddings with time decay - the metadata
-  * stores the information needed for decayed aggregation.
-  **/
-struct SimClustersEmbeddingWithMetadata {
-  1: required SimClustersEmbedding embedding
-  2: required SimClustersEmbeddingMetadata metadata
-}(hasPersonalData = 'true')
-
-struct SimClustersEmbeddingIdWithScore {
-  1: required identifier.SimClustersEmbeddingId id
-  2: required double score
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct SimClustersMultiEmbeddingByValues {
-  1: required list<SimClustersEmbeddingWithScore> embeddings
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct SimClustersMultiEmbeddingByIds {
-  1: required list<SimClustersEmbeddingIdWithScore> ids
-}(persisted = 'true', hasPersonalData = 'false')
-
-/**
- * Generic SimClusters Multiple Embeddings. The identifier.SimClustersMultiEmbeddingId is the key of
- * the multiple embedding.
- **/
-union SimClustersMultiEmbedding {
-  1: SimClustersMultiEmbeddingByValues values
-  2: SimClustersMultiEmbeddingByIds ids
-}(persisted = 'true', hasPersonalData = 'false')
-
-/**
-  * The metadata of a SimClustersEmbedding. The updatedCount represent the version of the Embedding.
-  * For tweet embedding, the updatedCount is same/close to the favorite count.
-  **/
-struct SimClustersEmbeddingMetadata {
-  1: optional i64 updatedAtMs
-  2: optional i64 updatedCount
-}(persisted = 'true', hasPersonalData = 'true')
-
-/**
-  * The data structure for PersistentSimClustersEmbedding Store
-  **/
-struct PersistentSimClustersEmbedding {
-  1: required SimClustersEmbedding embedding
-  2: required SimClustersEmbeddingMetadata metadata
-}(persisted = 'true', hasPersonalData = 'true')
-
-/**
-  * The data structure for the Multi Model PersistentSimClustersEmbedding Store
-  **/
-struct MultiModelPersistentSimClustersEmbedding {
-  1: required map<online_store.ModelVersion, PersistentSimClustersEmbedding> multiModelPersistentSimClustersEmbedding
-}(persisted = 'true', hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/entity.docx b/src/thrift/com/twitter/simclusters_v2/entity.docx
new file mode 100644
index 000000000..a45ee558c
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/entity.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/entity.thrift b/src/thrift/com/twitter/simclusters_v2/entity.thrift
deleted file mode 100644
index 1d0ee6946..000000000
--- a/src/thrift/com/twitter/simclusters_v2/entity.thrift
+++ /dev/null
@@ -1,51 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.entity
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "com/twitter/algebird_internal/algebird.thrift"
-
-/**
- * Penguin text entity. All fields are required as this is used as a part of a memcache key.
- **/
-struct PenguinKey {
-  1: required string textEntity
-}(hasPersonalData = 'false')
-
-/**
- * NER text entity. All fields are required as this is used as a part of a memcache key.
- **/
-struct NerKey {
-  1: required string textEntity
-  2: required i32 wholeEntityType
-}(hasPersonalData = 'false')
-
-/**
- * Semantic Core text entity. All fields are required as this is used as a part of a memcache key.
- **/
-struct SemanticCoreKey {
-  1: required i64 entityId(personalDataType = 'SemanticcoreClassification')
-}(hasPersonalData = 'true')
-
-/**
- * Represents an entity extracted from a tweet.
- **/
-union TweetTextEntity {
-  1: string hashtag
-  2: PenguinKey penguin
-  3: NerKey ner
-  4: SemanticCoreKey semanticCore
-}(hasPersonalData = 'true')
-
-struct SpaceId {
-  1: string id
-}(hasPersonalData = 'true')
-
-/**
- * All possible entities that simclusters are associated with.
- **/
-union SimClusterEntity {
-  1: i64 tweetId(personalDataType = 'TweetId')
-  2: TweetTextEntity tweetEntity
-  3: SpaceId spaceId
-}(hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/evaluation.docx b/src/thrift/com/twitter/simclusters_v2/evaluation.docx
new file mode 100644
index 000000000..b6f0bb33d
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/evaluation.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/evaluation.thrift b/src/thrift/com/twitter/simclusters_v2/evaluation.thrift
deleted file mode 100644
index 85414baf9..000000000
--- a/src/thrift/com/twitter/simclusters_v2/evaluation.thrift
+++ /dev/null
@@ -1,65 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.evaluation
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-/**
- * Surface area at which the reference tweet was displayed to the user
- **/
-enum DisplayLocation {
-  TimelinesRecap = 1,
-  TimelinesRectweet = 2
-}(hasPersonalData = 'false')
-
-struct TweetLabels {
-  1: required bool isClicked = false(personalDataType = 'EngagementsPrivate')
-  2: required bool isLiked = false(personalDataType = 'EngagementsPublic')
-  3: required bool isRetweeted = false(personalDataType = 'EngagementsPublic')
-  4: required bool isQuoted = false(personalDataType = 'EngagementsPublic')
-  5: required bool isReplied = false(personalDataType = 'EngagementsPublic')
-}(persisted = 'true', hasPersonalData = 'true')
-
-/**
- * Data container of a reference tweet with scribed user engagement labels
- */
-struct ReferenceTweet {
-  1: required i64 tweetId(personalDataType = 'TweetId')
-  2: required i64 authorId(personalDataType = 'UserId')
-  3: required i64 timestamp(personalDataType = 'PublicTimestamp')
-  4: required DisplayLocation displayLocation
-  5: required TweetLabels labels
-}(persisted="true", hasPersonalData = 'true')
-
-/**
- * Data container of a candidate tweet generated by the candidate algorithm
- */
-struct CandidateTweet {
-  1: required i64 tweetId(personalDataType = 'TweetId')
-  2: optional double score(personalDataType = 'EngagementScore')
-  // The timestamp here is a synthetically generated timestamp.
-  // for evaluation purpose. Hence left unannotated
-  3: optional i64 timestamp
-}(hasPersonalData = 'true')
-
-/**
- * An encapsulated collection of candidate tweets
- **/
-struct CandidateTweets {
-  1: required i64 targetUserId(personalDataType = 'UserId')
-  2: required list<CandidateTweet> recommendedTweets
-}(hasPersonalData = 'true')
-
-/**
- * An encapsulated collection of reference tweets
- **/
-struct ReferenceTweets {
-  1: required i64 targetUserId(personalDataType = 'UserId')
-  2: required list<ReferenceTweet> impressedTweets
-}(persisted="true", hasPersonalData = 'true')
-
-/**
- * A list of candidate tweets
- **/
-struct CandidateTweetsList {
-  1: required list<CandidateTweet> recommendedTweets
-}(hasPersonalData = 'true')
\ No newline at end of file
diff --git a/src/thrift/com/twitter/simclusters_v2/graph.docx b/src/thrift/com/twitter/simclusters_v2/graph.docx
new file mode 100644
index 000000000..abacbe504
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/graph.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/graph.thrift b/src/thrift/com/twitter/simclusters_v2/graph.thrift
deleted file mode 100644
index e67c860d2..000000000
--- a/src/thrift/com/twitter/simclusters_v2/graph.thrift
+++ /dev/null
@@ -1,61 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.graph
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-struct DecayedSums {
-  // last time the decayed sum was updated, in millis. 
-  1: required i64 lastUpdatedTimestamp
-
-  // a map from half life (specified in days) to the decayed sum
-  2: required map<i32, double> halfLifeInDaysToDecayedSums
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct EdgeWithDecayedWeights {
-  1: required i64 sourceId(personalDataType = 'UserId')
-  2: required i64 destinationId(personalDataType = 'UserId')
-  3: required DecayedSums weights
-}(persisted="true", hasPersonalData = "true")
-
-struct NeighborWithWeights {
-  1: required i64 neighborId(personalDataType = 'UserId')
-  2: optional bool isFollowed(personalDataType = 'Follow')
-  3: optional double followScoreNormalizedByNeighborFollowersL2(personalDataType = 'EngagementsPublic')
-  4: optional double favScoreHalfLife100Days(personalDataType = 'EngagementsPublic')
-  5: optional double favScoreHalfLife100DaysNormalizedByNeighborFaversL2(personalDataType = 'EngagementsPublic')
-
-  // log(favScoreHalfLife100Days + 1)
-  6: optional double logFavScore(personalDataType = 'EngagementsPublic')
-
-  // log(favScoreHalfLife100Days + 1) normalized so that a user's incoming weights have unit l2 norm
-  7: optional double logFavScoreL2Normalized(personalDataType = 'EngagementsPublic')
-
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct UserAndNeighbors {
-  1: required i64 userId(personalDataType = 'UserId')
-  2: required list<NeighborWithWeights> neighbors
-}(persisted="true", hasPersonalData = 'true')
-
-struct NormsAndCounts {
-  1: required i64 userId(personalDataType = 'UserId')
-  2: optional double followerL2Norm(personalDataType = 'CountOfFollowersAndFollowees')
-  3: optional double faverL2Norm(personalDataType = 'EngagementsPublic')
-  4: optional i64 followerCount(personalDataType = 'CountOfFollowersAndFollowees')
-  5: optional i64 faverCount(personalDataType = 'EngagementsPublic')
-
-  // sum of the weights on the incoming edges where someone fav'ed this producer
-  6: optional double favWeightsOnFavEdgesSum(personalDataType = 'EngagementsPublic')
-
-  // sum of the fav weights on all the followers of this producer
-  7: optional double favWeightsOnFollowEdgesSum(personalDataType = 'EngagementsPublic')
-  // log(favScore + 1)
-  8: optional double logFavL2Norm(personalDataType = 'EngagementsPublic')
-
-  // sum of log(favScore + 1) on the incoming edges where someone fav'ed this producer
-  9: optional double logFavWeightsOnFavEdgesSum(personalDataType = 'EngagementsPublic')
-
-  // sum of log(favScore + 1) on all the followers of this producer
-  10: optional double logFavWeightsOnFollowEdgesSum(personalDataType = 'EngagementsPublic')
-
-}(persisted="true", hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/identifier.docx b/src/thrift/com/twitter/simclusters_v2/identifier.docx
new file mode 100644
index 000000000..e5720db3f
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/identifier.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/identifier.thrift b/src/thrift/com/twitter/simclusters_v2/identifier.thrift
deleted file mode 100644
index b4285e699..000000000
--- a/src/thrift/com/twitter/simclusters_v2/identifier.thrift
+++ /dev/null
@@ -1,205 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.identifier
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "com/twitter/simclusters_v2/online_store.thrift"
-
-/**
-  * The uniform type for a SimClusters Embeddings.
-  * Each embeddings have the uniform underlying storage.
-  * Warning: Every EmbeddingType should map to one and only one InternalId.
-  **/
-enum EmbeddingType {
-  // Reserve 001 - 99 for Tweet embeddings
-	FavBasedTweet = 1, // Deprecated
-	FollowBasedTweet = 2, // Deprecated
-	LogFavBasedTweet = 3, // Production Version
-	FavBasedTwistlyTweet = 10, // Deprecated
-	LogFavBasedTwistlyTweet = 11, // Deprecated
-	LogFavLongestL2EmbeddingTweet = 12, // Production Version
-
-  // Tweet embeddings generated from non-fav events
-  // Naming convention: {Event}{Score}BasedTweet
-  // {Event}: The interaction event we use to build the tweet embeddings
-  // {Score}: The score from user InterestedIn embeddings
-  VideoPlayBack50LogFavBasedTweet = 21,
-  RetweetLogFavBasedTweet = 22,
-  ReplyLogFavBasedTweet = 23,
-  PushOpenLogFavBasedTweet = 24,
-
-  // [Experimental] Offline generated FavThroughRate-based Tweet Embedding
-  Pop1000RankDecay11Tweet = 30,
-  Pop10000RankDecay11Tweet = 31,
-  OonPop1000RankDecayTweet = 32,
-
-  // [Experimental] Offline generated production-like LogFavScore-based Tweet Embedding
-  OfflineGeneratedLogFavBasedTweet = 40,
-
-  // Reserve 51-59 for Ads Embedding
-  LogFavBasedAdsTweet = 51, // Experimental embedding for ads tweet candidate
-  LogFavClickBasedAdsTweet = 52, // Experimental embedding for ads tweet candidate
-
-  // Reserve 60-69 for Evergreen content
-  LogFavBasedEvergreenTweet = 60,
-  LogFavBasedRealTimeTweet = 65,
-
-	// Reserve 101 to 149 for Semantic Core Entity embeddings
-  FavBasedSematicCoreEntity = 101, // Deprecated
-  FollowBasedSematicCoreEntity = 102, // Deprecated
-  FavBasedHashtagEntity = 103, // Deprecated
-  FollowBasedHashtagEntity = 104, // Deprecated
-  ProducerFavBasedSemanticCoreEntity = 105, // Deprecated
-  ProducerFollowBasedSemanticCoreEntity = 106,// Deprecated
-  FavBasedLocaleSemanticCoreEntity = 107, // Deprecated
-  FollowBasedLocaleSemanticCoreEntity = 108, // Deprecated
-  LogFavBasedLocaleSemanticCoreEntity = 109, // Deprecated
-  LanguageFilteredProducerFavBasedSemanticCoreEntity = 110, // Deprecated
-  LanguageFilteredFavBasedLocaleSemanticCoreEntity = 111, // Deprecated
-  FavTfgTopic = 112, // TFG topic embedding built from fav-based user interestedIn
-  LogFavTfgTopic = 113, // TFG topic embedding built from logfav-based user interestedIn
-  FavInferredLanguageTfgTopic = 114, // TFG topic embedding built using inferred consumed languages
-  FavBasedKgoApeTopic = 115, // topic embedding using fav-based aggregatable producer embedding of KGO seed accounts.
-  LogFavBasedKgoApeTopic = 116, // topic embedding using log fav-based aggregatable producer embedding of KGO seed accounts.
-  FavBasedOnboardingApeTopic = 117, // topic embedding using fav-based aggregatable producer embedding of onboarding seed accounts.
-  LogFavBasedOnboardingApeTopic = 118, // topic embedding using log fav-based aggregatable producer embedding of onboarding seed accounts.
-  LogFavApeBasedMuseTopic = 119, // Deprecated
-  LogFavApeBasedMuseTopicExperiment = 120 // Deprecated
-
-  // Reserved 201 - 299 for Producer embeddings (KnownFor)
-  FavBasedProducer = 201
-  FollowBasedProducer = 202
-  AggregatableFavBasedProducer = 203 // fav-based aggregatable producer embedding.
-  AggregatableLogFavBasedProducer = 204 // logfav-based aggregatable producer embedding.
-  RelaxedAggregatableLogFavBasedProducer = 205 // logfav-based aggregatable producer embedding.
-  AggregatableFollowBasedProducer = 206 // follow-based aggregatable producer embedding.
-  KnownFor = 300
-
-  // Reserved 301 - 399 for User InterestedIn embeddings
-  FavBasedUserInterestedIn = 301
-  FollowBasedUserInterestedIn = 302
-  LogFavBasedUserInterestedIn = 303
-  RecentFollowBasedUserInterestedIn = 304 // interested-in embedding based on aggregating producer embeddings of recent follows
-  FilteredUserInterestedIn = 305 // interested-in embedding used by twistly read path
-  LogFavBasedUserInterestedInFromAPE = 306
-  FollowBasedUserInterestedInFromAPE = 307
-  TwiceUserInterestedIn = 308 // interested-in multi-embedding based on clustering producer embeddings of neighbors
-  UnfilteredUserInterestedIn = 309
-  UserNextInterestedIn = 310 // next interested-in embedding generated from BeT
-
-  // Denser User InterestedIn, generated by Producer embeddings.
-  FavBasedUserInterestedInFromPE = 311
-  FollowBasedUserInterestedInFromPE = 312
-  LogFavBasedUserInterestedInFromPE = 313
-  FilteredUserInterestedInFromPE = 314 // interested-in embedding used by twistly read path
-
-  // [Experimental] Denser User InterestedIn, generated by aggregating IIAPE embedding from AddressBook
-  LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE = 320
-  LogFavBasedUserInterestedAverageAddressBookFromIIAPE = 321
-  LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE = 322
-  LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE = 323
-  LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE = 324
-  LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE = 325
-
-  //Reserved 401 - 500 for Space embedding
-  FavBasedApeSpace = 401 // DEPRECATED
-  LogFavBasedListenerSpace = 402 // DEPRECATED
-  LogFavBasedAPESpeakerSpace = 403 // DEPRECATED
-  LogFavBasedUserInterestedInListenerSpace = 404 // DEPRECATED
-
-  // Experimental, internal-only IDs
-  ExperimentalThirtyDayRecentFollowBasedUserInterestedIn = 10000 // Like RecentFollowBasedUserInterestedIn, except limited to last 30 days
-	ExperimentalLogFavLongestL2EmbeddingTweet = 10001 // DEPRECATED
-}(persisted = 'true', hasPersonalData = 'false')
-
-/**
-  * The uniform type for a SimClusters MultiEmbeddings.
-  * Warning: Every MultiEmbeddingType should map to one and only one InternalId.
-  **/
-enum MultiEmbeddingType {
-  // Reserved 0-99 for Tweet based MultiEmbedding
-
-  // Reserved 100 - 199 for Topic based MultiEmbedding
-  LogFavApeBasedMuseTopic = 100 // Deprecated
-  LogFavApeBasedMuseTopicExperiment = 101 // Deprecated
-
-  // Reserved 301 - 399 for User InterestedIn embeddings
-  TwiceUserInterestedIn = 301 // interested-in multi-embedding based on clustering producer embeddings of neighbors
-}(persisted = 'true', hasPersonalData = 'true')
-
-// Deprecated. Please use TopicId for future cases.
-struct LocaleEntityId {
-  1: i64 entityId
-  2: string language
-}(persisted = 'true', hasPersonalData = 'false')
-
-enum EngagementType {
-  Favorite = 1,
-  Retweet = 2,
-}
-
-struct UserEngagedTweetId {
-  1: i64 tweetId(personalDataType = 'TweetId')
-  2: i64 userId(personalDataType = 'UserId')
-  3: EngagementType engagementType(personalDataType = 'EventType')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct TopicId {
-  1: i64 entityId (personalDataType = 'SemanticcoreClassification')
-  // 2-letter ISO 639-1 language code
-  2: optional string language
-  // 2-letter ISO 3166-1 alpha-2 country code
-  3: optional string country
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct TopicSubId {
-  1: i64 entityId (personalDataType = 'SemanticcoreClassification')
-  // 2-letter ISO 639-1 language code
-  2: optional string language
-  // 2-letter ISO 3166-1 alpha-2 country code
-  3: optional string country
-  4: i32 subId
-}(persisted = 'true', hasPersonalData = 'true')
-
-// Will be used for testing purposes in DDG 15536, 15534
-struct UserWithLanguageId {
-  1: required i64 userId(personalDataType = 'UserId')
-  2: optional string langCode(personalDataType = 'InferredLanguage')
-}(persisted = 'true', hasPersonalData = 'true')
-
-/**
-  * The internal identifier type.
-  * Need to add ordering in [[com.twitter.simclusters_v2.common.SimClustersEmbeddingId]]
-  * when adding a new type.
-  **/
-union InternalId {
-  1: i64 tweetId(personalDataType = 'TweetId')
-  2: i64 userId(personalDataType = 'UserId')
-  3: i64 entityId(personalDataType = 'SemanticcoreClassification')
-  4: string hashtag(personalDataType = 'PublicTweetEntitiesAndMetadata')
-  5: i32 clusterId
-  6: LocaleEntityId localeEntityId(personalDataType = 'SemanticcoreClassification')
-  7: UserEngagedTweetId userEngagedTweetId
-  8: TopicId topicId
-  9: TopicSubId topicSubId
-  10: string spaceId
-  11: UserWithLanguageId userWithLanguageId
-}(persisted = 'true', hasPersonalData = 'true')
-
-/**
-  * A uniform identifier type for all kinds of SimClusters based embeddings.
-  **/
-struct SimClustersEmbeddingId {
-  1: required EmbeddingType embeddingType
-  2: required online_store.ModelVersion modelVersion
-  3: required InternalId internalId
-}(persisted = 'true', hasPersonalData = 'true')
-
-/**
-  * A uniform identifier type for multiple SimClusters embeddings
-  **/
-struct SimClustersMultiEmbeddingId {
-  1: required MultiEmbeddingType embeddingType
-  2: required online_store.ModelVersion modelVersion
-  3: required InternalId internalId
-}(persisted = 'true', hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/inferred_entities.docx b/src/thrift/com/twitter/simclusters_v2/inferred_entities.docx
new file mode 100644
index 000000000..ddb07ff81
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/inferred_entities.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/inferred_entities.thrift b/src/thrift/com/twitter/simclusters_v2/inferred_entities.thrift
deleted file mode 100644
index db667fb68..000000000
--- a/src/thrift/com/twitter/simclusters_v2/inferred_entities.thrift
+++ /dev/null
@@ -1,38 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.inferred_entities
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-// The SimClusters type we use to infer entity interests about a user
-// Currently used for SimClusters Compliance to store a user's inferred interests
-
-include "online_store.thrift"
-
-enum ClusterType {
-  KnownFor        = 1,
-  InterestedIn    = 2
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct SimClustersSource {
-  1: required ClusterType clusterType
-  2: required online_store.ModelVersion modelVersion
-}(persisted = 'true', hasPersonalData = 'false')
-
-// The source of entities we use to infer entity interests about a user
-enum EntitySource {
-  SimClusters20M145KDec11EntityEmbeddingsByFavScore = 1, // deprecated
-  SimClusters20M145KUpdatedEntityEmbeddingsByFavScore = 2, // deprecated
-  UTTAccountRecommendations = 3 # dataset built by Onboarding team
-  SimClusters20M145K2020EntityEmbeddingsByFavScore = 4
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct InferredEntity {
-  1: required i64 entityId(personalDataType = 'SemanticcoreClassification')
-  2: required double score(personalDataType = 'EngagementScore')
-  3: optional SimClustersSource simclusterSource
-  4: optional EntitySource entitySource
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct SimClustersInferredEntities {
-  1: required list<InferredEntity> entities
-}(persisted = 'true', hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/interests.docx b/src/thrift/com/twitter/simclusters_v2/interests.docx
new file mode 100644
index 000000000..2674747f1
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/interests.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/interests.thrift b/src/thrift/com/twitter/simclusters_v2/interests.thrift
deleted file mode 100644
index 5c1a04970..000000000
--- a/src/thrift/com/twitter/simclusters_v2/interests.thrift
+++ /dev/null
@@ -1,259 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.interests
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-/**
- * All of the scores below assume that the knownFor vector for each cluster is already
- * of unit L2 norm i.e. sum of squares is 1. 
- **/
-struct UserToInterestedInClusterScores {
-  // dot product of user's binary follow vector with knownFor vector for this cluster
-  // TIP: By default, use this score or favScore. 
-  1: optional double followScore(personalDataType = 'CountOfFollowersAndFollowees')
-
-  // first compute followScore as defined above
-  // then compute L2 norm of the vector of these scores for this cluster
-  // divide by that.
-  // essentially the more people are interested in this cluster, the lower this score gets
-  // TIP: Use this score if your use case needs to penalize clusters that a lot of other 
-  // users are also interested in
-  2: optional double followScoreClusterNormalizedOnly(personalDataType = 'CountOfFollowersAndFollowees')
-
-  // dot product of user's producer normalized follow vector and knownFor vector for this cluster
-  // i.e. i^th entry in the normalized follow vector = 1.0/sqrt(number of followers of user i)
-  // TIP: Use this score if your use case needs to penalize clusters where the users known for
-  // that cluster are popular. 
-  3: optional double followScoreProducerNormalizedOnly(personalDataType = 'CountOfFollowersAndFollowees')
-
-  // first compute followScoreProducerNormalizedOnly
-  // then compute L2 norm of the vector of these scores for this cluster
-  // divide by that.
-  // essentially the more people are interested in this cluster, the lower this score gets
-  // TIP: Use this score if your use case needs to penalize both clusters that a lot of other
-  // users are interested in, as well as clusters where the users known for that cluster are 
-  // popular.
-  4: optional double followScoreClusterAndProducerNormalized(personalDataType = 'CountOfFollowersAndFollowees')
-
-  // dot product of user's favScoreHalfLife100Days vector with knownFor vector for this cluster 
-  // TIP: By default, use this score or followScore. 
-  5: optional double favScore(personalDataType = 'EngagementsPublic')
-
-  // first compute favScore as defined above
-  // then compute L2 norm of the vector of these scores for this cluster
-  // divide by that.
-  // essentially the more people are interested in this cluster, the lower this score gets
-  // TIP: Use this score if your use case needs to penalize clusters that a lot of other 
-  // users are also interested in
-  6: optional double favScoreClusterNormalizedOnly(personalDataType = 'EngagementsPublic')
-
-  // dot product of user's favScoreHalfLife100DaysNormalizedByNeighborFaversL2 vector with 
-  // knownFor vector for this cluster
-  // TIP: Use this score if your use case needs to penalize clusters where the users known for
-  // that cluster are popular. 
-  7: optional double favScoreProducerNormalizedOnly(personalDataType = 'EngagementsPublic')
-
-  // first compute favScoreProducerNormalizedOnly as defined above
-  // then compute L2 norm of the vector of these scores for this cluster
-  // divide by that.
-  // essentially the more people are interested in this cluster, the lower this score gets
-  // TIP: Use this score if your use case needs to penalize both clusters that a lot of other
-  // users are interested in, as well as clusters where the users known for that cluster are 
-  // popular.
-  8: optional double favScoreClusterAndProducerNormalized(personalDataType = 'EngagementsPublic')
-
-  // list of users who're known for this cluster as well as are being followed by the user.
-  9: optional list<i64> usersBeingFollowed(personalDataType = 'UserId')
- 
-  // list of users who're known for this cluster as well as were faved at some point by the user. 
-  10: optional list<i64> usersThatWereFaved(personalDataType = 'UserId')
-
-  // A pretty close upper bound on the number of users who are interested in this cluster. 
-  // Useful to know if this is a niche community or a popular topic. 
-  11: optional i32 numUsersInterestedInThisClusterUpperBound
-
-  // dot product of user's logFavScore vector with knownFor vector for this cluster 
-  // TIP: this score is under experimentations
-  12: optional double logFavScore(personalDataType = 'EngagementsPublic')
-
-  // first compute logFavScore as defined above
-  // then compute L2 norm of the vector of these scores for this cluster
-  // divide by that.
-  // essentially the more people are interested in this cluster, the lower this score gets
-  // TIP: this score is under experimentations
-  13: optional double logFavScoreClusterNormalizedOnly(personalDataType = 'EngagementsPublic')
-
-  // actual count of number of users who're known for this cluster as well as are being followed by the user.
-  14: optional i32 numUsersBeingFollowed
-
-  // actual count of number of users who're known for this cluster as well as were faved at some point by the user. 
-  15: optional i32 numUsersThatWereFaved
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct UserToInterestedInClusters {
-  1: required i64 userId(personalDataType = 'UserId')
-  2: required string knownForModelVersion
-  3: required map<i32, UserToInterestedInClusterScores> clusterIdToScores(personalDataTypeKey = 'InferredInterests')
-}(persisted="true", hasPersonalData = 'true')
-
-struct LanguageToClusters {
-  1: required string language
-  2: required string knownForModelVersion
-  3: required map<i32, UserToInterestedInClusterScores> clusterIdToScores(personalDataTypeKey = 'InferredInterests')
-}(persisted="true", hasPersonalData = 'true')
-
-struct ClustersUserIsInterestedIn {
-  1: required string knownForModelVersion
-  2: required map<i32, UserToInterestedInClusterScores> clusterIdToScores(personalDataTypeKey = 'InferredInterests')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct UserToKnownForClusters {
-  1: required i64 userId(personalDataType = 'UserId')
-  2: required string knownForModelVersion
-  3: required map<i32, UserToKnownForClusterScores> clusterIdToScores(personalDataTypeKey = 'InferredInterests')
-}(persisted="true", hasPersonalData = 'true')
-
-struct UserToKnownForClusterScores {
-  1: optional double knownForScore
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct ClustersUserIsKnownFor {
-  1: required string knownForModelVersion
-  2: required map<i32, UserToKnownForClusterScores> clusterIdToScores(personalDataTypeKey = 'InferredInterests')
-}(persisted = 'true', hasPersonalData = 'true')
-
-/** Thrift struct for storing quantile bounds output by QTreeMonoid in Algebird */
-struct QuantileBounds {
-  1: required double lowerBound
-  2: required double upperBound
-}(persisted = 'true', hasPersonalData = 'false')
-
-/** Thrift struct giving the details of the distribution of a set of doubles */
-struct DistributionDetails {
-  1: required double mean
-  2: optional double standardDeviation
-  3: optional double min
-  4: optional QuantileBounds p25
-  5: optional QuantileBounds p50
-  6: optional QuantileBounds p75
-  7: optional QuantileBounds p95
-  8: optional double max
-}(persisted = 'true', hasPersonalData = 'false')
-
-/** Note that the modelVersion here is specified somewhere outside, specifically, as part of the key */
-struct ClusterNeighbor {
-  1: required i32 clusterId
-  /** Note that followCosineSimilarity is same as dot product over followScoreClusterNormalizedOnly
-   * since those scores form a unit vector **/
-  2: optional double followCosineSimilarity
-  /** Note that favCosineSimilarity is same as dot product over favScoreClusterNormalizedOnly
-   * since those scores form a unit vector **/
-  3: optional double favCosineSimilarity
-  /** Note that logFavCosineSimilarity is same as dot product over logFavScoreClusterNormalizedOnly
-   * since those scores form a unit vector **/
-  4: optional double logFavCosineSimilarity
-}(persisted = 'true', hasPersonalData = 'false')
-
-/** Useful for storing the list of users known for a cluster */
-struct UserWithScore {
-  1: required i64 userId(personalDataType = 'UserId')
-  2: required double score
-}(persisted="true", hasPersonalData = 'true')
-
-// deprecated
-struct EdgeCut {
-  1: required double cutEdges
-  2: required double totalVolume
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct ClusterQuality {
-  // deprecated
-  1: optional EdgeCut deprecated_unweightedEdgeCut
-  // deprecated
-  2: optional EdgeCut deprecated_edgeWeightedCut
-  // deprecated
-  3: optional EdgeCut deprecated_nodeAndEdgeWeightedCut
-
-  // correlation of actual weight of (u, v) with I(u & v in same cluster) * score(u) * score(v)
-  4: optional double weightAndProductOfNodeScoresCorrelation
-
-  // fraction of edges staying inside cluster divided by total edges from nodes in the cluster
-  5: optional double unweightedRecall
-
-  // fraction of edge weights staying inside cluster divided by total edge weights from nodes in the cluster
-  6: optional double weightedRecall
-
-  // total edges from nodes in the cluster
-  7: optional double unweightedRecallDenominator
-
-  // total edge weights from nodes in the cluster
-  8: optional double weightedRecallDenominator
-
-  // sum of edge weights inside cluster / { #nodes * (#nodes - 1) }
-  9: optional double relativePrecisionNumerator
-
-  // above divided by the sum of edge weights in the total graph / { n * (n - 1) }
-  10: optional double relativePrecision
-}(persisted = 'true', hasPersonalData = 'false')
-
-/**
-* This struct is the value of the ClusterDetails key-value dataset.
-* The key is (modelVersion, clusterId)
-**/
-struct ClusterDetails {
-  1: required i32 numUsersWithAnyNonZeroScore
-  2: required i32 numUsersWithNonZeroFollowScore
-  3: required i32 numUsersWithNonZeroFavScore
-  4: optional DistributionDetails followScoreDistributionDetails
-  5: optional DistributionDetails favScoreDistributionDetails
-  6: optional list<UserWithScore> knownForUsersAndScores
-  7: optional list<ClusterNeighbor> neighborClusters
-  // fraction of users who're known for this cluster who're marked NSFW_User in UserSource
-  8: optional double fractionKnownForMarkedNSFWUser
-  // the major languages that this cluster's known_fors have as their "language" field in
-  // UserSource, and the fractions
-  9: optional map<string, double> languageToFractionDeviceLanguage
-  // the major country codes that this cluster's known_fors have as their "account_country_code"
-  // field in UserSource, and the fractions
-  10: optional map<string, double> countryCodeToFractionKnownForWithCountryCode
-  11: optional ClusterQuality qualityMeasuredOnSimsGraph
-  12: optional DistributionDetails logFavScoreDistributionDetails
-  // fraction of languages this cluster's known_fors produce based on what penguin_user_languages dataset infers
-  13: optional map<string, double> languageToFractionInferredLanguage
-}(persisted="true", hasPersonalData = 'true')
-
-struct SampledEdge {
-  1: required i64 followerId(personalDataType = 'UserId')
-  2: required i64 followeeId(personalDataType = 'UserId')
-  3: optional double favWtIfFollowEdge
-  4: optional double favWtIfFavEdge
-  5: optional double followScoreToCluster
-  6: optional double favScoreToCluster
-  7: optional double predictedFollowScore
-  8: optional double predictedFavScore
-}(persisted="true", hasPersonalData = 'true')
-
-/**
-* The key here is (modelVersion, clusterId)
-**/
-struct BipartiteClusterQuality {
-  1: optional double inClusterFollowEdges
-  2: optional double inClusterFavEdges
-  3: optional double favWtSumOfInClusterFollowEdges
-  4: optional double favWtSumOfInClusterFavEdges
-  5: optional double outgoingFollowEdges
-  6: optional double outgoingFavEdges
-  7: optional double favWtSumOfOutgoingFollowEdges
-  8: optional double favWtSumOfOutgoingFavEdges
-  9: optional double incomingFollowEdges
-  10: optional double incomingFavEdges
-  11: optional double favWtSumOfIncomingFollowEdges
-  12: optional double favWtSumOfIncomingFavEdges
-  13: optional i32 interestedInSize
-  14: optional list<SampledEdge> sampledEdges
-  15: optional i32 knownForSize
-  16: optional double correlationOfFavWtIfFollowWithPredictedFollow
-  17: optional double correlationOfFavWtIfFavWithPredictedFav
-  18: optional double relativePrecisionUsingFavWtIfFav
-  19: optional double averagePrecisionOfWholeGraphUsingFavWtIfFav
-}(persisted="true", hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/multi_type_graph.docx b/src/thrift/com/twitter/simclusters_v2/multi_type_graph.docx
new file mode 100644
index 000000000..f2a06a2a6
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/multi_type_graph.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/multi_type_graph.thrift b/src/thrift/com/twitter/simclusters_v2/multi_type_graph.thrift
deleted file mode 100644
index f7dee7381..000000000
--- a/src/thrift/com/twitter/simclusters_v2/multi_type_graph.thrift
+++ /dev/null
@@ -1,110 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.multi_type_graph
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "entity.thrift"
-
-union LeftNode {
-  1: i64 userId(personalDataType = 'UserId')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct RightNode {
-  1: required RightNodeType rightNodeType(personalDataType = 'EngagementsPublic')
-  2: required Noun noun
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct RightNodeWithEdgeWeight {
-  1: required RightNode rightNode
-  2: required double weight(personalDataType = 'EngagementScore')
-}(persisted = 'true', hasPersonalData = 'true')
-
-enum RightNodeType {
-  FollowUser = 1,
-  FavUser = 2,
-  BlockUser = 3,
-  AbuseReportUser = 4,
-  SpamReportUser = 5,
-  FollowTopic = 6,
-  SignUpCountry = 7,
-  ConsumedLanguage = 8,
-  FavTweet = 9,
-  ReplyTweet = 10,
-  RetweetTweet = 11,
-  NotifOpenOrClickTweet = 12,
-  SearchQuery = 13
-}(persisted = 'true')
-
-union Noun {
-// Note: Each of the following needs to have an ordering defined in Ordering[Noun]
-// in file: multi_type_graph/assemble_multi_type_graph/AssembleMultiTypeGraph.scala
-// Please take note to make changes to Ordering[Noun] when modifying/adding new noun type here
-  1: i64 userId(personalDataType = 'UserId')
-  2: string country(personalDataType = 'InferredCountry')
-  3: string language(personalDataType = 'InferredLanguage')
-  4: i64 topicId(personalDataType = 'TopicFollow')
-  5: i64 tweetId(personalDataType = 'TweetId')
-  6: string query(personalDataType = 'SearchQuery')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct RightNodeWithEdgeWeightList {
-  1: required list<RightNodeWithEdgeWeight> rightNodeWithEdgeWeightList
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct NounWithFrequency {
-  1: required Noun noun
-  2: required double frequency (personalDataType = 'EngagementScore')
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct NounWithFrequencyList {
-  1: required list<NounWithFrequency> nounWithFrequencyList
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct RightNodeTypeStruct {
-   1: required RightNodeType rightNodeType
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct MultiTypeGraphEdge{
-   1: required LeftNode leftNode
-   2: required RightNodeWithEdgeWeight rightNodeWithEdgeWeight
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct LeftNodeToRightNodeWithEdgeWeightList{
-   1: required LeftNode leftNode
-   2: required RightNodeWithEdgeWeightList rightNodeWithEdgeWeightList
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct RightNodeSimHashSketch {
-  1: required RightNode rightNode
-  2: required list<byte> simHashOfEngagers
-  3: optional double normalizer
-}(persisted='true', hasPersonalData = 'false')
-
-struct SimilarRightNode {
-  1: required RightNode rightNode
-  2: required double score (personalDataType = 'EngagementScore')
-}(persisted='true', hasPersonalData = 'true')
-
-struct SimilarRightNodes {
-  1: required list<SimilarRightNode> rightNodesWithScores
-}(persisted='true', hasPersonalData = 'true')
-
-struct RightNodeWithScore {
-  1: required RightNode rightNode
-  2: required double clusterScore (personalDataType = 'EngagementScore')
-}(persisted='true', hasPersonalData = 'true')
-
-struct RightNodeWithScoreList {
-  1: required list<RightNodeWithScore> rightNodeWithScoreList
-}(persisted='true', hasPersonalData = 'true')
-
-struct RightNodeWithClusters {
-  1: required RightNode rightNode
-  2: required string modelVersion (personalDataType = 'EngagementId')
-  3: required map<i32, double> clusterIdToScores (personalDataTypeKey = 'EngagementId', personalDataTypeValue = 'EngagementScore')
-}(persisted="true", hasPersonalData = 'true')
-
-struct ModelVersionWithClusterScores {
-  1: required string modelVersion (personalDataType = 'EngagementId')
-  2: required map<i32, double> clusterIdToScores (personalDataTypeKey = 'EngagementId', personalDataTypeValue = 'EngagementScore')
-}(persisted = 'true', hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/offline_job_internal.docx b/src/thrift/com/twitter/simclusters_v2/offline_job_internal.docx
new file mode 100644
index 000000000..9ef45e643
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/offline_job_internal.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/offline_job_internal.thrift b/src/thrift/com/twitter/simclusters_v2/offline_job_internal.thrift
deleted file mode 100644
index 257ef1f99..000000000
--- a/src/thrift/com/twitter/simclusters_v2/offline_job_internal.thrift
+++ /dev/null
@@ -1,63 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.offline_job_internal
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "com/twitter/algebird_internal/algebird.thrift"
-
-// For internal usage only. Mainly for offline_evaluation.
-// Deprecated. Please use 'online_store/ModelVersion'
-enum PersistedModelVersion {
-  MODEL_20M_145K_dec11 = 1,
-  MODEL_20M_145K_updated = 2,
-  MODEL_20M_145K_2020 = 3,
-  RESERVED_4 = 4,
-  RESERVED_5 = 5
-}(persisted = 'true', hasPersonalData = 'false')
-
-enum PersistedScoreType {
-  NORMALIZED_FAV_8_HR_HALF_LIFE = 1,
-  NORMALIZED_FOLLOW_8_HR_HALF_LIFE = 2,
-  NORMALIZED_LOG_FAV_8_HR_HALF_LIFE = 3,
-  RESERVED_4 = 4,
-  RESERVED_5 = 5
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct PersistedScores {
-  1: optional algebird.DecayedValue score
-}(persisted = 'true', hasPersonalData = 'false')
-
-struct TweetAndClusterScores {
-  1: required i64 tweetId(personalDataType = 'TweetId')
-  2: required i32 clusterId(personalDataType = 'InferredInterests')
-  3: required PersistedModelVersion modelVersion
-  4: required PersistedScores scores(personalDataType = 'EngagementScore')
-  5: optional PersistedScoreType scoreType
-}(persisted="true", hasPersonalData = 'true')
-
-struct TweetTopKClustersWithScores {
-  1: required i64 tweetId(personalDataType = 'TweetId')
-  2: required PersistedModelVersion modelVersion
-  3: required map<i32, PersistedScores> topKClusters(personalDataTypeKey = 'InferredInterests')
-  4: optional PersistedScoreType scoreType
-}(persisted="true", hasPersonalData = 'true')
-
-struct ClusterTopKTweetsWithScores {
-  1: required i32 clusterId(personalDataType = 'InferredInterests')
-  2: required PersistedModelVersion modelVersion
-  3: required map<i64, PersistedScores> topKTweets(personalDataTypeKey = 'TweetId')
-  4: optional PersistedScoreType scoreType
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct QueryAndClusterScores {
-  1: required string query(personalDataType = 'SearchQuery')
-  2: required i32 clusterId
-  3: required PersistedModelVersion modelVersion
-  4: required PersistedScores scores
-}(persisted = 'true', hasPersonalData = 'true')
-
-struct QueryTopKClustersWithScores {
-  1: required string query(personalDataType = 'SearchQuery')
-  2: required PersistedModelVersion modelVersion
-  3: required map<i32, PersistedScores> topKClusters
-}(persisted = 'true', hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/online_store.docx b/src/thrift/com/twitter/simclusters_v2/online_store.docx
new file mode 100644
index 000000000..342de0c1d
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/online_store.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/online_store.thrift b/src/thrift/com/twitter/simclusters_v2/online_store.thrift
deleted file mode 100644
index fb5aff6ad..000000000
--- a/src/thrift/com/twitter/simclusters_v2/online_store.thrift
+++ /dev/null
@@ -1,92 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.online_store
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "entity.thrift"
-include "com/twitter/algebird_internal/algebird.thrift"
-
-/**
- * A SimClusters model version.
- **/
-enum ModelVersion {
-	MODEL_20M_145K_dec11 = 1, // DEPRECATED
-	MODEL_20M_145K_updated = 2, // DEPRECATED
-  MODEL_20M_145K_2020 = 3,
-  RESERVED_4 = 4,
-  RESERVED_5 = 5,
-  RESERVED_6 = 6
-}(persisted = 'true', hasPersonalData = 'false')
-
-/**
- * Uniquely identifies a SimCluster. All fields are required as this is used as a memcache key.
- **/
-struct FullClusterId {
-  1: required ModelVersion modelVersion
-  2: required i32 clusterId
-}(persisted='true', hasPersonalData = 'false')
-
-/**
- * Contains a set of scores per cluster.
- **/
-struct Scores {
-  1: optional algebird.DecayedValue favClusterNormalized8HrHalfLifeScore
-  2: optional algebird.DecayedValue followClusterNormalized8HrHalfLifeScore
-}(hasPersonalData = 'false')
-
-/**
- * A combination of entity and model. All fields are required as this is used as a memcache key.
- **/
-struct EntityWithVersion {
-  1: required entity.SimClusterEntity entity
-  2: required ModelVersion version
-}(hasPersonalData = 'true')
-
-/**
- * Contains top K clusters with corresponding scores. We're representing clusters purely using ints, and
- * omitting the modelVersion, since that is included in the memcache key.
- **/
-struct TopKClustersWithScores {
-  1: optional map<i32, Scores> topClustersByFavClusterNormalizedScore(personalDataTypeKey = 'InferredInterests')
-  2: optional map<i32, Scores> topClustersByFollowClusterNormalizedScore(personalDataTypeKey = 'InferredInterests')
-}(hasPersonalData = 'true')
-
-/**
- * Contains top K text entities with corresponding scores.  We're omitting the modelVersion,
- * since that is included in the memcache key.
- **/
-struct TopKEntitiesWithScores {
-  1: optional map<entity.TweetTextEntity, Scores> topEntitiesByFavClusterNormalizedScore
-  2: optional map<entity.TweetTextEntity, Scores> topEntitiesByFollowClusterNormalizedScore
-}(hasPersonalData = 'true')
-
-/**
- * Contains top K tweets with corresponding scores. We're omitting the modelVersion,
- * since that is included in the memcache key.
- **/
-struct TopKTweetsWithScores {
-  1: optional map<i64, Scores> topTweetsByFavClusterNormalizedScore(personalDataTypeKey='TweetId')
-  2: optional map<i64, Scores> topTweetsByFollowClusterNormalizedScore(personalDataTypeKey='TweetId')
-}(hasPersonalData = 'true')
-
-/**
- * Contains FullClusterId and the corresponding top K tweets and scores.
- **/
-struct ClusterIdToTopKTweetsWithScores {
-  1: required FullClusterId clusterId
-  2: required TopKTweetsWithScores topKTweetsWithScores
-}(hasPersonalData = 'true')
-
-/**
- * Contains a map of Model Version to top K clusters with corresponding scores.
- **/
-struct MultiModelTopKClustersWithScores {
-  1: optional map<ModelVersion, TopKClustersWithScores> multiModelTopKClustersWithScores
-}(hasPersonalData = 'true')
-
-/**
- * Contains a map of Model Version top K tweets with corresponding scores.
- **/
-struct MultiModelTopKTweetsWithScores {
-  1: optional map<ModelVersion, TopKTweetsWithScores> multiModelTopKTweetsWithScores
-}(hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/online_store_internal.docx b/src/thrift/com/twitter/simclusters_v2/online_store_internal.docx
new file mode 100644
index 000000000..85aa50dda
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/online_store_internal.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/online_store_internal.thrift b/src/thrift/com/twitter/simclusters_v2/online_store_internal.thrift
deleted file mode 100644
index b5fd6afb9..000000000
--- a/src/thrift/com/twitter/simclusters_v2/online_store_internal.thrift
+++ /dev/null
@@ -1,30 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.online_store_internal
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "online_store.thrift"
-
-/**
- * Contains a hash bucket of the clusterId along with the Model Version.
- * All fields are required as this is used as a memcache key.
- **/
-struct FullClusterIdBucket {
-  1: required online_store.ModelVersion modelVersion
-  // (hash(clusterId) mod NUM_BUCKETS_XXXXXX)
-  2: required i32 bucket
-}(hasPersonalData = 'false')
-
-/**
- * Contains scores per clusters. The model is not stored here as it's encoded into the memcache key.
- **/
-struct ClustersWithScores {
- 1: optional map<i32, online_store.Scores> clustersToScore(personalDataTypeKey = 'InferredInterests')
-}(hasPersonalData = 'true')
-
-/**
- * Contains a map of model version to scores per clusters.
- **/
-struct MultiModelClustersWithScores {
- 1: optional map<online_store.ModelVersion,ClustersWithScores> multiModelClustersWithScores
-}(hasPersonalData = 'true')
diff --git a/src/thrift/com/twitter/simclusters_v2/score.docx b/src/thrift/com/twitter/simclusters_v2/score.docx
new file mode 100644
index 000000000..a9a3b842c
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/score.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/score.thrift b/src/thrift/com/twitter/simclusters_v2/score.thrift
deleted file mode 100644
index 8ee20e72c..000000000
--- a/src/thrift/com/twitter/simclusters_v2/score.thrift
+++ /dev/null
@@ -1,71 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.score
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "com/twitter/simclusters_v2/embedding.thrift"
-include "com/twitter/simclusters_v2/identifier.thrift"
-
-/**
-  * The algorithm type to identify the score algorithm.
-  * Assume that a algorithm support and only support one kind
-  * of [[ScoreInternalId]]
-  **/
-enum ScoringAlgorithm {
-	// Reserve 0001 - 999 for Basic Pairwise Scoring Calculation
-	PairEmbeddingDotProduct = 1,
-	PairEmbeddingCosineSimilarity = 2,
-	PairEmbeddingJaccardSimilarity = 3,
-	PairEmbeddingEuclideanDistance = 4,
-	PairEmbeddingManhattanDistance = 5,
-  PairEmbeddingLogCosineSimilarity = 6,
-  PairEmbeddingExpScaledCosineSimilarity = 7,
-
-	// Reserve 1000 - 1999 for Tweet Similarity Model
-  TagSpaceCosineSimilarity = 1000,
-	WeightedSumTagSpaceRankingExperiment1 = 1001, //deprecated
-	WeightedSumTagSpaceRankingExperiment2 = 1002, //deprecated
-  WeightedSumTagSpaceANNExperiment = 1003,      //deprecated 
-
-	// Reserved for 10001 - 20000 for Aggregate scoring
-	WeightedSumTopicTweetRanking = 10001,
-	CortexTopicTweetLabel = 10002,
-	// Reserved 20001 - 30000 for Topic Tweet scores 
-	CertoNormalizedDotProductScore = 20001,
-	CertoNormalizedCosineScore = 20002
-}(hasPersonalData = 'false')
-
-/**
-  * The identifier type for the score between a pair of SimClusters Embedding.
-  * Used as the persistent key of a SimClustersEmbedding score.
-  * Support score between different [[EmbeddingType]] / [[ModelVersion]]
-  **/
-struct SimClustersEmbeddingPairScoreId {
-  1: required identifier.SimClustersEmbeddingId id1
-  2: required identifier.SimClustersEmbeddingId id2
-}(hasPersonalData = 'true')
-
-/**
-  * The identifier type for the score between a pair of InternalId.
-  **/
-struct GenericPairScoreId {
-  1: required identifier.InternalId id1
-  2: required identifier.InternalId id2
-}(hasPersonalData = 'true')
-
-union ScoreInternalId {
-  1: GenericPairScoreId genericPairScoreId
-  2: SimClustersEmbeddingPairScoreId simClustersEmbeddingPairScoreId
-}
-
-/**
-  * A uniform Identifier type for all kinds of Calculation Score
-  **/
-struct ScoreId {
-  1: required ScoringAlgorithm algorithm
-  2: required ScoreInternalId internalId
-}(hasPersonalData = 'true')
-
-struct Score {
-  1: required double score
-}(hasPersonalData = 'false')
diff --git a/src/thrift/com/twitter/simclusters_v2/simclusters_presto.docx b/src/thrift/com/twitter/simclusters_v2/simclusters_presto.docx
new file mode 100644
index 000000000..a7d872272
Binary files /dev/null and b/src/thrift/com/twitter/simclusters_v2/simclusters_presto.docx differ
diff --git a/src/thrift/com/twitter/simclusters_v2/simclusters_presto.thrift b/src/thrift/com/twitter/simclusters_v2/simclusters_presto.thrift
deleted file mode 100644
index 93eae6c62..000000000
--- a/src/thrift/com/twitter/simclusters_v2/simclusters_presto.thrift
+++ /dev/null
@@ -1,59 +0,0 @@
-namespace java com.twitter.simclusters_v2.thriftjava
-namespace py gen.twitter.simclusters_v2.simclusters_presto
-#@namespace scala com.twitter.simclusters_v2.thriftscala
-#@namespace strato com.twitter.simclusters_v2
-
-include "embedding.thrift"
-include "identifier.thrift"
-include "interests.thrift"
-include "online_store.thrift"
-
-/**
-  * This struct is the presto-compatible "lite" version of the ClusterDetails thrift
-  */
-struct ClusterDetailsLite {
-  1: required online_store.FullClusterId fullClusterId
-  2: required i32 numUsersWithAnyNonZeroScore
-  3: required i32 numUsersWithNonZeroFollowScore
-  4: required i32 numUsersWithNonZeroFavScore
-  5: required list<interests.UserWithScore> knownForUsersAndScores
-}(persisted="true", hasPersonalData = 'true')
-
-struct EmbeddingsLite {
-  1: required i64 entityId
-  2: required i32 clusterId
-  3: required double score
-}(persisted="true", hasPersonalData = 'true')
-
-struct SimClustersEmbeddingWithId {
-  1: required identifier.SimClustersEmbeddingId embeddingId
-  2: required embedding.SimClustersEmbedding embedding
-}(persisted="true", hasPersonalData = 'true')
-
-struct InternalIdEmbeddingWithId {
-  1: required identifier.SimClustersEmbeddingId embeddingId
-  2: required embedding.InternalIdEmbedding embedding
-}(persisted="true", hasPersonalData = 'true')
-
-/**
-* This struct is the presto-compatible version of the fav_tfg_topic_embeddings
-*/
-struct ClustersScore {
-  1: required i64 clusterId(personalDataType = 'SemanticcoreClassification')
-  2: required double score(personalDataType = 'EngagementScore')
-}(persisted="true", hasPersonalData = 'true')
-
-struct FavTfgTopicEmbeddings {
-  1: required identifier.TopicId topicId
-  2: required list<ClustersScore> clusterScore
-}(persisted="true", hasPersonalData = 'true')
-
-struct TfgTopicEmbeddings {
-  1: required identifier.TopicId topicId
-  2: required list<ClustersScore> clusterScore
-}(persisted="true", hasPersonalData = 'true')
-
-struct UserTopicWeightedEmbedding {
-  1: required i64 userId(personalDataType = 'UserId')
-  2: required list<ClustersScore> clusterScore
-}(persisted="true", hasPersonalData = 'true')