Open-sourcing Tweetypie

Tweetypie is the core Tweet service that handles the reading and writing of Tweet data.
2024-12-22 10:11:52 +01:00 · 2023-05-12 09:11:38 -07:00 · 2023-05-12 09:11:38 -07:00 · 01dbfee4c0
commit 01dbfee4c0
parent 90d7ea370e
591 changed files with 68352 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -24,6 +24,7 @@ Product surfaces at Twitter are built on a shared set of data, models, and softw
 |                    | [timelines-aggregation-framework](timelines/data_processing/ml_util/aggregation_framework/README.md) | Framework for generating aggregate features in batch or real time. |
 |                    | [representation-manager](representation-manager/README.md) | Service to retrieve embeddings (i.e. SimClusters and TwHIN). |
 |                    | [twml](twml/README.md) | Legacy machine learning framework built on TensorFlow v1. |
+|                    | [Tweetypie](tweetypie/server/README.md) | Core Tweet service that handles the reading and writing of Tweet data. |

 The product surface currently included in this repository is the For You Timeline.

--- a/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala
@ -0,0 +1,118 @@
+package com.twitter.tweetypie.additionalfields
+
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.scrooge.TFieldBlob
+import com.twitter.scrooge.ThriftStructField
+
+object AdditionalFields {
+  /** Thrift field ID of a [[Tweet]] field. */
+  type FieldId = Short
+
+  /** additional fields really start at 100, but we are ignoring conversation id for now */
+  val StartAdditionalId = 101
+
+  /** all known [[Tweet]] field IDs */
+  val CompiledFieldIds: Seq[FieldId] = Tweet.metaData.fields.map(_.id)
+
+  /** all known [[Tweet]] fields in the "additional-field" range (excludes id) */
+  val CompiledAdditionalFieldMetaDatas: Seq[ThriftStructField[Tweet]] =
+    Tweet.metaData.fields.filter(f => isAdditionalFieldId(f.id))
+
+  /** all known [[Tweet]] fields in the "additional-field" range, keyed by field ID */
+  val CompiledAdditionalFieldsMap: Map[Short, ThriftStructField[Tweet]] =
+    CompiledAdditionalFieldMetaDatas.map(field => (field.id, field)).toMap
+
+  /** all known [[Tweet]] field IDs in the "additional-field" range */
+  val CompiledAdditionalFieldIds: Seq[FieldId] =
+    CompiledAdditionalFieldsMap.keys.toSeq
+
+  /** all [[Tweet]] field IDs which should be rejected when set as additional
+   * fields via PostTweetRequest.additionalFields or RetweetRequest.additionalFields */
+  val RejectedFieldIds: Seq[FieldId] = Seq(
+    // Should be provided via PostTweetRequest.conversationControl field. go/convocontrolsbackend
+    Tweet.ConversationControlField.id,
+    // This field should only be set based on whether the client sets the right community
+    // tweet annotation.
+    Tweet.CommunitiesField.id,
+    // This field should not be set by clients and should opt for
+    // [[PostTweetRequest.ExclusiveTweetControlOptions]].
+    // The exclusiveTweetControl field requires the userId to be set
+    // and we shouldn't trust the client to provide the right one.
+    Tweet.ExclusiveTweetControlField.id,
+    // This field should not be set by clients and should opt for
+    // [[PostTweetRequest.TrustedFriendsControlOptions]].
+    // The trustedFriendsControl field requires the trustedFriendsListId to be
+    // set and we shouldn't trust the client to provide the right one.
+    Tweet.TrustedFriendsControlField.id,
+    // This field should not be set by clients and should opt for
+    // [[PostTweetRequest.CollabControlOptions]].
+    // The collabControl field requires a list of Collaborators to be
+    // set and we shouldn't trust the client to provide the right one.
+    Tweet.CollabControlField.id
+  )
+
+  /**
+   * Whether the given field ID falls in the "additional-field" range, i.e. is at
+   * least [[StartAdditionalId]].
+   */
+  def isAdditionalFieldId(fieldId: FieldId): Boolean =
+    fieldId >= StartAdditionalId
+
+  /**
+   * Provides a list of all additional field IDs on the tweet, which include all
+   * the compiled additional fields and all the provided passthrough fields.  This includes
+   * compiled additional fields where the value is None.
+   */
+  def allAdditionalFieldIds(tweet: Tweet): Seq[FieldId] =
+    CompiledAdditionalFieldIds ++ tweet._passthroughFields.keys
+
+  /**
+   * Provides a list of all field IDs that have a value on the tweet but may not be set as
+   * additional fields: compiled fields outside the "additional-field" range that have a
+   * field blob on the tweet (excluding [[Tweet.id]]), plus every passthrough field ID,
+   * i.e. fields that are not known compiled fields.
+   */
+  def unsettableAdditionalFieldIds(tweet: Tweet): Seq[FieldId] =
+    CompiledFieldIds
+      .filter { id =>
+        !isAdditionalFieldId(id) && id != Tweet.IdField.id && tweet.getFieldBlob(id).isDefined
+      } ++
+      tweet._passthroughFields.keys
+
+  /**
+   * Provides a list of all field IDs that have a value on the tweet which are explicitly disallowed
+   * from being set via PostTweetRequest.additionalFields and RetweetRequest.additionalFields
+   */
+  def rejectedAdditionalFieldIds(tweet: Tweet): Seq[FieldId] =
+    RejectedFieldIds
+      .filter { id => tweet.getFieldBlob(id).isDefined }
+
+  /**
+   * Builds the error message for a request that carries the given unsettable field IDs.
+   * The IDs are listed in ascending order, separated by ", ".
+   */
+  def unsettableAdditionalFieldIdsErrorMessage(unsettableFieldIds: Seq[FieldId]): String =
+    s"request may not contain fields: [${unsettableFieldIds.sorted.mkString(", ")}]"
+
+  /**
+   * Provides a list of all additional field IDs that have a value on the tweet,
+   * compiled and passthrough (excludes Tweet.id).
+   */
+  def nonEmptyAdditionalFieldIds(tweet: Tweet): Seq[FieldId] =
+    CompiledAdditionalFieldMetaDatas.collect {
+      case f if f.getValue(tweet) != None => f.id
+    } ++ tweet._passthroughFields.keys
+
+  /**
+   * Provides the field blobs of the tweet's compiled additional fields together with
+   * the blobs of its passthrough fields.
+   */
+  def additionalFields(tweet: Tweet): Seq[TFieldBlob] =
+    (tweet.getFieldBlobs(CompiledAdditionalFieldIds) ++ tweet._passthroughFields).values.toSeq
+
+  /**
+   * Merge base tweet with additional fields.
+   * Non-additional fields in the additional tweet are ignored.
+   * @param base a tweet that contains basic fields
+   * @param additional a tweet object that carries additional fields
+   */
+  def setAdditionalFields(base: Tweet, additional: Tweet): Tweet =
+    setAdditionalFields(base, additionalFields(additional))
+
+  /**
+   * Same as the two-tweet overload when `additional` is defined; returns `base`
+   * unchanged when it is None.
+   */
+  def setAdditionalFields(base: Tweet, additional: Option[Tweet]): Tweet =
+    additional.map(setAdditionalFields(base, _)).getOrElse(base)
+
+  /**
+   * Sets each of the given field blobs on `base`, in iteration order, and returns
+   * the resulting tweet.
+   */
+  def setAdditionalFields(base: Tweet, additional: Traversable[TFieldBlob]): Tweet =
+    additional.foldLeft(base) { case (t, f) => t.setField(f) }
+
+  /**
+   * Unsets the specified fields on the given tweet.
+   */
+  def unsetFields(tweet: Tweet, fieldIds: Iterable[FieldId]): Tweet = {
+    tweet.unsetFields(fieldIds.toSet)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD
@ -0,0 +1,15 @@
+# Scala library target for the additionalfields package. Builds every *.scala
+# file in this directory using the "fatal_warnings" compiler option set.
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "3rdparty/jvm/org/apache/thrift:libthrift",
+        "mediaservices/commons/src/main/thrift:thrift-scala",
+        "scrooge/scrooge-core",
+        "src/thrift/com/twitter/escherbird:media-annotation-structs-scala",
+        "src/thrift/com/twitter/spam/rtf:safety-label-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD
@ -0,0 +1,15 @@
+# Scala library target for the caching package, built using the
+# "fatal_warnings" compiler option set.
+# NOTE(review): no explicit `sources` is listed here, unlike the
+# additionalfields target; presumably the build tool's default applies. Confirm.
+scala_library(
+    compiler_option_sets = ["fatal_warnings"],
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle/finagle-memcached/src/main/scala",
+        "scrooge/scrooge-serializer",
+        "stitch/stitch-core",
+        "util/util-core",
+        "util/util-logging",
+        # CachedValue struct
+        "tweetypie/servo/repo/src/main/thrift:thrift-scala",
+        "util/util-slf4j-api/src/main/scala/com/twitter/util/logging",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala
@ -0,0 +1,241 @@
+package com.twitter.tweetypie.caching
+
+import com.twitter.finagle.service.StatsFilter
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.finagle.stats.ExceptionStatsHandler
+import com.twitter.finagle.stats.Counter
+import com.twitter.util.Future
+import com.twitter.util.logging.Logger
+import com.twitter.finagle.memcached
+import scala.util.control.NonFatal
+
+/**
+ * Wrapper around a memcached client that performs serialization and
+ * deserialization, tracks stats, provides tracing, and provides
+ * per-key fresh/stale/failure/miss results.
+ *
+ * The operations that write values to cache will only write values
+ * that the ValueSerializer says are cacheable. The idea here is that
+ * the deserialize and serialize functions must be coherent, and no
+ * matter how you choose to write these values back to cache, the
+ * serializer will have the appropriate knowledge about whether the
+ * values are cacheable.
+ *
+ * For most cases, you will want to use [[StitchCaching]] rather than
+ * calling this wrapper directly.
+ *
+ * @param keySerializer How to convert a K value to a memcached key.
+ *
+ * @param valueSerializer How to serialize and deserialize V values,
+ *   as well as which values are cacheable, and how long to store the
+ *   values in cache.
+ *
+ * @param memcachedClient The client used to read (`getResult`) and
+ *   write (`set`) the serialized values.
+ *
+ * @param statsReceiver Root receiver for all stats recorded by this
+ *   instance (scopes "memcached", "result" and "deserialize", plus
+ *   the "not_serialized" counter).
+ *
+ * @param logger Receives trace-level logging of every load and write,
+ *   a warning when deserialization throws, and an error when a
+ *   memcached result violates its contract.
+ *
+ * @param exceptionStatsHandler How exceptions are recorded against a
+ *   stats receiver.
+ */
+class CacheOperations[K, V](
+  keySerializer: K => String,
+  valueSerializer: ValueSerializer[V],
+  memcachedClient: memcached.Client,
+  statsReceiver: StatsReceiver,
+  logger: Logger,
+  exceptionStatsHandler: ExceptionStatsHandler = StatsFilter.DefaultExceptions) {
+  // The memcached operations that are performed via this
+  // [[CacheOperations]] instance will be tracked under this stats
+  // receiver.
+  //
+  // We count all memcached failures together under this scope,
+  // because memcached operations should not fail unless there are
+  // communication problems, so differentiating the method that was
+  // being called will not give us any useful information.
+  private[this] val memcachedStats: StatsReceiver = statsReceiver.scope("memcached")
+
+  // Incremented for every attempt to `get` a key from cache.
+  private[this] val memcachedGetCounter: Counter = memcachedStats.counter("get")
+
+  // One of these two counters is incremented for every successful
+  // response returned from a `get` call to memcached.
+  private[this] val memcachedNotFoundCounter: Counter = memcachedStats.counter("not_found")
+  private[this] val memcachedFoundCounter: Counter = memcachedStats.counter("found")
+
+  // Records the state of the cache load after deserialization. The
+  // policy may transform a value that was successfully loaded from
+  // cache into any result type, which is why we explicitly track
+  // "found" and "not_found" above. If `stale` + `fresh` is not equal
+  // to `found`, then it means that the policy has translated a found
+  // value into a miss or failure. The policy may do this in order to
+  // cause the caching filter to treat the value that was found in
+  // cache in the way it would have treated a miss or failure from
+  // cache.
+  private[this] val resultStats: StatsReceiver = statsReceiver.scope("result")
+  private[this] val resultFreshCounter: Counter = resultStats.counter("fresh")
+  private[this] val resultStaleCounter: Counter = resultStats.counter("stale")
+  private[this] val resultMissCounter: Counter = resultStats.counter("miss")
+  private[this] val resultFailureCounter: Counter = resultStats.counter("failure")
+
+  // Used for recording exceptions that occurred during
+  // deserialization. This will never be incremented if the
+  // deserializer returns a result, even if the result is a
+  // [[CacheResult.Failure]]. See the comment where this stat is
+  // incremented for more details.
+  private[this] val deserializeFailureStats: StatsReceiver = statsReceiver.scope("deserialize")
+
+  // Incremented whenever `set` is asked to write a value that the
+  // serializer declines to serialize (i.e. the value is not cacheable).
+  private[this] val notSerializedCounter: Counter = statsReceiver.counter("not_serialized")
+
+  /**
+   * Load a batch of values from cache. Mostly this deals with
+   * converting the [[memcached.GetResult]] to a
+   * `Seq[CacheResult[V]]`. The result is in the same order as the
+   * keys, and there will always be an entry for each key. This method
+   * should never return a [[Future.exception]].
+   */
+  def get(keys: Seq[K]): Future[Seq[CacheResult[V]]] = {
+    memcachedGetCounter.incr(keys.size)
+    val cacheKeys: Seq[String] = keys.map(keySerializer)
+    if (logger.isTraceEnabled) {
+      logger.trace {
+        val lines: Seq[String] = keys.zip(cacheKeys).map { case (k, c) => s"\n  $k ($c)" }
+        "Starting load for keys:" + lines.mkString
+      }
+    }
+
+    memcachedClient
+      .getResult(cacheKeys)
+      .map { getResult =>
+        memcachedNotFoundCounter.incr(getResult.misses.size)
+        val results: Seq[CacheResult[V]] =
+          cacheKeys.map { cacheKey =>
+            val result: CacheResult[V] =
+              getResult.hits.get(cacheKey) match {
+                case Some(memcachedValue) =>
+                  memcachedFoundCounter.incr()
+                  try {
+                    valueSerializer.deserialize(memcachedValue.value)
+                  } catch {
+                    case NonFatal(e) =>
+                      // If the serializer throws an exception, then
+                      // the serialized value was malformed. In that
+                      // case, we record the failure so that it can be
+                      // detected and fixed, but treat it as a cache
+                      // miss. The reason that we treat it as a miss
+                      // rather than a failure is that a miss will
+                      // cause a write back to cache, and we want to
+                      // write a valid result back to cache to replace
+                      // the bad entry that we just loaded.
+                      //
+                      // A serializer is free to return Miss itself to
+                      // obtain this behavior if it is expected or
+                      // desired, to avoid the logging and stats (and
+                      // the minor overhead of catching an exception).
+                      //
+                      // The exceptions are tracked separately from
+                      // other exceptions so that it is easy to see
+                      // whether the deserializer itself ever throws an
+                      // exception.
+                      exceptionStatsHandler.record(deserializeFailureStats, e)
+                      logger.warn(s"Failed deserializing value for cache key $cacheKey", e)
+                      CacheResult.Miss
+                  }
+
+                case None if getResult.misses.contains(cacheKey) =>
+                  CacheResult.Miss
+
+                case None =>
+                  val exception =
+                    getResult.failures.get(cacheKey) match {
+                      case None =>
+                        // To get here, this was not a hit or a miss,
+                        // so we expect the key to be present in
+                        // failures. If it is not, then either the
+                        // contract of getResult was violated, or this
+                        // method is somehow attempting to access a
+                        // result for a key that was not
+                        // loaded. Either of these indicates a bug, so
+                        // we log a high priority log message.
+                        logger.error(
+                          s"Key $cacheKey not found in hits, misses or failures. " +
+                            "This indicates a bug in the memcached library or " +
+                            "CacheOperations.get"
+                        )
+                        // We return this as a failure because that
+                        // will cause the repo to be consulted and the
+                        // value *not* to be written back to cache,
+                        // which is probably the safest thing to do
+                        // (if we don't know what's going on, default
+                        // to an uncached repo).
+                        //
+                        // The exception carries a message so that the
+                        // resulting CacheResult.Failure is
+                        // self-describing wherever it is inspected.
+                        new IllegalStateException(
+                          s"Key $cacheKey not found in hits, misses or failures")
+
+                      case Some(e) =>
+                        e
+                    }
+                  exceptionStatsHandler.record(memcachedStats, exception)
+                  CacheResult.Failure(exception)
+              }
+
+            // Count each kind of CacheResult, to make it possible to
+            // see how effective the caching is.
+            result match {
+              case CacheResult.Fresh(_) => resultFreshCounter.incr()
+              case CacheResult.Stale(_) => resultStaleCounter.incr()
+              case CacheResult.Miss => resultMissCounter.incr()
+              case CacheResult.Failure(_) => resultFailureCounter.incr()
+            }
+
+            result
+          }
+
+        if (logger.isTraceEnabled) {
+          logger.trace {
+            val lines: Seq[String] =
+              (keys, cacheKeys, results).zipped.map {
+                case (key, cacheKey, result) => s"\n  $key ($cacheKey) -> $result"
+              }
+
+            "Cache results:" + lines.mkString
+          }
+        }
+
+        results
+      }
+      .handle {
+        case e =>
+          // If there is a failure from the memcached client, fan it
+          // out to each cache key, so that the caller does not need
+          // to handle failure of the batch differently than failure
+          // of individual keys. This should be rare anyway, since the
+          // memcached client already does this for common Finagle
+          // exceptions
+          resultFailureCounter.incr(keys.size)
+          val theFailure: CacheResult[V] = CacheResult.Failure(e)
+          keys.map { _ =>
+            // Record this as many times as we would if it were in the GetResult
+            exceptionStatsHandler.record(memcachedStats, e)
+            theFailure
+          }
+      }
+  }
+
+  // Incremented for every attempt to `set` a key in cache.
+  private[this] val memcachedSetCounter: Counter = memcachedStats.counter("set")
+
+  /**
+   * Write an entry back to cache, using `set`. If the serializer does
+   * not serialize the value, then this method will immediately return
+   * with success.
+   *
+   * Failures of the memcached `set` call are recorded under the
+   * "memcached" stats scope and are propagated in the returned Future.
+   */
+  def set(key: K, value: V): Future[Unit] =
+    valueSerializer.serialize(value) match {
+      case Some((expiry, serialized)) =>
+        if (logger.isTraceEnabled) {
+          logger.trace(s"Writing back to cache $key -> $value (expiry = $expiry)")
+        }
+        memcachedSetCounter.incr()
+        memcachedClient
+          .set(key = keySerializer(key), flags = 0, expiry = expiry, value = serialized)
+          .onFailure(exceptionStatsHandler.record(memcachedStats, _))
+
+      case None =>
+        if (logger.isTraceEnabled) {
+          logger.trace(s"Not writing back $key -> $value")
+        }
+        notSerializedCounter.incr()
+        Future.Done
+    }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala
@ -0,0 +1,45 @@
+package com.twitter.tweetypie.caching
+
+/**
+ * Encodes the possible states of a value loaded from memcached:
+ * [[CacheResult.Failure]], [[CacheResult.Miss]], [[CacheResult.Fresh]]
+ * or [[CacheResult.Stale]].
+ *
+ * @see [[ValueSerializer]] and [[CacheOperations]]
+ */
+sealed trait CacheResult[+V]
+
+object CacheResult {
+
+  /**
+   * Signals that the value could not be successfully loaded from
+   * cache. `Failure` values should not be written back to cache.
+   *
+   * This value may result from an error talking to the memcached
+   * instance or it may be returned from the Serializer when the value
+   * should not be reused, but should also not be overwritten.
+   *
+   * @param e the exception describing why the load failed
+   */
+  final case class Failure(e: Throwable) extends CacheResult[Nothing]
+
+  /**
+   * Signals that the cache load attempt was successful, but there was
+   * not a usable value.
+   *
+   * When processing a `Miss`, the value should be written back to
+   * cache if it loads successfully.
+   */
+  case object Miss extends CacheResult[Nothing]
+
+  /**
+   * Signals that the value was found in cache.
+   *
+   * It is not necessary to load the value from the original source.
+   *
+   * @param value the value that was loaded from cache
+   */
+  case class Fresh[V](value: V) extends CacheResult[V]
+
+  /**
+   * Signals that the value was found in cache.
+   *
+   * This value should be used, but it should be refreshed
+   * out-of-band.
+   *
+   * @param value the value that was loaded from cache
+   */
+  case class Stale[V](value: V) extends CacheResult[V]
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala
@ -0,0 +1,34 @@
+package com.twitter.tweetypie.caching
+
+import com.twitter.util.Duration
+import com.twitter.util.Time
+
+/**
+ * Factory for commonly used expiry functions.
+ *
+ * An expiry function takes a value and produces the time at which
+ * that value should expire from cache. A [[ValueSerializer]]
+ * implementation typically needs one of these.
+ */
+object Expiry {
+
+  /**
+   * Expiry function that tells memcached to keep the value forever.
+   *
+   * The argument type is [[Any]] because the value is never
+   * inspected, which lets this function be used for any value type.
+   */
+  val Never: Any => Time = { (_: Any) => Time.Top }
+
+  /**
+   * Builds an expiry function that tells memcached to stop using the
+   * value once `ttl` has elapsed, measured from the moment the
+   * returned function is applied.
+   *
+   * The argument type is [[Any]] because the value is never
+   * inspected, which lets this function be used for any value type.
+   */
+  def byAge(ttl: Duration): Any => Time = { (_: Any) => Time.now + ttl }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala
@ -0,0 +1,140 @@
+package com.twitter.tweetypie.caching
+
+import com.twitter.io.Buf
+import com.twitter.scrooge.CompactThriftSerializer
+import com.twitter.scrooge.ThriftStruct
+import com.twitter.scrooge.ThriftStructCodec
+import com.twitter.servo.cache.thriftscala.CachedValue
+import com.twitter.servo.cache.thriftscala.CachedValueStatus
+import com.twitter.stitch.NotFound
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import com.twitter.util.Time
+import com.twitter.util.Try
+import java.nio.ByteBuffer
+
+object ServoCachedValueSerializer {
+
+  /**
+   * Thrown when the fields of the servo CachedValue struct do not
+   * satisfy the invariants expected by this serialization code.
+   *
+   * @param cachedValue the struct whose state was unexpected
+   */
+  case class UnexpectedCachedValueState(cachedValue: CachedValue) extends Exception {
+    def message: String = s"Unexpected state for CachedValue. Value was: $cachedValue"
+
+    // Expose the message through the standard Throwable API. Without
+    // this, getMessage is null and logging the exception (which uses
+    // Throwable.toString) shows only the class name.
+    override def getMessage: String = message
+  }
+
+  /** Serializer for the [[CachedValue]] struct itself, created via [[CompactThriftSerializer]]. */
+  val CachedValueThriftSerializer: CompactThriftSerializer[CachedValue] = CompactThriftSerializer(
+    CachedValue)
+}
+
+/**
+ * A [[ValueSerializer]] that stores values in cache wrapped in
+ * Servo's [[CachedValue]] struct, in the way tweetypie uses it:
+ *
+ * - Only [[CachedValueStatus.Found]] and
+ *   [[CachedValueStatus.NotFound]] are ever written.
+ *
+ * - Of the timestamp fields, only `cachedAtMsec` is populated.
+ *   tweetypie's cache interaction does not use the others, and values
+ *   cached this way are never updated, so readThroughAt or
+ *   writtenThroughAt would carry no extra information.
+ *
+ * - A present value is encoded with
+ *   [[org.apache.thrift.protocol.TCompactProtocol]], and so is the
+ *   enclosing CachedValue struct.
+ *
+ * The serializer works on [[Try]] values; [[Return]] and
+ * `Throw(NotFound)` are the only values it will cache.
+ *
+ * @param codec codec for the cached thrift struct type
+ * @param expiry computes the cache expiry time for a value being written
+ * @param softTtl decides whether a loaded value is fresh or stale
+ */
+case class ServoCachedValueSerializer[V <: ThriftStruct](
+  codec: ThriftStructCodec[V],
+  expiry: Try[V] => Time,
+  softTtl: SoftTtl[Try[V]])
+    extends ValueSerializer[Try[V]] {
+  import ServoCachedValueSerializer.{CachedValueThriftSerializer, UnexpectedCachedValueState}
+
+  private[this] val ValueThriftSerializer = CompactThriftSerializer(codec)
+
+  /**
+   * For a cacheable value, returns its expiry together with a
+   * TCompactProtocol-encoded servo CachedValue struct in which exactly
+   * these fields are set:
+   *
+   * - `value`: [[None]] for {{{Throw(NotFound)}}}; for [[Return]],
+   *   {{{Some(encodedStruct)}}} where {{{encodedStruct}}} is the
+   *   TCompactProtocol encoding of the returned struct.
+   *
+   * - `status`: [[CachedValueStatus.Found]] for a Return and
+   *   [[CachedValueStatus.NotFound]] for Throw(NotFound).
+   *
+   * - `cachedAtMsec`: the current time according to [[Time.now]].
+   *
+   * Any other value (a Throw of something other than NotFound) is not
+   * cacheable, and [[None]] is returned for it.
+   */
+  override def serialize(value: Try[V]): Option[(Time, Buf)] = {
+    // Outer Option: whether the value is cacheable at all.
+    // Inner Option: the encoded struct, absent for NotFound.
+    val cacheablePayload: Option[Option[ByteBuffer]] =
+      value match {
+        case Return(struct) =>
+          Some(Some(ByteBuffer.wrap(ValueThriftSerializer.toBytes(struct))))
+        case Throw(NotFound) =>
+          Some(None)
+        case _ =>
+          None
+      }
+
+    cacheablePayload.map { payload =>
+      val status: CachedValueStatus =
+        if (payload.isEmpty) CachedValueStatus.NotFound else CachedValueStatus.Found
+      val envelope =
+        CachedValue(value = payload, status = status, cachedAtMsec = Time.now.inMilliseconds)
+      val encodedEnvelope = Buf.ByteArray.Owned(CachedValueThriftSerializer.toBytes(envelope))
+
+      (expiry(value), encodedEnvelope)
+    }
+  }
+
+  /**
+   * Decodes a value written by [[serialize]]. Whether the result is
+   * [[CacheResult.Fresh]] or [[CacheResult.Stale]] is decided by
+   * {{{softTtl}}}, given the decoded value and its `cachedAtMsec`.
+   *
+   * @throws UnexpectedCachedValueState if the state of the
+   *   [[CachedValue]] could not be produced by [[serialize]]
+   */
+  override def deserialize(buf: Buf): CacheResult[Try[V]] = {
+    val envelope = CachedValueThriftSerializer.fromBytes(Buf.ByteArray.Owned.extract(buf))
+
+    val decoded: Try[V] =
+      (envelope.value, envelope.status) match {
+        case (Some(valueBuffer), CachedValueStatus.Found) =>
+          // Read through a duplicate so that the position of the
+          // struct's own buffer is left untouched.
+          val valueBytes = new Array[Byte](valueBuffer.remaining)
+          valueBuffer.duplicate.get(valueBytes)
+          Return(ValueThriftSerializer.fromBytes(valueBytes))
+
+        case (None, CachedValueStatus.NotFound) =>
+          Throw(NotFound)
+
+        case _ =>
+          // Exceptions thrown by deserialization are recorded and
+          // treated as a cache miss by CacheOperations, so throwing
+          // here causes the entry in cache to be overwritten. Stats
+          // are recorded whenever this happens.
+          throw UnexpectedCachedValueState(envelope)
+      }
+
+    softTtl.toCacheResult(decoded, Time.fromMilliseconds(envelope.cachedAtMsec))
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala
@ -0,0 +1,120 @@
+package com.twitter.tweetypie.caching
+
+import com.twitter.util.Duration
+import com.twitter.util.Time
+import scala.util.Random
+import com.twitter.logging.Logger
+
+/**
+ * Decides whether a value that was successfully loaded from cache
+ * counts as [[CacheResult.Fresh]] or [[CacheResult.Stale]]. Intended
+ * as a building block for [[ValueSerializer]] implementations.
+ */
+trait SoftTtl[-V] {
+
+  /**
+   * Whether the cached value should be treated as fresh.
+   *
+   * @param value     the value that was loaded from cache.
+   * @param cachedAt  the time at which the value was cached.
+   */
+  def isFresh(value: V, cachedAt: Time): Boolean
+
+  /**
+   * Wraps `value` in Fresh when `isFresh` holds for it, and in Stale
+   * otherwise.
+   *
+   * (A contravariant type may not appear in return position, so the
+   * extra type variable U, a specific subtype of V, is introduced
+   * for the result. This keeps policies polymorphic without callers
+   * having to name the type. Making V invariant would also work, but
+   * then every instance would need the type spelled out.)
+   */
+  def toCacheResult[U <: V](value: U, cachedAt: Time): CacheResult[U] = {
+    val fresh: Boolean = isFresh(value, cachedAt)
+    if (fresh) CacheResult.Fresh(value) else CacheResult.Stale(value)
+  }
+}
+
+object SoftTtl {
+
+  /**
+   * Regardless of the inputs, the value will always be considered
+   * fresh.
+   */
+  object NeverRefresh extends SoftTtl[Any] {
+    override def isFresh(_unusedValue: Any, _unusedCachedAt: Time): Boolean = true
+  }
+
+  /**
+   * Trigger refresh based on the length of time that a value has been
+   * stored in cache, ignoring the value.
+   *
+   * @param softTtl Items that were cached longer ago than this value
+   *   will be refreshed when they are accessed.
+   *
+   * @param jitter Add nondeterminism to the soft TTL to prevent a
+   *   thundering herd of requests refreshing the value at the same
+   *   time. The time at which the value is considered stale will be
+   *   uniformly spread out over a range of +/- jitter around the soft
+   *   TTL. It is valid to set the jitter to zero, which will turn off
+   *   jittering. Must be non-negative and at most `Int.MaxValue / 2`
+   *   milliseconds.
+   *
+   * @param specificLogger If non-null, use this logger rather than one
+   *   based on the class name. This logger is only used for
+   *   trace-level logging.
+   *
+   * @param rng Source of randomness used to pick the jitter.
+   */
+  // NOTE(review): the type parameter V is not referenced anywhere in
+  // this class; the class extends SoftTtl[Any].
+  case class ByAge[V](
+    softTtl: Duration,
+    jitter: Duration,
+    specificLogger: Logger = null,
+    rng: Random = Random)
+      extends SoftTtl[Any] {
+
+    private[this] val logger: Logger =
+      if (specificLogger == null) Logger(getClass) else specificLogger
+
+    private[this] val maxJitterMs: Long = jitter.inMilliseconds
+
+    // This requirement is due to using Random.nextInt to choose the
+    // jitter: its bound is an Int and we pass 2 * maxJitterMs, so
+    // maxJitterMs may be at most Int.MaxValue / 2 milliseconds
+    // (a little over 12 days).
+    require(maxJitterMs <= (Int.MaxValue / 2))
+
+    // Negative jitter probably indicates misuse of the API
+    require(maxJitterMs >= 0)
+
+    // we want period +/- jitter, but the random generator
+    // generates non-negative numbers, so we generate [0, 2 *
+    // maxJitter) and subtract maxJitter to obtain [-maxJitter,
+    // maxJitter)
+    private[this] val maxJitterRangeMs: Int = (maxJitterMs * 2).toInt
+
+    // We perform all calculations in milliseconds, so convert the
+    // period to milliseconds out here.
+    private[this] val softTtlMs: Long = softTtl.inMilliseconds
+
+    // If the value is at or below this age, it will always be fresh,
+    // regardless of jitter.
+    private[this] val alwaysFreshAgeMs: Long = softTtlMs - maxJitterMs
+
+    // If the value is above this age, it will always be stale,
+    // regardless of jitter.
+    private[this] val alwaysStaleAgeMs: Long = softTtlMs + maxJitterMs
+
+    /**
+     * Compares the age of the value (current time minus `cachedAt`)
+     * against the soft TTL. A random jitter is only drawn when the age
+     * lies strictly above `softTtl - jitter` and at or below
+     * `softTtl + jitter`; outside that window the answer is fixed.
+     * With zero jitter the two bounds coincide, so the random
+     * generator is never consulted. The outcome is logged at trace
+     * level.
+     */
+    override def isFresh(value: Any, cachedAt: Time): Boolean = {
+      val ageMs: Long = (Time.now - cachedAt).inMilliseconds
+      val fresh =
+        if (ageMs <= alwaysFreshAgeMs) {
+          true
+        } else if (ageMs > alwaysStaleAgeMs) {
+          false
+        } else {
+          // jitterMs is in [-maxJitterMs, maxJitterMs)
+          val jitterMs: Long = rng.nextInt(maxJitterRangeMs) - maxJitterMs
+          ageMs <= softTtlMs + jitterMs
+        }
+
+      logger.ifTrace(
+        s"Checked soft ttl: fresh = $fresh, " +
+          s"soft_ttl_ms = $softTtlMs, age_ms = $ageMs, value = $value")
+
+      fresh
+    }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala
@ -0,0 +1,65 @@
+package com.twitter.tweetypie.caching
+
+import scala.collection.mutable
+import com.twitter.util.Future
+import com.twitter.stitch.Stitch
+import com.twitter.stitch.Runner
+import com.twitter.stitch.FutureRunner
+import com.twitter.stitch.Group
+
+/**
+ * Workaround for an infelicity in the implementation of [[Stitch.async]].
+ *
+ * This has the same semantics as [[Stitch.async]], with the exception
+ * that interrupts to the main computation will not interrupt the
+ * async call.
+ *
+ * The problem that this implementation solves is that we do not want
+ * async calls grouped together with synchronous calls. This was
+ * discussed on a mailing list thread (the original "[1]" reference is
+ * not included in this file) and may eventually be fixed in Stitch.
+ */
+private[caching] object StitchAsync {
+  // Wraps a by-name Stitch so that it is only constructed when
+  // `call()` is invoked by the runner.
+  private[this] class AsyncCall(deferred: => Stitch[_]) {
+    def call(): Stitch[_] = deferred
+  }
+
+  private object AsyncGroup extends Group[AsyncCall, Unit] {
+    override def runner(): Runner[AsyncCall, Unit] =
+      new FutureRunner[AsyncCall, Unit] {
+        // Every deferred call added to this runner, of any result
+        // type. The usual Stitch batching and deduping happens when
+        // they are executed together in `run`.
+        private[this] val pending = new mutable.ArrayBuffer[AsyncCall]
+
+        def add(call: AsyncCall): Stitch[Unit] = {
+          // Only record the call here; it is executed in `run`.
+          pending.append(call)
+
+          // The caller does not wait for the effect to complete, so
+          // a constant value is returned.
+          Stitch.Unit
+        }
+
+        def run(): Future[_] = {
+          val allCalls =
+            Stitch.traverse(pending) { pendingCall: AsyncCall =>
+              pendingCall
+                .call()
+                .liftToTry // So that an exception will not interrupt the other calls
+            }
+
+          // The future returned from this inner invocation of
+          // Stitch.run is deliberately dropped rather than linked to
+          // the returned future, so these effects are not tied to the
+          // outer run in which this method was invoked.
+          Stitch.run(allCalls)
+          Future.Unit
+        }
+      }
+  }
+
+  /**
+   * Schedule `call` to run asynchronously; the returned Stitch does
+   * not wait for it to complete.
+   */
+  def apply(call: => Stitch[_]): Stitch[Unit] =
+    // All async calls share one group so that they are run together
+    Stitch.call(new AsyncCall(call), AsyncGroup)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala
@ -0,0 +1,62 @@
+package com.twitter.tweetypie.caching
+
+import com.twitter.stitch.MapGroup
+import com.twitter.stitch.Group
+import com.twitter.stitch.Stitch
+import com.twitter.util.Future
+import com.twitter.util.Return
+import com.twitter.util.Try
+
+/**
+ * Wrapper around [[CacheOperations]] providing a [[Stitch]] API.
+ *
+ * Both reads and writes go through Stitch groups, so calls made in
+ * the same batch are handed to the underlying operations together.
+ */
+case class StitchCacheOperations[K, V](operations: CacheOperations[K, V]) {
+  import StitchCacheOperations.SetCall
+
+  // Hands all keys of a batch to a single `operations.get` call and
+  // pairs each key with its result.
+  private[this] val getGroup: Group[K, CacheResult[V]] =
+    MapGroup[K, CacheResult[V]] { keys: Seq[K] =>
+      operations
+        .get(keys)
+        .map { values =>
+          // Build a strict Map here. `mapValues` returns a lazy view
+          // on Scala 2.12 (re-wrapping the value on every lookup) and
+          // is deprecated on 2.13, so wrap each result eagerly.
+          val resultsByKey: Map[K, Try[CacheResult[V]]] =
+            keys.zip(values).map { case (key, result) => key -> Return(result) }.toMap
+          resultsByKey
+        }
+    }
+
+  /**
+   * Performs a [[CacheOperations.get]] for a single key.
+   */
+  def get(key: K): Stitch[CacheResult[V]] =
+    Stitch.call(key, getGroup)
+
+  // Issues one `operations.set` per call in the batch and reports the
+  // outcome of each individually.
+  private[this] val setGroup: Group[SetCall[K, V], Unit] =
+    new MapGroup[SetCall[K, V], Unit] {
+
+      override def run(calls: Seq[SetCall[K, V]]): Future[SetCall[K, V] => Try[Unit]] =
+        Future
+          .collectToTry(calls.map(call => operations.set(call.key, call.value)))
+          .map(tries => calls.zip(tries).toMap)
+    }
+
+  /**
+   * Performs a [[CacheOperations.set]].
+   */
+  def set(key: K, value: V): Stitch[Unit] =
+    // This is implemented as a Stitch.call instead of a Stitch.future
+    // in order to handle the case where a batch has a duplicate
+    // key. Each copy of the duplicate key will trigger a write back
+    // to cache, so we dedupe the writes in order to avoid the
+    // extraneous RPC call.
+    Stitch.call(new SetCall(key, value), setGroup)
+}
+
+object StitchCacheOperations {
+
+  /**
+   * The "call" type used by the set group of [[StitchCacheOperations]].
+   * It carries a key/value pair, but both equality and hashing look at
+   * the key alone, so two calls for the same key compare equal even
+   * when their values differ.
+   */
+  private class SetCall[K, V](val key: K, val value: V) {
+    override def hashCode: Int = key.hashCode
+
+    override def equals(other: Any): Boolean =
+      other match {
+        case that: SetCall[_, _] => key == that.key
+        case _ => false
+      }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala
@ -0,0 +1,36 @@
+package com.twitter.tweetypie.caching
+
+import com.twitter.stitch.Stitch
+
+/**
+ * A read-through cache in front of a [[Stitch]] function.
+ *
+ * @see CacheResult for more information about the semantics
+ *   implemented here.
+ */
+class StitchCaching[K, V](operations: CacheOperations[K, V], repo: K => Stitch[V])
+    extends (K => Stitch[V]) {
+
+  private[this] val cacheOps = StitchCacheOperations(operations)
+
+  // Write `value` back to cache without making the caller wait for it.
+  private[this] def writeBackAsync(key: K, value: V): Stitch[Unit] =
+    StitchAsync(cacheOps.set(key, value))
+
+  override def apply(key: K): Stitch[V] =
+    cacheOps.get(key).flatMap {
+      case CacheResult.Fresh(cached) =>
+        Stitch.value(cached)
+
+      case CacheResult.Stale(stale) =>
+        // Serve the stale value now; reload from the repo and update
+        // the cache in the background.
+        val refresh = StitchAsync(repo(key).flatMap(cacheOps.set(key, _)))
+        refresh.map(_ => stale)
+
+      case CacheResult.Miss =>
+        repo(key).applyEffect(loaded => writeBackAsync(key, loaded))
+
+      case CacheResult.Failure(_) =>
+        // No write-back on failure: a cache failure usually means a
+        // communication failure, and sending more requests to the
+        // cache that holds the value for this key could make the
+        // situation worse.
+        repo(key)
+    }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala
@ -0,0 +1,47 @@
+package com.twitter.tweetypie.caching
+
+import com.twitter.io.Buf
+import com.twitter.util.Time
+
+/**
+ * How to store values of type V in cache. This includes whether a
+ * given value is cacheable, how to serialize it, when it should
+ * expire from cache, and how to interpret byte patterns from cache.
+ *
+ * @tparam V the type of value being cached
+ */
+trait ValueSerializer[V] {
+
+  /**
+   * Prepare the value for storage in cache. When a [[Some]] is
+   * returned, the [[Buf]] should be a valid input to [[deserialize]]
+   * and the [[Time]] will be used as the expiry in the memcached
+   * command.  When [[None]] is returned, it indicates that the value
+   * cannot or should not be written back to cache.
+   *
+   * The most common use case for returning None is caching Try
+   * values, where certain exceptional values encode a cacheable state
+   * of a value. In particular, Throw(NotFound) is commonly used to
+   * encode a missing value, and we usually want to cache those
+   * negative lookups, but we don't want to cache e.g. a timeout
+   * exception.
+   *
+   * @param value the value to be written to cache
+   * @return a pair of expiry time for this cache entry and the bytes
+   *   to store in cache. If you do not want this value to explicitly
+   *   expire, use Time.Top as the expiry.
+   */
+  def serialize(value: V): Option[(Time, Buf)]
+
+  /**
+   * Deserialize a value found in cache. This function converts the
+   * bytes found in memcache to a [[CacheResult]]. In general, you
+   * probably want to return [[CacheResult.Fresh]] or
+   * [[CacheResult.Stale]], but you are free to return any of the
+   * range of [[CacheResult]]s, depending on the behavior that you
+   * want.
+   *
+   * This is declared as a total function (it returns a
+   * [[CacheResult]] rather than an optional value) because in the
+   * common use case, the bytes stored in cache will be appropriate
+   * for the serializer. This method is free to throw any exception
+   * if the bytes are not valid.
+   *
+   * @param serializedValue the bytes read from cache
+   * @return the [[CacheResult]] that the bytes represent
+   */
+  def deserialize(serializedValue: Buf): CacheResult[V]
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD
@ -0,0 +1,15 @@
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
+        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/transport",
+        "finagle/finagle-thrift/src/main/scala",
+        "tweetypie/servo/util/src/main/scala:exception",
+        "strato/src/main/scala/com/twitter/strato/access",
+        "strato/src/main/scala/com/twitter/strato/data",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala
@ -0,0 +1,185 @@
+package com.twitter.tweetypie.client_id
+
+import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.finagle.mtls.transport.S2STransport
+import com.twitter.finagle.thrift.ClientId
+import com.twitter.servo.util.Gate
+import com.twitter.strato.access.Access
+import com.twitter.strato.access.Access.ForwardedServiceIdentifier
+
+object ClientIdHelper {
+
+  val UnknownClientId = "unknown"
+
+  /** A helper that derives service identifiers from the transport. */
+  def default: ClientIdHelper = new ClientIdHelper(UseTransportServiceIdentifier)
+
+  /**
+   * Drops everything from the last '.' onwards (usually ".prod" or
+   * ".staging"). A client id without a '.' is returned unchanged.
+   */
+  def getClientIdRoot(clientId: String): String = {
+    val lastDot = clientId.lastIndexOf('.')
+    if (lastDot == -1) clientId else clientId.substring(0, lastDot)
+  }
+
+  /**
+   * Returns whatever follows the last '.', excluding the '.' itself.
+   * A client id without a '.' is returned unchanged.
+   */
+  def getClientIdEnv(clientId: String): String = {
+    val lastDot = clientId.lastIndexOf('.')
+    if (lastDot == -1) clientId else clientId.substring(lastDot + 1)
+  }
+
+  // Renders a ServiceIdentifier in the "service.environment" form.
+  private[client_id] def asClientId(s: ServiceIdentifier): String =
+    s.service + "." + s.environment
+}
+
+class ClientIdHelper(serviceIdentifierStrategy: ServiceIdentifierStrategy) {
+
+  private[client_id] val ProcessPathPrefix = "/p/"
+
+  /**
+   * The effective client id is used for request authorization and metrics
+   * attribution. For calls to Tweetypie's thrift API, the thrift ClientId
+   * is used and is expected in the form of "service-name.env". Federated
+   * Strato clients don't support configured ClientIds and instead provide
+   * a "process path" containing instance-specific information. So for
+   * calls to the federated API, we compute an effective client id from
+   * the ServiceIdentifier, if present, in Strato's Access principals. The
+   * ServiceIdentifier is only consulted when the thrift ClientId cannot
+   * be used, since this method is invoked on every request.
+   */
+  def effectiveClientId: Option[String] = {
+    val thriftClientId: Option[String] = ClientId.current.map(_.name)
+
+    thriftClientId match {
+      // A regular thrift ClientId is used as-is. Process paths are
+      // excluded because they are instance-specific and aren't
+      // supported by tweetypie for authorization or metrics purposes.
+      case supported @ Some(name) if !name.startsWith(ProcessPathPrefix) =>
+        supported
+
+      // The thrift ClientId is undefined or unsupported: try computing a
+      // value from the ServiceIdentifier, and ultimately fall back to the
+      // ClientId value, even when given in an unsupported format, so that
+      // error text and debug logs include the value passed by the caller.
+      case _ =>
+        serviceIdentifierStrategy.serviceIdentifier
+          .map(ClientIdHelper.asClientId)
+          .orElse(thriftClientId)
+    }
+  }
+
+  /** The effective client id with its last ".element" trimmed off. */
+  def effectiveClientIdRoot: Option[String] =
+    effectiveClientId.map(id => ClientIdHelper.getClientIdRoot(id))
+
+  /** The service identifier chosen by the configured strategy, if any. */
+  def effectiveServiceIdentifier: Option[ServiceIdentifier] =
+    serviceIdentifierStrategy.serviceIdentifier
+}
+
+/** Logic for finding a [[ServiceIdentifier]] for the purpose of crafting a client ID. */
+trait ServiceIdentifierStrategy {
+  def serviceIdentifier: Option[ServiceIdentifier]
+
+  /**
+   * Returns the only element of the given [[Set]], or [[None]] when the set
+   * is empty or holds more than one element.
+   *
+   * This utility is used defensively against a set of principals collected
+   * from [[Access.getPrincipals]]. While the contract is that there should be at most one
+   * instance of each principal kind present in that set, in practice that has not always
+   * been the case. The safest strategy in that case is to abandon the set completely
+   * if more than one principal is competing.
+   */
+  final protected def onlyElement[T](set: Set[T]): Option[T] =
+    if (set.size > 1) None else set.headOption
+}
+
+/**
+ * Picks [[ServiceIdentifier]] from Finagle SSL Transport, if one exists.
+ *
+ * This works for both Thrift API calls as well as StratoFed API calls. Strato's
+ * [[Access#getPrincipals]] collection, which would typically be consulted by StratoFed
+ * column logic, contains the same [[ServiceIdentifier]] derived from the Finagle SSL
+ * transport, so there's no need to have separate strategies for Thrift vs StratoFed
+ * calls.
+ *
+ * This is the default behavior of using [[ServiceIdentifier]] for computing client ID.
+ */
+private[client_id] class UseTransportServiceIdentifier(
+  // overridable for testing
+  getPeerServiceIdentifier: => ServiceIdentifier,
+) extends ServiceIdentifierStrategy {
+  override def serviceIdentifier: Option[ServiceIdentifier] = {
+    // Evaluate the by-name lookup once per call; the empty identifier
+    // is reported as None.
+    val peer = getPeerServiceIdentifier
+    if (EmptyServiceIdentifier == peer) None else Some(peer)
+  }
+}
+
+/** Default instance, reading the peer identifier from [[S2STransport]]. */
+object UseTransportServiceIdentifier
+    extends UseTransportServiceIdentifier(S2STransport.peerServiceIdentifier)
+
+/**
+ * Picks [[ForwardedServiceIdentifier]] from Strato principals for client ID
+ * if [[ServiceIdentifier]] points at a call coming from Strato.
+ * If not present, falls back to [[UseTransportServiceIdentifier]] behavior.
+ *
+ * Tweetypie utilizes the strategy to pick [[ServiceIdentifier]] for the purpose
+ * of generating a client ID when the client ID is absent or unknown.
+ * [[PreferForwardedServiceIdentifierForStrato]] looks for the [[ForwardedServiceIdentifier]]
+ * values set by a stratoserver request.
+ * The reason is, stratoserver is effectively a conduit, forwarding the [[ServiceIdentifier]]
+ * of the _actual client_ that is calling stratoserver.
+ * Any direct callers not going through stratoserver will default to [[ServiceIdentifier]].
+ */
+private[client_id] class PreferForwardedServiceIdentifierForStrato(
+  // overridable for testing
+  getPeerServiceIdentifier: => ServiceIdentifier,
+) extends ServiceIdentifierStrategy {
+  val useTransportServiceIdentifier =
+    new UseTransportServiceIdentifier(getPeerServiceIdentifier)
+
+  override def serviceIdentifier: Option[ServiceIdentifier] = {
+    val transportIdentifier = useTransportServiceIdentifier.serviceIdentifier
+    if (transportIdentifier.exists(isStrato)) {
+      // The caller is Strato: prefer the identifier it forwarded and only
+      // fall back to the transport identifier when none is usable.
+      forwardedServiceIdentifier.orElse(useTransportServiceIdentifier.serviceIdentifier)
+    } else {
+      transportIdentifier
+    }
+  }
+
+  // The single forwarded identifier among Strato's principals, or None
+  // when there are none or several of them.
+  private def forwardedServiceIdentifier: Option[ServiceIdentifier] = {
+    val forwardedIdentifiers = Access.getPrincipals.collect {
+      case forwarded: ForwardedServiceIdentifier =>
+        forwarded.serviceIdentifier.serviceIdentifier
+    }
+    onlyElement(forwardedIdentifiers)
+  }
+
+  /**
+   * Strato uses various service names like "stratoserver" and "stratoserver-patient".
+   * They all do start with "stratoserver" though, so at the point of implementing,
+   * the safest bet to recognize strato is to look for this prefix.
+   *
+   * This also works for staged strato instances (which it should), despite technically
+   * allowing any caller to force this strategy by creating a service certificate
+   * with this service name.
+   */
+  private def isStrato(serviceIdentifier: ServiceIdentifier): Boolean =
+    serviceIdentifier.service.startsWith("stratoserver")
+}
+
+/** Default instance, reading the peer identifier from [[S2STransport]]. */
+object PreferForwardedServiceIdentifierForStrato
+    extends PreferForwardedServiceIdentifierForStrato(S2STransport.peerServiceIdentifier)
+
+/**
+ * [[ServiceIdentifierStrategy]] which picks one of two delegates by evaluating
+ * a unit gate afresh on every call to [[serviceIdentifier]].
+ */
+class ConditionalServiceIdentifierStrategy(
+  private val condition: Gate[Unit],
+  private val ifTrue: ServiceIdentifierStrategy,
+  private val ifFalse: ServiceIdentifierStrategy)
+    extends ServiceIdentifierStrategy {
+
+  override def serviceIdentifier: Option[ServiceIdentifier] = {
+    val delegate = if (condition()) ifTrue else ifFalse
+    delegate.serviceIdentifier
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD
@ -0,0 +1,19 @@
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    provides = scala_artifact(
+        org = "com.twitter.tweetypie",
+        name = "context",
+        repo = artifactory,
+    ),
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle/finagle-core/src/main",
+        "graphql/common/src/main/scala/com/twitter/graphql/common/core",
+        "src/thrift/com/twitter/context:twitter-context-scala",
+        "twitter-context/src/main/scala",
+        "util/util-core:scala",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala
@ -0,0 +1,135 @@
+package com.twitter.tweetypie.context
+
+import com.twitter.context.TwitterContext
+import com.twitter.finagle.Filter
+import com.twitter.finagle.Service
+import com.twitter.finagle.SimpleFilter
+import com.twitter.finagle.context.Contexts
+import com.twitter.io.Buf
+import com.twitter.io.Buf.ByteArray.Owned
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.graphql.common.core.GraphQlClientApplication
+import com.twitter.util.Try
+import java.nio.charset.StandardCharsets.UTF_8
+import scala.util.matching.Regex
+
+/**
+ * Context and filters to help track callers of Tweetypie's endpoints. This context and its
+ * filters were originally added to provide visibility into callers of Tweetypie who are
+ * using the birdherd library to access tweets.
+ *
+ * This context data is intended to be marshalled by callers to Tweetypie, but then the
+ * context data is stripped (moved from broadcast to local). This happens so that the
+ * context data is not forwarded down tweetypie's backend rpc chains, which often result
+ * in transitive calls back into tweetypie. This effectively creates single-hop marshalling.
+ */
+object TweetypieContext {
+  // Bring Tweetypie permitted TwitterContext into scope
+  val TwitterContext: TwitterContext =
+    com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit)
+
+  /** The tracked context; `via` identifies how the caller reached Tweetypie. */
+  case class Ctx(via: String)
+
+  /** The context with an empty `via`; it is treated the same as no context. */
+  val Empty = Ctx("")
+
+  /** The marshalled (broadcast) form of the context, as sent by callers. */
+  object Broadcast {
+    // `via` is marshalled as its UTF-8 bytes.
+    private[this] object Key extends Contexts.broadcast.Key[Ctx](id = Ctx.getClass.getName) {
+
+      override def marshal(value: Ctx): Buf =
+        Owned(value.via.getBytes(UTF_8))
+
+      override def tryUnmarshal(buf: Buf): Try[Ctx] =
+        Try(Ctx(new String(Owned.extract(buf), UTF_8)))
+    }
+
+    private[TweetypieContext] def current(): Option[Ctx] =
+      Contexts.broadcast.get(Key)
+
+    def currentOrElse(default: Ctx): Ctx =
+      current().getOrElse(default)
+
+    def letClear[T](f: => T): T =
+      Contexts.broadcast.letClear(Key)(f)
+
+    /** Runs `f` with `ctx` set, or with the key cleared when `ctx` is [[Empty]]. */
+    def let[T](ctx: Ctx)(f: => T): T =
+      if (Empty == ctx) {
+        letClear(f)
+      } else {
+        Contexts.broadcast.let(Key, ctx)(f)
+      }
+
+    // ctx has to be by name so we can re-evaluate it for every request (for usage in ServiceTwitter.scala)
+    def filter(ctx: => Ctx): Filter.TypeAgnostic =
+      new Filter.TypeAgnostic {
+        override def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] =
+          (request: Req, service: Service[Req, Rep]) => Broadcast.let(ctx)(service(request))
+      }
+  }
+
+  /** The local (non-marshalled) form of the context. */
+  object Local {
+    private[this] val Key =
+      new Contexts.local.Key[Ctx]
+
+    private[TweetypieContext] def let[T](ctx: Option[Ctx])(f: => T): T =
+      ctx match {
+        case Some(nonEmptyCtx) if nonEmptyCtx != Empty =>
+          Contexts.local.let(Key, nonEmptyCtx)(f)
+        // Covers both None and Some(Empty). A caller can marshal an empty
+        // `via`, which unmarshals to Some(Empty); matching only on None
+        // would throw a MatchError for such a request. An empty context
+        // is treated as absent, mirroring Broadcast.let.
+        case _ =>
+          Contexts.local.letClear(Key)(f)
+      }
+
+    def current(): Option[Ctx] =
+      Contexts.local.get(Key)
+
+    /**
+     * Moves the context from broadcast to local: the broadcast value is read,
+     * then the request is served with the broadcast key cleared and the
+     * local key set to that value.
+     */
+    def filter[Req, Rep]: SimpleFilter[Req, Rep] =
+      (request: Req, service: Service[Req, Rep]) => {
+        val ctx = Broadcast.current()
+        Broadcast.letClear(Local.let(ctx)(service(request)))
+      }
+
+    // Client applications that are not found in GraphQlClientApplication.AllById
+    // are all reported as "nonTOO".
+    private[this] def clientAppIdToName(clientAppId: Long) =
+      GraphQlClientApplication.AllById.get(clientAppId).map(_.name).getOrElse("nonTOO")
+
+    // Applied in order to the sanitized path so that variable path
+    // segments collapse into a fixed stat name.
+    private[this] val pathRegexes: Seq[(Regex, String)] = Seq(
+      ("timeline_conversation_.*_json".r, "timeline_conversation__slug__json"),
+      ("user_timeline_.*_json".r, "user_timeline__user__json"),
+      ("[0-9]{2,}".r, "_id_")
+    )
+
+    // `context.via` will either be a string like: "birdherd" or "birdherd:/1.1/statuses/show/123.json",
+    // depending on whether birdherd code was able to determine the path of the request.
+    private[this] def getViaAndPath(via: String): (String, Option[String]) =
+      via.split(":", 2) match {
+        case Array(via, path) =>
+          val sanitizedPath = path
+            .replace('/', '_')
+            .replace('.', '_')
+
+          // Apply each regex in turn
+          val normalizedPath = pathRegexes.foldLeft(sanitizedPath) {
+            case (path, (regex, replacement)) => regex.replaceAllIn(path, replacement)
+          }
+
+          (via, Some(normalizedPath))
+        case Array(via) => (via, None)
+      }
+
+    /**
+     * Increments request counters under each of `scopes`, broken down by
+     * `via`, by client application name and (when known) by normalized path.
+     * Nothing is recorded unless a local context, a TwitterContext and a
+     * client application id are all present.
+     */
+    def trackStats[U](scopes: StatsReceiver*): Unit =
+      for {
+        tweetypieCtx <- TweetypieContext.Local.current()
+        (via, pathOpt) = getViaAndPath(tweetypieCtx.via)
+        twitterCtx <- TwitterContext()
+        clientAppId <- twitterCtx.clientApplicationId
+      } {
+        val clientAppName = clientAppIdToName(clientAppId)
+        scopes.foreach { stats =>
+          val ctxStats = stats.scope("context")
+          val viaStats = ctxStats.scope("via", via)
+          viaStats.scope("all").counter("requests").incr()
+          val viaClientStats = viaStats.scope("by_client", clientAppName)
+          viaClientStats.counter("requests").incr()
+          pathOpt.foreach { path =>
+            val viaPathStats = viaStats.scope("by_path", path)
+            viaPathStats.counter("requests").incr()
+          }
+        }
+      }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD
@ -0,0 +1,15 @@
+scala_library(
+    sources = ["DeciderGates.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "3rdparty/jvm/com/google/guava",
+        "decider",
+        "finagle/finagle-toggle/src/main/scala/com/twitter/finagle/server",
+        "tweetypie/servo/decider",
+        "tweetypie/servo/util/src/main/scala",
+        "util/util-core:scala",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala
@ -0,0 +1,60 @@
+package com.twitter.tweetypie
+package decider
+
+import com.google.common.hash.Hashing
+import com.twitter.decider.Decider
+import com.twitter.decider.Feature
+import com.twitter.servo.gate.DeciderGate
+import com.twitter.servo.util.Gate
+import java.nio.charset.StandardCharsets
+import scala.collection.mutable
+/**
+ * Mixin for building decider-backed [[Gate]]s whose behavior can be forced
+ * on or off by name through `overrides`.
+ */
+trait DeciderGates {
+  def overrides: Map[String, Boolean] = Map.empty
+  def decider: Decider
+  def prefix: String
+
+  // Every feature name that has been passed to `withOverride` so far.
+  protected val seenFeatures: mutable.HashSet[String] = new mutable.HashSet[String]
+
+  // Decider features are looked up as "<prefix>_<name>".
+  private def deciderFeature(name: String): Feature =
+    decider.feature(s"${prefix}_$name")
+
+  /**
+   * Builds the gate for `name`: a constant gate when `overrides` has an
+   * entry for it, otherwise whatever `mkGate` builds from the decider
+   * feature. The name is recorded as seen either way.
+   */
+  def withOverride[T](name: String, mkGate: Feature => Gate[T]): Gate[T] = {
+    seenFeatures += name
+    overrides.get(name) match {
+      case Some(forced) => Gate.const(forced)
+      case None => mkGate(deciderFeature(name))
+    }
+  }
+
+  protected def linear(name: String): Gate[Unit] = withOverride[Unit](name, DeciderGate.linear)
+  protected def byId(name: String): Gate[Long] = withOverride[Long](name, DeciderGate.byId)
+
+  /**
+   * Returns a Gate[String] that can be used to check availability of the feature.
+   * The string is hashed into a Long, which is then used as the "id" passed to
+   * servo's DeciderGate.byId.
+   *
+   * @param name decider name
+   * @return Gate[String]
+   */
+  protected def byStringId(name: String): Gate[String] =
+    byId(name).contramap { id: String =>
+      Hashing.sipHash24().hashString(id, StandardCharsets.UTF_8).asLong()
+    }
+
+  /** All feature names seen so far. */
+  def all: Traversable[String] = seenFeatures
+
+  /** Override names that do not correspond to any seen feature. */
+  def unusedOverrides: Set[String] = {
+    val seen: Set[String] = all.toSet
+    overrides.keySet.filterNot(seen)
+  }
+
+  /**
+   * Generate a map of name -> availability, taking into account overrides.
+   * Overrides are either on or off so map to 10000 or 0, respectively.
+   */
+  def availabilityMap: Map[String, Option[Int]] =
+    all.map { name =>
+      val forced: Option[Int] = overrides.get(name).map(on => if (on) 10000 else 0)
+      val availability: Option[Int] = forced.orElse(deciderFeature(name).availability)
+      name -> availability
+    }.toMap
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD
@ -0,0 +1,10 @@
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "decider",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala
@ -0,0 +1,42 @@
+package com.twitter.tweetypie.decider.overrides
+
+import com.twitter.decider.LocalOverrides
+
+/**
+ * Local decider overrides for Tweetypie features, declared in the
+ * "tweetypie" namespace with the "tweetypie_" prefix.
+ *
+ * Every member is explicitly typed as [[LocalOverrides.Override]] so the
+ * public API does not depend on type inference.
+ */
+object TweetyPieDeciderOverrides extends LocalOverrides.Namespace("tweetypie", "tweetypie_") {
+  val CheckSpamOnRetweet: LocalOverrides.Override = feature("check_spam_on_retweet")
+  val CheckSpamOnTweet: LocalOverrides.Override = feature("check_spam_on_tweet")
+  val ConversationControlUseFeatureSwitchResults: LocalOverrides.Override = feature(
+    "conversation_control_use_feature_switch_results")
+  val ConversationControlTweetCreateEnabled: LocalOverrides.Override = feature(
+    "conversation_control_tweet_create_enabled")
+  val EnableExclusiveTweetControlValidation: LocalOverrides.Override = feature(
+    "enable_exclusive_tweet_control_validation")
+  val EnableHotKeyCaches: LocalOverrides.Override = feature("enable_hot_key_caches")
+  val HydrateConversationMuted: LocalOverrides.Override = feature("hydrate_conversation_muted")
+  val HydrateExtensionsOnWrite: LocalOverrides.Override = feature("hydrate_extensions_on_write")
+  val HydrateEscherbirdAnnotations: LocalOverrides.Override = feature(
+    "hydrate_escherbird_annotations")
+  val HydrateGnipProfileGeoEnrichment: LocalOverrides.Override = feature(
+    "hydrate_gnip_profile_geo_enrichment")
+  val HydratePastedPics: LocalOverrides.Override = feature("hydrate_pasted_pics")
+  val HydratePerspectivesEditsForOtherSafetyLevels: LocalOverrides.Override = feature(
+    "hydrate_perspectives_edits_for_other_levels")
+  val HydrateScrubEngagements: LocalOverrides.Override = feature("hydrate_scrub_engagements")
+  val LogRepoExceptions: LocalOverrides.Override = feature("log_repo_exceptions")
+  val MediaRefsHydratorIncludePastedMedia: LocalOverrides.Override = feature(
+    "media_refs_hydrator_include_pasted_media")
+  val ShortCircuitLikelyPartialTweetReads: LocalOverrides.Override = feature(
+    "short_circuit_likely_partial_tweet_reads_ms")
+  val RateLimitByLimiterService: LocalOverrides.Override = feature("rate_limit_by_limiter_service")
+  val RateLimitTweetCreationFailure: LocalOverrides.Override = feature(
+    "rate_limit_tweet_creation_failure")
+  val ReplyTweetConversationControlHydrationEnabled: LocalOverrides.Override = feature(
+    "reply_tweet_conversation_control_hydration_enabled")
+  val DisableInviteViaMention: LocalOverrides.Override = feature("disable_invite_via_mention")
+  val EnableRemoveUnmentionedImplicitMentions: LocalOverrides.Override = feature(
+    "enable_remove_unmentioned_implicit_mentions")
+  // NOTE(review): lowerCamelCase unlike its siblings; kept as-is because
+  // renaming would break existing references.
+  val useReplicatedDeleteTweet2: LocalOverrides.Override = feature("use_replicated_delete_tweet_2")
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD
@ -0,0 +1,15 @@
+scala_library(
+    compiler_option_sets = ["fatal_warnings"],
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle/finagle-core/src/main",
+        "incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift-scala",
+        "tweetypie/servo/util/src/main/scala",
+        "stitch/stitch-core",
+        "strato/src/main/scala/com/twitter/strato/client",
+        "tweetypie/server/src/main/scala/com/twitter/tweetypie/core",
+        "util/util-core",
+        "util/util-stats",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala
@ -0,0 +1,165 @@
+package com.twitter.tweetypie.jiminy.tweetypie
+
+import com.twitter.finagle.stats.CategorizingExceptionStatsHandler
+import com.twitter.finagle.stats.Stat
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.incentives.jiminy.thriftscala._
+import com.twitter.servo.util.FutureArrow
+import com.twitter.servo.util.Gate
+import com.twitter.stitch.Stitch
+import com.twitter.strato.thrift.ScroogeConvImplicits._
+import com.twitter.strato.client.{Client => StratoClient}
+import com.twitter.tweetypie.core.TweetCreateFailure
+import com.twitter.util.Future
+import com.twitter.util.Return
+import com.twitter.util.Throw
+
+/**
+ * Input to a [[NudgeBuilder]] check for a Tweet that is being created.
+ *
+ * @param text sent to Jiminy as the Tweet text
+ * @param inReplyToTweetId when non-empty, the Tweet is classified as a reply
+ * @param conversationId forwarded unchanged to Jiminy
+ * @param hasQuotedTweet when true and the Tweet is not a reply, the Tweet is
+ *   classified as a quote Tweet
+ * @param nudgeOptions the nudge options requested by the caller; when empty,
+ *   no nudge is ever returned for this request
+ * @param tweetId forwarded unchanged to Jiminy
+ */
+case class NudgeBuilderRequest(
+  text: String,
+  inReplyToTweetId: Option[NudgeBuilder.TweetId],
+  conversationId: Option[NudgeBuilder.TweetId],
+  hasQuotedTweet: Boolean,
+  nudgeOptions: Option[CreateTweetNudgeOptions],
+  tweetId: Option[NudgeBuilder.TweetId])
+
+trait NudgeBuilder extends FutureArrow[NudgeBuilderRequest, Unit] {
+
+  /**
+   * Check whether the user should receive a nudge instead of creating
+   * the Tweet. If nudgeOptions is None, then no nudge check is
+   * performed on behalf of the user. The implementation built by the
+   * companion object may still send a dark request in that case, but
+   * the outcome of that request is ignored.
+   *
+   * @param request description of the Tweet that is being created
+   * @return a Future.exception containing a [[TweetCreateFailure]] if the
+   *   user should be nudged, or Future.Unit if the user should not be
+   *   nudged.
+   */
+  def apply(
+    request: NudgeBuilderRequest
+  ): Future[Unit]
+}
+
+object NudgeBuilder {
+  type Type = FutureArrow[NudgeBuilderRequest, Unit]
+  type TweetId = Long
+
+  // Nudge options attached to dark-traffic requests. Dark traffic is meant to
+  // exercise the Jiminy backend realistically, so these options request a
+  // check for every nudge type listed below.
+  private[this] val darkTrafficCreateNudgeOptions = {
+    val requestedTypes: Set[TweetNudgeType] =
+      Set(
+        TweetNudgeType.PotentiallyToxicTweet,
+        TweetNudgeType.ReviseOrMute,
+        TweetNudgeType.ReviseOrHideThenBlock,
+        TweetNudgeType.ReviseOrBlock
+      )
+
+    Some(CreateTweetNudgeOptions(requestedNudgeTypes = Some(requestedTypes)))
+  }
+
+  /**
+   * Translate a [[NudgeBuilderRequest]] into the thrift request sent to Jiminy.
+   *
+   * A reply takes precedence over a quote Tweet when the Tweet type is
+   * chosen. Dark requests replace the caller's nudge options with
+   * `darkTrafficCreateNudgeOptions`.
+   */
+  private[this] def mkJiminyRequest(
+    request: NudgeBuilderRequest,
+    isDarkRequest: Boolean = false
+  ): CreateTweetNudgeRequest = {
+    val kindOfTweet =
+      request.inReplyToTweetId match {
+        case Some(_) => TweetType.Reply
+        case None if request.hasQuotedTweet => TweetType.QuoteTweet
+        case None => TweetType.OriginalTweet
+      }
+
+    val effectiveOptions =
+      if (isDarkRequest) darkTrafficCreateNudgeOptions else request.nudgeOptions
+
+    CreateTweetNudgeRequest(
+      tweetText = request.text,
+      tweetType = kindOfTweet,
+      inReplyToTweetId = request.inReplyToTweetId,
+      conversationId = request.conversationId,
+      createTweetNudgeOptions = effectiveOptions,
+      tweetId = request.tweetId
+    )
+  }
+
+  /**
+   * NudgeBuilder implemented on top of `nudgeArrow`, which performs the
+   * actual call to Jiminy.
+   *
+   * Stats recorded:
+   *   - latency_ms: Latency histogram (also implicitly number of
+   *     invocations) of calls to `nudgeArrow`. Both real nudge checks
+   *     (`nudgeOptions` is non-empty) and dark requests are timed.
+   *
+   *   - dark_request: `nudgeOptions` was empty and `enableDarkTraffic`
+   *     was open, so a dark request was sent.
+   *
+   *   - nudge: The nudge check succeeded and a nudge was created.
+   *
+   *   - no_nudge: The nudge check succeeded, but no nudge was created.
+   *
+   *   - failures: Calling `nudgeArrow` for a real nudge check failed.
+   *     Broken out by exception. Failed dark requests are not recorded.
+   */
+  def apply(
+    nudgeArrow: FutureArrow[CreateTweetNudgeRequest, CreateTweetNudgeResponse],
+    enableDarkTraffic: Gate[Unit],
+    stats: StatsReceiver
+  ): NudgeBuilder =
+    new NudgeBuilder {
+      private[this] val nudgeLatencyStat = stats.stat("latency_ms")
+      private[this] val nudgeCounter = stats.counter("nudge")
+      private[this] val noNudgeCounter = stats.counter("no_nudge")
+      private[this] val darkRequestCounter = stats.counter("dark_request")
+      private[this] val nudgeExceptionHandler = new CategorizingExceptionStatsHandler
+
+      // Call Jiminy and record how long the call takes.
+      private[this] def timedCall(
+        request: NudgeBuilderRequest,
+        isDarkRequest: Boolean
+      ): Future[CreateTweetNudgeResponse] =
+        Stat.timeFuture(nudgeLatencyStat) {
+          nudgeArrow(mkJiminyRequest(request, isDarkRequest))
+        }
+
+      // Dark request: whatever the outcome, Tweet creation continues.
+      private[this] def sendDarkRequest(request: NudgeBuilderRequest): Future[Unit] = {
+        darkRequestCounter.incr()
+        timedCall(request, isDarkRequest = true).transform(_ => Future.Done)
+      }
+
+      // Real nudge check: only a returned nudge fails the Tweet creation.
+      private[this] def checkForNudge(request: NudgeBuilderRequest): Future[Unit] =
+        timedCall(request, isDarkRequest = false).transform {
+          case Return(CreateTweetNudgeResponse(Some(nudge))) =>
+            nudgeCounter.incr()
+            Future.exception(TweetCreateFailure.Nudged(nudge = nudge))
+
+          case Return(CreateTweetNudgeResponse(None)) =>
+            noNudgeCounter.incr()
+            Future.Done
+
+          case Throw(e) =>
+            // A failed nudge call must not block the Tweet creation.
+            nudgeExceptionHandler.record(stats, e)
+            Future.Done
+        }
+
+      override def apply(
+        request: NudgeBuilderRequest
+      ): Future[Unit] =
+        if (request.nudgeOptions.nonEmpty) checkForNudge(request)
+        else if (enableDarkTraffic()) sendDarkRequest(request)
+        else Future.Done
+    }
+
+  /**
+   * NudgeBuilder implemented by calling the strato column
+   * `incentives/createTweetNudge`.
+   */
+  def apply(
+    strato: StratoClient,
+    enableDarkTraffic: Gate[Unit],
+    stats: StatsReceiver
+  ): NudgeBuilder = {
+    val createTweetNudge =
+      strato.executer[CreateTweetNudgeRequest, CreateTweetNudgeResponse](
+        "incentives/createTweetNudge")
+
+    // Adapt the strato executer to the FutureArrow taken by the other overload.
+    val stratoBackedArrow =
+      new FutureArrow[CreateTweetNudgeRequest, CreateTweetNudgeResponse] {
+        override def apply(req: CreateTweetNudgeRequest): Future[CreateTweetNudgeResponse] =
+          Stitch.run(createTweetNudge.execute(req))
+      }
+
+    apply(stratoBackedArrow, enableDarkTraffic, stats)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD
@ -0,0 +1,18 @@
+# Scala library built from every *.scala file in this directory (the
+# keyword-matching tokenizers and UserMutes), targeting Java 8.
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "src/java/com/twitter/common/text/language:language-identifier",
+        "src/java/com/twitter/common/text/language:locale-util",
+        "src/java/com/twitter/common/text/pipeline",
+        "src/java/com/twitter/common/text/token",
+        "src/java/com/twitter/common_internal/text",
+        "src/java/com/twitter/common_internal/text/version",
+        "tweetypie/src/resources/com/twitter/tweetypie/matching",
+        "util/util-core/src/main/scala/com/twitter/concurrent",
+        "util/util-core/src/main/scala/com/twitter/io",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala
@ -0,0 +1,92 @@
+package com.twitter.tweetypie.matching
+
+object TokenSequence {
+
+  /**
+   * Is `suffix` a suffix of `s`, starting at `offset` in `s`?
+   *
+   * In other words: are the characters of `s` from `offset` to its end
+   * the same as the characters of `suffix`? An `offset` outside of
+   * `[0, s.length]` never matches.
+   */
+  def hasSuffixAt(s: CharSequence, suffix: CharSequence, offset: Int): Boolean =
+    if (offset < 0) {
+      // Without this guard a negative offset returns false or throws an
+      // IndexOutOfBoundsException, depending on the lengths involved.
+      false
+    } else if (offset == 0 && (s.eq(suffix) || s == suffix)) {
+      true
+    } else if (suffix.length != (s.length - offset)) {
+      false
+    } else {
+      // The lengths line up, so compare character by character.
+      @annotation.tailrec
+      def go(i: Int): Boolean =
+        if (i == suffix.length) true
+        else if (suffix.charAt(i) == s.charAt(offset + i)) go(i + 1)
+        else false
+
+      go(0)
+    }
+
+  /**
+   * Do two [[CharSequence]]s contain the same characters?
+   *
+   * [[CharSequence]] equality is not sufficient because
+   * [[CharSequence]]s of different types may not consider other
+   * [[CharSequence]]s containing the same characters equivalent.
+   */
+  def sameCharacters(s1: CharSequence, s2: CharSequence): Boolean =
+    hasSuffixAt(s1, s2, 0)
+
+  /**
+   * This method implements the product definition of a token matching a
+   * keyword. A token matches when either of these holds:
+   *
+   * - The token contains the same characters as the keyword.
+   * - The token contains the same characters as the keyword after
+   *   dropping a leading '#' or '@' from the token.
+   *
+   * The intention is that a keyword matches an identical hashtag, but
+   * if the keyword itself is a hashtag, it only matches the hashtag
+   * form.
+   *
+   * The tokenization process should rule out tokens or keywords that
+   * start with multiple '#' characters, even though this implementation
+   * allows for e.g. token "##a" to match "#a".
+   */
+  def tokenMatches(token: CharSequence, keyword: CharSequence): Boolean =
+    if (sameCharacters(token, keyword)) true
+    else if (token.length == 0) false
+    else {
+      val tokenStart = token.charAt(0)
+      (tokenStart == '#' || tokenStart == '@') && hasSuffixAt(token, keyword, 1)
+    }
+}
+
+/**
+ * An ordered sequence of normalized tokens. Which tokens are produced
+ * depends on the locale in which the text was parsed and on the version
+ * of the penguin library that was used at tokenization time.
+ */
+case class TokenSequence private[matching] (toIndexedSeq: IndexedSeq[CharSequence]) {
+  import TokenSequence.tokenMatches
+
+  private def apply(i: Int): CharSequence = toIndexedSeq(i)
+
+  def isEmpty: Boolean = toIndexedSeq.isEmpty
+  def nonEmpty: Boolean = toIndexedSeq.nonEmpty
+
+  /**
+   * Does the supplied sequence of keywords match a consecutive run of
+   * tokens somewhere within this sequence?
+   */
+  def containsKeywordSequence(keywords: TokenSequence): Boolean = {
+    val keywordCount = keywords.toIndexedSeq.length
+    // Last position at which a run of `keywordCount` tokens still fits.
+    val lastStart = toIndexedSeq.length - keywordCount
+
+    // `start` is the candidate position in this sequence; `matched` is the
+    // number of leading keywords already matched at that position.
+    @annotation.tailrec
+    def scan(start: Int, matched: Int): Boolean =
+      if (matched == keywordCount) true
+      else if (start > lastStart) false
+      else if (tokenMatches(this(start + matched), keywords(matched))) scan(start, matched + 1)
+      else scan(start + 1, 0)
+
+    scan(0, 0)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala
@ -0,0 +1,156 @@
+package com.twitter.tweetypie.matching
+
+import com.twitter.common.text.language.LocaleUtil
+import com.twitter.common_internal.text.pipeline.TwitterTextNormalizer
+import com.twitter.common_internal.text.pipeline.TwitterTextTokenizer
+import com.twitter.common_internal.text.version.PenguinVersion
+import com.twitter.concurrent.Once
+import com.twitter.io.StreamIO
+import java.util.Locale
+import scala.collection.JavaConverters._
+
+/**
+ * Extract a sequence of normalized tokens from the input text. The
+ * normalization and tokenization are properly configured for keyword
+ * matching between texts.
+ */
+trait Tokenizer {
+
+  /**
+   * @param input the raw text to normalize and tokenize
+   * @return the tokens extracted from `input`
+   */
+  def tokenize(input: String): TokenSequence
+}
+
+object Tokenizer {
+
+  /**
+   * When a Penguin version is not explicitly specified, use this
+   * version of Penguin to perform normalization and tokenization. If
+   * you cache tokenized text, be sure to store the version as well, to
+   * avoid comparing text that was normalized with different algorithms.
+   */
+  val DefaultPenguinVersion: PenguinVersion = PenguinVersion.PENGUIN_6
+
+  /**
+   * If you already know the locale of the text that is being tokenized,
+   * use this method to get a tokenizer that is much more efficient than
+   * the Tweet or Query tokenizer, since it does not have to perform
+   * language detection.
+   *
+   * @param locale the locale of the text that will be tokenized
+   * @return a tokenizer for `locale` that uses [[DefaultPenguinVersion]]
+   */
+  def forLocale(locale: Locale): Tokenizer = get(locale, DefaultPenguinVersion)
+
+  /**
+   * Obtain a `Tokenizer` that will tokenize the text for the given
+   * locale and version of the Penguin library.
+   *
+   * @param locale the locale of the text that will be tokenized
+   * @param version the Penguin version whose factory builds the tokenizer
+   */
+  def get(locale: Locale, version: PenguinVersion): Tokenizer =
+    TokenizerFactories(version).forLocale(locale)
+
+  /**
+   * Encapsulates the configuration and use of [[TwitterTextTokenizer]]
+   * and [[TwitterTextNormalizer]].
+   *
+   * One factory exists per [[PenguinVersion]]; it hands out lightweight
+   * [[Tokenizer]]s that share the factory's normalizer and per-thread
+   * tokenizer.
+   */
+  private[this] class TokenizerFactory(version: PenguinVersion) {
+    // The normalizer is thread-safe, so share one instance.
+    private[this] val normalizer =
+      (new TwitterTextNormalizer.Builder(version)).build()
+
+    // The TwitterTextTokenizer is relatively expensive to build,
+    // and is not thread safe, so keep instances of it in a
+    // ThreadLocal.
+    private[this] val local =
+      new ThreadLocal[TwitterTextTokenizer] {
+        override def initialValue: TwitterTextTokenizer =
+          (new TwitterTextTokenizer.Builder(version)).build()
+      }
+
+    /**
+     * Obtain a [[Tokenizer]] for this combination of [[PenguinVersion]]
+     * and [[Locale]].
+     */
+    def forLocale(locale: Locale): Tokenizer =
+      new Tokenizer {
+        override def tokenize(input: String): TokenSequence = {
+          // Fetch the calling thread's tokenizer at call time, so the returned
+          // Tokenizer can be used from any thread.
+          val stream = local.get.getTwitterTokenStreamFor(locale)
+          // Normalize first, then point the token stream at the normalized text.
+          stream.reset(normalizer.normalize(input, locale))
+          val builder = IndexedSeq.newBuilder[CharSequence]
+          // NOTE(review): assumes each CharSequence returned by term() stays
+          // valid after the next incrementToken call -- confirm against the
+          // token stream implementation.
+          while (stream.incrementToken) builder += stream.term()
+          TokenSequence(builder.result())
+        }
+      }
+  }
+
+  /**
+   * Lookup from Penguin version to its TokenizerFactory. There are only a
+   * few Penguin versions, so a factory is created eagerly for each of
+   * them; that way no mutable state has to be managed.
+   */
+  private[this] val TokenizerFactories: PenguinVersion => TokenizerFactory = {
+    val factoryByVersion =
+      PenguinVersion.values.map(version => (version, new TokenizerFactory(version))).toMap
+    factoryByVersion
+  }
+
+  /**
+   * The set of locales used in warmup. These locales are mentioned in
+   * the logic of TwitterTextTokenizer and TwitterTextNormalizer.
+   */
+  private[this] val WarmUpLocales: Seq[Locale] = {
+    val namedLocales: Seq[Locale] =
+      Seq(
+        Locale.JAPANESE,
+        Locale.KOREAN,
+        LocaleUtil.UNKNOWN,
+        LocaleUtil.THAI,
+        LocaleUtil.ARABIC,
+        LocaleUtil.SWEDISH
+      )
+
+    val candidates =
+      namedLocales ++
+        LocaleUtil.CHINESE_JAPANESE_LOCALES.asScala ++
+        LocaleUtil.CJK_LOCALES.asScala
+
+    // Going through a Set drops locales that appear more than once.
+    candidates.toSet.toArray.toSeq
+  }
+
+  /**
+   * Load the default inputs that are used for warming up this library.
+   *
+   * @return one string per line of the `warmup-text.txt` resource
+   * @throws java.io.FileNotFoundException if the resource cannot be found
+   */
+  def warmUpCorpus(): Seq[String] = {
+    val resourceName = "warmup-text.txt"
+    // getResourceAsStream reports a missing resource by returning null. Fail
+    // with a descriptive error instead of a NullPointerException.
+    val stream =
+      Option(getClass.getResourceAsStream(resourceName)).getOrElse {
+        throw new java.io.FileNotFoundException(s"Missing classpath resource: $resourceName")
+      }
+    val bytes =
+      try StreamIO.buffer(stream)
+      finally stream.close()
+    bytes.toString("UTF-8").linesIterator.toArray.toSeq
+  }
+
+  /**
+   * Run the tokenizers and the keyword-mute validation of this library
+   * over the specified strings. In general, prefer [[warmUp]] to this
+   * method.
+   */
+  def warmUpWith(ver: PenguinVersion, texts: Iterable[String]): Unit =
+    for (text <- texts) {
+      // Known-locale path, once per warm-up locale.
+      for (locale <- WarmUpLocales) {
+        Tokenizer.get(locale, ver).tokenize(text)
+        UserMutes.builder().withPenguinVersion(ver).withLocale(locale).validate(text)
+      }
+
+      // Language-detection path.
+      TweetTokenizer.get(ver).tokenize(text)
+      UserMutes.builder().withPenguinVersion(ver).validate(text)
+    }
+
+  // Wraps the default warmup (default Penguin version, default corpus) in a
+  // `Once`; `warmUp()` goes through this wrapper.
+  private[this] val warmUpOnce = Once(warmUpWith(DefaultPenguinVersion, warmUpCorpus()))
+
+  /**
+   * The creation of the first TwitterTextTokenizer is relatively
+   * expensive, and tokenizing some texts may cause significant
+   * initialization.
+   *
+   * This method exercises the functionality of this library
+   * with a range of texts in order to perform as much initialization as
+   * possible before the library is used in a latency-sensitive way.
+   *
+   * The warmup routine will only run once. Subsequent invocations of
+   * `warmUp` will not do additional work, and will return once warmup is
+   * complete.
+   *
+   * The warmup will take on the order of seconds.
+   */
+  def warmUp(): Unit = warmUpOnce()
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala
@ -0,0 +1,45 @@
+package com.twitter.tweetypie.matching
+
+import com.twitter.common.text.pipeline.TwitterLanguageIdentifier
+import com.twitter.common_internal.text.version.PenguinVersion
+import java.util.Locale
+
+object TweetTokenizer extends Tokenizer {
+  type LocalePicking = Option[Locale] => Tokenizer
+
+  /**
+   * Get a function that picks a Tokenizer for an optional locale: a
+   * known locale gets the Tokenizer for that locale, and an unknown
+   * locale falls back to this language-detecting Tokenizer.
+   */
+  def localePicking: LocalePicking =
+    maybeLocale => maybeLocale.fold[Tokenizer](TweetTokenizer)(Tokenizer.forLocale)
+
+  private[this] val tweetLangIdentifier =
+    (new TwitterLanguageIdentifier.Builder).buildForTweet()
+
+  /**
+   * Get a Tokenizer that first detects the language of the Tweet text
+   * and then tokenizes the text for the detected locale. If the locale
+   * of the text is already known, use `Tokenizer.get` instead, because
+   * it is much cheaper.
+   */
+  def get(version: PenguinVersion): Tokenizer =
+    new Tokenizer {
+      override def tokenize(text: String): TokenSequence = {
+        val detected = tweetLangIdentifier.identify(text)
+        Tokenizer.get(detected.getLocale, version).tokenize(text)
+      }
+    }
+
+  private[this] val Default = get(Tokenizer.DefaultPenguinVersion)
+
+  /**
+   * Tokenize the given text with Tweet language detection and
+   * `Tokenizer.DefaultPenguinVersion`. Prefer `Tokenizer.forLocale` when
+   * the language of the text is already known.
+   */
+  override def tokenize(tweetText: String): TokenSequence =
+    Default.tokenize(tweetText)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala
@ -0,0 +1,128 @@
+package com.twitter.tweetypie.matching
+
+import com.twitter.common.text.pipeline.TwitterLanguageIdentifier
+import com.twitter.common_internal.text.version.PenguinVersion
+import java.util.Locale
+import scala.collection.JavaConversions.asScalaBuffer
+
+object UserMutesBuilder {
+  // Builder returned by `UserMutes.builder()`: default Penguin version and no
+  // fixed locale, so the locale is detected from the text.
+  private[matching] val Default =
+    new UserMutesBuilder(Tokenizer.DefaultPenguinVersion, None)
+
+  // Language identifier built with `buildForQuery()`, shared by every builder
+  // that detects the locale of a muted phrase.
+  private val queryLangIdentifier =
+    (new TwitterLanguageIdentifier.Builder).buildForQuery()
+}
+
+/**
+ * Immutable builder of [[UserMutes]]. Each `with*` / `detectLocale` call
+ * returns a builder with the requested setting and leaves the receiver
+ * unchanged.
+ *
+ * @param penguinVersion the Penguin version used to tokenize muted phrases
+ * @param localeOpt the locale used to tokenize muted phrases; when empty, the
+ *   locale is detected from each phrase
+ */
+class UserMutesBuilder private (penguinVersion: PenguinVersion, localeOpt: Option[Locale]) {
+
+  /**
+   * Use the specified Penguin version when tokenizing a keyword mute
+   * string. In general, use the default version, unless you need to
+   * specify a particular version for compatibility with another system
+   * that is using that version.
+   */
+  def withPenguinVersion(ver: PenguinVersion): UserMutesBuilder =
+    if (ver == penguinVersion) this
+    else new UserMutesBuilder(ver, localeOpt)
+
+  /**
+   * Use the specified locale when tokenizing a keyword mute string.
+   */
+  def withLocale(locale: Locale): UserMutesBuilder =
+    if (localeOpt.contains(locale)) this
+    else new UserMutesBuilder(penguinVersion, Some(locale))
+
+  /**
+   * When tokenizing a user mute list, detect the language of the
+   * text. This is significantly more expensive than using a predefined
+   * locale, but is appropriate when the locale is not yet known.
+   *
+   * Any locale previously set with [[withLocale]] is discarded.
+   */
+  def detectLocale(): UserMutesBuilder =
+    if (localeOpt.isEmpty) this
+    // Drop the fixed locale; carrying it over would leave detection disabled.
+    else new UserMutesBuilder(penguinVersion, None)
+
+  private[this] lazy val tokenizer =
+    localeOpt match {
+      case None =>
+        // No locale was specified, so use a Tokenizer that performs
+        // language detection before tokenizing.
+        new Tokenizer {
+          override def tokenize(text: String): TokenSequence = {
+            val locale = UserMutesBuilder.queryLangIdentifier.identify(text).getLocale
+            Tokenizer.get(locale, penguinVersion).tokenize(text)
+          }
+        }
+
+      case Some(locale) =>
+        Tokenizer.get(locale, penguinVersion)
+    }
+
+  /**
+   * Given a list of the user's raw keyword mutes, return a preprocessed
+   * set of mutes suitable for matching against tweet text. If the input
+   * contains any phrases that fail validation, then they will be
+   * dropped.
+   */
+  def build(rawInput: Seq[String]): UserMutes =
+    UserMutes(rawInput.flatMap(validate(_).right.toOption))
+
+  /**
+   * Java-friendly API for processing a user's list of raw keyword mutes
+   * into a preprocessed form suitable for matching against text.
+   */
+  def fromJavaList(rawInput: java.util.List[String]): UserMutes =
+    build(asScalaBuffer(rawInput).toSeq)
+
+  /**
+   * Validate the raw user input muted phrase. Currently, the only
+   * inputs that are not valid for keyword muting are those inputs that
+   * do not contain any keywords, because those inputs would match all
+   * tweets.
+   *
+   * @return `Right` with the tokenized phrase, or `Left` with the
+   *   validation error when tokenization produces no tokens
+   */
+  def validate(mutedPhrase: String): Either[UserMutes.ValidationError, TokenSequence] = {
+    val keywords = tokenizer.tokenize(mutedPhrase)
+    if (keywords.isEmpty) UserMutes.EmptyPhraseError else Right(keywords)
+  }
+}
+
+object UserMutes {
+
+  /**
+   * Reasons for which a raw muted phrase is rejected by
+   * [[UserMutesBuilder.validate]].
+   */
+  sealed trait ValidationError
+
+  /**
+   * The phrase's tokenization did not produce any tokens
+   */
+  case object EmptyPhrase extends ValidationError
+
+  // Shared `Left` instance returned by validation for empty phrases.
+  private[matching] val EmptyPhraseError = Left(EmptyPhrase)
+
+  /**
+   * Get a [[UserMutesBuilder]] that uses the default Penguin version and
+   * performs language identification to choose a locale.
+   */
+  def builder(): UserMutesBuilder = UserMutesBuilder.Default
+}
+
+/**
+ * A user's muted keyword list, where every muted phrase has been
+ * preprocessed into a token sequence.
+ */
+case class UserMutes private[matching] (toSeq: Seq[TokenSequence]) {
+
+  /**
+   * Does at least one of the user's muted keyword sequences occur
+   * within the supplied text?
+   */
+  def matches(text: TokenSequence): Boolean =
+    toSeq.exists(mutedKeywords => text.containsKeywordSequence(mutedKeywords))
+
+  /**
+   * Find the muted keyword sequences that occur within the supplied
+   * text. The result holds the positions of those sequences within this
+   * muted keyword list (`toSeq`), not positions within the text.
+   */
+  def find(text: TokenSequence): Seq[Int] =
+    for {
+      (mutedKeywords, position) <- toSeq.zipWithIndex
+      if text.containsKeywordSequence(mutedKeywords)
+    } yield position
+
+  def isEmpty: Boolean = toSeq.isEmpty
+  def nonEmpty: Boolean = toSeq.nonEmpty
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD
@ -0,0 +1,17 @@
+# Scala library built from every *.scala file in this directory (the media
+# helpers), targeting Java 8.
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "mediaservices/commons/src/main/thrift:thrift-scala",
+        "scrooge/scrooge-core/src/main/scala",
+        "tweetypie/servo/util/src/main/scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
+        "tco-util",
+        "tweetypie/common/src/scala/com/twitter/tweetypie/util",
+        "util/util-logging/src/main/scala/com/twitter/logging",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala
@ -0,0 +1,149 @@
+package com.twitter.tweetypie
+package media
+
+import com.twitter.mediaservices.commons.thriftscala.MediaCategory
+import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
+import com.twitter.tco_util.TcoSlug
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.tweetypie.util.TweetLenses
+
+/**
+ * A smörgåsbord of media-related helper methods.
+ */
+object Media {
+  // NOTE(review): there is no ';' between the media type and the `codecs`
+  // parameter in this string -- confirm that consumers expect exactly this.
+  val AnimatedGifContentType = "video/mp4 codecs=avc1.42E0"
+
+  /** The three URL forms (expanded, short and display) of a media link. */
+  case class MediaTco(expandedUrl: String, url: String, displayUrl: String)
+
+  /** Content types that [[isImage]] accepts as images. */
+  val ImageContentTypes: Set[MediaContentType] =
+    Set[MediaContentType](
+      MediaContentType.ImageJpeg,
+      MediaContentType.ImagePng,
+      MediaContentType.ImageGif
+    )
+
+  /** Content types of animated GIFs (only `VideoMp4`). */
+  val AnimatedGifContentTypes: Set[MediaContentType] =
+    Set[MediaContentType](
+      MediaContentType.VideoMp4
+    )
+
+  /** Content types of videos (only `VideoGeneric`). */
+  val VideoContentTypes: Set[MediaContentType] =
+    Set[MediaContentType](
+      MediaContentType.VideoGeneric
+    )
+
+  /** Every content type listed in the three sets above. */
+  val InUseContentTypes: Set[MediaContentType] =
+    Set[MediaContentType](
+      MediaContentType.ImageGif,
+      MediaContentType.ImageJpeg,
+      MediaContentType.ImagePng,
+      MediaContentType.VideoMp4,
+      MediaContentType.VideoGeneric
+    )
+
+  /** Is the content type one of [[ImageContentTypes]]? */
+  def isImage(contentType: MediaContentType): Boolean =
+    ImageContentTypes.contains(contentType)
+
+  /**
+   * The string form of a content type; the inverse of [[stringToContentType]].
+   *
+   * @throws IllegalArgumentException for a content type that is not listed here
+   */
+  def contentTypeToString(contentType: MediaContentType): String =
+    contentType match {
+      case MediaContentType.ImageGif => "image/gif"
+      case MediaContentType.ImageJpeg => "image/jpeg"
+      case MediaContentType.ImagePng => "image/png"
+      case MediaContentType.VideoMp4 => "video/mp4"
+      case MediaContentType.VideoGeneric => "video"
+      case _ => throw new IllegalArgumentException(s"UnknownMediaContentType: $contentType")
+    }
+
+  /**
+   * Parse the string form produced by [[contentTypeToString]].
+   *
+   * @throws IllegalArgumentException for a string that is not listed here
+   */
+  def stringToContentType(str: String): MediaContentType =
+    str match {
+      case "image/gif" => MediaContentType.ImageGif
+      case "image/jpeg" => MediaContentType.ImageJpeg
+      case "image/png" => MediaContentType.ImagePng
+      case "video/mp4" => MediaContentType.VideoMp4
+      case "video" => MediaContentType.VideoGeneric
+      case _ => throw new IllegalArgumentException(s"Unknown Content Type String: $str")
+    }
+
+  /**
+   * The file extension (without a leading dot) for a content type. Unlike
+   * [[contentTypeToString]], this does not throw for a content type that is
+   * not listed: it returns "unknown". `VideoGeneric` maps to the empty string.
+   */
+  def extensionForContentType(cType: MediaContentType): String =
+    cType match {
+      case MediaContentType.ImageJpeg => "jpg"
+      case MediaContentType.ImagePng => "png"
+      case MediaContentType.ImageGif => "gif"
+      case MediaContentType.VideoMp4 => "mp4"
+      case MediaContentType.VideoGeneric => ""
+      case _ => "unknown"
+    }
+
+  /**
+   * Extract a URL entity from a media entity.
+   *
+   * The indices and the short URL are copied as they are; the expanded
+   * and display URLs of the media entity are always wrapped in `Some`.
+   */
+  def extractUrlEntity(mediaEntity: MediaEntity): UrlEntity =
+    UrlEntity(
+      fromIndex = mediaEntity.fromIndex,
+      toIndex = mediaEntity.toIndex,
+      url = mediaEntity.url,
+      expanded = Some(mediaEntity.expandedUrl),
+      display = Some(mediaEntity.displayUrl)
+    )
+
+  /**
+   * Copy the indices and URLs of the URL entity into the media entity.
+   *
+   * The expanded URL is taken from the URL entity when it has one, then
+   * from the media entity, and finally falls back to the short URL. The
+   * display URL is derived from the t.co slug of the short URL when the
+   * short URL has one; otherwise it is the URL entity's expanded URL, or
+   * the short URL when there is no expanded URL.
+   */
+  def copyFromUrlEntity(mediaEntity: MediaEntity, urlEntity: UrlEntity): MediaEntity = {
+    val shortUrl = urlEntity.url
+
+    val resolvedExpandedUrl =
+      urlEntity.expanded match {
+        case Some(expanded) => expanded
+        case None => Option(mediaEntity.expandedUrl).getOrElse(shortUrl)
+      }
+
+    val resolvedDisplayUrl =
+      shortUrl match {
+        case TcoSlug(slug) => MediaUrl.Display.fromTcoSlug(slug)
+        case _ => urlEntity.expanded.getOrElse(shortUrl)
+      }
+
+    mediaEntity.copy(
+      fromIndex = urlEntity.fromIndex,
+      toIndex = urlEntity.toIndex,
+      url = shortUrl,
+      expandedUrl = resolvedExpandedUrl,
+      displayUrl = resolvedDisplayUrl
+    )
+  }
+
+  /** The aspect ratio of a media size; see the (width, height) overload. */
+  def getAspectRatio(size: MediaSize): AspectRatio =
+    getAspectRatio(size.width, size.height)
+
+  /**
+   * Reduce width and height by their greatest common divisor.
+   *
+   * @throws IllegalArgumentException if either dimension is zero
+   */
+  def getAspectRatio(width: Int, height: Int): AspectRatio = {
+    if (width == 0 || height == 0) {
+      throw new IllegalArgumentException(s"Dimensions must be non zero: ($width, $height)")
+    }
+
+    // Euclid's algorithm.
+    @annotation.tailrec
+    def gcdOf(a: Int, b: Int): Int =
+      b match {
+        case 0 => a
+        case _ => gcdOf(b, a % b)
+      }
+
+    val larger = math.max(width, height)
+    val smaller = math.min(width, height)
+    val divisor = gcdOf(larger, smaller)
+
+    AspectRatio((width / divisor).toShort, (height / divisor).toShort)
+  }
+
+  /**
+   * Return just the media that belongs to this tweet
+   */
+  def ownMedia(tweet: Tweet): Seq[MediaEntity] =
+    TweetLenses.media.get(tweet).filter(isOwnMedia(tweet.id, _))
+
+  /**
+   * Does the given media entity, which was found on the tweet with the specified
+   * tweetId, belong to that tweet? An entity without a `sourceStatusId` is
+   * treated as belonging to the tweet.
+   */
+  def isOwnMedia(tweetId: TweetId, entity: MediaEntity): Boolean =
+    entity.sourceStatusId.forall(_ == tweetId)
+
+  /**
+   * Mixed Media is any case where there is more than one media item and
+   * at least one of them is not an image. An item counts as not being an
+   * image when its media info is something other than image info, or
+   * when the category of its media key is not `TweetImage`.
+   */
+  def isMixedMedia(mediaEntities: Seq[MediaEntity]): Boolean = {
+    def hasNonImageInfo: Boolean =
+      mediaEntities.exists { entity =>
+        entity.mediaInfo.exists {
+          case _: MediaInfo.ImageInfo => false
+          case _ => true
+        }
+      }
+
+    def hasNonImageCategory: Boolean =
+      mediaEntities.exists { entity =>
+        entity.mediaKey.exists(key => key.mediaCategory != MediaCategory.TweetImage)
+      }
+
+    mediaEntities.length > 1 && (hasNonImageInfo || hasNonImageCategory)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala
@ -0,0 +1,108 @@
+package com.twitter.tweetypie
+package media
+
+import com.twitter.logging.Logger
+import com.twitter.tweetypie.thriftscala.MediaEntity
+import com.twitter.tweetypie.thriftscala.UrlEntity
+
+/**
+ * Creating and parsing tweet media entity URLs.
+ *
+ * There are four kinds of URL in a media entity:
+ *
+ *   - Display URLs: pic.twitter.com aliases for the short URL, for
+ *     embedding in the tweet text.
+ *
+ *   - Short URLs: regular t.co URLs that expand to the permalink URL.
+ *
+ *   - Permalink URLs: link to a page that displays the media after
+ *     doing authorization
+ *
+ *   - Asset URLs: links to the actual media asset.
+ *
+ */
+object MediaUrl {
+  private[this] val log = Logger(getClass)
+
+  /**
+   * Display URLs, the value that goes into the displayUrl field of the
+   * media entity. A display URL behaves exactly like a t.co link; only
+   * the domain is different.
+   */
+  object Display {
+    val Root = "pic.twitter.com/"
+
+    /** The display URL for the given t.co slug. */
+    def fromTcoSlug(tcoSlug: String): String = s"$Root$tcoSlug"
+  }
+
+  /**
+   * The link target for the link in the tweet text (the expanded URL
+   * for the media, copied from the URL entity.) For native photos,
+   * this is the tweet permalink page.
+   *
+   * For users without a screen name ("handleless" or NoScreenName users)
+   * a permalink to /i/status/:tweet_id is used.
+   */
+  object Permalink {
+    val Root = "https://twitter.com/"
+    val Internal = "i"
+    val PhotoSuffix = "/photo/1"
+    val VideoSuffix = "/video/1"
+
+    /**
+     * Build the permalink of the media of a tweet.
+     *
+     * @param screenName the author's screen name; when empty, `Internal` is
+     *   used in its place
+     * @param tweetId the id of the tweet that the permalink points to
+     * @param isVideo selects `VideoSuffix` instead of `PhotoSuffix`
+     */
+    def apply(screenName: String, tweetId: TweetId, isVideo: Boolean): String =
+      Root +
+        (if (screenName.isEmpty) Internal else screenName) +
+        "/status/" +
+        tweetId +
+        (if (isVideo) VideoSuffix else PhotoSuffix)
+
+    // The '.' of the host is escaped so that it only matches a literal dot;
+    // unescaped, it would accept hosts such as "twitterXcom".
+    private[this] val PermalinkRegex =
+      """https?://twitter\.com/(?:#!/)?\w+/status/(\d+)/(?:photo|video)/\d+""".r
+
+    // Extract the tweet id from a permalink; None when the string is not a
+    // media permalink or the id does not fit in a Long.
+    private[this] def getTweetId(permalink: String): Option[TweetId] =
+      permalink match {
+        case PermalinkRegex(tweetIdStr) =>
+          try {
+            Some(tweetIdStr.toLong)
+          } catch {
+            // Digits too big to fit in a Long
+            case _: NumberFormatException => None
+          }
+        case _ => None
+      }
+
+    /** The tweet id in the expanded URL of the entity, if it is a media permalink. */
+    def getTweetId(urlEntity: UrlEntity): Option[TweetId] =
+      urlEntity.expanded.flatMap(getTweetId)
+
+    /** Is `permalink` a media permalink of the tweet with the given id? */
+    def hasTweetId(permalink: String, tweetId: TweetId): Boolean =
+      getTweetId(permalink).contains(tweetId)
+
+    /** Is the expanded URL of the media entity a media permalink of the given tweet? */
+    def hasTweetId(mediaEntity: MediaEntity, tweetId: TweetId): Boolean =
+      hasTweetId(mediaEntity.expandedUrl, tweetId)
+
+    /** Is the expanded URL of the URL entity a media permalink of the given tweet? */
+    def hasTweetId(urlEntity: UrlEntity, tweetId: TweetId): Boolean =
+      getTweetId(urlEntity).contains(tweetId)
+  }
+
+  /**
+   * Converts a url that starts with "https://" to one that starts with "http://".
+   *
+   * Only the leading scheme is rewritten. A url that does not start with
+   * "https://" is returned unchanged, and any "https://" that occurs later
+   * in the url (for example inside a query parameter) is left alone.
+   */
+  def httpsToHttp(url: String): String =
+    if (url.startsWith("https://")) "http://" + url.substring("https://".length)
+    else url
+
+  /**
+   * Gets the last path element from an asset url, i.e. everything after
+   * the last '/'. This exists temporarily to support the now deprecated
+   * mediaPath element in MediaEntity.
+   *
+   * When the url has no '/', an error is logged and the empty string is
+   * returned.
+   */
+  def mediaPathFromUrl(url: String): String = {
+    val lastSlash = url.lastIndexOf('/')
+    if (lastSlash >= 0) {
+      url.substring(lastSlash + 1)
+    } else {
+      log.error("Invalid media path. Could not find last element: " + url)
+      // A broken preview URL for the client is better than failing the
+      // whole request.
+      ""
+    }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala
@ -0,0 +1,7 @@
+package com.twitter.tweetypie
+
+/**
+ * Type aliases shared by the media helpers: all three kinds of id are
+ * plain `Long`s.
+ */
+package object media {
+  type TweetId = Long
+  type UserId = Long
+  type MediaId = Long
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala
@ -0,0 +1,80 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.kv.ManhattanValue
+import com.twitter.tweetypie.storage.TweetUtils.collectWithRateLimitCheck
+import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.util.Time
+
+/**
+ * Builds the storage operation that writes a tweet as a set of Manhattan records:
+ * one record holding the packed core fields and one record for every other field.
+ */
+object AddTweetHandler {
+
+  /** Shape of the add operation once its collaborators and timestamp are explicit. */
+  private[storage] type InternalAddTweet = (
+    Tweet,
+    ManhattanOperations.Insert,
+    Scribe,
+    StatsReceiver,
+    Time
+  ) => Stitch[Unit]
+
+  /** Binds the collaborators; the write timestamp is taken when a tweet is added. */
+  def apply(
+    insert: ManhattanOperations.Insert,
+    scribe: Scribe,
+    stats: StatsReceiver
+  ): TweetStorageClient.AddTweet =
+    tweet => doAddTweet(tweet, insert, scribe, stats, Time.now)
+
+  /**
+   * Turns a stored tweet into the records to insert. Every record carries the same
+   * timestamp. The non-core field records come first; the core record is last.
+   */
+  def makeRecords(
+    storedTweet: StoredTweet,
+    timestamp: Time
+  ): Seq[TweetManhattanRecord] = {
+    val mhTimestamp = Some(timestamp)
+
+    // Core fields are packed into a single blob stored under the core fields key.
+    val packedCore = CoreFieldsCodec.toTFieldBlob(CoreFieldsCodec.fromTweet(storedTweet))
+    val coreRecord = TweetManhattanRecord(
+      TweetKey.coreFieldsKey(storedTweet.id),
+      ManhattanValue(TFieldBlobCodec.toByteBuffer(packedCore), mhTimestamp)
+    )
+
+    // Every remaining field present on the tweet gets a record of its own.
+    val remainingFieldIds =
+      TweetFields.nonCoreInternalFields ++ TweetFields.getAdditionalFieldIds(storedTweet)
+    val fieldRecords =
+      (for ((fieldId, blob) <- storedTweet.getFieldBlobs(remainingFieldIds))
+        yield TweetManhattanRecord(
+          TweetKey.fieldKey(storedTweet.id, fieldId),
+          ManhattanValue(TFieldBlobCodec.toByteBuffer(blob), mhTimestamp)
+        )).toSeq
+
+    fieldRecords :+ coreRecord
+  }
+
+  /**
+   * Converts the tweet, issues one insert per record, updates the per-field counters
+   * and, once the combined insert succeeds, scribes the stored tweet.
+   *
+   * Asserts that the tweet has coreData before doing anything else.
+   */
+  private[storage] val doAddTweet: InternalAddTweet = (tweet, insert, scribe, stats, timestamp) => {
+    assert(tweet.coreData.isDefined, s"Tweet ${tweet.id} is missing coreData: $tweet")
+
+    val storedTweet = StorageConversions.toStoredTweet(tweet)
+    val pendingInserts = makeRecords(storedTweet, timestamp).map(record => insert(record))
+
+    // Each insert is lifted to a Try so that all outcomes can be inspected together
+    // by collectWithRateLimitCheck before being lowered back into a single result.
+    val combined =
+      Stitch
+        .collect(pendingInserts.map(_.liftToTry))
+        .map(collectWithRateLimitCheck)
+        .lowerFromTry
+
+    Stats.updatePerFieldQpsCounters(
+      "addTweet",
+      TweetFields.getAdditionalFieldIds(storedTweet),
+      1,
+      stats
+    )
+
+    combined.unit.onSuccess(_ => scribe.logAdded(storedTweet))
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD
@ -0,0 +1,47 @@
+# Library target covering every Scala source file in this directory.
+scala_library(
+    sources = ["*.scala"],
+    # Compiled with the "fatal_warnings" compiler option set.
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    strict_deps = True,
+    tags = [
+        "bazel-compatible",
+        "bazel-incompatible-scaladoc",
+    ],
+    # With strict_deps enabled above, dependencies are listed explicitly here.
+    dependencies = [
+        "3rdparty/jvm/com/chuusai:shapeless",
+        "3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind",
+        "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala",
+        "3rdparty/jvm/com/google/guava",
+        "3rdparty/jvm/com/twitter/bijection:core",
+        "3rdparty/jvm/com/twitter/bijection:scrooge",
+        "3rdparty/jvm/com/twitter/bijection:thrift",
+        "3rdparty/jvm/commons-codec",
+        "3rdparty/jvm/org/apache/thrift:libthrift",
+        "diffshow",
+        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization",
+        "finagle/finagle-core/src/main",
+        "finagle/finagle-stats",
+        "finagle/finagle-thriftmux/src/main/scala",
+        "mediaservices/commons/src/main/thrift:thrift-scala",
+        "scrooge/scrooge-serializer/src/main/scala",
+        "tweetypie/servo/repo/src/main/scala",
+        "tweetypie/servo/util",
+        "snowflake:id",
+        "src/thrift/com/twitter/escherbird:media-annotation-structs-scala",
+        "src/thrift/com/twitter/manhattan:internal-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
+        "stitch/stitch-core",
+        "storage/clients/manhattan/client/src/main/scala",
+        "tbird-thrift:scala",
+        "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields",
+        "tweetypie/common/src/scala/com/twitter/tweetypie/client_id",
+        "tweetypie/common/src/scala/com/twitter/tweetypie/util",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal:storage_internal-scala",
+        "util-internal/scribe",
+        "util/util-core:scala",
+        "util/util-slf4j-api/src/main/scala/com/twitter/util/logging",
+        "util/util-stats/src/main/scala",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala
@ -0,0 +1,20 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.util.Time
+
+/**
+ * Builds the operation that marks a tweet as bounce-deleted by inserting a
+ * BounceDeleted state record, then scribes the removal once the insert succeeds.
+ */
+object BounceDeleteHandler {
+  def apply(
+    insert: ManhattanOperations.Insert,
+    scribe: Scribe
+  ): TweetStorageClient.BounceDelete = { tweetId =>
+    // The same instant is used for the state record and for the scribe entry.
+    val deletedAt = Time.now
+    val stateRecord = TweetStateRecord.BounceDeleted(tweetId, deletedAt.inMillis)
+
+    insert(stateRecord.toTweetMhRecord).onSuccess { _ =>
+      scribe.logRemoved(tweetId, deletedAt, isSoftDeleted = true)
+    }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala
@ -0,0 +1,242 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.bijection.Conversion.asMethod
+import com.twitter.bijection.Injection
+import com.twitter.scrooge.TFieldBlob
+import com.twitter.storage.client.manhattan.kv._
+import com.twitter.tweetypie.storage.Response.FieldResponse
+import com.twitter.tweetypie.storage.Response.FieldResponseCode
+import com.twitter.tweetypie.storage_internal.thriftscala.CoreFields
+import com.twitter.tweetypie.storage_internal.thriftscala.InternalTweet
+import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
+import java.io.ByteArrayOutputStream
+import java.nio.ByteBuffer
+import org.apache.thrift.protocol.TBinaryProtocol
+import org.apache.thrift.transport.TIOStreamTransport
+import org.apache.thrift.transport.TMemoryInputTransport
+import scala.collection.immutable
+import scala.util.control.NoStackTrace
+
+// NOTE: All field ids and Tweet structure in this file correspond to the StoredTweet struct ONLY
+
+/**
+ * Converts between `Array[Byte]` and `ByteBuffer` using the `as` conversion syntax
+ * that the file imports from bijection (`Conversion.asMethod`).
+ */
+object ByteArrayCodec {
+  // NOTE(review): whether the result shares storage with the input or is a copy depends
+  // on the conversion bijection resolves here -- confirm before mutating either side.
+  def toByteBuffer(byteArray: Array[Byte]): ByteBuffer = byteArray.as[ByteBuffer]
+  def fromByteBuffer(buffer: ByteBuffer): Array[Byte] = buffer.as[Array[Byte]]
+}
+
+/** Converts between `String` and `ByteBuffer` through an intermediate `Array[Byte]`. */
+object StringCodec {
+  // Single injection obtained by connecting String -> Array[Byte] -> ByteBuffer.
+  private val codec = Injection.connect[String, Array[Byte], ByteBuffer]
+
+  def toByteBuffer(strValue: String): ByteBuffer = codec.apply(strValue)
+
+  def fromByteBuffer(buffer: ByteBuffer): String = {
+    // NOTE(review): `.get` presumably throws when the inversion fails -- confirm
+    // against bijection's Injection.invert.
+    val decoded = codec.invert(buffer)
+    decoded.get
+  }
+}
+
+/**
+ * Terminology
+ * -----------
+ * Tweet id field             : The field number of 'tweetId' in the 'Tweet' thrift structure (i.e "1")
+ *
+ * First AdditionalField id   : The ID of the first additional field in 'Tweet' thrift structure. All field Ids less than this are
+ *                              considered internal and all the ids greater than or equal to this field id are considered 'Additional fields'.
+ *                              This is set to 100.
+ *
+ * Internal Fields            : Fields with ids [1 to firstAdditionalFieldid) (excluding firstAdditionalFieldId)
+ *
+ * Core fields                : (Subset of Internal fields)- Fields with ids [1 to 8, 19]. These fields are "packed" together and stored
+ *                              under a single key. This key is referred to as "CoreFieldsKey" (see @TweetKeyType.CoreFieldsKey).
+ *                              Note: Actually field 1 is skipped when packing as this field is the tweet id and it need not be
+ *                              explicitly stored since the pkey already contains the tweet Id)
+ *
+ * Root Core field id         : The field id under which the packed core fields are stored in Manhattan. (This is field Id "1")
+ *
+ * Required fields            : (Subset of Core fields) - Fields with ids [1 to 5] that MUST be present on every tweet.
+ *
+ * Additional Fields          : All fields with field ids >= 'firstAdditionalFieldId'
+ *
+ * Compiled Additional fields : (Subset of Additional Fields) - All fields that the storage library knows about
+ *                              (i.e present on the latest storage_internal.thrift that is compiled-in).
+ *
+ * Passthrough fields         : (Subset of Additional Fields) - The fields on storage_internal.thrift that the storage library is NOT aware of
+ *                              These field ids are obtained by looking at the "_passThroughFields" member of the scrooge-generated
+ *                             'Tweet' object.
+ *
+ * coreFieldsIdInInternalTweet: This is the field id of the core fields (the only field) in the Internal Tweet struct
+ */
+object TweetFields {
+  // Boundary between internal and additional fields, plus two well-known field ids.
+  val firstAdditionalFieldId: Short = 100
+  val tweetIdField: Short = 1
+  val geoFieldId: Short = 9
+
+  // All core field values are stored, in serialized form, under this field id.
+  val rootCoreFieldId: Short = 1
+
+  // Ids 1 through 8 followed by the quoted tweet field id.
+  val coreFieldIds: immutable.IndexedSeq[FieldId] = {
+    val quotedTweetFieldId = 19
+    ((1 to 8) :+ quotedTweetFieldId).map(_.toShort)
+  }
+  val requiredFieldIds: immutable.IndexedSeq[FieldId] = (1 to 5).map(id => id.toShort)
+
+  val coreFieldsIdInInternalTweet: Short = 1
+
+  // StoredTweet fields at or above the additional-field boundary.
+  val compiledAdditionalFieldIds: Seq[FieldId] =
+    StoredTweet.metaData.fields.collect {
+      case field if field.id >= firstAdditionalFieldId => field.id
+    }
+
+  // StoredTweet fields below the additional-field boundary.
+  val internalFieldIds: Seq[FieldId] =
+    StoredTweet.metaData.fields.collect {
+      case field if field.id < firstAdditionalFieldId => field.id
+    }
+
+  // Internal fields that are not packed into the core fields record.
+  val nonCoreInternalFields: Seq[FieldId] = {
+    val coreIds = coreFieldIds.toSet
+    (internalFieldIds.toSet -- coreIds).toSeq
+  }
+
+  /** Compiled additional field ids followed by the tweet's passthrough field ids. */
+  def getAdditionalFieldIds(tweet: StoredTweet): Seq[FieldId] = {
+    val passthroughIds = tweet._passthroughFields.keys.toSeq
+    compiledAdditionalFieldIds ++ passthroughIds
+  }
+}
+
+/**
+ * Helper object to convert TFieldBlob to ByteBuffer that gets stored in Manhattan.
+ *
+ * The following is the format in which the TFieldBlob gets stored:
+ *    [Version][TField][TFieldBlob]
+ */
+object TFieldBlobCodec {
+  val BinaryProtocolFactory: TBinaryProtocol.Factory = new TBinaryProtocol.Factory()
+  val FormatVersion = 1.0
+
+  /** Writes the format version, the field header and the field data, in that order. */
+  def toByteBuffer(tFieldBlob: TFieldBlob): ByteBuffer = {
+    val out = new ByteArrayOutputStream()
+    val protocol = BinaryProtocolFactory.getProtocol(new TIOStreamTransport(out))
+
+    protocol.writeDouble(FormatVersion)
+    protocol.writeFieldBegin(tFieldBlob.field)
+    protocol.writeBinary(ByteArrayCodec.toByteBuffer(tFieldBlob.data))
+
+    ByteArrayCodec.toByteBuffer(out.toByteArray)
+  }
+
+  /**
+   * Reads back what [[toByteBuffer]] wrote.
+   *
+   * Throws VersionMismatchError when the leading version differs from FormatVersion.
+   */
+  def fromByteBuffer(buffer: ByteBuffer): TFieldBlob = {
+    val bytes = ByteArrayCodec.fromByteBuffer(buffer)
+    val protocol = BinaryProtocolFactory.getProtocol(new TMemoryInputTransport(bytes))
+
+    val version = protocol.readDouble()
+    if (version == FormatVersion) {
+      // The field header must be read before the binary payload that follows it.
+      val field = protocol.readFieldBegin()
+      val payload = ByteArrayCodec.fromByteBuffer(protocol.readBinary())
+      TFieldBlob(field, payload)
+    } else {
+      throw new VersionMismatchError(
+        s"Version mismatch in decoding ByteBuffer to TFieldBlob. Actual version: $version. Expected version: $FormatVersion"
+      )
+    }
+  }
+}
+
+/**
+ * Helper object to help convert 'CoreFields' object to/from TFieldBlob (and also to construct
+ * 'CoreFields' object from a 'StoredTweet' object)
+ *
+ * More details:
+ * - A subset of fields on the 'StoredTweet' thrift structure (2-8,19) are 'packaged' and stored
+ *   together as a serialized TFieldBlob object under a single key in Manhattan (see TweetKeyCodec
+ *   helper object above for more details).
+ *
+ * - To make packing/unpacking the fields to/from a TFieldBlob object easy, we created the following
+ *   two helper thrift structures 'CoreFields' and 'InternalTweet'
+ *
+ *       // The field Ids and types here MUST exactly match field Ids on 'StoredTweet' thrift structure.
+ *       struct CoreFields {
+ *          2: optional i64 user_id
+ *          ...
+ *          8: optional i64 contributor_id
+ *          ...
+ *          19: optional StoredQuotedTweet stored_quoted_tweet
+ *
+ *       }
+ *
+ *       // The field id of core fields MUST be "1"
+ *       struct InternalTweet {
+ *         1: CoreFields coreFields
+ *       }
+ *
+ * - Given the above two structures, packing/unpacking fields (2-8,19) on StoredTweet object into a TFieldBlob
+ *   becomes very trivial:
+ *     For packing:
+ *       (i) Copy fields (2-8,19) from StoredTweet object to a new CoreFields object
+ *      (ii) Create a new InternalTweet object with the 'CoreFields' object constructed in step (i) above
+ *     (iii) Extract field "1" as a TFieldBlob from InternalTweet (by calling the scrooge generated "getFieldBlob(1)"
+ *           function on the InternalTweet object)
+ *
+ *     For unpacking:
+ *       (i) Create an empty 'InternalTweet' object
+ *      (ii) Call scrooge-generated 'setField' by passing the tFieldBlob blob (created by packing steps above)
+ *     (iii) Doing step (ii) above will create a hydrated 'CoreFields' object that can be accessed by 'coreFields'
+ *           member of 'InternalTweet' object.
+ */
+object CoreFieldsCodec {
+  // Ids of every field declared on the CoreFields struct.
+  val coreFieldIds: Seq[FieldId] =
+    for (field <- CoreFields.metaData.fields) yield field.id
+
+  /**
+   * "Packs" the core fields: wraps them in an InternalTweet and returns the blob of
+   * the single field that holds them.
+   */
+  def toTFieldBlob(coreFields: CoreFields): TFieldBlob = {
+    val wrapper = InternalTweet(Some(coreFields))
+    wrapper.getFieldBlob(TweetFields.coreFieldsIdInInternalTweet).get
+  }
+
+  /**
+   * "Unpacks" a packed blob: sets it on an empty InternalTweet and reads the
+   * resulting coreFields member.
+   */
+  def fromTFieldBlob(tFieldBlob: TFieldBlob): CoreFields = {
+    val wrapper = InternalTweet().setField(tFieldBlob)
+    wrapper.coreFields.get
+  }
+
+  /** "Unpacks" a packed blob into a map of core field id -> TFieldBlob. */
+  def unpackFields(tFieldBlob: TFieldBlob): Map[Short, TFieldBlob] = {
+    val core = fromTFieldBlob(tFieldBlob)
+    core.getFieldBlobs(coreFieldIds)
+  }
+
+  /**
+   * Creates a 'CoreFields' object from a 'StoredTweet' object.
+   *
+   * The field ids and types on 'CoreFields' are expected to match the corresponding
+   * fields on 'StoredTweet', so each blob read from the tweet with getFieldBlob is
+   * passed directly to setField on the 'CoreFields' object.
+   */
+  def fromTweet(tweet: StoredTweet): CoreFields = {
+    // Blobs for the core fields that are actually present, in coreFieldIds order.
+    val presentBlobs = coreFieldIds.flatMap(fieldId => tweet.getFieldBlob(fieldId))
+    presentBlobs.foldLeft(CoreFields())((core, blob) => core.setField(blob))
+  }
+}
+
+/**
+ * Helper object to convert ManhattanException to FieldResponseCode thrift object
+ */
+object FieldResponseCodeCodec {
+  import FieldResponseCodec.ValueNotFoundException
+
+  /**
+   * Maps a ManhattanException to the FieldResponseCode reported to callers.
+   *
+   * ManhattanException can be subclassed outside its own file (ValueNotFoundException
+   * does so), so the compiler cannot check this match for exhaustiveness. Any subclass
+   * not listed explicitly is reported as a generic Error instead of raising a
+   * MatchError while an error response is being built.
+   */
+  def fromManhattanException(mhException: ManhattanException): FieldResponseCode = {
+    mhException match {
+      case _: ValueNotFoundException => FieldResponseCode.ValueNotFound
+      case _: InternalErrorManhattanException => FieldResponseCode.Error
+      case _: InvalidRequestManhattanException => FieldResponseCode.InvalidRequest
+      case _: DeniedManhattanException => FieldResponseCode.Error
+      case _: UnsatisfiableManhattanException => FieldResponseCode.Error
+      case _: TimeoutManhattanException => FieldResponseCode.Timeout
+      // Fallback for exception types this mapping does not know about.
+      case _ => FieldResponseCode.Error
+    }
+  }
+}
+
+/**
+ * Helper object to construct FieldResponse thrift object from an Exception.
+ * This is typically called to convert 'ManhattanException' object to 'FieldResponse' thrift object
+ */
+object FieldResponseCodec {
+  class ValueNotFoundException extends ManhattanException("Value not found!") with NoStackTrace
+  private[storage] val NotFound = new ValueNotFoundException
+
+  /**
+   * Builds a FieldResponse describing a failure.
+   *
+   * @param e             the failure; a ManhattanException is mapped through
+   *                      FieldResponseCodeCodec, anything else is reported as Error
+   * @param additionalMsg optional context that is prefixed to the failure message
+   * @return a response whose message is always defined
+   */
+  def fromThrowable(e: Throwable, additionalMsg: Option[String] = None): FieldResponse = {
+    val respCode = e match {
+      case mhException: ManhattanException =>
+        FieldResponseCodeCodec.fromManhattanException(mhException)
+      case _ => FieldResponseCode.Error
+    }
+
+    // getMessage is null for throwables constructed without a message. Calling a
+    // method on it would throw a NullPointerException, so fall back to the
+    // throwable's own string form.
+    val errMsg = Option(e.getMessage).getOrElse(e.toString)
+
+    val respMsg = additionalMsg.fold(errMsg)(prefix => prefix + ". " + errMsg)
+    FieldResponse(respCode, Some(respMsg))
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala
@ -0,0 +1,67 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
+import com.twitter.tweetypie.storage.TweetUtils._
+import com.twitter.util.Throw
+import com.twitter.util.Time
+
+/**
+ * Builds the operation that deletes a set of additional fields from a set of tweets
+ * and reports one response per tweet.
+ */
+object DeleteAdditionalFieldsHandler {
+  def apply(
+    delete: ManhattanOperations.Delete,
+    stats: StatsReceiver
+  ): TweetStorageClient.DeleteAdditionalFields =
+    (unfilteredTweetIds: Seq[TweetId], additionalFields: Seq[Field]) => {
+      val opName = "deleteAdditionalFields"
+
+      // Argument validation: at least one field, and all of them additional fields.
+      require(additionalFields.nonEmpty, "Additional fields to delete cannot be empty")
+      val fieldIdsToDelete = additionalFields.map(_.id)
+      require(
+        fieldIdsToDelete.forall(_ >= TweetFields.firstAdditionalFieldId),
+        s"Additional fields $additionalFields must be in additional field range (>= ${TweetFields.firstAdditionalFieldId})"
+      )
+
+      // Non-positive tweet ids are dropped before anything is recorded or deleted.
+      val tweetIds = unfilteredTweetIds.filter(_ > 0)
+
+      Stats.addWidthStat(opName, "tweetIds", tweetIds.size, stats)
+      Stats.addWidthStat(opName, "additionalFieldIds", fieldIdsToDelete.size, stats)
+      Stats.updatePerFieldQpsCounters(opName, fieldIdsToDelete, tweetIds.size, stats)
+
+      // One timestamp is shared by every delete issued for this call.
+      val mhTimestamp = Time.now
+
+      val perTweet = tweetIds.map { tweetId =>
+        val keys = fieldIdsToDelete.map(fieldId => TweetKey.additionalFieldsKey(tweetId, fieldId))
+        val deletions = keys.map(key => delete(key, Some(mhTimestamp)).liftToTry)
+
+        Stitch.collect(deletions).map { outcomes =>
+          // Outcomes line up positionally with the field ids they were issued for.
+          val outcomeByField = fieldIdsToDelete.zip(outcomes).toMap
+
+          val rateLimited = outcomes.exists {
+            case Throw(_: DeniedManhattanException) => true
+            case _ => false
+          }
+
+          if (rateLimited) buildTweetOverCapacityResponse(opName, tweetId, outcomeByField)
+          else buildTweetResponse(opName, tweetId, outcomeByField)
+        }
+      }
+
+      Stitch.collect(perTweet)
+    }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala
@ -0,0 +1,41 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.tweetypie.additionalfields.AdditionalFields
+import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
+import com.twitter.tweetypie.thriftscala.{Tweet => TpTweet}
+
+/**
+ * A field of the stored version of a tweet to read, update, or delete.
+ *
+ * There is not a one-to-one correspondence between the fields ids of
+ * [[com.twitter.tweetypie.thriftscala.Tweet]] and
+ * [[com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet]]. For example, in StoredTweet,
+ * the nsfwUser property is field 11; in Tweet, it is a property of the coreData struct in field 2.
+ * To circumvent the confusion of using one set of field ids or the other, callers use instances of
+ * [[Field]] to reference the part of the object to modify.
+ */
+class Field private[storage] (val id: Short) extends AnyVal {
+  // Rendered as the bare numeric field id.
+  override def toString: String = s"$id"
+}
+
+/**
+ * NOTE: Make sure `AllUpdatableCompiledFields` is kept up to date when adding any new field
+ */
+object Field {
+  // Fields identified by their ids on StoredTweet.
+  val Geo: Field = new Field(StoredTweet.GeoField.id)
+  val HasTakedown: Field = new Field(StoredTweet.HasTakedownField.id)
+  val NsfwUser: Field = new Field(StoredTweet.NsfwUserField.id)
+  val NsfwAdmin: Field = new Field(StoredTweet.NsfwAdminField.id)
+
+  // Fields identified by their ids on the tweetypie Tweet struct.
+  val TweetypieOnlyTakedownCountryCodes: Field =
+    new Field(TpTweet.TweetypieOnlyTakedownCountryCodesField.id)
+  val TweetypieOnlyTakedownReasons: Field =
+    new Field(TpTweet.TweetypieOnlyTakedownReasonsField.id)
+
+  val AllUpdatableCompiledFields: Set[Field] =
+    Set[Field](Geo, HasTakedown, NsfwUser, NsfwAdmin)
+
+  /**
+   * Creates a [[Field]] for an additional field id.
+   *
+   * Fails the `require` check when AdditionalFields.isAdditionalFieldId rejects the id.
+   */
+  def additionalField(id: Short): Field = {
+    require(
+      AdditionalFields.isAdditionalFieldId(id),
+      "field id must be in the additional field range"
+    )
+    new Field(id)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala
@ -0,0 +1,150 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
+import com.twitter.tweetypie.storage.Response.TweetResponseCode
+import com.twitter.tweetypie.storage.TweetUtils._
+import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
+import com.twitter.tweetypie.thriftscala.DeletedTweet
+import scala.util.control.NonFatal
+
+/** Deletion status of a tweet as reported by GetDeletedTweetsHandler. */
+sealed trait DeleteState
+object DeleteState {
+
+  /**
+   * Manhattan holds no data that distinguishes this tweet id from one that never existed.
+   */
+  case object NotFound extends DeleteState
+
+  /**
+   * The tweet exists and is not in any deleted state.
+   */
+  case object NotDeleted extends DeleteState
+
+  /**
+   * The tweet is deleted but has not been permanently removed from Manhattan.
+   * A tweet in this state may be undeleted.
+   */
+  case object SoftDeleted extends DeleteState
+
+  /**
+   * The tweet was deleted after being bounced for violating the Twitter Rules but has
+   * not been permanently removed from Manhattan. A tweet in this state may NOT be
+   * undeleted.
+   */
+  case object BounceDeleted extends DeleteState
+
+  /**
+   * The tweet has been permanently deleted from Manhattan.
+   */
+  case object HardDeleted extends DeleteState
+}
+
+/**
+ * Per-tweet result produced by GetDeletedTweetsHandler.
+ *
+ * @param tweetId         id of the tweet this response is for
+ * @param overallResponse outcome code of the lookup
+ * @param deleteState     deletion status determined for the tweet
+ * @param tweet           the tweet data, when the handler was able to build it
+ */
+case class DeletedTweetResponse(
+  tweetId: TweetId,
+  overallResponse: TweetResponseCode,
+  deleteState: DeleteState,
+  tweet: Option[DeletedTweet])
+
+/**
+ * Builds the operation that reads the records of each requested tweet and reports
+ * its deletion state, together with the tweet data where it can be built.
+ */
+object GetDeletedTweetsHandler {
+  def apply(
+    read: ManhattanOperations.Read,
+    stats: StatsReceiver
+  ): TweetStorageClient.GetDeletedTweets =
+    (unfilteredTweetIds: Seq[TweetId]) => {
+      // Non-positive ids are dropped before any read is issued.
+      val tweetIds = unfilteredTweetIds.filter(_ > 0)
+
+      Stats.addWidthStat("getDeletedTweets", "tweetIds", tweetIds.size, stats)
+
+      Stitch.collect(
+        tweetIds.map { tweetId =>
+          read(tweetId)
+            .map(mhRecords => responseFromRecords(tweetId, mhRecords))
+            .handle {
+              // A denied read is reported as over capacity.
+              case _: DeniedManhattanException =>
+                unsuccessful(tweetId, TweetResponseCode.OverCapacity)
+
+              case NonFatal(ex) =>
+                TweetUtils.log.warning(
+                  ex,
+                  s"Unhandled exception in GetDeletedTweetsHandler for tweetId: $tweetId"
+                )
+                unsuccessful(tweetId, TweetResponseCode.Failure)
+            }
+        }
+      )
+    }
+
+  /** Chooses the response from the most recent state record and the stored fields. */
+  private def responseFromRecords(
+    tweetId: TweetId,
+    mhRecords: Seq[TweetManhattanRecord]
+  ): DeletedTweetResponse = {
+    val storedTweet = buildStoredTweet(tweetId, mhRecords)
+
+    TweetStateRecord.mostRecent(mhRecords) match {
+      case Some(state: TweetStateRecord.SoftDeleted) => softDeleted(state, storedTweet)
+      case Some(state: TweetStateRecord.BounceDeleted) => bounceDeleted(state, storedTweet)
+      case Some(state: TweetStateRecord.HardDeleted) => hardDeleted(state, storedTweet)
+      case _ =>
+        // No delete state applies: the tweet counts as found only when at least
+        // one of the expected fields is present.
+        if (storedTweet.getFieldBlobs(expectedFields).isEmpty) notFound(tweetId)
+        else notDeleted(tweetId, storedTweet)
+    }
+  }
+
+  /** Response for a read that did not complete; carries no tweet data. */
+  private def unsuccessful(tweetId: TweetId, code: TweetResponseCode) =
+    DeletedTweetResponse(tweetId, code, DeleteState.NotFound, None)
+
+  /** Response for a read that completed. */
+  private def successful(tweetId: TweetId, state: DeleteState, tweet: Option[DeletedTweet]) =
+    DeletedTweetResponse(
+      tweetId = tweetId,
+      overallResponse = TweetResponseCode.Success,
+      deleteState = state,
+      tweet = tweet
+    )
+
+  private def notFound(tweetId: TweetId) =
+    successful(tweetId, DeleteState.NotFound, None)
+
+  private def softDeleted(record: TweetStateRecord.SoftDeleted, storedTweet: StoredTweet) = {
+    val deleted = StorageConversions.toDeletedTweet(storedTweet)
+    successful(
+      record.tweetId,
+      DeleteState.SoftDeleted,
+      Some(deleted.copy(deletedAtMsec = Some(record.createdAt)))
+    )
+  }
+
+  private def bounceDeleted(record: TweetStateRecord.BounceDeleted, storedTweet: StoredTweet) = {
+    val deleted = StorageConversions.toDeletedTweet(storedTweet)
+    successful(
+      record.tweetId,
+      DeleteState.BounceDeleted,
+      Some(deleted.copy(deletedAtMsec = Some(record.createdAt)))
+    )
+  }
+
+  private def hardDeleted(record: TweetStateRecord.HardDeleted, storedTweet: StoredTweet) = {
+    val deleted = StorageConversions.toDeletedTweet(storedTweet)
+    successful(
+      record.tweetId,
+      DeleteState.HardDeleted,
+      Some(
+        deleted.copy(
+          hardDeletedAtMsec = Some(record.createdAt),
+          deletedAtMsec = Some(record.deletedAt)
+        )
+      )
+    )
+  }
+
+  /**
+   * notDeleted returns a tweet to simplify tweetypie.handler.UndeleteTweetHandler
+   */
+  private def notDeleted(tweetId: TweetId, storedTweet: StoredTweet) =
+    successful(tweetId, DeleteState.NotDeleted, Some(StorageConversions.toDeletedTweet(storedTweet)))
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala
@ -0,0 +1,126 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.stitch.Stitch
+import com.twitter.stitch.StitchSeqGroup
+import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet
+import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Error
+import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Response._
+import com.twitter.tweetypie.storage.TweetUtils._
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.util.Time
+import com.twitter.util.Try
+import scala.collection.mutable
+
+/**
+ * Builds the operation that reads every record stored for a tweet and classifies the
+ * result (not found, found, deleted, hard deleted, found with errors, failed).
+ */
+object GetStoredTweetHandler {
+
+  /** Extractor that matches only soft-, hard- or bounce-deleted state records. */
+  private[this] object DeletedState {
+    def unapply(stateRecord: Option[TweetStateRecord]): Option[TweetStateRecord] =
+      stateRecord match {
+        case state @ (Some(_: TweetStateRecord.SoftDeleted) | Some(
+              _: TweetStateRecord.HardDeleted) | Some(_: TweetStateRecord.BounceDeleted)) =>
+          state
+        case _ => None
+      }
+  }
+
+  /**
+   * Deletion time, in milliseconds, carried by a delete state record. For hard
+   * deletes this is the record's deletedAt rather than its createdAt.
+   */
+  private[this] def deletedAtMs(stateRecord: Option[TweetStateRecord]): Option[Long] =
+    stateRecord match {
+      case Some(d: TweetStateRecord.SoftDeleted) => Some(d.createdAt)
+      case Some(d: TweetStateRecord.BounceDeleted) => Some(d.createdAt)
+      case Some(d: TweetStateRecord.HardDeleted) => Some(d.deletedAt)
+      case _ => None
+    }
+
+  /**
+   * Classifies the records read for one tweet and increments the matching counter
+   * on `statsReceiver`.
+   */
+  private[this] def tweetResponseFromRecords(
+    tweetId: TweetId,
+    mhRecords: Seq[TweetManhattanRecord],
+    statsReceiver: StatsReceiver,
+  ): GetStoredTweet.Response = {
+    // Problems found while inspecting the records, in the order they are detected.
+    val errs =
+      mutable.Buffer[Error]()
+
+    val hasStoredTweetFields: Boolean = mhRecords.exists {
+      case TweetManhattanRecord(TweetKey(_, _: TweetKey.LKey.FieldKey), _) => true
+      case _ => false
+    }
+
+    // Only attempt to build the tweet when at least one field record was read; a
+    // failure to build is recorded as corruption rather than propagated.
+    val storedTweet = if (hasStoredTweetFields) {
+      Try(buildStoredTweet(tweetId, mhRecords, includeScrubbed = true))
+        .onFailure(_ => errs.append(Error.TweetIsCorrupt))
+        .toOption
+    } else {
+      None
+    }
+
+    val scrubbedFields: Set[FieldId] = extractScrubbedFields(mhRecords)
+    val tweet: Option[Tweet] = storedTweet.map(StorageConversions.fromStoredTweetAllowInvalid)
+    val stateRecords: Seq[TweetStateRecord] = TweetStateRecord.fromTweetMhRecords(mhRecords)
+    val tweetState: Option[TweetStateRecord] = TweetStateRecord.mostRecent(mhRecords)
+
+    storedTweet.foreach { storedTweet =>
+      val storedExpectedFields = storedTweet.getFieldBlobs(expectedFields)
+      val missingExpectedFields = expectedFields.filterNot(storedExpectedFields.contains)
+      if (missingExpectedFields.nonEmpty || !isValid(storedTweet)) {
+        errs.append(Error.TweetFieldsMissingOrInvalid)
+      }
+
+      // A field that has a scrubbed marker should not also have stored data.
+      val invalidScrubbedFields = storedTweet.getFieldBlobs(scrubbedFields).keys
+      if (invalidScrubbedFields.nonEmpty) {
+        errs.append(Error.ScrubbedFieldsPresent)
+      }
+
+      // Field data still present more than 14 days after the recorded deletion time.
+      if (deletedAtMs(tweetState).exists(_ < Time.now.inMilliseconds - 14.days.inMilliseconds)) {
+        errs.append(Error.TweetShouldBeHardDeleted)
+      }
+    }
+
+    val err = Option(errs.toList).filter(_.nonEmpty)
+
+    (tweet, tweetState, err) match {
+      case (None, None, None) =>
+        statsReceiver.counter("not_found").incr()
+        NotFound(tweetId)
+
+      case (None, Some(tweetState: TweetStateRecord.HardDeleted), None) =>
+        statsReceiver.counter("hard_deleted").incr()
+        HardDeleted(tweetId, Some(tweetState), stateRecords, scrubbedFields)
+
+      case (None, Some(_), None) =>
+        // A state record other than a hard delete exists, but no tweet fields were
+        // read and no error was recorded. Without this case the combination matched
+        // nothing and raised a MatchError; report it as a failure instead.
+        // NOTE(review): TweetFieldsMissingOrInvalid is the closest existing error
+        // for "state without fields" -- confirm this is the intended classification.
+        statsReceiver.counter("failed").incr()
+        Failed(
+          tweetId,
+          tweetState,
+          stateRecords,
+          scrubbedFields,
+          List[Error](Error.TweetFieldsMissingOrInvalid)
+        )
+
+      case (None, _, Some(errs)) =>
+        statsReceiver.counter("failed").incr()
+        Failed(tweetId, tweetState, stateRecords, scrubbedFields, errs)
+
+      case (Some(tweet), _, Some(errs)) =>
+        statsReceiver.counter("found_invalid").incr()
+        FoundWithErrors(tweet, tweetState, stateRecords, scrubbedFields, errs)
+
+      case (Some(tweet), DeletedState(state), None) =>
+        statsReceiver.counter("deleted").incr()
+        FoundDeleted(tweet, Some(state), stateRecords, scrubbedFields)
+
+      case (Some(tweet), _, None) =>
+        statsReceiver.counter("found").incr()
+        Found(tweet, tweetState, stateRecords, scrubbedFields)
+    }
+  }
+
+  /**
+   * @param read          reads all Manhattan records stored for a tweet id
+   * @param statsReceiver receives the batch-width stat and, under the
+   *                      "getStoredTweet" scope, the per-outcome counters
+   */
+  def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetStoredTweet = {
+    // Scoped once here instead of on every response.
+    val responseStats = statsReceiver.scope("getStoredTweet")
+
+    // Lookups issued through this group are run together as one batch of reads.
+    object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] {
+      override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = {
+        Stats.addWidthStat("getStoredTweet", "tweetIds", tweetIds.size, statsReceiver)
+        Stitch.traverse(tweetIds)(read(_))
+      }
+    }
+
+    tweetId =>
+      if (tweetId <= 0) {
+        // Non-positive ids are rejected without reading anything.
+        Stitch.NotFound
+      } else {
+        Stitch
+          .call(tweetId, mhGroup)
+          .map(mhRecords => tweetResponseFromRecords(tweetId, mhRecords, responseStats))
+      }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala
@ -0,0 +1,167 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.finagle.stats.Counter
+import com.twitter.finagle.stats.NullStatsReceiver
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.logging.Logger
+import com.twitter.snowflake.id.SnowflakeId
+import com.twitter.stitch.Stitch
+import com.twitter.stitch.StitchSeqGroup
+import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
+import com.twitter.storage.client.manhattan.kv.ManhattanException
+import com.twitter.tweetypie.storage.TweetStateRecord.BounceDeleted
+import com.twitter.tweetypie.storage.TweetStateRecord.HardDeleted
+import com.twitter.tweetypie.storage.TweetStateRecord.SoftDeleted
+import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet
+import com.twitter.tweetypie.storage.TweetUtils._
+import com.twitter.util.Duration
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import com.twitter.util.Time
+
+object GetTweetHandler {
+  // Logger used only for the racy-read log lines emitted by logRacyRead.
+  private[this] val logger = Logger(getClass)
+
+  //////////////////////////////////////////////////
+  // Logging racy reads for later validation.
+
+  /**
+   * Maximum tweet age (measured from the snowflake id's timestamp) at which a read is still
+   * logged as a racy read.
+   */
+  val RacyTweetWindow: Duration = 10.seconds
+
+  /**
+   * Logs what Manhattan returned for a read that happens within [[RacyTweetWindow]] of the
+   * tweet's snowflake creation time. A read this early would usually be expected to be served
+   * from cache, so reaching storage indicates the tweet is prone to consistency issues; the log
+   * line records what was present at read time for later analysis.
+   *
+   * Line format: `racy_tweet_read`, the tweet id and the tweet age in milliseconds, followed by
+   * one `lkey[:timestampMillis]` entry per record, all separated by tabs. Nothing is logged for
+   * non-snowflake ids.
+   */
+  private[this] def logRacyRead(tweetId: TweetId, records: Seq[TweetManhattanRecord]): Unit =
+    if (SnowflakeId.isSnowflakeId(tweetId)) {
+      val tweetAge = Time.now.since(SnowflakeId(tweetId).time)
+      if (tweetAge <= RacyTweetWindow) {
+        val recordEntries = records.map { rec =>
+          // When the key has a timestamp, log it so that we can tell later on whether a value
+          // should have been present. Keys written in a single write are expected to share a
+          // timestamp, while separate writes generally differ. The timestamp is optional in
+          // Manhattan, but a value is expected to always be there.
+          val writeTime = rec.value.timestamp.map(ts => s":${ts.inMilliseconds}").getOrElse("")
+          s"\t${rec.lkey}$writeTime"
+        }
+        // The age is part of the line for analysis purposes.
+        val header = s"racy_tweet_read\t$tweetId\t${tweetAge.inMilliseconds}"
+        logger.info(header + recordEntries.mkString)
+      }
+    }
+
+  /**
+   * Converts the Manhattan records read for one tweet into a [[GetTweet.Response]].
+   *
+   *  - no records: `NotFound`
+   *  - most recent state record is soft- or hard-deleted: `Deleted` (no tweet is built)
+   *  - most recent state record is bounce-deleted: `BounceDeleted(tweet)` when a tweet can be
+   *    built from the records, otherwise `Deleted`
+   *  - anything else: `Found(tweet)` when a tweet can be built, otherwise `NotFound`
+   *
+   * @param tweetId       id of the tweet the records belong to
+   * @param mhRecords     the records read from Manhattan for `tweetId`
+   * @param statsReceiver receives the `valid`, `invalid` and `expected_fields_not_present`
+   *                      counters describing the outcome of building the tweet
+   */
+  def tweetResponseFromRecords(
+    tweetId: TweetId,
+    mhRecords: Seq[TweetManhattanRecord],
+    statsReceiver: StatsReceiver = NullStatsReceiver
+  ): GetTweet.Response = {
+    // A tweet is considered not returnable when none of the `expectedFields` are present or when
+    // the stored tweet fails validation, even if some additional fields are present.
+    def buildTweet() = {
+      val stored = buildStoredTweet(tweetId, mhRecords)
+      if (stored.getFieldBlobs(expectedFields).isEmpty) {
+        log.info(s"Expected Fields Not Present Tweet Id: $tweetId")
+        statsReceiver.counter("expected_fields_not_present").incr()
+        None
+      } else if (!isValid(stored)) {
+        log.info(s"Invalid Tweet Id: $tweetId")
+        statsReceiver.counter("invalid").incr()
+        None
+      } else {
+        statsReceiver.counter("valid").incr()
+        Some(StorageConversions.fromStoredTweet(stored))
+      }
+    }
+
+    if (mhRecords.isEmpty) {
+      GetTweet.Response.NotFound
+    } else {
+      TweetStateRecord.mostRecent(mhRecords) match {
+        // Soft and hard deletes never require an attempt to construct a Tweet.
+        case Some(_: SoftDeleted) | Some(_: HardDeleted) =>
+          GetTweet.Response.Deleted
+
+        // Every other state requires an attempt to construct a Tweet, which may not succeed.
+        case latestState =>
+          logRacyRead(tweetId, mhRecords)
+          val tweet = buildTweet()
+          latestState match {
+            // BounceDeleted carries the Tweet data so that callers can access data on the tweet
+            // (e.g. the hard delete daemon requires conversationId and userId). There are no
+            // plans for Tweetypie server to make use of the returned tweet at this time.
+            case Some(_: BounceDeleted) =>
+              tweet match {
+                case Some(t) => GetTweet.Response.BounceDeleted(t)
+                case None => GetTweet.Response.Deleted
+              }
+            case _ =>
+              tweet match {
+                case Some(t) => GetTweet.Response.Found(t)
+                case None => GetTweet.Response.NotFound
+              }
+          }
+      }
+    }
+  }
+
+  /**
+   * Builds the [[GetTweet]] function: reads the Manhattan records stored under a tweet id,
+   * converts them with [[tweetResponseFromRecords]], and records outcome counters under the
+   * `getTweet` scope.
+   *
+   * Non-positive tweet ids are answered with `Stitch.NotFound` without issuing a read. A
+   * `DeniedManhattanException` is replaced by `RateLimited` (wrapping the original exception);
+   * every other failure is counted and passed through unchanged.
+   *
+   * @param read          reads the Manhattan records for a single tweet id
+   * @param statsReceiver parent receiver for the width stat and the `getTweet` counters
+   */
+  def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetTweet = {
+
+    // Counters are created once, when the handler is built, rather than per request.
+    object stats {
+      val getTweetScope = statsReceiver.scope("getTweet")
+      val deniedCounter: Counter = getTweetScope.counter("mh_denied")
+      val mhExceptionCounter: Counter = getTweetScope.counter("mh_exception")
+      val nonFatalExceptionCounter: Counter = getTweetScope.counter("non_fatal_exception")
+      val notFoundCounter: Counter = getTweetScope.counter("not_found")
+    }
+
+    // Group handed to Stitch.call; `run` receives the tweet ids of the calls made against it and
+    // records how many there were before reading each of them.
+    object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] {
+      override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = {
+        Stats.addWidthStat("getTweet", "tweetIds", tweetIds.size, statsReceiver)
+        Stitch.traverse(tweetIds)(read(_))
+      }
+    }
+
+    tweetId =>
+      if (tweetId <= 0) {
+        Stitch.NotFound
+      } else {
+        Stitch
+          .call(tweetId, mhGroup)
+          .map(mhRecords => tweetResponseFromRecords(tweetId, mhRecords, stats.getTweetScope))
+          // Lift to Try so successes and failures can be inspected in a single match below.
+          .liftToTry
+          .map {
+            // Case order matters: the most specific exception types are matched first.
+            case Throw(mhException: DeniedManhattanException) =>
+              stats.deniedCounter.incr()
+              Throw(RateLimited("", mhException))
+
+            // Encountered some other Manhattan error
+            case t @ Throw(_: ManhattanException) =>
+              stats.mhExceptionCounter.incr()
+              t
+
+            // Something else happened
+            case t @ Throw(ex) =>
+              stats.nonFatalExceptionCounter.incr()
+              TweetUtils.log
+                .warning(ex, s"Unhandled exception in GetTweetHandler for tweetId: $tweetId")
+              t
+
+            case r @ Return(GetTweet.Response.NotFound) =>
+              stats.notFoundCounter.incr()
+              r
+
+            case r @ Return(_) => r
+          }
+          .lowerFromTry
+      }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala
@ -0,0 +1,153 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.storage.TweetKey.LKey.ForceAddedStateKey
+import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet
+import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet.Response._
+import com.twitter.tweetypie.storage.TweetUtils._
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import com.twitter.util.Time
+import com.twitter.util.Try
+
+object HardDeleteTweetHandler {
+
+  /**
+   * Tells whether the record stored under `key` is permanently deleted when a tweet is hard
+   * deleted: core, internal and additional field keys, plus the soft-deletion, bounce-deletion,
+   * un-deletion and force-added state keys. Every other key is kept.
+   */
+  private[storage] def isKeyToBeDeleted(key: TweetKey): Boolean =
+    key.lKey match {
+      // tweet data
+      case TweetKey.LKey.CoreFieldsKey => true
+      case TweetKey.LKey.InternalFieldsKey(_) => true
+      case TweetKey.LKey.AdditionalFieldsKey(_) => true
+      // state records
+      case TweetKey.LKey.SoftDeletionStateKey => true
+      case TweetKey.LKey.BounceDeletionStateKey => true
+      case TweetKey.LKey.UnDeletionStateKey => true
+      case TweetKey.LKey.ForceAddedStateKey => true
+      case _ => false
+    }
+
+  /**
+   * Decides which state record, if any, a hard delete has to write.
+   *
+   * Hard deleting consists of two actions: writing the hard-deletion record and removing the
+   * tweet data. Whenever any action is performed the tweet data is removed; the record only has
+   * to be written when the tweet does not have one yet.
+   *
+   * The name of the most recent state record (or `no_tweet_state_record`) is counted on `stats`.
+   *
+   * @param tweetId     the tweet being hard deleted
+   * @param records     the Manhattan records read for the tweet
+   * @param mhTimestamp time of the hard deletion, used as the new record's `createdAt`
+   * @param stats       receiver for the counter of the tweet's state before the deletion
+   * @return `Return(Some(record))` when the tweet is soft- or bounce-deleted and the record must
+   *         be written, `Return(None)` when it is already hard deleted, and `Throw(NotDeleted)`
+   *         when the tweet is not in a deleted state
+   */
+  private[storage] def getHardDeleteStateRecord(
+    tweetId: TweetId,
+    records: Seq[TweetManhattanRecord],
+    mhTimestamp: Time,
+    stats: StatsReceiver
+  ): Try[Option[TweetStateRecord.HardDeleted]] = {
+    val currentState = TweetStateRecord.mostRecent(records)
+    stats.counter(currentState.fold("no_tweet_state_record")(_.name)).incr()
+
+    // When dealing with hard deletes in Manhattan, createdAt is the hard deletion timestamp and
+    // deletedAt is the timestamp of the earlier (soft or bounce) deletion.
+    def hardDeletedAfter(
+      deletion: TweetStateRecord
+    ): Try[Option[TweetStateRecord.HardDeleted]] =
+      Return(
+        Some(
+          TweetStateRecord.HardDeleted(
+            tweetId = tweetId,
+            createdAt = mhTimestamp.inMillis,
+            deletedAt = deletion.createdAt
+          )
+        )
+      )
+
+    currentState match {
+      case None =>
+        Throw(NotDeleted(tweetId, None))
+
+      case Some(_: TweetStateRecord.Undeleted) =>
+        Throw(NotDeleted(tweetId, Some(TweetKey.LKey.UnDeletionStateKey)))
+
+      case Some(_: TweetStateRecord.ForceAdded) =>
+        Throw(NotDeleted(tweetId, Some(ForceAddedStateKey)))
+
+      case Some(_: TweetStateRecord.HardDeleted) =>
+        Return(None)
+
+      case Some(softDeleted: TweetStateRecord.SoftDeleted) =>
+        hardDeletedAfter(softDeleted)
+
+      case Some(bounceDeleted: TweetStateRecord.BounceDeleted) =>
+        hardDeletedAfter(bounceDeleted)
+    }
+  }
+
+  /**
+   * This handler returns HardDeleteTweet.Response.Deleted if data associated with the tweet is
+   * deleted, either as a result of this request or a previous one.
+   *
+   * The most recently added state record determines the tweet's state. This method will only
+   * delete data for tweets in the soft-deleted, bounce-deleted or hard-deleted state. (Calling
+   * hardDeleteTweet for tweets that have already been hard-deleted will remove any lkeys that may
+   * not have been deleted previously.) For a tweet in any other state nothing is written or
+   * removed, the `cancelled` counter is incremented and the NotDeleted response is returned.
+   *
+   * @param read   reads the Manhattan records for a tweet id
+   * @param insert writes the hard-deletion state record
+   * @param delete removes a single key, given the deletion timestamp
+   * @param scribe notified once the hard-deletion record has been written successfully
+   * @param stats  parent receiver; counters are recorded under the `hardDeleteTweet` scope
+   */
+  def apply(
+    read: ManhattanOperations.Read,
+    insert: ManhattanOperations.Insert,
+    delete: ManhattanOperations.Delete,
+    scribe: Scribe,
+    stats: StatsReceiver
+  ): TweetId => Stitch[HardDeleteTweet.Response] = {
+    val hardDeleteStats = stats.scope("hardDeleteTweet")
+    val hardDeleteTweetCancelled = hardDeleteStats.counter("cancelled")
+    val beforeStateStats = hardDeleteStats.scope("before_state")
+
+    // Deletes every key; each delete is lifted to a Try so that all results are collected and
+    // then reduced to a single outcome by collectWithRateLimitCheck.
+    def removeRecords(keys: Seq[TweetKey], mhTimestamp: Time): Stitch[Unit] =
+      Stitch
+        .collect(keys.map(key => delete(key, Some(mhTimestamp)).liftToTry))
+        .map(collectWithRateLimitCheck)
+        .lowerFromTry
+
+    // Writes the hard-deletion record when one is needed and scribes the removal on success.
+    def writeRecord(record: Option[TweetStateRecord.HardDeleted]): Stitch[Unit] =
+      record match {
+        case Some(r) =>
+          insert(r.toTweetMhRecord).onSuccess { _ =>
+            scribe.logRemoved(
+              r.tweetId,
+              Time.fromMilliseconds(r.createdAt),
+              isSoftDeleted = false
+            )
+          }
+        case None => Stitch.Unit
+      }
+
+    tweetId =>
+      read(tweetId)
+        .flatMap { records =>
+          // The same timestamp is used for the state record and for the key deletions.
+          val hardDeletionTimestamp = Time.now
+
+          val keysToBeDeleted: Seq[TweetKey] = records.map(_.key).filter(isKeyToBeDeleted)
+
+          getHardDeleteStateRecord(
+            tweetId,
+            records,
+            hardDeletionTimestamp,
+            beforeStateStats) match {
+            case Return(record) =>
+              Stitch
+                .join(
+                  writeRecord(record),
+                  removeRecords(keysToBeDeleted, hardDeletionTimestamp)
+                ).map(_ =>
+                  // If the tweetId is non-snowflake and has previously been hard deleted
+                  // there will be no coreData record to fall back on to get the tweet
+                  // creation time and createdAtMillis will be None.
+                  Deleted(
+                    // deletedAtMillis: when the tweet was hard deleted
+                    deletedAtMillis = Some(hardDeletionTimestamp.inMillis),
+                    // createdAtMillis: when the tweet itself was created
+                    // (as opposed to when the deletion record was created)
+                    createdAtMillis =
+                      TweetUtils.creationTimeFromTweetIdOrMHRecords(tweetId, records)
+                  ))
+            // NotDeleted is returned to the caller as a response value, not as a failure.
+            case Throw(notDeleted: NotDeleted) =>
+              hardDeleteTweetCancelled.incr()
+              Stitch.value(notDeleted)
+            case Throw(e) => Stitch.exception(e) // this should never happen
+          }
+        }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala
@ -0,0 +1,228 @@
+package com.twitter.tweetypie.storage
+
+import com.google.common.base.CaseFormat
+import com.twitter.conversions.DurationOps._
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.scrooge.TFieldBlob
+import com.twitter.scrooge.ThriftStructFieldInfo
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.kv._
+import com.twitter.tweetypie.additionalfields.AdditionalFields
+import com.twitter.tweetypie.storage.ManhattanOperations.Read
+import com.twitter.tweetypie.storage.TweetUtils._
+import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
+import com.twitter.tweetypie.thriftscala.{Tweet => TweetypieTweet}
+import com.twitter.util.Duration
+import com.twitter.util.Future
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import diffshow.Container
+import diffshow.DiffShow
+import diffshow.Expr
+import org.apache.commons.codec.binary.Base64
+import scala.util.Try
+import shapeless.Cached
+import shapeless.Strict
+
+// This class is used by the Tweetypie Console to inspect tweet field content in Manhattan
+class InspectFields(svcIdentifier: ServiceIdentifier) {
+  // Manhattan application id and dataset name used for the lookups.
+  val mhApplicationId = "tbird_mh"
+  val mhDatasetName = "tbird_mh"
+  // Destination passed to the Manhattan client.
+  val mhDestinationName = "/s/manhattan/cylon.native-thrift"
+  // Default maximum timeout applied to endpoint operations.
+  val mhTimeout: Duration = 5000.milliseconds
+
+  // Endpoint built over an mTLS client identified by `svcIdentifier`.
+  val localMhEndpoint: ManhattanKVEndpoint =
+    ManhattanKVEndpointBuilder(
+      ManhattanKVClient(
+        mhApplicationId,
+        mhDestinationName,
+        ManhattanKVClientMtlsParams(svcIdentifier)))
+      .defaultGuarantee(Guarantee.SoftDcReadMyWrites)
+      .defaultMaxTimeout(mhTimeout)
+      .build()
+
+  // Reads the Manhattan records stored under a tweet id in `mhDatasetName`.
+  val readOperation: Read = (new ManhattanOperations(mhDatasetName, localMhEndpoint)).read
+
+  /**
+   * Reads the Manhattan records of a tweet and renders them as a human-readable listing.
+   *
+   * A failed read does not fail the returned Future: the exception's `toString` is returned as
+   * the result instead.
+   */
+  def lookup(tweetId: Long): Future[String] =
+    Stitch.run(
+      readOperation(tweetId).liftToTry.map {
+        case Throw(cause) =>
+          cause.toString
+        case Return(records) =>
+          prettyPrintManhattanRecords(tweetId, TweetKey.padTweetIdStr(tweetId), records)
+      }
+    )
+
+  /**
+   * Reads the Manhattan records of a tweet and assembles them into a [[StoredTweet]].
+   * A failed read is rethrown, so the returned Future fails with the read's exception.
+   */
+  def storedTweet(tweetId: Long): Future[StoredTweet] =
+    Stitch.run(
+      readOperation(tweetId).liftToTry.map {
+        case Throw(cause) => throw cause
+        case Return(records) => buildStoredTweet(tweetId, records)
+      }
+    )
+
+  /**
+   * Renders the records of a tweet as an aligned listing: a `/tbird_mh/<pkey>/` header line
+   * followed by one row per record with the lkey, the field name and the decoded content.
+   * Returns "Not Found" when there are no records.
+   */
+  private[this] def prettyPrintManhattanRecords(
+    tweetId: Long,
+    pkey: String,
+    mhRecords: Seq[TweetManhattanRecord]
+  ): String =
+    if (mhRecords.isEmpty) {
+      "Not Found"
+    } else {
+      val rows = getFormattedManhattanRecords(tweetId, mhRecords)
+
+      // Column widths are the longest value in the column plus two characters of padding.
+      val keyWidth = rows.map(_.key.length).max + 2
+      val nameWidth = rows.map(_.fieldName.length).max + 2
+      val rowFormat = s"    %-${keyWidth}s %-${nameWidth}s %s"
+
+      // Continuation lines of multi-line content are indented to line up with the content column.
+      val continuation = "\n" + rowFormat.format("", "", "")
+
+      val body = rows
+        .map { row =>
+          rowFormat.format(row.key, row.fieldName, row.content.replaceAll("\n", continuation))
+        }
+        .mkString("\n")
+
+      s"/tbird_mh/$pkey/\n$body"
+    }
+
+  /**
+   * Renders every Manhattan record of a tweet as a [[FormattedManhattanRecord]] holding the lkey,
+   * the thrift field name (empty for non-field keys) and the decoded content.
+   */
+  private[this] def getFormattedManhattanRecords(
+    tweetId: Long,
+    mhRecords: Seq[TweetManhattanRecord]
+  ): Seq[FormattedManhattanRecord] = {
+    val storedTweet = buildStoredTweet(tweetId, mhRecords).copy(updatedAt = None)
+    // The conversion is attempted on a best-effort basis; a failure leaves this as None.
+    val tweetypieTweet: Option[TweetypieTweet] =
+      Try(StorageConversions.fromStoredTweet(storedTweet)).toOption
+
+    // Blobs are indexed by thrift field id rather than by display name: every id missing from
+    // the thrift metadata is displayed as "<UNKNOWN FIELD>", so a name-keyed map would keep only
+    // one of those blobs and render it for all of them.
+    val blobsById: Map[Short, TFieldBlob] =
+      getStoredTweetBlobs(mhRecords).map(blob => blob.field.id -> blob).toMap
+
+    mhRecords
+      .map {
+        case TweetManhattanRecord(fullKey, mhValue) =>
+          FormattedManhattanRecord(
+            key = fullKey.lKey.toString,
+            fieldName = getFieldName(fullKey.lKey),
+            content = prettyPrintManhattanValue(
+              fullKey.lKey,
+              mhValue,
+              storedTweet,
+              tweetypieTweet,
+              tweetId,
+              blobsById
+            )
+          )
+      }
+      // Sort by key; rewriting "external" to "xternal" moves those keys behind "internal".
+      .sortBy(_.key.replace("external", "xternal"))
+  }
+
+  /** Name of the thrift field with the given id, or "<UNKNOWN FIELD>" when no field matches. */
+  private[this] def getFieldNameFromThrift(
+    fieldId: Short,
+    fieldInfos: List[ThriftStructFieldInfo]
+  ): String =
+    fieldInfos
+      .find(info => info.tfield.id == fieldId)
+      .map(_.tfield.name)
+      .getOrElse("<UNKNOWN FIELD>")
+
+  /**
+   * Tells whether the second `/`-separated segment of `lkey` is `scrubbed_fields`. An lkey with
+   * fewer than two segments yields false instead of an ArrayIndexOutOfBoundsException.
+   */
+  private[this] def isLkeyScrubbedField(lkey: String): Boolean =
+    lkey.split("/").lift(1).contains("scrubbed_fields")
+
+  /** Field name for a field lkey; empty for every other kind of lkey. */
+  private[this] def getFieldName(lkey: TweetKey.LKey): String =
+    lkey match {
+      case fieldKey: TweetKey.LKey.FieldKey => getFieldName(fieldKey.fieldId)
+      case _ => ""
+    }
+
+  /**
+   * Display name for a field id: `core_fields` for id 1, otherwise the name taken from the
+   * Tweet thrift struct for additional-field ids and from StoredTweet for the remaining ids.
+   */
+  private[this] def getFieldName(fieldId: Short): String =
+    if (fieldId == 1) {
+      "core_fields"
+    } else if (AdditionalFields.isAdditionalFieldId(fieldId)) {
+      getFieldNameFromThrift(fieldId, TweetypieTweet.fieldInfos)
+    } else {
+      getFieldNameFromThrift(fieldId, StoredTweet.fieldInfos)
+    }
+
+  /**
+   * Decodes the value of one record for display. Metadata values are decoded as JSON and field
+   * values through the blob stored for the lkey's field id; when neither applies or decoding
+   * yields nothing, the raw bytes are shown as base64 (or "<NO DATA>" when there are none).
+   *
+   * @param tfieldBlobs blobs of the tweet's field records, keyed by thrift field id
+   */
+  private[this] def prettyPrintManhattanValue(
+    lkey: TweetKey.LKey,
+    mhValue: TweetManhattanValue,
+    storedTweet: StoredTweet,
+    tweetypieTweet: Option[TweetypieTweet],
+    tweetId: Long,
+    tfieldBlobs: Map[Short, TFieldBlob]
+  ): String = {
+    val decoded = lkey match {
+      case _: TweetKey.LKey.MetadataKey =>
+        decodeMetadata(mhValue)
+
+      case fieldKey: TweetKey.LKey.FieldKey =>
+        tfieldBlobs
+          .get(fieldKey.fieldId)
+          .map(blob => decodeField(tweetId, blob, storedTweet, tweetypieTweet))
+
+      case _ =>
+        None
+    }
+
+    decoded.getOrElse { // If all else fails, encode the data as a base64 string
+      // Copy only the readable bytes from a duplicate of the buffer. `ByteBuffer.array` would
+      // expose the whole backing array regardless of position and limit, throws for buffers
+      // without an accessible array, and reading the original would move its position.
+      val readable = mhValue.contents.duplicate()
+      val contents = new Array[Byte](readable.remaining)
+      readable.get(contents)
+      if (contents.isEmpty) {
+        "<NO DATA>"
+      } else {
+        Base64.encodeBase64String(contents)
+      }
+    }
+  }
+
+  /**
+   * Decodes a metadata value as JSON and returns its string form, or None when the bytes cannot
+   * be decoded as JSON.
+   */
+  private[this] def decodeMetadata(mhValue: TweetManhattanValue): Option[String] = {
+    // Extracting the bytes happens outside the Try: only JSON decoding failures become None.
+    val jsonBytes = ByteArrayCodec.fromByteBuffer(mhValue.contents)
+    val decodedJson = Try(Json.decode(jsonBytes).toString)
+    decodedJson.toOption
+  }
+
+  /**
+   * Renders the content of one field blob: field id 1 is shown as the tweet's core fields, an
+   * additional field is shown by setting it on an otherwise empty Tweet, and any other field by
+   * setting it on an otherwise empty StoredTweet.
+   */
+  private[this] def decodeField(
+    tweetId: Long,
+    blob: TFieldBlob,
+    storedTweet: StoredTweet,
+    tweetypieTweet: Option[TweetypieTweet]
+  ): String = {
+    def asAdditionalField = decodeTweetWithOneField(TweetypieTweet(tweetId).setField(blob))
+    def asInternalField = decodeTweetWithOneField(StoredTweet(tweetId).setField(blob))
+
+    val id = blob.field.id
+    if (id == 1) coreFields(storedTweet)
+    else if (AdditionalFields.isAdditionalFieldId(id)) asAdditionalField
+    else asInternalField
+  }
+
+  // Takes a Tweet or StoredTweet with a single field set (besides the id) and returns the
+  // rendered value of that field, printed with a width of 80.
+  //
+  // `ev` is the DiffShow instance used to turn the value into an expression tree.
+  private[this] def decodeTweetWithOneField[T](
+    tweetWithOneField: T
+  )(
+    implicit ev: Cached[Strict[DiffShow[T]]]
+  ): String = {
+    // Fields with empty values are hidden so that only the id and the one set field remain.
+    val config = diffshow.Config(hideFieldWithEmptyVal = true)
+    val tree: Expr = config.transform(DiffShow.show(tweetWithOneField))
+
+    // matches a Tweet or StoredTweet with two values, the first being the id
+    // NOTE(review): presumably the tree is left unchanged when the container does not have
+    // exactly this shape, in which case the whole struct is printed -- confirm against diffshow.
+    val value = tree.transform {
+      case Container(_, List(diffshow.Field("id", _), diffshow.Field(_, value))) => value
+    }
+
+    config.exprPrinter.apply(value, width = 80).render
+  }
+
+  // Renders the core fields extracted from the stored tweet, hiding fields with empty values.
+  private[this] def coreFields(storedTweet: StoredTweet): String =
+    diffshow.show(CoreFieldsCodec.fromTweet(storedTweet), hideFieldWithEmptyVal = true)
+
+  // Converts a lower_underscore name to lowerCamel.
+  // NOTE(review): not referenced anywhere in this class.
+  private[this] def toCamelCase(s: String): String =
+    CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, s)
+}
+
+/**
+ * One row of the listing produced by InspectFields.
+ *
+ * @param key       the record's lkey rendered as a string
+ * @param fieldName name of the thrift field for field keys, empty for other keys
+ * @param content   decoded, human-readable value of the record
+ */
+case class FormattedManhattanRecord(key: String, fieldName: String, content: String)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala
@ -0,0 +1,17 @@
+package com.twitter.tweetypie.storage
+
+import com.fasterxml.jackson.databind.ObjectMapper
+import com.fasterxml.jackson.module.scala.DefaultScalaModule
+
+/**
+ * JSON encoding and decoding of string-keyed maps, backed by a Jackson mapper with the Scala
+ * module registered.
+ */
+object Json {
+  val TimestampKey = "timestamp"
+  val SoftDeleteTimestampKey = "softdelete_timestamp"
+
+  private val mapper = {
+    val scalaAwareMapper = new ObjectMapper
+    scalaAwareMapper.registerModule(DefaultScalaModule)
+    scalaAwareMapper
+  }
+
+  /** Serializes `m` to JSON bytes. */
+  def encode(m: Map[String, Any]): Array[Byte] =
+    mapper.writeValueAsBytes(m)
+
+  /** Parses JSON bytes into a map. */
+  def decode(arr: Array[Byte]): Map[String, Any] = {
+    val targetType = classOf[Map[String, Any]]
+    mapper.readValue[Map[String, Any]](arr, targetType)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala
@ -0,0 +1,103 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.bijection.Injection
+import com.twitter.io.Buf
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.bijections.Bijections.BufInjection
+import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpoint
+import com.twitter.storage.client.manhattan.kv.impl.DescriptorP1L1
+import com.twitter.storage.client.manhattan.kv.impl.Component
+import com.twitter.storage.client.manhattan.kv.{impl => mh}
+import com.twitter.storage.client.manhattan.bijections.Bijections.StringInjection
+import com.twitter.util.Time
+import java.nio.ByteBuffer
+import scala.util.control.NonFatal
+
+/** A single Manhattan record of a tweet: its key (tweet id plus lkey) and its value. */
+case class TweetManhattanRecord(key: TweetKey, value: TweetManhattanValue) {
+  def pkey: TweetId = key.tweetId
+  def lkey: TweetKey.LKey = key.lKey
+
+  /**
+   * Produces a representation that is human-readable, but contains
+   * all of the information from the record. It is not intended for
+   * producing machine-readable values.
+   *
+   * Metadata values are shown as strings, field values as the decoded TFieldBlob with
+   * hex-encoded content, and unknown keys as raw hex. A value that fails to decode is shown as
+   * raw hex together with the failure.
+   *
+   * This conversion is relatively expensive, so beware of using it in
+   * hot code paths.
+   */
+  override def toString: String = {
+    // Hex form of the raw value bytes, computed only on the paths that need it.
+    def rawHex = Buf.slowHexString(Buf.ByteBuffer.Shared(value.contents))
+
+    def decoded =
+      key.lKey match {
+        case _: TweetKey.LKey.MetadataKey =>
+          StringCodec.fromByteBuffer(value.contents)
+
+        case _: TweetKey.LKey.FieldKey =>
+          val blob = TFieldBlobCodec.fromByteBuffer(value.contents)
+          s"TFieldBlob(${blob.field}, 0x${Buf.slowHexString(blob.content)})"
+
+        case TweetKey.LKey.Unknown(_) =>
+          "0x" + rawHex
+      }
+
+    val rendered =
+      try decoded
+      catch {
+        case NonFatal(e) => s"0x$rawHex (failed to decode due to $e)"
+      }
+
+    s"$key => ${value.copy(contents = rendered)}"
+  }
+}
+
+/** Function types and key/value descriptors shared by the tweet storage operations. */
+object ManhattanOperations {
+  /** Reads the records stored under a tweet id. */
+  type Read = TweetId => Stitch[Seq[TweetManhattanRecord]]
+
+  /** Writes a single record. */
+  type Insert = TweetManhattanRecord => Stitch[Unit]
+
+  /** Deletes a single key, with an optional timestamp for the deletion. */
+  type Delete = (TweetKey, Option[Time]) => Stitch[Unit]
+
+  /** Deletes everything stored under a tweet id. */
+  type DeleteRange = TweetId => Stitch[Unit]
+
+  /** Maps a tweet id to its padded string pkey and back. */
+  object PkeyInjection extends Injection[TweetId, String] {
+    override def apply(tweetId: TweetId): String = TweetKey.padTweetIdStr(tweetId)
+    override def invert(str: String): scala.util.Try[TweetId] = scala.util.Try(str.toLong)
+  }
+
+  // NOTE(review): not referenced by LkeyInjection below, whose invert always succeeds.
+  case class InvalidLkey(lkeyStr: String) extends Exception
+
+  /** Maps an lkey to its string form and back; inversion is always wrapped in a Success. */
+  object LkeyInjection extends Injection[TweetKey.LKey, String] {
+    override def apply(lkey: TweetKey.LKey): String = lkey.toString
+    override def invert(str: String): scala.util.Try[TweetKey.LKey] =
+      scala.util.Success(TweetKey.LKey.fromString(str))
+  }
+
+  /** Key layout: one pkey component (the tweet id) and one lkey component, both as strings. */
+  val KeyDescriptor: DescriptorP1L1.EmptyKey[TweetId, TweetKey.LKey] =
+    mh.KeyDescriptor(
+      Component(PkeyInjection.andThen(StringInjection)),
+      Component(LkeyInjection.andThen(StringInjection))
+    )
+
+  /** Values are handled as ByteBuffers. */
+  val ValueDescriptor: mh.ValueDescriptor.EmptyValue[ByteBuffer] = mh.ValueDescriptor(BufInjection)
+}
+
+/**
+ * Tweet storage operations against one Manhattan dataset.
+ *
+ * @param dataset    name of the dataset holding the tweet records
+ * @param mhEndpoint endpoint used to run the operations
+ */
+class ManhattanOperations(dataset: String, mhEndpoint: ManhattanKVEndpoint) {
+  import ManhattanOperations._
+
+  // Key prefix addressing everything stored for one tweet.
+  private[this] def pkey(tweetId: TweetId) = KeyDescriptor.withDataset(dataset).withPkey(tweetId)
+
+  /** Reads every record under the tweet's pkey. */
+  def read: Read =
+    tweetId =>
+      mhEndpoint
+        .slice(pkey(tweetId).under(), ValueDescriptor)
+        .map { entries =>
+          entries.map {
+            case (mhKey, mhValue) =>
+              TweetManhattanRecord(TweetKey(mhKey.pkey, mhKey.lkey), mhValue)
+          }
+        }
+
+  /** Writes one record under its tweet id and lkey. */
+  def insert: Insert = { record =>
+    val recordKey = record.key
+    mhEndpoint.insert(
+      pkey(recordKey.tweetId).withLkey(recordKey.lKey),
+      ValueDescriptor.withValue(record.value)
+    )
+  }
+
+  /** Deletes a single key, passing the given timestamp through to the endpoint. */
+  def delete: Delete = { (key, time) =>
+    val mhKey = pkey(key.tweetId).withLkey(key.lKey)
+    mhEndpoint.delete(mhKey, time)
+  }
+
+  /** Deletes every record under the tweet's pkey. */
+  def deleteRange: DeleteRange =
+    tweetId => mhEndpoint.deleteRange(pkey(tweetId).under())
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala
@ -0,0 +1,451 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.finagle.ssl.OpportunisticTls
+import com.twitter.finagle.stats.NullStatsReceiver
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.logging.BareFormatter
+import com.twitter.logging.Level
+import com.twitter.logging.ScribeHandler
+import com.twitter.logging._
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.bijections.Bijections._
+import com.twitter.storage.client.manhattan.kv._
+import com.twitter.storage.client.manhattan.kv.impl.ValueDescriptor
+import com.twitter.tweetypie.client_id.ClientIdHelper
+import com.twitter.tweetypie.storage.Scribe.ScribeHandlerFactory
+import com.twitter.tweetypie.storage.TweetStorageClient.BounceDelete
+import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet
+import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.tweetypie.util.StitchUtils
+import com.twitter.util.Duration
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import scala.util.Random
+
+/**
+ * Configuration types and factory helpers for the [[ManhattanTweetStorageClient]] class.
+ */
+object ManhattanTweetStorageClient {
+  object Config {
+
+    /**
+     * The Manhattan dataset where tweets are stored is not externally
+     * configurable because writing tweets to a non-production dataset
+     * requires great care. Staging instances using a different dataset will
+     * write tweets to a non-production store, but will publish events, log to
+     * HDFS, and cache data referencing tweets in that store which are not
+     * accessible by the rest of the production cluster.
+     *
+     * In a completely isolated environment it should be safe to write to
+     * other datasets for testing purposes.
+     */
+    val Dataset = "tbird_mh"
+
+    /**
+     * Once a tweet has been deleted it can only be undeleted within this time
+     * window (240 hours, i.e. 10 days), after which [[UndeleteHandler]] will
+     * return an error on undelete attempts.
+     */
+    val UndeleteWindowHours = 240
+
+    /**
+     * Default label used for underlying Manhattan Thrift client metrics
+     *
+     * The finagle client metrics will be exported at clnt/:label.
+     */
+    val ThriftClientLabel = "mh_cylon"
+
+    /**
+     * Return the corresponding Wily path for the Cylon cluster in the "other" DC
+     *
+     * @throws IllegalArgumentException if `zone` is not one of the known zones
+     */
+    def remoteDestination(zone: String): String =
+      s"/srv#/prod/${remoteZone(zone)}/manhattan/cylon.native-thrift"
+
+    /** Maps a zone to its counterpart; "localhost" is treated like "atla". */
+    private def remoteZone(zone: String) = zone match {
+      case "pdxa" => "atla"
+      case "atla" | "localhost" => "pdxa"
+      case _ =>
+        throw new IllegalArgumentException(s"Cannot configure remote DC for unknown zone '$zone'")
+    }
+  }
+
+  /**
+   * @param applicationId Manhattan application id used for quota accounting
+   * @param localDestination Wily path to local Manhattan cluster
+   * @param localTimeout Overall timeout (including retries) for all reads/writes to local cluster
+   * @param remoteDestination Wily path to remote Manhattan cluster, used for undelete and force add
+   * @param remoteTimeout Overall timeout (including retries) for all reads/writes to remote cluster
+   * @param undeleteWindowHours Amount of time during which a deleted tweet can be undeleted
+   * @param thriftClientLabel Label used to scope stats for Manhattan Thrift client
+   * @param maxRequestsPerBatch Configure the Stitch RequestGroup.Generator batch size
+   * @param serviceIdentifier The ServiceIdentifier to use when making connections to a Manhattan cluster
+   * @param opportunisticTlsLevel The level to use for opportunistic TLS for connections to the Manhattan cluster
+   */
+  case class Config(
+    applicationId: String,
+    localDestination: String,
+    localTimeout: Duration,
+    remoteDestination: String,
+    remoteTimeout: Duration,
+    undeleteWindowHours: Int = Config.UndeleteWindowHours,
+    thriftClientLabel: String = Config.ThriftClientLabel,
+    maxRequestsPerBatch: Int = Int.MaxValue,
+    serviceIdentifier: ServiceIdentifier,
+    opportunisticTlsLevel: OpportunisticTls.Level)
+
+  /**
+   * Sanitizes the input for APIs which take in a (Tweet, Seq[Field]) as input.
+   *
+   * NOTE: This function only applies sanity checks which are common to
+   * all APIs which take in a (Tweet, Seq[Field]) as input. API specific
+   * checks are not covered here.
+   *
+   * The check runs when the returned function is invoked; a non-positive field id makes
+   * `require` throw an `IllegalArgumentException` before `apiStitch` is called.
+   *
+   * @param apiStitch the backing API call
+   * @tparam T the output type of the backing API call
+   * @return a stitch function which does some basic input sanity checking
+   */
+  private[storage] def sanitizeTweetFields[T](
+    apiStitch: (Tweet, Seq[Field]) => Stitch[T]
+  ): (Tweet, Seq[Field]) => Stitch[T] =
+    (tweet, fields) => {
+      require(fields.forall(_.id > 0), s"Field ids ${fields} are not positive numbers")
+      apiStitch(tweet, fields)
+    }
+
+  // Returns a handler that asynchronously logs messages to Scribe using the BareFormatter which
+  // logs just the message without any additional metadata
+  def scribeHandler(categoryName: String): HandlerFactory =
+    ScribeHandler(
+      formatter = BareFormatter,
+      maxMessagesPerTransaction = 100,
+      category = categoryName,
+      level = Some(Level.TRACE)
+    )
+
+  /**
+   * A Config appropriate for interactive sessions and scripts.
+   *
+   * Both the application id and the service identifier role are derived from the `USER`
+   * environment variable, falling back to `"<unknown>"` when it is not set so that a
+   * `null` role is never handed to [[ServiceIdentifier]].
+   */
+  def develConfig(): Config = {
+    // `System.getenv` returns null for an unset variable; resolve it once for both uses.
+    val user = Option(System.getenv("USER")).getOrElse("<unknown>")
+
+    Config(
+      applicationId = user + ".devel",
+      localDestination = "/s/manhattan/cylon.native-thrift",
+      localTimeout = 10.seconds,
+      remoteDestination = "/s/manhattan/cylon.native-thrift",
+      remoteTimeout = 10.seconds,
+      undeleteWindowHours = Config.UndeleteWindowHours,
+      thriftClientLabel = Config.ThriftClientLabel,
+      maxRequestsPerBatch = Int.MaxValue,
+      serviceIdentifier = ServiceIdentifier(user, "tweetypie", "devel", "local"),
+      opportunisticTlsLevel = OpportunisticTls.Required
+    )
+  }
+
+  /**
+   * Build a Manhattan tweet storage client for use in interactive
+   * sessions and scripts. Uses [[develConfig]], a `NullStatsReceiver` and the default
+   * `ClientIdHelper`.
+   */
+  def devel(): TweetStorageClient =
+    new ManhattanTweetStorageClient(
+      develConfig(),
+      NullStatsReceiver,
+      ClientIdHelper.default,
+    )
+}
+
+/**
+ * [[TweetStorageClient]] implementation backed by Manhattan.
+ *
+ * Builds one Manhattan client/endpoint pair for the local cluster and one for the remote
+ * cluster, and exposes each storage API as a handler wrapped with stats and exception
+ * translation.
+ *
+ * @param config destinations, timeouts and TLS settings for the Manhattan clients
+ * @param statsReceiver receiver that all client and per-API metrics are reported to
+ * @param clientIdHelper used to attribute read requests to the calling client id
+ */
+class ManhattanTweetStorageClient(
+  config: ManhattanTweetStorageClient.Config,
+  statsReceiver: StatsReceiver,
+  private val clientIdHelper: ClientIdHelper)
+    extends TweetStorageClient {
+  import ManhattanTweetStorageClient._
+
+  // NOTE(review): presumably `lazy` so that a subclass overriding the factory has it in place
+  // before `scribe` is constructed below - confirm before changing it to a plain val.
+  lazy val scribeHandlerFactory: ScribeHandlerFactory = scribeHandler _
+  val scribe: Scribe = new Scribe(scribeHandlerFactory, statsReceiver)
+
+  /**
+   * Creates a Manhattan KV client for `dest`, labelled `label` for its metrics.
+   *
+   * mTLS parameters are only supplied when the configured service identifier is not
+   * `EmptyServiceIdentifier`.
+   */
+  def mkClient(
+    dest: String,
+    label: String
+  ): ManhattanKVClient = {
+    val mhMtlsParams =
+      if (config.serviceIdentifier == EmptyServiceIdentifier) NoMtlsParams
+      else
+        ManhattanKVClientMtlsParams(
+          serviceIdentifier = config.serviceIdentifier,
+          opportunisticTls = config.opportunisticTlsLevel
+        )
+
+    new ManhattanKVClient(
+      config.applicationId,
+      dest,
+      mhMtlsParams,
+      label,
+      Seq(Experiments.ApertureLoadBalancer))
+  }
+
+  val localClient: ManhattanKVClient = mkClient(config.localDestination, config.thriftClientLabel)
+
+  val localMhEndpoint: ManhattanKVEndpoint = ManhattanKVEndpointBuilder(localClient)
+    .defaultGuarantee(Guarantee.SoftDcReadMyWrites)
+    .defaultMaxTimeout(config.localTimeout)
+    .maxRequestsPerBatch(config.maxRequestsPerBatch)
+    .build()
+
+  val localManhattanOperations = new ManhattanOperations(Config.Dataset, localMhEndpoint)
+
+  val remoteClient: ManhattanKVClient =
+    mkClient(config.remoteDestination, s"${config.thriftClientLabel}_remote")
+
+  // NOTE(review): unlike the local endpoint, this one does not apply
+  // `config.maxRequestsPerBatch` - confirm that is intentional.
+  val remoteMhEndpoint: ManhattanKVEndpoint = ManhattanKVEndpointBuilder(remoteClient)
+    .defaultGuarantee(Guarantee.SoftDcReadMyWrites)
+    .defaultMaxTimeout(config.remoteTimeout)
+    .build()
+
+  val remoteManhattanOperations = new ManhattanOperations(Config.Dataset, remoteMhEndpoint)
+
+  /**
+   * Note: This translation is only useful for non-batch endpoints. Batch endpoints currently
+   * represent failure without propagating an exception
+   * (e.g. [[com.twitter.tweetypie.storage.Response.TweetResponseCode.Failure]]).
+   *
+   * `IllegalArgumentException` becomes `ClientError` and `DeniedManhattanException` becomes
+   * `RateLimited`. `VersionMismatchError` and `InternalError` are returned unchanged after
+   * being counted or logged, respectively.
+   */
+  private[this] def translateExceptions(
+    apiName: String,
+    statsReceiver: StatsReceiver
+  ): PartialFunction[Throwable, Throwable] = {
+    case e: IllegalArgumentException => ClientError(e.getMessage, e)
+    case e: DeniedManhattanException => RateLimited(e.getMessage, e)
+    case e: VersionMismatchError =>
+      statsReceiver.scope(apiName).counter("mh_version_mismatches").incr()
+      e
+    case e: InternalError =>
+      TweetUtils.log.error(e, s"Error processing $apiName request: ${e.getMessage}")
+      e
+  }
+
+  /**
+   * Count requests per client id producing metrics of the form
+   * .../clients/:root_client_id/requests
+   *
+   * The client id is looked up on every call; requests without one are counted under
+   * `ClientIdHelper.UnknownClientId`.
+   */
+  def observeClientId[A, B](
+    apiStitch: A => Stitch[B],
+    statsReceiver: StatsReceiver,
+    clientIdHelper: ClientIdHelper,
+  ): A => Stitch[B] = {
+    val clients = statsReceiver.scope("clients")
+
+    a => {
+      val clientId = clientIdHelper.effectiveClientIdRoot.getOrElse(ClientIdHelper.UnknownClientId)
+      // `incr()` is side-effecting, so it is called with parentheses like every other counter
+      // in this file.
+      clients.counter(clientId, "requests").incr()
+      apiStitch(a)
+    }
+  }
+
+  /**
+   * Increment counters based on the overall response status of the returned [[GetTweet.Response]].
+   *
+   * A `RateLimited` failure is counted as `over_capacity`; any other failure as `failure`.
+   */
+  def observeGetTweetResponseCode[A](
+    apiStitch: A => Stitch[GetTweet.Response],
+    statsReceiver: StatsReceiver
+  ): A => Stitch[GetTweet.Response] = {
+    val scope = statsReceiver.scope("response_code")
+
+    val success = scope.counter("success")
+    val notFound = scope.counter("not_found")
+    val failure = scope.counter("failure")
+    val overCapacity = scope.counter("over_capacity")
+    val deleted = scope.counter("deleted")
+    val bounceDeleted = scope.counter("bounce_deleted")
+
+    a =>
+      apiStitch(a).respond {
+        case Return(_: GetTweet.Response.Found) => success.incr()
+        case Return(GetTweet.Response.NotFound) => notFound.incr()
+        case Return(_: GetTweet.Response.BounceDeleted) => bounceDeleted.incr()
+        case Return(GetTweet.Response.Deleted) => deleted.incr()
+        case Throw(_: RateLimited) => overCapacity.incr()
+        case Throw(_) => failure.incr()
+      }
+  }
+
+  /**
+   * We do 3 things here:
+   *
+   * - Bookkeeping for overall requests
+   * - Bookkeeping for per api requests
+   * - Translate exceptions
+   *
+   * @param apiName the API being called
+   * @param apiStitch the implementation of the API
+   * @tparam A template for input type of API
+   * @tparam B template for output type of API
+   * @return Function which executes the given API call
+   */
+  private[storage] def endpoint[A, B](
+    apiName: String,
+    apiStitch: A => Stitch[B]
+  ): A => Stitch[B] = {
+    val translateException = translateExceptions(apiName, statsReceiver)
+    val observe = StitchUtils.observe[B](statsReceiver, apiName)
+
+    a =>
+      StitchUtils.translateExceptions(
+        observe(apiStitch(a)),
+        translateException
+      )
+  }
+
+  /**
+   * Two-argument variant of [[endpoint]]: tuples the arguments, wraps the call with
+   * [[endpoint]] and untuples the result.
+   *
+   * NOTE(review): `clientIdHelper` is accepted but not used by this method.
+   */
+  private[storage] def endpoint2[A, B, C](
+    apiName: String,
+    apiStitch: (A, B) => Stitch[C],
+    clientIdHelper: ClientIdHelper,
+  ): (A, B) => Stitch[C] =
+    Function.untupled(endpoint(apiName, apiStitch.tupled))
+
+  /** Reads from the local cluster; observed per client id and per response code. */
+  val getTweet: TweetStorageClient.GetTweet = {
+    val stats = statsReceiver.scope("getTweet")
+
+    observeClientId(
+      observeGetTweetResponseCode(
+        endpoint(
+          "getTweet",
+          GetTweetHandler(
+            read = localManhattanOperations.read,
+            statsReceiver = stats,
+          )
+        ),
+        stats,
+      ),
+      stats,
+      clientIdHelper,
+    )
+  }
+
+  /** Reads from the local cluster; observed per client id. */
+  val getStoredTweet: TweetStorageClient.GetStoredTweet = {
+    val stats = statsReceiver.scope("getStoredTweet")
+
+    observeClientId(
+      endpoint(
+        "getStoredTweet",
+        GetStoredTweetHandler(
+          read = localManhattanOperations.read,
+          statsReceiver = stats,
+        )
+      ),
+      stats,
+      clientIdHelper,
+    )
+  }
+
+  /** Writes to the local cluster and scribes the write. */
+  val addTweet: TweetStorageClient.AddTweet =
+    endpoint(
+      "addTweet",
+      AddTweetHandler(
+        insert = localManhattanOperations.insert,
+        scribe = scribe,
+        stats = statsReceiver
+      )
+    )
+
+  /** Writes to the local cluster after the common field-id sanity check. */
+  val updateTweet: TweetStorageClient.UpdateTweet =
+    endpoint2(
+      "updateTweet",
+      ManhattanTweetStorageClient.sanitizeTweetFields(
+        UpdateTweetHandler(
+          insert = localManhattanOperations.insert,
+          stats = statsReceiver,
+        )
+      ),
+      clientIdHelper,
+    )
+
+  val softDelete: TweetStorageClient.SoftDelete =
+    endpoint(
+      "softDelete",
+      SoftDeleteHandler(
+        insert = localManhattanOperations.insert,
+        scribe = scribe
+      )
+    )
+
+  val bounceDelete: BounceDelete =
+    endpoint(
+      "bounceDelete",
+      BounceDeleteHandler(
+        insert = localManhattanOperations.insert,
+        scribe = scribe
+      )
+    )
+
+  /** The only API here that also writes to the remote cluster (via `remoteInsert`). */
+  val undelete: TweetStorageClient.Undelete =
+    endpoint(
+      "undelete",
+      UndeleteHandler(
+        read = localManhattanOperations.read,
+        localInsert = localManhattanOperations.insert,
+        remoteInsert = remoteManhattanOperations.insert,
+        delete = localManhattanOperations.delete,
+        undeleteWindowHours = config.undeleteWindowHours,
+        stats = statsReceiver
+      )
+    )
+
+  val getDeletedTweets: TweetStorageClient.GetDeletedTweets =
+    endpoint(
+      "getDeletedTweets",
+      GetDeletedTweetsHandler(
+        read = localManhattanOperations.read,
+        stats = statsReceiver
+      )
+    )
+
+  val deleteAdditionalFields: TweetStorageClient.DeleteAdditionalFields =
+    endpoint2(
+      "deleteAdditionalFields",
+      DeleteAdditionalFieldsHandler(
+        delete = localManhattanOperations.delete,
+        stats = statsReceiver,
+      ),
+      clientIdHelper,
+    )
+
+  val scrub: TweetStorageClient.Scrub =
+    endpoint2(
+      "scrub",
+      ScrubHandler(
+        insert = localManhattanOperations.insert,
+        delete = localManhattanOperations.delete,
+        scribe = scribe,
+        stats = statsReceiver,
+      ),
+      clientIdHelper,
+    )
+
+  val hardDeleteTweet: HardDeleteTweet =
+    endpoint(
+      "hardDeleteTweet",
+      HardDeleteTweetHandler(
+        read = localManhattanOperations.read,
+        insert = localManhattanOperations.insert,
+        delete = localManhattanOperations.delete,
+        scribe = scribe,
+        stats = statsReceiver
+      )
+    )
+
+  /**
+   * Issues a single `get` against the local endpoint for a random tweet id and discards
+   * the result.
+   */
+  val ping: TweetStorageClient.Ping =
+    () =>
+      Stitch
+        .run(
+          localMhEndpoint
+            .get(
+              ManhattanOperations.KeyDescriptor
+                .withDataset(Config.Dataset)
+                // Clear the sign bit instead of calling `.abs`: `Long.MinValue.abs` is still
+                // negative, whereas masking always yields a non-negative id.
+                .withPkey(Random.nextLong() & Long.MaxValue)
+                .withLkey(TweetKey.LKey.CoreFieldsKey), // could be any lkey
+              ValueDescriptor(BufInjection)
+            ).unit
+        )
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala
@ -0,0 +1,30 @@
+package com.twitter.tweetypie.storage
+
+/**
+ * Result types for storage calls that report a status per tweet and, optionally, per
+ * additional field.
+ */
+object Response {
+
+  /**
+   * @param tweetId id of the tweet the response is about
+   * @param overallResponse status of the call for the tweet as a whole
+   * @param additionalFieldResponses optional per-field statuses, keyed by field id
+   */
+  case class TweetResponse(
+    tweetId: Long,
+    overallResponse: TweetResponseCode,
+    additionalFieldResponses: Option[Map[Short, FieldResponse]] = None)
+
+  sealed trait TweetResponseCode
+
+  // The codes are `case object`s so that they print as their own name (e.g. in logs and
+  // test failures) instead of an opaque `Class$@hash` string.
+  object TweetResponseCode {
+    case object Success extends TweetResponseCode
+    case object Partial extends TweetResponseCode
+    case object Failure extends TweetResponseCode
+    case object OverCapacity extends TweetResponseCode
+    case object Deleted extends TweetResponseCode
+  }
+
+  /**
+   * @param code status for a single field
+   * @param message optional detail accompanying the code
+   */
+  case class FieldResponse(code: FieldResponseCode, message: Option[String] = None)
+
+  sealed trait FieldResponseCode
+
+  object FieldResponseCode {
+    case object Success extends FieldResponseCode
+    case object InvalidRequest extends FieldResponseCode
+    case object ValueNotFound extends FieldResponseCode
+    case object Timeout extends FieldResponseCode
+    case object Error extends FieldResponseCode
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala
@ -0,0 +1,85 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.servo.util.FutureEffect
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.logging._
+import com.twitter.scrooge.BinaryThriftStructSerializer
+import com.twitter.servo.util.{Scribe => ServoScribe}
+import com.twitter.tweetypie.storage_internal.thriftscala._
+import com.twitter.tbird.thriftscala.Added
+import com.twitter.tbird.thriftscala.Removed
+import com.twitter.tbird.thriftscala.Scrubbed
+import com.twitter.util.Time
+
+/**
+ * Scribe is used to log tweet writes which are used to generate /tables/statuses in HDFS.
+ *
+ * Write   Scribe Category      Message
+ * -----   ---------------      -------
+ * add     tbird_add_status     [[com.twitter.tbird.thriftscala.Added]]
+ * remove  tbird_remove_status  [[com.twitter.tbird.thriftscala.Removed]]
+ * scrub   tbird_scrub_status   [[com.twitter.tbird.thriftscala.Scrubbed]]
+ *
+ * The thrift representation is encoded using binary thrift protocol format, followed by base64
+ * encoding and converted to string using default character set (utf8). The logger uses BareFormatter.
+ *
+ * The thrift ops are scribed only after the write API call has succeeded.
+ *
+ * The class is thread safe except initial configuration and registration routines,
+ * and no exception is expected unless java heap is out of memory.
+ *
+ * If exception does get thrown, add/remove/scrub operations will fail and
+ * client will have to retry
+ */
+class Scribe(factory: Scribe.ScribeHandlerFactory, statsReceiver: StatsReceiver) {
+  import Scribe._
+
+  // One binary thrift serializer per scribed message type.
+  private val addedSerializer = BinaryThriftStructSerializer(Added)
+  private val removedSerializer = BinaryThriftStructSerializer(Removed)
+  private val scrubbedSerializer = BinaryThriftStructSerializer(Scrubbed)
+
+  // Count of log calls per message type.
+  private val addCounter = statsReceiver.counter("scribe/add/count")
+  private val removeCounter = statsReceiver.counter("scribe/remove/count")
+  private val scrubCounter = statsReceiver.counter("scribe/scrub/count")
+
+  // One handler per scribe category, built from the supplied factory.
+  val addHandler: FutureEffect[String] = ServoScribe(factory(scribeAddedCategory)())
+  val removeHandler: FutureEffect[String] = ServoScribe(factory(scribeRemovedCategory)())
+  val scrubHandler: FutureEffect[String] = ServoScribe(factory(scribeScrubbedCategory)())
+
+  /** Scribes an `Added` message for `tweet`, stamped with the current time. */
+  def logAdded(tweet: StoredTweet): Unit = {
+    val added =
+      Added(StatusConversions.toTBirdStatus(tweet), Time.now.inMilliseconds, Some(false))
+    addHandler(addedSerializer.toString(added))
+    addCounter.incr()
+  }
+
+  /** Scribes a `Removed` message for tweet `id` removed at `at`. */
+  def logRemoved(id: Long, at: Time, isSoftDeleted: Boolean): Unit = {
+    val removed = Removed(id, at.inMilliseconds, Some(isSoftDeleted))
+    removeHandler(removedSerializer.toString(removed))
+    removeCounter.incr()
+  }
+
+  /** Scribes a `Scrubbed` message for the columns `cols` of tweet `id` scrubbed at `at`. */
+  def logScrubbed(id: Long, cols: Seq[Int], at: Time): Unit = {
+    val scrubbed = Scrubbed(id, cols, at.inMilliseconds)
+    scrubHandler(scrubbedSerializer.toString(scrubbed))
+    scrubCounter.incr()
+  }
+}
+
+object Scribe {
+
+  /** Given a scribe category name, produces the `HandlerFactory` used to log to it. */
+  type ScribeHandlerFactory = String => HandlerFactory
+
+  /**
+   * WARNING: These categories are white-listed. If you are changing them, the new
+   * categories should be white-listed as well; follow up with the CoreWorkflows team (CW)
+   * for that.
+   */
+  private val scribeAddedCategory = "tbird_add_status"
+  private val scribeRemovedCategory = "tbird_remove_status"
+  private val scribeScrubbedCategory = "tbird_scrub_status"
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala
@ -0,0 +1,71 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.kv.ManhattanValue
+import com.twitter.tweetypie.storage.TweetUtils._
+import com.twitter.util.Time
+
+/**
+ * Deletes data for the scrubbed field and writes a metadata record.
+ * Provides scrub functionality. Right now, we only allow the scrubbing of the geo field.
+ * It should be simple to add more fields to the allowlist if needed.
+ */
+object ScrubHandler {
+
+  /** The only fields that may currently be scrubbed. */
+  val scrubFieldsAllowlist: Set[Field] = Set(Field.Geo)
+
+  /**
+   * Builds the scrub operation.
+   *
+   * For every positive tweet id the returned function first deletes the requested
+   * field records, then inserts an empty "scrubbed" marker record per field. Once the
+   * whole batch has succeeded, each tweet id is scribed.
+   *
+   * The returned function throws `IllegalArgumentException` (via `require`) when no
+   * fields are given, when fields are duplicated, or when a field is not in
+   * [[scrubFieldsAllowlist]].
+   */
+  def apply(
+    insert: ManhattanOperations.Insert,
+    delete: ManhattanOperations.Delete,
+    scribe: Scribe,
+    stats: StatsReceiver
+  ): TweetStorageClient.Scrub =
+    (unfilteredTweetIds: Seq[TweetId], columns: Seq[Field]) => {
+      // Non-positive ids are silently dropped.
+      val tweetIds = unfilteredTweetIds.filter(id => id > 0)
+
+      require(columns.nonEmpty, "Must specify fields to scrub")
+      require(
+        columns.toSet.size == columns.size,
+        s"Duplicate fields to scrub specified: $columns"
+      )
+      require(
+        columns.forall(field => scrubFieldsAllowlist.contains(field)),
+        s"Cannot scrub $columns; scrubbable fields are restricted to $scrubFieldsAllowlist"
+      )
+
+      Stats.addWidthStat("scrub", "ids", tweetIds.size, stats)
+
+      // A single timestamp is shared by all deletes, marker records and scribe messages.
+      val mhTimestamp = Time.now
+
+      // Step 1 for one tweet: delete the record of every requested field.
+      def deleteFields(tweetId: TweetId) = {
+        val deletions = columns.map { field =>
+          delete(TweetKey.fieldKey(tweetId, field.id), Some(mhTimestamp)).liftToTry
+        }
+        Stitch.collect(deletions).map(collectWithRateLimitCheck).lowerFromTry
+      }
+
+      // Step 2 for one tweet: write an empty-valued scrubbed marker for every requested field.
+      def insertScrubbedMarkers(tweetId: TweetId) = {
+        val insertions = columns.map { column =>
+          val marker = TweetManhattanRecord(
+            TweetKey.scrubbedFieldKey(tweetId, column.id),
+            ManhattanValue(StringCodec.toByteBuffer(""), Some(mhTimestamp))
+          )
+          insert(marker).liftToTry
+        }
+        Stitch.collect(insertions)
+      }
+
+      val perTweet = tweetIds.map { tweetId =>
+        deleteFields(tweetId)
+          .flatMap(_ => insertScrubbedMarkers(tweetId))
+          .map(collectWithRateLimitCheck)
+      }
+
+      Stitch.collect(perTweet).map(collectWithRateLimitCheck).lowerFromTry.onSuccess { _ =>
+        for (id <- tweetIds) {
+          scribe.logScrubbed(id, columns.map(_.id.toInt), mhTimestamp)
+        }
+      }
+    }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala
@ -0,0 +1,20 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.util.Time
+
+/**
+ * Soft-deletes a tweet by inserting a `SoftDeleted` state record, then scribes the removal
+ * once the insert has succeeded.
+ */
+object SoftDeleteHandler {
+  def apply(
+    insert: ManhattanOperations.Insert,
+    scribe: Scribe
+  ): TweetStorageClient.SoftDelete = { tweetId =>
+    // The same instant is used for the state record and for the scribe message.
+    val deletedAt = Time.now
+    val stateRecord = TweetStateRecord.SoftDeleted(tweetId, deletedAt.inMillis)
+
+    insert(stateRecord.toTweetMhRecord).onSuccess { _ =>
+      scribe.logRemoved(tweetId, deletedAt, isSoftDeleted = true)
+    }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala
@ -0,0 +1,33 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.finagle.stats.StatsReceiver
+
+object Stats {
+  // Both helpers below are called per RPC call for most APIs, so they rely on the stats
+  // receiver that is passed in to the library to do memoization.
+
+  /** Adds `width` to the "width" stat scoped under `rpcName` and `paramName`. */
+  private[storage] def addWidthStat(
+    rpcName: String,
+    paramName: String,
+    width: Int,
+    stats: StatsReceiver
+  ): Unit = {
+    val widthStat = stats.scope(rpcName, paramName).stat("width")
+    widthStat.add(width)
+  }
+
+  /**
+   * Increments, by `count`, the "count" counter of every field id in `fieldIds`, scoped
+   * under `rpcName`, "fields" and the field id. The idea is to expose the QPS for each
+   * additional field.
+   */
+  private[storage] def updatePerFieldQpsCounters(
+    rpcName: String,
+    fieldIds: Seq[FieldId],
+    count: Int,
+    stats: StatsReceiver
+  ): Unit =
+    for (fieldId <- fieldIds) {
+      val fieldCounter = stats.scope(rpcName, "fields", fieldId.toString).counter("count")
+      fieldCounter.incr(count)
+    }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala
@ -0,0 +1,129 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.tweetypie.storage_internal.thriftscala._
+import com.twitter.tbird.{thriftscala => tbird}
+
+/**
+ * Conversions between [[StoredTweet]] and the `tbird` thrift `Status` representation.
+ */
+object StatusConversions {
+
+  /**
+   * Unwraps a field that `tbird.Status` requires, failing with a message that names the
+   * tweet and the field instead of the bare "None.get" produced by `Option.get`.
+   */
+  private def required[A](tweet: StoredTweet, fieldName: String, value: Option[A]): A =
+    value.getOrElse(
+      throw new NoSuchElementException(
+        s"StoredTweet ${tweet.id} is missing required field '$fieldName'"
+      )
+    )
+
+  /**
+   * This is used only in Scribe.scala, when scribing to tbird_add_status
+   * Once we remove that, we can also remove this.
+   *
+   * Optional flags default to `false`, missing media to an empty sequence.
+   *
+   * @throws NoSuchElementException if `userId`, `text`, `createdVia` or `createdAtSec`
+   *         is not set on `tweet`
+   */
+  def toTBirdStatus(tweet: StoredTweet): tbird.Status =
+    tbird.Status(
+      id = tweet.id,
+      userId = required(tweet, "userId", tweet.userId),
+      text = required(tweet, "text", tweet.text),
+      createdVia = required(tweet, "createdVia", tweet.createdVia),
+      createdAtSec = required(tweet, "createdAtSec", tweet.createdAtSec),
+      reply = tweet.reply.map(toTBirdReply),
+      share = tweet.share.map(toTBirdShare),
+      contributorId = tweet.contributorId,
+      geo = tweet.geo.map(toTBirdGeo),
+      hasTakedown = tweet.hasTakedown.getOrElse(false),
+      nsfwUser = tweet.nsfwUser.getOrElse(false),
+      nsfwAdmin = tweet.nsfwAdmin.getOrElse(false),
+      media = tweet.media.map(_.map(toTBirdMedia)).getOrElse(Seq()),
+      narrowcast = tweet.narrowcast.map(toTBirdNarrowcast),
+      nullcast = tweet.nullcast.getOrElse(false),
+      trackingId = tweet.trackingId
+    )
+
+  /**
+   * This is only used in a test, to verify that the above method `toTBirdStatus`
+   * works, so we can't remove it as long as the above method exists.
+   */
+  def fromTBirdStatus(status: tbird.Status): StoredTweet = {
+    StoredTweet(
+      id = status.id,
+      userId = Some(status.userId),
+      text = Some(status.text),
+      createdVia = Some(status.createdVia),
+      createdAtSec = Some(status.createdAtSec),
+      reply = status.reply.map(fromTBirdReply),
+      share = status.share.map(fromTBirdShare),
+      contributorId = status.contributorId,
+      geo = status.geo.map(fromTBirdGeo),
+      hasTakedown = Some(status.hasTakedown),
+      nsfwUser = Some(status.nsfwUser),
+      nsfwAdmin = Some(status.nsfwAdmin),
+      media = Some(status.media.map(fromTBirdMedia)),
+      narrowcast = status.narrowcast.map(fromTBirdNarrowcast),
+      nullcast = Some(status.nullcast),
+      trackingId = status.trackingId
+    )
+  }
+
+  private def fromTBirdReply(reply: tbird.Reply): StoredReply =
+    StoredReply(
+      inReplyToStatusId = reply.inReplyToStatusId,
+      inReplyToUserId = reply.inReplyToUserId
+    )
+
+  private def fromTBirdShare(share: tbird.Share): StoredShare =
+    StoredShare(
+      sourceStatusId = share.sourceStatusId,
+      sourceUserId = share.sourceUserId,
+      parentStatusId = share.parentStatusId
+    )
+
+  private def fromTBirdGeo(geo: tbird.Geo): StoredGeo =
+    StoredGeo(
+      latitude = geo.latitude,
+      longitude = geo.longitude,
+      geoPrecision = geo.geoPrecision,
+      entityId = geo.entityId,
+      // Mirrors `toTBirdGeo`, which writes `name`; without it the round trip loses the name.
+      name = geo.name
+    )
+
+  private def fromTBirdMedia(media: tbird.MediaEntity): StoredMediaEntity =
+    StoredMediaEntity(
+      id = media.id,
+      mediaType = media.mediaType,
+      width = media.width,
+      height = media.height
+    )
+
+  private def fromTBirdNarrowcast(narrowcast: tbird.Narrowcast): StoredNarrowcast =
+    StoredNarrowcast(
+      language = Some(narrowcast.language),
+      location = Some(narrowcast.location),
+      ids = Some(narrowcast.ids)
+    )
+
+  private def toTBirdReply(reply: StoredReply): tbird.Reply =
+    tbird.Reply(
+      inReplyToStatusId = reply.inReplyToStatusId,
+      inReplyToUserId = reply.inReplyToUserId
+    )
+
+  private def toTBirdShare(share: StoredShare): tbird.Share =
+    tbird.Share(
+      sourceStatusId = share.sourceStatusId,
+      sourceUserId = share.sourceUserId,
+      parentStatusId = share.parentStatusId
+    )
+
+  private def toTBirdGeo(geo: StoredGeo): tbird.Geo =
+    tbird.Geo(
+      latitude = geo.latitude,
+      longitude = geo.longitude,
+      geoPrecision = geo.geoPrecision,
+      entityId = geo.entityId,
+      name = geo.name
+    )
+
+  private def toTBirdMedia(media: StoredMediaEntity): tbird.MediaEntity =
+    tbird.MediaEntity(
+      id = media.id,
+      mediaType = media.mediaType,
+      width = media.width,
+      height = media.height
+    )
+
+  // Unset narrowcast lists are written as empty lists.
+  private def toTBirdNarrowcast(narrowcast: StoredNarrowcast): tbird.Narrowcast =
+    tbird.Narrowcast(
+      language = narrowcast.language.getOrElse(Nil),
+      location = narrowcast.location.getOrElse(Nil),
+      ids = narrowcast.ids.getOrElse(Nil)
+    )
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala
@ -0,0 +1,346 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
+import com.twitter.scrooge.TFieldBlob
+import com.twitter.tweetypie.additionalfields.AdditionalFields
+import com.twitter.tweetypie.storage_internal.thriftscala._
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.tweetypie.util.TweetLenses
+
+/**
+ * Conversions between the Tweetypie [[Tweet]] thrift model and the [[StoredTweet]] model used
+ * by tweet storage (including their nested structs), plus the projection of a [[StoredTweet]]
+ * onto a [[DeletedTweet]].
+ */
+object StorageConversions {
+  // Ids of the fields compiled into StoredTweet that fall in the additional-field id range.
+  private val tbTweetCompiledAdditionalFieldIds =
+    StoredTweet.metaData.fields.map(_.id).filter(AdditionalFields.isAdditionalFieldId)
+
+  /**
+   * Converts a [[Reply]] for storage. A missing `inReplyToStatusId` is stored as 0 (and turned
+   * back into None by [[fromStoredReply]]); the conversation id is supplied by the caller.
+   */
+  def toStoredReply(reply: Reply, conversationId: Option[TweetId]): StoredReply =
+    StoredReply(
+      inReplyToStatusId = reply.inReplyToStatusId.getOrElse(0),
+      inReplyToUserId = reply.inReplyToUserId,
+      conversationId = conversationId
+    )
+
+  /** Copies source status id, source user id and parent status id into a [[StoredShare]]. */
+  def toStoredShare(share: Share): StoredShare =
+    StoredShare(
+      share.sourceStatusId,
+      share.sourceUserId,
+      share.parentStatusId
+    )
+
+  /**
+   * Converts a quoted tweet reference for storage.
+   *
+   * @param qt the quoted tweet reference
+   * @param text the text of the quoting tweet
+   * @return None when `qt` has no permalink or when the permalink's short url already appears
+   *         in `text`; otherwise the stored quoted tweet carrying that short url
+   */
+  def toStoredQuotedTweet(qt: QuotedTweet, text: String): Option[StoredQuotedTweet] =
+    qt.permalink
+      .filterNot { p =>
+        text.contains(p.shortUrl)
+      } // omit StoredQuotedTweet when url already in text
+      .map { p =>
+        StoredQuotedTweet(
+          qt.tweetId,
+          qt.userId,
+          p.shortUrl
+        )
+      }
+
+  /**
+   * Single implementation of the geo conversion shared by [[toStoredGeo]] and
+   * [[toStoredTweetForFields]].
+   *
+   *  - With coordinates: they are copied, `display` is encoded as entityId 2 (0 otherwise) and
+   *    the place id, if any, is stored as `name`.
+   *  - With only a place id: a zeroed-out StoredGeo carrying the place id as `name`.
+   *  - With neither: None.
+   */
+  private def storedGeoFrom(
+    coordinates: Option[GeoCoordinates],
+    placeId: Option[String]
+  ): Option[StoredGeo] =
+    coordinates match {
+      case Some(coords) =>
+        Some(
+          StoredGeo(
+            latitude = coords.latitude,
+            longitude = coords.longitude,
+            geoPrecision = coords.geoPrecision,
+            entityId = if (coords.display) 2 else 0,
+            name = placeId
+          )
+        )
+      case None =>
+        placeId.map { id =>
+          StoredGeo(
+            latitude = 0.0,
+            longitude = 0.0,
+            geoPrecision = 0,
+            entityId = 0,
+            name = Some(id)
+          )
+        }
+    }
+
+  /**
+   * Builds the stored geo for a tweet from its coordinates and place id (both read through
+   * [[TweetLenses]]). Returns None when the tweet has neither.
+   */
+  def toStoredGeo(tweet: Tweet): Option[StoredGeo] =
+    storedGeoFrom(TweetLenses.geoCoordinates.get(tweet), TweetLenses.placeId.get(tweet))
+
+  /**
+   * Converts media entities for storage. Entities that carry a `sourceStatusId` are dropped,
+   * as are entities without an `Orig` size (see [[toStoredMediaEntity]]).
+   */
+  def toStoredMedia(mediaList: Seq[MediaEntity]): Seq[StoredMediaEntity] =
+    mediaList.filter(_.sourceStatusId.isEmpty).flatMap(toStoredMediaEntity)
+
+  /**
+   * Converts one media entity using its `Orig` size; returns None when there is no such size.
+   * The content type is narrowed with `toByte` and the dimensions with `toShort`.
+   */
+  def toStoredMediaEntity(media: MediaEntity): Option[StoredMediaEntity] =
+    media.sizes.find(_.sizeType == MediaSizeType.Orig).map { origSize =>
+      StoredMediaEntity(
+        id = media.mediaId,
+        mediaType = origSize.deprecatedContentType.value.toByte,
+        width = origSize.width.toShort,
+        height = origSize.height.toShort
+      )
+    }
+
+  // The language and ids fields are for compatibility with existing tweets stored in manhattan.
+  def toStoredNarrowcast(narrowcast: Narrowcast): StoredNarrowcast =
+    StoredNarrowcast(
+      language = Some(Seq.empty),
+      location = Some(narrowcast.location),
+      ids = Some(Seq.empty)
+    )
+
+  /** Sets every field blob in `from` on `to`, in order, and returns the resulting tweet. */
+  def toStoredAdditionalFields(from: Seq[TFieldBlob], to: StoredTweet): StoredTweet =
+    from.foldLeft(to) { case (t, f) => t.setField(f) }
+
+  /** Copies the additional fields of `from` (as reported by [[AdditionalFields]]) onto `to`. */
+  def toStoredAdditionalFields(from: Tweet, to: StoredTweet): StoredTweet =
+    toStoredAdditionalFields(AdditionalFields.additionalFields(from), to)
+
+  /**
+   * Converts a full Tweetypie tweet into its stored representation: core fields are read
+   * through [[TweetLenses]], then the tweet's additional fields are copied over.
+   */
+  def toStoredTweet(tweet: Tweet): StoredTweet = {
+    val storedTweet =
+      StoredTweet(
+        id = tweet.id,
+        userId = Some(TweetLenses.userId(tweet)),
+        text = Some(TweetLenses.text(tweet)),
+        createdVia = Some(TweetLenses.createdVia(tweet)),
+        createdAtSec = Some(TweetLenses.createdAt(tweet)),
+        reply =
+          TweetLenses.reply(tweet).map { r => toStoredReply(r, TweetLenses.conversationId(tweet)) },
+        share = TweetLenses.share(tweet).map(toStoredShare),
+        contributorId = tweet.contributor.map(_.userId),
+        geo = toStoredGeo(tweet),
+        hasTakedown = Some(TweetLenses.hasTakedown(tweet)),
+        nsfwUser = Some(TweetLenses.nsfwUser(tweet)),
+        nsfwAdmin = Some(TweetLenses.nsfwAdmin(tweet)),
+        media = tweet.media.map(toStoredMedia),
+        narrowcast = TweetLenses.narrowcast(tweet).map(toStoredNarrowcast),
+        nullcast = Some(TweetLenses.nullcast(tweet)),
+        trackingId = TweetLenses.trackingId(tweet),
+        quotedTweet = TweetLenses.quotedTweet(tweet).flatMap { qt =>
+          toStoredQuotedTweet(qt, TweetLenses.text(tweet))
+        }
+      )
+    toStoredAdditionalFields(tweet, storedTweet)
+  }
+
+  /**
+   * Does not need core data to be set. Constructs on disk tweet by avoiding the TweetLenses object
+   * and only extracting the specified fields.
+   *
+   * NOTE: Assumes that specified fields are set in the tweet. In particular, requesting Geo,
+   * HasTakedown, NsfwUser or NsfwAdmin on a tweet without core data fails with the
+   * NoSuchElementException thrown by `Option.get`.
+   *
+   * @param tpTweet Tweetypie Tweet to be converted
+   * @param fields the fields to be populated in the on disk Tweet
+   *
+   * @return an on disk Tweet which has only the specified fields set
+   * @throws IllegalArgumentException if `fields` contains a field that is neither an updatable
+   *                                  compiled field nor an additional field
+   */
+  def toStoredTweetForFields(tpTweet: Tweet, fields: Set[Field]): StoredTweet = {
+
+    // Make sure all the passed in fields are known or additional fields
+    val unsupportedFields =
+      (fields -- Field.AllUpdatableCompiledFields)
+        .filterNot(field => AdditionalFields.isAdditionalFieldId(field.id))
+    require(
+      unsupportedFields.isEmpty,
+      s"fields must be updatable compiled fields or additional fields, got: $unsupportedFields"
+    )
+
+    // Deliberately a def: core data is only dereferenced for fields that were requested, so a
+    // tweet without core data is fine as long as no core-data-backed field is asked for.
+    def coreData: TweetCoreData = tpTweet.coreData.get
+
+    // `value` is by-name so it is not evaluated unless `field` was requested.
+    def ifRequested[A](field: Field)(value: => A): Option[A] =
+      if (fields.contains(field)) Some(value) else None
+
+    val storedTweet =
+      StoredTweet(
+        id = tpTweet.id,
+        geo =
+          if (fields.contains(Field.Geo)) storedGeoFrom(coreData.coordinates, coreData.placeId)
+          else None,
+        hasTakedown = ifRequested(Field.HasTakedown)(coreData.hasTakedown),
+        nsfwUser = ifRequested(Field.NsfwUser)(coreData.nsfwUser),
+        nsfwAdmin = ifRequested(Field.NsfwAdmin)(coreData.nsfwAdmin)
+      )
+
+    if (fields.map(_.id).exists(AdditionalFields.isAdditionalFieldId))
+      toStoredAdditionalFields(tpTweet, storedTweet)
+    else
+      storedTweet
+  }
+
+  /** Inverse of [[toStoredReply]]: a stored status id that is not positive becomes None. */
+  def fromStoredReply(reply: StoredReply): Reply =
+    Reply(
+      Some(reply.inReplyToStatusId).filter(_ > 0),
+      reply.inReplyToUserId
+    )
+
+  /** Copies source status id, source user id and parent status id into a [[Share]]. */
+  def fromStoredShare(share: StoredShare): Share =
+    Share(
+      share.sourceStatusId,
+      share.sourceUserId,
+      share.parentStatusId
+    )
+
+  /**
+   * Rebuilds a quoted tweet reference. Only the short url is stored; the long url and display
+   * text are left empty here.
+   */
+  def fromStoredQuotedTweet(qt: StoredQuotedTweet): QuotedTweet =
+    QuotedTweet(
+      qt.tweetId,
+      qt.userId,
+      Some(
+        ShortenedUrl(
+          shortUrl = qt.shortUrl,
+          longUrl = "", // will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator
+          displayText = "" //will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator
+        )
+      )
+    )
+
+  /** Rebuilds coordinates from a stored geo; `display` is true only for entityId 2. */
+  def fromStoredGeo(geo: StoredGeo): GeoCoordinates =
+    GeoCoordinates(
+      latitude = geo.latitude,
+      longitude = geo.longitude,
+      geoPrecision = geo.geoPrecision,
+      display = geo.entityId == 2
+    )
+
+  /**
+   * Rebuilds a media entity from its stored form. Only the id and the `Orig` size are known at
+   * this point; indices and urls are placeholders (-1 / null) to be filled in later.
+   */
+  def fromStoredMediaEntity(media: StoredMediaEntity): MediaEntity =
+    MediaEntity(
+      fromIndex = -1, // will get filled in later
+      toIndex = -1, // will get filled in later
+      url = null, // will get filled in later
+      mediaPath = "", // field is obsolete
+      mediaUrl = null, // will get filled in later
+      mediaUrlHttps = null, // will get filled in later
+      displayUrl = null, // will get filled in later
+      expandedUrl = null, // will get filled in later
+      mediaId = media.id,
+      nsfw = false,
+      sizes = Set(
+        MediaSize(
+          sizeType = MediaSizeType.Orig,
+          resizeMethod = MediaResizeMethod.Fit,
+          deprecatedContentType = MediaContentType(media.mediaType),
+          width = media.width,
+          height = media.height
+        )
+      )
+    )
+
+  /** Rebuilds a narrowcast from its stored location list (empty when the list is unset). */
+  def fromStoredNarrowcast(narrowcast: StoredNarrowcast): Narrowcast =
+    Narrowcast(
+      location = narrowcast.location.getOrElse(Seq())
+    )
+
+  /**
+   * Converts a stored tweet back into a Tweetypie tweet. Mentions, urls, cashtags and hashtags
+   * are set to empty lists; additional fields are copied over at the end.
+   *
+   * @throws NoSuchElementException if userId, text, createdVia or createdAtSec is not set
+   *                                (see [[fromStoredTweetAllowInvalid]] for a lenient variant)
+   */
+  def fromStoredTweet(storedTweet: StoredTweet): Tweet = {
+    val coreData =
+      TweetCoreData(
+        userId = storedTweet.userId.get,
+        text = storedTweet.text.get,
+        createdVia = storedTweet.createdVia.get,
+        createdAtSecs = storedTweet.createdAtSec.get,
+        reply = storedTweet.reply.map(fromStoredReply),
+        share = storedTweet.share.map(fromStoredShare),
+        hasTakedown = storedTweet.hasTakedown.getOrElse(false),
+        nsfwUser = storedTweet.nsfwUser.getOrElse(false),
+        nsfwAdmin = storedTweet.nsfwAdmin.getOrElse(false),
+        narrowcast = storedTweet.narrowcast.map(fromStoredNarrowcast),
+        nullcast = storedTweet.nullcast.getOrElse(false),
+        trackingId = storedTweet.trackingId,
+        conversationId = storedTweet.reply.flatMap(_.conversationId),
+        placeId = storedTweet.geo.flatMap(_.name),
+        coordinates = storedTweet.geo.map(fromStoredGeo),
+        // NOTE(review): derived from the stored media even for retweets, whose media is
+        // dropped below -- confirm that is intended.
+        hasMedia = if (storedTweet.media.exists(_.nonEmpty)) Some(true) else None
+      )
+
+    // retweets should never have their media, but some tweets incorrectly do.
+    val storedMedia = if (coreData.share.isDefined) Nil else storedTweet.media.toSeq
+
+    val tpTweet =
+      Tweet(
+        id = storedTweet.id,
+        coreData = Some(coreData),
+        contributor = storedTweet.contributorId.map(Contributor(_)),
+        media = Some(storedMedia.flatten.map(fromStoredMediaEntity)),
+        mentions = Some(Seq.empty),
+        urls = Some(Seq.empty),
+        cashtags = Some(Seq.empty),
+        hashtags = Some(Seq.empty),
+        quotedTweet = storedTweet.quotedTweet.map(fromStoredQuotedTweet)
+      )
+    fromStoredAdditionalFields(storedTweet, tpTweet)
+  }
+
+  /**
+   * Like [[fromStoredTweet]], but tolerates missing required fields by substituting
+   * placeholders: -1 for userId and createdAtSec, the empty string for text and createdVia.
+   */
+  def fromStoredTweetAllowInvalid(storedTweet: StoredTweet): Tweet = {
+    fromStoredTweet(
+      storedTweet.copy(
+        userId = storedTweet.userId.orElse(Some(-1L)),
+        text = storedTweet.text.orElse(Some("")),
+        createdVia = storedTweet.createdVia.orElse(Some("")),
+        createdAtSec = storedTweet.createdAtSec.orElse(Some(-1L))
+      ))
+  }
+
+  /**
+   * Copies additional fields from a stored tweet onto `to`: the compiled additional fields of
+   * StoredTweet plus any passthrough fields whose id is in the additional-field range.
+   */
+  def fromStoredAdditionalFields(from: StoredTweet, to: Tweet): Tweet = {
+    val passThroughAdditionalFields =
+      from._passthroughFields.filterKeys(AdditionalFields.isAdditionalFieldId)
+    val allAdditionalFields =
+      from.getFieldBlobs(tbTweetCompiledAdditionalFieldIds) ++ passThroughAdditionalFields
+    allAdditionalFields.values.foldLeft(to) { case (t, f) => t.setField(f) }
+  }
+
+  /**
+   * Projects a stored tweet onto a [[DeletedTweet]]. The note tweet id and expandable flag are
+   * decoded from the note-tweet field blob when that blob is present.
+   */
+  def toDeletedTweet(storedTweet: StoredTweet): DeletedTweet = {
+    val noteTweetBlob = storedTweet.getFieldBlob(Tweet.NoteTweetField.id)
+    val noteTweetOption = noteTweetBlob.map(blob => NoteTweet.decode(blob.read))
+    DeletedTweet(
+      id = storedTweet.id,
+      userId = storedTweet.userId,
+      text = storedTweet.text,
+      createdAtSecs = storedTweet.createdAtSec,
+      share = storedTweet.share.map(toDeletedShare),
+      media = storedTweet.media.map(_.map(toDeletedMediaEntity)),
+      noteTweetId = noteTweetOption.map(_.id),
+      isExpandable = noteTweetOption.flatMap(_.isExpandable)
+    )
+  }
+
+  /** Copies source status id, source user id and parent status id into a deleted share. */
+  def toDeletedShare(storedShare: StoredShare): DeletedTweetShare =
+    DeletedTweetShare(
+      sourceStatusId = storedShare.sourceStatusId,
+      sourceUserId = storedShare.sourceUserId,
+      parentStatusId = storedShare.parentStatusId
+    )
+
+  /** Copies id, media type and dimensions into a deleted media entity. */
+  def toDeletedMediaEntity(storedMediaEntity: StoredMediaEntity): DeletedTweetMediaEntity =
+    DeletedTweetMediaEntity(
+      id = storedMediaEntity.id,
+      mediaType = storedMediaEntity.mediaType,
+      width = storedMediaEntity.width,
+      height = storedMediaEntity.height
+    )
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala
@ -0,0 +1,92 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import com.twitter.util.Time
+import com.twitter.util.Try
+import java.util.Arrays
+import scala.util.control.NoStackTrace
+import scala.util.control.NonFatal
+
+/**
+ * Selects which timestamp to read from a state record. `keyName` is the key that
+ * TimestampDecoder looks up in the record's decoded JSON value.
+ */
+sealed abstract class TimestampType(val keyName: String)
+object TimestampType {
+  // Reads the "timestamp" key.
+  object Default extends TimestampType("timestamp")
+  // Reads the "softdelete_timestamp" key.
+  object SoftDelete extends TimestampType("softdelete_timestamp")
+}
+
+/**
+ * Reads the timestamp that belongs to a state record.
+ *
+ * Legacy records (whose value is exactly "1") carry no timestamp of their own, so the
+ * Manhattan timestamp of the value is used for them. For every other record the timestamp is
+ * taken from the JSON document stored as the value.
+ *
+ * See "Metadata" in README.md for further information about state records.
+ */
+object TimestampDecoder {
+  // The record value could not be parsed as JSON.
+  case class UnparsableJson(msg: String, t: Throwable) extends Exception(msg, t) with NoStackTrace
+  // The JSON document has no entry for the requested timestamp key.
+  case class MissingJsonTimestamp(msg: String) extends Exception(msg) with NoStackTrace
+  // The entry for the requested timestamp key is neither a Long nor an Integer.
+  case class UnexpectedJsonValue(msg: String) extends Exception(msg) with NoStackTrace
+  // A legacy record has no Manhattan timestamp attached to its value.
+  case class MissingManhattanTimestamp(msg: String) extends Exception(msg) with NoStackTrace
+
+  private[storage] val LegacyValue: Array[Byte] = Array('1')
+
+  /**
+   * Cut-off below which a Manhattan timestamp is treated as broken.
+   *
+   * The first backfill of tweet data to Manhattan wrote milliseconds where nanoseconds were
+   * expected, so for the affected values `time.inNanoseconds` really holds milliseconds.
+   *
+   * For example, the deletion record for tweet 22225781 has Manhattan timestamp 1970-01-01 00:23:24 +0000.
+   * Contrast with the deletion record for tweet 435404491999813632 with Manhattan timestamp 2014-11-09 14:24:04 +0000
+   *
+   * The threshold is the last time in milliseconds that was interpreted as nanoseconds,
+   * e.g. Time.fromNanoseconds(1438387200000L) == 1970-01-01 00:23:58 +0000
+   */
+  private[storage] val BadTimestampThreshold = Time.at("1970-01-01 00:23:58 +0000")
+
+  /** Decodes the timestamp of `tsType` from the value of a Manhattan record. */
+  def decode(record: TweetManhattanRecord, tsType: TimestampType): Try[Long] =
+    decode(record.value, tsType)
+
+  /**
+   * Decodes the timestamp of `tsType` from a Manhattan value: the (corrected) Manhattan
+   * timestamp for legacy values, the JSON entry otherwise.
+   */
+  def decode(mhValue: TweetManhattanValue, tsType: TimestampType): Try[Long] =
+    ByteArrayCodec.fromByteBuffer(mhValue.contents) match {
+      case legacy if isLegacyRecord(legacy) => nativeManhattanTimestamp(mhValue)
+      case json => jsonTimestamp(json, tsType)
+    }
+
+  // A record is legacy when its bytes are exactly LegacyValue.
+  private def isLegacyRecord(value: Array[Byte]) = Arrays.equals(value, LegacyValue)
+
+  // Corrected Manhattan timestamp of the value, or a MissingManhattanTimestamp failure.
+  private def nativeManhattanTimestamp(mhValue: TweetManhattanValue): Try[Long] =
+    mhValue.timestamp.fold[Try[Long]](
+      Throw(MissingManhattanTimestamp(s"Manhattan timestamp missing in value $mhValue"))
+    )(ts => Return(correctedTimestamp(ts)))
+
+  // Parses the value as JSON and extracts the entry named by tsType.keyName as a Long.
+  private def jsonTimestamp(value: Array[Byte], tsType: TimestampType): Try[Long] = {
+    val parsed = Try(Json.decode(value)).rescue {
+      case NonFatal(e) => Throw(UnparsableJson(e.getMessage, e))
+    }
+    parsed.flatMap { m =>
+      m.get(tsType.keyName) match {
+        case Some(l: Long) => Return(l)
+        case Some(i: Integer) => Return(i.toLong)
+        case Some(_) =>
+          Throw(UnexpectedJsonValue(s"Unexpected value for ${tsType.keyName} in record data $m"))
+        case None =>
+          Throw(MissingJsonTimestamp(s"Missing key ${tsType.keyName} in record data $m"))
+      }
+    }
+  }
+
+  /** Returns `t` unchanged unless it is below the threshold, in which case it is re-read as ms. */
+  def correctedTime(t: Time): Time =
+    if (t >= BadTimestampThreshold) t else Time.fromMilliseconds(t.inNanoseconds)
+
+  /** Same as the `Time` overload, for a raw Manhattan timestamp given in nanoseconds. */
+  def correctedTime(t: Long): Time = correctedTime(Time.fromNanoseconds(t))
+
+  /**
+   * Millisecond timestamp for `t`. For times below the threshold the nanosecond count already
+   * is the millisecond value, so it is returned as-is.
+   */
+  def correctedTimestamp(t: Time): Long =
+    if (t >= BadTimestampThreshold) t.inMilliseconds else t.inNanoseconds
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala
@ -0,0 +1,164 @@
+package com.twitter.tweetypie.storage
+
+/**
+ * Responsible for encoding/decoding Tweet records to/from Manhattan keys
+ *
+ * K/V Scheme:
+ * -----------
+ *      [TweetId]
+ *           /metadata
+ *               /delete_state (a.k.a. hard delete)
+ *               /soft_delete_state
+ *               /bounce_delete_state
+ *               /undelete_state
+ *               /force_added_state
+ *               /scrubbed_fields/
+ *                    /[ScrubbedFieldId_1]
+ *                     ..
+ *                   /[ScrubbedFieldId_M]
+ *          /fields
+ *             /internal
+ *                 /1
+ *                 /9
+ *                 ..
+ *                 /99
+ *             /external
+ *                 /100
+ *                 ..
+ *
+ * IMPORTANT NOTE:
+ * 1) Field ids 2 to 8 in the Tweet thrift struct are considered "core fields". They are
+ *    'packed' together into a TFieldBlob and stored under field id 1
+ *    (i.e. [DatasetName]/[TweetId]/fields/internal/1). This is why there are no keys from
+ *    [DatasetName]/[TweetId]/fields/internal/2 to [DatasetName]/[TweetId]/fields/internal/8.
+ *
+ * 2) The tweet id (field id 1 in the Tweet thrift struct) is not explicitly stored in
+ *    Manhattan. There is no need to store it explicitly since it is part of the Pkey.
+ */
+case class TweetKey(tweetId: TweetId, lKey: TweetKey.LKey) {
+  // Renders the key as "/<injected pkey>/<injected lkey>".
+  override def toString: String = {
+    val pkey = ManhattanOperations.PkeyInjection(tweetId)
+    val lkey = ManhattanOperations.LkeyInjection(lKey)
+    s"/$pkey/$lkey"
+  }
+}
+
+/**
+ * Factory methods for [[TweetKey]]s and the [[TweetKey.LKey]] hierarchy describing the local
+ * key (lkey) part of a tweet record key, including parsing an lkey back from its string form.
+ */
+object TweetKey {
+  // Manhattan uses lexicographical order for keys. To make sure lexicographical order matches the
+  // numerical order, we should pad both tweet id and field ids with leading zeros.
+  // Since tweet id is long and field id is a short, the max width of each can be obtained by doing
+  // Long.MaxValue.toString.length and Short.MaxValue.toString.length respectively
+  private val TweetIdFormatStr = s"%0${Long.MaxValue.toString.length}d"
+  private val FieldIdFormatStr = s"%0${Short.MaxValue.toString.length}d"
+  // Zero-pads a tweet id to the width of Long.MaxValue (19 digits).
+  private[storage] def padTweetIdStr(tweetId: Long): String = TweetIdFormatStr.format(tweetId)
+  // Zero-pads a field id to the width of Short.MaxValue (5 digits).
+  private[storage] def padFieldIdStr(fieldId: Short): String = FieldIdFormatStr.format(fieldId)
+
+  // Convenience constructors: one per kind of lkey, each pairing the tweet id with that lkey.
+  def coreFieldsKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.CoreFieldsKey)
+  def hardDeletionStateKey(tweetId: TweetId): TweetKey =
+    TweetKey(tweetId, LKey.HardDeletionStateKey)
+  def softDeletionStateKey(tweetId: TweetId): TweetKey =
+    TweetKey(tweetId, LKey.SoftDeletionStateKey)
+  def bounceDeletionStateKey(tweetId: TweetId): TweetKey =
+    TweetKey(tweetId, LKey.BounceDeletionStateKey)
+  def unDeletionStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.UnDeletionStateKey)
+  def forceAddedStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ForceAddedStateKey)
+  def scrubbedGeoFieldKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ScrubbedGeoFieldKey)
+  // Picks the internal or additional (external) key depending on the field id; see LKey.FieldKey.
+  def fieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
+    TweetKey(tweetId, LKey.FieldKey(fieldId))
+  def internalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
+    TweetKey(tweetId, LKey.InternalFieldsKey(fieldId))
+  def additionalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
+    TweetKey(tweetId, LKey.AdditionalFieldsKey(fieldId))
+  def scrubbedFieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
+    TweetKey(tweetId, LKey.ScrubbedFieldKey(fieldId))
+
+  // AllFieldsKeyPrefix:       fields
+  // CoreFieldsKey:            fields/internal/1  (Stores subset of StoredTweet fields which are
+  //                             "packed" into a single CoreFields record)
+  // HardDeletionStateKey:     metadata/delete_state
+  // SoftDeletionStateKey:     metadata/soft_delete_state
+  // BounceDeletionStateKey:   metadata/bounce_delete_state
+  // UnDeletionStateKey:       metadata/undelete_state
+  // ForceAddedStateKey:       metadata/force_added_state
+  // FieldKey:                 fields/<group_name>/<padded_field_id> (where <group_name>
+  //                             is 'internal' for field ids < 100 and 'external' for all other
+  //                             fields ids)
+  // InternalFieldsKeyPrefix:  fields/internal
+  // PKey:                     <empty string>
+  // ScrubbedFieldKey:         metadata/scrubbed_fields/<padded_field_id>
+  // ScrubbedFieldKeyPrefix:   metadata/scrubbed_fields
+  /** A local key; its `toString` is the string form of the lkey. */
+  sealed abstract class LKey(override val toString: String)
+  object LKey {
+    // Building blocks of the lkey strings. They are declared before the keys that use them.
+    private val HardDeletionRecordLiteral = "delete_state"
+    private val SoftDeletionRecordLiteral = "soft_delete_state"
+    private val BounceDeletionRecordLiteral = "bounce_delete_state"
+    private val UnDeletionRecordLiteral = "undelete_state"
+    private val ForceAddRecordLiteral = "force_added_state"
+    private val ScrubbedFieldsGroup = "scrubbed_fields"
+    private val InternalFieldsGroup = "internal"
+    private val ExternalFieldsGroup = "external"
+    private val MetadataCategory = "metadata"
+    private val FieldsCategory = "fields"
+    private val InternalFieldsKeyPrefix = s"$FieldsCategory/$InternalFieldsGroup/"
+    private val ExternalFieldsKeyPrefix = s"$FieldsCategory/$ExternalFieldsGroup/"
+    private val ScrubbedFieldsKeyPrefix = s"$MetadataCategory/$ScrubbedFieldsGroup/"
+
+    // Keys of the form "metadata/<metadataType>".
+    sealed abstract class MetadataKey(metadataType: String)
+        extends LKey(s"$MetadataCategory/$metadataType")
+    // Metadata keys that identify a tweet state record.
+    sealed abstract class StateKey(stateType: String) extends MetadataKey(stateType)
+    case object HardDeletionStateKey extends StateKey(s"$HardDeletionRecordLiteral")
+    case object SoftDeletionStateKey extends StateKey(s"$SoftDeletionRecordLiteral")
+    case object BounceDeletionStateKey extends StateKey(s"$BounceDeletionRecordLiteral")
+    case object UnDeletionStateKey extends StateKey(s"$UnDeletionRecordLiteral")
+    case object ForceAddedStateKey extends StateKey(s"$ForceAddRecordLiteral")
+
+    // "metadata/scrubbed_fields/<padded field id>"
+    case class ScrubbedFieldKey(fieldId: FieldId)
+        extends MetadataKey(s"$ScrubbedFieldsGroup/${padFieldIdStr(fieldId)}")
+    val ScrubbedGeoFieldKey: LKey.ScrubbedFieldKey = ScrubbedFieldKey(TweetFields.geoFieldId)
+
+    /**
+     * LKey that has one of many possible fields id. This generalize over
+     * internal and additional fields key.
+     *
+     * The string form is `prefix` followed by the zero-padded field id, provided by the
+     * `toString` override below.
+     *
+     * NOTE(review): the `toString` handed to the LKey constructor is evaluated before this
+     * class's own `toString` val is initialized, so the effective value appears to come only
+     * from the override -- confirm before changing either one.
+     */
+    sealed abstract class FieldKey(prefix: String) extends LKey(toString) {
+      def fieldId: FieldId
+      override val toString: String = prefix + padFieldIdStr(fieldId)
+    }
+    object FieldKey {
+      // Ids below TweetFields.firstAdditionalFieldId are internal, all others are additional.
+      def apply(fieldId: FieldId): FieldKey =
+        fieldId match {
+          case id if id < TweetFields.firstAdditionalFieldId => InternalFieldsKey(fieldId)
+          case _ => AdditionalFieldsKey(fieldId)
+        }
+    }
+
+    // "fields/internal/<padded field id>"; construction asserts the id is in the internal range.
+    case class InternalFieldsKey(fieldId: FieldId) extends FieldKey(InternalFieldsKeyPrefix) {
+      assert(fieldId < TweetFields.firstAdditionalFieldId)
+    }
+    // "fields/external/<padded field id>"; construction asserts the id is in the additional range.
+    case class AdditionalFieldsKey(fieldId: FieldId) extends FieldKey(ExternalFieldsKeyPrefix) {
+      assert(fieldId >= TweetFields.firstAdditionalFieldId)
+    }
+    val CoreFieldsKey: LKey.InternalFieldsKey = InternalFieldsKey(TweetFields.rootCoreFieldId)
+
+    // Fallback for lkey strings that match none of the known keys or prefixes.
+    case class Unknown private (str: String) extends LKey(str)
+
+    /**
+     * Parses the string form of an lkey. Exact matches against the known singleton keys are
+     * tried first, then the internal, external and scrubbed-field prefixes; anything else
+     * yields [[Unknown]].
+     *
+     * Note that a string with a recognised prefix but a suffix that is not a valid Short makes
+     * `toShort` throw a NumberFormatException, and a field id outside the range asserted by
+     * InternalFieldsKey / AdditionalFieldsKey fails that assertion; neither case is mapped
+     * to [[Unknown]].
+     */
+    def fromString(str: String): LKey = {
+      // Everything after `prefix`, parsed as a field id.
+      def extractFieldId(prefix: String): FieldId =
+        str.slice(prefix.length, str.length).toShort
+
+      str match {
+        case CoreFieldsKey.toString => CoreFieldsKey
+        case HardDeletionStateKey.toString => HardDeletionStateKey
+        case SoftDeletionStateKey.toString => SoftDeletionStateKey
+        case BounceDeletionStateKey.toString => BounceDeletionStateKey
+        case UnDeletionStateKey.toString => UnDeletionStateKey
+        case ForceAddedStateKey.toString => ForceAddedStateKey
+        case ScrubbedGeoFieldKey.toString => ScrubbedGeoFieldKey
+        case _ if str.startsWith(InternalFieldsKeyPrefix) =>
+          InternalFieldsKey(extractFieldId(InternalFieldsKeyPrefix))
+        case _ if str.startsWith(ExternalFieldsKeyPrefix) =>
+          AdditionalFieldsKey(extractFieldId(ExternalFieldsKeyPrefix))
+        case _ if str.startsWith(ScrubbedFieldsKeyPrefix) =>
+          ScrubbedFieldKey(extractFieldId(ScrubbedFieldsKeyPrefix))
+        case _ => Unknown(str)
+      }
+    }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala
@ -0,0 +1,90 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.storage.client.manhattan.kv.ManhattanValue
+import com.twitter.util.Time
+
+/**
+ * A [[TweetStateRecord]] represents an action taken on a tweet and can be used to determine a tweet's state.
+ *
+ * The record with the most recent timestamp determines the state. When there is no record at
+ * all the tweet is considered found, i.e. it has not been through the deletion process.
+ *
+ * The concrete [[TweetStateRecord]] type follows from the lkey of the tweet manhattan record:
+ *    metadata/delete_state        -> HardDeleted
+ *    metadata/soft_delete_state   -> SoftDeleted
+ *    metadata/bounce_delete_state -> BounceDeleted
+ *    metadata/undelete_state      -> Undeleted
+ *    metadata/force_added_state   -> ForceAdded
+ *
+ * See the README in this directory for more details about the state of a tweet.
+ */
+sealed trait TweetStateRecord {
+  def tweetId: TweetId
+  def createdAt: Long
+  def stateKey: TweetKey.LKey.StateKey
+  // Key/value pairs that are JSON-encoded into the record value.
+  def values: Map[String, Long] = Map("timestamp" -> createdAt)
+  def name: String
+
+  /**
+   * Renders this state as a Manhattan record: the key is (tweetId, stateKey), the value is the
+   * JSON encoding of `values`, and `createdAt` (read as milliseconds) is the value's timestamp.
+   */
+  def toTweetMhRecord: TweetManhattanRecord = {
+    val encodedValues = Json.encode(values)
+    val mhValue = ManhattanValue(
+      ByteArrayCodec.toByteBuffer(encodedValues),
+      Some(Time.fromMilliseconds(createdAt))
+    )
+    TweetManhattanRecord(TweetKey(tweetId, stateKey), mhValue)
+  }
+}
+
+/** The concrete state records, and decoding of state records from Manhattan records. */
+object TweetStateRecord {
+
+  /** When a soft-deleted or bounce deleted tweet is ultimately hard-deleted by an offline job. */
+  case class HardDeleted(tweetId: TweetId, createdAt: Long, deletedAt: Long)
+      extends TweetStateRecord {
+    def name = "hard_deleted"
+    def stateKey = TweetKey.LKey.HardDeletionStateKey
+    // timestamp in the mh backend is the hard deletion timestamp
+    override def values = Map("timestamp" -> createdAt, "softdelete_timestamp" -> deletedAt)
+  }
+
+  /** When a tweet is deleted by the user. It can still be undeleted while in the soft deleted state. */
+  case class SoftDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
+    def name = "soft_deleted"
+    def stateKey = TweetKey.LKey.SoftDeletionStateKey
+  }
+
+  /** When a tweet is deleted by go/bouncer for violating Twitter Rules. It MAY NOT be undeleted. */
+  case class BounceDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
+    def name = "bounce_deleted"
+    def stateKey = TweetKey.LKey.BounceDeletionStateKey
+  }
+
+  /** When a tweet is undeleted by an internal system. */
+  case class Undeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
+    def name = "undeleted"
+    def stateKey = TweetKey.LKey.UnDeletionStateKey
+  }
+
+  /** When a tweet is created using the forceAdd endpoint. */
+  case class ForceAdded(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
+    def name = "force_added"
+    def stateKey = TweetKey.LKey.ForceAddedStateKey
+  }
+
+  /**
+   * Decodes a Manhattan record into a state record, or None when the record's lkey is not one
+   * of the state keys. A timestamp that cannot be decoded is replaced by 0.
+   */
+  def fromTweetMhRecord(record: TweetManhattanRecord): Option[TweetStateRecord] = {
+    // Only invoked from the branches below, so nothing is decoded for non-state records.
+    def timestampOrZero(tsType: TimestampType): Long =
+      TimestampDecoder.decode(record, tsType).getOrElse(0L)
+
+    record.lkey match {
+      case TweetKey.LKey.HardDeletionStateKey =>
+        Some(
+          HardDeleted(
+            record.pkey,
+            timestampOrZero(TimestampType.Default),
+            timestampOrZero(TimestampType.SoftDelete)
+          )
+        )
+      case TweetKey.LKey.SoftDeletionStateKey =>
+        Some(SoftDeleted(record.pkey, timestampOrZero(TimestampType.Default)))
+      case TweetKey.LKey.BounceDeletionStateKey =>
+        Some(BounceDeleted(record.pkey, timestampOrZero(TimestampType.Default)))
+      case TweetKey.LKey.UnDeletionStateKey =>
+        Some(Undeleted(record.pkey, timestampOrZero(TimestampType.Default)))
+      case TweetKey.LKey.ForceAddedStateKey =>
+        Some(ForceAdded(record.pkey, timestampOrZero(TimestampType.Default)))
+      case _ => None
+    }
+  }
+
+  /** Decodes all state records among `records`, preserving their order. */
+  def fromTweetMhRecords(records: Seq[TweetManhattanRecord]): Seq[TweetStateRecord] =
+    records.flatMap(record => fromTweetMhRecord(record).toList)
+
+  /**
+   * The state record with the greatest `createdAt`, or None when `records` holds no state
+   * record. Among records with equal `createdAt` the one appearing last in `records` wins.
+   */
+  def mostRecent(records: Seq[TweetManhattanRecord]): Option[TweetStateRecord] =
+    fromTweetMhRecords(records).reduceOption { (latest, candidate) =>
+      if (candidate.createdAt >= latest.createdAt) candidate else latest
+    }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala
@ -0,0 +1,201 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.storage.Response.TweetResponse
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.util.Future
+
+/**
+ * Interface for reading and writing tweet data in Manhattan.
+ *
+ * Every member is a function value; the function types are the aliases declared in the
+ * [[TweetStorageClient]] companion object.
+ */
+trait TweetStorageClient {
+  import TweetStorageClient._
+
+  // Writes
+  def addTweet: AddTweet
+  def updateTweet: UpdateTweet
+
+  // Reads
+  def getTweet: GetTweet
+  def getStoredTweet: GetStoredTweet
+  def getDeletedTweets: GetDeletedTweets
+
+  // Deletion, scrubbing and undeletion
+  def softDelete: SoftDelete
+  def bounceDelete: BounceDelete
+  def hardDeleteTweet: HardDeleteTweet
+  def undelete: Undelete
+  def deleteAdditionalFields: DeleteAdditionalFields
+  def scrub: Scrub
+
+  // Takes no arguments and completes with a Future[Unit].
+  def ping: Ping
+}
+
+object TweetStorageClient {
+  /** Looks up a single tweet by id. */
+  type GetTweet = TweetId => Stitch[GetTweet.Response]
+
+  object GetTweet {
+    /** The possible outcomes of a [[GetTweet]] call. */
+    sealed trait Response
+    object Response {
+      case class Found(tweet: Tweet) extends Response
+      object NotFound extends Response
+      object Deleted extends Response
+      // On BounceDeleted, provide the full Tweet so that implementations
+      // (i.e. ManhattanTweetStorageClient) do not need to be aware of the specific tweet
+      // fields required by callers for proper processing of bounce-deleted tweets.
+      case class BounceDeleted(tweet: Tweet) extends Response
+    }
+  }
+
+  type GetStoredTweet = TweetId => Stitch[GetStoredTweet.Response]
+
+  object GetStoredTweet {
+    /**
+     * A problem detected in stored tweet data. Instances are carried in the `errs` of
+     * Response.Failed and Response.FoundWithErrors.
+     *
+     * @param message human-readable description; this is also what toString returns
+     */
+    sealed abstract class Error(val message: String) {
+      override def toString: String = message
+    }
+    object Error {
+      case object TweetIsCorrupt extends Error("stored tweet data is corrupt and cannot be decoded")
+
+      case object ScrubbedFieldsPresent
+          extends Error("stored tweet fields that should be scrubbed are still present")
+
+      case object TweetFieldsMissingOrInvalid
+          extends Error("expected tweet fields are missing or contain invalid values")
+
+      case object TweetShouldBeHardDeleted
+          extends Error("stored tweet that should be hard deleted is still present")
+    }
+
+    sealed trait Response
+    object Response {
+      /** State and scrubbing details mixed into every response type except NotFound. */
+      sealed trait StoredTweetMetadata {
+        // The most recent state record, if any.
+        def state: Option[TweetStateRecord]
+        // Presumably every state record found for the tweet - TODO confirm against the handler.
+        def allStates: Seq[TweetStateRecord]
+        // Presumably the ids of fields that have a scrubbed-field record - TODO confirm.
+        def scrubbedFields: Set[FieldId]
+      }
+
+      sealed trait StoredTweetErrors {
+        def errs: Seq[Error]
+      }
+
+      /**
+       * Tweet data was found, possibly state records and/or scrubbed field records.
+       */
+      sealed trait FoundAny extends Response with StoredTweetMetadata {
+        def tweet: Tweet
+      }
+
+      /**
+       * Extractor that flattens any FoundAny response into
+       * (tweet, state, allStates, scrubbedFields, errors). The errors are the response's
+       * `errs` for FoundWithErrors and empty for every other FoundAny; responses that are
+       * not a FoundAny do not match.
+       */
+      object FoundAny {
+        def unapply(
+          response: Response
+        ): Option[
+          (Tweet, Option[TweetStateRecord], Seq[TweetStateRecord], Set[FieldId], Seq[Error])
+        ] =
+          response match {
+            case found: FoundAny =>
+              val errors = found match {
+                case withErrors: FoundWithErrors => withErrors.errs
+                case _ => Seq.empty
+              }
+              Some((found.tweet, found.state, found.allStates, found.scrubbedFields, errors))
+            case _ => None
+          }
+      }
+
+      /**
+       * No records for this tweet id were found in storage
+       */
+      case class NotFound(id: TweetId) extends Response
+
+      /**
+       * Data related to the Tweet id was found but could not be loaded successfully. The
+       * errs array contains details of the problems.
+       */
+      case class Failed(
+        id: TweetId,
+        state: Option[TweetStateRecord],
+        allStates: Seq[TweetStateRecord],
+        scrubbedFields: Set[FieldId],
+        errs: Seq[Error],
+      ) extends Response
+          with StoredTweetMetadata
+          with StoredTweetErrors
+
+      /**
+       * No Tweet data was found, and the most recent state record found is HardDeleted
+       */
+      case class HardDeleted(
+        id: TweetId,
+        state: Option[TweetStateRecord.HardDeleted],
+        allStates: Seq[TweetStateRecord],
+        scrubbedFields: Set[FieldId],
+      ) extends Response
+          with StoredTweetMetadata
+
+      /**
+       * Tweet data was found, and the most recent state record found, if any, is not
+       * any form of deletion record.
+       */
+      case class Found(
+        tweet: Tweet,
+        state: Option[TweetStateRecord],
+        allStates: Seq[TweetStateRecord],
+        scrubbedFields: Set[FieldId],
+      ) extends FoundAny
+
+      /**
+       * Tweet data was found, and the most recent state record found indicates deletion.
+       */
+      case class FoundDeleted(
+        tweet: Tweet,
+        state: Option[TweetStateRecord],
+        allStates: Seq[TweetStateRecord],
+        scrubbedFields: Set[FieldId],
+      ) extends FoundAny
+
+      /**
+       * Tweet data was found, however errors were detected in the stored data. Required
+       * fields may be missing from the Tweet struct (e.g. CoreData), stored fields that
+       * should be scrubbed remain present, or Tweets that should be hard-deleted remain
+       * in storage. The errs array contains details of the problems.
+       */
+      case class FoundWithErrors(
+        tweet: Tweet,
+        state: Option[TweetStateRecord],
+        allStates: Seq[TweetStateRecord],
+        scrubbedFields: Set[FieldId],
+        errs: Seq[Error],
+      ) extends FoundAny
+          with StoredTweetErrors
+    }
+  }
+
+  type HardDeleteTweet = TweetId => Stitch[HardDeleteTweet.Response]
+  type SoftDelete = TweetId => Stitch[Unit]
+  type BounceDelete = TweetId => Stitch[Unit]
+
+  /** Response types for the HardDeleteTweet operation. */
+  object HardDeleteTweet {
+    sealed trait Response
+    object Response {
+      // Both timestamps are optional; presumably epoch milliseconds, going by the field names.
+      case class Deleted(deletedAtMillis: Option[Long], createdAtMillis: Option[Long])
+          extends Response
+      // Also a Throwable, so it can be raised as a failure as well as returned as a value.
+      // ineligibleLKey presumably names the record key that made the tweet ineligible for
+      // hard deletion - TODO confirm against the handler.
+      case class NotDeleted(id: TweetId, ineligibleLKey: Option[TweetKey.LKey])
+          extends Throwable
+          with Response
+    }
+  }
+
+  /** Attempts to restore a deleted tweet by id. */
+  type Undelete = TweetId => Stitch[Undelete.Response]
+  object Undelete {
+    /**
+     * Outcome of an undelete request.
+     *
+     * @param code overall result
+     * @param tweet the tweet rebuilt from storage; UndeleteHandler sets it on Success
+     * @param createdAtMillis NOTE(review): UndeleteHandler never populates this - confirm
+     *                        whether another component does
+     * @param archivedAtMillis UndeleteHandler passes the soft-deletion record's timestamp here
+     *                         when it undoes a soft delete
+     */
+    case class Response(
+      code: UndeleteResponseCode,
+      tweet: Option[Tweet] = None,
+      createdAtMillis: Option[Long] = None,
+      archivedAtMillis: Option[Long] = None)
+
+    sealed trait UndeleteResponseCode
+
+    object UndeleteResponseCode {
+      object Success extends UndeleteResponseCode
+      // UndeleteHandler returns this for hard-deleted and bounce-deleted tweets, and for
+      // soft deletes that fall outside the undelete window.
+      object BackupNotFound extends UndeleteResponseCode
+      object NotCreated extends UndeleteResponseCode
+    }
+  }
+
+  type AddTweet = Tweet => Stitch[Unit]
+  type UpdateTweet = (Tweet, Seq[Field]) => Stitch[TweetResponse]
+  type GetDeletedTweets = Seq[TweetId] => Stitch[Seq[DeletedTweetResponse]]
+  type DeleteAdditionalFields = (Seq[TweetId], Seq[Field]) => Stitch[Seq[TweetResponse]]
+  type Scrub = (Seq[TweetId], Seq[Field]) => Stitch[Unit]
+  type Ping = () => Future[Unit]
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala
@ -0,0 +1,34 @@
+package com.twitter.tweetypie.storage
+
+import scala.util.control.NoStackTrace
+
+/**
+ * Base type for failures raised by the tweet storage layer. Because it is sealed, its only
+ * direct subclasses are the ones declared in this file.
+ *
+ * @param message description of the failure
+ * @param cause underlying exception, passed through to Exception
+ */
+sealed abstract class TweetStorageException(message: String, cause: Throwable)
+    extends Exception(message, cause)
+
+/**
+ * The request was not properly formed and failed an assertion present in the code. Should not be
+ * retried without modification.
+ *
+ * Mixes in NoStackTrace, which suppresses stack trace capture for this exception.
+ *
+ * @param message description of what was wrong with the request
+ * @param cause underlying exception; there is no default, so callers must always supply one
+ */
+case class ClientError(message: String, cause: Throwable)
+    extends TweetStorageException(message, cause)
+    with NoStackTrace
+
+/**
+ * Request was rejected by Manhattan or the in-process rate limiter. Should not be retried.
+ *
+ * Mixes in NoStackTrace, which suppresses stack trace capture for this exception.
+ *
+ * @param message description of the rejection
+ * @param cause underlying exception; there is no default, so callers must always supply one
+ */
+case class RateLimited(message: String, cause: Throwable)
+    extends TweetStorageException(message, cause)
+    with NoStackTrace
+
+/**
+ * Corrupt tweets were requested from Manhattan.
+ *
+ * TweetUtils.getStoredTweetBlobs catches this around TFieldBlobCodec.fromByteBuffer, logs
+ * the affected key and rethrows it. Mixes in NoStackTrace, which suppresses stack trace
+ * capture for this exception.
+ *
+ * @param message description of the decoding problem
+ * @param cause underlying exception; defaults to null when there is none
+ */
+case class VersionMismatchError(message: String, cause: Throwable = null)
+    extends TweetStorageException(message, cause)
+    with NoStackTrace
+
+/**
+ * All other unhandled exceptions.
+ *
+ * Unlike the other TweetStorageException subclasses in this file, this one does not mix in
+ * NoStackTrace.
+ *
+ * @param message description of the failure
+ * @param cause underlying exception; defaults to null when there is none
+ */
+case class InternalError(message: String, cause: Throwable = null)
+    extends TweetStorageException(message, cause)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala
@ -0,0 +1,265 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.logging.Logger
+import com.twitter.scrooge.TFieldBlob
+import com.twitter.snowflake.id.SnowflakeId
+import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
+import com.twitter.storage.client.manhattan.kv.ManhattanException
+import com.twitter.tweetypie.storage.Response._
+import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import com.twitter.util.Try
+
+object TweetUtils {
+  val log: Logger = Logger("com.twitter.tweetypie.storage.TweetStorageLibrary")
+  import FieldResponseCodec.ValueNotFoundException
+
+  /**
+   * Whether a stored tweet carries the minimum data required to be treated as a real tweet.
+   *
+   * It's rare, but we have seen tweets with userId=0, which is likely the result of a
+   * failed/partial delete. Those tweets, and tweets without text, createdVia or
+   * createdAtSec, are treated as invalid; invalid tweets are returned to callers as not
+   * found.
+   */
+  def isValid(tweet: StoredTweet): Boolean = {
+    val hasRealUser = tweet.userId.exists(id => id != 0)
+    val hasCoreFields =
+      tweet.text.nonEmpty && tweet.createdVia.nonEmpty && tweet.createdAtSec.nonEmpty
+    hasRealUser && hasCoreFields
+  }
+
+  /**
+   * Collects the ids of scrubbed fields from the records read for a tweet.
+   *
+   * @param records The sequence of MH records for the given tweetId
+   *
+   * @return The field id of every record whose lkey is a ScrubbedFieldKey
+   */
+  private[tweetypie] def extractScrubbedFields(records: Seq[TweetManhattanRecord]): Set[Short] =
+    records.flatMap { record =>
+      record.lkey match {
+        case TweetKey.LKey.ScrubbedFieldKey(fieldId) => Some(fieldId)
+        case _ => None
+      }
+    }.toSet
+
+  // The required field ids minus the tweet id field. buildValidStoredTweet rejects a stored
+  // tweet for which none of these fields has a blob.
+  private[tweetypie] val expectedFields =
+    TweetFields.requiredFieldIds.toSet - TweetFields.tweetIdField
+
+  /**
+   * Find the creation timestamp for a tweet. The id is tried first; when it yields no
+   * snowflake timestamp, the core fields record is decoded instead. Use this when you need
+   * a timestamp and aren't sure that tweetId is a snowflake id.
+   *
+   * @param tweetId A tweetId you want the timestamp for.
+   * @param records Tbird_mh records keyed on tweetId, one of which should be the
+   * core fields record.
+   * @return A milliseconds timestamp if one could be found.
+   */
+  private[tweetypie] def creationTimeFromTweetIdOrMHRecords(
+    tweetId: Long,
+    records: Seq[TweetManhattanRecord]
+  ): Option[Long] = {
+    // Fallback, evaluated only when the id carries no timestamp: read createdAtSec out of
+    // the core fields record and convert seconds to milliseconds.
+    def fromCoreFields: Option[Long] =
+      for {
+        coreRecord <- records.find(_.lkey == TweetKey.LKey.CoreFieldsKey)
+        coreBlob = TFieldBlobCodec.fromByteBuffer(coreRecord.value.contents)
+        createdAtSec <- CoreFieldsCodec.fromTFieldBlob(coreBlob).createdAtSec
+      } yield createdAtSec * 1000
+
+    SnowflakeId.unixTimeMillisOptFromId(tweetId).orElse(fromCoreFields)
+  }
+
+  /**
+   * Summarises per-field Manhattan results for one tweet as a TweetResponse.
+   *
+   * The overall code is chosen as follows:
+   *  - Success when there is at least one field and every field was handled;
+   *  - otherwise OverCapacity when any field failed with DeniedManhattanException;
+   *  - otherwise Failure when no field was handled (this includes an empty map);
+   *  - otherwise Partial.
+   *
+   * @param callerName The name of the caller function. Used for error messages
+   * @param tweetId Id of the Tweet for which TweetResponse is being built
+   * @param fieldResults Result of the Manhattan operation for each field id
+   *
+   * @return TweetResponse object
+   */
+  private[tweetypie] def buildTweetResponse(
+    callerName: String,
+    tweetId: Long,
+    fieldResults: Map[FieldId, Try[Unit]]
+  ): TweetResponse = {
+    // A field counts as handled when its operation returned, or when it failed only with
+    // ValueNotFoundException.
+    val handledCount = fieldResults.values.count {
+      case Return(_) => true
+      case Throw(_: ValueNotFoundException) => true
+      case _ => false
+    }
+    // If any field was rate limited, the entire tweet is considered rate limited.
+    val rateLimited = fieldResults.values.exists {
+      case Throw(_: DeniedManhattanException) => true
+      case _ => false
+    }
+    val allHandled = handledCount > 0 && handledCount == fieldResults.size
+
+    val overallCode =
+      if (allHandled) TweetResponseCode.Success
+      else if (rateLimited) TweetResponseCode.OverCapacity
+      else if (handledCount == 0) TweetResponseCode.Failure
+      else TweetResponseCode.Partial
+
+    val fieldResponses = getFieldResponses(callerName, tweetId, fieldResults)
+    TweetResponse(tweetId, overallCode, Some(fieldResponses))
+  }
+
+  /**
+   * Converts per-field Manhattan results into a Map[FieldId, FieldResponse].
+   *
+   * A returned result becomes a Success response. A ManhattanException is converted by
+   * FieldResponseCodec.fromThrowable, and any other exception becomes an Error response.
+   * Every failure except ValueNotFoundException is also logged at error level.
+   *
+   * @param callerName The name of the caller function. Used for error messages
+   * @param tweetId Id of the Tweet the fields belong to. Used for error messages
+   * @param fieldResults Result of the Manhattan operation for each field id
+   */
+  private[tweetypie] def getFieldResponses(
+    callerName: String,
+    tweetId: TweetId,
+    fieldResults: Map[FieldId, Try[_]]
+  ): Map[FieldId, FieldResponse] = {
+    // Only called on failure paths; the key is rendered purely for the message.
+    def describe(fieldId: FieldId, error: Throwable): String = {
+      val keyStr = TweetKey.fieldKey(tweetId, fieldId).toString
+      s"Exception in $callerName. Key: $keyStr. Error: $error"
+    }
+
+    fieldResults.map {
+      case (fieldId, result) =>
+        val response = result match {
+          case Return(_) =>
+            FieldResponse(FieldResponseCode.Success, None)
+          case Throw(mhException: ManhattanException) =>
+            val errMsg = describe(fieldId, mhException)
+            mhException match {
+              case _: ValueNotFoundException => () // ValueNotFound is not an error
+              case _ => log.error(errMsg)
+            }
+            FieldResponseCodec.fromThrowable(mhException, Some(errMsg))
+          case Throw(e) =>
+            val errMsg = describe(fieldId, e)
+            log.error(errMsg)
+            FieldResponse(FieldResponseCode.Error, Some(errMsg))
+        }
+        fieldId -> response
+    }
+  }
+
+  /**
+   * Builds a TweetResponse for a rate-limited request. The overall code is always
+   * OverCapacity; since only some of the fields may have been rate limited, the per-field
+   * responses show which fields were processed successfully and which hit an error.
+   *
+   * @param callerName name of API calling this function
+   * @param tweetId Tweet id
+   * @param fieldResponses result of the Manhattan operation for each field id
+   *
+   * @return The TweetResponse object
+   */
+  private[tweetypie] def buildTweetOverCapacityResponse(
+    callerName: String,
+    tweetId: Long,
+    fieldResponses: Map[FieldId, Try[Unit]]
+  ) =
+    TweetResponse(
+      tweetId,
+      TweetResponseCode.OverCapacity,
+      Some(getFieldResponses(callerName, tweetId, fieldResponses))
+    )
+
+  /**
+   * Build a StoredTweet from a Seq of records by setting every field blob found in them on
+   * an otherwise empty StoredTweet for `tweetId`.
+   *
+   * Core fields are handled specially: the blob stored under TweetFields.rootCoreFieldId is
+   * a packed form of all core fields and is unpacked into one blob per core field first.
+   * Nothing is raised here when the core fields are absent; callers are expected to treat a
+   * result without core field values as "tweet not found".
+   *
+   * @param includeScrubbed forwarded to getStoredTweetBlobs
+   */
+  private[tweetypie] def buildStoredTweet(
+    tweetId: TweetId,
+    records: Seq[TweetManhattanRecord],
+    includeScrubbed: Boolean = false,
+  ): StoredTweet = {
+    val blobs = getStoredTweetBlobs(records, includeScrubbed).flatMap { blob =>
+      if (blob.id != TweetFields.rootCoreFieldId) Seq(blob)
+      else CoreFieldsCodec.unpackFields(blob).values.toSeq
+    }
+
+    blobs.foldLeft(StoredTweet(tweetId)) { (tweet, blob) => tweet.setField(blob) }
+  }
+
+  /**
+   * Builds a StoredTweet from the records (scrubbed fields excluded) and returns it only if
+   * it has a blob for at least one of `expectedFields` and passes `isValid`.
+   */
+  private[tweetypie] def buildValidStoredTweet(
+    tweetId: TweetId,
+    records: Seq[TweetManhattanRecord]
+  ): Option[StoredTweet] =
+    Some(buildStoredTweet(tweetId, records)).filter { candidate =>
+      candidate.getFieldBlobs(expectedFields).nonEmpty && isValid(candidate)
+    }
+
+  /**
+   * Return a TFieldBlob for each StoredTweet field defined in this set of records.
+   *
+   * Only records whose key has a FieldKey lkey are decoded; all other records are skipped.
+   *
+   * @param records the Manhattan records read for one tweet
+   * @param includeScrubbed when false, result will not include scrubbed fields even
+   *                        if the data is present in the set of records.
+   * @return the decoded blobs, in record order
+   */
+  private[tweetypie] def getStoredTweetBlobs(
+    records: Seq[TweetManhattanRecord],
+    includeScrubbed: Boolean = false,
+  ): Seq[TFieldBlob] = {
+    val scrubbed = extractScrubbedFields(records)
+
+    records
+      .flatMap { r =>
+        // extract LKey.FieldKey records if they are not scrubbed and get their TFieldBlobs
+        r.key match {
+          case fullKey @ TweetKey(_, key: TweetKey.LKey.FieldKey)
+              if includeScrubbed || !scrubbed.contains(key.fieldId) =>
+            try {
+              val fieldBlob = TFieldBlobCodec.fromByteBuffer(r.value.contents)
+              // The id inside the blob must agree with the id in the key it was stored
+              // under. The AssertionError is not handled by the catch below, so it
+              // propagates to the caller without being logged here.
+              if (fieldBlob.field.id != key.fieldId) {
+                throw new AssertionError(
+                  s"Blob stored for $fullKey has unexpected id ${fieldBlob.field.id}"
+                )
+              }
+              Some(fieldBlob)
+            } catch {
+              // Log which key failed to decode, then rethrow the same exception.
+              case e: VersionMismatchError =>
+                log.error(
+                  s"Failed to decode bytebuffer for $fullKey: ${e.getMessage}"
+                )
+                throw e
+            }
+          case _ => None
+        }
+      }
+  }
+
+  /**
+   * Collapses a sequence of tries into one, giving priority to rate limiting. It's important
+   * to bubble up rate limiting exceptions as they would likely be the root cause for other
+   * issues (timeouts etc.), so the first such failure is returned as-is when present.
+   *
+   * @param seqOfTries The sequence of tries which may contain within it a rate limit exception
+   *
+   * @return the first Throw(e: DeniedManhattanException) if there is one; otherwise the
+   *         result of Try.collect over all the tries, which is a Return only if every
+   *         individual try succeeded
+   */
+  private[tweetypie] def collectWithRateLimitCheck(seqOfTries: Seq[Try[Unit]]): Try[Unit] = {
+    val firstRateLimited: Option[Try[Unit]] = seqOfTries.collectFirst {
+      case denied @ Throw(_: DeniedManhattanException) => denied
+    }
+
+    firstRateLimited match {
+      case Some(denied) => denied
+      // The operation is considered successful only if all the individual tries succeeded.
+      case None => Try.collect(seqOfTries).map(_ => ())
+    }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala
@ -0,0 +1,106 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.storage.TweetStorageClient.Undelete
+import com.twitter.tweetypie.storage.TweetUtils._
+import com.twitter.util.Time
+
+object UndeleteHandler {
+  /**
+   * Builds the Undelete operation.
+   *
+   * The returned function reads all records for the tweet, decides the response from the
+   * most recent state record (see prepareUndelete), writes an Undeleted state record through
+   * both inserts when one is called for, and then deletes the soft-deletion state record.
+   *
+   * @param read reads the Manhattan records for a tweet id
+   * @param localInsert one of the two inserts the undelete record is written through
+   * @param remoteInsert the other insert the undelete record is written through
+   * @param delete used to remove the soft-deletion state record
+   * @param undeleteWindowHours a soft delete can be undone only while its age, measured with
+   *                            Duration.inHours, is below this value
+   * @param stats NOTE(review): not referenced anywhere in this method - confirm whether it
+   *              is still needed
+   */
+  def apply(
+    read: ManhattanOperations.Read,
+    localInsert: ManhattanOperations.Insert,
+    remoteInsert: ManhattanOperations.Insert,
+    delete: ManhattanOperations.Delete,
+    undeleteWindowHours: Int,
+    stats: StatsReceiver
+  ): Undelete = {
+    // timestampMs is in epoch milliseconds.
+    def withinUndeleteWindow(timestampMs: Long) =
+      (Time.now - Time.fromMilliseconds(timestampMs)).inHours < undeleteWindowHours
+
+    // Decides the response and, optionally, the Undeleted state record that must be written.
+    // A None record means that nothing is inserted for this request.
+    def prepareUndelete(
+      tweetId: TweetId,
+      records: Seq[TweetManhattanRecord]
+    ): (Undelete.Response, Option[TweetManhattanRecord]) = {
+      val undeleteRecord =
+        Some(TweetStateRecord.Undeleted(tweetId, Time.now.inMillis).toTweetMhRecord)
+
+      TweetStateRecord.mostRecent(records) match {
+        // check if we need to undo a soft deletion
+        case Some(TweetStateRecord.SoftDeleted(_, createdAt)) =>
+          // createdAt is 0 when the record's timestamp could not be decoded.
+          if (createdAt > 0) {
+            if (withinUndeleteWindow(createdAt)) {
+              (
+                mkSuccessfulUndeleteResponse(tweetId, records, Some(createdAt)),
+                undeleteRecord
+              )
+            } else {
+              (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None)
+            }
+          } else {
+            throw InternalError(s"Timestamp unavailable for $tweetId")
+          }
+
+        // BounceDeleted tweets may not be undeleted. see go/bouncedtweet
+        case Some(_: TweetStateRecord.HardDeleted | _: TweetStateRecord.BounceDeleted) =>
+          (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None)
+
+        case Some(_: TweetStateRecord.Undeleted) =>
+          // We still want to write the undelete record, because at this point, we only know that the local DC's
+          // winning record is not a soft/hard deletion record, while its possible that the remote DC's winning
+          // record might still be a soft deletion record. Having said that, we don't want to set it to true
+          // if the winning record is forceAdd, as the forceAdd call should have ensured that both DCs had the
+          // forceAdd record.
+          (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord)
+
+        // A winning forceAdd record: report success without writing an undelete record.
+        case Some(_: TweetStateRecord.ForceAdded) =>
+          (mkSuccessfulUndeleteResponse(tweetId, records), None)
+
+        // lets write the undeletion record just in case there is a softdeletion record in flight
+        case None => (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord)
+      }
+    }
+
+    // Write the undelete record both locally and remotely to protect
+    // against races with hard delete replication. We only need this
+    // protection for the insertion of the undelete record.
+    // Both inserts are lifted to Try so that collectWithRateLimitCheck can inspect both
+    // outcomes before the combined result is lowered back into the Stitch.
+    def multiInsert(record: TweetManhattanRecord): Stitch[Unit] =
+      Stitch
+        .collect(
+          Seq(
+            localInsert(record).liftToTry,
+            remoteInsert(record).liftToTry
+          )
+        )
+        .map(collectWithRateLimitCheck)
+        .lowerFromTry
+
+    def deleteSoftDeleteRecord(tweetId: TweetId): Stitch[Unit] = {
+      val mhKey = TweetKey.softDeletionStateKey(tweetId)
+      delete(mhKey, None)
+    }
+
+    tweetId =>
+      for {
+        records <- read(tweetId)
+        (response, undeleteRecord) = prepareUndelete(tweetId, records)
+        // Inserts only when prepareUndelete produced a record to write.
+        _ <- Stitch.collect(undeleteRecord.map(multiInsert)).unit
+        // NOTE(review): this delete runs for every outcome, including BackupNotFound
+        // responses - confirm that removing the soft-deletion record is intended there.
+        _ <- deleteSoftDeleteRecord(tweetId)
+      } yield {
+        response
+      }
+  }
+
+  /**
+   * Builds a Success response carrying the tweet rebuilt from `records`.
+   *
+   * @param tweetId id of the tweet being undeleted
+   * @param records the Manhattan records read for the tweet
+   * @param timestampOpt reported as the response's archivedAtMillis
+   */
+  private[storage] def mkSuccessfulUndeleteResponse(
+    tweetId: TweetId,
+    records: Seq[TweetManhattanRecord],
+    timestampOpt: Option[Long] = None
+  ) = {
+    val restoredTweet = StorageConversions.fromStoredTweet(buildStoredTweet(tweetId, records))
+    Undelete.Response(
+      code = Undelete.UndeleteResponseCode.Success,
+      tweet = Some(restoredTweet),
+      archivedAtMillis = timestampOpt
+    )
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala
@ -0,0 +1,64 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
+import com.twitter.storage.client.manhattan.kv.ManhattanValue
+import com.twitter.tweetypie.storage.TweetUtils._
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.util.Throw
+import com.twitter.util.Time
+
+object UpdateTweetHandler {
+  /**
+   * Builds the UpdateTweet operation.
+   *
+   * The returned function writes one Manhattan record per requested field, all stamped with
+   * the same time, and summarises the per-field outcomes as a TweetResponse. It throws
+   * IllegalArgumentException (via `require`) when a precondition on its arguments fails.
+   *
+   * @param insert writes a single Manhattan record
+   * @param stats receiver for the per-field QPS counters
+   */
+  def apply(
+    insert: ManhattanOperations.Insert,
+    stats: StatsReceiver
+  ): TweetStorageClient.UpdateTweet = { (tpTweet: Tweet, fields: Seq[Field]) =>
+    // NOTE(review): this predicate passes each Field itself, not field.id, to
+    // coreFieldIds.contains. If coreFieldIds holds field ids the check can never fail -
+    // confirm the intent, and whether existing callers pass core fields, before changing it.
+    require(
+      fields.forall(!TweetFields.coreFieldIds.contains(_)),
+      "Core fields cannot be modified by calling updateTweet; use addTweet instead."
+    )
+    require(
+      areAllFieldsDefined(tpTweet, fields),
+      s"Input tweet $tpTweet does not have specified fields $fields set"
+    )
+
+    val now = Time.now
+    val storedTweet = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet)
+    val tweetId = storedTweet.id
+    Stats.updatePerFieldQpsCounters("updateTweet", fields.map(_.id), 1, stats)
+
+    val (fieldIds, stitchesPerTweet) =
+      fields.map { field =>
+        val fieldId = field.id
+        val tweetKey = TweetKey.fieldKey(tweetId, fieldId)
+        // The .get cannot fail: areAllFieldsDefined above verified that the same conversion
+        // yields a blob for every requested field.
+        val blob = storedTweet.getFieldBlob(fieldId).get
+        val value = ManhattanValue(TFieldBlobCodec.toByteBuffer(blob), Some(now))
+        val record = TweetManhattanRecord(tweetKey, value)
+
+        // Lifted to Try so that one failed insert does not fail the whole collect.
+        (fieldId, insert(record).liftToTry)
+      }.unzip
+
+    Stitch.collect(stitchesPerTweet).map { seqOfTries =>
+      val fieldkeyAndMhResults = fieldIds.zip(seqOfTries).toMap
+      // If even a single field was rate limited, we will send an overall OverCapacity TweetResponse
+      val wasRateLimited = fieldkeyAndMhResults.exists { keyAndResult =>
+        keyAndResult._2 match {
+          case Throw(e: DeniedManhattanException) => true
+          case _ => false
+        }
+      }
+
+      // Note that the caller name used in error messages here is "updateTweets", whereas the
+      // stats label above is "updateTweet".
+      if (wasRateLimited) {
+        buildTweetOverCapacityResponse("updateTweets", tweetId, fieldkeyAndMhResults)
+      } else {
+        buildTweetResponse("updateTweets", tweetId, fieldkeyAndMhResults)
+      }
+    }
+  }
+
+  /**
+   * True when converting `tpTweet` for the given fields yields a field blob for every one of
+   * them, i.e. the caller actually set each field it is asking to update.
+   */
+  private def areAllFieldsDefined(tpTweet: Tweet, fields: Seq[Field]) = {
+    val converted = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet)
+    !fields.exists(field => converted.getFieldBlob(field.id).isEmpty)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala
@ -0,0 +1,11 @@
+package com.twitter.tweetypie
+
+import com.twitter.storage.client.manhattan.kv.ManhattanValue
+import java.nio.ByteBuffer
+
+/** Type aliases shared across the tweet storage package. */
+package object storage {
+  // Tweet ids are plain Longs.
+  type TweetId = Long
+  // Field ids are Shorts.
+  type FieldId = Short
+
+  // A Manhattan value whose payload is a ByteBuffer.
+  type TweetManhattanValue = ManhattanValue[ByteBuffer]
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD
@ -0,0 +1,20 @@
+# Scala library for the tflock package: compiles every .scala file in this directory and
+# uses the "fatal_warnings" compiler option set.
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle/finagle-core/src/main",
+        "flock-client/src/main/scala",
+        "flock-client/src/main/thrift:thrift-scala",
+        "tweetypie/servo/util/src/main/scala",
+        "snowflake:id",
+        "src/thrift/com/twitter/gizmoduck:thrift-scala",
+        "src/thrift/com/twitter/servo:servo-exception-java",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
+        "tweetypie/server/src/main/scala/com/twitter/tweetypie",
+        "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil",
+        "tweetypie/common/src/scala/com/twitter/tweetypie/util",
+        "util/util-core:scala",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala
@ -0,0 +1,532 @@
+/** Copyright 2010 Twitter, Inc. */
+package com.twitter.tweetypie
+package tflock
+
+import com.twitter.finagle.stats.Counter
+import com.twitter.flockdb.client._
+import com.twitter.flockdb.client.thriftscala.Priority
+import com.twitter.snowflake.id.SnowflakeId
+import com.twitter.tweetypie.serverutil.StoredCard
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.util.Future
+import scala.collection.mutable.ListBuffer
+
+/**
+ * Static configuration for [[TFlockIndexer]]: printable graph names, the per-graph deletion
+ * policy, and a factory for per-graph stats counters.
+ */
+object TFlockIndexer {
+
+  /**
+   * Printable names for some edge types currently defined in [[com.twitter.flockdb.client]].
+   * Used to define stats counters for edge operations (see [[makeCounters]]).
+   */
+  val graphNames: Map[Int, String] =
+    Map(
+      CardTweetsGraph.id -> "card_tweets",
+      ConversationGraph.id -> "conversation",
+      DirectedAtUserIdGraph.id -> "directed_at_user_id",
+      InvitedUsersGraph.id -> "invited_users",
+      MediaTimelineGraph.id -> "media_timeline",
+      MentionsGraph.id -> "mentions",
+      NarrowcastSentTweetsGraph.id -> "narrowcast_sent_tweets",
+      NullcastedTweetsGraph.id -> "nullcasted_tweets",
+      QuotersGraph.id -> "quoters",
+      QuotesGraph.id -> "quotes",
+      QuoteTweetsIndexGraph.id -> "quote_tweets_index",
+      RepliesToTweetsGraph.id -> "replies_to_tweets",
+      RetweetsByMeGraph.id -> "retweets_by_me",
+      RetweetsGraph.id -> "retweets",
+      RetweetsOfMeGraph.id -> "retweets_of_me",
+      RetweetSourceGraph.id -> "retweet_source",
+      TweetsRetweetedGraph.id -> "tweets_retweeted",
+      UserTimelineGraph.id -> "user_timeline",
+      CreatorSubscriptionTimelineGraph.id -> "creator_subscription_timeline",
+      CreatorSubscriptionMediaTimelineGraph.id -> "creator_subscription_image_timeline",
+    )
+
+  /**
+   * On edge deletion, edges are either archived permanently or retained for 3 months, based on
+   * the retention policy of the graph they belong to.
+   *
+   * These two retention policies correspond to the two deletion techniques: archive and remove.
+   * We call removeEdges for edges with a short retention policy and archiveEdges for edges with
+   * a permanent retention policy. The graphs listed here are the ones whose edges are removed;
+   * edges of every other graph are archived.
+   */
+  val graphsWithRemovedEdges: Seq[Int] =
+    Seq(
+      CardTweetsGraph.id,
+      CuratedTimelineGraph.id,
+      CuratedTweetsGraph.id,
+      DirectedAtUserIdGraph.id,
+      MediaTimelineGraph.id,
+      MutedConversationsGraph.id,
+      QuotersGraph.id,
+      QuotesGraph.id,
+      QuoteTweetsIndexGraph.id,
+      ReportedTweetsGraph.id,
+      RetweetsOfMeGraph.id,
+      RetweetSourceGraph.id,
+      SoftLikesGraph.id,
+      TweetsRetweetedGraph.id,
+      CreatorSubscriptionTimelineGraph.id,
+      CreatorSubscriptionMediaTimelineGraph.id,
+    )
+
+  /**
+   * These edges should be left in place when bounced tweets are deleted.
+   * These edges are removed during hard deletion.
+   *
+   * This is done so external teams (timelines) can execute on these edges for
+   * tombstone feature.
+   */
+  val bounceDeleteGraphIds: Set[Int] =
+    Set(
+      UserTimelineGraph.id,
+      ConversationGraph.id
+    )
+
+  /**
+   * Builds one counter per known graph for the given operation, keyed by graph id.
+   *
+   * @param stats receiver under which a scope per graph name is created
+   * @param operation counter name used inside each graph scope (e.g. "insert")
+   * @return a map from graph id to its counter; ids missing from [[graphNames]] resolve to
+   *         the counter in the "unknown" scope
+   */
+  def makeCounters(stats: StatsReceiver, operation: String): Map[Int, Counter] = {
+    val unknownCounter = stats.scope("unknown").counter(operation)
+
+    // Use a strict `map` rather than `mapValues`: on Scala 2.12 `mapValues` yields a lazy view
+    // that would re-run `stats.scope(name).counter(operation)` on every single lookup.
+    graphNames
+      .map { case (graphId, name) => graphId -> stats.scope(name).counter(operation) }
+      .withDefaultValue(unknownCounter)
+  }
+}
+
+/**
+ * @param backgroundIndexingPriority specifies the queue to use for
+ *   background indexing operations. This is useful for making the
+ *   effects of background indexing operations (such as deleting edges
+ *   for deleted Tweets) available sooner in testing scenarios
+ *   (end-to-end tests or development instances). It is set to
+ *   Priority.Low in production to reduce the load on high priority
+ *   queues that we use for prominently user-visible operations.
+ */
+class TFlockIndexer(
+  tflock: TFlockClient,
+  hasMedia: Tweet => Boolean,
+  backgroundIndexingPriority: Priority,
+  stats: StatsReceiver)
+    extends TweetIndexer {
+  private[this] val FutureNil = Future.Nil
+
+  private[this] val archiveCounters = TFlockIndexer.makeCounters(stats, "archive")
+  private[this] val removeCounters = TFlockIndexer.makeCounters(stats, "remove")
+  private[this] val insertCounters = TFlockIndexer.makeCounters(stats, "insert")
+  private[this] val negateCounters = TFlockIndexer.makeCounters(stats, "negate")
+
+  private[this] val foregroundIndexingPriority: Priority = Priority.High
+
+  /** Indexes a tweet by inserting its edges via `createEdges`, treating it as a fresh create. */
+  override def createIndex(tweet: Tweet): Future[Unit] =
+    createEdges(tweet, isUndelete = false)
+
+  /** Re-indexes an undeleted tweet by inserting its edges via `createEdges` in undelete mode. */
+  override def undeleteIndex(tweet: Tweet): Future[Unit] =
+    createEdges(tweet, isUndelete = true)
+
+  /**
+   * Edges of a tweet being deleted, grouped by what `deleteIndex` does with them.
+   *
+   * @param longRetention edges passed to `archiveEdges`
+   * @param shortRetention edges passed to `removeEdges`
+   * @param negate edges passed to `negateEdges`
+   * @param ignore edges that `deleteIndex` leaves untouched
+   */
+  private[this] case class PartitionedEdges(
+    longRetention: Seq[ExecuteEdge[StatusGraph]] = Nil,
+    shortRetention: Seq[ExecuteEdge[StatusGraph]] = Nil,
+    negate: Seq[ExecuteEdge[StatusGraph]] = Nil,
+    ignore: Seq[ExecuteEdge[StatusGraph]] = Nil)
+
+  /**
+   * Sorts the edges of a tweet being deleted into the buckets of [[PartitionedEdges]].
+   *
+   * UserTimelineGraph edge states have two dependees to satisfy, timelines and safety tools:
+   *   - Timelines show bounce-deleted tweets as tombstones; regular deletes are not shown,
+   *     i.e. timelineIds = UserTimelineGraph(Normal || Negative)
+   *   - Safety tools show deleted tweets to authorized internal review agents,
+   *     i.e. deletedIds = UserTimelineGraph(Removed || Negative)
+   * so on a bounce delete the UserTimelineGraph edge is negated. Bounce-deleted tweets also
+   * remain rendered as tombstones in conversations, so their ConversationGraph edge is ignored.
+   *
+   * Every other edge is removed if its graph is in `graphsWithRemovedEdges`, archived otherwise.
+   * Edges are prepended, so each bucket ends up in reverse input order.
+   */
+  private[this] def partitionEdgesForDelete(
+    edges: Seq[ExecuteEdge[StatusGraph]],
+    isBounceDelete: Boolean
+  ) =
+    edges.foldLeft(PartitionedEdges()) { (acc, edge) =>
+      if (isBounceDelete && edge.graphId == UserTimelineGraph.id)
+        acc.copy(negate = edge +: acc.negate)
+      else if (isBounceDelete && edge.graphId == ConversationGraph.id)
+        acc.copy(ignore = edge +: acc.ignore)
+      else if (TFlockIndexer.graphsWithRemovedEdges.contains(edge.graphId))
+        acc.copy(shortRetention = edge +: acc.shortRetention)
+      else
+        acc.copy(longRetention = edge +: acc.longRetention)
+    }
+
+  /**
+   * Un-indexes a deleted tweet: computes its edges, partitions them by deletion technique, and
+   * issues the archive, remove, and negate calls at background priority. Per-graph counters
+   * are bumped for a batch only after that batch's call succeeds.
+   */
+  override def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] =
+    getEdges(tweet, isCreate = false, isDelete = true, isUndelete = false).flatMap { allEdges =>
+      val parts = partitionEdgesForDelete(allEdges, isBounceDelete)
+
+      // Bumps the matching per-graph counter once for every edge in the batch.
+      def record(counters: Map[Int, Counter], applied: Seq[ExecuteEdge[StatusGraph]]): Unit =
+        applied.foreach(edge => counters(edge.graphId).incr())
+
+      val archived =
+        tflock
+          .archiveEdges(parts.longRetention, backgroundIndexingPriority)
+          .onSuccess(_ => record(archiveCounters, parts.longRetention))
+      val removed =
+        tflock
+          .removeEdges(parts.shortRetention, backgroundIndexingPriority)
+          .onSuccess(_ => record(removeCounters, parts.shortRetention))
+      val negated =
+        tflock
+          .negateEdges(parts.negate, backgroundIndexingPriority)
+          .onSuccess(_ => record(negateCounters, parts.negate))
+
+      Future.join(archived, removed, negated).unit
+    }
+
+  /**
+   * Inserts (visible) or archives (hidden) the reverse RetweetsGraph edge of a retweet.
+   *
+   * This operation is called when a user is put into or taken out of a state in which their
+   * retweets should no longer be visible (e.g. suspended or ROPO).
+   */
+  override def setRetweetVisibility(retweetId: TweetId, setVisible: Boolean): Future[Unit] = {
+    val edgeToUpdate = Seq(ExecuteEdge(retweetId, RetweetsGraph, None, Reverse))
+
+    if (!setVisible)
+      tflock
+        .archiveEdges(edgeToUpdate, backgroundIndexingPriority)
+        .onSuccess(_ => archiveCounters(RetweetsGraph.id).incr())
+    else
+      tflock
+        .insertEdges(edgeToUpdate, backgroundIndexingPriority)
+        .onSuccess(_ => insertCounters(RetweetsGraph.id).incr())
+  }
+
+  /**
+   * Computes the edges for a created (or undeleted) tweet and inserts them at foreground
+   * priority, then counts each inserted edge against its graph's insert counter.
+   */
+  private[this] def createEdges(tweet: Tweet, isUndelete: Boolean): Future[Unit] =
+    getEdges(tweet = tweet, isCreate = true, isDelete = false, isUndelete = isUndelete)
+      .flatMap { newEdges =>
+        tflock
+          .insertEdges(newEdges, foregroundIndexingPriority)
+          // Count the edges only once the insert has succeeded.
+          .map(_ => newEdges.foreach(edge => insertCounters(edge.graphId).incr()))
+      }
+
+  /**
+   * Adds the edges that index a retweet.
+   *
+   * The RetweetsOfMe, RetweetsByMe and Retweets edges are always added. On create, the
+   * RetweetSourceGraph edge is positioned by the retweet's snowflake timestamp and the source
+   * tweet is recorded in TweetsRetweetedGraph. Otherwise the plain RetweetSourceGraph edge is
+   * added, and the TweetsRetweetedGraph edge is queued only if this looks like the last retweet.
+   *
+   * @param edges buffer receiving edges that are known immediately
+   * @param futureEdges buffer receiving edges that depend on a TFlock lookup
+   */
+  private[this] def addRTEdges(
+    tweet: Tweet,
+    share: Share,
+    isCreate: Boolean,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]],
+    futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]
+  ): Unit = {
+    edges.append(RetweetsOfMeGraph.edge(share.sourceUserId, tweet.id))
+    edges.append(RetweetsByMeGraph.edge(getUserId(tweet), tweet.id))
+    edges.append(RetweetsGraph.edge(share.sourceStatusId, tweet.id))
+
+    if (!isCreate) {
+      edges.append(RetweetSourceGraph.edge(getUserId(tweet), share.sourceStatusId))
+
+      // If this is the last retweet, the source tweet must also be dropped from the source
+      // user's tweets-retweeted graph. The cheap count is confirmed with a select before
+      // the edge is queued.
+      def retweetsOfSource = RetweetsGraph.from(share.sourceStatusId)
+      val lastRetweetEdges: Future[Seq[ExecuteEdge[StatusGraph]]] =
+        tflock.count(retweetsOfSource).flatMap { retweetCount =>
+          if (retweetCount > 1) {
+            FutureNil
+          } else {
+            tflock.selectAll(retweetsOfSource).map { retweetIds =>
+              if (retweetIds.size > 1)
+                Nil
+              else
+                Seq(TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId))
+            }
+          }
+        }
+      futureEdges.append(lastRetweetEdges)
+    } else {
+      edges += ExecuteEdge(
+        sourceId = getUserId(tweet),
+        graph = RetweetSourceGraph,
+        destinationIds = Some(Seq(share.sourceStatusId)),
+        direction = Forward,
+        position = Some(SnowflakeId(tweet.id).time.inMillis)
+      )
+      edges += TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId)
+    }
+  }
+
+  /**
+   * Adds the reply-related edges of a tweet.
+   *
+   * Nothing is added unless the tweet is a reply to another tweet (it has an
+   * `inReplyToStatusId`). In that case a RepliesToTweetsGraph edge is added and, when the
+   * tweet carries a conversation id, a ConversationGraph edge as well.
+   *
+   * @param edges buffer the edges are appended to
+   */
+  private[this] def addReplyEdges(
+    tweet: Tweet,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]]
+  ): Unit = {
+    // The work below is purely side-effecting, so traverse the options with foreach instead
+    // of flatMap/map, which would build Option results that are immediately thrown away.
+    getReply(tweet).foreach { reply =>
+      reply.inReplyToStatusId.foreach { inReplyToStatusId =>
+        edges += RepliesToTweetsGraph.edge(inReplyToStatusId, tweet.id)
+
+        // only index conversationId if this is a reply to another tweet
+        TweetLenses.conversationId.get(tweet).foreach { conversationId =>
+          edges += ConversationGraph.edge(conversationId, tweet.id)
+        }
+      }
+    }
+  }
+
+  /** Adds a DirectedAtUserIdGraph edge when the tweet is directed at a user. */
+  private[this] def addDirectedAtEdges(
+    tweet: Tweet,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]]
+  ): Unit =
+    for (target <- TweetLenses.directedAtUser.get(tweet))
+      edges += DirectedAtUserIdGraph.edge(target.userId, tweet.id)
+
+  /** Adds a MentionsGraph edge for every mention in the tweet that carries a user id. */
+  private[this] def addMentionEdges(
+    tweet: Tweet,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]]
+  ): Unit =
+    getMentions(tweet).foreach { mention =>
+      mention.userId.foreach { mentionedUserId =>
+        edges += MentionsGraph.edge(mentionedUserId, tweet.id)
+      }
+    }
+
+  /**
+   * Adds the edges that index a quote tweet; does nothing when the tweet quotes nothing.
+   *
+   * @param edges buffer receiving edges that are known immediately
+   * @param futureEdges buffer receiving edges that depend on a TFlock lookup
+   * @param isCreate true for creates/undeletes, false for deletes
+   */
+  private[this] def addQTEdges(
+    tweet: Tweet,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]],
+    futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]],
+    isCreate: Boolean
+  ): Unit = {
+    val quotingUserId = getUserId(tweet)
+
+    for (quoted <- tweet.quotedTweet) {
+      // These two graphs get an edge on both creates and deletes. The QuotersGraph is
+      // treated differently in the delete case, see below.
+      edges += QuotesGraph.edge(quoted.userId, tweet.id)
+      edges += QuoteTweetsIndexGraph.edge(quoted.tweetId, tweet.id)
+
+      if (isCreate) {
+        // On create the QuotersGraph edge is added without any additional checks.
+        edges += QuotersGraph.edge(quoted.tweetId, quotingUserId)
+      } else {
+        // On delete the QuotersGraph edge may only go away once the user is no longer quoting
+        // the tweet at all: a user who quoted a tweet several times keeps the edge until
+        // every one of those quotes is deleted, by definition of what the QuotersGraph
+        // represents.
+        //
+        // Note: a race condition leaves a potential edge case here.
+        // i)   A quotes a tweet T twice, resulting in tweets T1 and T2.
+        // ii)  The QuotersGraph should hold T -> A and the QuoteTweetsIndexGraph should hold
+        //      T1 <-> T and T2 <-> T, but one of the QuoteTweetsIndexGraph edges hasn't been
+        //      written to TFlock yet.
+        // iii) In that scenario the edge should not really be deleted as is done below.
+        // This is a "best effort" approach, similar to what is currently done for RTs.
+
+        // All the quotes of the quoted tweet made by the quoting user.
+        val quotesFromQuotingUser = QuoteTweetsIndexGraph
+          .from(quoted.tweetId)
+          .intersect(UserTimelineGraph.from(quotingUserId))
+
+        // The QuotersGraph edge is deleted only if this is the user's last quote of the
+        // quoted tweet. The cheap count is confirmed with a select first.
+        val quoterEdgeIfLastQuote: Future[Seq[ExecuteEdge[StatusGraph]]] =
+          tflock.count(quotesFromQuotingUser).flatMap { quoteCount =>
+            if (quoteCount > 1) {
+              FutureNil
+            } else {
+              tflock.selectAll(quotesFromQuotingUser).map { quoteIds =>
+                if (quoteIds.size > 1) Nil
+                else Seq(QuotersGraph.edge(quoted.tweetId, quotingUserId))
+              }
+            }
+          }
+        futureEdges.append(quoterEdgeIfLastQuote)
+      }
+    }
+  }
+
+  /**
+   * Adds a CardTweetsGraph edge when the tweet references a stored card.
+   *
+   * Note that we are indexing only the TOO "stored" cards (cardUri=card://<cardId>), i.e. the
+   * references matched by the `StoredCard` extractor. Rest of the cards are ignored here.
+   *
+   * @param edges buffer the edge is appended to
+   */
+  private[this] def addCardEdges(
+    tweet: Tweet,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]]
+  ): Unit =
+    // Appending is a side effect, so use foreach with an explicit fallthrough rather than
+    // collect { ... }.getOrElse(()), which builds an Option[Unit] just to discard it.
+    tweet.cardReference.foreach {
+      case StoredCard(id) => edges.append(CardTweetsGraph.edge(id, tweet.id))
+      case _ => ()
+    }
+
+  /**
+   * Adds the wildcard edge sets that are archived on delete and restored on undelete: every
+   * reverse MentionsGraph edge and every RepliesToTweetsGraph edge of the tweet.
+   *
+   * Note: on undelete this restores all archived edges, including those that may have been
+   * archived prior to the delete. This is incorrect behavior but in practice rarely causes
+   * problems, as undeletes are so rare.
+   */
+  private[this] def addEdgesForDeleteOrUndelete(
+    tweet: Tweet,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]]
+  ): Unit = {
+    edges += MentionsGraph.edges(tweet.id, None, Reverse)
+    edges += RepliesToTweetsGraph.edges(tweet.id, None)
+
+    // Deleting or undeleting a conversation control root Tweet archives or restores all the
+    // InvitedUsersGraph edges from the Tweet id.
+    val isControlledRoot = hasConversationControl(tweet) && isConversationRoot(tweet)
+    if (isControlledRoot)
+      edges += InvitedUsersGraph.edges(tweet.id, None)
+  }
+
+  /**
+   * Adds the timeline edges of a tweet.
+   *
+   * Nullcast tweets only get a NullcastedTweetsGraph edge and narrowcast tweets only a
+   * NarrowcastSentTweetsGraph edge. Every other tweet goes to the UserTimelineGraph, to the
+   * MediaTimelineGraph if it has media, and, for root exclusive tweets, to the creator
+   * subscription timelines.
+   */
+  private[this] def addSimpleEdges(
+    tweet: Tweet,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]]
+  ): Unit = {
+    if (TweetLenses.nullcast.get(tweet)) {
+      edges += NullcastedTweetsGraph.edge(getUserId(tweet), tweet.id)
+    } else if (TweetLenses.narrowcast.get(tweet).isDefined) {
+      edges += NarrowcastSentTweetsGraph.edge(getUserId(tweet), tweet.id)
+    } else {
+      edges += UserTimelineGraph.edge(getUserId(tweet), tweet.id)
+
+      if (hasMedia(tweet)) {
+        edges += MediaTimelineGraph.edge(getUserId(tweet), tweet.id)
+      }
+
+      // Index root creator subscription tweets only. Replies are ignored because those are
+      // not necessarily visible to a user who subscribes to the tweet author. A tweet without
+      // core data counts as a root tweet.
+      val isRootTweet: Boolean =
+        tweet.coreData.forall(core => core.reply.isEmpty && core.share.isEmpty)
+
+      if (tweet.exclusiveTweetControl.isDefined && isRootTweet) {
+        edges += CreatorSubscriptionTimelineGraph.edge(getUserId(tweet), tweet.id)
+
+        if (hasMedia(tweet)) {
+          edges += CreatorSubscriptionMediaTimelineGraph.edge(getUserId(tweet), tweet.id)
+        }
+      }
+    }
+  }
+
+  /**
+   * Adds an InvitedUsersGraph edge from the conversation to every user mentioned in a
+   * conversation-controlled tweet, so that the graph accumulates the complete set of
+   * @mention-invited user ids by conversation id. The tweet's own id is used when it has no
+   * conversation id.
+   */
+  private def invitedUsersEdgesForCreate(
+    tweet: Tweet,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]]
+  ): Unit = {
+    val conversationId: Long = getConversationId(tweet).getOrElse(tweet.id)
+    val invitedUserIds: Seq[UserId] = getMentions(tweet).flatMap(_.userId)
+    invitedUserIds.foreach { invitedUserId =>
+      edges += InvitedUsersGraph.edge(conversationId, invitedUserId)
+    }
+  }
+
+  /**
+   * Queues the InvitedUsersGraph edges that ought to be deleted for a conversation controlled
+   * reply: one per user mentioned in the given tweet, but only if that user is not mentioned
+   * elsewhere in the conversation. This way InvitedUsersGraph always holds the set of all
+   * users invited to a conversation, and an edge is removed only after the last mention of a
+   * user is deleted. Nothing is queued when the tweet has no conversation id.
+   */
+  private def invitedUsersEdgesForDelete(
+    tweet: Tweet,
+    futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]
+  ): Unit = {
+    getConversationId(tweet).foreach { conversationId: Long =>
+      val mentionedUserIds: Seq[UserId] = getMentions(tweet).flatMap(_.userId)
+      mentionedUserIds.foreach { mentionedUserId =>
+        val tweetIdsWithinConversation = ConversationGraph.from(conversationId)
+        val mentionsWithinConversation =
+          MentionsGraph.from(mentionedUserId).intersect(tweetIdsWithinConversation)
+
+        val inviteEdgeIfLastMention: Future[Seq[ExecuteEdge[StatusGraph]]] =
+          tflock
+            .selectAll(
+              query = mentionsWithinConversation,
+              limit = Some(2), // Just need to know if it is >1 or <=1, so 2 are enough.
+              pageSize = None // Provide default, otherwise Mockito complains
+            ).map { mentioningTweetIds: Seq[Long] =>
+              if (mentioningTweetIds.size > 1) Nil
+              else Seq(InvitedUsersGraph.edge(conversationId, mentionedUserId))
+            }
+        futureEdges.append(inviteEdgeIfLastMention)
+      }
+    }
+  }
+
+  /**
+   * True when the tweet's conversation control is one of ByInvitation, Community or Followers
+   * and has its `inviteViaMention` flag set to true; false for anything else.
+   */
+  private def hasInviteViaMention(tweet: Tweet): Boolean = {
+    val inviteViaMention: Option[Boolean] = tweet.conversationControl match {
+      case Some(ConversationControl.ByInvitation(controls)) => controls.inviteViaMention
+      case Some(ConversationControl.Community(controls)) => controls.inviteViaMention
+      case Some(ConversationControl.Followers(followers)) => followers.inviteViaMention
+      case _ => None
+    }
+    inviteViaMention.getOrElse(false)
+  }
+
+  /** True when the tweet carries any conversation control setting. */
+  private def hasConversationControl(tweet: Tweet): Boolean =
+    tweet.conversationControl.isDefined
+
+  // If a Tweet has a ConversationControl, it must have a ConversationId associated with it so we
+  // can compare the ConversationId with the current Tweet ID to determine if it's the root of the
+  // conversation. See ConversationIdHydrator for more details
+  //
+  // NOTE(review): `.get` is unguarded, so this presumably throws (NoSuchElementException if
+  // getConversationId returns an Option) when the conversation id is absent. Callers in this
+  // class only invoke it after hasConversationControl(tweet) has returned true.
+  private def isConversationRoot(tweet: Tweet): Boolean =
+    getConversationId(tweet).get == tweet.id
+
+  /**
+   * Adds or queues InvitedUsersGraph edges for a conversation-controlled tweet; tweets
+   * without conversation control are left alone.
+   *
+   *   - create, root: edges for the mentions, but only on original creates. Undeletes of root
+   *     Tweets are handled by addEdgesForDeleteOrUndelete.
+   *   - create, reply: edges for the mentions, only when the conversation control is in
+   *     inviteViaMention mode.
+   *   - delete, reply: queues the edges whose last mention is going away.
+   *   - delete, root: nothing to do here.
+   */
+  private def addInvitedUsersEdges(
+    tweet: Tweet,
+    isCreate: Boolean,
+    isUndelete: Boolean,
+    edges: ListBuffer[ExecuteEdge[StatusGraph]],
+    futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]
+  ): Unit = {
+    if (hasConversationControl(tweet)) {
+      val isRoot = isConversationRoot(tweet)
+
+      (isCreate, isRoot) match {
+        case (true, true) =>
+          if (!isUndelete) invitedUsersEdgesForCreate(tweet, edges)
+        case (true, false) =>
+          if (hasInviteViaMention(tweet)) invitedUsersEdgesForCreate(tweet, edges)
+        case (false, false) =>
+          invitedUsersEdgesForDelete(tweet, futureEdges)
+        case (false, true) =>
+          ()
+      }
+    }
+  }
+
+  /**
+   * Computes every edge that indexes the given tweet.
+   *
+   * Timeline edges are always included. Retweets then only get the retweet edges, while any
+   * other tweet gets invited-user, reply, directed-at, mention, quote and card edges, plus
+   * the wildcard edge sets when deleting or undeleting.
+   *
+   * @return the immediately known edges followed by the edges that required a TFlock lookup
+   */
+  private[this] def getEdges(
+    tweet: Tweet,
+    isCreate: Boolean,
+    isDelete: Boolean,
+    isUndelete: Boolean
+  ): Future[Seq[ExecuteEdge[StatusGraph]]] = {
+    val immediate = ListBuffer.empty[ExecuteEdge[StatusGraph]]
+    val deferred = ListBuffer.empty[Future[Seq[ExecuteEdge[StatusGraph]]]]
+
+    addSimpleEdges(tweet, immediate)
+    getShare(tweet) match {
+      case Some(share) =>
+        addRTEdges(tweet, share, isCreate, immediate, deferred)
+      case _ =>
+        addInvitedUsersEdges(tweet, isCreate, isUndelete, immediate, deferred)
+        addReplyEdges(tweet, immediate)
+        addDirectedAtEdges(tweet, immediate)
+        addMentionEdges(tweet, immediate)
+        addQTEdges(tweet, immediate, deferred, isCreate)
+        addCardEdges(tweet, immediate)
+        if (isDelete || isUndelete) {
+          addEdgesForDeleteOrUndelete(tweet, immediate)
+        }
+    }
+
+    // Once every lookup has completed, append its edges after the immediate ones.
+    Future.collect(deferred).map { resolved =>
+      resolved.foreach(batch => immediate ++= batch)
+      immediate.toList
+    }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala
@ -0,0 +1,30 @@
+/** Copyright 2010 Twitter, Inc. */
+package com.twitter.tweetypie
+package tflock
+
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.util.Future
+
+/**
+ * Hooks for keeping tweet indices in sync with the tweet lifecycle. Every method has a
+ * default implementation that does nothing and returns `Future.Unit`, so implementations only
+ * need to override the events they index.
+ */
+trait TweetIndexer {
+
+  /**
+   * Called at tweet-creation time, this method should set up all relevant indices on the tweet.
+   */
+  def createIndex(tweet: Tweet): Future[Unit] = Future.Unit
+
+  /**
+   * Called at tweet-undelete time (which isn't yet handled), this method should
+   * restore all relevant indices on the tweet.
+   */
+  def undeleteIndex(tweet: Tweet): Future[Unit] = Future.Unit
+
+  /**
+   * Called at tweet-delete time, this method should archive all relevant indices on the tweet.
+   *
+   * @param isBounceDelete whether the delete is a bounce delete, which implementations may
+   *   index differently from a regular delete
+   */
+  def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] = Future.Unit
+
+  /**
+   * This method should archive or unarchive the retweet edge in TFlock RetweetsGraph.
+   *
+   * @param retweetId id of the retweet whose edge is updated
+   * @param visible true to make the retweet visible (unarchive), false to hide it (archive)
+   */
+  def setRetweetVisibility(retweetId: TweetId, visible: Boolean): Future[Unit] = Future.Unit
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD
@ -0,0 +1,13 @@
+# Library target built from every .scala file in this directory (helpers around the generated
+# TweetService thrift code), compiled with the "fatal_warnings" compiler option set.
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle/finagle-core/src/main",
+        "scrooge/scrooge-core/src/main/scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala",
+        "util/util-core:scala",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala
@ -0,0 +1,8 @@
+package com.twitter.tweetypie.thriftscala
+
+import com.twitter.finagle.service.FailedService
+
+/**
+ * A TweetService client backed by a `FailedService`, so requests made through it are answered
+ * with an `UnsupportedOperationException("not implemented")`. Useful as a base class for
+ * stubs that override only the endpoints they need.
+ */
+class NotImplementedTweetService
+    extends TweetService$FinagleClient(
+      new FailedService(new UnsupportedOperationException("not implemented"))
+    )
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala
@ -0,0 +1,79 @@
+package com.twitter.tweetypie.thriftscala
+
+import com.twitter.util.Future
+
+/**
+ * A trait for TweetService implementations that wrap an underlying
+ * TweetService and need to modify only some of the methods.
+ *
+ * Every endpoint below forwards its request to the same-named method of `underlying`,
+ * passing the call through `wrap`.
+ */
+trait TweetServiceProxy extends TweetService.MethodPerEndpoint {
+  /** The service that every endpoint is forwarded to. */
+  protected def underlying: TweetService.MethodPerEndpoint
+
+  /**
+   * Default implementation simply passes through the Future but logic can be added to wrap each
+   * invocation to the underlying TweetService
+   *
+   * @param f the call to the underlying service; it is passed by name, so the call is not
+   *   made until `wrap` evaluates `f`
+   */
+  protected def wrap[A](f: => Future[A]): Future[A] =
+    f
+
+  override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] =
+    wrap(underlying.getTweets(request))
+
+  override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] =
+    wrap(underlying.getTweetFields(request))
+
+  override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] =
+    wrap(underlying.getTweetCounts(request))
+
+  override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] =
+    wrap(underlying.setAdditionalFields(request))
+
+  override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] =
+    wrap(underlying.deleteAdditionalFields(request))
+
+  override def postTweet(request: PostTweetRequest): Future[PostTweetResult] =
+    wrap(underlying.postTweet(request))
+
+  override def postRetweet(request: RetweetRequest): Future[PostTweetResult] =
+    wrap(underlying.postRetweet(request))
+
+  override def unretweet(request: UnretweetRequest): Future[UnretweetResult] =
+    wrap(underlying.unretweet(request))
+
+  override def getDeletedTweets(
+    request: GetDeletedTweetsRequest
+  ): Future[Seq[GetDeletedTweetResult]] =
+    wrap(underlying.getDeletedTweets(request))
+
+  override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] =
+    wrap(underlying.deleteTweets(request))
+
+  override def updatePossiblySensitiveTweet(
+    request: UpdatePossiblySensitiveTweetRequest
+  ): Future[Unit] =
+    wrap(underlying.updatePossiblySensitiveTweet(request))
+
+  override def undeleteTweet(request: UndeleteTweetRequest): Future[UndeleteTweetResponse] =
+    wrap(underlying.undeleteTweet(request))
+
+  override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] =
+    wrap(underlying.eraseUserTweets(request))
+
+  override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] =
+    wrap(underlying.incrTweetFavCount(request))
+
+  override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] =
+    wrap(underlying.deleteLocationData(request))
+
+  override def scrubGeo(request: GeoScrub): Future[Unit] =
+    wrap(underlying.scrubGeo(request))
+
+  override def takedown(request: TakedownRequest): Future[Unit] =
+    wrap(underlying.takedown(request))
+
+  override def flush(request: FlushRequest): Future[Unit] =
+    wrap(underlying.flush(request))
+
+  override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] =
+    wrap(underlying.incrTweetBookmarkCount(request))
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD
@ -0,0 +1,15 @@
+# Library target built from every .scala file in this directory (text-entity adapters and
+# extraction for the tweet thrift entities), compiled with the "fatal_warnings" option set.
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "tweetypie/servo/util",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
+        "tco-util",
+        "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext",
+        "tweetypie/common/src/scala/com/twitter/tweetypie/util",
+        "twitter-text/lib/java/src/main/java/com/twitter/twittertext",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala
@ -0,0 +1,11 @@
+package com.twitter.tweetypie.thriftscala.entities
+
+import com.twitter.tweetypie.thriftscala.CashtagEntity
+import com.twitter.tweetypie.tweettext.TextEntity
+
+/**
+ * [[TextEntity]] instance for [[CashtagEntity]]: reads the entity's index range and produces
+ * a copy of the entity moved to a new range.
+ */
+object CashtagTextEntity extends TextEntity[CashtagEntity] {
+  override def fromIndex(entity: CashtagEntity): Short = entity.fromIndex
+  override def toIndex(entity: CashtagEntity): Short = entity.toIndex
+  // Returns a copy with the new range; every other field is kept as is.
+  override def move(entity: CashtagEntity, fromIndex: Short, toIndex: Short): CashtagEntity =
+    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala
@ -0,0 +1,118 @@
+package com.twitter.tweetypie.thriftscala.entities
+
+import com.twitter.servo.data.Mutation
+import com.twitter.tco_util.TcoUrl
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.tweetypie.thriftscala.entities.Implicits._
+import com.twitter.tweetypie.tweettext.PartialHtmlEncoding
+import com.twitter.tweetypie.tweettext.TextEntity
+import com.twitter.tweetypie.tweettext.TextModification
+import com.twitter.tweetypie.util.TweetLenses
+import com.twitter.twittertext.Extractor
+import scala.collection.JavaConverters._
+
+/**
+ * Functions that collect urls, mentions, hashtags, and cashtags from the text of tweets and
+ * messages, plus [[Mutation]]s that re-populate those entities on a [[Tweet]].
+ */
+object EntityExtractor {
+  // Only one configuration of com.twitter.twittertext.Extractor is ever used, so a single
+  // global instance is shared. The only available configuration option is whether to extract
+  // URLs without protocols (defaults to true).
+  private[this] val extractor = new Extractor
+
+  // The twitter-text library operates on unencoded text, but we store and process
+  // HTML-encoded text. The TextModification returned from this function contains the decoded
+  // text which we will operate on, but also provides the ability to map the indices of the
+  // twitter-text entities back to the entities on the encoded text. Text that yields no
+  // decode modification falls back to an identity modification.
+  private val htmlEncodedTextToEncodeModification: String => TextModification = { text =>
+    val decodeModification = PartialHtmlEncoding.decodeWithModification(text)
+    decodeModification.getOrElse(TextModification.identity(text)).inverse
+  }
+
+  private[this] val extractAllUrlsFromTextMod: TextModification => Seq[UrlEntity] =
+    extractUrls(tcoOnly = false)
+
+  /** Extracts every url from HTML-encoded text. */
+  val extractAllUrls: String => Seq[UrlEntity] =
+    text => extractAllUrlsFromTextMod(htmlEncodedTextToEncodeModification(text))
+
+  private[this] val extractTcoUrls: TextModification => Seq[UrlEntity] =
+    extractUrls(tcoOnly = true)
+
+  // Builds a url extractor; with tcoOnly set, only urls accepted by TcoUrl.isTcoUrl are kept.
+  private[this] def extractUrls(tcoOnly: Boolean): TextModification => Seq[UrlEntity] =
+    mkEntityExtractor[UrlEntity](
+      text =>
+        extractor
+          .extractURLsWithIndices(text)
+          .asScala
+          .filter(found => !tcoOnly || TcoUrl.isTcoUrl(found.getValue)),
+      (from, to, value) => UrlEntity(from, to, value)
+    )
+
+  private[this] val extractMentionsFromTextMod: TextModification => Seq[MentionEntity] =
+    mkEntityExtractor[MentionEntity](
+      text => extractor.extractMentionedScreennamesWithIndices(text).asScala,
+      (from, to, value) => MentionEntity(from, to, value)
+    )
+
+  /** Extracts every mention from HTML-encoded text. */
+  val extractMentions: String => Seq[MentionEntity] =
+    text => extractMentionsFromTextMod(htmlEncodedTextToEncodeModification(text))
+
+  private[this] val extractHashtagsFromTextMod: TextModification => Seq[HashtagEntity] =
+    mkEntityExtractor[HashtagEntity](
+      text => extractor.extractHashtagsWithIndices(text).asScala,
+      (from, to, value) => HashtagEntity(from, to, value)
+    )
+
+  /** Extracts every hashtag from HTML-encoded text. */
+  val extractHashtags: String => Seq[HashtagEntity] =
+    text => extractHashtagsFromTextMod(htmlEncodedTextToEncodeModification(text))
+
+  private[this] val extractCashtagsFromTextMod: TextModification => Seq[CashtagEntity] =
+    mkEntityExtractor[CashtagEntity](
+      text => extractor.extractCashtagsWithIndices(text).asScala,
+      (from, to, value) => CashtagEntity(from, to, value)
+    )
+
+  /** Extracts every cashtag from HTML-encoded text. */
+  val extractCashtags: String => Seq[CashtagEntity] =
+    text => extractCashtagsFromTextMod(htmlEncodedTextToEncodeModification(text))
+
+  // Generic extraction pipeline: run `extract` over the modification's original text, convert
+  // the twitter-text indices from UTF-16 to unicode, then build each entity with `construct`
+  // and reindex it through the modification. Entities whose indices do not fit in a Short,
+  // whose value is null, or that cannot be reindexed are dropped.
+  private[this] def mkEntityExtractor[E: TextEntity](
+    extract: String => Seq[Extractor.Entity],
+    construct: (Short, Short, String) => E
+  ): TextModification => Seq[E] =
+    htmlEncodedMod => {
+      def toEntity(found: Extractor.Entity): Option[E] =
+        for {
+          start <- asShort(found.getStart.intValue)
+          end <- asShort(found.getEnd.intValue)
+          if found.getValue != null
+          reindexed <- htmlEncodedMod.reindexEntity(construct(start, end, found.getValue))
+        } yield reindexed
+
+      val extracted = extract(htmlEncodedMod.original)
+      // Adjusts the indices of the extracted entities in place, before they are converted.
+      extractor.modifyIndicesFromUTF16ToUnicode(htmlEncodedMod.original, extracted.asJava)
+      extracted.map(toEntity).flatten
+    }
+
+  // Narrows an Int to a Short, or None when it is out of the Short range.
+  private[this] def asShort(i: Int): Option[Short] =
+    Some(i).filter(_.isValidShort).map(_.toShort)
+
+  // Mutation that replaces the mentions, hashtags and cashtags of a tweet with the ones
+  // extracted from its text; urls are replaced with the extracted t.co urls only when
+  // `extractUrls` is set, and are kept as is otherwise.
+  private[this] def mutation(extractUrls: Boolean): Mutation[Tweet] =
+    Mutation { tweet =>
+      val textMod = htmlEncodedTextToEncodeModification(TweetLenses.text.get(tweet))
+      val urls = if (extractUrls) Some(extractTcoUrls(textMod)) else tweet.urls
+
+      Some(
+        tweet.copy(
+          urls = urls,
+          mentions = Some(extractMentionsFromTextMod(textMod)),
+          hashtags = Some(extractHashtagsFromTextMod(textMod)),
+          cashtags = Some(extractCashtagsFromTextMod(textMod))
+        )
+      )
+    }
+
+  /** Re-extracts mentions, hashtags and cashtags, leaving the tweet's urls untouched. */
+  val mutationWithoutUrls: Mutation[Tweet] = mutation(extractUrls = false)
+
+  /** Re-extracts t.co urls, mentions, hashtags and cashtags. */
+  val mutationAll: Mutation[Tweet] = mutation(extractUrls = true)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala
@ -0,0 +1,11 @@
+package com.twitter.tweetypie.thriftscala.entities
+
+import com.twitter.tweetypie.thriftscala.HashtagEntity
+import com.twitter.tweetypie.tweettext.TextEntity
+
+/**
+ * [[TextEntity]] instance for [[HashtagEntity]]: reads the entity's
+ * `fromIndex` / `toIndex` fields and repositions it via `copy`.
+ */
+object HashtagTextEntity extends TextEntity[HashtagEntity] {
+  override def fromIndex(entity: HashtagEntity): Short = entity.fromIndex
+  override def toIndex(entity: HashtagEntity): Short = entity.toIndex
+  // Returns a copy with both indices replaced; every other field is carried over.
+  override def move(entity: HashtagEntity, fromIndex: Short, toIndex: Short): HashtagEntity =
+    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala
@ -0,0 +1,10 @@
+package com.twitter.tweetypie.thriftscala.entities
+
+/**
+ * Implicit [[com.twitter.tweetypie.tweettext.TextEntity]] instances for the entity
+ * types of this package. Import `Implicits._` to bring them into implicit scope.
+ */
+object Implicits {
+  implicit val hashtagTextEntity: HashtagTextEntity.type = HashtagTextEntity
+  implicit val cashtagTextEntity: CashtagTextEntity.type = CashtagTextEntity
+  implicit val mentionTextEntity: MentionTextEntity.type = MentionTextEntity
+  implicit val urlTextEntity: UrlTextEntity.type = UrlTextEntity
+  implicit val mediaTextEntity: MediaTextEntity.type = MediaTextEntity
+  implicit val textRangeTextEntity: TextRangeEntityAdapter.type = TextRangeEntityAdapter
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala
@ -0,0 +1,11 @@
+package com.twitter.tweetypie.thriftscala.entities
+
+import com.twitter.tweetypie.thriftscala.MediaEntity
+import com.twitter.tweetypie.tweettext.TextEntity
+
+/**
+ * [[TextEntity]] instance for [[MediaEntity]]: reads the entity's
+ * `fromIndex` / `toIndex` fields and repositions it via `copy`.
+ */
+object MediaTextEntity extends TextEntity[MediaEntity] {
+  override def fromIndex(entity: MediaEntity): Short = entity.fromIndex
+  override def toIndex(entity: MediaEntity): Short = entity.toIndex
+  // Returns a copy with both indices replaced; every other field is carried over.
+  override def move(entity: MediaEntity, fromIndex: Short, toIndex: Short): MediaEntity =
+    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala
@ -0,0 +1,11 @@
+package com.twitter.tweetypie.thriftscala.entities
+
+import com.twitter.tweetypie.thriftscala.MentionEntity
+import com.twitter.tweetypie.tweettext.TextEntity
+
+/**
+ * [[TextEntity]] instance for [[MentionEntity]]: reads the entity's
+ * `fromIndex` / `toIndex` fields and repositions it via `copy`.
+ */
+object MentionTextEntity extends TextEntity[MentionEntity] {
+  override def fromIndex(entity: MentionEntity): Short = entity.fromIndex
+  override def toIndex(entity: MentionEntity): Short = entity.toIndex
+  // Returns a copy with both indices replaced; every other field is carried over.
+  override def move(entity: MentionEntity, fromIndex: Short, toIndex: Short): MentionEntity =
+    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala
@ -0,0 +1,11 @@
+package com.twitter.tweetypie.thriftscala.entities
+
+import com.twitter.tweetypie.thriftscala.TextRange
+import com.twitter.tweetypie.tweettext.TextEntity
+
+/**
+ * [[TextEntity]] instance for [[TextRange]].
+ *
+ * Unlike the other entity adapters, the indices read from a [[TextRange]] are
+ * narrowed with `.toShort`.
+ * NOTE(review): presumably the `TextRange` index fields are wider than `Short`, in
+ * which case out-of-range values are silently truncated here -- confirm.
+ */
+object TextRangeEntityAdapter extends TextEntity[TextRange] {
+  override def fromIndex(entity: TextRange): Short = entity.fromIndex.toShort
+  override def toIndex(entity: TextRange): Short = entity.toIndex.toShort
+  // Returns a copy with both indices replaced.
+  override def move(entity: TextRange, fromIndex: Short, toIndex: Short): TextRange =
+    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala
@ -0,0 +1,11 @@
+package com.twitter.tweetypie.thriftscala.entities
+
+import com.twitter.tweetypie.thriftscala.UrlEntity
+import com.twitter.tweetypie.tweettext.TextEntity
+
+/**
+ * [[TextEntity]] instance for [[UrlEntity]]: reads the entity's
+ * `fromIndex` / `toIndex` fields and repositions it via `copy`.
+ */
+object UrlTextEntity extends TextEntity[UrlEntity] {
+  override def fromIndex(entity: UrlEntity): Short = entity.fromIndex
+  override def toIndex(entity: UrlEntity): Short = entity.toIndex
+  // Returns a copy with both indices replaced; every other field is carried over.
+  override def move(entity: UrlEntity, fromIndex: Short, toIndex: Short): UrlEntity =
+    entity.copy(fromIndex = fromIndex, toIndex = toIndex)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD
@ -0,0 +1,16 @@
+# Library target for the tweettext package: builds every .scala file in this
+# directory, with compiler warnings treated as errors.
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    # Artifact coordinates: com.twitter:tweetypie-tweettext.
+    provides = scala_artifact(
+        org = "com.twitter",
+        name = "tweetypie-tweettext",
+        repo = artifactory,
+    ),
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "3rdparty/jvm/com/ibm/icu:icu4j",
+        "twitter-text/lib/java/src/main/java/com/twitter/twittertext",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala
@ -0,0 +1,44 @@
+package com.twitter.tweetypie.tweettext
+
+import com.ibm.icu.text.BreakIterator
+
+/**
+ * Exposes the character boundaries found by ICU's [[BreakIterator]] as scala
+ * [[Iterator]]s over the offsets of user-perceived characters in a String.
+ */
+object GraphemeIndexIterator {
+
+  /**
+   * Produce an iterator over the indices in the string at which a
+   * user-perceived character (grapheme) ends.
+   */
+  def ends(s: String): Iterator[Offset.CodeUnit] = {
+    // Every grapheme start except the first is also the end of the previous
+    // grapheme; the last grapheme ends where the string ends.
+    val laterStarts = starts(s).drop(1)
+    laterStarts ++ Iterator.single(Offset.CodeUnit.length(s))
+  }
+
+  /**
+   * Produce an iterator over the indices in the string at which a
+   * user-perceived character (grapheme) starts.
+   *
+   * Calling `next` on the exhausted iterator throws an `IllegalArgumentException`.
+   */
+  def starts(s: String): Iterator[Offset.CodeUnit] = {
+    val breaks = BreakIterator.getCharacterInstance()
+    breaks.setText(s)
+
+    new Iterator[Offset.CodeUnit] {
+      override def hasNext: Boolean = breaks.current < s.length
+
+      override def next: Offset.CodeUnit = {
+        if (!hasNext) throw new IllegalArgumentException(s"${breaks.current()}, ${s.length}")
+
+        // The boundary the break iterator currently sits on is the start of the
+        // next grapheme: capture it, then advance to the following boundary.
+        val start = Offset.CodeUnit(breaks.current())
+        breaks.next()
+        start
+      }
+    }
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala
@ -0,0 +1,85 @@
+package com.twitter.tweetypie.tweettext
+
+/**
+ * An efficient converter of indices between code points and code units.
+ *
+ * The converter remembers the most recently converted position (as a matching
+ * pair of code unit and code point offsets) and counts from there, so a run of
+ * conversions at nearby offsets does not rescan the text from the start. That
+ * cached position is plain mutable state with no synchronization.
+ */
+class IndexConverter(text: String) {
+  // Keep track of a single corresponding pair of code unit and code point
+  // offsets so that we can re-use counting work if the next requested
+  // entity is near the most recent entity.
+  private var codePointIndex = 0
+  // The code unit index should never split a surrogate pair.
+  private var charIndex = 0
+
+  /**
+   * @param offset Index into the string measured in code units.
+   * @return The code point index that corresponds to the specified character index.
+   */
+  def toCodePoints(offset: Offset.CodeUnit): Offset.CodePoint =
+    Offset.CodePoint(codeUnitsToCodePoints(offset.toInt))
+
+  /**
+   * Converts a code unit index to a code point index. An index that falls on the
+   * second code unit of a surrogate pair is treated as the index of the pair's
+   * first code unit.
+   *
+   * @param charIndex Index into the string measured in code units.
+   * @return The code point index that corresponds to the specified character index.
+   */
+  def codeUnitsToCodePoints(charIndex: Int): Int = {
+    // Snap an index that lands on the low half of a surrogate pair back to the start
+    // of the pair *before* counting. Counting up to the raw index and compensating
+    // afterwards only works when moving forward: when moving backward,
+    // `codePointCount` already counts the orphaned low surrogate as a code point of
+    // its own, so a further decrement would leave the cached pair off by one.
+    val splitsPair =
+      charIndex > 0 &&
+        charIndex < text.length &&
+        Character.isHighSurrogate(text.charAt(charIndex - 1)) &&
+        Character.isLowSurrogate(text.charAt(charIndex))
+    val target = if (splitsPair) charIndex - 1 else charIndex
+
+    // Both ends of the counted span are now on code point boundaries, so the count
+    // is exact in either direction.
+    if (target < this.charIndex) {
+      this.codePointIndex -= text.codePointCount(target, this.charIndex)
+    } else {
+      this.codePointIndex += text.codePointCount(this.charIndex, target)
+    }
+    this.charIndex = target
+
+    this.codePointIndex
+  }
+
+  /**
+   * @param offset Index into the string measured in code points.
+   * @return the corresponding code unit index
+   */
+  def toCodeUnits(offset: Offset.CodePoint): Offset.CodeUnit = {
+    // Walk from the cached position by the signed distance in code points.
+    this.charIndex = text.offsetByCodePoints(charIndex, offset.toInt - this.codePointIndex)
+    this.codePointIndex = offset.toInt
+    Offset.CodeUnit(this.charIndex)
+  }
+
+  /**
+   * @param codePointIndex Index into the string measured in code points.
+   * @return the corresponding code unit index
+   */
+  def codePointsToCodeUnits(codePointIndex: Int): Int =
+    toCodeUnits(Offset.CodePoint(codePointIndex)).toInt
+
+  /**
+   * Returns the substring that begins at code point `from` and extends to code point
+   * `to`. Since String.substring works with code unit indices, both code point
+   * offsets are first converted to code unit offsets.
+   */
+  def substring(from: Offset.CodePoint, to: Offset.CodePoint): String =
+    text.substring(toCodeUnits(from).toInt, toCodeUnits(to).toInt)
+
+  /**
+   * Returns the substring that begins at code point `from` and extends to code point
+   * `to`. Since String.substring works with code unit indices, both code point
+   * offsets are first converted to code unit offsets.
+   */
+  def substringByCodePoints(from: Int, to: Int): String =
+    substring(Offset.CodePoint(from), Offset.CodePoint(to))
+
+  /**
+   * Returns the substring that begins at code point `from` and extends to the end of
+   * the string. Since String.substring works with code unit indices, the code point
+   * offset is first converted to a code unit offset.
+   */
+  def substringByCodePoints(from: Int): String = {
+    val charFrom = codePointsToCodeUnits(from)
+    text.substring(charFrom)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala
@ -0,0 +1,253 @@
+package com.twitter.tweetypie.tweettext
+import scala.collection.immutable
+
+/**
+ * An Offset is a typed index into a String. An instance of this trait describes
+ * one kind of offset `T`: how to read it as an `Int`, how to order it, and how to
+ * measure a stretch of text in units of `T`.
+ */
+trait Offset[T] extends Ordering[T] {
+
+  /** The raw integer value of the offset. */
+  def toInt(t: T): Int
+
+  /** Measures, in units of `T`, the text between the code unit offsets `start` and `end`. */
+  def count(text: String, start: Offset.CodeUnit, end: Offset.CodeUnit): T
+
+  /** Orders offsets by their integer value. */
+  def compare(t1: T, t2: T): Int = Integer.compare(toInt(t1), toInt(t2))
+
+  /** The length of the whole of `input`, measured in units of `T`. */
+  def length(input: String): T = {
+    val begin = Offset.CodeUnit(0)
+    val finish = Offset.CodeUnit.length(input)
+    count(input, begin, finish)
+  }
+}
+
+object Offset {
+
+  /**
+   * An offset counted in UTF-16 code units, which is how Java/Scala Strings
+   * are natively indexed.
+   */
+  case class CodeUnit(toInt: Int) extends AnyVal with Ordered[CodeUnit] {
+    def compare(other: CodeUnit): Int = Integer.compare(toInt, other.toInt)
+    def +(other: CodeUnit): CodeUnit = CodeUnit(toInt + other.toInt)
+    def -(other: CodeUnit): CodeUnit = CodeUnit(toInt - other.toInt)
+    def min(other: CodeUnit): CodeUnit = if (other.toInt <= toInt) other else this
+    def max(other: CodeUnit): CodeUnit = if (other.toInt >= toInt) other else this
+    def incr: CodeUnit = this + CodeUnit(1)
+    def decr: CodeUnit = this - CodeUnit(1)
+
+    /** Every offset from this one (inclusive) up to `end` (exclusive). */
+    def until(end: CodeUnit): immutable.IndexedSeq[CodeUnit] =
+      (toInt until end.toInt).map(i => CodeUnit(i))
+
+    /**
+     * Converts this `CodeUnit` to the equivalent `CodePoint` within the
+     * given text, by counting the code points that precede it.
+     */
+    def toCodePoint(text: String): CodePoint = {
+      val precedingCodePoints = text.codePointCount(0, toInt)
+      CodePoint(precedingCodePoints)
+    }
+
+    /** The offset reached by moving `codePoints` code points through `text` from this offset. */
+    def offsetByCodePoints(text: String, codePoints: CodePoint): CodeUnit = {
+      val moved = text.offsetByCodePoints(toInt, codePoints.toInt)
+      CodeUnit(moved)
+    }
+  }
+
+  /** [[Offset]] instance for [[CodeUnit]]; measuring in code units is plain index arithmetic. */
+  implicit object CodeUnit extends Offset[CodeUnit] {
+    def toInt(u: CodeUnit): Int = u.toInt
+
+    // Overridden to read the String's length directly instead of going through `count`.
+    override def length(text: String): CodeUnit = CodeUnit(text.length)
+
+    def count(text: String, start: CodeUnit, end: CodeUnit): CodeUnit =
+      CodeUnit(end.toInt - start.toInt)
+  }
+
+  /**
+   * An offset counted in whole Unicode code points. Any CodePoint is a valid
+   * offset into the String as long as it is >= 0 and less than the
+   * number of code points in the string.
+   */
+  case class CodePoint(toInt: Int) extends AnyVal with Ordered[CodePoint] {
+    // Plain numeric narrowing of the underlying Int.
+    def toShort: Short = toInt.toShort
+    def compare(other: CodePoint): Int = Integer.compare(toInt, other.toInt)
+    def +(other: CodePoint): CodePoint = CodePoint(toInt + other.toInt)
+    def -(other: CodePoint): CodePoint = CodePoint(toInt - other.toInt)
+    def min(other: CodePoint): CodePoint = if (other.toInt <= toInt) other else this
+    def max(other: CodePoint): CodePoint = if (other.toInt >= toInt) other else this
+
+    /** Every offset from this one (inclusive) up to `end` (exclusive). */
+    def until(end: CodePoint): immutable.IndexedSeq[CodePoint] =
+      (toInt until end.toInt).map(i => CodePoint(i))
+
+    /** The code unit offset in `text` that lies this many code points from its start. */
+    def toCodeUnit(text: String): CodeUnit = {
+      val unitIndex = text.offsetByCodePoints(0, toInt)
+      CodeUnit(unitIndex)
+    }
+  }
+
+  /** [[Offset]] instance for [[CodePoint]]. */
+  implicit object CodePoint extends Offset[CodePoint] {
+    def toInt(p: CodePoint): Int = p.toInt
+
+    /** The number of code points between the code unit offsets `start` and `end`. */
+    def count(text: String, start: CodeUnit, end: CodeUnit): CodePoint = {
+      val codePoints = text.codePointCount(start.toInt, end.toInt)
+      CodePoint(codePoints)
+    }
+  }
+
+  /**
+   * An offset into the String as if the String were encoded as UTF-8. You
+   * cannot use a [[Utf8]] offset to index a String, because not all
+   * Utf8 indices are valid indices into the String.
+   */
+  case class Utf8(toInt: Int) extends AnyVal with Ordered[Utf8] {
+    def compare(other: Utf8): Int = Integer.compare(toInt, other.toInt)
+    def +(other: Utf8): Utf8 = Utf8(toInt + other.toInt)
+    def -(other: Utf8): Utf8 = Utf8(toInt - other.toInt)
+    def min(other: Utf8): Utf8 = if (other.toInt <= toInt) other else this
+    def max(other: Utf8): Utf8 = if (other.toInt >= toInt) other else this
+  }
+
+  /** [[Offset]] instance for [[Utf8]], measuring text in UTF-8 encoded bytes. */
+  implicit object Utf8 extends Offset[Utf8] {
+    def toInt(u: Utf8): Int = u.toInt
+
+    /**
+     * Count how many bytes the section of `s` between the code unit offsets
+     * `start` and `end` would be when encoded as UTF-8.
+     */
+    def count(s: String, start: CodeUnit, end: CodeUnit): Utf8 = {
+      val limit = end.toInt
+      var index = start.toInt
+      var bytes = 0
+
+      // Walk the section one code point at a time, adding up encoded sizes.
+      while (index < limit) {
+        val cp = s.codePointAt(index)
+        bytes += forCodePoint(cp).toInt
+        index += Character.charCount(cp)
+      }
+
+      Utf8(bytes)
+    }
+
+    /**
+     * The number of bytes a single code point takes in UTF-8. There is no
+     * convenient API for this, so it is calculated explicitly from the
+     * value of the code point.
+     *
+     * @see http://en.wikipedia.org/wiki/UTF-8#Description
+     */
+    def forCodePoint(cp: Int): Utf8 = {
+      val isSurrogate = Character.getType(cp) == Character.SURROGATE
+      val bytes =
+        // if the code point is an unpaired surrogate, it will be converted
+        // into a 1 byte replacement character
+        if (isSurrogate) 1
+        else if (cp < 0x80) 1
+        else if (cp < 0x800) 2
+        else if (cp < 0x10000) 3
+        else 4
+
+      Utf8(bytes)
+    }
+  }
+
+  /**
+   * Display units count what we consider a "character" in a Tweet.
+   * [[DisplayUnit]] offsets are only valid for text that is NFC-normalized
+   * (See: http://www.unicode.org/reports/tr15) and HTML-encoded, though
+   * this interface cannot enforce that.
+   *
+   * Currently, a [[DisplayUnit]] is equivalent to a single Unicode code
+   * point, except that "&lt;", "&gt;", and "&amp;" are each treated as a
+   * single character (since they are displayed as '<', '>', and '&'
+   * respectively). This implementation is not directly exposed.
+   *
+   * It should be possible to change this definition without breaking
+   * code that uses the [[DisplayUnit]] interface, e.g. to count
+   * user-perceived characters (graphemes) rather than code points.
+   * Any such change has to be made in concert with changing the mobile
+   * client and Web implementations, so that the user experience of
+   * character counting remains consistent.
+   */
+  case class DisplayUnit(toInt: Int) extends AnyVal with Ordered[DisplayUnit] {
+    def compare(other: DisplayUnit): Int = Integer.compare(toInt, other.toInt)
+    def +(other: DisplayUnit): DisplayUnit = DisplayUnit(toInt + other.toInt)
+    def -(other: DisplayUnit): DisplayUnit = DisplayUnit(toInt - other.toInt)
+    def min(other: DisplayUnit): DisplayUnit = if (other.toInt <= toInt) other else this
+    def max(other: DisplayUnit): DisplayUnit = if (other.toInt >= toInt) other else this
+  }
+
+  /** [[Offset]] instance for [[DisplayUnit]]. */
+  implicit object DisplayUnit extends Offset[DisplayUnit] {
+    def toInt(d: DisplayUnit): Int = d.toInt
+
+    /**
+     * Returns the number of display units in the specified range of the
+     * given text.  See [[DisplayUnit]] for a description of what we
+     * consider a display unit.
+     *
+     * The input string should already be NFC normalized to get
+     * consistent results.  If partially html encoded, it will correctly
+     * count html entities as a single display unit.
+     *
+     * @param text the string containing the characters to count.
+     * @param start the index to the first char of the text range
+     * @param end the index after the last char of the text range; an index
+     *   past the end of the text is treated as the end of the text.
+     */
+    def count(text: String, start: CodeUnit, end: CodeUnit): DisplayUnit = {
+      val stop = end.min(CodeUnit.length(text)).toInt
+      var position = start.toInt
+      var units = 0
+
+      // Hop from display unit to display unit until the end of the range.
+      while (position < stop) {
+        position += at(text, CodeUnit(position)).toInt
+        units += 1
+      }
+
+      DisplayUnit(units)
+    }
+
+    /**
+     * Return the length, in code units, of the display unit at the specified
+     * offset in the (NFC-normalized, HTML-encoded) text.
+     */
+    def at(text: String, offset: CodeUnit): CodeUnit = {
+      val index = offset.toInt
+      val cp = text.codePointAt(index)
+
+      val width =
+        if (cp != '&') Character.charCount(cp)
+        else if (text.startsWith("&amp;", index)) 5
+        else if (text.startsWith("&lt;", index)) 4
+        else if (text.startsWith("&gt;", index)) 4
+        else 1 // a bare '&' that does not begin one of the three entities
+
+      CodeUnit(width)
+    }
+  }
+
+  /**
+   * Ranges of offsets, useful for avoiding slicing entities.
+   */
+  sealed trait Ranges[T] {
+
+    /** True when `t` lies strictly inside one of the ranges. */
+    def contains(t: T): Boolean
+  }
+
+  object Ranges {
+    // Each pair is an exclusive (lo, hi) range: it covers the offsets strictly
+    // between lo and hi.
+    private[this] case class Impl[T](toSeq: Seq[(T, T)])(implicit off: Offset[T])
+        extends Ranges[T] {
+      def contains(t: T): Boolean = toSeq.exists { case (lo, hi) => off.gt(t, lo) && off.lt(t, hi) }
+    }
+
+    /**
+     * Non-inclusive range of offsets (matches values that are strictly
+     * between `lo` and `hi`). The result is empty when no offset lies strictly
+     * between the two bounds.
+     */
+    def between[T](lo: T, hi: T)(implicit off: Offset[T]): Ranges[T] =
+      // The second condition rejects the one `lo` for which `lo + 1` overflows.
+      if (off.toInt(hi) > off.toInt(lo) + 1 && off.toInt(lo) < Int.MaxValue) Impl(Seq((lo, hi)))
+      else Impl(Nil)
+
+    /**
+     * The union of all of the specified ranges.
+     */
+    def all[T](ranges: Seq[Ranges[T]])(implicit off: Offset[T]): Ranges[T] =
+      Impl(
+        // Preprocess the ranges so that each contains check is as cheap
+        // as possible: sort by lower bound and merge ranges that overlap.
+        ranges
+          .flatMap { case r: Impl[T] => r.toSeq }
+          .sortBy(_._1)
+          .foldLeft(Nil: List[(T, T)]) {
+            // The merged range must keep the larger upper bound: a range nested inside
+            // the previous one (e.g. (2, 5) inside (0, 10)) must not shrink the union.
+            case ((a, b) :: out, (c, d)) if off.lt(c, b) => (a, off.max(b, d)) :: out
+            case (out, r) => r :: out
+          }
+      )
+
+    def Empty[T: Offset]: Ranges[T] = Impl[T](Nil)
+
+    private[this] val HtmlEscapes = """&(?:amp|lt|gt);""".r
+
+    /**
+     * Match [[CodeUnit]]s that would split a HTML entity.
+     */
+    def htmlEntities(s: String): Ranges[CodeUnit] = {
+      // Read the bounds from each Match rather than from the iterator's mutable state.
+      val entityRanges =
+        HtmlEscapes.findAllMatchIn(s).map(m => between(CodeUnit(m.start), CodeUnit(m.end))).toList
+      all(entityRanges)
+    }
+
+    /** The union of the non-inclusive ranges described by `pairs` of (lo, hi) code point offsets. */
+    def fromCodePointPairs(pairs: Seq[(Int, Int)]): Ranges[CodePoint] =
+      all(pairs.map { case (lo, hi) => between(CodePoint(lo), CodePoint(hi)) })
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala
@ -0,0 +1,55 @@
+package com.twitter.tweetypie.tweettext
+
+/**
+ * Code used to convert raw user-provided text into an allowable form: encoding
+ * and decoding of the three characters (`<`, `>` and `&`) that are HTML-escaped
+ * in tweet text.
+ */
+object PartialHtmlEncoding {
+
+  /**
+   * Replaces all `<`, `>`, and '&' chars with "&lt;", "&gt;", and "&amp;", respectively.
+   *
+   * Tweet text is HTML-encoded at tweet creation time, and is stored and processed in encoded form.
+   */
+  def encode(text: String): String = {
+    // Escaping never shortens the text, so the input length is a lower bound
+    // for the capacity the builder will need.
+    val buf = new StringBuilder(text.length)
+
+    text.foreach {
+      case '<' => buf.append("&lt;")
+      case '>' => buf.append("&gt;")
+      case '&' => buf.append("&amp;")
+      case c => buf.append(c)
+    }
+
+    buf.toString
+  }
+
+  private val AmpLtRegex = "&lt;".r
+  private val AmpGtRegex = "&gt;".r
+  private val AmpAmpRegex = "&amp;".r
+
+  /**
+   * The opposite of encode, it replaces all "&lt;", "&gt;", and "&amp;" with
+   * `<`, `>`, and '&', respectively. Text that contains none of these is
+   * returned as it is.
+   */
+  def decode(text: String): String =
+    decodeWithModification(text).fold(text)(_.updated)
+
+  /**
+   * Decodes encoded entities, and returns a `TextModification` if the text was modified.
+   */
+  def decodeWithModification(text: String): Option[TextModification] =
+    TextModification.replaceAll(
+      text,
+      AmpLtRegex -> "<",
+      AmpGtRegex -> ">",
+      AmpAmpRegex -> "&"
+    )
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala
@ -0,0 +1,251 @@
+package com.twitter.tweetypie.tweettext
+import scala.util.matching.Regex
+
+/**
+ * Code used to convert raw user-provided text into an allowable form.
+ */
+object Preprocessor {
+  import TweetText._
+  import TextModification.replaceAll
+
+  /**
+   * Regex matching a dos-style line ending (carriage return followed by line feed).
+   */
+  val DosLineEndingRegex: Regex = new Regex("""\r\n""")
+
+  /**
+   * Converts every \r\n to just \n. A carriage return that is not followed
+   * by a line feed is left in place.
+   */
+  def normalizeNewlines(text: String): String = {
+    val matcher = DosLineEndingRegex.pattern.matcher(text)
+    matcher.replaceAll("\n")
+  }
+
+  /**
+   * Characters to strip out of tweet text at write-time.
+   */
+  val unicodeCharsToStrip: Seq[Char] =
+    Seq(
+      // BOM
+      '\uFFFE', '\uFEFF',
+      // Special
+      '\uFFFF',
+      // ltr, rtl
+      '\u200E', '\u200F',
+      // Directional change
+      '\u202A', '\u202B', '\u202C', '\u202D', '\u202E',
+      // Control characters below U+0020, leaving out line feed and carriage
+      // return, followed by U+007F
+      '\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007',
+      '\u0008', '\u0009', '\u000B', '\u000C', '\u000E', '\u000F', '\u0010', '\u0011',
+      '\u0012', '\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019',
+      '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F', '\u007F',
+      '\u2065'
+    )
+
+  /** A character class that matches any one of `unicodeCharsToStrip`. */
+  val UnicodeCharsToStripRegex: Regex = new Regex(unicodeCharsToStrip.mkString("[", "", "]"))
+
+  /**
+   * Removes control characters and other non-textual unicode chars that can break xml and/or
+   * json rendering, or be used for exploits.
+   */
+  def stripControlCharacters(text: String): String = {
+    val matcher = UnicodeCharsToStripRegex.pattern.matcher(text)
+    matcher.replaceAll("")
+  }
+
+  /** The exact character sequence that `avoidCoreTextBug` removes from tweet text. */
+  val Tweetypie674UnicodeSequence: String =
+    "\u0633\u0645\u064e\u0640\u064e\u0651\u0648\u064f\u0648\u064f\u062d\u062e " +
+      "\u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e " +
+      "\u0627\u0645\u0627\u0631\u062a\u064a\u062e \u0337\u0334\u0310\u062e"
+
+  /** `Tweetypie674UnicodeSequence` compiled as a regex. */
+  val Tweetypie674UnicodeRegex: Regex = new Regex(Tweetypie674UnicodeSequence)
+
+  /**
+   * Replaces each occurrence of `Tweetypie674UnicodeSequence` in the text with a
+   * single REPLACEMENT CHARACTER (U+FFFD).
+   *
+   * Apple has a bug in its CoreText library. This aims to prevent
+   * ios clients from being crashed when a tweet contains the specific
+   * unicode sequence.
+   */
+  def avoidCoreTextBug(text: String): String = {
+    val matcher = Tweetypie674UnicodeRegex.pattern.matcher(text)
+    matcher.replaceAll("\ufffd")
+  }
+
+  /**
+   * Same replacement as `avoidCoreTextBug` (each `Tweetypie674UnicodeSequence`
+   * becomes a REPLACEMENT CHARACTER), but expressed as an optional
+   * TextModification, which provides the information needed to also update
+   * entity indices.
+   */
+  def replaceCoreTextBugModification(text: String): Option[TextModification] =
+    TextModification.replaceAll(text, Tweetypie674UnicodeRegex, "\ufffd")
+
+  // The write-path text pipeline. The steps run in the order listed.
+  private val preprocessor: String => String = { raw =>
+    val normalized = nfcNormalize(raw)
+    val stripped = stripControlCharacters(normalized)
+    val trimmed = trimBlankCharacters(stripped)
+    val unixNewlines = normalizeNewlines(trimmed)
+    val collapsed = collapseBlankLines(unixNewlines)
+    avoidCoreTextBug(collapsed)
+  }
+
+  /**
+   * Performs the text modifications that are necessary in the write-path before extracting URLs:
+   * NFC normalization, stripping of control characters, trimming of blank characters,
+   * newline normalization, collapsing of blank lines, and the CoreText workaround.
+   */
+  def preprocessText(text: String): String =
+    preprocessor(text)
+
+  /**
+   * Replaces all `<`, `>`, and '&' chars with "&lt;", "&gt;", and "&amp;", respectively.
+   * Delegates to [[PartialHtmlEncoding.encode]].
+   *
+   * The original purpose of this was presumably to prevent script injections when
+   * displaying tweets without proper escaping.  Currently, tweets are encoded before
+   * they are stored in the database.
+   *
+   * Note that the pre-escaping of & < and > also happens in the rich text editor in javascript
+   */
+  def partialHtmlEncode(text: String): String =
+    PartialHtmlEncoding.encode(text)
+
+  /**
+   * The opposite of partialHtmlEncode, it replaces all "&lt;", "&gt;", and "&amp;" with
+   * `<`, `>`, and '&', respectively. Delegates to [[PartialHtmlEncoding.decode]].
+   */
+  def partialHtmlDecode(text: String): String =
+    PartialHtmlEncoding.decode(text)
+
+  /**
+   *
+   * Matches a (possibly empty) run of characters that are always or often rendered as
+   * blank space; the characters covered are listed below. We use this to prevent users
+   * from inserting excess blank lines and from tweeting effectively blank tweets.
+   *
+   * Note that these are not all semantically "whitespace", so this regex should not be used
+   * to process non-blank text, e.g. to separate words.
+   *
+   * Codepoints below and the `\p{Z}` regex character property alias are defined in the Unicode
+   * Character Database (UCD) at https://unicode.org/ucd/ and https://unicode.org/reports/tr44/
+   *
+   * The `\p{Z}` regex character property alias is defined specifically in UCD as:
+   *
+   * Zs |	Space_Separator	    | a space character (of various non-zero widths)
+   * Zl	| Line_Separator	    | U+2028 LINE SEPARATOR only
+   * Zp	| Paragraph_Separator	| U+2029 PARAGRAPH SEPARATOR only
+   * Z	| Separator	          | Zs | Zl | Zp
+   * ref: https://unicode.org/reports/tr44/#GC_Values_Table
+   *
+   *  U+0009  Horizontal Tab (included in \s)
+   *  U+000B  Vertical Tab (included in \s)
+   *  U+000C  Form feed  (included in \s)
+   *  U+000D  Carriage return  (included in \s)
+   *  U+0020  space  (included in \s)
+   *  U+0085  Next line (included in \u0085)
+   *  U+061C  arabic letter mark (included in \u061C)
+   *  U+00A0  no-break space (included in \p{Z})
+   *  U+00AD  soft-hyphen marker (included in \u00AD)
+   *  U+1680  ogham space mark (included in \p{Z})
+   *  U+180E  mongolian vowel separator (included in \p{Z} on jdk8 and included in \u180E on jdk11)
+   *  U+2000  en quad (included in \p{Z})
+   *  U+2001  em quad (included in \p{Z})
+   *  U+2002  en space (included in \p{Z})
+   *  U+2003  em space (included in \p{Z})
+   *  U+2004  three-per-em space (included in \p{Z})
+   *  U+2005  four-per-em space (included in \p{Z})
+   *  U+2006  six-per-em space (included in \p{Z})
+   *  U+2007  figure space (included in \p{Z})
+   *  U+2008  punctuation space (included in \p{Z})
+   *  U+2009  thin space (included in \p{Z})
+   *  U+200A  hair space (included in \p{Z})
+   *  U+200B  zero-width (included in \u200B-\u200D)
+   *  U+200C  zero-width non-joiner  (included in \u200B-\u200D)
+   *  U+200D  zero-width joiner (included in \u200B-\u200D)
+   *  U+2028  line separator (included in \p{Z})
+   *  U+2029  paragraph separator (included in \p{Z})
+   *  U+202F  narrow no-break space (included in \p{Z})
+   *  U+205F  medium mathematical space (included in \p{Z})
+   *  U+2061  function application (included in \u2061-\u2064)
+   *  U+2062  invisible times (included in \u2061-\u2064)
+   *  U+2063  invisible separator (included in \u2061-\u2064)
+   *  U+2064  invisible plus (included in \u2061-\u2064)
+   *  U+2066  left-to-right isolate (included in \u2066-\u2069)
+   *  U+2067  right-to-left isolate (included in \u2066-\u2069)
+   *  U+2068  first strong isolate (included in \u2066-\u2069)
+   *  U+2069  pop directional isolate (included in \u2066-\u2069)
+   *  U+206A  inhibit symmetric swapping (included in \u206A-\u206F)
+   *  U+206B  activate symmetric swapping (included in \u206A-\u206F)
+   *  U+206C  inhibit arabic form shaping (included in \u206A-\u206F)
+   *  U+206D  activate arabic form shaping (included in \u206A-\u206F)
+   *  U+206E  national digit shapes (included in \u206A-\u206F)
+   *  U+206F  nominal digit shapes (included in \u206A-\u206F)
+   *  U+2800  braille pattern blank (included in \u2800)
+   *  U+3164  hangul filler (see UCD Ignorable_Code_Point)
+   *  U+FFA0  halfwidth hangul filler (see UCD Ignorable_Code_Point)
+   *  U+3000  ideographic space (included in \p{Z})
+   *  U+FEFF  zero-width no-break space (explicitly included in \uFEFF)
+   */
+  val BlankTextRegex: Regex =
+    // Note the trailing `*`: the pattern matches a run of zero or more blank
+    // characters, so it also matches the empty string.
+    """[\s\p{Z}\u180E\u0085\u00AD\u061C\u200B-\u200D\u2061-\u2064\u2066-\u2069\u206A-\u206F\u2800\u3164\uFEFF\uFFA0]*""".r
+
+  /**
+   * Some of the above blank characters are valid at the start of a Tweet (and irrelevant at the end)
+   * such as characters that change the direction of text. When trimming from the start
+   * or end of text we use a smaller set of characters.
+   *
+   * Like `BlankTextRegex`, this matches a run of zero or more such characters.
+   */
+  val BlankWhenLeadingOrTrailingRegex: Regex = """[\s\p{Z}\u180E\u0085\u200B\uFEFF]*""".r
+
+  /**
+   * Matches consecutive blanks, starting at a newline: a newline followed by two or
+   * more groups, each made of a run of blank characters and a closing newline.
+   */
+  // Concatenating a Regex into a String splices in its pattern source.
+  val ConsecutiveBlankLinesRegex: Regex = ("""\n(""" + BlankTextRegex + """\n){2,}""").r
+
+  // The trimming set anchored to the start and to the end of the text, respectively.
+  val LeadingBlankCharactersRegex: Regex = ("^" + BlankWhenLeadingOrTrailingRegex).r
+  val TrailingBlankCharactersRegex: Regex = (BlankWhenLeadingOrTrailingRegex + "$").r
+
+  /**
+   * Is the given text empty, or made up of nothing but characters matched by
+   * `BlankTextRegex`?
+   */
+  def isBlank(text: String): Boolean = {
+    val matcher = BlankTextRegex.pattern.matcher(text)
+    // `matches` requires the pattern to cover the whole of the text.
+    matcher.matches()
+  }
+
+  /**
+   * See http://confluence.local.twitter.com/display/PROD/Displaying+line+breaks+in+Tweets
+   *
+   * Collapses consecutive blank lines down to a single blank line, returning the
+   * change as an optional TextModification.  We can assume that all newlines have
+   * already been normalized to just \n, so we don't have to worry about \r\n.
+   */
+  def collapseBlankLinesModification(text: String): Option[TextModification] =
+    TextModification.replaceAll(text, ConsecutiveBlankLinesRegex, "\n\n")
+
+  /**
+   * Collapses consecutive blank lines down to a single blank line: every match of
+   * `ConsecutiveBlankLinesRegex` is replaced by two newlines.
+   */
+  def collapseBlankLines(text: String): String = {
+    val matcher = ConsecutiveBlankLinesRegex.pattern.matcher(text)
+    matcher.replaceAll("\n\n")
+  }
+
+  /**
+   * Removes the run of blank characters at the start of the text, and then the run
+   * at the end of it, using the leading/trailing blank character regexes.
+   */
+  def trimBlankCharacters(text: String): String = {
+    val withoutLeading = LeadingBlankCharactersRegex.replaceFirstIn(text, "")
+    TrailingBlankCharactersRegex.replaceFirstIn(withoutLeading, "")
+  }
+
+  /** Characters that are not visible on their own. Some of these are used in combination with
+   * other visible characters, and therefore cannot be always stripped from tweets.
+   */
+  private[tweettext] val InvisibleCharacters: Seq[Char] =
+    Seq(
+      '\u2060', '\u2061', '\u2062', '\u2063', '\u2064', '\u206A', '\u206B', '\u206C', '\u206D',
+      '\u206E', '\u206F', '\u200C',
+      '\u200D', // non-printing chars with valid use in Arabic
+      '\u2009', '\u200A', '\u200B', // include very skinny spaces too
+      '\ufe00', '\ufe01', '\ufe02', '\ufe03', '\ufe04', '\ufe05', '\ufe06', '\ufe07', '\ufe08',
+      '\ufe09', '\ufe0A', '\ufe0B', '\ufe0C', '\ufe0D', '\ufe0E', '\ufe0F'
+    )
+
+  /** Matches text that is non-empty and made up of `InvisibleCharacters` only. */
+  private[tweetypie] val InvisibleUnicodePattern: Regex =
+    ("^[" + InvisibleCharacters.mkString + "]+$").r
+
+  /** Is `input` one of the `InvisibleCharacters`? */
+  def isInvisibleChar(input: Char): Boolean =
+    InvisibleCharacters.contains(input)
+
+  /** If the string consists of "invisible characters" only, replaces the whole string with
+   * spaces (`TweetText.codePointLength(text)` of them); any other string is returned as it is.
+   * The purpose of this method is to remove invisible characters when ONLY invisible characters
+   * appear between two urls, which can be a security vulnerability due to misleading behavior.
+   * These characters cannot be removed as a rule applied to the tweet, because they are used in
+   * conjunction with other characters.
+   */
+  def replaceInvisiblesWithWhitespace(text: String): String = {
+    val onlyInvisibles = InvisibleUnicodePattern.unapplySeq(text).isDefined
+    if (onlyInvisibles) " " * TweetText.codePointLength(text) else text
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala
@ -0,0 +1,24 @@
+package com.twitter.tweetypie.tweettext
+
+/**
+ * Type class describing an entity that occupies an index range within a piece of tweet text.
+ *
+ * `fromIndex` and `toIndex` read the two ends of the range and `move` produces a copy of the
+ * entity positioned at a new range. `TextModification.reindexEntity` treats `toIndex` as
+ * exclusive (it reindexes `toIndex - 1` and adds 1 back).
+ */
+trait TextEntity[T] {
+  def fromIndex(entity: T): Short
+  def toIndex(entity: T): Short
+  def move(entity: T, fromIndex: Short, toIndex: Short): T
+}
+
+/**
+ * Summoner-style helpers that forward to the implicit [[TextEntity]] instance of `T`.
+ */
+object TextEntity {
+  // resolves the type class instance for T from implicit scope
+  private def instanceOf[T](implicit ops: TextEntity[T]): TextEntity[T] = ops
+
+  def fromIndex[T: TextEntity](entity: T): Short =
+    instanceOf[T].fromIndex(entity)
+
+  def toIndex[T: TextEntity](entity: T): Short =
+    instanceOf[T].toIndex(entity)
+
+  def move[T: TextEntity](entity: T, fromIndex: Short, toIndex: Short): T =
+    instanceOf[T].move(entity, fromIndex, toIndex)
+
+  /** Moves both ends of `entity` by `offset`; each sum is truncated back to `Short`. */
+  def shift[T: TextEntity](entity: T, offset: Short): T = {
+    val ops = instanceOf[T]
+    val shiftedFrom = (ops.fromIndex(entity) + offset).toShort
+    val shiftedTo = (ops.toIndex(entity) + offset).toShort
+    ops.move(entity, shiftedFrom, shiftedTo)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala
@ -0,0 +1,232 @@
+package com.twitter.tweetypie.tweettext
+
+import scala.util.matching.Regex
+
+object TextModification {
+
+  /**
+   * Lift a text into a TextModification where `original` and `updated` text are the same
+   * and `replacements` is empty.
+   */
+  def identity(text: String): TextModification =
+    TextModification(original = text, updated = text, replacements = Nil)
+
+  /**
+   * Replace each substring that matches the regex with the substitution string, returns a
+   * TextModification object that contains the updated text and enough information to also
+   * update entity indices.
+   *
+   * Surrogate pairs are taken into account: the returned TextModification object has
+   * code-point offsets, instead of code-unit offsets.
+   *
+   * Single-pattern convenience for the varargs overload; returns None when nothing matched.
+   */
+  def replaceAll(text: String, regex: Regex, substitution: String): Option[TextModification] =
+    replaceAll(text, regex -> substitution)
+
+  /**
+   * Replaces substrings that match the given `Regex` with the corresponding substitution
+   * string.  Returns a `TextModification` that can be used to reindex entities, or None when
+   * none of the regexes matched.
+   *
+   * The substitution strings are appended literally; group references are not expanded.
+   *
+   * NOTE(review): matches of all regexes are merged and processed in order of start offset.
+   * If matches of different regexes overlap, a later match starts before `anchor` and
+   * `text.substring(anchor, m.start)` would throw - confirm callers only pass patterns whose
+   * matches cannot overlap.
+   */
+  def replaceAll(
+    text: String,
+    regexAndSubstitutions: (Regex, String)*
+  ): Option[TextModification] = {
+    // every match of every regex, paired with its substitution, ordered by start offset
+    val matches =
+      (for {
+        (r, s) <- regexAndSubstitutions
+        m <- r.findAllIn(text).matchData
+      } yield (m, s)).sortBy { case (m, _) => m.start }
+
+    if (matches.isEmpty) {
+      // no match found, return None to indicate no modifications made
+      None
+    } else {
+      val replacements = List.newBuilder[TextReplacement]
+      val indexConverter = new IndexConverter(text)
+      // contains the retained text, built up as we walk through the regex matches
+      val buf = new StringBuilder(text.length)
+      // the number of code-points copied into buf
+      var codePointsCopied = Offset.CodePoint(0)
+      // always holds the start code-unit offset to copy to buf when we encounter
+      // either a regex match or end-of-string.
+      var anchor = 0
+
+      import indexConverter.toCodePoints
+
+      for ((m, sub) <- matches) {
+        // text between the end of the previous match and the start of this one is kept as-is
+        val unchangedText = text.substring(anchor, m.start)
+        val unchangedLen = Offset.CodePoint.length(unchangedText)
+        val subLen = Offset.CodePoint.length(sub)
+
+        // copies the text up to the regex match run, plus the replacement string
+        buf.append(unchangedText).append(sub)
+        codePointsCopied += unchangedLen + subLen
+
+        // the offsets indicate the indices of the matched string in the original
+        // text, and the indices of the replacement string in the updated string
+        replacements +=
+          TextReplacement(
+            originalFrom = toCodePoints(Offset.CodeUnit(m.start)),
+            originalTo = toCodePoints(Offset.CodeUnit(m.end)),
+            updatedFrom = codePointsCopied - subLen,
+            updatedTo = codePointsCopied
+          )
+
+        anchor = m.end
+      }
+
+      // copy whatever follows the last match
+      buf.append(text.substring(anchor))
+
+      Some(TextModification(text, buf.toString, replacements.result()))
+    }
+  }
+
+  /**
+   * Inserts a string at a specified code point offset.
+   * Returns a `TextModification` that can be used to reindex entities.
+   *
+   * The insertion is recorded as a single replacement of the empty original range
+   * `[insertAt, insertAt)` by a range whose length is the code point length of `textToInsert`.
+   */
+  def insertAt(
+    originalText: String,
+    insertAt: Offset.CodePoint,
+    textToInsert: String
+  ): TextModification = {
+    // String.splitAt works on code units, so the code point offset is converted first
+    val insertAtCodeUnit = insertAt.toCodeUnit(originalText).toInt
+    val (before, after) = originalText.splitAt(insertAtCodeUnit)
+    val updatedText = s"$before$textToInsert$after"
+    val textToInsertLength = TweetText.codePointLength(textToInsert)
+
+    TextModification(
+      original = originalText,
+      updated = updatedText,
+      replacements = List(
+        TextReplacement.fromCodePoints(
+          originalFrom = insertAt.toInt,
+          originalTo = insertAt.toInt,
+          updatedFrom = insertAt.toInt,
+          updatedTo = insertAt.toInt + textToInsertLength
+        ))
+    )
+  }
+}
+
+/**
+ * Encodes information about insertions/deletions/replacements made to a string, providing
+ * the original string, the updated string, and a list of TextReplacement objects
+ * that encode the indices of the segments that were changed.  Using this information,
+ * it is possible to map an offset into the original string to an offset into the updated
+ * string, assuming the text at the offset was not within one of the modified segments.
+ *
+ * All offsets are code-points, not UTF-16 code-units.
+ */
+case class TextModification(
+  original: String,
+  updated: String,
+  replacements: List[TextReplacement]) {
+  // code point length of the original text; upper bound for reindexable offsets
+  private val originalLen = Offset.CodePoint.length(original)
+
+  /**
+   * Using an offset into the original String, computes the equivalent offset into the updated
+   * string.  If the offset falls within a segment that was removed/replaced, None is returned.
+   */
+  def reindex(index: Offset.CodePoint): Option[Offset.CodePoint] =
+    reindex(index, Offset.CodePoint(0), replacements)
+
+  /**
+   * Reindexes an entity of type T.  Returns the updated entity, or None if either the `fromIndex`
+   * or `toIndex` value is now out of range.
+   *
+   * The end of the entity is reindexed through `toIndex - 1` and 1 is added back to the
+   * result, i.e. `toIndex` is handled as an exclusive bound.
+   */
+  def reindexEntity[T: TextEntity](e: T): Option[T] =
+    for {
+      from <- reindex(Offset.CodePoint(TextEntity.fromIndex(e)))
+      to <- reindex(Offset.CodePoint(TextEntity.toIndex(e) - 1))
+    } yield TextEntity.move(e, from.toShort, (to.toShort + 1).toShort)
+
+  /**
+   * Reindexes a sequence of entities of type T.  Some entities could be filtered
+   * out if they span a region of text that has been removed.
+   */
+  def reindexEntities[T: TextEntity](es: Seq[T]): Seq[T] =
+    for (e <- es; e2 <- reindexEntity(e)) yield e2
+
+  /**
+   * Swaps `original` and `updated` text and inverts all `TextReplacement` instances.
+   */
+  def inverse: TextModification =
+    TextModification(updated, original, replacements.map(_.inverse))
+
+  // recursively walks through the list of TextReplacement objects computing
+  // offsets to add/subtract from 'shift', which accumulates all changes and
+  // then gets added to index at the end.
+  //
+  // NOTE(review): the walk stops at the first replacement that starts after `index`, so it
+  // presumably expects `replacements` to be ordered by original offset (TextModification.replaceAll
+  // builds them in that order) - confirm for instances constructed elsewhere.
+  private def reindex(
+    index: Offset.CodePoint,
+    shift: Offset.CodePoint,
+    reps: List[TextReplacement]
+  ): Option[Offset.CodePoint] =
+    reps match {
+      case Nil =>
+        // past every replacement: only offsets within [0, originalLen] are mapped
+        if (index.toInt >= 0 && index <= originalLen)
+          Some(index + shift)
+        else
+          None
+      case (r @ TextReplacement(fr, to, _, _)) :: tail =>
+        // before this replacement: apply the shift accumulated so far
+        if (index < fr) Some(index + shift)
+        // inside the replaced segment [fr, to): no equivalent offset exists
+        else if (index < to) None
+        // after this replacement: account for its length change and keep walking
+        else reindex(index, shift + r.lengthDelta, tail)
+    }
+}
+
+object TextReplacement {
+
+  /**
+   * Builds a [[TextReplacement]] from plain `Int` offsets by wrapping each of them in an
+   * `Offset.CodePoint`.
+   */
+  def fromCodePoints(
+    originalFrom: Int,
+    originalTo: Int,
+    updatedFrom: Int,
+    updatedTo: Int
+  ): TextReplacement = {
+    def codePoint(offset: Int): Offset.CodePoint = Offset.CodePoint(offset)
+
+    TextReplacement(
+      originalFrom = codePoint(originalFrom),
+      originalTo = codePoint(originalTo),
+      updatedFrom = codePoint(updatedFrom),
+      updatedTo = codePoint(updatedTo)
+    )
+  }
+}
+
+/**
+ * Describes how one segment of an original text maps onto its replacement segment in the
+ * updated text.  An empty replacement segment (updatedTo == updatedFrom) means the original
+ * segment was removed.
+ *
+ * All offsets are code-points, not UTF16 code-units.
+ *
+ * `originalFrom` and `updatedFrom` are inclusive.
+ * `originalTo` and `updatedTo` are exclusive.
+ */
+case class TextReplacement(
+  originalFrom: Offset.CodePoint,
+  originalTo: Offset.CodePoint,
+  updatedFrom: Offset.CodePoint,
+  updatedTo: Offset.CodePoint) {
+
+  /** Length of the segment in the original text. */
+  def originalLength: Offset.CodePoint = originalTo - originalFrom
+
+  /** Length of the segment in the updated text. */
+  def updatedLength: Offset.CodePoint = updatedTo - updatedFrom
+
+  /** How much the text grew (or, if negative, shrank) because of this replacement. */
+  def lengthDelta: Offset.CodePoint = updatedLength - originalLength
+
+  /** Moves only the original range by `offset`. */
+  def shiftOriginal(offset: Offset.CodePoint): TextReplacement = {
+    val movedFrom = originalFrom + offset
+    val movedTo = originalTo + offset
+    copy(originalFrom = movedFrom, originalTo = movedTo)
+  }
+
+  /** Moves only the updated range by `offset`. */
+  def shiftUpdated(offset: Offset.CodePoint): TextReplacement = {
+    val movedFrom = updatedFrom + offset
+    val movedTo = updatedTo + offset
+    copy(updatedFrom = movedFrom, updatedTo = movedTo)
+  }
+
+  /** Moves both the original and the updated range by `offset`. */
+  def shift(offset: Offset.CodePoint): TextReplacement =
+    shiftOriginal(offset).shiftUpdated(offset)
+
+  /** Swaps the roles of the original and the updated range. */
+  def inverse: TextReplacement =
+    copy(
+      originalFrom = updatedFrom,
+      originalTo = updatedTo,
+      updatedFrom = originalFrom,
+      updatedTo = originalTo
+    )
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala
@ -0,0 +1,159 @@
+package com.twitter.tweetypie.tweettext
+
+import com.twitter.tweetypie.tweettext.TweetText._
+import com.twitter.twittertext.Extractor
+import java.lang.Character
+import scala.annotation.tailrec
+import scala.collection.JavaConverters._
+
+object Truncator {
+  // single-character horizontal ellipsis appended by truncateForRetweet
+  val Ellipsis = "\u2026"
+
+  /**
+   * Truncate tweet text for a retweet. If the text is longer than
+   * either of the length limits, code points are cut off from the end
+   * of the text and replaced with an ellipsis. We keep as much of the
+   * leading text as possible, subject to these constraints:
+   *
+   * - There are no more than `OriginalMaxDisplayLength` characters.
+   *
+   * - When converted to UTF-8, the result does not exceed `OriginalMaxUtf8Length`.
+   *
+   * - We do not break within a single grapheme cluster.
+   *
+   * The input is assumed to be partial HTML-encoded and may or may
+   * not be NFC normalized. The result will be partial HTML-encoded
+   * and will be NFC normalized.
+   */
+  def truncateForRetweet(input: String): String = truncateWithEllipsis(input, Ellipsis)
+
+  /**
+   * Truncate to [[com.twitter.tweetypie.tweettext.TweetText#OriginalMaxDisplayLength]] display
+   * units, using "..." as an ellipsis. The resulting text is guaranteed to pass our tweet length
+   * check, but it is not guaranteed to fit in a SMS message.
+   */
+  def truncateForSms(input: String): String = truncateWithEllipsis(input, "...")
+
+  /**
+   * Check the length of the given text, and truncate it if it is longer
+   * than the allowed length for a Tweet. The result of this method will
+   * always have:
+   *
+   * - Display length <= OriginalMaxDisplayLength.
+   * - Length when encoded as UTF-8 <= OriginalMaxUtf8Length.
+   *
+   * If the input would violate this, then the text will be
+   * truncated. When the text is truncated, it will be truncated such
+   * that:
+   *
+   * - Grapheme clusters will not be split.
+   * - The last character before the ellipsis will not be a whitespace
+   *   character.
+   * - The ellipsis text will be appended to the end.
+   */
+  private[this] def truncateWithEllipsis(input: String, ellipsis: String): String = {
+    val text = nfcNormalize(input)
+    val truncateAt =
+      truncationPoint(text, OriginalMaxDisplayLength, OriginalMaxUtf8Length, Some(ellipsis))
+    // a truncation point at the very end of the text means nothing has to be cut off
+    if (truncateAt.codeUnitOffset.toInt == text.length) text
+    else text.take(truncateAt.codeUnitOffset.toInt) + ellipsis
+  }
+
+  /**
+   * Indicates a potential TruncationPoint in piece of text.
+   *
+   * @param codeUnitOffset the utf-16 code unit offset of the truncation point
+   * @param codePointOffset the offset in code points
+   */
+  case class TruncationPoint(codeUnitOffset: Offset.CodeUnit, codePointOffset: Offset.CodePoint)
+
+  /**
+   * Computes a TruncationPoint for the given text and length constraints.  The result holds
+   * both the code-unit and code-point offsets at which to cut the text.  When the walk reaches
+   * the last break point without exceeding either limit, the position of that last break point
+   * is returned (callers such as `truncateWithEllipsis` compare it with `text.length` to detect
+   * that no truncation is needed).
+   *
+   * Candidate break points are the grapheme ends reported by `GraphemeIndexIterator.ends`,
+   * excluding those contained in `Offset.Ranges.htmlEntities(text)`.
+   *
+   * Text should be NFC normalized first for best results.
+   *
+   * @param withEllipsis if defined, the truncation point will be computed so that there is space
+   * to append the given ellipsis string and to still remain within the limits.
+   *
+   * @param atomicUnits may contain a list of ranges that should be treated as atomic unit and
+   * not split.  each tuple is half-open range in code points.
+   */
+  def truncationPoint(
+    text: String,
+    maxDisplayLength: Int = OriginalMaxDisplayLength,
+    maxByteLength: Int = OriginalMaxUtf8Length,
+    withEllipsis: Option[String] = None,
+    atomicUnits: Offset.Ranges[Offset.CodePoint] = Offset.Ranges.Empty
+  ): TruncationPoint = {
+    val breakPoints =
+      GraphemeIndexIterator
+        .ends(text)
+        .filterNot(Offset.Ranges.htmlEntities(text).contains)
+
+    // limits that apply to the retained text when an ellipsis still has to be appended
+    val ellipsisDisplayUnits =
+      withEllipsis.map(Offset.DisplayUnit.length).getOrElse(Offset.DisplayUnit(0))
+    val maxTruncatedDisplayLength = Offset.DisplayUnit(maxDisplayLength) - ellipsisDisplayUnits
+
+    val ellipsisByteLength = withEllipsis.map(Offset.Utf8.length).getOrElse(Offset.Utf8(0))
+    val maxTruncatedByteLength = Offset.Utf8(maxByteLength) - ellipsisByteLength
+
+    // current position of the walk, expressed in all four units
+    var codeUnit = Offset.CodeUnit(0)
+    var codePoint = Offset.CodePoint(0)
+    var displayLength = Offset.DisplayUnit(0)
+    var byteLength = Offset.Utf8(0)
+    // last position at which the text could be cut while leaving room for the ellipsis
+    var truncateCodeUnit = codeUnit
+    var truncateCodePoint = codePoint
+
+    @tailrec def go(): TruncationPoint =
+      if (displayLength.toInt > maxDisplayLength || byteLength.toInt > maxByteLength) {
+        // the text up to the current position no longer fits: cut at the last recorded point
+        TruncationPoint(truncateCodeUnit, truncateCodePoint)
+      } else if (codeUnit != truncateCodeUnit &&
+        displayLength <= maxTruncatedDisplayLength &&
+        byteLength <= maxTruncatedByteLength &&
+        (codeUnit.toInt == 0 || !Character.isWhitespace(text.codePointBefore(codeUnit.toInt))) &&
+        !atomicUnits.contains(codePoint)) {
+        // we can advance the truncation point
+        truncateCodeUnit = codeUnit
+        truncateCodePoint = codePoint
+        go()
+      } else if (breakPoints.hasNext) {
+        // there are further truncation points to consider
+        val nextCodeUnit = breakPoints.next
+        codePoint += Offset.CodePoint.count(text, codeUnit, nextCodeUnit)
+        displayLength += Offset.DisplayUnit.count(text, codeUnit, nextCodeUnit)
+        byteLength += Offset.Utf8.count(text, codeUnit, nextCodeUnit)
+        codeUnit = nextCodeUnit
+        go()
+      } else {
+        // no break points left and no limit exceeded: return the current position
+        TruncationPoint(codeUnit, codePoint)
+      }
+
+    go()
+  }
+
+  /**
+   * Truncate the given text, avoiding chopping HTML entities and tweet
+   * entities. This should only be used for testing because it performs
+   * entity extraction, and so is very inefficient.
+   *
+   * No ellipsis is appended.
+   */
+  def truncateForTests(
+    input: String,
+    maxDisplayLength: Int = OriginalMaxDisplayLength,
+    maxByteLength: Int = OriginalMaxUtf8Length
+  ): String = {
+    val text = nfcNormalize(input)
+    val extractor = new Extractor
+    val entities = extractor.extractEntitiesWithIndices(text)
+    // truncationPoint expects atomic units as code point ranges
+    extractor.modifyIndicesFromUTF16ToUnicode(text, entities)
+    val avoid = Offset.Ranges.fromCodePointPairs(
+      entities.asScala.map(e => (e.getStart().intValue, e.getEnd().intValue))
+    )
+    val truncateAt = truncationPoint(text, maxDisplayLength, maxByteLength, None, avoid)
+    text.take(truncateAt.codeUnitOffset.toInt)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala
@ -0,0 +1,62 @@
+package com.twitter.tweetypie.tweettext
+
+import java.text.Normalizer
+
+object TweetText {
+
+  /** Display length limit of the original tweet format, taking into account normalization. */
+  private[tweetypie] val OriginalMaxDisplayLength: Int = 140
+
+  /** Upper bound on the visible code points of a tweet when its length is counted by code
+   * points, taking into account normalization. See also [[MaxVisibleWeightedEmojiLength]].
+   */
+  private[tweetypie] val MaxVisibleWeightedLength: Int = 280
+
+  /** Upper bound on the visible code points of a tweet when its length is counted by emoji,
+   * taking into account normalization. See also [[MaxVisibleWeightedLength]].
+   *
+   * The value is the product of:
+   *  - 140, the max number of visible, fully-weighted Emojis per Twitter's cramming rules
+   *  - 10, the max number of Code Points per Emoji
+   */
+  private[tweetypie] val MaxVisibleWeightedEmojiLength: Int = 140 * 10
+
+  /** Byte limit applied when truncating tweet text for a retweet.  Originally this was the
+   * max UTF-8 length when tweets were at most 140 characters.
+   * See also [[OriginalMaxDisplayLength]].
+   */
+  private[tweetypie] val OriginalMaxUtf8Length: Int = 600
+
+  /** Byte limit for tweet text encoded as utf-8. */
+  private[tweetypie] val MaxUtf8Length: Int = 5708
+
+  /** Mention limit for tweet text.  This is enforced at tweet creation time */
+  private[tweetypie] val MaxMentions: Int = 50
+
+  /** Url limit for tweet text.  This is enforced at tweet creation time */
+  private[tweetypie] val MaxUrls: Int = 10
+
+  /** Hashtag limit for tweet text.  This is enforced at tweet creation time */
+  private[tweetypie] val MaxHashtags: Int = 50
+
+  /** Cashtag limit for tweet text.  This is enforced at tweet creation time */
+  private[tweetypie] val MaxCashtags: Int = 50
+
+  /** Length limit of a single hashtag (not including the '#') */
+  private[tweetypie] val MaxHashtagLength: Int = 100
+
+  /**
+   * Returns `text` converted to Unicode normalization form NFC.
+   */
+  def nfcNormalize(text: String): String = {
+    val form = Normalizer.Form.NFC
+    Normalizer.normalize(text, form)
+  }
+
+  /**
+   * Counts the "characters" of this text as display units. See
+   * [[Offset.DisplayUnit]].
+   */
+  def displayLength(text: String): Int = {
+    val displayUnits = Offset.DisplayUnit.length(text)
+    displayUnits.toInt
+  }
+
+  /**
+   * Counts the Unicode code points of this String.
+   */
+  def codePointLength(text: String): Int = {
+    val codePoints = Offset.CodePoint.length(text)
+    codePoints.toInt
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD
@ -0,0 +1,76 @@
+# Library built from every *.scala source in this directory and published as the
+# com.twitter.tweetypie:util artifact.
+# NOTE(review): no explicit `name` is given; presumably the target name defaults to the
+# directory name - confirm.
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    provides = scala_artifact(
+        org = "com.twitter.tweetypie",
+        name = "util",
+        repo = artifactory,
+    ),
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "//:scala-reflect",
+        "3rdparty/jvm/commons-codec",
+        "3rdparty/jvm/org/apache/thrift:libthrift",
+        "finagle/finagle-core/src/main",
+        "mediaservices/commons/src/main/thrift:thrift-scala",
+        "scrooge/scrooge-serializer/src/main/scala",
+        "tweetypie/servo/repo",
+        "tweetypie/servo/util",
+        "tweetypie/servo/util/src/main/scala:exception",
+        "src/scala/com/twitter/takedown/util",
+        "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala",
+        "src/thrift/com/twitter/escherbird:media-annotation-structs-scala",
+        "src/thrift/com/twitter/expandodo:cards-scala",
+        "src/thrift/com/twitter/gizmoduck:thrift-scala",
+        "src/thrift/com/twitter/servo:servo-exception-scala",
+        "src/thrift/com/twitter/spam/rtf:safety-label-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
+        "stitch/stitch-core",
+        "tweet-util",
+        "util/util-core:scala",
+    ],
+)
+
+# Narrower target that contains only EditControlUtil.scala and package.scala (both are also
+# covered by the "*.scala" glob of the target above), published as
+# com.twitter.tweetypie:util-EditControlUtil with a shorter dependency list.
+# NOTE(review): EditControlUtil.scala imports com.twitter.servo.util.Gate while
+# "tweetypie/servo/util" is not listed here and strict_deps is on - confirm that Gate is
+# provided by "tweetypie/servo/util/src/main/scala:exception".
+scala_library(
+    name = "EditControlUtil",
+    sources = [
+        "EditControlUtil.scala",
+        "package.scala",
+    ],
+    compiler_option_sets = ["fatal_warnings"],
+    platform = "java8",
+    provides = scala_artifact(
+        org = "com.twitter.tweetypie",
+        name = "util-EditControlUtil",
+        repo = artifactory,
+    ),
+    strict_deps = True,
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "//:scala-reflect",
+        "3rdparty/jvm/commons-codec",
+        "3rdparty/jvm/org/apache/thrift:libthrift",
+        "finagle/finagle-core/src/main",
+        "mediaservices/commons/src/main/thrift:thrift-scala",
+        "scrooge/scrooge-serializer/src/main/scala",
+        "tweetypie/servo/util/src/main/scala:exception",
+        "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala",
+        "src/thrift/com/twitter/escherbird:media-annotation-structs-scala",
+        "src/thrift/com/twitter/expandodo:cards-scala",
+        "src/thrift/com/twitter/gizmoduck:thrift-scala",
+        "src/thrift/com/twitter/servo:servo-exception-scala",
+        "src/thrift/com/twitter/spam/rtf:safety-label-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala",
+        "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
+        "stitch/stitch-core",
+        "tweet-util",
+        "util/util-core:scala",
+    ],
+)
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala
@ -0,0 +1,29 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.escherbird.thriftscala.TweetEntityAnnotation
+import com.twitter.tweetypie.thriftscala.EscherbirdEntityAnnotations
+import com.twitter.tweetypie.thriftscala.Tweet
+
+object CommunityAnnotation {
+
+  val groupId: Long = 8
+  val domainId: Long = 31
+
+  /** Builds an annotation with the community group and domain ids and `communityId` as its
+   * entity id.
+   */
+  def apply(communityId: Long): TweetEntityAnnotation =
+    TweetEntityAnnotation(groupId, domainId, entityId = communityId)
+
+  /** Extracts the entity id when the annotation carries the community group and domain ids. */
+  def unapply(annotation: TweetEntityAnnotation): Option[Long] =
+    Some(annotation).collect {
+      case TweetEntityAnnotation(`groupId`, `domainId`, communityId) => communityId
+    }
+
+  // Collects the ids of all community annotations found in the tweet's escherbird entity
+  // annotations. The result is None - never Some(Seq()) - both when the field is absent and
+  // when it holds no community annotation at all.
+  def additionalFieldsToCommunityIDs(additionalFields: Tweet): Option[Seq[Long]] = {
+    val communityIds = additionalFields.escherbirdEntityAnnotations.map {
+      case EscherbirdEntityAnnotations(annotations) =>
+        annotations.collect { case CommunityAnnotation(communityId) => communityId }
+    }
+    communityIds.filter(_.nonEmpty)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala
@ -0,0 +1,19 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.tweetypie.thriftscala.Communities
+
+object CommunityUtil {
+
+  /** The community ids held by `maybeCommunities`, or `Nil` when it is `None`. */
+  def communityIds(maybeCommunities: Option[Communities]): Seq[Long] =
+    maybeCommunities.map(_.communityIds).getOrElse(Nil)
+
+  /** True when `maybeCommunities` is defined and lists at least one community id. */
+  def hasCommunity(maybeCommunities: Option[Communities]): Boolean =
+    maybeCommunities match {
+      case Some(communities) => communities.communityIds.nonEmpty
+      case None => false
+    }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala
@ -0,0 +1,112 @@
+package com.twitter.tweetypie
+package util
+
+import com.twitter.tweetypie.thriftscala._
+
+object ConversationControls {
+
+  /** Builders for the `TweetCreateConversationControl` variants. */
+  object Create {
+    def byInvitation(
+      inviteViaMention: Option[Boolean] = None
+    ): TweetCreateConversationControl.ByInvitation = {
+      val settings =
+        TweetCreateConversationControlByInvitation(inviteViaMention = inviteViaMention)
+      TweetCreateConversationControl.ByInvitation(settings)
+    }
+
+    def community(
+      inviteViaMention: Option[Boolean] = None
+    ): TweetCreateConversationControl.Community = {
+      val settings =
+        TweetCreateConversationControlCommunity(inviteViaMention = inviteViaMention)
+      TweetCreateConversationControl.Community(settings)
+    }
+
+    def followers(
+      inviteViaMention: Option[Boolean] = None
+    ): TweetCreateConversationControl.Followers = {
+      val settings =
+        TweetCreateConversationControlFollowers(inviteViaMention = inviteViaMention)
+      TweetCreateConversationControl.Followers(settings)
+    }
+  }
+
+  /**
+   * Pairs each create-time conversation control with a function producing the
+   * `ConversationControl` expected for a given author and list of invited users.
+   */
+  object Scenario {
+    case class CommonScenario(
+      createConversationControl: TweetCreateConversationControl,
+      descriptionSuffix: String,
+      expectedConversationControl: (UserId, Seq[UserId]) => ConversationControl,
+      inviteViaMention: Option[Boolean])
+
+    def mkCommunityScenario(inviteViaMention: Option[Boolean]): CommonScenario =
+      CommonScenario(
+        createConversationControl = Create.community(inviteViaMention = inviteViaMention),
+        descriptionSuffix = "community",
+        expectedConversationControl =
+          (authorId, userIds) => community(userIds, authorId, inviteViaMention),
+        inviteViaMention = inviteViaMention
+      )
+
+    def mkByInvitationScenario(inviteViaMention: Option[Boolean]): CommonScenario =
+      CommonScenario(
+        createConversationControl = Create.byInvitation(inviteViaMention = inviteViaMention),
+        descriptionSuffix = "invited users",
+        expectedConversationControl =
+          (authorId, userIds) => byInvitation(userIds, authorId, inviteViaMention),
+        inviteViaMention = inviteViaMention
+      )
+
+    def mkFollowersScenario(inviteViaMention: Option[Boolean]): CommonScenario =
+      CommonScenario(
+        createConversationControl = Create.followers(inviteViaMention = inviteViaMention),
+        descriptionSuffix = "followers",
+        expectedConversationControl =
+          (authorId, userIds) => followers(userIds, authorId, inviteViaMention),
+        inviteViaMention = inviteViaMention
+      )
+
+    // one scenario without and one with invite-via-mention for each kind of control
+    val communityScenario: CommonScenario = mkCommunityScenario(inviteViaMention = None)
+    val communityInviteViaMentionScenario: CommonScenario =
+      mkCommunityScenario(inviteViaMention = Some(true))
+
+    val byInvitationScenario: CommonScenario = mkByInvitationScenario(inviteViaMention = None)
+    val byInvitationInviteViaMentionScenario: CommonScenario =
+      mkByInvitationScenario(inviteViaMention = Some(true))
+
+    val followersScenario: CommonScenario = mkFollowersScenario(inviteViaMention = None)
+    val followersInviteViaMentionScenario: CommonScenario =
+      mkFollowersScenario(inviteViaMention = Some(true))
+  }
+
+  /** Wraps the given author, invited users and flag in a `ConversationControl.ByInvitation`. */
+  def byInvitation(
+    invitedUserIds: Seq[UserId],
+    conversationTweetAuthorId: UserId,
+    inviteViaMention: Option[Boolean] = None
+  ): ConversationControl = {
+    val control = ConversationControlByInvitation(
+      conversationTweetAuthorId = conversationTweetAuthorId,
+      invitedUserIds = invitedUserIds,
+      inviteViaMention = inviteViaMention
+    )
+    ConversationControl.ByInvitation(control)
+  }
+
+  /** Wraps the given author, invited users and flag in a `ConversationControl.Community`. */
+  def community(
+    invitedUserIds: Seq[UserId],
+    conversationTweetAuthorId: UserId,
+    inviteViaMention: Option[Boolean] = None
+  ): ConversationControl = {
+    val control = ConversationControlCommunity(
+      conversationTweetAuthorId = conversationTweetAuthorId,
+      invitedUserIds = invitedUserIds,
+      inviteViaMention = inviteViaMention
+    )
+    ConversationControl.Community(control)
+  }
+
+  /** Wraps the given author, invited users and flag in a `ConversationControl.Followers`. */
+  def followers(
+    invitedUserIds: Seq[UserId],
+    conversationTweetAuthorId: UserId,
+    inviteViaMention: Option[Boolean] = None
+  ): ConversationControl = {
+    val control = ConversationControlFollowers(
+      conversationTweetAuthorId = conversationTweetAuthorId,
+      invitedUserIds = invitedUserIds,
+      inviteViaMention = inviteViaMention
+    )
+    ConversationControl.Followers(control)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala
@ -0,0 +1,174 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.servo.util.Gate
+import com.twitter.tweetypie.util.TweetEditFailure.TweetEditInvalidEditControlException
+import com.twitter.tweetypie.util.TweetEditFailure.TweetEditUpdateEditControlException
+import com.twitter.tweetypie.thriftscala.EditControl
+import com.twitter.tweetypie.thriftscala.EditControlEdit
+import com.twitter.tweetypie.thriftscala.EditControlInitial
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.util.Try
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import com.twitter.util.Time
+import com.twitter.util.Duration
+
+object EditControlUtil {
+
+  // Upper bound on the number of edits of a tweet; `editsRemaining` is derived from it.
+  val maxTweetEditsAllowed = 5
+  // Edit window used unless the sixty-minute gate is open (see makeEditControlInitial).
+  val oldEditTimeWindow = Duration.fromMinutes(30)
+  // Edit window used when the sixty-minute gate is open.
+  val editTimeWindow = Duration.fromMinutes(60)
+
+  // EditControl for a tweet that is an edit of the tweet with id `initialTweetId`.
+  def editControlEdit(
+    initialTweetId: TweetId,
+    editControlInitial: Option[EditControlInitial] = None
+  ): EditControl.Edit =
+    EditControl.Edit(
+      EditControlEdit(initialTweetId = initialTweetId, editControlInitial = editControlInitial))
+
+  // EditControl for the tweet that is not an edit, that is, any regular tweet we create
+  // that can, potentially, be edited later.
+  // The tweet is editable until `createdAt` plus the edit window, which is `editTimeWindow`
+  // when the gate is open and `oldEditTimeWindow` otherwise.
+  def makeEditControlInitial(
+    tweetId: TweetId,
+    createdAt: Time,
+    setEditWindowToSixtyMinutes: Gate[Unit] = Gate(_ => false)
+  ): EditControl.Initial = {
+    val editWindow = if (setEditWindowToSixtyMinutes()) editTimeWindow else oldEditTimeWindow
+    val initial = EditControlInitial(
+      editTweetIds = Seq(tweetId),
+      editableUntilMsecs = Some(createdAt.plus(editWindow).inMilliseconds),
+      editsRemaining = Some(maxTweetEditsAllowed),
+      isEditEligible = defaultIsEditEligible,
+    )
+    EditControl.Initial(initial)
+  }
+
+  // Returns whether a given latestTweetId is the latest edit in the EditControl.
+  // Fails with TweetEditInvalidEditControlException when no EditControlInitial is available.
+  def isLatestEdit(
+    tweetEditControl: Option[EditControl],
+    latestTweetId: TweetId
+  ): Try[Boolean] = {
+    tweetEditControl match {
+      case Some(EditControl.Initial(initial)) =>
+        isLatestEditFromEditControlInitial(Some(initial), latestTweetId)
+      case Some(EditControl.Edit(edit)) =>
+        isLatestEditFromEditControlInitial(
+          edit.editControlInitial,
+          latestTweetId
+        )
+      case _ => Throw(TweetEditInvalidEditControlException)
+    }
+  }
+
+  // Returns whether a given latestTweetId is the latest edit in the EditControlInitial.
+  // A missing EditControlInitial, or one whose editTweetIds is empty, is reported as
+  // Throw(TweetEditInvalidEditControlException) so that no exception escapes the Try.
+  private def isLatestEditFromEditControlInitial(
+    initialTweetEditControl: Option[EditControlInitial],
+    latestTweetId: TweetId
+  ): Try[Boolean] = {
+    initialTweetEditControl.flatMap(_.editTweetIds.lastOption) match {
+      case Some(lastEditId) => Return(latestTweetId == lastEditId)
+      case None => Throw(TweetEditInvalidEditControlException)
+    }
+  }
+
+  /* Create an updated edit control for an initialTweet given the id of the new edit */
+  def editControlForInitialTweet(
+    initialTweet: Tweet,
+    newEditId: TweetId
+  ): Try[EditControl.Initial] = {
+    initialTweet.editControl match {
+      case Some(EditControl.Initial(initial)) =>
+        Return(EditControl.Initial(plusEdit(initial, newEditId)))
+
+      // an edit tweet cannot be used as the initial tweet
+      case Some(EditControl.Edit(_)) => Throw(TweetEditUpdateEditControlException)
+
+      case _ =>
+        // No usable edit control on the tweet: build a fresh initial control from its core data.
+        // NOTE(review): `newEditId` is not recorded in the control built here - confirm that
+        // this is intended.
+        initialTweet.coreData match {
+          case Some(coreData) =>
+            Return(
+              makeEditControlInitial(
+                tweetId = initialTweet.id,
+                createdAt = Time.fromMilliseconds(coreData.createdAtSecs * 1000),
+                setEditWindowToSixtyMinutes = Gate(_ => true)
+              )
+            )
+          case None => Throw(new Exception("Tweet Missing Required CoreData"))
+        }
+    }
+  }
+
+  // Copy of `tweet` whose edit control is the result of editControlForInitialTweet.
+  def updateEditControl(tweet: Tweet, newEditId: TweetId): Try[Tweet] =
+    editControlForInitialTweet(tweet, newEditId).map { editControl =>
+      tweet.copy(editControl = Some(editControl))
+    }
+
+  // Adds `newEditId` to the edit chain (de-duplicated and sorted) and recomputes editsRemaining.
+  // NOTE(review): editsRemaining becomes negative once more than maxTweetEditsAllowed edits are
+  // recorded - confirm that the limit is enforced before this is called.
+  def plusEdit(initial: EditControlInitial, newEditId: TweetId): EditControlInitial = {
+    val newEditTweetIds = (initial.editTweetIds :+ newEditId).distinct.sorted
+    val editsCount = newEditTweetIds.size - 1 // as there is the original tweet ID there too.
+    initial.copy(
+      editTweetIds = newEditTweetIds,
+      editsRemaining = Some(maxTweetEditsAllowed - editsCount),
+    )
+  }
+
+  // The ID of the initial Tweet if this is an edit
+  def getInitialTweetIdIfEdit(tweet: Tweet): Option[TweetId] = tweet.editControl match {
+    case Some(EditControl.Edit(edit)) => Some(edit.initialTweetId)
+    case _ => None
+  }
+
+  // If this is the first tweet in an edit chain, return the same tweet id
+  // otherwise return the result of getInitialTweetIdIfEdit
+  def getInitialTweetId(tweet: Tweet): TweetId =
+    getInitialTweetIdIfEdit(tweet).getOrElse(tweet.id)
+
+  // true when the tweet is not an edit of another tweet
+  def isInitialTweet(tweet: Tweet): Boolean =
+    getInitialTweetId(tweet) == tweet.id
+
+  // Extracted just so that we can easily track where the values of isEditEligible is coming from.
+  private def defaultIsEditEligible: Option[Boolean] = Some(true)
+
+  // returns true if it's an edit of a Tweet or an initial Tweet that's been edited
+  def isEditTweet(tweet: Tweet): Boolean =
+    tweet.editControl match {
+      // an initial tweet whose chain holds at most its own id has not been edited
+      case Some(eci: EditControl.Initial) if eci.initial.editTweetIds.size <= 1 => false
+      case Some(_: EditControl.Initial) | Some(_: EditControl.Edit) | Some(
+            EditControl.UnknownUnionField(_)) =>
+        true
+      case None => false
+    }
+
+  // returns true if editControl is from an edit of a Tweet (or is an unknown union field)
+  // returns false for any other state, including edit initial.
+  def isEditControlEdit(editControl: EditControl): Boolean = {
+    editControl match {
+      case _: EditControl.Edit | EditControl.UnknownUnionField(_) => true
+      case _ => false
+    }
+  }
+
+  // The ids of the edit chain, taken from the EditControlInitial that is either the control
+  // itself or embedded in an Edit control; fails when no EditControlInitial is available.
+  def getEditTweetIds(editControl: Option[EditControl]): Try[Seq[TweetId]] = {
+    editControl match {
+      case Some(EditControl.Edit(EditControlEdit(_, Some(eci)))) =>
+        Return(eci.editTweetIds)
+      case Some(EditControl.Initial(initial)) =>
+        Return(initial.editTweetIds)
+      case _ =>
+        Throw(new Exception(s"EditControlInitial not found in $editControl"))
+    }
+  }
+}
+
+object TweetEditFailure {
+
+  /** Common base class of the tweet edit failures defined in this object. */
+  abstract class TweetEditException(msg: String) extends Exception(msg)
+
+  /** Failure with the message "Initial EditControl not found". */
+  case object TweetEditGetInitialEditControlException
+      extends TweetEditException(msg = "Initial EditControl not found")
+
+  /** Failure with the message "Invalid EditControl for initial_tweet". */
+  case object TweetEditInvalidEditControlException
+      extends TweetEditException(msg = "Invalid EditControl for initial_tweet")
+
+  /** Failure with the message "Invalid Edit Control Update". */
+  case object TweetEditUpdateEditControlException
+      extends TweetEditException(msg = "Invalid Edit Control Update")
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala
@ -0,0 +1,45 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.finagle.Backoff
+import com.twitter.finagle.service.RetryPolicy
+import com.twitter.finagle.service.RetryPolicy.RetryableWriteException
+import com.twitter.servo.exception.thriftscala.ServerError
+import com.twitter.util.Duration
+import com.twitter.util.Throw
+import com.twitter.util.TimeoutException
+import com.twitter.util.Try
+
+/**
+ * Factory methods for backoff-based [[RetryPolicy]]s over `Try` results. Each builder differs
+ * only in which failures it selects for retry.
+ */
+object RetryPolicyBuilder {
+
+  /**
+   * Shared construction: wraps the backoff stream and hands `shouldRetry` to
+   * `RetryPolicy.backoff` unchanged.
+   */
+  private[this] def retryWhen[A](
+    backoffs: Stream[Duration]
+  )(
+    shouldRetry: PartialFunction[Try[A], Boolean]
+  ): RetryPolicy[Try[A]] =
+    RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs))(shouldRetry)
+
+  /**
+   * Retry on any exception.
+   */
+  def anyFailure[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] =
+    retryWhen[A](backoffs) { case Throw(_) => true }
+
+  /**
+   * Retry on com.twitter.util.TimeoutException
+   */
+  def timeouts[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] =
+    retryWhen[A](backoffs) { case Throw(_: TimeoutException) => true }
+
+  /**
+   * Retry on com.twitter.finagle.service.RetryableWriteExceptions
+   */
+  def writes[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] =
+    retryWhen[A](backoffs) { case Throw(RetryableWriteException(_)) => true }
+
+  /**
+   * Retry on com.twitter.servo.exception.thriftscala.ServerError
+   */
+  def servoServerError[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] =
+    retryWhen[A](backoffs) { case Throw(ServerError(_)) => true }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala
@ -0,0 +1,54 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.finagle.stats.Stat
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.servo
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import com.twitter.stitch.Stitch
+
+/**
+ * Helpers for instrumenting Stitch computations and translating their failures.
+ */
+object StitchUtils {
+
+  /**
+   * Times `s` with `Stitch.time` and adds the elapsed milliseconds to `latencyStat`, then
+   * yields the outcome of `s` itself.
+   */
+  def trackLatency[T](latencyStat: Stat, s: => Stitch[T]): Stitch[T] = {
+    val timed = Stitch.time(s)
+    val recorded = timed.map { resultAndElapsed =>
+      val (result, elapsed) = resultAndElapsed
+      latencyStat.add(elapsed.inMillis)
+      result
+    }
+    recorded.lowerFromTry
+  }
+
+  /**
+   * Builds a wrapper that records, under the `apiName` scope of `statsReceiver`:
+   * a "requests" counter (every outcome), a "success" counter (successes only), a
+   * "latency_ms" stat, and exception counts under "failures".
+   */
+  def observe[T](statsReceiver: StatsReceiver, apiName: String): Stitch[T] => Stitch[T] = {
+    val scoped = statsReceiver.scope(apiName)
+
+    val requestCounter = scoped.counter("requests")
+    val successCounter = scoped.counter("success")
+    val latency = scoped.stat("latency_ms")
+    val failureCounter = new servo.util.ExceptionCounter(scoped, "failures")
+
+    (underlying: Stitch[T]) => {
+      val timed = trackLatency(latency, underlying)
+      timed.respond {
+        case Throw(failure) =>
+          failureCounter(failure)
+          requestCounter.incr()
+
+        case Return(_) =>
+          requestCounter.incr()
+          successCounter.incr()
+      }
+    }
+  }
+
+  /**
+   * Rewrites the failure of `stitch` with `translateException` where it is defined; any other
+   * failure is passed through unchanged.
+   */
+  def translateExceptions[T](
+    stitch: Stitch[T],
+    translateException: PartialFunction[Throwable, Throwable]
+  ): Stitch[T] =
+    stitch.rescue {
+      case failure =>
+        val translated =
+          if (translateException.isDefinedAt(failure)) translateException(failure)
+          else failure
+        Stitch.exception(translated)
+    }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala
@ -0,0 +1,31 @@
+package com.twitter.tweetypie.util
+
+/**
+ * Renders a String as a Java/Scala String literal, including the surrounding double quotes.
+ *
+ * Newline, carriage return, tab, both quote characters and the backslash are written as
+ * backslash escapes; other characters in the range ' ' to '~' are copied as-is; everything
+ * else is written as a four-digit lowercase unicode escape.
+ *
+ * Intended mainly for printing Strings when debugging or logging.
+ */
+object StringLiteral extends (String => String) {
+  private[this] val FirstPrintable = ' '
+  private[this] val LastPrintable = '\u007e'
+
+  // character -> the character that follows the backslash in its escape sequence
+  private[this] val EscapedAs =
+    Map('\n' -> 'n', '\r' -> 'r', '\t' -> 't', '"' -> '"', '\'' -> '\'', '\\' -> '\\')
+
+  def apply(str: String): String = {
+    val out = new StringBuilder(str.length)
+    out.append('"')
+    str.foreach { ch =>
+      EscapedAs.get(ch) match {
+        case Some(escape) =>
+          out.append('\\').append(escape)
+        case None if FirstPrintable <= ch && ch <= LastPrintable =>
+          out.append(ch)
+        case None =>
+          out.append("\\u%04x".format(ch.toInt))
+      }
+    }
+    out.append('"')
+    out.result()
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala
@ -0,0 +1,49 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.takedown.util.TakedownReasons
+import com.twitter.takedown.util.TakedownReasons.CountryCode
+import com.twitter.tseng.withholding.thriftscala.TakedownReason
+import com.twitter.tseng.withholding.thriftscala.UnspecifiedReason
+import com.twitter.tweetypie.thriftscala.Tweet
+
+/**
+ * Tweetypie-specific helpers for working with TakedownReasons.
+ */
+object Takedowns {
+
+  type CountryCode = String
+
+  /**
+   * Splits a list of [[TakedownReason]] into the two values saved on the [[Tweet]], in fields
+   * tweetypieOnlyTakedownCountryCode and tweetypieOnlyTakedownReason.
+   *
+   * - the first element holds the country_code of every UnspecifiedReason
+   *   (for tweetypieOnlyTakedownCountryCode)
+   * - the second element holds all other reasons (for tweetypieOnlyTakedownReason)
+   */
+  def partitionReasons(reasons: Seq[TakedownReason]): (Seq[String], Seq[TakedownReason]) = {
+    def isUnspecified(reason: TakedownReason): Boolean =
+      reason match {
+        case TakedownReason.UnspecifiedReason(UnspecifiedReason(_)) => true
+        case _ => false
+      }
+
+    val countryCodes =
+      reasons.filter(isUnspecified).collect(TakedownReasons.reasonToCountryCode)
+    (countryCodes, reasons.filterNot(isUnspecified))
+  }
+
+  /**
+   * Rebuilds the full set of takedown reasons stored on a [[Tweet]]: the stored country codes
+   * converted back into reasons, plus the stored reasons. Unset fields count as empty.
+   */
+  def fromTweet(t: Tweet): Takedowns = {
+    val reasonsFromCountryCodes =
+      t.tweetypieOnlyTakedownCountryCodes
+        .getOrElse(Nil)
+        .map(TakedownReasons.countryCodeToReason)
+    val storedReasons = t.tweetypieOnlyTakedownReasons.getOrElse(Nil)
+
+    Takedowns((reasonsFromCountryCodes ++ storedReasons).toSet)
+  }
+}
+
+/**
+ * This class is used to ensure the caller has access to both the full list of reasons as well
+ * as the backwards-compatible list of country codes.
+ *
+ * @param reasons the full set of takedown reasons
+ */
+case class Takedowns(reasons: Set[TakedownReason]) {
+  // Only the reasons on which TakedownReasons.reasonToCountryCode is defined contribute a code.
+  def countryCodes: Set[CountryCode] = reasons.collect(TakedownReasons.reasonToCountryCode)
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala
@ -0,0 +1,17 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.tweetypie.thriftscala.TransientCreateContext
+import com.twitter.tweetypie.thriftscala.TweetCreateContextKey
+import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeCreatorId
+import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeIsLive
+
+object TransientContextUtil {
+
+  /**
+   * Converts the optional fields of a [[TransientCreateContext]] into context-key entries.
+   * A field that is unset contributes no entry; values are rendered with `toString`.
+   */
+  def toAdditionalContext(context: TransientCreateContext): Map[TweetCreateContextKey, String] = {
+    // "true" or "false"
+    val isLive: Option[(TweetCreateContextKey, String)] =
+      context.periscopeIsLive.map(live => PeriscopeIsLive -> live.toString)
+    // userId
+    val creatorId: Option[(TweetCreateContextKey, String)] =
+      context.periscopeCreatorId.map(id => PeriscopeCreatorId -> id.toString)
+
+    (isLive.toSeq ++ creatorId.toSeq).toMap
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala
@ -0,0 +1,203 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.logging.Logger
+import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata
+import com.twitter.scrooge.BinaryThriftStructSerializer
+import com.twitter.servo.cache.ScopedCacheKey
+import com.twitter.servo.util.Transformer
+import com.twitter.tweetypie.thriftscala.PostTweetRequest
+import com.twitter.util.Base64Long
+import com.twitter.util.Time
+import java.nio.ByteBuffer
+import java.security.MessageDigest
+import org.apache.commons.codec.binary.Base64
+import scala.collection.immutable.SortedMap
+
+object TweetCreationLock {
+  /**
+   * Cache key for a tweet-creation lock.
+   *
+   * @param userId the user the lock belongs to
+   * @param typeCode one of the codes in [[Key.TypeCode]], saying what `idOrMd5` holds
+   * @param idOrMd5 the base64 id or request digest that distinguishes the lock
+   */
+  case class Key private (userId: UserId, typeCode: String, idOrMd5: String)
+      extends ScopedCacheKey("t", "locker", 2, Base64Long.toBase64(userId), typeCode, idOrMd5) {
+
+    /** The encoded uniqueness id, present only for keys built from a uniqueness id. */
+    def uniquenessId: Option[String] =
+      typeCode match {
+        case Key.TypeCode.UniquenessId => Some(idOrMd5)
+        case _ => None
+      }
+  }
+
+  /**
+   * Factories for [[Key]]. A key is derived from a source tweet id, from a client-supplied
+   * uniqueness id, or from a digest of the (sanitized) request itself.
+   */
+  object Key {
+    private[this] val log = Logger(getClass)
+
+    // One-letter codes stored in Key.typeCode, identifying what the key was derived from.
+    object TypeCode {
+      val SourceTweetId = "r"
+      val UniquenessId = "u"
+      val PostTweetRequest = "p"
+    }
+
+    private[this] val serializer = BinaryThriftStructSerializer(PostTweetRequest)
+
+    // Normalize the representation of no media ids: Some(empty) becomes None.
+    private[util] def sanitizeMediaUploadIds(mediaUploadIds: Option[Seq[Long]]) =
+      mediaUploadIds.filter(_.nonEmpty)
+
+    /**
+     * Request deduplication depends on the hash of a serialized Thrift value.
+     *
+     * In order to guarantee that a Map has a reproducible serialized form,
+     * it's necessary to fix the ordering of its keys.
+     */
+    private[util] def sanitizeMediaMetadata(
+      mediaMetadata: Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]]
+    ): Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]] =
+      mediaMetadata.map(m => SortedMap(m.toSeq: _*))
+
+    /**
+     *  Make sure to sanitize request fields with map/set since serialized
+     *  bytes ordering is not guaranteed for same thrift values.
+     *
+     *  Only the fields copied below take part in the request digest: createdVia is
+     *  blanked, and any request field not listed here is not carried over.
+     */
+    private[util] def sanitizeRequest(request: PostTweetRequest): PostTweetRequest =
+      PostTweetRequest(
+        userId = request.userId,
+        text = request.text,
+        createdVia = "",
+        inReplyToTweetId = request.inReplyToTweetId,
+        geo = request.geo,
+        mediaUploadIds = sanitizeMediaUploadIds(request.mediaUploadIds),
+        narrowcast = request.narrowcast,
+        nullcast = request.nullcast,
+        additionalFields = request.additionalFields,
+        attachmentUrl = request.attachmentUrl,
+        mediaMetadata = sanitizeMediaMetadata(request.mediaMetadata),
+        conversationControl = request.conversationControl,
+        underlyingCreativesContainerId = request.underlyingCreativesContainerId,
+        editOptions = request.editOptions,
+        noteTweetOptions = request.noteTweetOptions
+      )
+
+    /** Key for a request identified by the id of its source tweet. */
+    def bySourceTweetId(userId: UserId, sourceTweetId: TweetId): Key =
+      Key(userId, TypeCode.SourceTweetId, Base64Long.toBase64(sourceTweetId))
+
+    /**
+     * Key for an arbitrary request. A request carrying a uniqueness id is keyed by that id;
+     * otherwise the key is the base64 SHA-256 digest of the serialized, sanitized request.
+     */
+    def byRequest(request: PostTweetRequest): Key =
+      request.uniquenessId match {
+        case Some(uqid) =>
+          byUniquenessId(request.userId, uqid)
+        case None =>
+          val sanitized = sanitizeRequest(request)
+          val sanitizedBytes = serializer.toBytes(sanitized)
+          // NOTE(review): the Key field is named idOrMd5, but the digest used here is SHA-256.
+          val digested = MessageDigest.getInstance("SHA-256").digest(sanitizedBytes)
+          val base64Digest = Base64.encodeBase64String(digested)
+          val key = Key(request.userId, TypeCode.PostTweetRequest, base64Digest)
+          log.ifDebug(s"Generated key $key from request:\n${sanitized}")
+          key
+      }
+
+    /**
+     * Key for tweets that have a uniqueness id set. There is only one
+     * namespace of uniqueness ids, across all clients. They are
+     * expected to be Snowflake ids, in order to avoid cache
+     * collisions.
+     */
+    def byUniquenessId(userId: UserId, uniquenessId: Long): Key =
+      Key(userId, TypeCode.UniquenessId, Base64Long.toBase64(uniquenessId))
+  }
+
+  /**
+   * The state of tweet creation for a given Key (request).
+   */
+  sealed trait State
+
+  object State {
+
+    /**
+     * There is no tweet creation currently in progress. (This can
+     * either be represented by no entry in the cache, or this special
+     * marker. This lets us use checkAndSet for deletion to avoid
+     * accidentally overwriting other process' values.)
+     */
+    case object Unlocked extends State
+
+    /**
+     * Some process is attempting to create the tweet.
+     */
+    case class InProgress(token: Long, timestamp: Time) extends State
+
+    /**
+     * The tweet has already been successfully created, and has the
+     * specified id.
+     */
+    case class AlreadyCreated(tweetId: TweetId, timestamp: Time) extends State
+
+    /**
+     * When stored in cache, each state is prefixed by a byte
+     * indicating the type of the entry.
+     */
+    object TypeCode {
+      val Unlocked: Byte = 0.toByte
+      val InProgress: Byte = 1.toByte // + random long + timestamp
+      val AlreadyCreated: Byte = 2.toByte // + tweet id + timestamp
+    }
+
+    private[this] val BufferSize = 17 // type byte + 64-bit value + 64-bit timestamp
+
+    // Constant buffer to use for storing the serialized form on
+    // Unlocked.
+    private[this] val UnlockedBuf = Array[Byte](TypeCode.Unlocked)
+
+    // Store the serialization function in a ThreadLocal so that we can
+    // reuse the buffer between invocations.
+    //
+    // Consequence: for InProgress and AlreadyCreated the returned array IS the thread-local
+    // buffer, so its contents are overwritten by the next serialization on the same thread.
+    private[this] val threadLocalSerialize = new ThreadLocal[State => Array[Byte]] {
+      override def initialValue(): State => Array[Byte] = {
+        // Allocate the thread-local state
+        val ary = new Array[Byte](BufferSize)
+        val buf = ByteBuffer.wrap(ary)
+
+        {
+          case Unlocked => UnlockedBuf
+          case InProgress(token, timestamp) =>
+            buf.clear()
+            buf
+              .put(TypeCode.InProgress)
+              .putLong(token)
+              .putLong(timestamp.sinceEpoch.inNanoseconds)
+            ary
+          case AlreadyCreated(tweetId, timestamp) =>
+            buf.clear()
+            buf
+              .put(TypeCode.AlreadyCreated)
+              .putLong(tweetId)
+              .putLong(timestamp.sinceEpoch.inNanoseconds)
+            ary
+        }
+      }
+    }
+
+    /**
+     * Convert this State to the cache representation.
+     */
+    private[this] def toBytes(state: State): Array[Byte] =
+      threadLocalSerialize.get()(state)
+
+    /**
+     * Convert this byte array into a State.
+     *
+     * The first byte selects the state (see TypeCode); InProgress and AlreadyCreated are
+     * followed by a 64-bit value and a 64-bit timestamp in nanoseconds since the epoch.
+     *
+     * @throws RuntimeException if the buffer is not of the right size
+     *   and format
+     */
+    private[this] def fromBytes(bytes: Array[Byte]): State = {
+      val buf = ByteBuffer.wrap(bytes)
+      val result = buf.get() match {
+        case TypeCode.Unlocked => Unlocked
+        case TypeCode.InProgress => InProgress(buf.getLong(), buf.getLong().nanoseconds.afterEpoch)
+        case TypeCode.AlreadyCreated =>
+          AlreadyCreated(buf.getLong(), buf.getLong().nanoseconds.afterEpoch)
+        case other => throw new RuntimeException("Invalid type code: " + other)
+      }
+      if (buf.remaining != 0) {
+        // Render the array contents explicitly: concatenating an Array[Byte] into a String
+        // only yields its identity (e.g. "[B@1b2c3d"), which is useless for diagnosis.
+        throw new RuntimeException("Extra data in buffer: " + java.util.Arrays.toString(bytes))
+      }
+      result
+    }
+
+    /**
+     * How to serialize the State for storage in cache.
+     */
+    val Serializer: Transformer[State, Array[Byte]] =
+      Transformer[State, Array[Byte]](tTo = toBytes _, tFrom = fromBytes _)
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala
@ -0,0 +1,506 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.dataproducts.enrichments.thriftscala.ProfileGeoEnrichment
+import com.twitter.expandodo.thriftscala._
+import com.twitter.mediaservices.commons.thriftscala.MediaKey
+import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
+import com.twitter.servo.data.Lens
+import com.twitter.spam.rtf.thriftscala.SafetyLabel
+import com.twitter.tseng.withholding.thriftscala.TakedownReason
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.tweetypie.unmentions.thriftscala.UnmentionData
+
+object TweetLenses {
+  import Lens.checkEq
+
+  /**
+   * Narrows a lens onto an optional field into a lens onto the value inside it.
+   *
+   * `get` calls `Option.get`, so it throws when the field is unset; `set` always stores the
+   * new value wrapped in `Some`.
+   */
+  def requireSome[A, B](l: Lens[A, Option[B]]): Lens[A, B] =
+    checkEq[A, B](
+      a => l.get(a).get,
+      (a, b) => l.set(a, Some(b))
+    )
+
+  /** Shorthand for building a lens on [[Tweet]] from a getter and a setter via `checkEq`. */
+  def tweetLens[A](get: Tweet => A, set: (Tweet, A) => Tweet): Lens[Tweet, A] =
+    checkEq[Tweet, A](get, set)
+
+  val id: Lens[Tweet, TweetId] =
+    tweetLens[TweetId](_.id, (t, id) => t.copy(id = id))
+
+  val coreData: Lens[Tweet, Option[TweetCoreData]] =
+    tweetLens[Option[TweetCoreData]](_.coreData, (t, coreData) => t.copy(coreData = coreData))
+
+  // Reads coreData with Option.get: throws when the Tweet has no coreData.
+  val requiredCoreData: Lens[Tweet, TweetCoreData] =
+    requireSome(coreData)
+
+  // Entity lenses come in pairs. The `opt*` lens exposes the optional field as-is. The
+  // unprefixed lens reads an unset field as an empty Seq and always writes Some(...), so
+  // writing back what was read turns an unset field into Some(empty).
+
+  val optUrls: Lens[Tweet, Option[Seq[UrlEntity]]] =
+    tweetLens[Option[Seq[UrlEntity]]](_.urls, (t, urls) => t.copy(urls = urls))
+
+  val urls: Lens[Tweet, Seq[UrlEntity]] =
+    tweetLens[Seq[UrlEntity]](_.urls.toSeq.flatten, (t, urls) => t.copy(urls = Some(urls)))
+
+  val optMentions: Lens[Tweet, Option[Seq[MentionEntity]]] =
+    tweetLens[Option[Seq[MentionEntity]]](_.mentions, (t, v) => t.copy(mentions = v))
+
+  val mentions: Lens[Tweet, Seq[MentionEntity]] =
+    tweetLens[Seq[MentionEntity]](_.mentions.toSeq.flatten, (t, v) => t.copy(mentions = Some(v)))
+
+  val unmentionData: Lens[Tweet, Option[UnmentionData]] =
+    tweetLens[Option[UnmentionData]](_.unmentionData, (t, v) => t.copy(unmentionData = v))
+
+  val optHashtags: Lens[Tweet, Option[Seq[HashtagEntity]]] =
+    tweetLens[Option[Seq[HashtagEntity]]](_.hashtags, (t, v) => t.copy(hashtags = v))
+
+  val hashtags: Lens[Tweet, Seq[HashtagEntity]] =
+    tweetLens[Seq[HashtagEntity]](_.hashtags.toSeq.flatten, (t, v) => t.copy(hashtags = Some(v)))
+
+  val optCashtags: Lens[Tweet, Option[Seq[CashtagEntity]]] =
+    tweetLens[Option[Seq[CashtagEntity]]](_.cashtags, (t, v) => t.copy(cashtags = v))
+
+  val cashtags: Lens[Tweet, Seq[CashtagEntity]] =
+    tweetLens[Seq[CashtagEntity]](_.cashtags.toSeq.flatten, (t, v) => t.copy(cashtags = Some(v)))
+
+  val optMedia: Lens[Tweet, Option[Seq[MediaEntity]]] =
+    tweetLens[Option[Seq[MediaEntity]]](_.media, (t, v) => t.copy(media = v))
+
+  val media: Lens[Tweet, Seq[MediaEntity]] =
+    tweetLens[Seq[MediaEntity]](_.media.toSeq.flatten, (t, v) => t.copy(media = Some(v)))
+
+  // An unset mediaKeys field reads as an empty Seq; writing always stores Some(...).
+  val mediaKeys: Lens[Tweet, Seq[MediaKey]] =
+    tweetLens[Seq[MediaKey]](_.mediaKeys.toSeq.flatten, (t, v) => t.copy(mediaKeys = Some(v)))
+
+  // The lenses below expose optional Tweet fields exactly as stored.
+
+  val place: Lens[Tweet, Option[Place]] =
+    tweetLens[Option[Place]](_.place, (t, v) => t.copy(place = v))
+
+  val quotedTweet: Lens[Tweet, Option[QuotedTweet]] =
+    tweetLens[Option[QuotedTweet]](_.quotedTweet, (t, v) => t.copy(quotedTweet = v))
+
+  val selfThreadMetadata: Lens[Tweet, Option[SelfThreadMetadata]] =
+    tweetLens[Option[SelfThreadMetadata]](
+      _.selfThreadMetadata,
+      (t, v) => t.copy(selfThreadMetadata = v)
+    )
+
+  val composerSource: Lens[Tweet, Option[ComposerSource]] =
+    tweetLens[Option[ComposerSource]](_.composerSource, (t, v) => t.copy(composerSource = v))
+
+  val deviceSource: Lens[Tweet, Option[DeviceSource]] =
+    tweetLens[Option[DeviceSource]](_.deviceSource, (t, v) => t.copy(deviceSource = v))
+
+  val perspective: Lens[Tweet, Option[StatusPerspective]] =
+    tweetLens[Option[StatusPerspective]](_.perspective, (t, v) => t.copy(perspective = v))
+
+  val cards: Lens[Tweet, Option[Seq[Card]]] =
+    tweetLens[Option[Seq[Card]]](_.cards, (t, v) => t.copy(cards = v))
+
+  val card2: Lens[Tweet, Option[Card2]] =
+    tweetLens[Option[Card2]](_.card2, (t, v) => t.copy(card2 = v))
+
+  val cardReference: Lens[Tweet, Option[CardReference]] =
+    tweetLens[Option[CardReference]](_.cardReference, (t, v) => t.copy(cardReference = v))
+
+  val spamLabel: Lens[Tweet, Option[SafetyLabel]] =
+    tweetLens[Option[SafetyLabel]](_.spamLabel, (t, v) => t.copy(spamLabel = v))
+
+  val lowQualityLabel: Lens[Tweet, Option[SafetyLabel]] =
+    tweetLens[Option[SafetyLabel]](_.lowQualityLabel, (t, v) => t.copy(lowQualityLabel = v))
+
+  val nsfwHighPrecisionLabel: Lens[Tweet, Option[SafetyLabel]] =
+    tweetLens[Option[SafetyLabel]](
+      _.nsfwHighPrecisionLabel,
+      (t, v) => t.copy(nsfwHighPrecisionLabel = v)
+    )
+
+  val bounceLabel: Lens[Tweet, Option[SafetyLabel]] =
+    tweetLens[Option[SafetyLabel]](_.bounceLabel, (t, v) => t.copy(bounceLabel = v))
+
+  val takedownCountryCodes: Lens[Tweet, Option[Seq[String]]] =
+    tweetLens[Option[Seq[String]]](
+      _.takedownCountryCodes,
+      (t, v) => t.copy(takedownCountryCodes = v)
+    )
+
+  val takedownReasons: Lens[Tweet, Option[Seq[TakedownReason]]] =
+    tweetLens[Option[Seq[TakedownReason]]](
+      _.takedownReasons,
+      (t, v) => t.copy(takedownReasons = v)
+    )
+
+  val contributor: Lens[Tweet, Option[Contributor]] =
+    tweetLens[Option[Contributor]](_.contributor, (t, v) => t.copy(contributor = v))
+
+  val mediaTags: Lens[Tweet, Option[TweetMediaTags]] =
+    tweetLens[Option[TweetMediaTags]](_.mediaTags, (t, v) => t.copy(mediaTags = v))
+
+  // Map view of the mediaTags field. An unset field reads as an empty map. On write, entries
+  // with no tags are dropped, and if nothing remains the field is cleared (None) rather than
+  // stored as an empty TweetMediaTags.
+  val mediaTagMap: Lens[Tweet, Map[MediaId, Seq[MediaTag]]] =
+    tweetLens[Map[MediaId, Seq[MediaTag]]](
+      _.mediaTags.map { case TweetMediaTags(tagMap) => tagMap.toMap }.getOrElse(Map.empty),
+      (t, v) => {
+        val cleanMap = v.filter { case (_, tags) => tags.nonEmpty }
+        t.copy(mediaTags = if (cleanMap.nonEmpty) Some(TweetMediaTags(cleanMap)) else None)
+      }
+    )
+
+  // The lenses below expose optional Tweet fields exactly as stored.
+
+  val escherbirdEntityAnnotations: Lens[Tweet, Option[EscherbirdEntityAnnotations]] =
+    tweetLens[Option[EscherbirdEntityAnnotations]](
+      _.escherbirdEntityAnnotations,
+      (t, v) => t.copy(escherbirdEntityAnnotations = v)
+    )
+
+  val communities: Lens[Tweet, Option[Communities]] =
+    tweetLens[Option[Communities]](_.communities, (t, v) => t.copy(communities = v))
+
+  val tweetypieOnlyTakedownCountryCodes: Lens[Tweet, Option[Seq[String]]] =
+    tweetLens[Option[Seq[String]]](
+      _.tweetypieOnlyTakedownCountryCodes,
+      (t, v) => t.copy(tweetypieOnlyTakedownCountryCodes = v)
+    )
+
+  val tweetypieOnlyTakedownReasons: Lens[Tweet, Option[Seq[TakedownReason]]] =
+    tweetLens[Option[Seq[TakedownReason]]](
+      _.tweetypieOnlyTakedownReasons,
+      (t, v) => t.copy(tweetypieOnlyTakedownReasons = v)
+    )
+
+  // Named profileGeo, but backed by the profileGeoEnrichment field.
+  val profileGeo: Lens[Tweet, Option[ProfileGeoEnrichment]] =
+    tweetLens[Option[ProfileGeoEnrichment]](
+      _.profileGeoEnrichment,
+      (t, v) => t.copy(profileGeoEnrichment = v)
+    )
+
+  val visibleTextRange: Lens[Tweet, Option[TextRange]] =
+    tweetLens[Option[TextRange]](_.visibleTextRange, (t, v) => t.copy(visibleTextRange = v))
+
+  val selfPermalink: Lens[Tweet, Option[ShortenedUrl]] =
+    tweetLens[Option[ShortenedUrl]](_.selfPermalink, (t, v) => t.copy(selfPermalink = v))
+
+  val extendedTweetMetadata: Lens[Tweet, Option[ExtendedTweetMetadata]] =
+    tweetLens[Option[ExtendedTweetMetadata]](
+      _.extendedTweetMetadata,
+      (t, v) => t.copy(extendedTweetMetadata = v)
+    )
+
+  /**
+   * Lenses over the fields of the TweetCoreData struct.
+   *
+   * The setter lambdas deliberately avoid the plain `(c, v) => c.copy(name = v)` form whenever
+   * the field name equals the lens name (see the scala/bug#9171 notes below); keep that shape
+   * when adding lenses here.
+   */
+  object TweetCoreData {
+    val userId: Lens[TweetCoreData, UserId] = checkEq[TweetCoreData, UserId](
+      _.userId,
+      { (c, v) =>
+        // Pleases the compiler: https://github.com/scala/bug/issues/9171
+        val userId = v
+        c.copy(userId = userId)
+      })
+    val text: Lens[TweetCoreData, String] = checkEq[TweetCoreData, String](
+      _.text,
+      { (c, v) =>
+        // Pleases the compiler: https://github.com/scala/bug/issues/9171
+        val text = v
+        c.copy(text = text)
+      })
+    // Backed by createdAtSecs: a plain Long timestamp, not a tweet id.
+    val createdAt: Lens[TweetCoreData, Long] =
+      checkEq[TweetCoreData, Long](_.createdAtSecs, (c, v) => c.copy(createdAtSecs = v))
+    val createdVia: Lens[TweetCoreData, String] =
+      checkEq[TweetCoreData, String](
+        _.createdVia,
+        {
+          case (c, v) => c.copy(createdVia = v)
+        })
+    val hasTakedown: Lens[TweetCoreData, Boolean] =
+      checkEq[TweetCoreData, Boolean](
+        _.hasTakedown,
+        {
+          case (c, v) => c.copy(hasTakedown = v)
+        })
+    val nullcast: Lens[TweetCoreData, Boolean] =
+      checkEq[TweetCoreData, Boolean](
+        _.nullcast,
+        {
+          case (c, v) => c.copy(nullcast = v)
+        })
+    val nsfwUser: Lens[TweetCoreData, Boolean] =
+      checkEq[TweetCoreData, Boolean](
+        _.nsfwUser,
+        {
+          case (c, v) => c.copy(nsfwUser = v)
+        })
+    val nsfwAdmin: Lens[TweetCoreData, Boolean] =
+      checkEq[TweetCoreData, Boolean](
+        _.nsfwAdmin,
+        {
+          case (c, v) => c.copy(nsfwAdmin = v)
+        })
+    val reply: Lens[TweetCoreData, Option[Reply]] =
+      checkEq[TweetCoreData, Option[Reply]](
+        _.reply,
+        {
+          case (c, v) => c.copy(reply = v)
+        })
+    val share: Lens[TweetCoreData, Option[Share]] =
+      checkEq[TweetCoreData, Option[Share]](
+        _.share,
+        {
+          case (c, v) => c.copy(share = v)
+        })
+    val narrowcast: Lens[TweetCoreData, Option[Narrowcast]] =
+      checkEq[TweetCoreData, Option[Narrowcast]](
+        _.narrowcast,
+        {
+          case (c, v) => c.copy(narrowcast = v)
+        })
+    val directedAtUser: Lens[TweetCoreData, Option[DirectedAtUser]] =
+      checkEq[TweetCoreData, Option[DirectedAtUser]](
+        _.directedAtUser,
+        {
+          case (c, v) => c.copy(directedAtUser = v)
+        })
+    val conversationId: Lens[TweetCoreData, Option[ConversationId]] =
+      checkEq[TweetCoreData, Option[ConversationId]](
+        _.conversationId,
+        {
+          case (c, v) => c.copy(conversationId = v)
+        })
+    val placeId: Lens[TweetCoreData, Option[String]] =
+      checkEq[TweetCoreData, Option[String]](
+        _.placeId,
+        {
+          case (c, v) => c.copy(placeId = v)
+        })
+    // Named geoCoordinates, but backed by the coordinates field.
+    val geoCoordinates: Lens[TweetCoreData, Option[GeoCoordinates]] =
+      checkEq[TweetCoreData, Option[GeoCoordinates]](
+        _.coordinates,
+        (c, v) => c.copy(coordinates = v)
+      )
+    // Declared with the same element type the lens is built with (Long).
+    val trackingId: Lens[TweetCoreData, Option[Long]] =
+      checkEq[TweetCoreData, Option[Long]](
+        _.trackingId,
+        {
+          case (c, v) => c.copy(trackingId = v)
+        })
+    val hasMedia: Lens[TweetCoreData, Option[Boolean]] =
+      checkEq[TweetCoreData, Option[Boolean]](
+        _.hasMedia,
+        {
+          case (c, v) => c.copy(hasMedia = v)
+        })
+  }
+
+  // Exposes the optional counts field exactly as stored.
+  val counts: Lens[Tweet, Option[StatusCounts]] =
+    tweetLens[Option[StatusCounts]](_.counts, (t, v) => t.copy(counts = v))
+
+  /**
+   * Lenses over the optional counters of the StatusCounts struct. The counters are plain
+   * Long values (not tweet ids), and the lens types say so.
+   */
+  object StatusCounts {
+    val retweetCount: Lens[StatusCounts, Option[Long]] =
+      checkEq[StatusCounts, Option[Long]](
+        _.retweetCount,
+        (c, retweetCount) => c.copy(retweetCount = retweetCount)
+      )
+
+    val replyCount: Lens[StatusCounts, Option[Long]] =
+      checkEq[StatusCounts, Option[Long]](
+        _.replyCount,
+        (c, replyCount) => c.copy(replyCount = replyCount)
+      )
+
+    val favoriteCount: Lens[StatusCounts, Option[Long]] =
+      checkEq[StatusCounts, Option[Long]](
+        _.favoriteCount,
+        {
+          case (c, v) => c.copy(favoriteCount = v)
+        })
+
+    val quoteCount: Lens[StatusCounts, Option[Long]] =
+      checkEq[StatusCounts, Option[Long]](
+        _.quoteCount,
+        {
+          case (c, v) => c.copy(quoteCount = v)
+        })
+  }
+
+  // Tweet-level lenses onto core-data fields. Each one goes through requiredCoreData, so
+  // reading through it throws when the Tweet has no coreData.
+  val userId: Lens[Tweet, UserId] = requiredCoreData andThen TweetCoreData.userId
+  val text: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.text
+  val createdVia: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.createdVia
+  // createdAtSecs is a timestamp, so the lens is typed as a plain Long rather than an id alias.
+  val createdAt: Lens[Tweet, Long] = requiredCoreData andThen TweetCoreData.createdAt
+  val reply: Lens[Tweet, Option[Reply]] = requiredCoreData andThen TweetCoreData.reply
+  val share: Lens[Tweet, Option[Share]] = requiredCoreData andThen TweetCoreData.share
+  val narrowcast: Lens[Tweet, Option[Narrowcast]] =
+    requiredCoreData andThen TweetCoreData.narrowcast
+  val directedAtUser: Lens[Tweet, Option[DirectedAtUser]] =
+    requiredCoreData andThen TweetCoreData.directedAtUser
+  val conversationId: Lens[Tweet, Option[ConversationId]] =
+    requiredCoreData andThen TweetCoreData.conversationId
+  val placeId: Lens[Tweet, Option[String]] = requiredCoreData andThen TweetCoreData.placeId
+  val geoCoordinates: Lens[Tweet, Option[GeoCoordinates]] =
+    requiredCoreData andThen TweetCoreData.geoCoordinates
+  val hasTakedown: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.hasTakedown
+  val nsfwAdmin: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwAdmin
+  val nsfwUser: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwUser
+  val nullcast: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nullcast
+  // The tracking id is typed as a plain Long, not as a conversation id.
+  val trackingId: Lens[Tweet, Option[Long]] =
+    requiredCoreData andThen TweetCoreData.trackingId
+  val hasMedia: Lens[Tweet, Option[Boolean]] = requiredCoreData andThen TweetCoreData.hasMedia
+
+  /** Lenses over the fields of a cashtag entity. */
+  object CashtagEntity {
+    // fromIndex and toIndex, read and written together as a (from, to) pair.
+    val indices: Lens[CashtagEntity, (Short, Short)] =
+      checkEq[CashtagEntity, (Short, Short)](
+        t => (t.fromIndex, t.toIndex),
+        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
+      )
+    val text: Lens[CashtagEntity, String] =
+      checkEq[CashtagEntity, String](_.text, (t, text) => t.copy(text = text))
+  }
+
+  /** Lenses over the fields of a hashtag entity. */
+  object HashtagEntity {
+    // fromIndex and toIndex, read and written together as a (from, to) pair.
+    val indices: Lens[HashtagEntity, (Short, Short)] =
+      checkEq[HashtagEntity, (Short, Short)](
+        t => (t.fromIndex, t.toIndex),
+        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
+      )
+    val text: Lens[HashtagEntity, String] =
+      checkEq[HashtagEntity, String](_.text, (t, text) => t.copy(text = text))
+  }
+
+  /** Lenses over the fields of a media entity. */
+  object MediaEntity {
+    // fromIndex and toIndex, read and written together as a (from, to) pair.
+    val indices: Lens[MediaEntity, (Short, Short)] =
+      checkEq[MediaEntity, (Short, Short)](
+        t => (t.fromIndex, t.toIndex),
+        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
+      )
+    // Named mediaSizes, but backed by the sizes field.
+    val mediaSizes: Lens[MediaEntity, collection.Set[MediaSize]] =
+      checkEq[MediaEntity, scala.collection.Set[MediaSize]](
+        _.sizes,
+        (m, sizes) => m.copy(sizes = sizes)
+      )
+    val url: Lens[MediaEntity, String] =
+      checkEq[MediaEntity, String](
+        _.url,
+        {
+          case (t, v) => t.copy(url = v)
+        })
+    val mediaInfo: Lens[MediaEntity, Option[MediaInfo]] =
+      checkEq[MediaEntity, Option[MediaInfo]](
+        _.mediaInfo,
+        {
+          case (t, v) => t.copy(mediaInfo = v)
+        })
+  }
+
+  /** Lenses over the fields of a mention entity. */
+  object MentionEntity {
+    // fromIndex and toIndex, read and written together as a (from, to) pair.
+    val indices: Lens[MentionEntity, (Short, Short)] =
+      checkEq[MentionEntity, (Short, Short)](
+        t => (t.fromIndex, t.toIndex),
+        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
+      )
+    val screenName: Lens[MentionEntity, String] =
+      checkEq[MentionEntity, String](
+        _.screenName,
+        (t, screenName) => t.copy(screenName = screenName)
+      )
+  }
+
+  /** Lenses over the fields of a URL entity. */
+  object UrlEntity {
+    // fromIndex and toIndex, read and written together as a (from, to) pair.
+    val indices: Lens[UrlEntity, (Short, Short)] =
+      checkEq[UrlEntity, (Short, Short)](
+        t => (t.fromIndex, t.toIndex),
+        (t, v) => t.copy(fromIndex = v._1, toIndex = v._2)
+      )
+    val url: Lens[UrlEntity, String] =
+      checkEq[UrlEntity, String](_.url, (t, url) => t.copy(url = url))
+  }
+
+  /** Lenses over the fields of a Contributor. */
+  object Contributor {
+    // Exposes the optional screenName field exactly as stored.
+    val screenName: Lens[Contributor, Option[String]] =
+      checkEq[Contributor, Option[String]](
+        _.screenName,
+        (c, screenName) => c.copy(screenName = screenName)
+      )
+  }
+
+  /** Lenses over the optional fields of a Reply, exposed exactly as stored. */
+  object Reply {
+    val inReplyToScreenName: Lens[Reply, Option[String]] =
+      checkEq[Reply, Option[String]](
+        _.inReplyToScreenName,
+        (c, inReplyToScreenName) => c.copy(inReplyToScreenName = inReplyToScreenName)
+      )
+
+    val inReplyToStatusId: Lens[Reply, Option[TweetId]] =
+      checkEq[Reply, Option[TweetId]](
+        _.inReplyToStatusId,
+        (c, inReplyToStatusId) => c.copy(inReplyToStatusId = inReplyToStatusId)
+      )
+  }
+}
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala
@ -0,0 +1,18 @@
+package com.twitter.tweetypie.util
+
+import com.twitter.tweetutil.TweetPermalink
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Helpers for finding tweet permalinks among the URL entities of a Tweet.
+ */
+object TweetPermalinkUtil {
+
+  /** The last URL entity of the Tweet whose expanded URL parses as a tweet permalink. */
+  def lastQuotedTweetPermalink(tweet: Tweet): Option[(UrlEntity, TweetPermalink)] = {
+    val tweetUrls = TweetLenses.urls.get(tweet)
+    lastQuotedTweetPermalink(tweetUrls)
+  }
+
+  /** The last of the given URL entities whose expanded URL parses as a tweet permalink. */
+  def lastQuotedTweetPermalink(urls: Seq[UrlEntity]): Option[(UrlEntity, TweetPermalink)] = {
+    val matches = for {
+      entity <- urls
+      matched <- matchQuotedTweetPermalink(entity)
+    } yield matched
+    matches.lastOption
+  }
+
+  /**
+   * Pairs the entity with the permalink parsed from its expanded URL; None when the entity
+   * has no expanded URL or the URL does not parse as a tweet permalink.
+   */
+  def matchQuotedTweetPermalink(entity: UrlEntity): Option[(UrlEntity, TweetPermalink)] =
+    entity.expanded.flatMap { expandedUrl =>
+      TweetPermalink.parse(expandedUrl).map(permalink => (entity, permalink))
+    }
+}
--- a/Show More
+++ b/Show More