diff --git a/README.md b/README.md index 818e7334e..ebb136186 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Product surfaces at Twitter are built on a shared set of data, models, and softw | | [timelines-aggregation-framework](timelines/data_processing/ml_util/aggregation_framework/README.md) | Framework for generating aggregate features in batch or real time. | | | [representation-manager](representation-manager/README.md) | Service to retrieve embeddings (i.e. SimClusers and TwHIN). | | | [twml](twml/README.md) | Legacy machine learning framework built on TensorFlow v1. | +| | [Tweetypie](tweetypie/server/README.md) | Core Tweet service that handles the reading and writing of Tweet data. | The product surface currently included in this repository is the For You Timeline. diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala new file mode 100644 index 000000000..91e06e4c6 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala @@ -0,0 +1,118 @@ +package com.twitter.tweetypie.additionalfields + +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.scrooge.TFieldBlob +import com.twitter.scrooge.ThriftStructField + +object AdditionalFields { + type FieldId = Short + + /** additional fields really start at 100, but we are ignoring conversation id for now */ + val StartAdditionalId = 101 + + /** all known [[Tweet]] field IDs */ + val CompiledFieldIds: Seq[FieldId] = Tweet.metaData.fields.map(_.id) + + /** all known [[Tweet]] fields in the "additional-field" range (excludes id) */ + val CompiledAdditionalFieldMetaDatas: Seq[ThriftStructField[Tweet]] = + Tweet.metaData.fields.filter(f => isAdditionalFieldId(f.id)) + + val CompiledAdditionalFieldsMap: Map[Short, ThriftStructField[Tweet]] = + CompiledAdditionalFieldMetaDatas.map(field => (field.id, 
field)).toMap + + /** all known [[Tweet]] field IDs in the "additional-field" range */ + val CompiledAdditionalFieldIds: Seq[FieldId] = + CompiledAdditionalFieldsMap.keys.toSeq + + /** all [[Tweet]] field IDs which should be rejected when set as additional + * fields via PostTweetRequest.additionalFields or RetweetRequest.additionalFields */ + val RejectedFieldIds: Seq[FieldId] = Seq( + // Should be provided via PostTweetRequest.conversationControl field. go/convocontrolsbackend + Tweet.ConversationControlField.id, + // This field should only be set based on whether the client sets the right community + // tweet annotation. + Tweet.CommunitiesField.id, + // This field should not be set by clients and should opt for + // [[PostTweetRequest.ExclusiveTweetControlOptions]]. + // The exclusiveTweetControl field requires the userId to be set + // and we shouldn't trust the client to provide the right one. + Tweet.ExclusiveTweetControlField.id, + // This field should not be set by clients and should opt for + // [[PostTweetRequest.TrustedFriendsControlOptions]]. + // The trustedFriendsControl field requires the trustedFriendsListId to be + // set and we shouldn't trust the client to provide the right one. + Tweet.TrustedFriendsControlField.id, + // This field should not be set by clients and should opt for + // [[PostTweetRequest.CollabControlOptions]]. + // The collabControl field requires a list of Collaborators to be + // set and we shouldn't trust the client to provide the right one. + Tweet.CollabControlField.id + ) + + def isAdditionalFieldId(fieldId: FieldId): Boolean = + fieldId >= StartAdditionalId + + /** + * Provides a list of all additional field IDs on the tweet, which include all + * the compiled additional fields and all the provided passthrough fields. This includes + * compiled additional fields where the value is None. 
+ */ + def allAdditionalFieldIds(tweet: Tweet): Seq[FieldId] = + CompiledAdditionalFieldIds ++ tweet._passthroughFields.keys + + /** + * Provides a list of all field IDs that have a value on the tweet which are not known compiled + * additional fields (excludes [[Tweet.id]]). + */ + def unsettableAdditionalFieldIds(tweet: Tweet): Seq[FieldId] = + CompiledFieldIds + .filter { id => + !isAdditionalFieldId(id) && id != Tweet.IdField.id && tweet.getFieldBlob(id).isDefined + } ++ + tweet._passthroughFields.keys + + /** + * Provides a list of all field IDs that have a value on the tweet which are explicitly disallowed + * from being set via PostTweetRequest.additionalFields and RetweetRequest.additionalFields + */ + def rejectedAdditionalFieldIds(tweet: Tweet): Seq[FieldId] = + RejectedFieldIds + .filter { id => tweet.getFieldBlob(id).isDefined } + + def unsettableAdditionalFieldIdsErrorMessage(unsettableFieldIds: Seq[FieldId]): String = + s"request may not contain fields: [${unsettableFieldIds.sorted.mkString(", ")}]" + + /** + * Provides a list of all additional field IDs that have a value on the tweet, + * compiled and passthrough (excludes Tweet.id). + */ + def nonEmptyAdditionalFieldIds(tweet: Tweet): Seq[FieldId] = + CompiledAdditionalFieldMetaDatas.collect { + case f if f.getValue(tweet) != None => f.id + } ++ tweet._passthroughFields.keys + + def additionalFields(tweet: Tweet): Seq[TFieldBlob] = + (tweet.getFieldBlobs(CompiledAdditionalFieldIds) ++ tweet._passthroughFields).values.toSeq + + /** + * Merge base tweet with additional fields. + * Non-additional fields in the additional tweet are ignored. 
+ * @param base: a tweet that contains basic fields + * @param additional: a tweet object that carries additional fields + */ + def setAdditionalFields(base: Tweet, additional: Tweet): Tweet = + setAdditionalFields(base, additionalFields(additional)) + + def setAdditionalFields(base: Tweet, additional: Option[Tweet]): Tweet = + additional.map(setAdditionalFields(base, _)).getOrElse(base) + + def setAdditionalFields(base: Tweet, additional: Traversable[TFieldBlob]): Tweet = + additional.foldLeft(base) { case (t, f) => t.setField(f) } + + /** + * Unsets the specified fields on the given tweet. + */ + def unsetFields(tweet: Tweet, fieldIds: Iterable[FieldId]): Tweet = { + tweet.unsetFields(fieldIds.toSet) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD new file mode 100644 index 000000000..472135458 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD @@ -0,0 +1,15 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/org/apache/thrift:libthrift", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-core", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD new file mode 100644 index 000000000..3e9bc82d8 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD @@ -0,0 +1,15 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + 
"finagle/finagle-memcached/src/main/scala", + "scrooge/scrooge-serializer", + "stitch/stitch-core", + "util/util-core", + "util/util-logging", + # CachedValue struct + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala new file mode 100644 index 000000000..816162fad --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala @@ -0,0 +1,241 @@ +package com.twitter.tweetypie.caching + +import com.twitter.finagle.service.StatsFilter +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.stats.ExceptionStatsHandler +import com.twitter.finagle.stats.Counter +import com.twitter.util.Future +import com.twitter.util.logging.Logger +import com.twitter.finagle.memcached +import scala.util.control.NonFatal + +/** + * Wrapper around a memcached client that performs serialization and + * deserialization, tracks stats, provides tracing, and provides + * per-key fresh/stale/failure/miss results. + * + * The operations that write values to cache will only write values + * that the ValueSerializer says are cacheable. The idea here is that + * the deserialize and serialize functions must be coherent, and no + * matter how you choose to write these values back to cache, the + * serializer will have the appropriate knowledge about whether the + * values are cacheable. + * + * For most cases, you will want to use [[StitchCaching]] rather than + * calling this wrapper directly. + * + * @param keySerializer How to convert a K value to a memcached key. + * + * @param valueSerializer How to serialize and deserialize V values, + * as well as which values are cacheable, and how long to store the + * values in cache. 
+ */ +class CacheOperations[K, V]( + keySerializer: K => String, + valueSerializer: ValueSerializer[V], + memcachedClient: memcached.Client, + statsReceiver: StatsReceiver, + logger: Logger, + exceptionStatsHandler: ExceptionStatsHandler = StatsFilter.DefaultExceptions) { + // The memcached operations that are performed via this + // [[CacheOperations]] instance will be tracked under this stats + // receiver. + // + // We count all memcached failures together under this scope, + // because memcached operations should not fail unless there are + // communication problems, so differentiating the method that was + // being called will not give us any useful information. + private[this] val memcachedStats: StatsReceiver = statsReceiver.scope("memcached") + + // Incremented for every attempt to `get` a key from cache. + private[this] val memcachedGetCounter: Counter = memcachedStats.counter("get") + + // One of these two counters is incremented for every successful + // response returned from a `get` call to memcached. + private[this] val memcachedNotFoundCounter: Counter = memcachedStats.counter("not_found") + private[this] val memcachedFoundCounter: Counter = memcachedStats.counter("found") + + // Records the state of the cache load after serialization. The + // policy may transform a value that was successfully loaded from + // cache into any result type, which is why we explicitly track + // "found" and "not_found" above. If `stale` + `fresh` is not equal + // to `found`, then it means that the policy has translated a found + // value into a miss or failure. The policy may do this in order to + // cause the caching filter to treat the value that was found in + // cache in the way it would have treated a miss or failure from + // cache. 
+ private[this] val resultStats: StatsReceiver = statsReceiver.scope("result") + private[this] val resultFreshCounter: Counter = resultStats.counter("fresh") + private[this] val resultStaleCounter: Counter = resultStats.counter("stale") + private[this] val resultMissCounter: Counter = resultStats.counter("miss") + private[this] val resultFailureCounter: Counter = resultStats.counter("failure") + + // Used for recording exceptions that occurred during + // deserialization. This will never be incremented if the + // deserializer returns a result, even if the result is a + // [[CacheResult.Failure]]. See the comment where this stat is + // incremented for more details. + private[this] val deserializeFailureStats: StatsReceiver = statsReceiver.scope("deserialize") + + private[this] val notSerializedCounter: Counter = statsReceiver.counter("not_serialized") + + /** + * Load a batch of values from cache. Mostly this deals with + * converting the [[memcached.GetResult]] to a + * [[Seq[CacheResult[V]]]]. The result is in the same order as the + * keys, and there will always be an entry for each key. This method + * should never return a [[Future.exception]]. 
+ */ + def get(keys: Seq[K]): Future[Seq[CacheResult[V]]] = { + memcachedGetCounter.incr(keys.size) + val cacheKeys: Seq[String] = keys.map(keySerializer) + if (logger.isTraceEnabled) { + logger.trace { + val lines: Seq[String] = keys.zip(cacheKeys).map { case (k, c) => s"\n $k ($c)" } + "Starting load for keys:" + lines.mkString + } + } + + memcachedClient + .getResult(cacheKeys) + .map { getResult => + memcachedNotFoundCounter.incr(getResult.misses.size) + val results: Seq[CacheResult[V]] = + cacheKeys.map { cacheKey => + val result: CacheResult[V] = + getResult.hits.get(cacheKey) match { + case Some(memcachedValue) => + memcachedFoundCounter.incr() + try { + valueSerializer.deserialize(memcachedValue.value) + } catch { + case NonFatal(e) => + // If the serializer throws an exception, then + // the serialized value was malformed. In that + // case, we record the failure so that it can be + // detected and fixed, but treat it as a cache + // miss. The reason that we treat it as a miss + // rather than a failure is that a miss will + // cause a write back to cache, and we want to + // write a valid result back to cache to replace + // the bad entry that we just loaded. + // + // A serializer is free to return Miss itself to + // obtain this behavior if it is expected or + // desired, to avoid the logging and stats (and + // the minor overhead of catching an exception). + // + // The exceptions are tracked separately from + // other exceptions so that it is easy to see + // whether the deserializer itself ever throws an + // exception. + exceptionStatsHandler.record(deserializeFailureStats, e) + logger.warn(s"Failed deserializing value for cache key $cacheKey", e) + CacheResult.Miss + } + + case None if getResult.misses.contains(cacheKey) => + CacheResult.Miss + + case None => + val exception = + getResult.failures.get(cacheKey) match { + case None => + // To get here, this was not a hit or a miss, + // so we expect the key to be present in + // failures. 
If it is not, then either the + // contract of getResult was violated, or this + // method is somehow attempting to access a + // result for a key that was not + // loaded. Either of these indicates a bug, so + // we log a high priority log message. + logger.error( + s"Key $cacheKey not found in hits, misses or failures. " + + "This indicates a bug in the memcached library or " + + "CacheOperations.load" + ) + // We return this as a failure because that + // will cause the repo to be consulted and the + // value *not* to be written back to cache, + // which is probably the safest thing to do + // (if we don't know what's going on, default + // to an uncached repo). + new IllegalStateException + + case Some(e) => + e + } + exceptionStatsHandler.record(memcachedStats, exception) + CacheResult.Failure(exception) + } + + // Count each kind of CacheResult, to make it possible to + // see how effective the caching is. + result match { + case CacheResult.Fresh(_) => resultFreshCounter.incr() + case CacheResult.Stale(_) => resultStaleCounter.incr() + case CacheResult.Miss => resultMissCounter.incr() + case CacheResult.Failure(_) => resultFailureCounter.incr() + } + + result + } + + if (logger.isTraceEnabled) { + logger.trace { + val lines: Seq[String] = + (keys, cacheKeys, results).zipped.map { + case (key, cacheKey, result) => s"\n $key ($cacheKey) -> $result" + } + + "Cache results:" + lines.mkString + } + } + + results + } + .handle { + case e => + // If there is a failure from the memcached client, fan it + // out to each cache key, so that the caller does not need + // to handle failure of the batch differently than failure + // of individual keys. 
This should be rare anyway, since the + // memcached client already does this for common Finagle + // exceptions + resultFailureCounter.incr(keys.size) + val theFailure: CacheResult[V] = CacheResult.Failure(e) + keys.map { _ => + // Record this as many times as we would if it were in the GetResult + exceptionStatsHandler.record(memcachedStats, e) + theFailure + } + } + } + + // Incremented for every attempt to `set` a key in cache. + private[this] val memcachedSetCounter: Counter = memcachedStats.counter("set") + + /** + * Write an entry back to cache, using `set`. If the serializer does + * not serialize the value, then this method will immediately return + * with success. + */ + def set(key: K, value: V): Future[Unit] = + valueSerializer.serialize(value) match { + case Some((expiry, serialized)) => + if (logger.isTraceEnabled) { + logger.trace(s"Writing back to cache $key -> $value (expiry = $expiry)") + } + memcachedSetCounter.incr() + memcachedClient + .set(key = keySerializer(key), flags = 0, expiry = expiry, value = serialized) + .onFailure(exceptionStatsHandler.record(memcachedStats, _)) + + case None => + if (logger.isTraceEnabled) { + logger.trace(s"Not writing back $key -> $value") + } + notSerializedCounter.incr() + Future.Done + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala new file mode 100644 index 000000000..c6e9500e7 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie.caching + +/** + * Encodes the possible states of a value loaded from memcached. + * + * @see [[ValueSerializer]] and [[CacheOperations]] + */ +sealed trait CacheResult[+V] + +object CacheResult { + + /** + * Signals that the value could not be successfully loaded from + * cache. `Failure` values should not be written back to cache. 
+ * + * This value may result from an error talking to the memcached + * instance or it may be returned from the Serializer when the value + * should not be reused, but should also not be overwritten. + */ + final case class Failure(e: Throwable) extends CacheResult[Nothing] + + /** + * Signals that the cache load attempt was successful, but there was + * not a usable value. + * + * When processing a `Miss`, the value should be written back to + * cache if it loads successfully. + */ + case object Miss extends CacheResult[Nothing] + + /** + * Signals that the value was found in cache. + * + * It is not necessary to load the value from the original source. + */ + case class Fresh[V](value: V) extends CacheResult[V] + + /** + * Signals that the value was found in cache. + * + * This value should be used, but it should be refreshed + * out-of-band. + */ + case class Stale[V](value: V) extends CacheResult[V] +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala new file mode 100644 index 000000000..1f2a743c1 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie.caching + +import com.twitter.util.Duration +import com.twitter.util.Time + +/** + * Helpers for creating common expiry functions. + * + * An expiry function maps from the value to a time in the future when + * the value should expire from cache. These are useful in the + * implementation of a [[ValueSerializer]]. + */ +object Expiry { + + /** + * Return a time that indicates to memcached to never expire this + * value. + * + * This function takes [[Any]] so that it can be used at any value + * type, since it doesn't examine the value at all. + */ + val Never: Any => Time = + _ => Time.Top + + /** + * Return function that indicates to memcached that the value should + * not be used after the `ttl` has elapsed. 
+ * + * This function takes [[Any]] so that it can be used at any value + * type, since it doesn't examine the value at all. + */ + def byAge(ttl: Duration): Any => Time = + _ => Time.now + ttl +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala new file mode 100644 index 000000000..37aaa2216 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala @@ -0,0 +1,140 @@ +package com.twitter.tweetypie.caching + +import com.twitter.io.Buf +import com.twitter.scrooge.CompactThriftSerializer +import com.twitter.scrooge.ThriftStruct +import com.twitter.scrooge.ThriftStructCodec +import com.twitter.servo.cache.thriftscala.CachedValue +import com.twitter.servo.cache.thriftscala.CachedValueStatus +import com.twitter.stitch.NotFound +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Time +import com.twitter.util.Try +import java.nio.ByteBuffer + +object ServoCachedValueSerializer { + + /** + * Thrown when the fields of the servo CachedValue struct do not + * satisfy the invariants expected by this serialization code. + */ + case class UnexpectedCachedValueState(cachedValue: CachedValue) extends Exception { + def message: String = s"Unexpected state for CachedValue. Value was: $cachedValue" + } + + val CachedValueThriftSerializer: CompactThriftSerializer[CachedValue] = CompactThriftSerializer( + CachedValue) +} + +/** + * A [[ValueSerializer]] that is compatible with the use of + * Servo's [[CachedValue]] struct by tweetypie: + * + * - The only [[CachedValueStatus]] values that are cacheable are + * [[CachedValueStatus.Found]] and [[CachedValueStatus.NotFound]]. 
+ * + * - We only track the `cachedAtMsec` field, because tweetypie's cache + * interaction does not use the other fields, and the values that + * are cached this way are never updated, so storing readThroughAt + * or writtenThroughAt would not add any information. + * + * - When values are present, they are serialized using + * [[org.apache.thrift.protocol.TCompactProtocol]]. + * + * - The CachedValue struct itself is also serialized using TCompactProtocol. + * + * The serializer operates on [[Try]] values and will cache [[Return]] + * and `Throw(NotFound)` values. + */ +case class ServoCachedValueSerializer[V <: ThriftStruct]( + codec: ThriftStructCodec[V], + expiry: Try[V] => Time, + softTtl: SoftTtl[Try[V]]) + extends ValueSerializer[Try[V]] { + import ServoCachedValueSerializer.UnexpectedCachedValueState + import ServoCachedValueSerializer.CachedValueThriftSerializer + + private[this] val ValueThriftSerializer = CompactThriftSerializer(codec) + + /** + * Return an expiry based on the value and a + * TCompactProtocol-encoded servo CachedValue struct with the + * following fields defined: + * + * - `value`: [[None]] + * for {{{Throw(NotFound)}}}, {{{Some(encodedStruct)}}} for + * [[Return]], where {{{encodedStruct}}} is a + * TCompactProtocol-encoding of the value inside of the Return. + * + * - `status`: [[CachedValueStatus.Found]] if the value is Return, + * and [[CachedValueStatus.NotFound]] if it is Throw(NotFound) + * + * - `cachedAtMsec`: The current time, according to [[Time.now]] + * + * No other fields will be defined. + * + * Returns [[None]] if called with a value that + * should not be cached. 
+ */ + override def serialize(value: Try[V]): Option[(Time, Buf)] = { + def serializeCachedValue(payload: Option[ByteBuffer]) = { + val cachedValue = CachedValue( + value = payload, + status = if (payload.isDefined) CachedValueStatus.Found else CachedValueStatus.NotFound, + cachedAtMsec = Time.now.inMilliseconds) + + val serialized = Buf.ByteArray.Owned(CachedValueThriftSerializer.toBytes(cachedValue)) + + (expiry(value), serialized) + } + + value match { + case Throw(NotFound) => + Some(serializeCachedValue(None)) + case Return(struct) => + val payload = Some(ByteBuffer.wrap(ValueThriftSerializer.toBytes(struct))) + Some(serializeCachedValue(payload)) + case _ => + None + } + } + + /** + * Deserializes values serialized by [[serialize]]. The + * value will be [[CacheResult.Fresh]] or [[CacheResult.Stale]] + * depending on the result of {{{softTtl.isFresh}}}. + * + * @throws UnexpectedCachedValueState if the state of the + * [[CachedValue]] could not be produced by [[serialize]] + */ + override def deserialize(buf: Buf): CacheResult[Try[V]] = { + val cachedValue = CachedValueThriftSerializer.fromBytes(Buf.ByteArray.Owned.extract(buf)) + val hasValue = cachedValue.value.isDefined + val isValid = + (hasValue && cachedValue.status == CachedValueStatus.Found) || + (!hasValue && cachedValue.status == CachedValueStatus.NotFound) + + if (!isValid) { + // Exceptions thrown by deserialization are recorded and treated + // as a cache miss by CacheOperations, so throwing this + // exception will cause the value in cache to be + // overwritten. There will be stats recorded whenever this + // happens. 
+ throw UnexpectedCachedValueState(cachedValue) + } + + val value = + cachedValue.value match { + case Some(valueBuffer) => + val valueBytes = new Array[Byte](valueBuffer.remaining) + valueBuffer.duplicate.get(valueBytes) + Return(ValueThriftSerializer.fromBytes(valueBytes)) + + case None => + Throw(NotFound) + } + + softTtl.toCacheResult(value, Time.fromMilliseconds(cachedValue.cachedAtMsec)) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala new file mode 100644 index 000000000..ad2237924 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala @@ -0,0 +1,120 @@ +package com.twitter.tweetypie.caching + +import com.twitter.util.Duration +import com.twitter.util.Time +import scala.util.Random +import com.twitter.logging.Logger + +/** + * Used to determine whether values successfully retrieved from cache + * are [[CacheResult.Fresh]] or [[CacheResult.Stale]]. This is useful + * in the implementation of a [[ValueSerializer]]. + */ +trait SoftTtl[-V] { + + /** + * Determines whether a cached value was fresh. + * + * @param cachedAt the time at which the value was cached. + */ + def isFresh(value: V, cachedAt: Time): Boolean + + /** + * Wraps the value in Fresh or Stale depending on the value of `isFresh`. + * + * (The type variable U exists because it is not allowed to return + * values of a contravariant type, so we must define a variable that + * is a specific subclass of V. This is worth it because it allows + * us to create polymorphic policies without having to specify the + * type. Another solution would be to make the type invariant, but + * then we would have to specify the type whenever we create an + * instance.) 
+ */ + def toCacheResult[U <: V](value: U, cachedAt: Time): CacheResult[U] = + if (isFresh(value, cachedAt)) CacheResult.Fresh(value) else CacheResult.Stale(value) +} + +object SoftTtl { + + /** + * Regardless of the inputs, the value will always be considered + * fresh. + */ + object NeverRefresh extends SoftTtl[Any] { + override def isFresh(_unusedValue: Any, _unusedCachedAt: Time): Boolean = true + } + + /** + * Trigger refresh based on the length of time that a value has been + * stored in cache, ignoring the value. + * + * @param softTtl Items that were cached longer ago than this value + * will be refreshed when they are accessed. + * + * @param jitter Add nondeterminism to the soft TTL to prevent a + * thundering herd of requests refreshing the value at the same + * time. The time at which the value is considered stale will be + * uniformly spread out over a range of +/- jitter. It is + * valid to set the jitter to zero, which will turn off jittering. + * + * @param specificLogger If non-null, use this logger rather than one based + * on the class name. This logger is only used for trace-level + * logging. 
+ */ + case class ByAge[V]( + softTtl: Duration, + jitter: Duration, + specificLogger: Logger = null, + rng: Random = Random) + extends SoftTtl[Any] { + + private[this] val logger: Logger = + if (specificLogger == null) Logger(getClass) else specificLogger + + private[this] val maxJitterMs: Long = jitter.inMilliseconds + + // this requirement is due to using Random.nextInt to choose the + // jitter, but it allows jitter of greater than 24 days + require(maxJitterMs <= (Int.MaxValue / 2)) + + // Negative jitter probably indicates misuse of the API + require(maxJitterMs >= 0) + + // we want period +/- jitter, but the random generator + // generates non-negative numbers, so we generate [0, 2 * + // maxJitter) and subtract maxJitter to obtain [-maxJitter, + // maxJitter) + private[this] val maxJitterRangeMs: Int = (maxJitterMs * 2).toInt + + // We perform all calculations in milliseconds, so convert the + // period to milliseconds out here. + private[this] val softTtlMs: Long = softTtl.inMilliseconds + + // If the value is below this age, it will always be fresh, + // regardless of jitter. + private[this] val alwaysFreshAgeMs: Long = softTtlMs - maxJitterMs + + // If the value is above this age, it will always be stale, + // regardless of jitter. 
+ private[this] val alwaysStaleAgeMs: Long = softTtlMs + maxJitterMs + + override def isFresh(value: Any, cachedAt: Time): Boolean = { + val ageMs: Long = (Time.now - cachedAt).inMilliseconds + val fresh = + if (ageMs <= alwaysFreshAgeMs) { + true + } else if (ageMs > alwaysStaleAgeMs) { + false + } else { + val jitterMs: Long = rng.nextInt(maxJitterRangeMs) - maxJitterMs + ageMs <= softTtlMs + jitterMs + } + + logger.ifTrace( + s"Checked soft ttl: fresh = $fresh, " + + s"soft_ttl_ms = $softTtlMs, age_ms = $ageMs, value = $value") + + fresh + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala new file mode 100644 index 000000000..45861f04c --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala @@ -0,0 +1,65 @@ +package com.twitter.tweetypie.caching + +import scala.collection.mutable +import com.twitter.util.Future +import com.twitter.stitch.Stitch +import com.twitter.stitch.Runner +import com.twitter.stitch.FutureRunner +import com.twitter.stitch.Group + +/** + * Workaround for an infelicity in the implementation of [[Stitch.async]]. + * + * This has the same semantics as [[Stitch.async]], with the exception + * that interrupts to the main computation will not interrupt the + * async call. + * + * The problem that this implementation solves is that we do not want + * async calls grouped together with synchronous calls. See the + * mailing list thread [1] for discussion. This may eventually be + * fixed in Stitch. 
+ */ +private[caching] object StitchAsync { + // Contains a deferred Stitch that we want to run asynchronously + private[this] class AsyncCall(deferred: => Stitch[_]) { + def call(): Stitch[_] = deferred + } + + private object AsyncGroup extends Group[AsyncCall, Unit] { + override def runner(): Runner[AsyncCall, Unit] = + new FutureRunner[AsyncCall, Unit] { + // All of the deferred calls of any type. When they are + // executed in `run`, the normal Stitch batching and deduping + // will occur. + private[this] val calls = new mutable.ArrayBuffer[AsyncCall] + + def add(call: AsyncCall): Stitch[Unit] = { + // Just remember the deferred call. + calls.append(call) + + // Since we don't wait for the completion of the effect, + // just return a constant value. + Stitch.Unit + } + + def run(): Future[_] = { + // The future returned from this inner invocation of + // Stitch.run is not linked to the returned future, so these + // effects are not linked to the outer Run in which this + // method was invoked. 
+ Stitch.run { + Stitch.traverse(calls) { asyncCall: AsyncCall => + asyncCall + .call() + .liftToTry // So that an exception will not interrupt the other calls + } + } + Future.Unit + } + } + } + + def apply(call: => Stitch[_]): Stitch[Unit] = + // Group together all of the async calls + Stitch.call(new AsyncCall(call), AsyncGroup) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala new file mode 100644 index 000000000..8c9de67ff --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie.caching + +import com.twitter.stitch.MapGroup +import com.twitter.stitch.Group +import com.twitter.stitch.Stitch +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Try + +/** + * Wrapper around [[CacheOperations]] providing a [[Stitch]] API. + */ +case class StitchCacheOperations[K, V](operations: CacheOperations[K, V]) { + import StitchCacheOperations.SetCall + + private[this] val getGroup: Group[K, CacheResult[V]] = + MapGroup[K, CacheResult[V]] { keys: Seq[K] => + operations + .get(keys) + .map(values => keys.zip(values).toMap.mapValues(Return(_))) + } + + def get(key: K): Stitch[CacheResult[V]] = + Stitch.call(key, getGroup) + + private[this] val setGroup: Group[SetCall[K, V], Unit] = + new MapGroup[SetCall[K, V], Unit] { + + override def run(calls: Seq[SetCall[K, V]]): Future[SetCall[K, V] => Try[Unit]] = + Future + .collectToTry(calls.map(call => operations.set(call.key, call.value))) + .map(tries => calls.zip(tries).toMap) + } + + /** + * Performs a [[CacheOperations.set]]. + */ + def set(key: K, value: V): Stitch[Unit] = + // This is implemented as a Stitch.call instead of a Stitch.future + // in order to handle the case where a batch has a duplicate + // key. 
Each copy of the duplicate key will trigger a write back + // to cache, so we dedupe the writes in order to avoid the + // extraneous RPC call. + Stitch.call(new StitchCacheOperations.SetCall(key, value), setGroup) +} + +object StitchCacheOperations { + + /** + * Used as the "call" for [[SetGroup]]. This is essentially a tuple + * where equality is defined only by the key. + */ + private class SetCall[K, V](val key: K, val value: V) { + override def equals(other: Any): Boolean = + other match { + case setCall: SetCall[_, _] => key == setCall.key + case _ => false + } + + override def hashCode: Int = key.hashCode + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala new file mode 100644 index 000000000..830bd11a2 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala @@ -0,0 +1,36 @@ +package com.twitter.tweetypie.caching + +import com.twitter.stitch.Stitch + +/** + * Apply caching to a [[Stitch]] function. + * + * @see CacheResult for more information about the semantics + * implemented here. 
+ */ +class StitchCaching[K, V](operations: CacheOperations[K, V], repo: K => Stitch[V]) + extends (K => Stitch[V]) { + + private[this] val stitchOps = new StitchCacheOperations(operations) + + override def apply(key: K): Stitch[V] = + stitchOps.get(key).flatMap { + case CacheResult.Fresh(value) => + Stitch.value(value) + + case CacheResult.Stale(staleValue) => + StitchAsync(repo(key).flatMap(refreshed => stitchOps.set(key, refreshed))) + .map(_ => staleValue) + + case CacheResult.Miss => + repo(key) + .applyEffect(value => StitchAsync(stitchOps.set(key, value))) + + case CacheResult.Failure(_) => + // In the case of failure, we don't attempt to write back to + // cache, because cache failure usually means communication + // failure, and sending more requests to the cache that holds + // the value for this key could make the situation worse. + repo(key) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala new file mode 100644 index 000000000..42335d0ff --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala @@ -0,0 +1,47 @@ +package com.twitter.tweetypie.caching + +import com.twitter.io.Buf +import com.twitter.util.Time + +/** + * How to store values of type V in cache. This includes whether a + * given value is cacheable, how to serialize it, when it should + * expire from cache, and how to interpret byte patterns from cache. + */ +trait ValueSerializer[V] { + + /** + * Prepare the value for storage in cache. When a [[Some]] is + * returned, the [[Buf]] should be a valid input to [[deserialize]] + * and the [[Time]] will be used as the expiry in the memcached + * command. When [[None]] is returned, it indicates that the value + * cannot or should not be written back to cache. 
+ * + * The most common use case for returning None is caching Try + * values, where certain exceptional values encode a cacheable state + * of a value. In particular, Throw(NotFound) is commonly used to + * encode a missing value, and we usually want to cache those + * negative lookups, but we don't want to cache e.g. a timeout + * exception. + * + * @return a pair of expiry time for this cache entry and the bytes + * to store in cache. If you do not want this value to explicitly + * expire, use Time.Top as the expiry. + */ + def serialize(value: V): Option[(Time, Buf)] + + /** + * Deserialize a value found in cache. This function converts the + * bytes found in memcache to a [[CacheResult]]. In general, you + * probably want to return [[CacheResult.Fresh]] or + * [[CacheResult.Stale]], but you are free to return any of the + * range of [[CacheResult]]s, depending on the behavior that you + * want. + * + * This is a total function because in the common use case, the + * bytes stored in cache will be appropriate for the + * serializer. This method is free to throw any exception if the + * bytes are not valid. 
+ */ + def deserialize(serializedValue: Buf): CacheResult[V] +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD new file mode 100644 index 000000000..c29029d8c --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD @@ -0,0 +1,15 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/transport", + "finagle/finagle-thrift/src/main/scala", + "tweetypie/servo/util/src/main/scala:exception", + "strato/src/main/scala/com/twitter/strato/access", + "strato/src/main/scala/com/twitter/strato/data", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala new file mode 100644 index 000000000..8741ca80d --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala @@ -0,0 +1,185 @@ +package com.twitter.tweetypie.client_id + +import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.mtls.transport.S2STransport +import com.twitter.finagle.thrift.ClientId +import com.twitter.servo.util.Gate +import com.twitter.strato.access.Access +import com.twitter.strato.access.Access.ForwardedServiceIdentifier + +object ClientIdHelper { + + val UnknownClientId = "unknown" + + def default: ClientIdHelper = new ClientIdHelper(UseTransportServiceIdentifier) + + /** + * Trims off the last .element, which is usually .prod or .staging + */ + def getClientIdRoot(clientId: String): String = + clientId.lastIndexOf('.') match { + case -1 => 
clientId + case idx => clientId.substring(0, idx) + } + + /** + * Returns the last .element without the '.' + */ + def getClientIdEnv(clientId: String): String = + clientId.lastIndexOf('.') match { + case -1 => clientId + case idx => clientId.substring(idx + 1) + } + + private[client_id] def asClientId(s: ServiceIdentifier): String = s"${s.service}.${s.environment}" +} + +class ClientIdHelper(serviceIdentifierStrategy: ServiceIdentifierStrategy) { + + private[client_id] val ProcessPathPrefix = "/p/" + + /** + * The effective client id is used for request authorization and metrics + * attribution. For calls to Tweetypie's thrift API, the thrift ClientId + * is used and is expected in the form of "service-name.env". Federated + * Strato clients don't support configured ClientIds and instead provide + * a "process path" containing instance-specific information. So for + * calls to the federated API, we compute an effective client id from + * the ServiceIdentifier, if present, in Strato's Access principles. The + * implementation avoids computing this identifier unless necessary, + * since this method is invoked on every request. + */ + def effectiveClientId: Option[String] = { + val clientId: Option[String] = ClientId.current.map(_.name) + clientId + // Exclude process paths because they are instance-specific and aren't + // supported by tweetypie for authorization or metrics purposes. + .filterNot(_.startsWith(ProcessPathPrefix)) + // Try computing a value from the ServiceId if the thrift + // ClientId is undefined or unsupported. + .orElse(serviceIdentifierStrategy.serviceIdentifier.map(ClientIdHelper.asClientId)) + // Ultimately fall back to the ClientId value, even when given an + // unsupported format, so that error text and debug logs include + // the value passed by the caller. 
+ .orElse(clientId) + } + + def effectiveClientIdRoot: Option[String] = effectiveClientId.map(ClientIdHelper.getClientIdRoot) + + def effectiveServiceIdentifier: Option[ServiceIdentifier] = + serviceIdentifierStrategy.serviceIdentifier +} + +/** Logic for finding a [[ServiceIdentifier]] for the purpose of crafting a client ID. */ +trait ServiceIdentifierStrategy { + def serviceIdentifier: Option[ServiceIdentifier] + + /** + * Returns the only element of the given [[Set]], or [[None]] if it is empty or has several. + * + * This utility is used defensively against a set of principals collected + * from [[Access.getPrincipals]]. While the contract is that there should be at most one + * instance of each principal kind present in that set, in practice that has not always been + * the case. The safest strategy in that case is to abandon the set completely if more than + * one principal is competing. + */ + final protected def onlyElement[T](set: Set[T]): Option[T] = + if (set.size <= 1) { + set.headOption + } else { + None + } +} + +/** + * Picks [[ServiceIdentifier]] from Finagle SSL Transport, if one exists. + * + * This works for both Thrift API calls as well as StratoFed API calls. Strato's + * [[Access#getPrincipals]] collection, which would typically be consulted by StratoFed + * column logic, contains the same [[ServiceIdentifier]] derived from the Finagle SSL + * transport, so there's no need to have separate strategies for Thrift vs StratoFed + * calls. + * + * This is the default behavior of using [[ServiceIdentifier]] for computing client ID. 
+ */ +private[client_id] class UseTransportServiceIdentifier( + // overridable for testing + getPeerServiceIdentifier: => ServiceIdentifier, +) extends ServiceIdentifierStrategy { + override def serviceIdentifier: Option[ServiceIdentifier] = + getPeerServiceIdentifier match { + case EmptyServiceIdentifier => None + case si => Some(si) + } +} + +object UseTransportServiceIdentifier + extends UseTransportServiceIdentifier(S2STransport.peerServiceIdentifier) + +/** + * Picks [[ForwardedServiceIdentifier]] from Strato principals for client ID + * if [[ServiceIdentifier]] points at a call coming from Strato. + * If not present, falls back to [[UseTransportServiceIdentifier]] behavior. + * + * Tweetypie utilizes the strategy to pick [[ServiceIdentifier]] for the purpose + * of generating a client ID when the client ID is absent or unknown. + * [[PreferForwardedServiceIdentifierForStrato]] looks for the [[ForwardedServiceIdentifier]] + * values set by stratoserver request. + * The reason is, stratoserver is effectively a conduit, forwarding the [[ServiceIdentifier]] + * of the _actual client_ that is calling stratoserver. + * Any direct callers not going through stratoserver will default to [[ServiceIdentifier]]. 
+ */ +private[client_id] class PreferForwardedServiceIdentifierForStrato( + // overridable for testing + getPeerServiceIdentifier: => ServiceIdentifier, +) extends ServiceIdentifierStrategy { + val useTransportServiceIdentifier = + new UseTransportServiceIdentifier(getPeerServiceIdentifier) + + override def serviceIdentifier: Option[ServiceIdentifier] = + useTransportServiceIdentifier.serviceIdentifier match { + case Some(serviceIdentifier) if isStrato(serviceIdentifier) => + onlyElement( + Access.getPrincipals + .collect { + case forwarded: ForwardedServiceIdentifier => + forwarded.serviceIdentifier.serviceIdentifier + } + ).orElse(useTransportServiceIdentifier.serviceIdentifier) + case other => other + } + + /** + * Strato uses various service names like "stratoserver" and "stratoserver-patient". + * They all do start with "stratoserver" though, so at the point of implementing, + * the safest bet to recognize strato is to look for this prefix. + * + * This also works for staged strato instances (which it should), despite allowing + * for technically any caller to force this strategy, by creating service certificate + * with this service name. + */ + private def isStrato(serviceIdentifier: ServiceIdentifier): Boolean = + serviceIdentifier.service.startsWith("stratoserver") +} + +object PreferForwardedServiceIdentifierForStrato + extends PreferForwardedServiceIdentifierForStrato(S2STransport.peerServiceIdentifier) + +/** + * [[ServiceIdentifierStrategy]] which dispatches between two delegates based on the value + * of a unitary decider every time [[serviceIdentifier]] is called. 
+ */ +class ConditionalServiceIdentifierStrategy( + private val condition: Gate[Unit], + private val ifTrue: ServiceIdentifierStrategy, + private val ifFalse: ServiceIdentifierStrategy) + extends ServiceIdentifierStrategy { + + override def serviceIdentifier: Option[ServiceIdentifier] = + if (condition()) { + ifTrue.serviceIdentifier + } else { + ifFalse.serviceIdentifier + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD new file mode 100644 index 000000000..30cef76c5 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD @@ -0,0 +1,19 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter.tweetypie", + name = "context", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "graphql/common/src/main/scala/com/twitter/graphql/common/core", + "src/thrift/com/twitter/context:twitter-context-scala", + "twitter-context/src/main/scala", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala new file mode 100644 index 000000000..4d987a02c --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala @@ -0,0 +1,135 @@ +package com.twitter.tweetypie.context + +import com.twitter.context.TwitterContext +import com.twitter.finagle.Filter +import com.twitter.finagle.Service +import com.twitter.finagle.SimpleFilter +import com.twitter.finagle.context.Contexts +import com.twitter.io.Buf +import com.twitter.io.Buf.ByteArray.Owned +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.graphql.common.core.GraphQlClientApplication +import com.twitter.util.Try +import 
java.nio.charset.StandardCharsets.UTF_8 +import scala.util.matching.Regex + +/** + * Context and filters to help track callers of Tweetypie's endpoints. This context and its + * filters were originally added to provide visibility into callers of Tweetypie who are + * using the birdherd library to access tweets. + * + * This context data is intended to be marshalled by callers to Tweetypie, but then the + * context data is stripped (moved from broadcast to local). This happens so that the + * context data is not forwarded down tweetypie's backend rpc chains, which often result + * in transitive calls back into tweetypie. This effectively creates single-hop marshalling. + */ +object TweetypieContext { + // Bring Tweetypie permitted TwitterContext into scope + val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + case class Ctx(via: String) + val Empty = Ctx("") + + object Broadcast { + private[this] object Key extends Contexts.broadcast.Key[Ctx](id = Ctx.getClass.getName) { + + override def marshal(value: Ctx): Buf = + Owned(value.via.getBytes(UTF_8)) + + override def tryUnmarshal(buf: Buf): Try[Ctx] = + Try(Ctx(new String(Owned.extract(buf), UTF_8))) + } + + private[TweetypieContext] def current(): Option[Ctx] = + Contexts.broadcast.get(Key) + + def currentOrElse(default: Ctx): Ctx = + current().getOrElse(default) + + def letClear[T](f: => T): T = + Contexts.broadcast.letClear(Key)(f) + + def let[T](ctx: Ctx)(f: => T): T = + if (Empty == ctx) { + letClear(f) + } else { + Contexts.broadcast.let(Key, ctx)(f) + } + + // ctx has to be by name so we can re-evaluate it for every request (for usage in ServiceTwitter.scala) + def filter(ctx: => Ctx): Filter.TypeAgnostic = + new Filter.TypeAgnostic { + override def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] = + (request: Req, service: Service[Req, Rep]) => Broadcast.let(ctx)(service(request)) + } + } + + object Local { + private[this] val Key = + new 
Contexts.local.Key[Ctx] + + private[TweetypieContext] def let[T](ctx: Option[Ctx])(f: => T): T = + ctx match { + case Some(ctx) if ctx != Empty => Contexts.local.let(Key, ctx)(f) + case _ => Contexts.local.letClear(Key)(f) // None or Some(Empty): clear the key, as Broadcast.let does; avoids a MatchError + } + + def current(): Option[Ctx] = + Contexts.local.get(Key) + + def filter[Req, Rep]: SimpleFilter[Req, Rep] = + (request: Req, service: Service[Req, Rep]) => { + val ctx = Broadcast.current() + Broadcast.letClear(Local.let(ctx)(service(request))) + } + + private[this] def clientAppIdToName(clientAppId: Long) = + GraphQlClientApplication.AllById.get(clientAppId).map(_.name).getOrElse("nonTOO") + + private[this] val pathRegexes: Seq[(Regex, String)] = Seq( + ("timeline_conversation_.*_json".r, "timeline_conversation__slug__json"), + ("user_timeline_.*_json".r, "user_timeline__user__json"), + ("[0-9]{2,}".r, "_id_") + ) + + // `context.via` will either be a string like: "birdherd" or "birdherd:/1.1/statuses/show/123.json", + // depending on whether birdherd code was able to determine the path of the request. 
+ private[this] def getViaAndPath(via: String): (String, Option[String]) = + via.split(":", 2) match { + case Array(via, path) => + val sanitizedPath = path + .replace('/', '_') + .replace('.', '_') + + // Apply each regex in turn + val normalizedPath = pathRegexes.foldLeft(sanitizedPath) { + case (path, (regex, replacement)) => regex.replaceAllIn(path, replacement) + } + + (via, Some(normalizedPath)) + case Array(via) => (via, None) + } + + def trackStats[U](scopes: StatsReceiver*): Unit = + for { + tweetypieCtx <- TweetypieContext.Local.current() + (via, pathOpt) = getViaAndPath(tweetypieCtx.via) + twitterCtx <- TwitterContext() + clientAppId <- twitterCtx.clientApplicationId + } yield { + val clientAppName = clientAppIdToName(clientAppId) + scopes.foreach { stats => + val ctxStats = stats.scope("context") + val viaStats = ctxStats.scope("via", via) + viaStats.scope("all").counter("requests").incr() + val viaClientStats = viaStats.scope("by_client", clientAppName) + viaClientStats.counter("requests").incr() + pathOpt.foreach { path => + val viaPathStats = viaStats.scope("by_path", path) + viaPathStats.counter("requests").incr() + } + } + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD new file mode 100644 index 000000000..8c40f583a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD @@ -0,0 +1,15 @@ +scala_library( + sources = ["DeciderGates.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/google/guava", + "decider", + "finagle/finagle-toggle/src/main/scala/com/twitter/finagle/server", + "tweetypie/servo/decider", + "tweetypie/servo/util/src/main/scala", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala 
b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala new file mode 100644 index 000000000..56df716f6 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala @@ -0,0 +1,60 @@ +package com.twitter.tweetypie +package decider + +import com.google.common.hash.Hashing +import com.twitter.decider.Decider +import com.twitter.decider.Feature +import com.twitter.servo.gate.DeciderGate +import com.twitter.servo.util.Gate +import java.nio.charset.StandardCharsets +import scala.collection.mutable +trait DeciderGates { + def overrides: Map[String, Boolean] = Map.empty + def decider: Decider + def prefix: String + + protected val seenFeatures: mutable.HashSet[String] = new mutable.HashSet[String] + + private def deciderFeature(name: String): Feature = { + decider.feature(prefix + "_" + name) + } + + def withOverride[T](name: String, mkGate: Feature => Gate[T]): Gate[T] = { + seenFeatures += name + overrides.get(name).map(Gate.const).getOrElse(mkGate(deciderFeature(name))) + } + + protected def linear(name: String): Gate[Unit] = withOverride[Unit](name, DeciderGate.linear) + protected def byId(name: String): Gate[Long] = withOverride[Long](name, DeciderGate.byId) + + /** + * It returns a Gate[String] that can be used to check availability of the feature. + * The string is hashed into a Long and used as an "id" and then used to call servo's + * DeciderGate.byId + * + * @param name decider name + * @return Gate[String] + */ + protected def byStringId(name: String): Gate[String] = + byId(name).contramap { s: String => + Hashing.sipHash24().hashString(s, StandardCharsets.UTF_8).asLong() + } + + def all: Traversable[String] = seenFeatures + + def unusedOverrides: Set[String] = overrides.keySet.diff(all.toSet) + + /** + * Generate a map of name -> availability, taking into account overrides. + * Overrides are either on or off so map to 10000 or 0, respectively. 
+ */ + def availabilityMap: Map[String, Option[Int]] = + all.map { name => + val availability: Option[Int] = overrides + .get(name) + .map(on => if (on) 10000 else 0) + .orElse(deciderFeature(name).availability) + + name -> availability + }.toMap +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD new file mode 100644 index 000000000..a23ca66e4 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD @@ -0,0 +1,10 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "decider", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala new file mode 100644 index 000000000..7b396f3f8 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie.decider.overrides + +import com.twitter.decider.LocalOverrides + +object TweetyPieDeciderOverrides extends LocalOverrides.Namespace("tweetypie", "tweetypie_") { + val CheckSpamOnRetweet: LocalOverrides.Override = feature("check_spam_on_retweet") + val CheckSpamOnTweet: LocalOverrides.Override = feature("check_spam_on_tweet") + val ConversationControlUseFeatureSwitchResults: LocalOverrides.Override = feature( + "conversation_control_use_feature_switch_results") + val ConversationControlTweetCreateEnabled: LocalOverrides.Override = feature( + "conversation_control_tweet_create_enabled") + val EnableExclusiveTweetControlValidation: LocalOverrides.Override = feature( + "enable_exclusive_tweet_control_validation") + val EnableHotKeyCaches: LocalOverrides.Override = 
feature("enable_hot_key_caches") + val HydrateConversationMuted: LocalOverrides.Override = feature("hydrate_conversation_muted") + val HydrateExtensionsOnWrite: LocalOverrides.Override = feature("hydrate_extensions_on_write") + val HydrateEscherbirdAnnotations: LocalOverrides.Override = feature( + "hydrate_escherbird_annotations") + val HydrateGnipProfileGeoEnrichment: LocalOverrides.Override = feature( + "hydrate_gnip_profile_geo_enrichment") + val HydratePastedPics: LocalOverrides.Override = feature("hydrate_pasted_pics") + val HydratePerspectivesEditsForOtherSafetyLevels: LocalOverrides.Override = feature( + "hydrate_perspectives_edits_for_other_levels") + val HydrateScrubEngagements: LocalOverrides.Override = feature("hydrate_scrub_engagements") + val LogRepoExceptions: LocalOverrides.Override = feature("log_repo_exceptions") + val MediaRefsHydratorIncludePastedMedia: LocalOverrides.Override = feature( + "media_refs_hydrator_include_pasted_media") + val ShortCircuitLikelyPartialTweetReads: LocalOverrides.Override = feature( + "short_circuit_likely_partial_tweet_reads_ms") + val RateLimitByLimiterService: LocalOverrides.Override = feature("rate_limit_by_limiter_service") + val RateLimitTweetCreationFailure: LocalOverrides.Override = feature( + "rate_limit_tweet_creation_failure") + val ReplyTweetConversationControlHydrationEnabled = feature( + "reply_tweet_conversation_control_hydration_enabled" + ) + val DisableInviteViaMention = feature( + "disable_invite_via_mention" + ) + val EnableRemoveUnmentionedImplicitMentions: LocalOverrides.Override = feature( + "enable_remove_unmentioned_implicit_mentions") + val useReplicatedDeleteTweet2: LocalOverrides.Override = feature("use_replicated_delete_tweet_2") +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD new file mode 100644 index 000000000..de6522d52 --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD @@ -0,0 +1,15 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift-scala", + "tweetypie/servo/util/src/main/scala", + "stitch/stitch-core", + "strato/src/main/scala/com/twitter/strato/client", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "util/util-core", + "util/util-stats", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala new file mode 100644 index 000000000..dd123206f --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala @@ -0,0 +1,165 @@ +package com.twitter.tweetypie.jiminy.tweetypie + +import com.twitter.finagle.stats.CategorizingExceptionStatsHandler +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.incentives.jiminy.thriftscala._ +import com.twitter.servo.util.FutureArrow +import com.twitter.servo.util.Gate +import com.twitter.stitch.Stitch +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw + +case class NudgeBuilderRequest( + text: String, + inReplyToTweetId: Option[NudgeBuilder.TweetId], + conversationId: Option[NudgeBuilder.TweetId], + hasQuotedTweet: Boolean, + nudgeOptions: Option[CreateTweetNudgeOptions], + tweetId: Option[NudgeBuilder.TweetId]) + +trait NudgeBuilder extends FutureArrow[NudgeBuilderRequest, Unit] { + + /** + * Check whether the user should receive a nudge instead of creating + * the Tweet. 
If nudgeOptions is None, then no nudge check will be + * performed. + * + * @return a Future.exception containing a [[TweetCreateFailure]] if the + * user should be nudged, or Future.Unit if the user should not be + * nudged. + */ + def apply( + request: NudgeBuilderRequest + ): Future[Unit] +} + +object NudgeBuilder { + type Type = FutureArrow[NudgeBuilderRequest, Unit] + type TweetId = Long + + // darkTrafficCreateNudgeOptions ensures that our dark traffic sends a request that will + // accurately test the Jiminy backend. In this case, we specify that we want checks for all + // possible nudge types. + private[this] val darkTrafficCreateNudgeOptions = Some( + CreateTweetNudgeOptions( + requestedNudgeTypes = Some( + Set( + TweetNudgeType.PotentiallyToxicTweet, + TweetNudgeType.ReviseOrMute, + TweetNudgeType.ReviseOrHideThenBlock, + TweetNudgeType.ReviseOrBlock + ) + ) + ) + ) + + private[this] def mkJiminyRequest( + request: NudgeBuilderRequest, + isDarkRequest: Boolean = false + ): CreateTweetNudgeRequest = { + val tweetType = + if (request.inReplyToTweetId.nonEmpty) TweetType.Reply + else if (request.hasQuotedTweet) TweetType.QuoteTweet + else TweetType.OriginalTweet + + CreateTweetNudgeRequest( + tweetText = request.text, + tweetType = tweetType, + inReplyToTweetId = request.inReplyToTweetId, + conversationId = request.conversationId, + createTweetNudgeOptions = + if (isDarkRequest) darkTrafficCreateNudgeOptions else request.nudgeOptions, + tweetId = request.tweetId + ) + } + + /** + * NudgeBuilder implemented by calling the strato column `incentives/createTweetNudge`. + * + * Stats recorded: + * - latency_ms: Latency histogram (also implicitly number of + * invocations). This is recorded when a nudge check was requested + * (`nudgeOptions` is non-empty) or a dark-traffic request was sent. + * + * - nudge: The nudge check succeeded and a nudge was created. + * + * - no_nudge: The nudge check succeeded, but no nudge was created. + * + * - failures: Calling strato to create a nudge failed. 
Broken out + * by exception. + */ + + def apply( + nudgeArrow: FutureArrow[CreateTweetNudgeRequest, CreateTweetNudgeResponse], + enableDarkTraffic: Gate[Unit], + stats: StatsReceiver + ): NudgeBuilder = { + new NudgeBuilder { + private[this] val nudgeLatencyStat = stats.stat("latency_ms") + private[this] val nudgeCounter = stats.counter("nudge") + private[this] val noNudgeCounter = stats.counter("no_nudge") + private[this] val darkRequestCounter = stats.counter("dark_request") + private[this] val nudgeExceptionHandler = new CategorizingExceptionStatsHandler + + override def apply( + request: NudgeBuilderRequest + ): Future[Unit] = + request.nudgeOptions match { + case None => + if (enableDarkTraffic()) { + darkRequestCounter.incr() + Stat + .timeFuture(nudgeLatencyStat) { + nudgeArrow(mkJiminyRequest(request, isDarkRequest = true)) + } + .transform { _ => + // ignore the response since it is a dark request + Future.Done + } + } else { + Future.Done + } + + case Some(_) => + Stat + .timeFuture(nudgeLatencyStat) { + nudgeArrow(mkJiminyRequest(request)) + } + .transform { + case Throw(e) => + nudgeExceptionHandler.record(stats, e) + // If we failed to invoke the nudge column, then + // just continue on with the Tweet creation. 
+ Future.Done + + case Return(CreateTweetNudgeResponse(Some(nudge))) => + nudgeCounter.incr() + Future.exception(TweetCreateFailure.Nudged(nudge = nudge)) + + case Return(CreateTweetNudgeResponse(None)) => + noNudgeCounter.incr() + Future.Done + } + } + } + } + + def apply( + strato: StratoClient, + enableDarkTraffic: Gate[Unit], + stats: StatsReceiver + ): NudgeBuilder = { + val executer = + strato.executer[CreateTweetNudgeRequest, CreateTweetNudgeResponse]( + "incentives/createTweetNudge") + val nudgeArrow: FutureArrow[CreateTweetNudgeRequest, CreateTweetNudgeResponse] = { req => + Stitch.run(executer.execute(req)) + } + apply(nudgeArrow, enableDarkTraffic, stats) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD new file mode 100644 index 000000000..52259fc54 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD @@ -0,0 +1,18 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/common/text/language:language-identifier", + "src/java/com/twitter/common/text/language:locale-util", + "src/java/com/twitter/common/text/pipeline", + "src/java/com/twitter/common/text/token", + "src/java/com/twitter/common_internal/text", + "src/java/com/twitter/common_internal/text/version", + "tweetypie/src/resources/com/twitter/tweetypie/matching", + "util/util-core/src/main/scala/com/twitter/concurrent", + "util/util-core/src/main/scala/com/twitter/io", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala new file mode 100644 index 000000000..09e9695cc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala @@ -0,0 +1,92 @@ +package 
com.twitter.tweetypie.matching + +object TokenSequence { + + /** + * Is `suffix` a suffix of `s`, starting at `offset` in `s`? + */ + def hasSuffixAt(s: CharSequence, suffix: CharSequence, offset: Int): Boolean = + if (offset == 0 && (s.eq(suffix) || s == suffix)) { + true + } else if (suffix.length != (s.length - offset)) { + false + } else { + @annotation.tailrec + def go(i: Int): Boolean = + if (i == suffix.length) true + else if (suffix.charAt(i) == s.charAt(offset + i)) go(i + 1) + else false + + go(0) + } + + /** + * Do two [[CharSequence]]s contain the same characters? + * + * [[CharSequence]] equality is not sufficient because + * [[CharSequence]]s of different types may not consider other + * [[CharSequence]]s containing the same characters equivalent. + */ + def sameCharacters(s1: CharSequence, s2: CharSequence): Boolean = + hasSuffixAt(s1, s2, 0) + + /** + * This method implements the product definition of a token matching a + * keyword. That definition is: + * + * - The token contains the same characters as the keyword. + * - The token contains the same characters as the keyword after + * dropping a leading '#' or '@' from the token. + * + * The intention is that a keyword matches an identical hashtag, but + * if the keyword itself is a hashtag, it only matches the hashtag + * form. + * + * The tokenization process should rule out tokens or keywords that + * start with multiple '#' characters, even though this implementation + * allows for e.g. token "##a" to match "#a". + */ + def tokenMatches(token: CharSequence, keyword: CharSequence): Boolean = + if (sameCharacters(token, keyword)) true + else if (token.length == 0) false + else { + val tokenStart = token.charAt(0) + (tokenStart == '#' || tokenStart == '@') && hasSuffixAt(token, keyword, 1) + } +} + +/** + * A sequence of normalized tokens. The sequence depends on the locale + * in which the text was parsed and the version of the penguin library + * that was used at tokenization time. 
+ */ +case class TokenSequence private[matching] (toIndexedSeq: IndexedSeq[CharSequence]) { + import TokenSequence.tokenMatches + + private def apply(i: Int): CharSequence = toIndexedSeq(i) + + def isEmpty: Boolean = toIndexedSeq.isEmpty + def nonEmpty: Boolean = toIndexedSeq.nonEmpty + + /** + * Does the supplied sequence of keywords match a consecutive sequence + * of tokens within this sequence? + */ + def containsKeywordSequence(keywords: TokenSequence): Boolean = { + val finalIndex = toIndexedSeq.length - keywords.toIndexedSeq.length + + @annotation.tailrec + def matchesAt(offset: Int, i: Int): Boolean = + if (i >= keywords.toIndexedSeq.length) true + else if (tokenMatches(this(i + offset), keywords(i))) matchesAt(offset, i + 1) + else false + + @annotation.tailrec + def search(offset: Int): Boolean = + if (offset > finalIndex) false + else if (matchesAt(offset, 0)) true + else search(offset + 1) + + search(0) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala new file mode 100644 index 000000000..7cb3cd315 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala @@ -0,0 +1,156 @@ +package com.twitter.tweetypie.matching + +import com.twitter.common.text.language.LocaleUtil +import com.twitter.common_internal.text.pipeline.TwitterTextNormalizer +import com.twitter.common_internal.text.pipeline.TwitterTextTokenizer +import com.twitter.common_internal.text.version.PenguinVersion +import com.twitter.concurrent.Once +import com.twitter.io.StreamIO +import java.util.Locale +import scala.collection.JavaConverters._ + +/** + * Extract a sequence of normalized tokens from the input text. The + * normalization and tokenization are properly configured for keyword + * matching between texts. 
+ */ +trait Tokenizer { + def tokenize(input: String): TokenSequence +} + +object Tokenizer { + + /** + * When a Penguin version is not explicitly specified, use this + * version of Penguin to perform normalization and tokenization. If + * you cache tokenized text, be sure to store the version as well, to + * avoid comparing text that was normalized with different algorithms. + */ + val DefaultPenguinVersion: PenguinVersion = PenguinVersion.PENGUIN_6 + + /** + * If you already know the locale of the text that is being tokenized, + * use this method to get a tokenizer that is much more efficient than + * the Tweet or Query tokenizer, since it does not have to perform + * language detection. + */ + def forLocale(locale: Locale): Tokenizer = get(locale, DefaultPenguinVersion) + + /** + * Obtain a `Tokenizer` that will tokenize the text for the given + * locale and version of the Penguin library. + */ + def get(locale: Locale, version: PenguinVersion): Tokenizer = + TokenizerFactories(version).forLocale(locale) + + /** + * Encapsulates the configuration and use of [[TwitterTextTokenizer]] + * and [[TwitterTextNormalizer]]. + */ + private[this] class TokenizerFactory(version: PenguinVersion) { + // The normalizer is thread-safe, so share one instance. + private[this] val normalizer = + (new TwitterTextNormalizer.Builder(version)).build() + + // The TwitterTextTokenizer is relatively expensive to build, + // and is not thread safe, so keep instances of it in a + // ThreadLocal. + private[this] val local = + new ThreadLocal[TwitterTextTokenizer] { + override def initialValue: TwitterTextTokenizer = + (new TwitterTextTokenizer.Builder(version)).build() + } + + /** + * Obtain a [[Tokenizer]] for this combination of [[PenguinVersion]] + * and [[Locale]]. 
+ */ + def forLocale(locale: Locale): Tokenizer = + new Tokenizer { + override def tokenize(input: String): TokenSequence = { + val stream = local.get.getTwitterTokenStreamFor(locale) + stream.reset(normalizer.normalize(input, locale)) + val builder = IndexedSeq.newBuilder[CharSequence] + while (stream.incrementToken) builder += stream.term() + TokenSequence(builder.result()) + } + } + } + + /** + * Since there are a small number of Penguin versions, eagerly + * initialize a TokenizerFactory for each version, to avoid managing + * mutable state. + */ + private[this] val TokenizerFactories: PenguinVersion => TokenizerFactory = + PenguinVersion.values.map(v => v -> new TokenizerFactory(v)).toMap + + /** + * The set of locales used in warmup. These locales are mentioned in + * the logic of TwitterTextTokenizer and TwitterTextNormalizer. + */ + private[this] val WarmUpLocales: Seq[Locale] = + Seq + .concat( + Seq( + Locale.JAPANESE, + Locale.KOREAN, + LocaleUtil.UNKNOWN, + LocaleUtil.THAI, + LocaleUtil.ARABIC, + LocaleUtil.SWEDISH + ), + LocaleUtil.CHINESE_JAPANESE_LOCALES.asScala, + LocaleUtil.CJK_LOCALES.asScala + ) + .toSet + .toArray + .toSeq + + /** + * Load the default inputs that are used for warming up this library. + */ + def warmUpCorpus(): Seq[String] = { + val stream = getClass.getResourceAsStream("warmup-text.txt") + val bytes = + try StreamIO.buffer(stream) + finally stream.close() + bytes.toString("UTF-8").linesIterator.toArray.toSeq + } + + /** + * Exercise the functionality of this library on the specified + * strings. In general, prefer [[warmUp]] to this method. 
+ */ + def warmUpWith(ver: PenguinVersion, texts: Iterable[String]): Unit = + texts.foreach { txt => + // Exercise each locale + WarmUpLocales.foreach { loc => + Tokenizer.get(loc, ver).tokenize(txt) + UserMutes.builder().withPenguinVersion(ver).withLocale(loc).validate(txt) + } + + // Exercise language detection + TweetTokenizer.get(ver).tokenize(txt) + UserMutes.builder().withPenguinVersion(ver).validate(txt) + } + + private[this] val warmUpOnce = Once(warmUpWith(DefaultPenguinVersion, warmUpCorpus())) + + /** + * The creation of the first TwitterTextTokenizer is relatively + * expensive, and tokenizing some texts may cause significant + * initialization. + * + * This method exercises the functionality of this library + * with a range of texts in order to perform as much initialization as + * possible before the library is used in a latency-sensitive way. + * + * The warmup routine will only run once. Subsequent invocations of + * `warmUp` will not do additional work, and will return once warmup is + * complete. + * + * The warmup will take on the order of seconds. + */ + def warmUp(): Unit = warmUpOnce() +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala new file mode 100644 index 000000000..592891235 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie.matching + +import com.twitter.common.text.pipeline.TwitterLanguageIdentifier +import com.twitter.common_internal.text.version.PenguinVersion +import java.util.Locale + +object TweetTokenizer extends Tokenizer { + type LocalePicking = Option[Locale] => Tokenizer + + /** + * Get a Tokenizer-producing function that uses the supplied locale + * to select an appropriate Tokenizer. 
+ */ + def localePicking: LocalePicking = { + case None => TweetTokenizer + case Some(locale) => Tokenizer.forLocale(locale) + } + + private[this] val tweetLangIdentifier = + (new TwitterLanguageIdentifier.Builder).buildForTweet() + + /** + * Get a Tokenizer that performs Tweet language detection, and uses + * that result to tokenize the text. If you already know the locale of + * the tweet text, use `Tokenizer.get`, because it's much + * cheaper. + */ + def get(version: PenguinVersion): Tokenizer = + new Tokenizer { + override def tokenize(text: String): TokenSequence = { + val locale = tweetLangIdentifier.identify(text).getLocale + Tokenizer.get(locale, version).tokenize(text) + } + } + + private[this] val Default = get(Tokenizer.DefaultPenguinVersion) + + /** + * Tokenize the given text using Tweet language detection and + * `Tokenizer.DefaultPenguinVersion`. Prefer `Tokenizer.forLocale` if + * you already know the language of the text. + */ + override def tokenize(tweetText: String): TokenSequence = + Default.tokenize(tweetText) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala new file mode 100644 index 000000000..dc7430c86 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala @@ -0,0 +1,128 @@ +package com.twitter.tweetypie.matching + +import com.twitter.common.text.pipeline.TwitterLanguageIdentifier +import com.twitter.common_internal.text.version.PenguinVersion +import java.util.Locale +import scala.collection.JavaConversions.asScalaBuffer + +object UserMutesBuilder { + private[matching] val Default = + new UserMutesBuilder(Tokenizer.DefaultPenguinVersion, None) + + private val queryLangIdentifier = + (new TwitterLanguageIdentifier.Builder).buildForQuery() +} + +class UserMutesBuilder private (penguinVersion: PenguinVersion, localeOpt: Option[Locale]) { + + /** + * Use the specified Penguin version when 
tokenizing a keyword mute + * string. In general, use the default version, unless you need to + * specify a particular version for compatibility with another system + * that is using that version. + */ + def withPenguinVersion(ver: PenguinVersion): UserMutesBuilder = + if (ver == penguinVersion) this + else new UserMutesBuilder(ver, localeOpt) + + /** + * Use the specified locale when tokenizing a keyword mute string. + */ + def withLocale(locale: Locale): UserMutesBuilder = + if (localeOpt.contains(locale)) this + else new UserMutesBuilder(penguinVersion, Some(locale)) + + /** + * When tokenizing a user mute list, detect the language of the + * text. This is significantly more expensive than using a predefined + * locale, but is appropriate when the locale is not yet known. Discards any locale set via withLocale. + */ + def detectLocale(): UserMutesBuilder = + if (localeOpt.isEmpty) this + else new UserMutesBuilder(penguinVersion, None) + + private[this] lazy val tokenizer = + localeOpt match { + case None => + // No locale was specified, so use a Tokenizer that performs + // language detection before tokenizing. + new Tokenizer { + override def tokenize(text: String): TokenSequence = { + val locale = UserMutesBuilder.queryLangIdentifier.identify(text).getLocale + Tokenizer.get(locale, penguinVersion).tokenize(text) + } + } + + case Some(locale) => + Tokenizer.get(locale, penguinVersion) + } + + /** + * Given a list of the user's raw keyword mutes, return a preprocessed + * set of mutes suitable for matching against tweet text. If the input + * contains any phrases that fail validation, then they will be + * dropped. + */ + def build(rawInput: Seq[String]): UserMutes = + UserMutes(rawInput.flatMap(validate(_).right.toOption)) + + /** + * Java-friendly API for processing a user's list of raw keyword mutes + * into a preprocessed form suitable for matching against text. 
+ */ + def fromJavaList(rawInput: java.util.List[String]): UserMutes = + build(asScalaBuffer(rawInput).toSeq) + + /** + * Validate the raw user input muted phrase. Currently, the only + * inputs that are not valid for keyword muting are those inputs that + * do not contain any keywords, because those inputs would match all + * tweets. + */ + def validate(mutedPhrase: String): Either[UserMutes.ValidationError, TokenSequence] = { + val keywords = tokenizer.tokenize(mutedPhrase) + if (keywords.isEmpty) UserMutes.EmptyPhraseError else Right(keywords) + } +} + +object UserMutes { + sealed trait ValidationError + + /** + * The phrase's tokenization did not produce any tokens + */ + case object EmptyPhrase extends ValidationError + + private[matching] val EmptyPhraseError = Left(EmptyPhrase) + + /** + * Get a [[UserMutesBuilder]] that uses the default Penguin version and + * performs language identification to choose a locale. + */ + def builder(): UserMutesBuilder = UserMutesBuilder.Default +} + +/** + * A user's muted keyword list, preprocessed into token sequences. + */ +case class UserMutes private[matching] (toSeq: Seq[TokenSequence]) { + + /** + * Do any of the users' muted keyword sequences occur within the + * supplied text? 
+ */ + def matches(text: TokenSequence): Boolean = + toSeq.exists(text.containsKeywordSequence) + + /** + * Find all positions of matching muted keyword from the user's + * muted keyword list + */ + def find(text: TokenSequence): Seq[Int] = + toSeq.zipWithIndex.collect { + case (token, index) if text.containsKeywordSequence(token) => index + } + + def isEmpty: Boolean = toSeq.isEmpty + def nonEmpty: Boolean = toSeq.nonEmpty +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD new file mode 100644 index 000000000..2b1e9ec79 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD @@ -0,0 +1,17 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-core/src/main/scala", + "tweetypie/servo/util/src/main/scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tco-util", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "util/util-logging/src/main/scala/com/twitter/logging", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala new file mode 100644 index 000000000..bd0e6f4a3 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala @@ -0,0 +1,149 @@ +package com.twitter.tweetypie +package media + +import com.twitter.mediaservices.commons.thriftscala.MediaCategory +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.tco_util.TcoSlug +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.TweetLenses + +/** + * A smörgåsbord of media-related helper methods. 
+ */ +object Media { + val AnimatedGifContentType = "video/mp4 codecs=avc1.42E0" + + case class MediaTco(expandedUrl: String, url: String, displayUrl: String) + + val ImageContentTypes: Set[MediaContentType] = + Set[MediaContentType]( + MediaContentType.ImageJpeg, + MediaContentType.ImagePng, + MediaContentType.ImageGif + ) + + val AnimatedGifContentTypes: Set[MediaContentType] = + Set[MediaContentType]( + MediaContentType.VideoMp4 + ) + + val VideoContentTypes: Set[MediaContentType] = + Set[MediaContentType]( + MediaContentType.VideoGeneric + ) + + val InUseContentTypes: Set[MediaContentType] = + Set[MediaContentType]( + MediaContentType.ImageGif, + MediaContentType.ImageJpeg, + MediaContentType.ImagePng, + MediaContentType.VideoMp4, + MediaContentType.VideoGeneric + ) + + def isImage(contentType: MediaContentType): Boolean = + ImageContentTypes.contains(contentType) + + def contentTypeToString(contentType: MediaContentType): String = + contentType match { + case MediaContentType.ImageGif => "image/gif" + case MediaContentType.ImageJpeg => "image/jpeg" + case MediaContentType.ImagePng => "image/png" + case MediaContentType.VideoMp4 => "video/mp4" + case MediaContentType.VideoGeneric => "video" + case _ => throw new IllegalArgumentException(s"UnknownMediaContentType: $contentType") + } + + def stringToContentType(str: String): MediaContentType = + str match { + case "image/gif" => MediaContentType.ImageGif + case "image/jpeg" => MediaContentType.ImageJpeg + case "image/png" => MediaContentType.ImagePng + case "video/mp4" => MediaContentType.VideoMp4 + case "video" => MediaContentType.VideoGeneric + case _ => throw new IllegalArgumentException(s"Unknown Content Type String: $str") + } + + def extensionForContentType(cType: MediaContentType): String = + cType match { + case MediaContentType.ImageJpeg => "jpg" + case MediaContentType.ImagePng => "png" + case MediaContentType.ImageGif => "gif" + case MediaContentType.VideoMp4 => "mp4" + case 
MediaContentType.VideoGeneric => "" + case _ => "unknown" + } + + /** + * Extract a URL entity from a media entity. + */ + def extractUrlEntity(mediaEntity: MediaEntity): UrlEntity = + UrlEntity( + fromIndex = mediaEntity.fromIndex, + toIndex = mediaEntity.toIndex, + url = mediaEntity.url, + expanded = Some(mediaEntity.expandedUrl), + display = Some(mediaEntity.displayUrl) + ) + + /** + * Copy the fields from the URL entity into the media entity. + */ + def copyFromUrlEntity(mediaEntity: MediaEntity, urlEntity: UrlEntity): MediaEntity = { + val expandedUrl = + urlEntity.expanded.orElse(Option(mediaEntity.expandedUrl)).getOrElse(urlEntity.url) + + val displayUrl = + urlEntity.url match { + case TcoSlug(slug) => MediaUrl.Display.fromTcoSlug(slug) + case _ => urlEntity.expanded.getOrElse(urlEntity.url) + } + + mediaEntity.copy( + fromIndex = urlEntity.fromIndex, + toIndex = urlEntity.toIndex, + url = urlEntity.url, + expandedUrl = expandedUrl, + displayUrl = displayUrl + ) + } + + def getAspectRatio(size: MediaSize): AspectRatio = + getAspectRatio(size.width, size.height) + + def getAspectRatio(width: Int, height: Int): AspectRatio = { + if (width == 0 || height == 0) { + throw new IllegalArgumentException(s"Dimensions must be non zero: ($width, $height)") + } + + def calculateGcd(a: Int, b: Int): Int = + if (b == 0) a else calculateGcd(b, a % b) + + val gcd = calculateGcd(math.max(width, height), math.min(width, height)) + AspectRatio((width / gcd).toShort, (height / gcd).toShort) + } + + /** + * Return just the media that belongs to this tweet + */ + def ownMedia(tweet: Tweet): Seq[MediaEntity] = + TweetLenses.media.get(tweet).filter(isOwnMedia(tweet.id, _)) + + /** + * Does the given media entity, which was found on the tweet with the specified + * tweetId, belong to that tweet? 
+ */ + def isOwnMedia(tweetId: TweetId, entity: MediaEntity): Boolean = + entity.sourceStatusId.forall(_ == tweetId) + + /** + * Mixed Media is any case where there is more than one media item & any of them is not an image. + */ + + def isMixedMedia(mediaEntities: Seq[MediaEntity]): Boolean = + mediaEntities.length > 1 && (mediaEntities.flatMap(_.mediaInfo).exists { + case _: MediaInfo.ImageInfo => false + case _ => true + } || + mediaEntities.flatMap(_.mediaKey).map(_.mediaCategory).exists(_ != MediaCategory.TweetImage)) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala new file mode 100644 index 000000000..eb26dfad8 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala @@ -0,0 +1,108 @@ +package com.twitter.tweetypie +package media + +import com.twitter.logging.Logger +import com.twitter.tweetypie.thriftscala.MediaEntity +import com.twitter.tweetypie.thriftscala.UrlEntity + +/** + * Creating and parsing tweet media entity URLs. + * + * There are four kinds of URL in a media entity: + * + * - Display URLs: pic.twitter.com aliases for the short URL, for + * embedding in the tweet text. + * + * - Short URLs: regular t.co URLs that expand to the permalink URL. + * + * - Permalink URLs: link to a page that displays the media after + * doing authorization + * + * - Asset URLs: links to the actual media asset. + * + */ +object MediaUrl { + private[this] val log = Logger(getClass) + + /** + * The URL that should be filled in to the displayUrl field of the + * media entity. This URL behaves exactly the same as a t.co link + * (only the domain is different.) + */ + object Display { + val Root = "pic.twitter.com/" + + def fromTcoSlug(tcoSlug: String): String = Root + tcoSlug + } + + /** + * The link target for the link in the tweet text (the expanded URL + * for the media, copied from the URL entity.) 
For native photos, + * this is the tweet permalink page. + * + * For users without a screen name ("handleless" or NoScreenName users) + * a permalink to /i/status/:tweet_id is used. + */ + object Permalink { + val Root = "https://twitter.com/" + val Internal = "i" + val PhotoSuffix = "/photo/1" + val VideoSuffix = "/video/1" + + def apply(screenName: String, tweetId: TweetId, isVideo: Boolean): String = + Root + + (if (screenName.isEmpty) Internal else screenName) + + "/status/" + + tweetId + + (if (isVideo) VideoSuffix else PhotoSuffix) + + private[this] val PermalinkRegex = + """https?://twitter.com/(?:#!/)?\w+/status/(\d+)/(?:photo|video)/\d+""".r + + private[this] def getTweetId(permalink: String): Option[TweetId] = + permalink match { + case PermalinkRegex(tweetIdStr) => + try { + Some(tweetIdStr.toLong) + } catch { + // Digits too big to fit in a Long + case _: NumberFormatException => None + } + case _ => None + } + + def getTweetId(urlEntity: UrlEntity): Option[TweetId] = + urlEntity.expanded.flatMap(getTweetId) + + def hasTweetId(permalink: String, tweetId: TweetId): Boolean = + getTweetId(permalink).contains(tweetId) + + def hasTweetId(mediaEntity: MediaEntity, tweetId: TweetId): Boolean = + hasTweetId(mediaEntity.expandedUrl, tweetId) + + def hasTweetId(urlEntity: UrlEntity, tweetId: TweetId): Boolean = + getTweetId(urlEntity).contains(tweetId) + } + + /** + * Converts a url that starts with "https://" to one that starts with "http://". + */ + def httpsToHttp(url: String): String = + url.replace("https://", "http://") + + /** + * Gets the last path element from an asset url. This exists temporarily to support + * the now deprecated mediaPath element in MediaEntity. + */ + def mediaPathFromUrl(url: String): String = + url.lastIndexOf('/') match { + case -1 => + log.error("Invalid media path. Could not find last element: " + url) + // Better to return a broken preview URL to the client + // than to fail the whole request. 
+ "" + + case idx => + url.substring(idx + 1) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala new file mode 100644 index 000000000..d8fb9b2d1 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala @@ -0,0 +1,7 @@ +package com.twitter.tweetypie + +package object media { + type TweetId = Long + type UserId = Long + type MediaId = Long +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala new file mode 100644 index 000000000..a0035b9e5 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala @@ -0,0 +1,80 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.ManhattanValue +import com.twitter.tweetypie.storage.TweetUtils.collectWithRateLimitCheck +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Time + +object AddTweetHandler { + private[storage] type InternalAddTweet = ( + Tweet, + ManhattanOperations.Insert, + Scribe, + StatsReceiver, + Time + ) => Stitch[Unit] + + def apply( + insert: ManhattanOperations.Insert, + scribe: Scribe, + stats: StatsReceiver + ): TweetStorageClient.AddTweet = + tweet => doAddTweet(tweet, insert, scribe, stats, Time.now) + + def makeRecords( + storedTweet: StoredTweet, + timestamp: Time + ): Seq[TweetManhattanRecord] = { + val core = CoreFieldsCodec.fromTweet(storedTweet) + val packedCoreFieldsBlob = CoreFieldsCodec.toTFieldBlob(core) + val coreRecord = + TweetManhattanRecord( + TweetKey.coreFieldsKey(storedTweet.id), + ManhattanValue(TFieldBlobCodec.toByteBuffer(packedCoreFieldsBlob), Some(timestamp)) + ) + + val 
otherFieldIds = + TweetFields.nonCoreInternalFields ++ TweetFields.getAdditionalFieldIds(storedTweet) + + val otherFields = + storedTweet + .getFieldBlobs(otherFieldIds) + .map { + case (fieldId, tFieldBlob) => + TweetManhattanRecord( + TweetKey.fieldKey(storedTweet.id, fieldId), + ManhattanValue(TFieldBlobCodec.toByteBuffer(tFieldBlob), Some(timestamp)) + ) + } + .toSeq + otherFields :+ coreRecord + } + + private[storage] val doAddTweet: InternalAddTweet = ( + tweet: Tweet, + insert: ManhattanOperations.Insert, + scribe: Scribe, + stats: StatsReceiver, + timestamp: Time + ) => { + assert(tweet.coreData.isDefined, s"Tweet ${tweet.id} is missing coreData: $tweet") + + val storedTweet = StorageConversions.toStoredTweet(tweet) + val records = makeRecords(storedTweet, timestamp) + val inserts = records.map(insert) + val insertsWithRateLimitCheck = + Stitch.collect(inserts.map(_.liftToTry)).map(collectWithRateLimitCheck).lowerFromTry + + Stats.updatePerFieldQpsCounters( + "addTweet", + TweetFields.getAdditionalFieldIds(storedTweet), + 1, + stats + ) + + insertsWithRateLimitCheck.unit.onSuccess { _ => scribe.logAdded(storedTweet) } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD new file mode 100644 index 000000000..6a3db82e7 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD @@ -0,0 +1,47 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = [ + "bazel-compatible", + "bazel-incompatible-scaladoc", + ], + dependencies = [ + "3rdparty/jvm/com/chuusai:shapeless", + "3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind", + "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala", + "3rdparty/jvm/com/google/guava", + "3rdparty/jvm/com/twitter/bijection:core", + "3rdparty/jvm/com/twitter/bijection:scrooge", + 
"3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/commons-codec", + "3rdparty/jvm/org/apache/thrift:libthrift", + "diffshow", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization", + "finagle/finagle-core/src/main", + "finagle/finagle-stats", + "finagle/finagle-thriftmux/src/main/scala", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/repo/src/main/scala", + "tweetypie/servo/util", + "snowflake:id", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/manhattan:internal-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "storage/clients/manhattan/client/src/main/scala", + "tbird-thrift:scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal:storage_internal-scala", + "util-internal/scribe", + "util/util-core:scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala new file mode 100644 index 000000000..224c09cb0 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala @@ -0,0 +1,20 @@ +package com.twitter.tweetypie.storage + +import com.twitter.util.Time + +object BounceDeleteHandler { + def apply( + insert: ManhattanOperations.Insert, + scribe: Scribe + ): TweetStorageClient.BounceDelete = + tweetId => { + val mhTimestamp = 
Time.now + val bounceDeleteRecord = TweetStateRecord + .BounceDeleted(tweetId, mhTimestamp.inMillis) + .toTweetMhRecord + + insert(bounceDeleteRecord).onSuccess { _ => + scribe.logRemoved(tweetId, mhTimestamp, isSoftDeleted = true) + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala new file mode 100644 index 000000000..670014f26 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala @@ -0,0 +1,242 @@ +package com.twitter.tweetypie.storage + +import com.twitter.bijection.Conversion.asMethod +import com.twitter.bijection.Injection +import com.twitter.scrooge.TFieldBlob +import com.twitter.storage.client.manhattan.kv._ +import com.twitter.tweetypie.storage.Response.FieldResponse +import com.twitter.tweetypie.storage.Response.FieldResponseCode +import com.twitter.tweetypie.storage_internal.thriftscala.CoreFields +import com.twitter.tweetypie.storage_internal.thriftscala.InternalTweet +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import java.io.ByteArrayOutputStream +import java.nio.ByteBuffer +import org.apache.thrift.protocol.TBinaryProtocol +import org.apache.thrift.transport.TIOStreamTransport +import org.apache.thrift.transport.TMemoryInputTransport +import scala.collection.immutable +import scala.util.control.NoStackTrace + +// NOTE: All field ids and Tweet structure in this file correspond to the StoredTweet struct ONLY + +object ByteArrayCodec { + def toByteBuffer(byteArray: Array[Byte]): ByteBuffer = byteArray.as[ByteBuffer] + def fromByteBuffer(buffer: ByteBuffer): Array[Byte] = buffer.as[Array[Byte]] +} + +object StringCodec { + private val string2ByteBuffer = Injection.connect[String, Array[Byte], ByteBuffer] + def toByteBuffer(strValue: String): ByteBuffer = string2ByteBuffer(strValue) + def fromByteBuffer(buffer: ByteBuffer): String = string2ByteBuffer.invert(buffer).get +} + +/** + 
* Terminology + * ----------- + * Tweet id field : The field number of 'tweetId' in the 'Tweet' thrift structure (i.e. "1") + * + * First AdditionalField id : The ID of the first additional field in the 'Tweet' thrift structure. All field Ids less than this are + * considered internal and all the ids greater than or equal to this field id are considered 'Additional fields'. + * This is set to 100. + * + * Internal Fields : Fields with ids [1 to firstAdditionalFieldId) (excluding firstAdditionalFieldId) + * + * Core fields : (Subset of Internal fields)- Fields with ids [1 to 8, 19]. These fields are "packed" together and stored + * under a single key. This key is referred to as "CoreFieldsKey" (see @TweetKeyType.CoreFieldsKey). + * Note: Actually field 1 is skipped when packing as this field is the tweet id and it need not be + * explicitly stored since the pkey already contains the tweet Id) + * + * Root Core field id : The field id under which the packed core fields are stored in Manhattan. (This is field Id "1") + * + * Required fields : (Subset of Core fields) - Fields with ids [1 to 5] that MUST be present on every tweet. + * + * Additional Fields : All fields with field ids >= 'firstAdditionalFieldId' + * + * Compiled Additional fields : (Subset of Additional Fields) - All fields that the storage library knows about + * (i.e. present on the latest storage_internal.thrift that is compiled-in). + * + * Passthrough fields : (Subset of Additional Fields) - The fields on storage_internal.thrift that the storage library is NOT aware of. + * These field ids are obtained by looking at the "_passthroughFields" member of the scrooge-generated + * 'Tweet' object. 
+ * + * coreFieldsIdInInternalTweet: This is the field id of the core fields (the only field) in the Internal Tweet struct + */ +object TweetFields { + val firstAdditionalFieldId: Short = 100 + val tweetIdField: Short = 1 + val geoFieldId: Short = 9 + + // The field under which all the core field values are stored (in serialized form). + val rootCoreFieldId: Short = 1 + + val coreFieldIds: immutable.IndexedSeq[FieldId] = { + val quotedTweetFieldId: Short = 19 + (1 to 8).map(_.toShort) ++ Seq(quotedTweetFieldId) + } + val requiredFieldIds: immutable.IndexedSeq[FieldId] = (1 to 5).map(_.toShort) + + val coreFieldsIdInInternalTweet: Short = 1 + + val compiledAdditionalFieldIds: Seq[FieldId] = + StoredTweet.metaData.fields.filter(_.id >= firstAdditionalFieldId).map(_.id) + val internalFieldIds: Seq[FieldId] = + StoredTweet.metaData.fields.filter(_.id < firstAdditionalFieldId).map(_.id) + val nonCoreInternalFields: Seq[FieldId] = + (internalFieldIds.toSet -- coreFieldIds.toSet).toSeq + def getAdditionalFieldIds(tweet: StoredTweet): Seq[FieldId] = + compiledAdditionalFieldIds ++ tweet._passthroughFields.keys.toSeq +} + +/** + * Helper object to convert TFieldBlob to ByteBuffer that gets stored in Manhattan. 
+ * + * The following is the format in which the TFieldBlob gets stored: + * [Version][TField][TFieldBlob] + */ +object TFieldBlobCodec { + val BinaryProtocolFactory: TBinaryProtocol.Factory = new TBinaryProtocol.Factory() + val FormatVersion = 1.0 + + def toByteBuffer(tFieldBlob: TFieldBlob): ByteBuffer = { + val baos = new ByteArrayOutputStream() + val prot = BinaryProtocolFactory.getProtocol(new TIOStreamTransport(baos)) + + prot.writeDouble(FormatVersion) + prot.writeFieldBegin(tFieldBlob.field) + prot.writeBinary(ByteArrayCodec.toByteBuffer(tFieldBlob.data)) + + ByteArrayCodec.toByteBuffer(baos.toByteArray) + } + + def fromByteBuffer(buffer: ByteBuffer): TFieldBlob = { + val byteArray = ByteArrayCodec.fromByteBuffer(buffer) + val prot = BinaryProtocolFactory.getProtocol(new TMemoryInputTransport(byteArray)) + + val version = prot.readDouble() + if (version != FormatVersion) { + throw new VersionMismatchError( + "Version mismatch in decoding ByteBuffer to TFieldBlob. " + + "Actual version: " + version + ". Expected version: " + FormatVersion + ) + } + + val tField = prot.readFieldBegin() + val dataBuffer = prot.readBinary() + val data = ByteArrayCodec.fromByteBuffer(dataBuffer) + + TFieldBlob(tField, data) + } +} + +/** + * Helper object to help convert 'CoreFields' object to/from TFieldBlob (and also to construct + * 'CoreFields' object from a 'StoredTweet' object) + * + * More details: + * - A subset of fields on the 'StoredTweet' thrift structure (2-8,19) are 'packaged' and stored + * together as a serialized TFieldBlob object under a single key in Manhattan (see TweetKeyCodec + * helper object above for more details). + * + * - To make the packing/unpacking the fields to/from TFieldBlob object, we created the following + * two helper thrift structures 'CoreFields' and 'InternalTweet' + * + * // The field Ids and types here MUST exactly match field Ids on 'StoredTweet' thrift structure. + * struct CoreFields { + * 2: optional i64 user_id + * ... 
+ * 8: optional i64 contributor_id + * ... + * 19: optional StoredQuotedTweet stored_quoted_tweet + * + * } + * + * // The field id of core fields MUST be "1" + * struct InternalTweet { + * 1: CoreFields coreFields + * } + * + * - Given the above two structures, packing/unpacking fields (2-8,19) on StoredTweet object into a TFieldBlob + * becomes very trivial: + * For packing: + * (i) Copy fields (2-8,19) from StoredTweet object to a new CoreFields object + * (ii) Create a new InternalTweet object with the 'CoreFields' object constructed in step (i) above + * (iii) Extract field "1" as a TFieldBlob from InternalTweet (by calling the scrooge generated "getFieldBlob(1)" + * function on the InternalTweet object) + * + * For unpacking: + * (i) Create an empty 'InternalTweet' object + * (ii) Call scrooge-generated 'setField' by passing the tFieldBlob blob (created by packing steps above) + * (iii) Doing step (ii) above will create a hydrated 'CoreFields' object that can be accessed by 'coreFields' + * member of 'InternalTweet' object. + */ +object CoreFieldsCodec { + val coreFieldIds: Seq[FieldId] = CoreFields.metaData.fields.map(_.id) + + // "Pack" the core fields i.e converts 'CoreFields' object to "packed" tFieldBlob (See description + // above for more details) + def toTFieldBlob(coreFields: CoreFields): TFieldBlob = { + InternalTweet(Some(coreFields)).getFieldBlob(TweetFields.coreFieldsIdInInternalTweet).get + } + + // "Unpack" the core fields from a packed TFieldBlob into a CoreFields object (see description above for + // more details) + def fromTFieldBlob(tFieldBlob: TFieldBlob): CoreFields = { + InternalTweet().setField(tFieldBlob).coreFields.get + } + + // "Unpack" the core fields from a packed TFieldBlob into a Map of core-fieldId-> TFieldBlob + def unpackFields(tFieldBlob: TFieldBlob): Map[Short, TFieldBlob] = + fromTFieldBlob(tFieldBlob).getFieldBlobs(coreFieldIds) + + // Create a 'CoreFields' thrift object from 'Tweet' thrift object. 
+ def fromTweet(tweet: StoredTweet): CoreFields = { + // As mentioned above, the field ids and types on the 'CoreFields' struct exactly match the + // corresponding fields on StoredTweet structure. So it is safe to call .getFieldBlob() on the StoredTweet object + // and pass the returned tFieldBlob to 'setField' on the 'CoreFields' object. + coreFieldIds.foldLeft(CoreFields()) { + case (core, fieldId) => + tweet.getFieldBlob(fieldId) match { + case None => core + case Some(tFieldBlob) => core.setField(tFieldBlob) + } + } + } +} + +/** + * Helper object to convert ManhattanException to FieldResponseCode thrift object + */ +object FieldResponseCodeCodec { + import FieldResponseCodec.ValueNotFoundException + + def fromManhattanException(mhException: ManhattanException): FieldResponseCode = { + mhException match { + case _: ValueNotFoundException => FieldResponseCode.ValueNotFound + case _: InternalErrorManhattanException => FieldResponseCode.Error + case _: InvalidRequestManhattanException => FieldResponseCode.InvalidRequest + case _: DeniedManhattanException => FieldResponseCode.Error + case _: UnsatisfiableManhattanException => FieldResponseCode.Error + case _: TimeoutManhattanException => FieldResponseCode.Timeout + } + } +} + +/** + * Helper object to construct FieldResponse thrift object from an Exception. + * This is typically called to convert 'ManhattanException' object to 'FieldResponse' thrift object. If the exception carries no message and no additionalMsg is given, the response message is None. + */ +object FieldResponseCodec { + class ValueNotFoundException extends ManhattanException("Value not found!") with NoStackTrace + private[storage] val NotFound = new ValueNotFoundException + + def fromThrowable(e: Throwable, additionalMsg: Option[String] = None): FieldResponse = { + val (respCode, errMsg) = e match { + case mhException: ManhattanException => + (FieldResponseCodeCodec.fromManhattanException(mhException), mhException.getMessage) + case _ => (FieldResponseCode.Error, e.getMessage) + } + + val respMsg = additionalMsg.map(_ + ". 
" + errMsg).orElse(Option(errMsg)) + FieldResponse(respCode, respMsg) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala new file mode 100644 index 000000000..5c89c7a5e --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala @@ -0,0 +1,67 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.util.Throw +import com.twitter.util.Time + +object DeleteAdditionalFieldsHandler { + def apply( + delete: ManhattanOperations.Delete, + stats: StatsReceiver + ): TweetStorageClient.DeleteAdditionalFields = + (unfilteredTweetIds: Seq[TweetId], additionalFields: Seq[Field]) => { + val tweetIds = unfilteredTweetIds.filter(_ > 0) + val additionalFieldIds = additionalFields.map(_.id) + require(additionalFields.nonEmpty, "Additional fields to delete cannot be empty") + require( + additionalFieldIds.min >= TweetFields.firstAdditionalFieldId, + s"Additional fields $additionalFields must be in additional field range (>= ${TweetFields.firstAdditionalFieldId})" + ) + + Stats.addWidthStat("deleteAdditionalFields", "tweetIds", tweetIds.size, stats) + Stats.addWidthStat( + "deleteAdditionalFields", + "additionalFieldIds", + additionalFieldIds.size, + stats + ) + Stats.updatePerFieldQpsCounters( + "deleteAdditionalFields", + additionalFieldIds, + tweetIds.size, + stats + ) + val mhTimestamp = Time.now + + val stitches = tweetIds.map { tweetId => + val (fieldIds, mhKeysToDelete) = + additionalFieldIds.map { fieldId => + (fieldId, TweetKey.additionalFieldsKey(tweetId, fieldId)) + }.unzip + + val deletionStitches = mhKeysToDelete.map { mhKeyToDelete => + 
delete(mhKeyToDelete, Some(mhTimestamp)).liftToTry + } + + Stitch.collect(deletionStitches).map { responsesTries => + val wasRateLimited = responsesTries.exists { + case Throw(e: DeniedManhattanException) => true + case _ => false + } + + val resultsPerTweet = fieldIds.zip(responsesTries).toMap + + if (wasRateLimited) { + buildTweetOverCapacityResponse("deleteAdditionalFields", tweetId, resultsPerTweet) + } else { + buildTweetResponse("deleteAdditionalFields", tweetId, resultsPerTweet) + } + } + } + + Stitch.collect(stitches) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala new file mode 100644 index 000000000..093559234 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala @@ -0,0 +1,41 @@ +package com.twitter.tweetypie.storage + +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import com.twitter.tweetypie.thriftscala.{Tweet => TpTweet} + +/** + * A field of the stored version of a tweet to read, update, or delete. + * + * There is not a one-to-one correspondence between the fields ids of + * [[com.twitter.tweetypie.thriftscala.Tweet]] and + * [[com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet]]. For example, in StoredTweet, + * the nsfwUser property is field 11; in Tweet, it is a property of the coreData struct in field 2. + * To circumvent the confusion of using one set of field ids or the other, callers use instances of + * [[Field]] to reference the part of the object to modify. 
+ */ +class Field private[storage] (val id: Short) extends AnyVal { + override def toString: String = id.toString +} + +/** + * NOTE: Make sure `AllUpdatableCompiledFields` is kept up to date when adding any new field + */ +object Field { + import AdditionalFields.isAdditionalFieldId + val Geo: Field = new Field(StoredTweet.GeoField.id) + val HasTakedown: Field = new Field(StoredTweet.HasTakedownField.id) + val NsfwUser: Field = new Field(StoredTweet.NsfwUserField.id) + val NsfwAdmin: Field = new Field(StoredTweet.NsfwAdminField.id) + val TweetypieOnlyTakedownCountryCodes: Field = + new Field(TpTweet.TweetypieOnlyTakedownCountryCodesField.id) + val TweetypieOnlyTakedownReasons: Field = + new Field(TpTweet.TweetypieOnlyTakedownReasonsField.id) + + val AllUpdatableCompiledFields: Set[Field] = Set(Geo, HasTakedown, NsfwUser, NsfwAdmin) + + def additionalField(id: Short): Field = { + require(isAdditionalFieldId(id), "field id must be in the additional field range") + new Field(id) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala new file mode 100644 index 000000000..dfacaa4a6 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala @@ -0,0 +1,150 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.tweetypie.storage.Response.TweetResponseCode +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import com.twitter.tweetypie.thriftscala.DeletedTweet +import scala.util.control.NonFatal + +sealed trait DeleteState +object DeleteState { + + /** + * This tweet is deleted but has not been permanently deleted from Manhattan. 
Tweets in this state + * may be undeleted. + */ + case object SoftDeleted extends DeleteState + + /** + * This tweet is deleted after being bounced for violating the Twitter Rules but has not been + * permanently deleted from Manhattan. Tweets in this state may NOT be undeleted. + */ + case object BounceDeleted extends DeleteState + + /** + * This tweet has been permanently deleted from Manhattan. + */ + case object HardDeleted extends DeleteState + + /** + * There is no data in Manhattan to distinguish this tweet id from one that never existed. + */ + case object NotFound extends DeleteState + + /** + * This tweet exists and is not in a deleted state. + */ + case object NotDeleted extends DeleteState +} + +case class DeletedTweetResponse( + tweetId: TweetId, + overallResponse: TweetResponseCode, + deleteState: DeleteState, + tweet: Option[DeletedTweet]) + +object GetDeletedTweetsHandler { + def apply( + read: ManhattanOperations.Read, + stats: StatsReceiver + ): TweetStorageClient.GetDeletedTweets = + (unfilteredTweetIds: Seq[TweetId]) => { + val tweetIds = unfilteredTweetIds.filter(_ > 0) + + Stats.addWidthStat("getDeletedTweets", "tweetIds", tweetIds.size, stats) + + val stitches = tweetIds.map { tweetId => + read(tweetId) + .map { mhRecords => + val storedTweet = buildStoredTweet(tweetId, mhRecords) + + TweetStateRecord.mostRecent(mhRecords) match { + case Some(m: TweetStateRecord.SoftDeleted) => softDeleted(m, storedTweet) + case Some(m: TweetStateRecord.BounceDeleted) => bounceDeleted(m, storedTweet) + case Some(m: TweetStateRecord.HardDeleted) => hardDeleted(m, storedTweet) + case _ if storedTweet.getFieldBlobs(expectedFields).isEmpty => notFound(tweetId) + case _ => notDeleted(tweetId, storedTweet) + } + } + .handle { + case _: DeniedManhattanException => + DeletedTweetResponse( + tweetId, + TweetResponseCode.OverCapacity, + DeleteState.NotFound, + None + ) + + case NonFatal(ex) => + TweetUtils.log.warning( + ex, + s"Unhandled exception in 
GetDeletedTweetsHandler for tweetId: $tweetId" + ) + DeletedTweetResponse(tweetId, TweetResponseCode.Failure, DeleteState.NotFound, None) + } + } + + Stitch.collect(stitches) + } + + private def notFound(tweetId: TweetId) = + DeletedTweetResponse( + tweetId = tweetId, + overallResponse = TweetResponseCode.Success, + deleteState = DeleteState.NotFound, + tweet = None + ) + + private def softDeleted(record: TweetStateRecord.SoftDeleted, storedTweet: StoredTweet) = + DeletedTweetResponse( + record.tweetId, + TweetResponseCode.Success, + DeleteState.SoftDeleted, + Some( + StorageConversions + .toDeletedTweet(storedTweet) + .copy(deletedAtMsec = Some(record.createdAt)) + ) + ) + + private def bounceDeleted(record: TweetStateRecord.BounceDeleted, storedTweet: StoredTweet) = + DeletedTweetResponse( + record.tweetId, + TweetResponseCode.Success, + DeleteState.BounceDeleted, + Some( + StorageConversions + .toDeletedTweet(storedTweet) + .copy(deletedAtMsec = Some(record.createdAt)) + ) + ) + + private def hardDeleted(record: TweetStateRecord.HardDeleted, storedTweet: StoredTweet) = + DeletedTweetResponse( + record.tweetId, + TweetResponseCode.Success, + DeleteState.HardDeleted, + Some( + StorageConversions + .toDeletedTweet(storedTweet) + .copy( + hardDeletedAtMsec = Some(record.createdAt), + deletedAtMsec = Some(record.deletedAt) + ) + ) + ) + + /** + * notDeleted returns a tweet to simplify tweetypie.handler.UndeleteTweetHandler + */ + private def notDeleted(tweetId: TweetId, storedTweet: StoredTweet) = + DeletedTweetResponse( + tweetId = tweetId, + overallResponse = TweetResponseCode.Success, + deleteState = DeleteState.NotDeleted, + tweet = Some(StorageConversions.toDeletedTweet(storedTweet)) + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala new file mode 100644 index 000000000..eafdda5e9 --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala @@ -0,0 +1,126 @@ +package com.twitter.tweetypie.storage + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.stitch.StitchSeqGroup +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Error +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Response._ +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Time +import com.twitter.util.Try +import scala.collection.mutable + +object GetStoredTweetHandler { + private[this] object DeletedState { + def unapply(stateRecord: Option[TweetStateRecord]): Option[TweetStateRecord] = + stateRecord match { + case state @ (Some(_: TweetStateRecord.SoftDeleted) | Some( + _: TweetStateRecord.HardDeleted) | Some(_: TweetStateRecord.BounceDeleted)) => + state + case _ => None + } + } + + private[this] def deletedAtMs(stateRecord: Option[TweetStateRecord]): Option[Long] = + stateRecord match { + case Some(d: TweetStateRecord.SoftDeleted) => Some(d.createdAt) + case Some(d: TweetStateRecord.BounceDeleted) => Some(d.createdAt) + case Some(d: TweetStateRecord.HardDeleted) => Some(d.deletedAt) + case _ => None + } + + private[this] def tweetResponseFromRecords( + tweetId: TweetId, + mhRecords: Seq[TweetManhattanRecord], + statsReceiver: StatsReceiver, + ): GetStoredTweet.Response = { + val errs = + mutable.Buffer[Error]() + + val hasStoredTweetFields: Boolean = mhRecords.exists { + case TweetManhattanRecord(TweetKey(_, _: TweetKey.LKey.FieldKey), _) => true + case _ => false + } + + val storedTweet = if (hasStoredTweetFields) { + Try(buildStoredTweet(tweetId, mhRecords, includeScrubbed = true)) + .onFailure(_ => errs.append(Error.TweetIsCorrupt)) + .toOption + } else { + None + } 
+ + val scrubbedFields: Set[FieldId] = extractScrubbedFields(mhRecords) + val tweet: Option[Tweet] = storedTweet.map(StorageConversions.fromStoredTweetAllowInvalid) + val stateRecords: Seq[TweetStateRecord] = TweetStateRecord.fromTweetMhRecords(mhRecords) + val tweetState: Option[TweetStateRecord] = TweetStateRecord.mostRecent(mhRecords) + + storedTweet.foreach { storedTweet => + val storedExpectedFields = storedTweet.getFieldBlobs(expectedFields) + val missingExpectedFields = expectedFields.filterNot(storedExpectedFields.contains) + if (missingExpectedFields.nonEmpty || !isValid(storedTweet)) { + errs.append(Error.TweetFieldsMissingOrInvalid) + } + + val invalidScrubbedFields = storedTweet.getFieldBlobs(scrubbedFields).keys + if (invalidScrubbedFields.nonEmpty) { + errs.append(Error.ScrubbedFieldsPresent) + } + + if (deletedAtMs(tweetState).exists(_ < Time.now.inMilliseconds - 14.days.inMilliseconds)) { + errs.append(Error.TweetShouldBeHardDeleted) + } + } + + val err = Option(errs.toList).filter(_.nonEmpty) + + (tweet, tweetState, err) match { + case (None, None, None) => + statsReceiver.counter("not_found").incr() + NotFound(tweetId) + + case (None, Some(tweetState: TweetStateRecord.HardDeleted), None) => + statsReceiver.counter("hard_deleted").incr() + HardDeleted(tweetId, Some(tweetState), stateRecords, scrubbedFields) + + case (None, _, Some(errs)) => + statsReceiver.counter("failed").incr() + Failed(tweetId, tweetState, stateRecords, scrubbedFields, errs) + + case (Some(tweet), _, Some(errs)) => + statsReceiver.counter("found_invalid").incr() + FoundWithErrors(tweet, tweetState, stateRecords, scrubbedFields, errs) + + case (Some(tweet), DeletedState(state), None) => + statsReceiver.counter("deleted").incr() + FoundDeleted(tweet, Some(state), stateRecords, scrubbedFields) + + case (Some(tweet), _, None) => + statsReceiver.counter("found").incr() + Found(tweet, tweetState, stateRecords, scrubbedFields) + } + } + + def apply(read: ManhattanOperations.Read, 
statsReceiver: StatsReceiver): GetStoredTweet = { + + object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] { + override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = { + Stats.addWidthStat("getStoredTweet", "tweetIds", tweetIds.size, statsReceiver) + Stitch.traverse(tweetIds)(read(_)) + } + } + + tweetId => + if (tweetId <= 0) { + Stitch.NotFound + } else { + Stitch + .call(tweetId, mhGroup) + .map(mhRecords => + tweetResponseFromRecords(tweetId, mhRecords, statsReceiver.scope("getStoredTweet"))) + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala new file mode 100644 index 000000000..f68025e2d --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala @@ -0,0 +1,167 @@ +package com.twitter.tweetypie.storage + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging.Logger +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.Stitch +import com.twitter.stitch.StitchSeqGroup +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.storage.client.manhattan.kv.ManhattanException +import com.twitter.tweetypie.storage.TweetStateRecord.BounceDeleted +import com.twitter.tweetypie.storage.TweetStateRecord.HardDeleted +import com.twitter.tweetypie.storage.TweetStateRecord.SoftDeleted +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.util.Duration +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Time + +object GetTweetHandler { + private[this] val logger = Logger(getClass) + + 
////////////////////////////////////////////////// + // Logging racy reads for later validation. + + val RacyTweetWindow: Duration = 10.seconds + + /** + * If this read is soon after the tweet was created, then we would usually + * expect it to be served from cache. This early read indicates that this + * tweet is prone to consistency issues, so we log what's present in + * Manhattan at the time of the read for later analysis. + */ + private[this] def logRacyRead(tweetId: TweetId, records: Seq[TweetManhattanRecord]): Unit = + if (SnowflakeId.isSnowflakeId(tweetId)) { + val tweetAge = Time.now.since(SnowflakeId(tweetId).time) + if (tweetAge <= RacyTweetWindow) { + val sb = new StringBuilder + sb.append("racy_tweet_read\t") + .append(tweetId) + .append('\t') + .append(tweetAge.inMilliseconds) // Log the age for analysis purposes + records.foreach { rec => + sb.append('\t') + .append(rec.lkey) + rec.value.timestamp.foreach { ts => + // If there is a timestamp for this key, log it so that we can tell + // later on whether a value should have been present. We expect + // keys written in a single write to have the same timestamp, and + // generally, keys written in separate writes will have different + // timestamps. The timestamp value is optional in Manhattan, but + // we expect there to always be a value for the timestamp. + sb.append(':') + .append(ts.inMilliseconds) + } + } + logger.info(sb.toString) + } + } + + /** + * Convert a set of records from Manhattan into a GetTweet.Response. 
+ */ + def tweetResponseFromRecords( + tweetId: TweetId, + mhRecords: Seq[TweetManhattanRecord], + statsReceiver: StatsReceiver = NullStatsReceiver + ): GetTweet.Response = + if (mhRecords.isEmpty) { + GetTweet.Response.NotFound + } else { + // If no internal fields are present or no required fields present, we consider the tweet + // as not returnable (even if some additional fields are present) + def tweetFromRecords(tweetId: TweetId, mhRecords: Seq[TweetManhattanRecord]) = { + val storedTweet = buildStoredTweet(tweetId, mhRecords) + if (storedTweet.getFieldBlobs(expectedFields).nonEmpty) { + if (isValid(storedTweet)) { + statsReceiver.counter("valid").incr() + Some(StorageConversions.fromStoredTweet(storedTweet)) + } else { + log.info(s"Invalid Tweet Id: $tweetId") + statsReceiver.counter("invalid").incr() + None + } + } else { + // The Tweet contained none of the fields defined in `expectedFields` + log.info(s"Expected Fields Not Present Tweet Id: $tweetId") + statsReceiver.counter("expected_fields_not_present").incr() + None + } + } + + val stateRecord = TweetStateRecord.mostRecent(mhRecords) + stateRecord match { + // some other cases don't require an attempt to construct a Tweet + case Some(_: SoftDeleted) | Some(_: HardDeleted) => GetTweet.Response.Deleted + + // all other cases require an attempt to construct a Tweet, which may not be successful + case _ => + logRacyRead(tweetId, mhRecords) + (stateRecord, tweetFromRecords(tweetId, mhRecords)) match { + // BounceDeleted contains the Tweet data so that callers can access data on the the + // tweet (e.g. hard delete daemon requires conversationId and userId. There are no + // plans for Tweetypie server to make use of the returned tweet at this time. 
+ case (Some(_: BounceDeleted), Some(tweet)) => GetTweet.Response.BounceDeleted(tweet) + case (Some(_: BounceDeleted), None) => GetTweet.Response.Deleted + case (_, Some(tweet)) => GetTweet.Response.Found(tweet) + case _ => GetTweet.Response.NotFound + } + } + } + + def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetTweet = { + + object stats { + val getTweetScope = statsReceiver.scope("getTweet") + val deniedCounter: Counter = getTweetScope.counter("mh_denied") + val mhExceptionCounter: Counter = getTweetScope.counter("mh_exception") + val nonFatalExceptionCounter: Counter = getTweetScope.counter("non_fatal_exception") + val notFoundCounter: Counter = getTweetScope.counter("not_found") + } + + object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] { + override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = { + Stats.addWidthStat("getTweet", "tweetIds", tweetIds.size, statsReceiver) + Stitch.traverse(tweetIds)(read(_)) + } + } + + tweetId => + if (tweetId <= 0) { + Stitch.NotFound + } else { + Stitch + .call(tweetId, mhGroup) + .map(mhRecords => tweetResponseFromRecords(tweetId, mhRecords, stats.getTweetScope)) + .liftToTry + .map { + case Throw(mhException: DeniedManhattanException) => + stats.deniedCounter.incr() + Throw(RateLimited("", mhException)) + + // Encountered some other Manhattan error + case t @ Throw(_: ManhattanException) => + stats.mhExceptionCounter.incr() + t + + // Something else happened + case t @ Throw(ex) => + stats.nonFatalExceptionCounter.incr() + TweetUtils.log + .warning(ex, s"Unhandled exception in GetTweetHandler for tweetId: $tweetId") + t + + case r @ Return(GetTweet.Response.NotFound) => + stats.notFoundCounter.incr() + r + + case r @ Return(_) => r + } + .lowerFromTry + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala 
new file mode 100644 index 000000000..8483926f4 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala @@ -0,0 +1,153 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.storage.TweetKey.LKey.ForceAddedStateKey +import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet +import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet.Response._ +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Time +import com.twitter.util.Try + +object HardDeleteTweetHandler { + + /** + * When a tweet is removed lkeys with these prefixes will be deleted permanently. + */ + private[storage] def isKeyToBeDeleted(key: TweetKey): Boolean = + key.lKey match { + case (TweetKey.LKey.CoreFieldsKey | TweetKey.LKey.InternalFieldsKey(_) | + TweetKey.LKey.AdditionalFieldsKey(_) | TweetKey.LKey.SoftDeletionStateKey | + TweetKey.LKey.BounceDeletionStateKey | TweetKey.LKey.UnDeletionStateKey | + TweetKey.LKey.ForceAddedStateKey) => + true + case _ => false + } + + /** + * When hard deleting, there are two actions, writing the record and + * removing the tweet data. If we are performing any action, we will + * always try to remove the tweet data. If the tweet does not yet have a + * hard deletion record, then we will need to write one. This method + * returns the HardDeleted record if it needs to be written, and None + * if it has already been written. + * + * If the tweet is not in a deleted state we signal this with a + * Throw(NotDeleted). 
+ */ + private[storage] def getHardDeleteStateRecord( + tweetId: TweetId, + records: Seq[TweetManhattanRecord], + mhTimestamp: Time, + stats: StatsReceiver + ): Try[Option[TweetStateRecord.HardDeleted]] = { + val mostRecent = TweetStateRecord.mostRecent(records) + val currentStateStr = mostRecent.map(_.name).getOrElse("no_tweet_state_record") + stats.counter(currentStateStr).incr() + + mostRecent match { + case Some( + record @ (TweetStateRecord.SoftDeleted(_, _) | TweetStateRecord.BounceDeleted(_, _))) => + Return( + Some( + TweetStateRecord.HardDeleted( + tweetId = tweetId, + // createdAt is the hard deletion timestamp when dealing with hard deletes in Manhattan + createdAt = mhTimestamp.inMillis, + // deletedAt is the soft deletion timestamp when dealing with hard deletes in Manhattan + deletedAt = record.createdAt + ) + ) + ) + + case Some(_: TweetStateRecord.HardDeleted) => + Return(None) + + case Some(_: TweetStateRecord.ForceAdded) => + Throw(NotDeleted(tweetId, Some(ForceAddedStateKey))) + + case Some(_: TweetStateRecord.Undeleted) => + Throw(NotDeleted(tweetId, Some(TweetKey.LKey.UnDeletionStateKey))) + + case None => + Throw(NotDeleted(tweetId, None)) + } + } + + /** + * This handler returns HardDeleteTweet.Response.Deleted if data associated with the tweet is deleted, + * either as a result of this request or a previous one. + * + * The most recently added record determines the tweet's state. This method will only delete data + * for tweets in the soft-delete or hard-delete state. (Calling hardDeleteTweet for tweets that have + * already been hard-deleted will remove any lkeys that may not have been deleted previously). 
/**
 * Builds the hard-delete handler.
 *
 * For a given tweet id the returned function reads the tweet's Manhattan records and asks
 * `getHardDeleteStateRecord` what to do. When deletion may proceed it writes the hard-delete
 * state record (if one is needed) and deletes every record whose key passes
 * `isKeyToBeDeleted`, then responds with `Deleted`. When the tweet's state forbids deletion
 * the `NotDeleted` value is returned as the response and the "cancelled" counter is bumped.
 *
 * @param read   reads all Manhattan records of a tweet
 * @param insert writes a single Manhattan record
 * @param delete deletes a single Manhattan key as of a timestamp
 * @param scribe receives a "removed" entry after the state record has been written
 * @param stats  parent receiver; metrics are scoped under "hardDeleteTweet"
 */
def apply(
  read: ManhattanOperations.Read,
  insert: ManhattanOperations.Insert,
  delete: ManhattanOperations.Delete,
  scribe: Scribe,
  stats: StatsReceiver
): TweetId => Stitch[HardDeleteTweet.Response] = {
  val scopedStats = stats.scope("hardDeleteTweet")
  val cancelledCounter = scopedStats.counter("cancelled")
  val priorStateStats = scopedStats.scope("before_state")

  // Writes the hard-delete state record when there is one, scribing the removal only after
  // the write has succeeded. Nothing is written when the tweet was already hard deleted.
  def persistState(state: Option[TweetStateRecord.HardDeleted]): Stitch[Unit] =
    state
      .map { hardDeleted =>
        insert(hardDeleted.toTweetMhRecord).onSuccess { _ =>
          scribe.logRemoved(
            hardDeleted.tweetId,
            Time.fromMilliseconds(hardDeleted.createdAt),
            isSoftDeleted = false
          )
        }
      }
      .getOrElse(Stitch.Unit)

  // Deletes each key as of `at`. Individual results are lifted to Try, combined by
  // collectWithRateLimitCheck, and lowered back so a combined failure fails the stitch.
  def purge(keys: Seq[TweetKey], at: Time): Stitch[Unit] = {
    val deletions = keys.map(key => delete(key, Some(at)).liftToTry)
    Stitch.collect(deletions).map(collectWithRateLimitCheck).lowerFromTry
  }

  tweetId =>
    read(tweetId).flatMap { records =>
      val hardDeletedAt = Time.now
      val doomedKeys: Seq[TweetKey] = records.map(_.key).filter(isKeyToBeDeleted)

      getHardDeleteStateRecord(tweetId, records, hardDeletedAt, priorStateStats) match {
        case Throw(notDeleted: NotDeleted) =>
          cancelledCounter.incr()
          Stitch.value(notDeleted)

        case Throw(e) =>
          Stitch.exception(e) // this should never happen

        case Return(state) =>
          val stateWrite = persistState(state)
          val recordPurge = purge(doomedKeys, hardDeletedAt)

          Stitch.join(stateWrite, recordPurge).map { _ =>
            // If the tweetId is non-snowflake and has previously been hard deleted
            // there will be no coreData record to fall back on to get the tweet
            // creation time and createdAtMillis will be None.
            Deleted(
              // when the tweet was hard deleted
              deletedAtMillis = Some(hardDeletedAt.inMillis),
              // when the tweet itself was created (not when the deletion record was created)
              createdAtMillis = TweetUtils.creationTimeFromTweetIdOrMHRecords(tweetId, records)
            )
          }
      }
    }
}
/**
 * Reads the Manhattan records stored for `tweetId` and builds a [[StoredTweet]] from them.
 *
 * A failed read, or an exception thrown while building the tweet, fails the returned
 * Future with that exception.
 *
 * @param tweetId id of the tweet to look up
 * @return the stored representation assembled by `buildStoredTweet`
 */
def storedTweet(tweetId: Long): Future[StoredTweet] = {
  // `map` leaves a failed read untouched, so there is no need to lift the result into a
  // Try only to rethrow the Throw case.
  val result = readOperation(tweetId).map(mhRecords => buildStoredTweet(tweetId, mhRecords))

  Stitch.run(result)
}
/**
 * Tells whether the second "/"-separated segment of `lkey` is "scrubbed_fields".
 *
 * Keys with fewer than two segments (for example "" or "metadata") yield false; indexing
 * the split result directly would throw ArrayIndexOutOfBoundsException for them.
 *
 * @param lkey string form of a Manhattan local key
 */
private[this] def isLkeyScrubbedField(lkey: String): Boolean =
  lkey.split("/").lift(1).contains("scrubbed_fields")
/**
 * Renders the value of a single stored field blob as text.
 *
 *  - field id 1 is rendered from the core fields of `storedTweet`
 *  - ids in the additional-field range are rendered from a Tweetypie tweet that carries
 *    only this blob
 *  - every other id is rendered from a StoredTweet that carries only this blob
 *
 * `tweetypieTweet` is accepted but not consulted here.
 */
private[this] def decodeField(
  tweetId: Long,
  blob: TFieldBlob,
  storedTweet: StoredTweet,
  tweetypieTweet: Option[TweetypieTweet]
): String =
  blob.field.id match {
    case id if id == 1 =>
      coreFields(storedTweet)

    case id if AdditionalFields.isAdditionalFieldId(id) =>
      decodeTweetWithOneField(TweetypieTweet(tweetId).setField(blob))

    case _ =>
      decodeTweetWithOneField(StoredTweet(tweetId).setField(blob))
  }
/**
 * Minimal JSON encode/decode helpers for string-keyed maps, backed by one shared Jackson
 * mapper that has the Scala module registered.
 */
object Json {
  val TimestampKey = "timestamp"
  val SoftDeleteTimestampKey = "softdelete_timestamp"

  // Built once and reused by every encode/decode call.
  private val mapper: ObjectMapper = {
    val jackson = new ObjectMapper
    jackson.registerModule(DefaultScalaModule)
    jackson
  }

  /** Serializes `m` to JSON bytes. */
  def encode(m: Map[String, Any]): Array[Byte] =
    mapper.writeValueAsBytes(m)

  /** Parses JSON bytes into a map. */
  def decode(arr: Array[Byte]): Map[String, Any] = {
    val target = classOf[Map[String, Any]]
    mapper.readValue[Map[String, Any]](arr, target)
  }
}
/**
 * Human-readable rendering that keeps all of the record's information. It is meant for
 * people, not for parsing.
 *
 * Metadata values are shown as text, field values as a TFieldBlob with hex content, and
 * values under unknown keys as raw hex. If decoding throws a non-fatal exception, the raw
 * hex is shown together with the failure.
 *
 * The conversion is relatively expensive; avoid it on hot code paths.
 */
override def toString: String = {
  // Hex dump of the raw value; used for unknown keys and as the decode-failure fallback.
  def rawHex: String = Buf.slowHexString(Buf.ByteBuffer.Shared(value.contents))

  def decoded: String =
    key.lKey match {
      case _: TweetKey.LKey.MetadataKey =>
        StringCodec.fromByteBuffer(value.contents)

      case _: TweetKey.LKey.FieldKey =>
        val blob = TFieldBlobCodec.fromByteBuffer(value.contents)
        s"TFieldBlob(${blob.field}, 0x${Buf.slowHexString(blob.content)})"

      case TweetKey.LKey.Unknown(_) =>
        "0x" + rawHex
    }

  val rendered =
    try decoded
    catch {
      case NonFatal(e) => s"0x$rawHex (failed to decode due to $e)"
    }

  s"$key => ${value.copy(contents = rendered)}"
}
/**
 * Tweet-record operations against a single Manhattan dataset, issued through `mhEndpoint`.
 *
 * @param dataset    name of the Manhattan dataset that holds the tweet records
 * @param mhEndpoint endpoint through which every operation is sent
 */
class ManhattanOperations(dataset: String, mhEndpoint: ManhattanKVEndpoint) {
  import ManhattanOperations._

  // Key addressing the tweet's pkey within `dataset`; lkeys are attached by the callers.
  private[this] def pkey(tweetId: TweetId) = KeyDescriptor.withDataset(dataset).withPkey(tweetId)

  /** Slices everything under the tweet's pkey and wraps each entry as a record. */
  def read: Read = tweetId =>
    mhEndpoint
      .slice(pkey(tweetId).under(), ValueDescriptor)
      .map(_.map {
        case (mhKey, mhValue) => TweetManhattanRecord(TweetKey(mhKey.pkey, mhKey.lkey), mhValue)
      })

  /** Inserts one record under its own pkey and lkey. */
  def insert: Insert = { record =>
    val tweetKey = record.key
    mhEndpoint.insert(
      pkey(tweetKey.tweetId).withLkey(tweetKey.lKey),
      ValueDescriptor.withValue(record.value)
    )
  }

  /** Deletes a single key, passing the optional timestamp through to the endpoint. */
  def delete: Delete = { (key, time) =>
    mhEndpoint.delete(pkey(key.tweetId).withLkey(key.lKey), time)
  }

  /** Deletes everything stored under the tweet's pkey. */
  def deleteRange: DeleteRange = tweetId => mhEndpoint.deleteRange(pkey(tweetId).under())
}
object Config {

  /**
   * Manhattan dataset holding tweets. It is deliberately not configurable from outside:
   * writing tweets to a non-production dataset requires great care. A staging instance
   * pointed at another dataset would write tweets to a non-production store while still
   * publishing events, logging to HDFS, and caching data that reference tweets the rest
   * of the production cluster cannot access.
   *
   * Writing to other datasets for testing should only be safe in a completely isolated
   * environment.
   */
  val Dataset = "tbird_mh"

  /**
   * Number of hours after deletion during which a tweet can still be undeleted. Past
   * this window [[UndeleteHandler]] returns an error on undelete attempts.
   */
  val UndeleteWindowHours = 240

  /**
   * Default label for the underlying Manhattan Thrift client metrics.
   *
   * The finagle client metrics will be exported at clnt/:label.
   */
  val ThriftClientLabel = "mh_cylon"

  // Maps each known zone to the zone of the "other" DC.
  private val OtherDcByZone: Map[String, String] =
    Map("pdxa" -> "atla", "atla" -> "pdxa", "localhost" -> "pdxa")

  /**
   * Wily path of the Cylon cluster in the "other" DC relative to `zone`.
   *
   * @throws IllegalArgumentException when `zone` is not a known zone
   */
  def remoteDestination(zone: String): String = {
    val otherDc = remoteZone(zone)
    s"/srv#/prod/${otherDc}/manhattan/cylon.native-thrift"
  }

  private def remoteZone(zone: String) =
    OtherDcByZone.getOrElse(
      zone,
      throw new IllegalArgumentException(s"Cannot configure remote DC for unknown zone '$zone'")
    )
}
/**
 * A Config appropriate for interactive sessions and scripts.
 *
 * Both the application id and the first argument of the service identifier are derived
 * from the USER environment variable; an unset variable is treated as the empty string
 * in both places.
 */
def develConfig(): Config = {
  // System.getenv returns null for an undefined variable. Normalize it once so that the
  // ServiceIdentifier is never built from null while applicationId silently falls back.
  val user = Option(System.getenv("USER")).getOrElse("")

  Config(
    applicationId = user + ".devel",
    localDestination = "/s/manhattan/cylon.native-thrift",
    localTimeout = 10.seconds,
    remoteDestination = "/s/manhattan/cylon.native-thrift",
    remoteTimeout = 10.seconds,
    undeleteWindowHours = Config.UndeleteWindowHours,
    thriftClientLabel = Config.ThriftClientLabel,
    maxRequestsPerBatch = Int.MaxValue,
    serviceIdentifier = ServiceIdentifier(user, "tweetypie", "devel", "local"),
    opportunisticTlsLevel = OpportunisticTls.Required
  )
}
/**
 * Creates a Manhattan KV client for `dest` under the configured application id.
 *
 * mTLS parameters are built from the configured service identifier and opportunistic TLS
 * level, unless the identifier equals `EmptyServiceIdentifier`, in which case
 * `NoMtlsParams` is used.
 *
 * @param dest  destination path of the Manhattan cluster
 * @param label label passed to the client
 */
def mkClient(
  dest: String,
  label: String
): ManhattanKVClient = {
  val identifier = config.serviceIdentifier

  val mtlsParams =
    if (identifier != EmptyServiceIdentifier)
      ManhattanKVClientMtlsParams(
        serviceIdentifier = identifier,
        opportunisticTls = config.opportunisticTlsLevel
      )
    else NoMtlsParams

  new ManhattanKVClient(
    config.applicationId,
    dest,
    mtlsParams,
    label,
    Seq(Experiments.ApertureLoadBalancer)
  )
}
/**
 * Wraps `apiStitch` so that every call bumps a per-client request counter, producing
 * metrics of the form .../clients/:root_client_id/requests.
 *
 * The client id is resolved on each call through `clientIdHelper`; when none is available
 * the request is counted under `ClientIdHelper.UnknownClientId`.
 */
def observeClientId[A, B](
  apiStitch: A => Stitch[B],
  statsReceiver: StatsReceiver,
  clientIdHelper: ClientIdHelper,
): A => Stitch[B] = {
  val clientStats = statsReceiver.scope("clients")

  // Looked up per request, not once at wiring time.
  def countRequest(): Unit = {
    val rootClientId =
      clientIdHelper.effectiveClientIdRoot.getOrElse(ClientIdHelper.UnknownClientId)
    clientStats.counter(rootClientId, "requests").incr()
  }

  request => {
    countRequest()
    apiStitch(request)
  }
}
/**
 * Wraps `apiStitch` so that each outcome increments one counter under the
 * "response_code" scope: success, not_found, bounce_deleted or deleted for returned
 * responses, over_capacity for a `RateLimited` failure, and failure for any other
 * exception.
 */
def observeGetTweetResponseCode[A](
  apiStitch: A => Stitch[GetTweet.Response],
  statsReceiver: StatsReceiver
): A => Stitch[GetTweet.Response] = {
  val codes = statsReceiver.scope("response_code")

  // All counters are created up front, not on first use.
  val success = codes.counter("success")
  val notFound = codes.counter("not_found")
  val failure = codes.counter("failure")
  val overCapacity = codes.counter("over_capacity")
  val deleted = codes.counter("deleted")
  val bounceDeleted = codes.counter("bounce_deleted")

  request =>
    apiStitch(request).respond {
      case Throw(_: RateLimited) => overCapacity.incr()
      case Throw(_) => failure.incr()
      case Return(response) =>
        response match {
          case _: GetTweet.Response.Found => success.incr()
          case GetTweet.Response.NotFound => notFound.incr()
          case _: GetTweet.Response.BounceDeleted => bounceDeleted.incr()
          case GetTweet.Response.Deleted => deleted.incr()
        }
    }
}
// getStoredTweet: reads from the local Manhattan cluster. The handler is wrapped by
// `endpoint` and then by per-client request counting under the "getStoredTweet" scope.
val getStoredTweet: TweetStorageClient.GetStoredTweet = {
  val stats = statsReceiver.scope("getStoredTweet")

  val observedEndpoint =
    endpoint(
      "getStoredTweet",
      GetStoredTweetHandler(
        read = localManhattanOperations.read,
        statsReceiver = stats,
      )
    )

  observeClientId(observedEndpoint, stats, clientIdHelper)
}
/**
 * Issues a single `get` against the local Manhattan endpoint for a random, non-negative
 * pkey and discards the result; only success or failure of the call is reported.
 */
val ping: TweetStorageClient.Ping =
  () => {
    // Clearing the sign bit always yields a value in [0, Long.MaxValue].
    // `Random.nextLong().abs` does not: Long.MinValue.abs overflows and stays negative.
    val randomTweetId = Random.nextLong() & Long.MaxValue

    Stitch
      .run(
        localMhEndpoint
          .get(
            ManhattanOperations.KeyDescriptor
              .withDataset(Config.Dataset)
              .withPkey(randomTweetId)
              .withLkey(TweetKey.LKey.CoreFieldsKey), // could be any lkey
            ValueDescriptor(BufInjection)
          ).unit
      )
  }
FieldResponseCode + object Error extends FieldResponseCode + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala new file mode 100644 index 000000000..89b3e8efc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala @@ -0,0 +1,85 @@ +package com.twitter.tweetypie.storage + +import com.twitter.servo.util.FutureEffect +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging._ +import com.twitter.scrooge.BinaryThriftStructSerializer +import com.twitter.servo.util.{Scribe => ServoScribe} +import com.twitter.tweetypie.storage_internal.thriftscala._ +import com.twitter.tbird.thriftscala.Added +import com.twitter.tbird.thriftscala.Removed +import com.twitter.tbird.thriftscala.Scrubbed +import com.twitter.util.Time + +/** + * Scribe is used to log tweet writes which are used to generate /tables/statuses in HDFS. + * + * Write Scribe Category Message + * ----- --------------- ------- + * add tbird_add_status [[com.twitter.tbird.thriftscala.Added]] + * remove tbird_remove_status [[com.twitter.tbird.thriftscala.Removed]] + * scrub tbird_scrub_status [[com.twitter.tbird.thriftscala.Scrubbed]] + * + * The thrift representation is encoded using binary thrift protocol format, followed by base64 + * encoding and converted to string using default character set (utf8). The logger uses BareFormatter. + * + * The thrift ops are scribed only after the write API call has succeeded. + * + * The class is thread safe except initial configuration and registration routines, + * and no exception is expected unless java heap is out of memory. 
/**
 * Logs tweet writes to Scribe, one category per kind of write:
 *
 *   add    -> tbird_add_status    ([[com.twitter.tbird.thriftscala.Added]])
 *   remove -> tbird_remove_status ([[com.twitter.tbird.thriftscala.Removed]])
 *   scrub  -> tbird_scrub_status  ([[com.twitter.tbird.thriftscala.Scrubbed]])
 *
 * Each message is the thrift struct rendered to a string by a binary thrift serializer.
 * Every log call hands the message to its handler and then increments the matching
 * counter.
 *
 * @param factory       produces the logging handler factory for a Scribe category
 * @param statsReceiver receiver for the scribe/{add,remove,scrub}/count counters
 */
class Scribe(factory: Scribe.ScribeHandlerFactory, statsReceiver: StatsReceiver) {
  import Scribe._

  private val addedSerializer = BinaryThriftStructSerializer(Added)
  private val removedSerializer = BinaryThriftStructSerializer(Removed)
  private val scrubbedSerializer = BinaryThriftStructSerializer(Scrubbed)

  private val addCounter = statsReceiver.counter("scribe/add/count")
  private val removeCounter = statsReceiver.counter("scribe/remove/count")
  private val scrubCounter = statsReceiver.counter("scribe/scrub/count")

  val addHandler: FutureEffect[String] = ServoScribe(factory(scribeAddedCategory)())
  val removeHandler: FutureEffect[String] = ServoScribe(factory(scribeRemovedCategory)())
  val scrubHandler: FutureEffect[String] = ServoScribe(factory(scribeScrubbedCategory)())

  /** Scribes an `Added` message for the tweet, stamped with the current time. */
  def logAdded(tweet: StoredTweet): Unit = {
    val event = Added(StatusConversions.toTBirdStatus(tweet), Time.now.inMilliseconds, Some(false))
    addHandler(addedSerializer.toString(event))
    addCounter.incr()
  }

  /** Scribes a `Removed` message for tweet `id`, removed at `at`. */
  def logRemoved(id: Long, at: Time, isSoftDeleted: Boolean): Unit = {
    val event = Removed(id, at.inMilliseconds, Some(isSoftDeleted))
    removeHandler(removedSerializer.toString(event))
    removeCounter.incr()
  }

  /** Scribes a `Scrubbed` message listing the column ids scrubbed from tweet `id`. */
  def logScrubbed(id: Long, cols: Seq[Int], at: Time): Unit = {
    val event = Scrubbed(id, cols, at.inMilliseconds)
    scrubHandler(scrubbedSerializer.toString(event))
    scrubCounter.incr()
  }
}

object Scribe {
  type ScribeHandlerFactory = (String) => HandlerFactory

  /** WARNING: These categories are allow-listed. If you change them, the new categories
   * must be allow-listed as well; follow up with the CoreWorkflows team (CW) for that.
   */
  private val scribeAddedCategory = "tbird_add_status"
  private val scribeRemovedCategory = "tbird_remove_status"
  private val scribeScrubbedCategory = "tbird_scrub_status"
}
+ */
+object ScrubHandler {
+
+  /** The only fields a scrub request may name. */
+  val scrubFieldsAllowlist: Set[Field] = Set(Field.Geo)
+
+  /**
+   * Builds the scrub operation.
+   *
+   * The returned function drops non-positive tweet ids, rejects (via `require`) an empty,
+   * duplicated or non-allowlisted column list, and then, per tweet, deletes every requested
+   * field key and afterwards inserts one empty-valued scrubbed-field record per column.
+   * Deletes and inserts share a single timestamp taken once per call. When the combined
+   * stitch succeeds, one scrub event per tweet id is scribed.
+   */
+  def apply(
+    insert: ManhattanOperations.Insert,
+    delete: ManhattanOperations.Delete,
+    scribe: Scribe,
+    stats: StatsReceiver
+  ): TweetStorageClient.Scrub =
+    (unfilteredTweetIds: Seq[TweetId], columns: Seq[Field]) => {
+      val tweetIds = unfilteredTweetIds.filter(_ > 0)
+
+      require(columns.nonEmpty, "Must specify fields to scrub")
+      require(
+        columns.distinct.size == columns.size,
+        s"Duplicate fields to scrub specified: $columns"
+      )
+      require(
+        columns.forall(scrubFieldsAllowlist.contains),
+        s"Cannot scrub $columns; scrubbable fields are restricted to $scrubFieldsAllowlist"
+      )
+
+      Stats.addWidthStat("scrub", "ids", tweetIds.size, stats)
+      val scrubTime = Time.now
+
+      // Deletes the stored value of every requested column for one tweet.
+      def deleteColumns(tweetId: TweetId) = {
+        val deletes = columns.map { column =>
+          delete(TweetKey.fieldKey(tweetId, column.id), Some(scrubTime)).liftToTry
+        }
+        Stitch.collect(deletes).map(collectWithRateLimitCheck).lowerFromTry
+      }
+
+      // Inserts one empty-valued scrubbed-field record per requested column for one tweet.
+      def writeScrubMarkers(tweetId: TweetId) = {
+        val inserts = columns.map { column =>
+          val marker = TweetManhattanRecord(
+            TweetKey.scrubbedFieldKey(tweetId, column.id),
+            ManhattanValue(StringCodec.toByteBuffer(""), Some(scrubTime))
+          )
+          insert(marker).liftToTry
+        }
+        Stitch.collect(inserts)
+      }
+
+      // Marker records are only built once the deletes for that tweet have gone through.
+      val perTweet = tweetIds.map { tweetId =>
+        deleteColumns(tweetId)
+          .flatMap(_ => writeScrubMarkers(tweetId))
+          .map(collectWithRateLimitCheck)
+      }
+
+      Stitch.collect(perTweet).map(collectWithRateLimitCheck).lowerFromTry.onSuccess { _ =>
+        val columnIds = columns.map(_.id.toInt)
+        tweetIds.foreach(id => scribe.logScrubbed(id, columnIds, scrubTime))
+      }
+    }
+}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala
new file mode 100644
index 000000000..ea350ccb9
--- /dev/null
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala
@@ -0,0 
+1,20 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.util.Time
+
+/**
+ * Builds the soft-delete operation: writes a `SoftDeleted` state record stamped with the
+ * current time and, once the insert has succeeded, scribes the removal with
+ * `isSoftDeleted = true`.
+ */
+object SoftDeleteHandler {
+  def apply(
+    insert: ManhattanOperations.Insert,
+    scribe: Scribe
+  ): TweetStorageClient.SoftDelete =
+    tweetId => {
+      // The same instant is used for the state record and for the scribed event.
+      val mhTimestamp = Time.now
+      val softDeleteRecord = TweetStateRecord
+        .SoftDeleted(tweetId, mhTimestamp.inMillis)
+        .toTweetMhRecord
+
+      insert(softDeleteRecord).onSuccess { _ =>
+        scribe.logRemoved(tweetId, mhTimestamp, isSoftDeleted = true)
+      }
+    }
+}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala
new file mode 100644
index 000000000..87d8b41a1
--- /dev/null
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala
@@ -0,0 +1,33 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.finagle.stats.StatsReceiver
+
+object Stats {
+  // The two methods below (addWidthStat and updatePerFieldQpsCounters) are called per RPC call for
+  // most APIs, so we rely on the stats receiver that is passed in to the library to do memoization.
+
+  /** Records `width` on the stat `<rpcName>/<paramName>/width`. */
+  private[storage] def addWidthStat(
+    rpcName: String,
+    paramName: String,
+    width: Int,
+    stats: StatsReceiver
+  ): Unit =
+    getStat(rpcName, paramName, stats).add(width)
+
+  // Updates the counters for each additional field. The idea here is to expose the QPS for each
+  // additional field: every id in `fieldIds` has its counter incremented by `count`.
+  private[storage] def updatePerFieldQpsCounters(
+    rpcName: String,
+    fieldIds: Seq[FieldId],
+    count: Int,
+    stats: StatsReceiver
+  ): Unit = {
+    fieldIds.foreach { fieldId => getCounter(rpcName, fieldId, stats).incr(count) }
+  }
+
+  // Counter `<rpcName>/fields/<fieldId>/count`.
+  private def getCounter(rpcName: String, fieldId: FieldId, stats: StatsReceiver) =
+    stats.scope(rpcName, "fields", fieldId.toString).counter("count")
+
+  // Stat `<rpcName>/<paramName>/width`.
+  private def getStat(rpcName: String, paramName: String, stats: StatsReceiver) =
+    stats.scope(rpcName, paramName).stat("width")
+}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala
new file mode 100644
index 000000000..77dfed9ba
--- /dev/null
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala
@@ -0,0 +1,129 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.tweetypie.storage_internal.thriftscala._
+import com.twitter.tbird.{thriftscala => tbird}
+
+object StatusConversions {
+
+  /**
+   * This is used only in Scribe.scala, when scribing to tbird_add_status.
+   * Once we remove that, we can also remove this.
+   *
+   * `userId`, `text`, `createdVia` and `createdAtSec` are read with `Option.get`, so a
+   * [[StoredTweet]] missing any of them makes this method throw NoSuchElementException.
+   * Unset boolean flags become `false` and unset media becomes an empty sequence.
+   */
+  def toTBirdStatus(tweet: StoredTweet): tbird.Status =
+    tbird.Status(
+      id = tweet.id,
+      userId = tweet.userId.get,
+      text = tweet.text.get,
+      createdVia = tweet.createdVia.get,
+      createdAtSec = tweet.createdAtSec.get,
+      reply = tweet.reply.map(toTBirdReply),
+      share = tweet.share.map(toTBirdShare),
+      contributorId = tweet.contributorId,
+      geo = tweet.geo.map(toTBirdGeo),
+      hasTakedown = tweet.hasTakedown.getOrElse(false),
+      nsfwUser = tweet.nsfwUser.getOrElse(false),
+      nsfwAdmin = tweet.nsfwAdmin.getOrElse(false),
+      media = tweet.media.map(_.map(toTBirdMedia)).getOrElse(Seq()),
+      narrowcast = tweet.narrowcast.map(toTBirdNarrowcast),
+      nullcast = tweet.nullcast.getOrElse(false),
+      trackingId = tweet.trackingId
+    )
+
+  /**
+   * This is only used in a test, to verify that the above method `toTBirdStatus`
+   * works, so we can't remove it as long as the above method exists.
+   *
+   * Non-optional status fields are wrapped in `Some`.
+   */
+  def fromTBirdStatus(status: tbird.Status): StoredTweet = {
+    StoredTweet(
+      id = status.id,
+      userId = Some(status.userId),
+      text = Some(status.text),
+      createdVia = Some(status.createdVia),
+      createdAtSec = Some(status.createdAtSec),
+      reply = status.reply.map(fromTBirdReply),
+      share = status.share.map(fromTBirdShare),
+      contributorId = status.contributorId,
+      geo = status.geo.map(fromTBirdGeo),
+      hasTakedown = Some(status.hasTakedown),
+      nsfwUser = Some(status.nsfwUser),
+      nsfwAdmin = Some(status.nsfwAdmin),
+      media = Some(status.media.map(fromTBirdMedia)),
+      narrowcast = status.narrowcast.map(fromTBirdNarrowcast),
+      nullcast = Some(status.nullcast),
+      trackingId = status.trackingId
+    )
+  }
+
+  private def fromTBirdReply(reply: tbird.Reply): StoredReply =
+    StoredReply(
+      inReplyToStatusId = reply.inReplyToStatusId,
+      inReplyToUserId = reply.inReplyToUserId
+    )
+
+  private def fromTBirdShare(share: tbird.Share): StoredShare =
+    StoredShare(
+      sourceStatusId = share.sourceStatusId,
+      sourceUserId = share.sourceUserId,
+      parentStatusId = share.parentStatusId
+    )
+
+  // Inverse of `toTBirdGeo`. `name` is carried across as well, so that a geo converted
+  // with `toTBirdGeo` and back keeps its name instead of silently losing it.
+  private def fromTBirdGeo(geo: tbird.Geo): StoredGeo =
+    StoredGeo(
+      latitude = geo.latitude,
+      longitude = geo.longitude,
+      geoPrecision = geo.geoPrecision,
+      entityId = geo.entityId,
+      name = geo.name
+    )
+
+  private def fromTBirdMedia(media: tbird.MediaEntity): StoredMediaEntity =
+    StoredMediaEntity(
+      id = media.id,
+      mediaType = media.mediaType,
+      width = media.width,
+      height = media.height
+    )
+
+  // The tbird narrowcast lists are non-optional; each one is wrapped in `Some`.
+  private def fromTBirdNarrowcast(narrowcast: tbird.Narrowcast): StoredNarrowcast =
+    StoredNarrowcast(
+      language = Some(narrowcast.language),
+      location = Some(narrowcast.location),
+      ids = Some(narrowcast.ids)
+    )
+
+  private def toTBirdReply(reply: StoredReply): tbird.Reply =
+    tbird.Reply(
+      inReplyToStatusId = reply.inReplyToStatusId,
+      inReplyToUserId = reply.inReplyToUserId
+    )
+
+  private def toTBirdShare(share: StoredShare): tbird.Share =
+    tbird.Share(
+      sourceStatusId = share.sourceStatusId,
+      sourceUserId = share.sourceUserId,
+      parentStatusId = share.parentStatusId
+    )
+
+  private def toTBirdGeo(geo: StoredGeo): tbird.Geo =
+    tbird.Geo(
+      latitude = geo.latitude,
+      longitude = geo.longitude,
+      geoPrecision = geo.geoPrecision,
+      entityId = geo.entityId,
+      name = geo.name
+    )
+
+  private def toTBirdMedia(media: StoredMediaEntity): tbird.MediaEntity =
+    tbird.MediaEntity(
+      id = media.id,
+      mediaType = media.mediaType,
+      width = media.width,
+      height = media.height
+    )
+
+  // Unset stored narrowcast lists become empty lists on the tbird side.
+  private def toTBirdNarrowcast(narrowcast: StoredNarrowcast): tbird.Narrowcast =
+    tbird.Narrowcast(
+      language = narrowcast.language.getOrElse(Nil),
+      location = narrowcast.location.getOrElse(Nil),
+      ids = narrowcast.ids.getOrElse(Nil)
+    )
+}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala
new file mode 100644
index 000000000..d424a8817
--- /dev/null
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala
@@ -0,0 +1,346 @@
+package 
com.twitter.tweetypie.storage
+
+import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
+import com.twitter.scrooge.TFieldBlob
+import com.twitter.tweetypie.additionalfields.AdditionalFields
+import com.twitter.tweetypie.storage_internal.thriftscala._
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.tweetypie.util.TweetLenses
+
+object StorageConversions {
+  // Ids of the compiled StoredTweet fields that fall in the additional-field id range.
+  private val storedAdditionalFieldIds =
+    StoredTweet.metaData.fields.map(_.id).filter(AdditionalFields.isAdditionalFieldId)
+
+  /** A reply without a parent status id is stored with `inReplyToStatusId = 0`. */
+  def toStoredReply(reply: Reply, conversationId: Option[TweetId]): StoredReply =
+    StoredReply(
+      inReplyToStatusId = reply.inReplyToStatusId.getOrElse(0),
+      inReplyToUserId = reply.inReplyToUserId,
+      conversationId = conversationId
+    )
+
+  def toStoredShare(share: Share): StoredShare =
+    StoredShare(
+      sourceStatusId = share.sourceStatusId,
+      sourceUserId = share.sourceUserId,
+      parentStatusId = share.parentStatusId
+    )
+
+  /**
+   * Yields a [[StoredQuotedTweet]] only when the quoted tweet has a permalink whose short
+   * url is not already contained in `text`.
+   */
+  def toStoredQuotedTweet(qt: QuotedTweet, text: String): Option[StoredQuotedTweet] =
+    qt.permalink.collect {
+      // omit StoredQuotedTweet when url already in text
+      case permalink if !text.contains(permalink.shortUrl) =>
+        StoredQuotedTweet(qt.tweetId, qt.userId, permalink.shortUrl)
+    }
+
+  /**
+   * Coordinates, when present, determine the stored geo (with the place id as `name`).
+   * With a place id only, a zeroed geo carrying just that name is stored. With neither,
+   * the result is None.
+   */
+  def toStoredGeo(tweet: Tweet): Option[StoredGeo] =
+    TweetLenses.geoCoordinates.get(tweet) match {
+      case Some(coords) =>
+        Some(
+          StoredGeo(
+            latitude = coords.latitude,
+            longitude = coords.longitude,
+            geoPrecision = coords.geoPrecision,
+            entityId = if (coords.display) 2 else 0,
+            name = TweetLenses.placeId.get(tweet)
+          )
+        )
+      case None =>
+        TweetLenses.placeId.get(tweet).map { placeId =>
+          StoredGeo(
+            latitude = 0.0,
+            longitude = 0.0,
+            geoPrecision = 0,
+            entityId = 0,
+            name = Some(placeId)
+          )
+        }
+    }
+
+  /** Keeps only media without a `sourceStatusId` that also has an `Orig` size. */
+  def toStoredMedia(mediaList: Seq[MediaEntity]): Seq[StoredMediaEntity] =
+    for {
+      media <- mediaList
+      if media.sourceStatusId.isEmpty
+      stored <- toStoredMediaEntity(media)
+    } yield stored
+
+  /** Built from the media's `Orig` size; None when the media has no such size. */
+  def toStoredMediaEntity(media: MediaEntity): Option[StoredMediaEntity] =
+    media.sizes.collectFirst {
+      case origSize if origSize.sizeType == MediaSizeType.Orig =>
+        StoredMediaEntity(
+          id = media.mediaId,
+          mediaType = origSize.deprecatedContentType.value.toByte,
+          width = origSize.width.toShort,
+          height = origSize.height.toShort
+        )
+    }
+
+  // The language and ids fields are for compatibility with existing tweets stored in manhattan.
+  def toStoredNarrowcast(narrowcast: Narrowcast): StoredNarrowcast =
+    StoredNarrowcast(
+      language = Some(Seq.empty),
+      location = Some(narrowcast.location),
+      ids = Some(Seq.empty)
+    )
+
+  /** Sets every blob in `from` on `to`, in order. */
+  def toStoredAdditionalFields(from: Seq[TFieldBlob], to: StoredTweet): StoredTweet =
+    from.foldLeft(to)((stored, blob) => stored.setField(blob))
+
+  /** Copies the additional fields of the tweet `from` onto `to`. */
+  def toStoredAdditionalFields(from: Tweet, to: StoredTweet): StoredTweet =
+    toStoredAdditionalFields(AdditionalFields.additionalFields(from), to)
+
+  /** Converts a full tweet, core fields first and additional fields afterwards. */
+  def toStoredTweet(tweet: Tweet): StoredTweet =
+    toStoredAdditionalFields(
+      tweet,
+      StoredTweet(
+        id = tweet.id,
+        userId = Some(TweetLenses.userId(tweet)),
+        text = Some(TweetLenses.text(tweet)),
+        createdVia = Some(TweetLenses.createdVia(tweet)),
+        createdAtSec = Some(TweetLenses.createdAt(tweet)),
+        reply = TweetLenses.reply(tweet).map(toStoredReply(_, TweetLenses.conversationId(tweet))),
+        share = TweetLenses.share(tweet).map(toStoredShare),
+        contributorId = tweet.contributor.map(_.userId),
+        geo = toStoredGeo(tweet),
+        hasTakedown = Some(TweetLenses.hasTakedown(tweet)),
+        nsfwUser = Some(TweetLenses.nsfwUser(tweet)),
+        nsfwAdmin = Some(TweetLenses.nsfwAdmin(tweet)),
+        media = tweet.media.map(toStoredMedia),
+        narrowcast = TweetLenses.narrowcast(tweet).map(toStoredNarrowcast),
+        nullcast = Some(TweetLenses.nullcast(tweet)),
+        trackingId = TweetLenses.trackingId(tweet),
+        quotedTweet =
+          TweetLenses.quotedTweet(tweet).flatMap(toStoredQuotedTweet(_, TweetLenses.text(tweet)))
+      )
+    )
+
+  /**
+   * Does not need core data to be set. Constructs on disk tweet by avoiding the TweetLenses object
+   * and only extracting the specified fields.
+   *
+   * NOTE: Assumes that specified fields are set in the tweet.
+   *
+   * @param tpTweet Tweetypie Tweet to be converted
+   * @param fields  the fields to be populated in the on disk Tweet
+   *
+   * @return an on disk Tweet which has only the specified fields set
+   */
+  def toStoredTweetForFields(tpTweet: Tweet, fields: Set[Field]): StoredTweet = {
+
+    // Make sure all the passed in fields are known or additional fields
+    require(
+      (fields -- Field.AllUpdatableCompiledFields)
+        .map(_.id)
+        .forall(AdditionalFields.isAdditionalFieldId)
+    )
+
+    // Core data is only dereferenced when a requested field actually needs it.
+    def core = tpTweet.coreData.get
+
+    // Some(value) when `field` was requested, None otherwise; `value` is evaluated lazily.
+    def whenRequested[A](field: Field)(value: => A): Option[A] =
+      if (fields.contains(field)) Some(value) else None
+
+    // Same coordinates / place-id rules as `toStoredGeo`, read straight from core data.
+    def requestedGeo: Option[StoredGeo] =
+      core.coordinates match {
+        case Some(coords) =>
+          Some(
+            StoredGeo(
+              latitude = coords.latitude,
+              longitude = coords.longitude,
+              geoPrecision = coords.geoPrecision,
+              entityId = if (coords.display) 2 else 0,
+              name = core.placeId
+            )
+          )
+        case None =>
+          core.placeId.map { placeId =>
+            StoredGeo(
+              latitude = 0.0,
+              longitude = 0.0,
+              geoPrecision = 0,
+              entityId = 0,
+              name = Some(placeId)
+            )
+          }
+      }
+
+    val storedTweet =
+      StoredTweet(
+        id = tpTweet.id,
+        geo = if (fields.contains(Field.Geo)) requestedGeo else None,
+        hasTakedown = whenRequested(Field.HasTakedown)(core.hasTakedown),
+        nsfwUser = whenRequested(Field.NsfwUser)(core.nsfwUser),
+        nsfwAdmin = whenRequested(Field.NsfwAdmin)(core.nsfwAdmin)
+      )
+
+    val wantsAdditionalFields =
+      fields.exists(field => AdditionalFields.isAdditionalFieldId(field.id))
+
+    if (wantsAdditionalFields) toStoredAdditionalFields(tpTweet, storedTweet)
+    else storedTweet
+  }
+
+  /** A non-positive stored parent status id is surfaced as None. */
+  def fromStoredReply(reply: StoredReply): Reply =
+    Reply(
+      Some(reply.inReplyToStatusId).filter(_ > 0),
+      reply.inReplyToUserId
+    )
+
+  def fromStoredShare(share: StoredShare): Share =
+    Share(
+      share.sourceStatusId,
+      share.sourceUserId,
+      share.parentStatusId
+    )
+
+  def fromStoredQuotedTweet(qt: StoredQuotedTweet): QuotedTweet = {
+    val permalink =
+      ShortenedUrl(
+        shortUrl = qt.shortUrl,
+        longUrl = "", // will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator
+        displayText = "" // will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator
+      )
+    QuotedTweet(qt.tweetId, qt.userId, Some(permalink))
+  }
+
+  /** `display` is true exactly when the stored `entityId` is 2. */
+  def fromStoredGeo(geo: StoredGeo): GeoCoordinates =
+    GeoCoordinates(
+      latitude = geo.latitude,
+      longitude = geo.longitude,
+      geoPrecision = geo.geoPrecision,
+      display = geo.entityId == 2
+    )
+
+  def fromStoredMediaEntity(media: StoredMediaEntity): MediaEntity = {
+    val origSize =
+      MediaSize(
+        sizeType = MediaSizeType.Orig,
+        resizeMethod = MediaResizeMethod.Fit,
+        deprecatedContentType = MediaContentType(media.mediaType),
+        width = media.width,
+        height = media.height
+      )
+    MediaEntity(
+      fromIndex = -1, // will get filled in later
+      toIndex = -1, // will get filled in later
+      url = null, // will get filled in later
+      mediaPath = "", // field is obsolete
+      mediaUrl = null, // will get filled in later
+      mediaUrlHttps = null, // will get filled in later
+      displayUrl = null, // will get filled in later
+      expandedUrl = null, // will get filled in later
+      mediaId = media.id,
+      nsfw = false,
+      sizes = Set(origSize)
+    )
+  }
+
+  def fromStoredNarrowcast(narrowcast: StoredNarrowcast): Narrowcast =
+    Narrowcast(
+      location = narrowcast.location.getOrElse(Seq())
+    )
+
+  /**
+   * Rebuilds a tweet from its stored form. `userId`, `text`, `createdVia` and
+   * `createdAtSec` are read with `Option.get` and therefore must be set. Entity lists
+   * (mentions, urls, cashtags, hashtags) are returned empty.
+   */
+  def fromStoredTweet(storedTweet: StoredTweet): Tweet = {
+    val coreData =
+      TweetCoreData(
+        userId = storedTweet.userId.get,
+        text = storedTweet.text.get,
+        createdVia = storedTweet.createdVia.get,
+        createdAtSecs = storedTweet.createdAtSec.get,
+        reply = storedTweet.reply.map(fromStoredReply),
+        share = storedTweet.share.map(fromStoredShare),
+        hasTakedown = storedTweet.hasTakedown.getOrElse(false),
+        nsfwUser = storedTweet.nsfwUser.getOrElse(false),
+        nsfwAdmin = storedTweet.nsfwAdmin.getOrElse(false),
+        narrowcast = storedTweet.narrowcast.map(fromStoredNarrowcast),
+        nullcast = storedTweet.nullcast.getOrElse(false),
+        trackingId = storedTweet.trackingId,
+        conversationId = storedTweet.reply.flatMap(_.conversationId),
+        placeId = storedTweet.geo.flatMap(_.name),
+        coordinates = storedTweet.geo.map(fromStoredGeo),
+        hasMedia = storedTweet.media.collect { case stored if stored.nonEmpty => true }
+      )
+
+    // retweets should never have their media, but some tweets incorrectly do.
+    val mediaGroups = if (coreData.share.isDefined) Nil else storedTweet.media.toSeq
+
+    val tpTweet =
+      Tweet(
+        id = storedTweet.id,
+        coreData = Some(coreData),
+        contributor = storedTweet.contributorId.map(Contributor(_)),
+        media = Some(mediaGroups.flatten.map(fromStoredMediaEntity)),
+        mentions = Some(Seq.empty),
+        urls = Some(Seq.empty),
+        cashtags = Some(Seq.empty),
+        hashtags = Some(Seq.empty),
+        quotedTweet = storedTweet.quotedTweet.map(fromStoredQuotedTweet)
+      )
+    fromStoredAdditionalFields(storedTweet, tpTweet)
+  }
+
+  /**
+   * Like `fromStoredTweet`, but missing required fields are first replaced by placeholders
+   * (-1 for `userId` and `createdAtSec`, "" for `text` and `createdVia`).
+   */
+  def fromStoredTweetAllowInvalid(storedTweet: StoredTweet): Tweet = {
+    val withPlaceholders =
+      storedTweet.copy(
+        userId = storedTweet.userId.orElse(Some(-1L)),
+        text = storedTweet.text.orElse(Some("")),
+        createdVia = storedTweet.createdVia.orElse(Some("")),
+        createdAtSec = storedTweet.createdAtSec.orElse(Some(-1L))
+      )
+    fromStoredTweet(withPlaceholders)
+  }
+
+  /**
+   * Copies onto `to` the compiled additional fields of `from` together with those of its
+   * passthrough fields whose id is in the additional-field range.
+   */
+  def fromStoredAdditionalFields(from: StoredTweet, to: Tweet): Tweet = {
+    val passthroughBlobs =
+      from._passthroughFields.filterKeys(AdditionalFields.isAdditionalFieldId)
+    val blobsById = from.getFieldBlobs(storedAdditionalFieldIds) ++ passthroughBlobs
+    blobsById.values.foldLeft(to)((tweet, blob) => tweet.setField(blob))
+  }
+
+  /** Note-tweet id and expandability are decoded from the stored note-tweet field blob. */
+  def toDeletedTweet(storedTweet: StoredTweet): DeletedTweet = {
+    val noteTweet =
+      storedTweet
+        .getFieldBlob(Tweet.NoteTweetField.id)
+        .map(blob => NoteTweet.decode(blob.read))
+    DeletedTweet(
+      id = storedTweet.id,
+      userId = storedTweet.userId,
+      text = storedTweet.text,
+      createdAtSecs = storedTweet.createdAtSec,
+      share = storedTweet.share.map(toDeletedShare),
+      media = storedTweet.media.map(_.map(toDeletedMediaEntity)),
+      noteTweetId = noteTweet.map(_.id),
+      isExpandable = noteTweet.flatMap(_.isExpandable)
+    )
+  }
+
+  def toDeletedShare(storedShare: StoredShare): DeletedTweetShare =
+    DeletedTweetShare(
+      sourceStatusId = storedShare.sourceStatusId,
+      sourceUserId = storedShare.sourceUserId,
+      parentStatusId = storedShare.parentStatusId
+    )
+
+  def toDeletedMediaEntity(storedMediaEntity: StoredMediaEntity): DeletedTweetMediaEntity =
+    DeletedTweetMediaEntity(
+      id = storedMediaEntity.id,
+      mediaType = storedMediaEntity.mediaType,
+      width = storedMediaEntity.width,
+      height = storedMediaEntity.height
+    )
+}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala
new file mode 100644
index 000000000..52e907594
--- /dev/null
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala
@@ -0,0 +1,92 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.util.Return
+import com.twitter.util.Throw
+import com.twitter.util.Time
+import com.twitter.util.Try
+import java.util.Arrays
+import scala.util.control.NoStackTrace
+import scala.util.control.NonFatal
+
+sealed abstract class TimestampType(val keyName: String)
+object TimestampType {
+  object Default extends TimestampType("timestamp")
+  object SoftDelete extends TimestampType("softdelete_timestamp")
+}
+
+/**
+ * TimestampDecoder gets the timestamps associated with state records. The Manhattan timestamp is
+ * used for legacy records (with value "1"), otherwise the timestamp is extracted from the
+ * JSON value.
+ *
+ * See "Metadata" in README.md for further information about state records.
+ */
+object TimestampDecoder {
+  case class UnparsableJson(msg: String, t: Throwable) extends Exception(msg, t) with NoStackTrace
+  case class MissingJsonTimestamp(msg: String) extends Exception(msg) with NoStackTrace
+  case class UnexpectedJsonValue(msg: String) extends Exception(msg) with NoStackTrace
+  case class MissingManhattanTimestamp(msg: String) extends Exception(msg) with NoStackTrace
+
+  // Content of a legacy state record: the single character '1'.
+  private[storage] val LegacyValue: Array[Byte] = Array('1')
+
+  /**
+   * The first backfill of tweet data to Manhattan supplied timestamps in milliseconds where
+   * nanoseconds were expected, so some values carry an incorrect Manhattan timestamp: for
+   * those, time.inNanoseconds is actually a millisecond count.
+   *
+   * For example, the deletion record for tweet 22225781 has Manhattan timestamp 1970-01-01 00:23:24 +0000.
+   * Contrast with the deletion record for tweet 435404491999813632 with Manhattan timestamp 2014-11-09 14:24:04 +0000
+   *
+   * The threshold is the last time in milliseconds that was interpreted as nanoseconds,
+   * e.g. Time.fromNanoseconds(1438387200000L) == 1970-01-01 00:23:58 +0000
+   */
+  private[storage] val BadTimestampThreshold = Time.at("1970-01-01 00:23:58 +0000")
+
+  /** Decodes the `tsType` timestamp of `record` from the record's value. */
+  def decode(record: TweetManhattanRecord, tsType: TimestampType): Try[Long] =
+    decode(record.value, tsType)
+
+  /**
+   * Legacy values yield their (corrected) Manhattan timestamp; any other value is parsed
+   * as JSON and the entry named by `tsType.keyName` is returned.
+   */
+  def decode(mhValue: TweetManhattanValue, tsType: TimestampType): Try[Long] = {
+    val bytes = ByteArrayCodec.fromByteBuffer(mhValue.contents)
+    if (!isLegacyRecord(bytes)) jsonTimestamp(bytes, tsType)
+    else nativeManhattanTimestamp(mhValue)
+  }
+
+  private def isLegacyRecord(value: Array[Byte]) = Arrays.equals(value, LegacyValue)
+
+  // Fails with MissingManhattanTimestamp when the value carries no Manhattan timestamp.
+  private def nativeManhattanTimestamp(mhValue: TweetManhattanValue): Try[Long] =
+    mhValue.timestamp.fold[Try[Long]](
+      Throw(MissingManhattanTimestamp(s"Manhattan timestamp missing in value $mhValue"))
+    )(ts => Return(correctedTimestamp(ts)))
+
+  // A parse failure becomes UnparsableJson, an absent key MissingJsonTimestamp, and a
+  // value that is neither a Long nor an Integer UnexpectedJsonValue.
+  private def jsonTimestamp(value: Array[Byte], tsType: TimestampType): Try[Long] = {
+    val parsed =
+      Try(Json.decode(value)).rescue {
+        case NonFatal(e) => Throw(UnparsableJson(e.getMessage, e))
+      }
+
+    parsed.flatMap { json =>
+      json.get(tsType.keyName) match {
+        case Some(l: Long) => Return(l)
+        case Some(i: Integer) => Return(i.toLong)
+        case Some(_) =>
+          Throw(
+            UnexpectedJsonValue(s"Unexpected value for ${tsType.keyName} in record data $json")
+          )
+        case None =>
+          Throw(MissingJsonTimestamp(s"Missing key ${tsType.keyName} in record data $json"))
+      }
+    }
+  }
+
+  // True for timestamps affected by the milliseconds-as-nanoseconds backfill.
+  private def isBadTimestamp(t: Time): Boolean = t < BadTimestampThreshold
+
+  /** Re-reads a bad timestamp's nanosecond count as milliseconds; other times pass through. */
+  def correctedTime(t: Time): Time =
+    if (isBadTimestamp(t)) Time.fromMilliseconds(t.inNanoseconds) else t
+
+  /** Same correction, starting from a raw nanosecond count. */
+  def correctedTime(t: Long): Time = correctedTime(Time.fromNanoseconds(t))
+
+  /** Bad timestamps yield their nanosecond count; all others yield `t.inMilliseconds`. */
+  def correctedTimestamp(t: Time): Long =
+    if (isBadTimestamp(t)) t.inNanoseconds else t.inMilliseconds
+}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala
new file mode 100644
index 000000000..ed5d01141
--- /dev/null
+++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala
@@ -0,0 +1,164 @@
+package com.twitter.tweetypie.storage
+
+/**
+ * Responsible for encoding/decoding Tweet records to/from Manhattan keys
+ *
+ * K/V Scheme:
+ * -----------
+ *      [TweetId]
+ *           /metadata
+ *               /delete_state (a.k.a. hard delete)
+ *               /soft_delete_state
+ *               /bounce_delete_state
+ *               /undelete_state
+ *               /force_added_state
+ *               /scrubbed_fields/
+ *                    /[ScrubbedFieldId_1]
+ *                     ..
+ *                    /[ScrubbedFieldId_M]
+ *           /fields
+ *              /internal
+ *                  /1
+ *                  /9
+ *                  ..
+ *                  /99
+ *              /external
+ *                  /100
+ *                  ..
+ *
+ * IMPORTANT NOTE:
+ * 1) Field Ids 2 to 8 in Tweet thrift struct are considered "core fields" and are 'packed' together
+ *    into a TFieldBlob and stored under field id 1 (i.e. [DatasetName]/[TweetId]/fields/internal/1).
+ *    This is why we do not see keys from [DatasetName]/[TweetId]/fields/internal/2 to [DatasetName]/
+ *    [TweetId]/fields/internal/8
+ *
+ * 2) Also, the tweet id (which is the field id 1 in Tweet thrift structure) is not explicitly stored
+ *    in Manhattan. There is no need to explicitly store it since it is a part of the Pkey
+ */
+case class TweetKey(tweetId: TweetId, lKey: TweetKey.LKey) {
+  override def toString: String =
+    s"/${ManhattanOperations.PkeyInjection(tweetId)}/${ManhattanOperations.LkeyInjection(lKey)}"
+}
+
+object TweetKey {
+  // Manhattan uses lexicographical order for keys. To make sure lexicographical order matches the
+  // numerical order, we should pad both tweet id and field ids with leading zeros.
+  // Since tweet id is long and field id is a short, the max width of each can be obtained by doing
+  // Long.MaxValue.toString.length and Short.MaxValue.toString.length respectively
+  private val TweetIdFormatStr = s"%0${Long.MaxValue.toString.length}d"
+  private val FieldIdFormatStr = s"%0${Short.MaxValue.toString.length}d"
+  private[storage] def padTweetIdStr(tweetId: Long): String = TweetIdFormatStr.format(tweetId)
+  private[storage] def padFieldIdStr(fieldId: Short): String = FieldIdFormatStr.format(fieldId)
+
+  def coreFieldsKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.CoreFieldsKey)
+  def hardDeletionStateKey(tweetId: TweetId): TweetKey =
+    TweetKey(tweetId, LKey.HardDeletionStateKey)
+  def softDeletionStateKey(tweetId: TweetId): TweetKey =
+    TweetKey(tweetId, LKey.SoftDeletionStateKey)
+  def bounceDeletionStateKey(tweetId: TweetId): TweetKey =
+    TweetKey(tweetId, LKey.BounceDeletionStateKey)
+  def unDeletionStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.UnDeletionStateKey)
+  def forceAddedStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ForceAddedStateKey)
+  def scrubbedGeoFieldKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ScrubbedGeoFieldKey)
+  // Picks the internal or the additional (external) key family based on the field id.
+  def fieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
+    TweetKey(tweetId, LKey.FieldKey(fieldId))
+  def internalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
+    TweetKey(tweetId, LKey.InternalFieldsKey(fieldId))
+  def additionalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
+    TweetKey(tweetId, LKey.AdditionalFieldsKey(fieldId))
+  def scrubbedFieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey =
+    TweetKey(tweetId, LKey.ScrubbedFieldKey(fieldId))
+
+  // AllFieldsKeyPrefix:       fields
+  // CoreFieldsKey:            fields/internal/1  (Stores subset of StoredTweet fields which are
+  //                             "packed" into a single CoreFields record)
+  // HardDeletionStateKey:     metadata/delete_state
+  // SoftDeletionStateKey:     metadata/soft_delete_state
+  // BounceDeletionStateKey:   metadata/bounce_delete_state
+  // UnDeletionStateKey:       metadata/undelete_state
+  // ForceAddedStateKey:       metadata/force_added_state
+  // FieldKey:                 fields/<group_name>/<padded_field_id> (where <group_name>
+  //                             is 'internal' for field ids < 100 and 'external' for all other
+  //                             fields ids)
+  // InternalFieldsKeyPrefix:  fields/internal
+  // PKey:
+  // ScrubbedFieldKey:         metadata/scrubbed_fields/<padded_field_id>
+  // ScrubbedFieldKeyPrefix:   metadata/scrubbed_fields
+  sealed abstract class LKey(override val toString: String)
+  object LKey {
+    private val HardDeletionRecordLiteral = "delete_state"
+    private val SoftDeletionRecordLiteral = "soft_delete_state"
+    private val BounceDeletionRecordLiteral = "bounce_delete_state"
+    private val UnDeletionRecordLiteral = "undelete_state"
+    private val ForceAddRecordLiteral = "force_added_state"
+    private val ScrubbedFieldsGroup = "scrubbed_fields"
+    private val InternalFieldsGroup = "internal"
+    private val ExternalFieldsGroup = "external"
+    private val MetadataCategory = "metadata"
+    private val FieldsCategory = "fields"
+    private val InternalFieldsKeyPrefix = s"$FieldsCategory/$InternalFieldsGroup/"
+    private val ExternalFieldsKeyPrefix = s"$FieldsCategory/$ExternalFieldsGroup/"
+    private val ScrubbedFieldsKeyPrefix = s"$MetadataCategory/$ScrubbedFieldsGroup/"
+
+    sealed abstract class MetadataKey(metadataType: String)
+        extends LKey(s"$MetadataCategory/$metadataType")
+    sealed abstract class StateKey(stateType: String) extends MetadataKey(stateType)
+    case object HardDeletionStateKey extends StateKey(s"$HardDeletionRecordLiteral")
+    case object SoftDeletionStateKey extends StateKey(s"$SoftDeletionRecordLiteral")
+    case object BounceDeletionStateKey extends StateKey(s"$BounceDeletionRecordLiteral")
+    case object UnDeletionStateKey extends StateKey(s"$UnDeletionRecordLiteral")
+    case object ForceAddedStateKey extends StateKey(s"$ForceAddRecordLiteral")
+
+    case class ScrubbedFieldKey(fieldId: FieldId)
+        extends MetadataKey(s"$ScrubbedFieldsGroup/${padFieldIdStr(fieldId)}")
+    val ScrubbedGeoFieldKey: LKey.ScrubbedFieldKey = ScrubbedFieldKey(TweetFields.geoFieldId)
+
+    /**
+     * LKey that has one of many possible field ids. This generalizes over
+     * internal and additional fields keys.
+     *
+     * NOTE(review): the string that matters is the `override val toString` below
+     * (prefix + zero-padded field id); the `toString` passed to the parent constructor
+     * is presumably just a placeholder that the override supersedes. Confirm before
+     * touching the initialization order here.
+     */
+    sealed abstract class FieldKey(prefix: String) extends LKey(toString) {
+      def fieldId: FieldId
+      override val toString: String = prefix + padFieldIdStr(fieldId)
+    }
+    object FieldKey {
+      // Ids below the first additional field id are internal; all others are additional.
+      def apply(fieldId: FieldId): FieldKey =
+        fieldId match {
+          case id if id < TweetFields.firstAdditionalFieldId => InternalFieldsKey(fieldId)
+          case _ => AdditionalFieldsKey(fieldId)
+        }
+    }
+
+    case class InternalFieldsKey(fieldId: FieldId) extends FieldKey(InternalFieldsKeyPrefix) {
+      assert(fieldId < TweetFields.firstAdditionalFieldId)
+    }
+    case class AdditionalFieldsKey(fieldId: FieldId) extends FieldKey(ExternalFieldsKeyPrefix) {
+      assert(fieldId >= TweetFields.firstAdditionalFieldId)
+    }
+    val CoreFieldsKey: LKey.InternalFieldsKey = InternalFieldsKey(TweetFields.rootCoreFieldId)
+
+    case class Unknown private (str: String) extends LKey(str)
+
+    /**
+     * Parses the string form of an lkey.
+     *
+     * The fixed state keys, the core-fields key and the scrubbed-geo key are matched
+     * exactly. A string starting with one of the field or scrubbed-field prefixes is
+     * decoded from the field id that follows the prefix. Anything else is
+     * [[Unknown]], including a prefixed string whose suffix is not a valid field id, or
+     * whose id lies outside the range its prefix allows. Such malformed keys used to
+     * escape as NumberFormatException / AssertionError.
+     */
+    def fromString(str: String): LKey = {
+      // Field id following `prefix`, or None when the suffix does not parse as a Short.
+      def extractFieldId(prefix: String): Option[FieldId] =
+        try Some(str.slice(prefix.length, str.length).toShort)
+        catch { case _: NumberFormatException => None }
+
+      def orUnknown(key: Option[LKey]): LKey = key.getOrElse(Unknown(str))
+
+      str match {
+        case CoreFieldsKey.toString => CoreFieldsKey
+        case HardDeletionStateKey.toString => HardDeletionStateKey
+        case SoftDeletionStateKey.toString => SoftDeletionStateKey
+        case BounceDeletionStateKey.toString => BounceDeletionStateKey
+        case UnDeletionStateKey.toString => UnDeletionStateKey
+        case ForceAddedStateKey.toString => ForceAddedStateKey
+        case ScrubbedGeoFieldKey.toString => ScrubbedGeoFieldKey
+        case _ if str.startsWith(InternalFieldsKeyPrefix) =>
+          // Same range condition that InternalFieldsKey asserts on construction.
+          orUnknown(
+            extractFieldId(InternalFieldsKeyPrefix)
+              .filter(_ < TweetFields.firstAdditionalFieldId)
+              .map(InternalFieldsKey(_))
+          )
+        case _ if str.startsWith(ExternalFieldsKeyPrefix) =>
+          // Same range condition that AdditionalFieldsKey asserts on construction.
+          orUnknown(
+            extractFieldId(ExternalFieldsKeyPrefix)
+              .filter(_ >= TweetFields.firstAdditionalFieldId)
+              .map(AdditionalFieldsKey(_))
+          )
+        case _ if str.startsWith(ScrubbedFieldsKeyPrefix) =>
+          orUnknown(extractFieldId(ScrubbedFieldsKeyPrefix).map(ScrubbedFieldKey(_)))
+        case _ => Unknown(str)
+      }
+    }
+  }
+}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala
new file mode 100644
index 000000000..a5d31a62d
--- /dev/null
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala
@@ -0,0 +1,90 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.storage.client.manhattan.kv.ManhattanValue
+import com.twitter.util.Time
+
+/**
+ * A [[TweetStateRecord]] represents an action taken on a tweet and can be used to determine a tweet's state.
+ *
+ * The state is determined by the record with the most recent timestamp. In the absence of any
+ * record a tweet is considered found, which is to say the tweet has not been through the
+ * deletion process.
+ *
+ * The [[TweetStateRecord]] type is determined by the lkey of a tweet manhattan record:
+ *   metadata/delete_state        -> HardDeleted
+ *   metadata/soft_delete_state   -> SoftDeleted
+ *   metadata/bounce_delete_state -> BounceDeleted
+ *   metadata/undelete_state      -> Undeleted
+ *   metadata/force_added_state   -> ForceAdded
+ *
+ * See the README in this directory for more details about the state of a tweet.
+ */
+sealed trait TweetStateRecord {
+  def tweetId: TweetId
+  def createdAt: Long
+  def stateKey: TweetKey.LKey.StateKey
+  // Payload that is JSON-encoded into the record value; subtypes may add entries.
+  def values: Map[String, Long] = Map("timestamp" -> createdAt)
+  def name: String
+
+  /**
+   * Manhattan record for this state: key `(tweetId, stateKey)`, value the JSON encoding of
+   * `values`, with `createdAt` (read as milliseconds) as the Manhattan timestamp.
+   */
+  def toTweetMhRecord: TweetManhattanRecord = {
+    val valByteBuffer = ByteArrayCodec.toByteBuffer(Json.encode(values))
+    val value = ManhattanValue(valByteBuffer, Some(Time.fromMilliseconds(createdAt)))
+    TweetManhattanRecord(TweetKey(tweetId, stateKey), value)
+  }
+}
+
+object TweetStateRecord {
+
+  /** When a soft-deleted or bounce deleted tweet is ultimately hard-deleted by an offline job. */
+  case class HardDeleted(tweetId: TweetId, createdAt: Long, deletedAt: Long)
+      extends TweetStateRecord {
+    // timestamp in the mh backend is the hard deletion timestamp
+    override def values = Map("timestamp" -> createdAt, "softdelete_timestamp" -> deletedAt)
+    def stateKey = TweetKey.LKey.HardDeletionStateKey
+    def name = "hard_deleted"
+  }
+
+  /** When a tweet is deleted by the user. It can still be undeleted while in the soft deleted state. */
+  case class SoftDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
+    def stateKey = TweetKey.LKey.SoftDeletionStateKey
+    def name = "soft_deleted"
+  }
+
+  /** When a tweet is deleted by go/bouncer for violating Twitter Rules. It MAY NOT be undeleted. */
+  case class BounceDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
+    def stateKey = TweetKey.LKey.BounceDeletionStateKey
+    def name = "bounce_deleted"
+  }
+
+  /** When a tweet is undeleted by an internal system. */
+  case class Undeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
+    def stateKey = TweetKey.LKey.UnDeletionStateKey
+    def name = "undeleted"
+  }
+
+  /** When a tweet is created using the forceAdd endpoint. */
+  case class ForceAdded(tweetId: TweetId, createdAt: Long) extends TweetStateRecord {
+    def stateKey = TweetKey.LKey.ForceAddedStateKey
+    def name = "force_added"
+  }
+
+  /**
+   * Interprets `record` as a state record according to its lkey; None for any lkey that is
+   * not a state key. A timestamp that cannot be decoded is replaced by 0.
+   */
+  def fromTweetMhRecord(record: TweetManhattanRecord): Option[TweetStateRecord] = {
+    // Declared as defs so that a timestamp is only decoded in the branch that uses it.
+    def ts = TimestampDecoder.decode(record, TimestampType.Default).getOrElse(0L)
+    def sdts = TimestampDecoder.decode(record, TimestampType.SoftDelete).getOrElse(0L)
+    def tweetId = record.pkey
+
+    record.lkey match {
+      case TweetKey.LKey.HardDeletionStateKey => Some(HardDeleted(tweetId, ts, sdts))
+      case TweetKey.LKey.SoftDeletionStateKey => Some(SoftDeleted(tweetId, ts))
+      case TweetKey.LKey.BounceDeletionStateKey => Some(BounceDeleted(tweetId, ts))
+      case TweetKey.LKey.UnDeletionStateKey => Some(Undeleted(tweetId, ts))
+      case TweetKey.LKey.ForceAddedStateKey => Some(ForceAdded(tweetId, ts))
+      case _ => None
+    }
+  }
+
+  /** State records found among `records`, in input order; non-state records are dropped. */
+  def fromTweetMhRecords(records: Seq[TweetManhattanRecord]): Seq[TweetStateRecord] =
+    records.flatMap(fromTweetMhRecord)
+
+  /** The state record with the greatest `createdAt`, or None when there is no state record. */
+  def mostRecent(records: Seq[TweetManhattanRecord]): Option[TweetStateRecord] =
+    fromTweetMhRecords(records).sortBy(_.createdAt).lastOption
+}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala
new file mode 100644
index 000000000..69023abc2
--- /dev/null
+++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala
@@ -0,0 +1,201 @@
+package com.twitter.tweetypie.storage
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.storage.Response.TweetResponse
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.util.Future
+
+/**
+ * Interface for reading and writing tweet data in Manhattan
+ */
+trait TweetStorageClient {
+  import TweetStorageClient._
+  def addTweet: AddTweet
+  def deleteAdditionalFields: DeleteAdditionalFields
+  def getTweet: GetTweet
+  def getStoredTweet: GetStoredTweet
+  def 
getDeletedTweets: GetDeletedTweets + def undelete: Undelete + def updateTweet: UpdateTweet + def scrub: Scrub + def softDelete: SoftDelete + def bounceDelete: BounceDelete + def hardDeleteTweet: HardDeleteTweet + def ping: Ping +} + +object TweetStorageClient { + type GetTweet = TweetId => Stitch[GetTweet.Response] + + object GetTweet { + sealed trait Response + object Response { + case class Found(tweet: Tweet) extends Response + object NotFound extends Response + object Deleted extends Response + // On BounceDeleted, provide the full Tweet so that implementations + // (i.e. ManhattanTweetStorageClient) do not need to be aware of the specific tweet + // fields required by callers for proper processing of bounce-deleted tweets. + case class BounceDeleted(tweet: Tweet) extends Response + } + } + + type GetStoredTweet = TweetId => Stitch[GetStoredTweet.Response] + + object GetStoredTweet { + sealed abstract class Error(val message: String) { + override def toString: String = message + } + object Error { + case object TweetIsCorrupt extends Error("stored tweet data is corrupt and cannot be decoded") + + case object ScrubbedFieldsPresent + extends Error("stored tweet fields that should be scrubbed are still present") + + case object TweetFieldsMissingOrInvalid + extends Error("expected tweet fields are missing or contain invalid values") + + case object TweetShouldBeHardDeleted + extends Error("stored tweet that should be hard deleted is still present") + } + + sealed trait Response + object Response { + sealed trait StoredTweetMetadata { + def state: Option[TweetStateRecord] + def allStates: Seq[TweetStateRecord] + def scrubbedFields: Set[FieldId] + } + + sealed trait StoredTweetErrors { + def errs: Seq[Error] + } + + /** + * Tweet data was found, possibly state records and/or scrubbed field records. 
+ */ + sealed trait FoundAny extends Response with StoredTweetMetadata { + def tweet: Tweet + } + + object FoundAny { + def unapply( + response: Response + ): Option[ + (Tweet, Option[TweetStateRecord], Seq[TweetStateRecord], Set[FieldId], Seq[Error]) + ] = + response match { + case f: FoundWithErrors => + Some((f.tweet, f.state, f.allStates, f.scrubbedFields, f.errs)) + case f: FoundAny => Some((f.tweet, f.state, f.allStates, f.scrubbedFields, Seq.empty)) + case _ => None + } + } + + /** + * No records for this tweet id were found in storage + */ + case class NotFound(id: TweetId) extends Response + + /** + * Data related to the Tweet id was found but could not be loaded successfully. The + * errs array contains details of the problems. + */ + case class Failed( + id: TweetId, + state: Option[TweetStateRecord], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + errs: Seq[Error], + ) extends Response + with StoredTweetMetadata + with StoredTweetErrors + + /** + * No Tweet data was found, and the most recent state record found is HardDeleted + */ + case class HardDeleted( + id: TweetId, + state: Option[TweetStateRecord.HardDeleted], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + ) extends Response + with StoredTweetMetadata + + /** + * Tweet data was found, and the most recent state record found, if any, is not + * any form of deletion record. + */ + case class Found( + tweet: Tweet, + state: Option[TweetStateRecord], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + ) extends FoundAny + + /** + * Tweet data was found, and the most recent state record found indicates deletion. + */ + case class FoundDeleted( + tweet: Tweet, + state: Option[TweetStateRecord], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + ) extends FoundAny + + /** + * Tweet data was found, however errors were detected in the stored data. Required + * fields may be missing from the Tweet struct (e.g. 
CoreData), stored fields that + * should be scrubbed remain present, or Tweets that should be hard-deleted remain + * in storage. The errs array contains details of the problems. + */ + case class FoundWithErrors( + tweet: Tweet, + state: Option[TweetStateRecord], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + errs: Seq[Error], + ) extends FoundAny + with StoredTweetErrors + } + } + + type HardDeleteTweet = TweetId => Stitch[HardDeleteTweet.Response] + type SoftDelete = TweetId => Stitch[Unit] + type BounceDelete = TweetId => Stitch[Unit] + + object HardDeleteTweet { + sealed trait Response + object Response { + case class Deleted(deletedAtMillis: Option[Long], createdAtMillis: Option[Long]) + extends Response + case class NotDeleted(id: TweetId, ineligibleLKey: Option[TweetKey.LKey]) + extends Throwable + with Response + } + } + + type Undelete = TweetId => Stitch[Undelete.Response] + object Undelete { + case class Response( + code: UndeleteResponseCode, + tweet: Option[Tweet] = None, + createdAtMillis: Option[Long] = None, + archivedAtMillis: Option[Long] = None) + + sealed trait UndeleteResponseCode + + object UndeleteResponseCode { + object Success extends UndeleteResponseCode + object BackupNotFound extends UndeleteResponseCode + object NotCreated extends UndeleteResponseCode + } + } + + type AddTweet = Tweet => Stitch[Unit] + type UpdateTweet = (Tweet, Seq[Field]) => Stitch[TweetResponse] + type GetDeletedTweets = Seq[TweetId] => Stitch[Seq[DeletedTweetResponse]] + type DeleteAdditionalFields = (Seq[TweetId], Seq[Field]) => Stitch[Seq[TweetResponse]] + type Scrub = (Seq[TweetId], Seq[Field]) => Stitch[Unit] + type Ping = () => Future[Unit] +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala new file mode 100644 index 000000000..7f1bd6b1e --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie.storage + +import scala.util.control.NoStackTrace + +sealed abstract class TweetStorageException(message: String, cause: Throwable) + extends Exception(message, cause) + +/** + * The request was not properly formed and failed an assertion present in the code. Should not be + * retried without modification. + */ +case class ClientError(message: String, cause: Throwable) + extends TweetStorageException(message, cause) + with NoStackTrace + +/** + * Request was rejected by Manhattan or the in-process rate limiter. Should not be retried. + */ +case class RateLimited(message: String, cause: Throwable) + extends TweetStorageException(message, cause) + with NoStackTrace + +/** + * Corrupt tweets were requested from Manhattan + */ +case class VersionMismatchError(message: String, cause: Throwable = null) + extends TweetStorageException(message, cause) + with NoStackTrace + +/** + * All other unhandled exceptions. 
+ */ +case class InternalError(message: String, cause: Throwable = null) + extends TweetStorageException(message, cause) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala new file mode 100644 index 000000000..b10ef107d --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala @@ -0,0 +1,265 @@ +package com.twitter.tweetypie.storage + +import com.twitter.logging.Logger +import com.twitter.scrooge.TFieldBlob +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.storage.client.manhattan.kv.ManhattanException +import com.twitter.tweetypie.storage.Response._ +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Try + +object TweetUtils { + val log: Logger = Logger("com.twitter.tweetypie.storage.TweetStorageLibrary") + import FieldResponseCodec.ValueNotFoundException + + /** + * It's rare, but we have seen tweets with userId=0, which is likely the result of a + * failed/partial delete. Treat these as invalid tweets, which are returned to callers + * as not found. + */ + def isValid(tweet: StoredTweet): Boolean = + tweet.userId.exists(_ != 0) && tweet.text.nonEmpty && + tweet.createdVia.nonEmpty && tweet.createdAtSec.nonEmpty + + /** + * Helper function to extract Scrubbed field Ids from the result returned by reading entire tweet prefix + * function. 
+ * + * @param records The sequence of MH records for the given tweetId + * + * @return The set of scrubbed field ids + */ + private[tweetypie] def extractScrubbedFields(records: Seq[TweetManhattanRecord]): Set[Short] = + records + .map(r => r.lkey) + .collect { case TweetKey.LKey.ScrubbedFieldKey(fieldId) => fieldId } + .toSet + + private[tweetypie] val expectedFields = + TweetFields.requiredFieldIds.toSet - TweetFields.tweetIdField + + /** + * Find the timestamp from a tweetId and a list of MH records. This is used when + * you need a timestamp and you aren't sure that tweetId is a snowflake id. + * + * @param tweetId A tweetId you want the timestamp for. + * @param records Tbird_mh records keyed on tweetId, one of which should be the + * core fields record. + * @return A milliseconds timestamp if one could be found. + */ + private[tweetypie] def creationTimeFromTweetIdOrMHRecords( + tweetId: Long, + records: Seq[TweetManhattanRecord] + ): Option[Long] = + SnowflakeId + .unixTimeMillisOptFromId(tweetId).orElse({ + records + .find(_.lkey == TweetKey.LKey.CoreFieldsKey) + .flatMap { coreFields => + CoreFieldsCodec + .fromTFieldBlob( + TFieldBlobCodec.fromByteBuffer(coreFields.value.contents) + ).createdAtSec.map(seconds => seconds * 1000) + } + }) + + /** + * Helper function used to parse manhattan results for fields in a tweet (given in the form of + * Sequence of (FieldKey, Try[Unit]) pairs) and build a TweetResponse object. + * + * @param callerName The name of the caller function. Used for error messages + * @param tweetId Id of the Tweet for which TweetResponse is being built + * @param fieldResults Sequence of (FieldKey, Try[Unit]). 
+ * + * @return TweetResponse object + */ + private[tweetypie] def buildTweetResponse( + callerName: String, + tweetId: Long, + fieldResults: Map[FieldId, Try[Unit]] + ): TweetResponse = { + // Count Found/Not Found + val successCount = + fieldResults.foldLeft(0) { + case (count, (_, Return(_))) => count + 1 + case (count, (_, Throw(_: ValueNotFoundException))) => count + 1 + case (count, _) => count + } + + val fieldResponsesMap = getFieldResponses(callerName, tweetId, fieldResults) + + val overallCode = if (successCount > 0 && successCount == fieldResults.size) { + TweetResponseCode.Success + } else { + + // If any field was rate limited, then we consider the entire tweet to be rate limited. So first we scan + // the field results to check such an occurrence. + val wasRateLimited = fieldResults.exists { fieldResult => + fieldResult._2 match { + case Throw(e: DeniedManhattanException) => true + case _ => false + } + } + + // Were we rate limited for any of the additional fields? + if (wasRateLimited) { + TweetResponseCode.OverCapacity + } else if (successCount == 0) { + // successCount == 0 here: no field counted as a success (or fieldResults was empty), + // so the overall code should be 'Failure' + TweetResponseCode.Failure + } else { + // 0 < successCount < fieldResults.size at this point, so the overallCode should be Partial + TweetResponseCode.Partial + } + } + + TweetResponse(tweetId, overallCode, Some(fieldResponsesMap)) + + } + + /** + * Helper function to convert manhattan results into a Map[FieldId, FieldResponse] + * + * @param fieldResults Map of FieldId to the Try result of the manhattan operation on that field. 
+ */ + private[tweetypie] def getFieldResponses( + callerName: String, + tweetId: TweetId, + fieldResults: Map[FieldId, Try[_]] + ): Map[FieldId, FieldResponse] = + fieldResults.map { + case (fieldId, resp) => + def keyStr = TweetKey.fieldKey(tweetId, fieldId).toString + resp match { + case Return(_) => + fieldId -> FieldResponse(FieldResponseCode.Success, None) + case Throw(mhException: ManhattanException) => + val errMsg = s"Exception in $callerName. Key: $keyStr. Error: $mhException" + mhException match { + case _: ValueNotFoundException => // ValueNotFound is not an error + case _ => log.error(errMsg) + } + fieldId -> FieldResponseCodec.fromThrowable(mhException, Some(errMsg)) + case Throw(e) => + val errMsg = s"Exception in $callerName. Key: $keyStr. Error: $e" + log.error(errMsg) + fieldId -> FieldResponse(FieldResponseCode.Error, Some(errMsg)) + } + } + + /** + * Helper function to build a TweetResponse object when being rate limited. It's possible that only some of the fields + * got rate limited, so we indicate which fields got processed successfully, and which encountered some sort of error. + * + * @param tweetId Tweet id + * @param callerName name of API calling this function + * @param fieldResponses per-field results (FieldId -> Try[Unit]) to report in the response + * + * @return The TweetResponse object + */ + private[tweetypie] def buildTweetOverCapacityResponse( + callerName: String, + tweetId: Long, + fieldResponses: Map[FieldId, Try[Unit]] + ) = { + val fieldResponsesMap = getFieldResponses(callerName, tweetId, fieldResponses) + TweetResponse(tweetId, TweetResponseCode.OverCapacity, Some(fieldResponsesMap)) + } + + /** + * Build a StoredTweet from a Seq of records. Core fields are handled specially. 
+ */ + private[tweetypie] def buildStoredTweet( + tweetId: TweetId, + records: Seq[TweetManhattanRecord], + includeScrubbed: Boolean = false, + ): StoredTweet = { + getStoredTweetBlobs(records, includeScrubbed) + .flatMap { fieldBlob => + // When fieldId == TweetFields.rootCoreFieldId, we have further work to do since the + // 'value' is really serialized/packed version of all core fields. In this case we'll have + // to unpack it into many TFieldBlobs. + if (fieldBlob.id == TweetFields.rootCoreFieldId) { + // We won't throw any error in this function and instead let the caller function handle this + // condition (i.e If the caller function does not find any values for the core-fields in + // the returned map, it should assume that the tweet is not found) + CoreFieldsCodec.unpackFields(fieldBlob).values.toSeq + } else { + Seq(fieldBlob) + } + }.foldLeft(StoredTweet(tweetId))(_.setField(_)) + } + + private[tweetypie] def buildValidStoredTweet( + tweetId: TweetId, + records: Seq[TweetManhattanRecord] + ): Option[StoredTweet] = { + val storedTweet = buildStoredTweet(tweetId, records) + if (storedTweet.getFieldBlobs(expectedFields).nonEmpty && isValid(storedTweet)) { + Some(storedTweet) + } else { + None + } + } + + /** + * Return a TFieldBlob for each StoredTweet field defined in this set of records. + * @param includeScrubbed when false, result will not include scrubbed fields even + * if the data is present in the set of records. 
+ */ + private[tweetypie] def getStoredTweetBlobs( + records: Seq[TweetManhattanRecord], + includeScrubbed: Boolean = false, + ): Seq[TFieldBlob] = { + val scrubbed = extractScrubbedFields(records) + + records + .flatMap { r => + // extract LKey.FieldKey records if they are not scrubbed and get their TFieldBlobs + r.key match { + case fullKey @ TweetKey(_, key: TweetKey.LKey.FieldKey) + if includeScrubbed || !scrubbed.contains(key.fieldId) => + try { + val fieldBlob = TFieldBlobCodec.fromByteBuffer(r.value.contents) + if (fieldBlob.field.id != key.fieldId) { + throw new AssertionError( + s"Blob stored for $fullKey has unexpected id ${fieldBlob.field.id}" + ) + } + Some(fieldBlob) + } catch { + case e: VersionMismatchError => + log.error( + s"Failed to decode bytebuffer for $fullKey: ${e.getMessage}" + ) + throw e + } + case _ => None + } + } + } + + /** + * It's important to bubble up rate limiting exceptions as they would likely be the root cause for other issues + * (timeouts etc.), so we scan for this particular exception, and if found, we bubble that up specifically + * + * @param seqOfTries The sequence of tries which may contain within it a rate limit exception + * + * @return if a rate limiting exn was detected, this will be a Throw(e: DeniedManhattanException) + * otherwise it will be a Return(_) only if all individual tries succeeded + */ + private[tweetypie] def collectWithRateLimitCheck(seqOfTries: Seq[Try[Unit]]): Try[Unit] = { + val rateLimitThrowOpt = seqOfTries.find { + case Throw(e: DeniedManhattanException) => true + case _ => false + } + + rateLimitThrowOpt.getOrElse( + Try.collect(seqOfTries).map(_ => ()) + ) // Operation is considered successful only if all the deletions are successful + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala new file mode 100644 index 000000000..f0e14eb9d --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala @@ -0,0 +1,106 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.storage.TweetStorageClient.Undelete +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.util.Time + +object UndeleteHandler { + def apply( + read: ManhattanOperations.Read, + localInsert: ManhattanOperations.Insert, + remoteInsert: ManhattanOperations.Insert, + delete: ManhattanOperations.Delete, + undeleteWindowHours: Int, + stats: StatsReceiver + ): Undelete = { + def withinUndeleteWindow(timestampMs: Long) = + (Time.now - Time.fromMilliseconds(timestampMs)).inHours < undeleteWindowHours + + def prepareUndelete( + tweetId: TweetId, + records: Seq[TweetManhattanRecord] + ): (Undelete.Response, Option[TweetManhattanRecord]) = { + val undeleteRecord = + Some(TweetStateRecord.Undeleted(tweetId, Time.now.inMillis).toTweetMhRecord) + + TweetStateRecord.mostRecent(records) match { + // check if we need to undo a soft deletion + case Some(TweetStateRecord.SoftDeleted(_, createdAt)) => + if (createdAt > 0) { + if (withinUndeleteWindow(createdAt)) { + ( + mkSuccessfulUndeleteResponse(tweetId, records, Some(createdAt)), + undeleteRecord + ) + } else { + (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None) + } + } else { + throw InternalError(s"Timestamp unavailable for $tweetId") + } + + // BounceDeleted tweets may not be undeleted. 
see go/bouncedtweet + case Some(_: TweetStateRecord.HardDeleted | _: TweetStateRecord.BounceDeleted) => + (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None) + + case Some(_: TweetStateRecord.Undeleted) => + // We still want to write the undelete record, because at this point, we only know that the local DC's + // winning record is not a soft/hard deletion record, while it's possible that the remote DC's winning + // record might still be a soft deletion record. Having said that, we don't want to write it + // if the winning record is forceAdd, as the forceAdd call should have ensured that both DCs had the + // forceAdd record. + (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord) + + case Some(_: TweetStateRecord.ForceAdded) => + (mkSuccessfulUndeleteResponse(tweetId, records), None) + + // let's write the undeletion record just in case there is a soft deletion record in flight + case None => (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord) + } + } + + // Write the undelete record both locally and remotely to protect + // against races with hard delete replication. We only need this + // protection for the insertion of the undelete record. 
+ def multiInsert(record: TweetManhattanRecord): Stitch[Unit] = + Stitch + .collect( + Seq( + localInsert(record).liftToTry, + remoteInsert(record).liftToTry + ) + ) + .map(collectWithRateLimitCheck) + .lowerFromTry + + def deleteSoftDeleteRecord(tweetId: TweetId): Stitch[Unit] = { + val mhKey = TweetKey.softDeletionStateKey(tweetId) + delete(mhKey, None) + } + + tweetId => + for { + records <- read(tweetId) + (response, undeleteRecord) = prepareUndelete(tweetId, records) + _ <- Stitch.collect(undeleteRecord.map(multiInsert)).unit + _ <- deleteSoftDeleteRecord(tweetId) + } yield { + response + } + } + + private[storage] def mkSuccessfulUndeleteResponse( + tweetId: TweetId, + records: Seq[TweetManhattanRecord], + timestampOpt: Option[Long] = None + ) = + Undelete.Response( + Undelete.UndeleteResponseCode.Success, + Some( + StorageConversions.fromStoredTweet(buildStoredTweet(tweetId, records)) + ), + archivedAtMillis = timestampOpt + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala new file mode 100644 index 000000000..7bf68f6ef --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala @@ -0,0 +1,64 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.storage.client.manhattan.kv.ManhattanValue +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Throw +import com.twitter.util.Time + +object UpdateTweetHandler { + def apply( + insert: ManhattanOperations.Insert, + stats: StatsReceiver + ): TweetStorageClient.UpdateTweet = { (tpTweet: Tweet, fields: Seq[Field]) => + require( + fields.forall(!TweetFields.coreFieldIds.contains(_)), + "Core fields cannot be 
modified by calling updateTweet; use addTweet instead." + ) + require( + areAllFieldsDefined(tpTweet, fields), + s"Input tweet $tpTweet does not have specified fields $fields set" + ) + + val now = Time.now + val storedTweet = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet) + val tweetId = storedTweet.id + Stats.updatePerFieldQpsCounters("updateTweet", fields.map(_.id), 1, stats) + + val (fieldIds, stitchesPerTweet) = + fields.map { field => + val fieldId = field.id + val tweetKey = TweetKey.fieldKey(tweetId, fieldId) + val blob = storedTweet.getFieldBlob(fieldId).get + val value = ManhattanValue(TFieldBlobCodec.toByteBuffer(blob), Some(now)) + val record = TweetManhattanRecord(tweetKey, value) + + (fieldId, insert(record).liftToTry) + }.unzip + + Stitch.collect(stitchesPerTweet).map { seqOfTries => + val fieldkeyAndMhResults = fieldIds.zip(seqOfTries).toMap + // If even a single field was rate limited, we will send an overall OverCapacity TweetResponse + val wasRateLimited = fieldkeyAndMhResults.exists { keyAndResult => + keyAndResult._2 match { + case Throw(e: DeniedManhattanException) => true + case _ => false + } + } + + if (wasRateLimited) { + buildTweetOverCapacityResponse("updateTweets", tweetId, fieldkeyAndMhResults) + } else { + buildTweetResponse("updateTweets", tweetId, fieldkeyAndMhResults) + } + } + } + + private def areAllFieldsDefined(tpTweet: Tweet, fields: Seq[Field]) = { + val storedTweet = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet) + fields.map(_.id).forall(storedTweet.getFieldBlob(_).isDefined) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala new file mode 100644 index 000000000..57a02248b --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie + +import com.twitter.storage.client.manhattan.kv.ManhattanValue 
+import java.nio.ByteBuffer + +package object storage { + type TweetId = Long + type FieldId = Short + + type TweetManhattanValue = ManhattanValue[ByteBuffer] +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD new file mode 100644 index 000000000..e93c3b2ba --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD @@ -0,0 +1,20 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "flock-client/src/main/scala", + "flock-client/src/main/thrift:thrift-scala", + "tweetypie/servo/util/src/main/scala", + "snowflake:id", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-java", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala new file mode 100644 index 000000000..046ff226a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala @@ -0,0 +1,532 @@ +/** Copyright 2010 Twitter, Inc. 
*/ +package com.twitter.tweetypie +package tflock + +import com.twitter.finagle.stats.Counter +import com.twitter.flockdb.client._ +import com.twitter.flockdb.client.thriftscala.Priority +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.serverutil.StoredCard +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Future +import scala.collection.mutable.ListBuffer + +object TFlockIndexer { + + /** + * Printable names for some edge types currently defined in [[com.twitter.flockdb.client]]. + * Used to define stats counters for adding edges. + */ + val graphNames: Map[Int, String] = + Map( + CardTweetsGraph.id -> "card_tweets", + ConversationGraph.id -> "conversation", + DirectedAtUserIdGraph.id -> "directed_at_user_id", + InvitedUsersGraph.id -> "invited_users", + MediaTimelineGraph.id -> "media_timeline", + MentionsGraph.id -> "mentions", + NarrowcastSentTweetsGraph.id -> "narrowcast_sent_tweets", + NullcastedTweetsGraph.id -> "nullcasted_tweets", + QuotersGraph.id -> "quoters", + QuotesGraph.id -> "quotes", + QuoteTweetsIndexGraph.id -> "quote_tweets_index", + RepliesToTweetsGraph.id -> "replies_to_tweets", + RetweetsByMeGraph.id -> "retweets_by_me", + RetweetsGraph.id -> "retweets", + RetweetsOfMeGraph.id -> "retweets_of_me", + RetweetSourceGraph.id -> "retweet_source", + TweetsRetweetedGraph.id -> "tweets_retweeted", + UserTimelineGraph.id -> "user_timeline", + CreatorSubscriptionTimelineGraph.id -> "creator_subscription_timeline", + CreatorSubscriptionMediaTimelineGraph.id -> "creator_subscription_image_timeline", + ) + + /** + * On edge deletion, edges are either archived permanently or retained for 3 months, based on + * the retention policy in the above confluence page. + * + * These two retention policies correspond to the two deletion techniques: archive and remove. + * We call removeEdges for edges with a short retention policy and archiveEdges for edges with + * a permanent retention policy. 
+ */ + val graphsWithRemovedEdges: Seq[Int] = + Seq( + CardTweetsGraph.id, + CuratedTimelineGraph.id, + CuratedTweetsGraph.id, + DirectedAtUserIdGraph.id, + MediaTimelineGraph.id, + MutedConversationsGraph.id, + QuotersGraph.id, + QuotesGraph.id, + QuoteTweetsIndexGraph.id, + ReportedTweetsGraph.id, + RetweetsOfMeGraph.id, + RetweetSourceGraph.id, + SoftLikesGraph.id, + TweetsRetweetedGraph.id, + CreatorSubscriptionTimelineGraph.id, + CreatorSubscriptionMediaTimelineGraph.id, + ) + + /** + * These edges should be left in place when bounced tweets are deleted. + * These edges are removed during hard deletion. + * + * This is done so external teams (timelines) can execute on these edges for + * tombstone feature. + */ + val bounceDeleteGraphIds: Set[Int] = + Set( + UserTimelineGraph.id, + ConversationGraph.id + ) + /** Builds the `operation` counters: one per entry of graphNames (scoped by graph name), plus a shared "unknown" counter for any other graph id. Uses a strict map because mapValues is a lazy view in Scala 2.12 and would re-resolve the counter on every lookup. */ + def makeCounters(stats: StatsReceiver, operation: String): Map[Int, Counter] = { + TFlockIndexer.graphNames + .map { case (graphId, name) => graphId -> stats.scope(name).counter(operation) } + .withDefaultValue(stats.scope("unknown").counter(operation)) + } +} + +/** + * @param backgroundIndexingPriority specifies the queue to use for + * background indexing operations. This is useful for making the + * effects of background indexing operations (such as deleting edges + * for deleted Tweets) available sooner in testing scenarios + * (end-to-end tests or development instances). It is set to + * Priority.Low in production to reduce the load on high priority + * queues that we use for prominently user-visible operations. 
+ */ +class TFlockIndexer( + tflock: TFlockClient, + hasMedia: Tweet => Boolean, + backgroundIndexingPriority: Priority, + stats: StatsReceiver) + extends TweetIndexer { + private[this] val FutureNil = Future.Nil + + private[this] val archiveCounters = TFlockIndexer.makeCounters(stats, "archive") + private[this] val removeCounters = TFlockIndexer.makeCounters(stats, "remove") + private[this] val insertCounters = TFlockIndexer.makeCounters(stats, "insert") + private[this] val negateCounters = TFlockIndexer.makeCounters(stats, "negate") + + private[this] val foregroundIndexingPriority: Priority = Priority.High + + override def createIndex(tweet: Tweet): Future[Unit] = + createEdges(tweet, isUndelete = false) + + override def undeleteIndex(tweet: Tweet): Future[Unit] = + createEdges(tweet, isUndelete = true) + + private[this] case class PartitionedEdges( + longRetention: Seq[ExecuteEdge[StatusGraph]] = Nil, + shortRetention: Seq[ExecuteEdge[StatusGraph]] = Nil, + negate: Seq[ExecuteEdge[StatusGraph]] = Nil, + ignore: Seq[ExecuteEdge[StatusGraph]] = Nil) + + private[this] def partitionEdgesForDelete( + edges: Seq[ExecuteEdge[StatusGraph]], + isBounceDelete: Boolean + ) = + edges.foldLeft(PartitionedEdges()) { + // Two dependees of UserTimelineGraph edge states to satisfy: timelines & safety tools. + // Timelines show bounce-deleted tweets as tombstones; regular deletes are not shown. + // - i.e. timelineIds = UserTimelineGraph(Normal || Negative) + // Safety tools show deleted tweets to authorized internal review agents + // - i.e. 
deletedIds = UserTimelineGraph(Removed || Negative) + case (partitionedEdges, edge) if isBounceDelete && edge.graphId == UserTimelineGraph.id => + partitionedEdges.copy(negate = edge +: partitionedEdges.negate) + + case (partitionedEdges, edge) if isBounceDelete && edge.graphId == ConversationGraph.id => + // Bounce-deleted tweets remain rendered as tombstones in conversations, so do not modify + // the ConversationGraph edge state + partitionedEdges.copy(ignore = edge +: partitionedEdges.ignore) + + case (partitionedEdges, edge) + if TFlockIndexer.graphsWithRemovedEdges.contains(edge.graphId) => + partitionedEdges.copy(shortRetention = edge +: partitionedEdges.shortRetention) + + case (partitionedEdges, edge) => + partitionedEdges.copy(longRetention = edge +: partitionedEdges.longRetention) + } + + override def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] = + for { + edges <- getEdges(tweet, isCreate = false, isDelete = true, isUndelete = false) + partitionedEdges = partitionEdgesForDelete(edges, isBounceDelete) + () <- + Future + .join( + tflock + .archiveEdges(partitionedEdges.longRetention, backgroundIndexingPriority) + .onSuccess(_ => + partitionedEdges.longRetention.foreach(e => archiveCounters(e.graphId).incr())), + tflock + .removeEdges(partitionedEdges.shortRetention, backgroundIndexingPriority) + .onSuccess(_ => + partitionedEdges.shortRetention.foreach(e => removeCounters(e.graphId).incr())), + tflock + .negateEdges(partitionedEdges.negate, backgroundIndexingPriority) + .onSuccess(_ => + partitionedEdges.negate.foreach(e => negateCounters(e.graphId).incr())) + ) + .unit + } yield () + + /** + * This operation is called when a user is put into or taken out of + * a state in which their retweets should no longer be visible + * (e.g. suspended or ROPO). 
+ */ + override def setRetweetVisibility(retweetId: TweetId, setVisible: Boolean): Future[Unit] = { + val retweetEdge = Seq(ExecuteEdge(retweetId, RetweetsGraph, None, Reverse)) + + if (setVisible) { + tflock + .insertEdges(retweetEdge, backgroundIndexingPriority) + .onSuccess(_ => insertCounters(RetweetsGraph.id).incr()) + } else { + tflock + .archiveEdges(retweetEdge, backgroundIndexingPriority) + .onSuccess(_ => archiveCounters(RetweetsGraph.id).incr()) + } + } + + private[this] def createEdges(tweet: Tweet, isUndelete: Boolean): Future[Unit] = + for { + edges <- getEdges(tweet = tweet, isCreate = true, isDelete = false, isUndelete = isUndelete) + () <- tflock.insertEdges(edges, foregroundIndexingPriority) + } yield { + // Count all the edges we've successfully added: + edges.foreach(e => insertCounters(e.graphId).incr()) + } + + private[this] def addRTEdges( + tweet: Tweet, + share: Share, + isCreate: Boolean, + edges: ListBuffer[ExecuteEdge[StatusGraph]], + futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]] + ): Unit = { + + edges += RetweetsOfMeGraph.edge(share.sourceUserId, tweet.id) + edges += RetweetsByMeGraph.edge(getUserId(tweet), tweet.id) + edges += RetweetsGraph.edge(share.sourceStatusId, tweet.id) + + if (isCreate) { + edges += ExecuteEdge( + sourceId = getUserId(tweet), + graph = RetweetSourceGraph, + destinationIds = Some(Seq(share.sourceStatusId)), + direction = Forward, + position = Some(SnowflakeId(tweet.id).time.inMillis) + ) + edges.append(TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId)) + } else { + edges += RetweetSourceGraph.edge(getUserId(tweet), share.sourceStatusId) + + // if this is the last retweet we need to remove it from the source user's + // tweets retweeted graph + futureEdges.append( + tflock.count(RetweetsGraph.from(share.sourceStatusId)).flatMap { count => + if (count <= 1) { + tflock.selectAll(RetweetsGraph.from(share.sourceStatusId)).map { tweets => + if (tweets.size <= 1) + 
Seq(TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId)) + else + Nil + } + } else { + FutureNil + } + } + ) + } + } + + /** + * Appends the reply edges for `tweet` to `edges`. Nothing is added unless the tweet has a + * reply with an inReplyToStatusId. + */ + private[this] def addReplyEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + getReply(tweet).foreach { reply => + // foreach (not flatMap/map): these closures run only for their side effect on `edges` + reply.inReplyToStatusId.foreach { inReplyToStatusId => + edges += RepliesToTweetsGraph.edge(inReplyToStatusId, tweet.id) + + // only index conversationId if this is a reply to another tweet + TweetLenses.conversationId.get(tweet).foreach { conversationId => + edges += ConversationGraph.edge(conversationId, tweet.id) + } + } + } + } + + private[this] def addDirectedAtEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + TweetLenses.directedAtUser.get(tweet).foreach { directedAtUser => + edges += DirectedAtUserIdGraph.edge(directedAtUser.userId, tweet.id) + } + } + + private[this] def addMentionEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + getMentions(tweet) + .flatMap(_.userId).foreach { mention => + edges += MentionsGraph.edge(mention, tweet.id) + } + } + + private[this] def addQTEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]], + futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]], + isCreate: Boolean + ): Unit = { + val userId = getUserId(tweet) + + tweet.quotedTweet.foreach { quotedTweet => + // Regardless of tweet creates/deletes, we add the corresponding edges to the + // following two graphs. Note that we're handling the case for + // the QuotersGraph slightly differently in the tweet delete case. + edges.append(QuotesGraph.edge(quotedTweet.userId, tweet.id)) + edges.append(QuoteTweetsIndexGraph.edge(quotedTweet.tweetId, tweet.id)) + if (isCreate) { + // As mentioned above, for tweet creates we go ahead and add an edge + // to the QuotersGraph without any additional checks.
+ edges.append(QuotersGraph.edge(quotedTweet.tweetId, userId)) + } else { + // For tweet deletes, we only add an edge to be deleted from the + // QuotersGraph if the tweeting user isn't quoting the tweet anymore + // i.e. if a user has quoted a tweet multiple times, we only delete + // an edge from the QuotersGraph if they've deleted all the quotes, + // otherwise an edge should exist by definition of what the QuotersGraph + // represents. + + // Note: There can be a potential edge case here due to a race condition + // in the following scenario. + // i) A quotes a tweet T twice resulting in tweets T1 and T2. + // ii) There should exist edges in the QuotersGraph from T -> A and T1 <-> T, T2 <-> T in + // the QuoteTweetsIndexGraph, but one of the edges haven't been written + // to the QuoteTweetsIndex graph in TFlock yet. + // iii) In this scenario, we shouldn't really be deleting an edge as we're doing below. + // The approach that we're taking below is a "best effort" approach similar to what we + // currently do for RTs. + + // Find all the quotes of the quoted tweet from the quoting user + val quotesFromQuotingUser = QuoteTweetsIndexGraph + .from(quotedTweet.tweetId) + .intersect(UserTimelineGraph.from(userId)) + futureEdges.append( + tflock + .count(quotesFromQuotingUser).flatMap { count => + // If this is the last quote of the quoted tweet from the quoting user, + // we go ahead and delete the edge from the QuotersGraph. + if (count <= 1) { + tflock.selectAll(quotesFromQuotingUser).map { tweets => + if (tweets.size <= 1) { + Seq(QuotersGraph.edge(quotedTweet.tweetId, userId)) + } else { + Nil + } + } + } else { + FutureNil + } + } + ) + } + } + } + + private[this] def addCardEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + // Note that we are indexing only the TOO "stored" cards + // (cardUri=card://). Rest of the cards are ignored here. 
+ tweet.cardReference + .collect { + case StoredCard(id) => + edges.append(CardTweetsGraph.edge(id, tweet.id)) + }.getOrElse(()) + } + + // Note: on undelete, this method restores all archived edges, including those that may have + // been archived prior to the delete. This is incorrect behavior but in practice rarely + // causes problems, as undeletes are so rare. + private[this] def addEdgesForDeleteOrUndelete( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + edges.appendAll( + Seq( + MentionsGraph.edges(tweet.id, None, Reverse), + RepliesToTweetsGraph.edges(tweet.id, None) + ) + ) + + // When we delete or undelete a conversation control root Tweet we want to archive or restore + // all the edges in InvitedUsersGraph from the Tweet id. + if (hasConversationControl(tweet) && isConversationRoot(tweet)) { + edges.append(InvitedUsersGraph.edges(tweet.id, None)) + } + } + + private[this] def addSimpleEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + if (TweetLenses.nullcast.get(tweet)) { + edges.append(NullcastedTweetsGraph.edge(getUserId(tweet), tweet.id)) + } else if (TweetLenses.narrowcast.get(tweet).isDefined) { + edges.append(NarrowcastSentTweetsGraph.edge(getUserId(tweet), tweet.id)) + } else { + edges.append(UserTimelineGraph.edge(getUserId(tweet), tweet.id)) + + if (hasMedia(tweet)) + edges.append(MediaTimelineGraph.edge(getUserId(tweet), tweet.id)) + + // Index root creator subscription tweets. 
+ // Ignore replies because those are not necessarily visible to a user who subscribes to tweet author. + // Tweets that carry a share are excluded by the same check; a tweet without coreData counts as root. + val isRootTweet: Boolean = tweet.coreData match { + case Some(c) => c.reply.isEmpty && c.share.isEmpty + case None => true + } + + if (tweet.exclusiveTweetControl.isDefined && isRootTweet) { + edges.append(CreatorSubscriptionTimelineGraph.edge(getUserId(tweet), tweet.id)) + + if (hasMedia(tweet)) + edges.append(CreatorSubscriptionMediaTimelineGraph.edge(getUserId(tweet), tweet.id)) + } + } + } + + /** + * Issues edges for each mention of user in a conversation-controlled tweet. This way InvitedUsers + * graph accumulates complete set of ids for @mention-invited users, by conversation id. + * + * One edge (conversation id -> mentioned user id) is appended to `edges` per mention that has a + * user id. When the tweet has no conversation id, the tweet's own id is used instead. + */ + private def invitedUsersEdgesForCreate( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + val conversationId: Long = getConversationId(tweet).getOrElse(tweet.id) + val mentions: Seq[UserId] = getMentions(tweet).flatMap(_.userId) + edges.appendAll(mentions.map(userId => InvitedUsersGraph.edge(conversationId, userId))) + } + + /** + * Issues edges of InvitedUsersGraph that ought to be deleted for a conversation controlled reply. + * These are mentions of users in the given tweet, only if the user was not mentioned elsewhere + * in the conversation. This way for a conversation, InvitedUsersGraph would always hold a set + * of all users invited to the conversation, and an edge is removed only after the last mention of + * a user is deleted.
+ */ + private def invitedUsersEdgesForDelete( + tweet: Tweet, + futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]] + ): Unit = { + getConversationId(tweet).foreach { conversationId: Long => + val mentions: Seq[UserId] = getMentions(tweet).flatMap(_.userId) + mentions.foreach { userId => + val tweetIdsWithinConversation = ConversationGraph.from(conversationId) + val tweetIdsThatMentionUser = MentionsGraph.from(userId) + futureEdges.append( + tflock + .selectAll( + query = tweetIdsThatMentionUser.intersect(tweetIdsWithinConversation), + limit = Some(2), // Just need to know if it is >1 or <=1, so 2 are enough. + pageSize = None // Provide default, otherwise Mockito complains + ).map { tweetIds: Seq[Long] => + if (tweetIds.size <= 1) { + Seq(InvitedUsersGraph.edge(conversationId, userId)) + } else { + Nil + } + } + ) + } + } + } + + private def hasInviteViaMention(tweet: Tweet): Boolean = { + tweet.conversationControl match { + case Some(ConversationControl.ByInvitation(controls)) => + controls.inviteViaMention.getOrElse(false) + case Some(ConversationControl.Community(controls)) => + controls.inviteViaMention.getOrElse(false) + case Some(ConversationControl.Followers(followers)) => + followers.inviteViaMention.getOrElse(false) + case _ => + false + } + } + + private def hasConversationControl(tweet: Tweet): Boolean = + tweet.conversationControl.isDefined + + // If a Tweet has a ConversationControl, it must have a ConversationId associated with it so we + // can compare the ConversationId with the current Tweet ID to determine if it's the root of the + // conversation. 
See ConversationIdHydrator for more details + // NOTE(review): `.get` throws NoSuchElementException when the tweet has no conversation id. Callers + // here only invoke this after hasConversationControl(tweet); the throw would happen synchronously + // inside getEdges, before any Future is returned. + private def isConversationRoot(tweet: Tweet): Boolean = + getConversationId(tweet).get == tweet.id + + /** + * Adds InvitedUsersGraph edges for a tweet that has a conversation control; does nothing for + * other tweets. On create the edges are appended to `edges`; otherwise, for non-root tweets, + * the lookups that decide which edges to delete are appended to `futureEdges`. + */ + private def addInvitedUsersEdges( + tweet: Tweet, + isCreate: Boolean, + isUndelete: Boolean, + edges: ListBuffer[ExecuteEdge[StatusGraph]], + futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]] + ): Unit = { + if (hasConversationControl(tweet)) { + if (isCreate) { + if (isConversationRoot(tweet) && !isUndelete) { + // For root Tweets, only add edges for original creates, not for undeletes. + // Undeletes are handled by addEdgesForDeleteOrUndelete. + invitedUsersEdgesForCreate(tweet, edges) + } + if (!isConversationRoot(tweet) && hasInviteViaMention(tweet)) { + // For replies, only add edges when the conversation control is in inviteViaMention mode. + invitedUsersEdgesForCreate(tweet, edges) + } + } else { + if (!isConversationRoot(tweet)) { + invitedUsersEdgesForDelete(tweet, futureEdges) + } + } + } + } + + /** + * Collects every edge to execute for `tweet`. Edges that can be computed directly are gathered + * in `edges`; edges that depend on a TFlock lookup are gathered as futures in `futureEdges`. + * The returned Future completes with both sets once all of those lookups have completed. + * + * A tweet with a share gets only the simple edges and the retweet edges; every other tweet + * gets the simple edges plus the invited-user, reply, directed-at, mention, quote and card + * edges (and the delete/undelete edges when `isDelete` or `isUndelete` is set). + */ + private[this] def getEdges( + tweet: Tweet, + isCreate: Boolean, + isDelete: Boolean, + isUndelete: Boolean + ): Future[Seq[ExecuteEdge[StatusGraph]]] = { + val edges = ListBuffer[ExecuteEdge[StatusGraph]]() + val futureEdges = ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]() + + addSimpleEdges(tweet, edges) + getShare(tweet) match { + case Some(share) => addRTEdges(tweet, share, isCreate, edges, futureEdges) + case _ => + addInvitedUsersEdges(tweet, isCreate, isUndelete, edges, futureEdges) + addReplyEdges(tweet, edges) + addDirectedAtEdges(tweet, edges) + addMentionEdges(tweet, edges) + addQTEdges(tweet, edges, futureEdges, isCreate) + addCardEdges(tweet, edges) + if (isDelete || isUndelete) { + addEdgesForDeleteOrUndelete(tweet, edges) + } + } + + Future + .collect(futureEdges) + .map { moreEdges => (edges ++= moreEdges.flatten).toList } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala
b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala new file mode 100644 index 000000000..9145a4362 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala @@ -0,0 +1,30 @@ +/** Copyright 2010 Twitter, Inc. */ +package com.twitter.tweetypie +package tflock + +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Future + +trait TweetIndexer { + + /** + * Called at tweet-creation time, this method should set up all relevant indices on the tweet. + */ + def createIndex(tweet: Tweet): Future[Unit] = Future.Unit + + /** + * Called at tweet-undelete time (which isn't yet handled), this method should + * restore all relevant indices on the tweet. + */ + def undeleteIndex(tweet: Tweet): Future[Unit] = Future.Unit + + /** + * Called at tweet-delete time, this method should archive all relevant indices on the tweet. + */ + def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] = Future.Unit + + /** + * This method should archive or unarchive the retweet edge in TFlock RetweetsGraph. 
+ */ + def setRetweetVisibility(retweetId: TweetId, visible: Boolean): Future[Unit] = Future.Unit +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD new file mode 100644 index 000000000..c7ad2b832 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD @@ -0,0 +1,13 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "scrooge/scrooge-core/src/main/scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala new file mode 100644 index 000000000..f450abd15 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala @@ -0,0 +1,8 @@ +package com.twitter.tweetypie.thriftscala + +import com.twitter.finagle.service.FailedService + +class NotImplementedTweetService + extends TweetService$FinagleClient( + new FailedService(new UnsupportedOperationException("not implemented")) + ) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala new file mode 100644 index 000000000..df3ca4362 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala @@ -0,0 +1,79 @@ +package com.twitter.tweetypie.thriftscala + +import com.twitter.util.Future + +/** + * A trait for TweetService implementations that wrap an underlying + * TweetService and need to modify only some of the methods. 
+ */ +trait TweetServiceProxy extends TweetService.MethodPerEndpoint { + protected def underlying: TweetService.MethodPerEndpoint + + /** + * Default implementation simply passes through the Future but logic can be added to wrap each + * invocation to the underlying TweetService + */ + protected def wrap[A](f: => Future[A]): Future[A] = + f + + override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] = + wrap(underlying.getTweets(request)) + + override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] = + wrap(underlying.getTweetFields(request)) + + override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = + wrap(underlying.getTweetCounts(request)) + + override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = + wrap(underlying.setAdditionalFields(request)) + + override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] = + wrap(underlying.deleteAdditionalFields(request)) + + override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = + wrap(underlying.postTweet(request)) + + override def postRetweet(request: RetweetRequest): Future[PostTweetResult] = + wrap(underlying.postRetweet(request)) + + override def unretweet(request: UnretweetRequest): Future[UnretweetResult] = + wrap(underlying.unretweet(request)) + + override def getDeletedTweets( + request: GetDeletedTweetsRequest + ): Future[Seq[GetDeletedTweetResult]] = + wrap(underlying.getDeletedTweets(request)) + + override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] = + wrap(underlying.deleteTweets(request)) + + override def updatePossiblySensitiveTweet( + request: UpdatePossiblySensitiveTweetRequest + ): Future[Unit] = + wrap(underlying.updatePossiblySensitiveTweet(request)) + + override def undeleteTweet(request: UndeleteTweetRequest): Future[UndeleteTweetResponse] = + wrap(underlying.undeleteTweet(request)) + 
+ override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] = + wrap(underlying.eraseUserTweets(request)) + + override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] = + wrap(underlying.incrTweetFavCount(request)) + + override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] = + wrap(underlying.deleteLocationData(request)) + + override def scrubGeo(request: GeoScrub): Future[Unit] = + wrap(underlying.scrubGeo(request)) + + override def takedown(request: TakedownRequest): Future[Unit] = + wrap(underlying.takedown(request)) + + override def flush(request: FlushRequest): Future[Unit] = + wrap(underlying.flush(request)) + + override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] = + wrap(underlying.incrTweetBookmarkCount(request)) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD new file mode 100644 index 000000000..ff66fe5b2 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD @@ -0,0 +1,15 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "tweetypie/servo/util", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tco-util", + "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-text/lib/java/src/main/java/com/twitter/twittertext", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala new file mode 100644 index 000000000..09c0941ec --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.CashtagEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object CashtagTextEntity extends TextEntity[CashtagEntity] { + override def fromIndex(entity: CashtagEntity): Short = entity.fromIndex + override def toIndex(entity: CashtagEntity): Short = entity.toIndex + override def move(entity: CashtagEntity, fromIndex: Short, toIndex: Short): CashtagEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala new file mode 100644 index 000000000..c9d7b30bc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala @@ -0,0 +1,118 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.servo.data.Mutation +import com.twitter.tco_util.TcoUrl +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.thriftscala.entities.Implicits._ +import com.twitter.tweetypie.tweettext.PartialHtmlEncoding +import com.twitter.tweetypie.tweettext.TextEntity +import com.twitter.tweetypie.tweettext.TextModification +import com.twitter.tweetypie.util.TweetLenses +import com.twitter.twittertext.Extractor +import scala.collection.JavaConverters._ + +/** + * Contains functions to collect urls, mentions, hashtags, and cashtags from the text of tweets and messages + */ +object EntityExtractor { + // We only use one configuration of com.twitter.twittertext.Extractor, so it's + // OK to share one global reference. 
The only available + // configuration option is whether to extract URLs without protocols + // (defaults to true) + private[this] val extractor = new Extractor + + // The twitter-text library operates on unencoded text, but we store + // and process HTML-encoded text. The TextModification returned + // from this function contains the decoded text which we will operate on, + // but also provides us with the ability to map the indices on + // the twitter-text entities back to the entities on the encoded text. + private val htmlEncodedTextToEncodeModification: String => TextModification = + text => + PartialHtmlEncoding + .decodeWithModification(text) + .getOrElse(TextModification.identity(text)) + .inverse + + private[this] val extractAllUrlsFromTextMod: TextModification => Seq[UrlEntity] = + extractUrls(false) + + val extractAllUrls: String => Seq[UrlEntity] = + htmlEncodedTextToEncodeModification.andThen(extractAllUrlsFromTextMod) + + private[this] val extractTcoUrls: TextModification => Seq[UrlEntity] = + extractUrls(true) + + private[this] def extractUrls(tcoOnly: Boolean): TextModification => Seq[UrlEntity] = + mkEntityExtractor[UrlEntity]( + extractor.extractURLsWithIndices(_).asScala.filter { e => + if (tcoOnly) TcoUrl.isTcoUrl(e.getValue) else true + }, + UrlEntity(_, _, _) + ) + + private[this] val extractMentionsFromTextMod: TextModification => Seq[MentionEntity] = + mkEntityExtractor[MentionEntity]( + extractor.extractMentionedScreennamesWithIndices(_).asScala, + MentionEntity(_, _, _) + ) + + val extractMentions: String => Seq[MentionEntity] = + htmlEncodedTextToEncodeModification.andThen(extractMentionsFromTextMod) + + private[this] val extractHashtagsFromTextMod: TextModification => Seq[HashtagEntity] = + mkEntityExtractor[HashtagEntity]( + extractor.extractHashtagsWithIndices(_).asScala, + HashtagEntity(_, _, _) + ) + + val extractHashtags: String => Seq[HashtagEntity] = + htmlEncodedTextToEncodeModification.andThen(extractHashtagsFromTextMod) + + 
private[this] val extractCashtagsFromTextMod: TextModification => Seq[CashtagEntity] = + mkEntityExtractor[CashtagEntity]( + extractor.extractCashtagsWithIndices(_).asScala, + CashtagEntity(_, _, _) + ) + + val extractCashtags: String => Seq[CashtagEntity] = + htmlEncodedTextToEncodeModification.andThen(extractCashtagsFromTextMod) + + private[this] def mkEntityExtractor[E: TextEntity]( + extract: String => Seq[Extractor.Entity], + construct: (Short, Short, String) => E + ): TextModification => Seq[E] = + htmlEncodedMod => { + val convert: Extractor.Entity => Option[E] = + e => + for { + start <- asShort(e.getStart.intValue) + end <- asShort(e.getEnd.intValue) + if e.getValue != null + res <- htmlEncodedMod.reindexEntity(construct(start, end, e.getValue)) + } yield res + + val entities = extract(htmlEncodedMod.original) + extractor.modifyIndicesFromUTF16ToUnicode(htmlEncodedMod.original, entities.asJava) + entities.map(convert).flatten + } + + private[this] def asShort(i: Int): Option[Short] = + if (i.isValidShort) Some(i.toShort) else None + + private[this] def mutation(extractUrls: Boolean): Mutation[Tweet] = + Mutation { tweet => + val htmlEncodedMod = htmlEncodedTextToEncodeModification(TweetLenses.text.get(tweet)) + + Some( + tweet.copy( + urls = if (extractUrls) Some(extractTcoUrls(htmlEncodedMod)) else tweet.urls, + mentions = Some(extractMentionsFromTextMod(htmlEncodedMod)), + hashtags = Some(extractHashtagsFromTextMod(htmlEncodedMod)), + cashtags = Some(extractCashtagsFromTextMod(htmlEncodedMod)) + ) + ) + } + + val mutationWithoutUrls: Mutation[Tweet] = mutation(false) + val mutationAll: Mutation[Tweet] = mutation(true) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala new file mode 100644 index 000000000..4ba86ebc8 --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.HashtagEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object HashtagTextEntity extends TextEntity[HashtagEntity] { + override def fromIndex(entity: HashtagEntity): Short = entity.fromIndex + override def toIndex(entity: HashtagEntity): Short = entity.toIndex + override def move(entity: HashtagEntity, fromIndex: Short, toIndex: Short): HashtagEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala new file mode 100644 index 000000000..a68595dee --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala @@ -0,0 +1,10 @@ +package com.twitter.tweetypie.thriftscala.entities + +object Implicits { + implicit val hashtagTextEntity: HashtagTextEntity.type = HashtagTextEntity + implicit val cashtagTextEntity: CashtagTextEntity.type = CashtagTextEntity + implicit val mentionTextEntity: MentionTextEntity.type = MentionTextEntity + implicit val urlTextEntity: UrlTextEntity.type = UrlTextEntity + implicit val mediaTextEntity: MediaTextEntity.type = MediaTextEntity + implicit val textRangeTextEntity: TextRangeEntityAdapter.type = TextRangeEntityAdapter +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala new file mode 100644 index 000000000..45c145399 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import 
com.twitter.tweetypie.thriftscala.MediaEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object MediaTextEntity extends TextEntity[MediaEntity] { + override def fromIndex(entity: MediaEntity): Short = entity.fromIndex + override def toIndex(entity: MediaEntity): Short = entity.toIndex + override def move(entity: MediaEntity, fromIndex: Short, toIndex: Short): MediaEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala new file mode 100644 index 000000000..f4ce11a43 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.MentionEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object MentionTextEntity extends TextEntity[MentionEntity] { + override def fromIndex(entity: MentionEntity): Short = entity.fromIndex + override def toIndex(entity: MentionEntity): Short = entity.toIndex + override def move(entity: MentionEntity, fromIndex: Short, toIndex: Short): MentionEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala new file mode 100644 index 000000000..a0dd5be79 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.TextRange +import com.twitter.tweetypie.tweettext.TextEntity + +object TextRangeEntityAdapter extends TextEntity[TextRange] { + override def 
fromIndex(entity: TextRange): Short = entity.fromIndex.toShort + override def toIndex(entity: TextRange): Short = entity.toIndex.toShort + override def move(entity: TextRange, fromIndex: Short, toIndex: Short): TextRange = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala new file mode 100644 index 000000000..8ab52747a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.UrlEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object UrlTextEntity extends TextEntity[UrlEntity] { + override def fromIndex(entity: UrlEntity): Short = entity.fromIndex + override def toIndex(entity: UrlEntity): Short = entity.toIndex + override def move(entity: UrlEntity, fromIndex: Short, toIndex: Short): UrlEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD new file mode 100644 index 000000000..0fb3b965a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD @@ -0,0 +1,16 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "tweetypie-tweettext", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/ibm/icu:icu4j", + "twitter-text/lib/java/src/main/java/com/twitter/twittertext", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala 
b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala new file mode 100644 index 000000000..e24076f55 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala @@ -0,0 +1,44 @@ +package com.twitter.tweetypie.tweettext + +import com.ibm.icu.text.BreakIterator + +/** + * Adapt the [[BreakIterator]] interface to a scala [[Iterator]] + * over the offsets of user-perceived characters in a String. + */ +object GraphemeIndexIterator { + + /** + * Produce an iterator over indices in the string that mark the end + * of a user-perceived character (grapheme) + */ + def ends(s: String): Iterator[Offset.CodeUnit] = + // The start of every grapheme but the first is also a grapheme + // end. The last grapheme ends at the end of the string. + starts(s).drop(1) ++ Iterator(Offset.CodeUnit.length(s)) + + /** + * Produce an iterator over indices in the string that mark the start + * of a user-perceived character (grapheme) + */ + def starts(s: String): Iterator[Offset.CodeUnit] = + new Iterator[Offset.CodeUnit] { + private[this] val it = BreakIterator.getCharacterInstance() + + it.setText(s) + + override def hasNext: Boolean = it.current < s.length + + override def next: Offset.CodeUnit = { + if (!hasNext) throw new IllegalArgumentException(s"${it.current()}, ${s.length}") + + // No matter what, we will be returning the value of `current`, + // which is the index of the start of the next grapheme. 
+ val result = it.current() + + it.next() + + Offset.CodeUnit(result) + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala new file mode 100644 index 000000000..6a4cb0f5a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala @@ -0,0 +1,85 @@ +package com.twitter.tweetypie.tweettext + +/** + * An efficient converter of indices between code points and code units. + */ +class IndexConverter(text: String) { + // Keep track of a single corresponding pair of code unit and code point + // offsets so that we can re-use counting work if the next requested + // entity is near the most recent entity. + private var codePointIndex = 0 + // The code unit index should never split a surrogate pair. + private var charIndex = 0 + + /** + * @param offset Index into the string measured in code units. + * @return The code point index that corresponds to the specified character index. + */ + def toCodePoints(offset: Offset.CodeUnit): Offset.CodePoint = + Offset.CodePoint(codeUnitsToCodePoints(offset.toInt)) + + /** + * @param charIndex Index into the string measured in code units. + * @return The code point index that corresponds to the specified character index. + */ + def codeUnitsToCodePoints(charIndex: Int): Int = { + if (charIndex < this.charIndex) { + this.codePointIndex -= text.codePointCount(charIndex, this.charIndex) + } else { + this.codePointIndex += text.codePointCount(this.charIndex, charIndex) + } + this.charIndex = charIndex + + // Make sure that charIndex never points to the second code unit of a + // surrogate pair. + if (charIndex > 0 && Character.isSupplementaryCodePoint(text.codePointAt(charIndex - 1))) { + this.charIndex -= 1 + this.codePointIndex -= 1 + } + + this.codePointIndex + } + + /** + * @param offset Index into the string measured in code points. 
+ * @return the corresponding code unit index + */ + def toCodeUnits(offset: Offset.CodePoint): Offset.CodeUnit = { + this.charIndex = text.offsetByCodePoints(charIndex, offset.toInt - this.codePointIndex) + this.codePointIndex = offset.toInt + Offset.CodeUnit(this.charIndex) + } + + /** + * @param codePointIndex Index into the string measured in code points. + * @return the corresponding code unit index + */ + def codePointsToCodeUnits(codePointIndex: Int): Int = + toCodeUnits(Offset.CodePoint(codePointIndex)).toInt + + /** + * Returns a substring which begins at the specified code point `from` and extends to the + * code point `to`. Since String.substring only works with character, the method first + * converts code point offset to code unit offset. + */ + def substring(from: Offset.CodePoint, to: Offset.CodePoint): String = + text.substring(toCodeUnits(from).toInt, toCodeUnits(to).toInt) + + /** + * Returns a substring which begins at the specified code point `from` and extends to the + * code point `to`. Since String.substring only works with character, the method first + * converts code point offset to code unit offset. + */ + def substringByCodePoints(from: Int, to: Int): String = + substring(Offset.CodePoint(from), Offset.CodePoint(to)) + + /** + * Returns a substring which begins at the specified code point `from` and extends to the + * end of the string. Since String.substring only works with character, the method first + * converts code point offset to code unit offset. 
+ */ + def substringByCodePoints(from: Int): String = { + val charFrom = codePointsToCodeUnits(from) + text.substring(charFrom) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala new file mode 100644 index 000000000..119458643 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala @@ -0,0 +1,253 @@ +package com.twitter.tweetypie.tweettext +import scala.collection.immutable + +/** + * An Offset is a typed index into a String. + */ +trait Offset[T] extends Ordering[T] { + def toInt(t: T): Int + def count(text: String, start: Offset.CodeUnit, end: Offset.CodeUnit): T + + def compare(t1: T, t2: T): Int = toInt(t1).compare(toInt(t2)) + def length(input: String): T = count(input, Offset.CodeUnit(0), Offset.CodeUnit.length(input)) +} + +object Offset { + + /** + * UTF-16 code unit offsets are the native offsets for Java/Scala + * Strings. + */ + case class CodeUnit(toInt: Int) extends AnyVal with Ordered[CodeUnit] { + def compare(other: CodeUnit): Int = toInt.compare(other.toInt) + def +(other: CodeUnit) = CodeUnit(toInt + other.toInt) + def -(other: CodeUnit) = CodeUnit(toInt - other.toInt) + def min(other: CodeUnit): CodeUnit = if (toInt < other.toInt) this else other + def max(other: CodeUnit): CodeUnit = if (toInt > other.toInt) this else other + def incr: CodeUnit = CodeUnit(toInt + 1) + def decr: CodeUnit = CodeUnit(toInt - 1) + def until(end: CodeUnit): immutable.IndexedSeq[CodeUnit] = + toInt.until(end.toInt).map(CodeUnit(_)) + + /** + * Converts this `CodeUnit` to the equivalent `CodePoint` within the + * given text. 
+ */ + def toCodePoint(text: String): CodePoint = + CodePoint(text.codePointCount(0, toInt)) + + def offsetByCodePoints(text: String, codePoints: CodePoint): CodeUnit = + CodeUnit(text.offsetByCodePoints(toInt, codePoints.toInt)) + } + + implicit object CodeUnit extends Offset[CodeUnit] { + def toInt(u: CodeUnit): Int = u.toInt + override def length(text: String): CodeUnit = CodeUnit(text.length) + def count(text: String, start: CodeUnit, end: CodeUnit): CodeUnit = end - start + } + + /** + * Offsets in whole Unicode code points. Any CodePoint is a valid + * offset into the String as long as it is >= 0 and less than the + * number of code points in the string. + */ + case class CodePoint(toInt: Int) extends AnyVal with Ordered[CodePoint] { + def toShort: Short = toInt.toShort + def compare(other: CodePoint): Int = toInt.compare(other.toInt) + def +(other: CodePoint) = CodePoint(toInt + other.toInt) + def -(other: CodePoint) = CodePoint(toInt - other.toInt) + def min(other: CodePoint): CodePoint = if (toInt < other.toInt) this else other + def max(other: CodePoint): CodePoint = if (toInt > other.toInt) this else other + def until(end: CodePoint): immutable.IndexedSeq[CodePoint] = + toInt.until(end.toInt).map(CodePoint(_)) + + def toCodeUnit(text: String): CodeUnit = + CodeUnit(text.offsetByCodePoints(0, toInt)) + } + + implicit object CodePoint extends Offset[CodePoint] { + def toInt(p: CodePoint): Int = p.toInt + + def count(text: String, start: CodeUnit, end: CodeUnit): CodePoint = + CodePoint(text.codePointCount(start.toInt, end.toInt)) + } + + /** + * Offsets into the String as if the String were encoded as UTF-8. You + * cannot use a [[Utf8]] offset to index a String, because not all + * Utf8 indices are valid indices into the String. 
+ */ + case class Utf8(toInt: Int) extends AnyVal with Ordered[Utf8] { + def compare(other: Utf8): Int = toInt.compare(other.toInt) + def +(other: Utf8) = Utf8(toInt + other.toInt) + def -(other: Utf8) = Utf8(toInt - other.toInt) + def min(other: Utf8): Utf8 = if (toInt < other.toInt) this else other + def max(other: Utf8): Utf8 = if (toInt > other.toInt) this else other + } + + implicit object Utf8 extends Offset[Utf8] { + def toInt(u: Utf8): Int = u.toInt + + /** + * Count how many bytes this section of text would be when encoded as + * UTF-8. + */ + def count(s: String, start: CodeUnit, end: CodeUnit): Utf8 = { + def go(i: CodeUnit, byteLength: Utf8): Utf8 = + if (i < end) { + val cp = s.codePointAt(i.toInt) + go(i + CodeUnit(Character.charCount(cp)), byteLength + forCodePoint(cp)) + } else { + byteLength + } + + go(start, Utf8(0)) + } + + /** + * Unfortunately, there is no convenient API for finding out how many + * bytes a unicode code point would take in UTF-8, so we have to + * explicitly calculate it. + * + * @see http://en.wikipedia.org/wiki/UTF-8#Description + */ + def forCodePoint(cp: Int): Utf8 = + Utf8 { + // if the code point is an unpaired surrogate, it will be converted + // into a 1 byte replacement character + if (Character.getType(cp) == Character.SURROGATE) 1 + else { + cp match { + case _ if cp < 0x80 => 1 + case _ if cp < 0x800 => 2 + case _ if cp < 0x10000 => 3 + case _ => 4 + } + } + } + } + + /** + * Display units count what we consider a "character" in a + * Tweet. [[DisplayUnit]] offsets are only valid for text that is + * NFC-normalized (See: http://www.unicode.org/reports/tr15) and + * HTML-encoded, though this interface cannot enforce that. + * + * Currently, a [[DisplayUnit]] is equivalent to a single Unicode code + * point combined with treating "&lt;", "&gt;", and "&amp;" each as a + * single character (since they are displayed as '<', '>', and '&' + * respectively). This implementation is not directly exposed.
+ * + * It should be possible to change this definition without breaking + * code that uses the [[DisplayUnit]] interface e.g. to count + * user-perceived characters (graphemes) rather than code points, + * though any change has to be made in concert with changing the + * mobile client and Web implementations so that the user experience + * of character counting remains consistent. + */ + case class DisplayUnit(toInt: Int) extends AnyVal with Ordered[DisplayUnit] { + def compare(other: DisplayUnit): Int = toInt.compare(other.toInt) + def +(other: DisplayUnit) = DisplayUnit(toInt + other.toInt) + def -(other: DisplayUnit) = DisplayUnit(toInt - other.toInt) + def min(other: DisplayUnit): DisplayUnit = if (toInt < other.toInt) this else other + def max(other: DisplayUnit): DisplayUnit = if (toInt > other.toInt) this else other + } + + implicit object DisplayUnit extends Offset[DisplayUnit] { + def toInt(d: DisplayUnit): Int = d.toInt + + /** + * Returns the number of display units in the specified range of the + * given text. See [[DisplayUnit]] for a description of what we + * consider a display unit. + * + * The input string should already be NFC normalized to get + * consistent results. If partially html encoded, it will correctly + * count html entities as a single display unit. + * + * @param text the string containing the characters to count. + * @param start the index of the first char of the text range + * @param end the index after the last char of the text range. + */ + def count(text: String, start: CodeUnit, end: CodeUnit): DisplayUnit = { + val stop = end.min(CodeUnit.length(text)) + + @annotation.tailrec + def go(offset: CodeUnit, total: DisplayUnit): DisplayUnit = + if (offset >= stop) total + else go(offset + at(text, offset), total + DisplayUnit(1)) + + go(start, DisplayUnit(0)) + } + + /** + * Return the length of the display unit at the specified offset in + * the (NFC-normalized, HTML-encoded) text.
+ */ + def at(text: String, offset: CodeUnit): CodeUnit = + CodeUnit { + text.codePointAt(offset.toInt) match { + case '&' => + if (text.regionMatches(offset.toInt, "&amp;", 0, 5)) 5 + else if (text.regionMatches(offset.toInt, "&lt;", 0, 4)) 4 + else if (text.regionMatches(offset.toInt, "&gt;", 0, 4)) 4 + else 1 + + case cp => Character.charCount(cp) + } + } + } + + /** + * Ranges of offsets, useful for avoiding slicing entities. + */ + sealed trait Ranges[T] { + def contains(t: T): Boolean + } + + object Ranges { + private[this] case class Impl[T](toSeq: Seq[(T, T)])(implicit off: Offset[T]) + extends Ranges[T] { + def contains(t: T): Boolean = toSeq.exists { case (lo, hi) => off.gt(t, lo) && off.lt(t, hi) } + } + + /** + * Non-inclusive range of offsets (matches values that are strictly + * between `lo` and `hi`) + */ + def between[T](lo: T, hi: T)(implicit off: Offset[T]): Ranges[T] = + if (off.toInt(hi) > off.toInt(lo) + 1 && off.toInt(lo) < Int.MaxValue) Impl(Seq((lo, hi))) + else Impl(Nil) + + /** + * The union of all of the specified ranges. + */ + def all[T](ranges: Seq[Ranges[T]])(implicit off: Offset[T]): Ranges[T] = + Impl( + // Preprocess the ranges so that each contains check is as cheap + // as possible. + ranges + .flatMap { case r: Impl[T] => r.toSeq } + .sortBy(_._1) + .foldLeft(Nil: List[(T, T)]) { + case ((a, b) :: out, (c, d)) if off.lt(c, b) => (a, d) :: out + case (out, r) => r :: out + } + ) + + def Empty[T: Offset]: Ranges[T] = Impl[T](Nil) + + private[this] val HtmlEscapes = """&(?:amp|lt|gt);""".r + + /** + * Match [[CodeUnit]]s that would split a HTML entity.
+ */ + def htmlEntities(s: String): Ranges[CodeUnit] = { + val it = HtmlEscapes.findAllIn(s) + all(it.map(_ => between(CodeUnit(it.start), CodeUnit(it.end))).toSeq) + } + + def fromCodePointPairs(pairs: Seq[(Int, Int)]): Ranges[CodePoint] = + all(pairs.map { case (lo, hi) => between(CodePoint(lo), CodePoint(hi)) }) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala new file mode 100644 index 000000000..7f1f338c3 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala @@ -0,0 +1,55 @@ +package com.twitter.tweetypie.tweettext + +/** + * Code used to convert raw user-provided text into an allowable form. + */ +object PartialHtmlEncoding { + + /** + * Replaces all `<`, `>`, and '&' chars with "&lt;", "&gt;", and "&amp;", respectively. + * + * Tweet text is HTML-encoded at tweet creation time, and is stored and processed in encoded form. + */ + def encode(text: String): String = { + val buf = new StringBuilder + + text.foreach { + case '<' => buf.append("&lt;") + case '>' => buf.append("&gt;") + case '&' => buf.append("&amp;") + case c => buf.append(c) + } + + buf.toString + } + + private val AmpLtRegex = "&lt;".r + private val AmpGtRegex = "&gt;".r + private val AmpAmpRegex = "&amp;".r + + private val partialHtmlDecoder: (String => String) = + ((s: String) => AmpLtRegex.replaceAllIn(s, "<")) + .andThen(s => AmpGtRegex.replaceAllIn(s, ">")) + .andThen(s => AmpAmpRegex.replaceAllIn(s, "&")) + + /** + * The opposite of encode, it replaces all "&lt;", "&gt;", and "&amp;" with + * `<`, `>`, and '&', respectively. + */ + def decode(text: String): String = + decodeWithModification(text) match { + case Some(mod) => mod.updated + case None => text + } + + /** + * Decodes encoded entities, and returns a `TextModification` if the text was modified.
+ */ + def decodeWithModification(text: String): Option[TextModification] = + TextModification.replaceAll( + text, + AmpLtRegex -> "<", + AmpGtRegex -> ">", + AmpAmpRegex -> "&" + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala new file mode 100644 index 000000000..0e5c06915 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala @@ -0,0 +1,251 @@ +package com.twitter.tweetypie.tweettext +import scala.util.matching.Regex + +/** + * Code used to convert raw user-provided text into an allowable form. + */ +object Preprocessor { + import TweetText._ + import TextModification.replaceAll + + /** + * Regex for dos-style line endings. + */ + val DosLineEndingRegex: Regex = """\r\n""".r + + /** + * Converts \r\n to just \n. + */ + def normalizeNewlines(text: String): String = + DosLineEndingRegex.replaceAllIn(text, "\n") + + /** + * Characters to strip out of tweet text at write-time. + */ + val unicodeCharsToStrip: Seq[Char] = + Seq( + '\uFFFE', '\uFEFF', // BOM + '\uFFFF', // Special + '\u200E', '\u200F', // ltr, rtl + '\u202A', '\u202B', '\u202C', '\u202D', '\u202E', // Directional change + '\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', + '\u0009', '\u000B', '\u000C', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', + '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', + '\u001D', '\u001E', '\u001F', '\u007F', + '\u2065', + ) + + val UnicodeCharsToStripRegex: Regex = unicodeCharsToStrip.mkString("[", "", "]").r + + /** + * Strips out control characters and other non-textual unicode chars that can break xml and/or + * json rendering, or be used for exploits. 
+ */ + def stripControlCharacters(text: String): String = + UnicodeCharsToStripRegex.replaceAllIn(text, "") + + val Tweetypie674UnicodeSequence: String = + "\u0633\u0645\u064e\u0640\u064e\u0651\u0648\u064f\u0648\u064f\u062d\u062e " + + "\u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e " + + "\u0627\u0645\u0627\u0631\u062a\u064a\u062e \u0337\u0334\u0310\u062e" + + val Tweetypie674UnicodeRegex: Regex = Tweetypie674UnicodeSequence.r + + /** + * Replaces each occurrence of `Tweetypie674UnicodeSequence` in this string with a + * REPLACEMENT CHARACTER. + * + * Apple has a bug in its CoreText library. This aims to prevent + * iOS clients from crashing when a tweet contains the specific + * unicode sequence. + */ + def avoidCoreTextBug(text: String): String = + Tweetypie674UnicodeRegex.replaceAllIn(text, "\ufffd") + + /** + * Replaces each occurrence of `Tweetypie674UnicodeSequence` in this string with a + * REPLACEMENT CHARACTER, and returns a TextModification object that provides + * information to also update entity indices. + */ + def replaceCoreTextBugModification(text: String): Option[TextModification] = + replaceAll(text, Tweetypie674UnicodeRegex, "\ufffd") + + private val preprocessor: String => String = + ((s: String) => nfcNormalize(s)) + .andThen(stripControlCharacters _) + .andThen(trimBlankCharacters _) + .andThen(normalizeNewlines _) + .andThen(collapseBlankLines _) + .andThen(avoidCoreTextBug _) + + /** + * Performs the text modifications that are necessary in the write-path before extracting URLs. + */ + def preprocessText(text: String): String = + preprocessor(text) + + /** + * Replaces all `<`, `>`, and '&' chars with "&lt;", "&gt;", and "&amp;", respectively. + * + * The original purpose of this was presumably to prevent script injections when + * displaying tweets without proper escaping. Currently, tweets are encoded before + * they are stored in the database.
+ * + * Note that the pre-escaping of & < and > also happens in the rich text editor in javascript + */ + def partialHtmlEncode(text: String): String = + PartialHtmlEncoding.encode(text) + + /** + * The opposite of partialHtmlEncode, it replaces all "&lt;", "&gt;", and "&amp;" with + * `<`, `>`, and '&', respectively. + */ + def partialHtmlDecode(text: String): String = + PartialHtmlEncoding.decode(text) + + /** + * + * Detects all forms of whitespace, considering as whitespace the following: + * This regex detects characters that always or often are rendered as blank space. We use + * this to prevent users from inserting excess blank lines and from tweeting effectively + * blank tweets. + * + * Note that these are not all semantically "whitespace", so this regex should not be used + * to process non-blank text, e.g. to separate words. + * + * Codepoints below and the `\p{Z}` regex character property alias are defined in the Unicode + * Character Database (UCD) at https://unicode.org/ucd/ and https://unicode.org/reports/tr44/ + * + * The `\p{Z}` regex character property alias is defined specifically in UCD as: + * + * Zs | Space_Separator | a space character (of various non-zero widths) + * Zl | Line_Separator | U+2028 LINE SEPARATOR only + * Zp | Paragraph_Separator | U+2029 PARAGRAPH SEPARATOR only + * Z | Separator | Zs | Zl | Zp + * ref: https://unicode.org/reports/tr44/#GC_Values_Table + * + * U+0009 Horizontal Tab (included in \s) + * U+000B Vertical Tab (included in \s) + * U+000C Form feed (included in \s) + * U+000D Carriage return (included in \s) + * U+0020 space (included in \s) + * U+0085 Next line (included in \u0085) + * U+061C arabic letter mark (included in \u061C) + * U+00A0 no-break space (included in \p{Z}) + * U+00AD soft-hyphen marker (included in \u00AD) + * U+1680 ogham space mark (included in \p{Z}) + * U+180E mongolian vowel separator (included in \p{Z} on jdk8 and included in \u180E on jdk11) + * U+2000 en quad (included in \p{Z}) + * U+2001 em
quad (included in \p{Z}) + * U+2002 en space (included in \p{Z}) + * U+2003 em space (included in \p{Z}) + * U+2004 three-per-em space (included in \p{Z}) + * U+2005 four-per-em space (included in \p{Z}) + * U+2006 six-per-em space (included in \p{Z}) + * U+2007 figure space (included in \p{Z}) + * U+2008 punctuation space (included in \p{Z}) + * U+2009 thin space (included in \p{Z}) + * U+200A hair space (included in \p{Z}) + * U+200B zero-width space (included in \u200B-\u200D) + * U+200C zero-width non-joiner (included in \u200B-\u200D) + * U+200D zero-width joiner (included in \u200B-\u200D) + * U+2028 line separator (included in \p{Z}) + * U+2029 paragraph separator (included in \p{Z}) + * U+202F narrow no-break space (included in \p{Z}) + * U+205F medium mathematical space (included in \p{Z}) + * U+2061 function application (included in \u2061-\u2064) + * U+2062 invisible times (included in \u2061-\u2064) + * U+2063 invisible separator (included in \u2061-\u2064) + * U+2064 invisible plus (included in \u2061-\u2064) + * U+2066 left-to-right isolate (included in \u2066-\u2069) + * U+2067 right-to-left isolate (included in \u2066-\u2069) + * U+2068 first strong isolate (included in \u2066-\u2069) + * U+2069 pop directional isolate (included in \u2066-\u2069) + * U+206A inhibit symmetric swapping (included in \u206A-\u206F) + * U+206B activate symmetric swapping (included in \u206A-\u206F) + * U+206C inhibit arabic form shaping (included in \u206A-\u206F) + * U+206D activate arabic form shaping (included in \u206A-\u206F) + * U+206E national digit shapes (included in \u206A-\u206F) + * U+206F nominal digit shapes (included in \u206A-\u206F) + * U+2800 braille pattern blank (included in \u2800) + * U+3164 hangul filler (see UCD Ignorable_Code_Point) + * U+FFA0 halfwidth hangul filler (see UCD Ignorable_Code_Point) + * U+3000 ideographic space (included in \p{Z}) + * U+FEFF zero-width no-break space (explicitly included in \uFEFF) + */ + val BlankTextRegex: Regex = +
"""[\s\p{Z}\u180E\u0085\u00AD\u061C\u200B-\u200D\u2061-\u2064\u2066-\u2069\u206A-\u206F\u2800\u3164\uFEFF\uFFA0]*""".r + + /** + * Some of the above blank characters are valid at the start of a Tweet (and irrelevant at the end) + * such as characters that change the direction of text. When trimming from the start + * or end of text we use a smaller set of characters + */ + val BlankWhenLeadingOrTrailingRegex: Regex = """[\s\p{Z}\u180E\u0085\u200B\uFEFF]*""".r + + /** + * Matches consecutive blanks, starting at a newline. + */ + val ConsecutiveBlankLinesRegex: Regex = ("""\n(""" + BlankTextRegex + """\n){2,}""").r + + val LeadingBlankCharactersRegex: Regex = ("^" + BlankWhenLeadingOrTrailingRegex).r + val TrailingBlankCharactersRegex: Regex = (BlankWhenLeadingOrTrailingRegex + "$").r + + /** + * Is the given text empty or contains nothing but whitespace? + */ + def isBlank(text: String): Boolean = + BlankTextRegex.pattern.matcher(text).matches() + + /** + * See http://confluence.local.twitter.com/display/PROD/Displaying+line+breaks+in+Tweets + * + * Collapses consecutive blanks lines down to a single blank line. We can assume that + * all newlines have already been normalized to just \n, so we don't have to worry about + * \r\n. + */ + def collapseBlankLinesModification(text: String): Option[TextModification] = + replaceAll(text, ConsecutiveBlankLinesRegex, "\n\n") + + def collapseBlankLines(text: String): String = + ConsecutiveBlankLinesRegex.replaceAllIn(text, "\n\n") + + def trimBlankCharacters(text: String): String = + TrailingBlankCharactersRegex.replaceFirstIn( + LeadingBlankCharactersRegex.replaceFirstIn(text, ""), + "" + ) + + /** Characters that are not visible on their own. Some of these are used in combination with + * other visible characters, and therefore cannot be always stripped from tweets. 
+ */ + private[tweettext] val InvisibleCharacters: Seq[Char] = + Seq( + '\u2060', '\u2061', '\u2062', '\u2063', '\u2064', '\u206A', '\u206B', '\u206C', '\u206D', + '\u206D', '\u206E', '\u206F', '\u200C', + '\u200D', // non-printing chars with valid use in Arabic + '\u2009', '\u200A', '\u200B', // include very skinny spaces too + '\ufe00', '\ufe01', '\ufe02', '\ufe03', '\ufe04', '\ufe05', '\ufe06', '\ufe07', '\ufe08', + '\ufe09', '\ufe0A', '\ufe0B', '\ufe0C', '\ufe0D', '\ufe0E', '\ufe0F', + ) + + private[tweetypie] val InvisibleUnicodePattern: Regex = + ("^[" + InvisibleCharacters.mkString + "]+$").r + + def isInvisibleChar(input: Char): Boolean = { + InvisibleCharacters contains input + } + + /** If string is only "invisible characters", replace full string with whitespace. + * The purpose of this method is to remove invisible characters when ONLY invisible characters + * appear between two urls, which can be a security vulnerability due to misleading behavior. These + * characters cannot be removed as a rule applied to the tweet, because they are used in + * conjunction with other characters. + */ + def replaceInvisiblesWithWhitespace(text: String): String = { + text match { + case invisible @ InvisibleUnicodePattern() => " " * TweetText.codePointLength(invisible) + case other => other + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala new file mode 100644 index 000000000..e24eb7061 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala @@ -0,0 +1,24 @@ +package com.twitter.tweetypie.tweettext + +/** + * A type class for entities found within a piece of tweet text.
+ */ +trait TextEntity[T] { + def fromIndex(entity: T): Short + def toIndex(entity: T): Short + def move(entity: T, fromIndex: Short, toIndex: Short): T +} + +object TextEntity { + def fromIndex[T: TextEntity](entity: T): Short = + implicitly[TextEntity[T]].fromIndex(entity) + + def toIndex[T: TextEntity](entity: T): Short = + implicitly[TextEntity[T]].toIndex(entity) + + def move[T: TextEntity](entity: T, fromIndex: Short, toIndex: Short): T = + implicitly[TextEntity[T]].move(entity, fromIndex, toIndex) + + def shift[T: TextEntity](entity: T, offset: Short): T = + move(entity, (fromIndex(entity) + offset).toShort, (toIndex(entity) + offset).toShort) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala new file mode 100644 index 000000000..053a4e115 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala @@ -0,0 +1,232 @@ +package com.twitter.tweetypie.tweettext + +import scala.util.matching.Regex + +object TextModification { + + /** + * Lift a text into a TextModification where `original` and `updated` text are the same + * and `replacements` is empty. + */ + def identity(text: String): TextModification = + TextModification(original = text, updated = text, replacements = Nil) + + /** + * Replace each substring that matches the regex with the substitution string, returns a + * TextModification object that contains the updated text and enough information to also + * update entity indices. + * + * This method should correctly be taking into account surrogate-pairs. The returned + * TextModification object has code-point offsets, instead of code-unit offsets. 
+ */ + def replaceAll(text: String, regex: Regex, substitution: String): Option[TextModification] = + replaceAll(text, regex -> substitution) + + /** + * Replaces substrings that match the given `Regex` with the corresponding substitution + * string. Returns a `TextModification` that can be used to reindex entities. + */ + def replaceAll( + text: String, + regexAndSubstitutions: (Regex, String)* + ): Option[TextModification] = { + val matches = + (for { + (r, s) <- regexAndSubstitutions + m <- r.findAllIn(text).matchData + } yield (m, s)).sortBy { case (m, _) => m.start } + + if (matches.isEmpty) { + // no match found, return None to indicate no modifications made + None + } else { + val replacements = List.newBuilder[TextReplacement] + val indexConverter = new IndexConverter(text) + // contains the retained text, built up as we walk through the regex matches + val buf = new StringBuilder(text.length) + // the number of code-points copied into buf + var codePointsCopied = Offset.CodePoint(0) + // always holds the start code-unit offset to copy to buf when we encounter + // either a regex match or end-of-string.
+ var anchor = 0 + + import indexConverter.toCodePoints + + for ((m, sub) <- matches) { + val unchangedText = text.substring(anchor, m.start) + val unchangedLen = Offset.CodePoint.length(unchangedText) + val subLen = Offset.CodePoint.length(sub) + + // copies the text upto the regex match run, plus the replacement string + buf.append(unchangedText).append(sub) + codePointsCopied += unchangedLen + subLen + + // the offsets indicate the indices of the matched string in the original + // text, and the indices of the replacement string in the updated string + replacements += + TextReplacement( + originalFrom = toCodePoints(Offset.CodeUnit(m.start)), + originalTo = toCodePoints(Offset.CodeUnit(m.end)), + updatedFrom = codePointsCopied - subLen, + updatedTo = codePointsCopied + ) + + anchor = m.end + } + + buf.append(text.substring(anchor)) + + Some(TextModification(text, buf.toString, replacements.result())) + } + } + + /** + * Inserts a string at a specified code point offset. + * Returns a `TextModification` that can be used to reindex entities. + */ + def insertAt( + originalText: String, + insertAt: Offset.CodePoint, + textToInsert: String + ): TextModification = { + val insertAtCodeUnit = insertAt.toCodeUnit(originalText).toInt + val (before, after) = originalText.splitAt(insertAtCodeUnit) + val updatedText = s"$before$textToInsert$after" + val textToInsertLength = TweetText.codePointLength(textToInsert) + + TextModification( + original = originalText, + updated = updatedText, + replacements = List( + TextReplacement.fromCodePoints( + originalFrom = insertAt.toInt, + originalTo = insertAt.toInt, + updatedFrom = insertAt.toInt, + updatedTo = insertAt.toInt + textToInsertLength + )) + ) + } +} + +/** + * Encodes information about insertions/deletions/replacements made to a string, providing + * the original string, the updated string, and a list of TextReplacement objects + * that encode the indices of the segments that were changed. 
Using this information, + * it is possible to map an offset into the original string to an offset into the updated + * string, assuming the text at the offset was not within one of the modified segments. + * + * All offsets are code-points, not UTF16 code-units. + */ +case class TextModification( + original: String, + updated: String, + replacements: List[TextReplacement]) { + private val originalLen = Offset.CodePoint.length(original) + + /** + * Using an offset into the original String, computes the equivalent offset into the updated + * string. If the offset falls within a segment that was removed/replaced, None is returned. + */ + def reindex(index: Offset.CodePoint): Option[Offset.CodePoint] = + reindex(index, Offset.CodePoint(0), replacements) + + /** + * Reindexes an entity of type T. Returns the updated entity, or None if its first or last + * code point (`fromIndex` / `toIndex - 1`) cannot be reindexed. + */ + def reindexEntity[T: TextEntity](e: T): Option[T] = + for { + from <- reindex(Offset.CodePoint(TextEntity.fromIndex(e))) + to <- reindex(Offset.CodePoint(TextEntity.toIndex(e) - 1)) + } yield TextEntity.move(e, from.toShort, (to.toShort + 1).toShort) + + /** + * Reindexes a sequence of entities of type T. Some entities could be filtered + * out if they span a region of text that has been removed. + */ + def reindexEntities[T: TextEntity](es: Seq[T]): Seq[T] = + for (e <- es; e2 <- reindexEntity(e)) yield e2 + + /** + * Swaps `original` and `updated` text and inverts all `TextReplacement` instances. + */ + def inverse: TextModification = + TextModification(updated, original, replacements.map(_.inverse)) + + // recursively walks through the list of TextReplacement objects computing + // offsets to add/subtract from 'shift', which accumulates all changes and + // then gets added to index at the end.
+ private def reindex( + index: Offset.CodePoint, + shift: Offset.CodePoint, + reps: List[TextReplacement] + ): Option[Offset.CodePoint] = + reps match { + case Nil => + if (index.toInt >= 0 && index <= originalLen) + Some(index + shift) + else + None + case (r @ TextReplacement(fr, to, _, _)) :: tail => + if (index < fr) Some(index + shift) + else if (index < to) None + else reindex(index, shift + r.lengthDelta, tail) + } +} + +object TextReplacement { + def fromCodePoints( + originalFrom: Int, + originalTo: Int, + updatedFrom: Int, + updatedTo: Int + ): TextReplacement = + TextReplacement( + Offset.CodePoint(originalFrom), + Offset.CodePoint(originalTo), + Offset.CodePoint(updatedFrom), + Offset.CodePoint(updatedTo) + ) +} + +/** + * Encodes the indices of a segment of text in one string that maps to a replacement + * segment in an updated version of the text. The replacement segment could be empty + * (updatedTo == updatedFrom), indicating the segment was removed. + * + * All offsets are code-points, not UTF16 code-units. + * + * `originalFrom` and `updatedFrom` are inclusive. + * `originalTo` and `updatedTo` are exclusive. 
+ */ +case class TextReplacement( + originalFrom: Offset.CodePoint, + originalTo: Offset.CodePoint, + updatedFrom: Offset.CodePoint, + updatedTo: Offset.CodePoint) { + def originalLength: Offset.CodePoint = originalTo - originalFrom + def updatedLength: Offset.CodePoint = updatedTo - updatedFrom + def lengthDelta: Offset.CodePoint = updatedLength - originalLength + + def shiftOriginal(offset: Offset.CodePoint): TextReplacement = + copy(originalFrom = originalFrom + offset, originalTo = originalTo + offset) + + def shiftUpdated(offset: Offset.CodePoint): TextReplacement = + copy(updatedFrom = updatedFrom + offset, updatedTo = updatedTo + offset) + + def shift(offset: Offset.CodePoint): TextReplacement = + TextReplacement( + originalFrom + offset, + originalTo + offset, + updatedFrom + offset, + updatedTo + offset + ) + + def inverse: TextReplacement = + TextReplacement( + originalFrom = updatedFrom, + originalTo = updatedTo, + updatedFrom = originalFrom, + updatedTo = originalTo + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala new file mode 100644 index 000000000..c9f6e28cc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala @@ -0,0 +1,159 @@ +package com.twitter.tweetypie.tweettext + +import com.twitter.tweetypie.tweettext.TweetText._ +import com.twitter.twittertext.Extractor +import java.lang.Character +import scala.annotation.tailrec +import scala.collection.JavaConverters._ + +object Truncator { + val Ellipsis = "\u2026" + + /** + * Truncate tweet text for a retweet. If the text is longer than + * either of the length limits, code points are cut off from the end + * of the text and replaced with an ellipsis. We keep as much of the + * leading text as possible, subject to these constraints: + * + * - There are no more than `MaxDisplayLength` characters. 
+ * + * - When converted to UTF-8, the result does not exceed `MaxByteLength`. + * + * - We do not break within a single grapheme cluster. + * + * The input is assumed to be partially HTML-encoded and may or may + * not be NFC normalized. The result will be partially HTML-encoded + * and will be NFC normalized. + */ + def truncateForRetweet(input: String): String = truncateWithEllipsis(input, Ellipsis) + + /** + * Truncate to [[com.twitter.tweetypie.tweettext.TweetText#OriginalMaxDisplayLength]] display + * units, using "..." as an ellipsis. The resulting text is guaranteed to pass our tweet length + * check, but it is not guaranteed to fit in an SMS message. + */ + def truncateForSms(input: String): String = truncateWithEllipsis(input, "...") + + /** + * Check the length of the given text, and truncate it if it is longer + * than the allowed length for a Tweet. The result of this method will + * always have: + * + * - Display length <= OriginalMaxDisplayLength. + * - Length when encoded as UTF-8 <= OriginalMaxUtf8Length. + * + * If the input would violate this, then the text will be + * truncated. When the text is truncated, it will be truncated such + * that: + * + * - Grapheme clusters will not be split. + * - The last character before the ellipsis will not be a whitespace + * character. + * - The ellipsis text will be appended to the end. + */ + private[this] def truncateWithEllipsis(input: String, ellipsis: String): String = { + val text = nfcNormalize(input) + val truncateAt = + truncationPoint(text, OriginalMaxDisplayLength, OriginalMaxUtf8Length, Some(ellipsis)) + if (truncateAt.codeUnitOffset.toInt == text.length) text + else text.take(truncateAt.codeUnitOffset.toInt) + ellipsis + } + + /** + * Indicates a potential TruncationPoint in a piece of text.
+ * + * @param codeUnitOffset the utf-16 code-unit offset of the truncation point + * @param codePointOffset the offset in code points + */ + case class TruncationPoint(codeUnitOffset: Offset.CodeUnit, codePointOffset: Offset.CodePoint) + + /** + * Computes a TruncationPoint for the given text and length constraints. Callers such as + * truncateWithEllipsis treat a code-unit offset equal to `text.length` as "fits without + * truncation". Otherwise, the result indicates both the code-unit and code-point offsets + * at which to perform the truncation. Display and byte lengths are only tracked internally + * and are not part of the result. + * + * Text should be NFC normalized first for best results. + * + * @param withEllipsis if defined, the truncation point will be computed so that there is space + * to append that ellipsis and to still remain within the limits. The ellipsis is not + * included in the returned offsets. + * + * @param atomicUnits may contain a list of ranges that should be treated as atomic units and + * not split. each tuple is a half-open range in code points.
+ */ + def truncationPoint( + text: String, + maxDisplayLength: Int = OriginalMaxDisplayLength, + maxByteLength: Int = OriginalMaxUtf8Length, + withEllipsis: Option[String] = None, + atomicUnits: Offset.Ranges[Offset.CodePoint] = Offset.Ranges.Empty + ): TruncationPoint = { + val breakPoints = + GraphemeIndexIterator + .ends(text) + .filterNot(Offset.Ranges.htmlEntities(text).contains) + + val ellipsisDisplayUnits = + withEllipsis.map(Offset.DisplayUnit.length).getOrElse(Offset.DisplayUnit(0)) + val maxTruncatedDisplayLength = Offset.DisplayUnit(maxDisplayLength) - ellipsisDisplayUnits + + val ellipsisByteLength = withEllipsis.map(Offset.Utf8.length).getOrElse(Offset.Utf8(0)) + val maxTruncatedByteLength = Offset.Utf8(maxByteLength) - ellipsisByteLength + + var codeUnit = Offset.CodeUnit(0) + var codePoint = Offset.CodePoint(0) + var displayLength = Offset.DisplayUnit(0) + var byteLength = Offset.Utf8(0) + var truncateCodeUnit = codeUnit + var truncateCodePoint = codePoint + + @tailrec def go(): TruncationPoint = + if (displayLength.toInt > maxDisplayLength || byteLength.toInt > maxByteLength) { + TruncationPoint(truncateCodeUnit, truncateCodePoint) + } else if (codeUnit != truncateCodeUnit && + displayLength <= maxTruncatedDisplayLength && + byteLength <= maxTruncatedByteLength && + (codeUnit.toInt == 0 || !Character.isWhitespace(text.codePointBefore(codeUnit.toInt))) && + !atomicUnits.contains(codePoint)) { + // we can advance the truncation point + truncateCodeUnit = codeUnit + truncateCodePoint = codePoint + go() + } else if (breakPoints.hasNext) { + // there are further truncation points to consider + val nextCodeUnit = breakPoints.next + codePoint += Offset.CodePoint.count(text, codeUnit, nextCodeUnit) + displayLength += Offset.DisplayUnit.count(text, codeUnit, nextCodeUnit) + byteLength += Offset.Utf8.count(text, codeUnit, nextCodeUnit) + codeUnit = nextCodeUnit + go() + } else { + TruncationPoint(codeUnit, codePoint) + } + + go() + } + + /** + * Truncate 
the given text, avoiding chopping HTML entities and tweet + * entities. This should only be used for testing because it performs + * entity extraction, and so is very inefficient. + */ + def truncateForTests( + input: String, + maxDisplayLength: Int = OriginalMaxDisplayLength, + maxByteLength: Int = OriginalMaxUtf8Length + ): String = { + val text = nfcNormalize(input) + val extractor = new Extractor + val entities = extractor.extractEntitiesWithIndices(text) + extractor.modifyIndicesFromUTF16ToUnicode(text, entities) + val avoid = Offset.Ranges.fromCodePointPairs( + entities.asScala.map(e => (e.getStart().intValue, e.getEnd().intValue)) + ) + val truncateAt = truncationPoint(text, maxDisplayLength, maxByteLength, None, avoid) + text.take(truncateAt.codeUnitOffset.toInt) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala new file mode 100644 index 000000000..cb2ae3069 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie.tweettext + +import java.text.Normalizer + +object TweetText { + + /** The original maximum tweet length, taking into account normalization */ + private[tweetypie] val OriginalMaxDisplayLength = 140 + + /** Maximum number of visible code points allowed in a tweet when tweet length is counted by code + * points, taking into account normalization. See also [[MaxVisibleWeightedEmojiLength]]. + */ + private[tweetypie] val MaxVisibleWeightedLength = 280 + + /** Maximum number of visible code points allowed in a tweet when tweet length is counted by + * emoji, taking into account normalization. See also [[MaxVisibleWeightedLength]]. 
+ * 140 is the max number of Emojis, visible, fully-weighted per Twitter's cramming rules + * 10 is the max number of Code Points per Emoji + */ + private[tweetypie] val MaxVisibleWeightedEmojiLength = 140 * 10 + + /** Maximum number of bytes when truncating tweet text for a retweet. Originally was the + * max UTF-8 length when tweets were at most 140 characters. + * See also [[OriginalMaxDisplayLength]]. + */ + private[tweetypie] val OriginalMaxUtf8Length = 600 + + /** Maximum number of bytes for tweet text using utf-8 encoding. + */ + private[tweetypie] val MaxUtf8Length = 5708 + + /** Maximum number of mentions allowed in tweet text. This is enforced at tweet creation time */ + private[tweetypie] val MaxMentions = 50 + + /** Maximum number of urls allowed in tweet text. This is enforced at tweet creation time */ + private[tweetypie] val MaxUrls = 10 + + /** Maximum number of hashtags allowed in tweet text. This is enforced at tweet creation time */ + private[tweetypie] val MaxHashtags = 50 + + /** Maximum number of cashtags allowed in tweet text. This is enforced at tweet creation time */ + private[tweetypie] val MaxCashtags = 50 + + /** Maximum length of a hashtag (not including the '#') */ + private[tweetypie] val MaxHashtagLength = 100 + + /** + * Normalizes the text according to the unicode NFC spec. + */ + def nfcNormalize(text: String): String = Normalizer.normalize(text, Normalizer.Form.NFC) + + /** + * Return the number of "characters" in this text. See + * [[Offset.DisplayUnit]]. + */ + def displayLength(text: String): Int = Offset.DisplayUnit.length(text).toInt + + /** + * Return the number of Unicode code points in this String. 
+ */ + def codePointLength(text: String): Int = Offset.CodePoint.length(text).toInt +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD new file mode 100644 index 000000000..9a3c54773 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD @@ -0,0 +1,76 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter.tweetypie", + name = "util", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "//:scala-reflect", + "3rdparty/jvm/commons-codec", + "3rdparty/jvm/org/apache/thrift:libthrift", + "finagle/finagle-core/src/main", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/repo", + "tweetypie/servo/util", + "tweetypie/servo/util/src/main/scala:exception", + "src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "tweet-util", + "util/util-core:scala", + ], +) + +scala_library( + name = "EditControlUtil", + sources = [ + "EditControlUtil.scala", + "package.scala", + ], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter.tweetypie", + name = 
"util-EditControlUtil", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "//:scala-reflect", + "3rdparty/jvm/commons-codec", + "3rdparty/jvm/org/apache/thrift:libthrift", + "finagle/finagle-core/src/main", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/util/src/main/scala:exception", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "tweet-util", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala new file mode 100644 index 000000000..6a89f6a3a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala @@ -0,0 +1,29 @@ +package com.twitter.tweetypie.util + +import com.twitter.escherbird.thriftscala.TweetEntityAnnotation +import com.twitter.tweetypie.thriftscala.EscherbirdEntityAnnotations +import com.twitter.tweetypie.thriftscala.Tweet + +object CommunityAnnotation { + + val groupId: Long = 8 + val domainId: Long = 31 + + def apply(communityId: Long): TweetEntityAnnotation = + TweetEntityAnnotation(groupId, domainId, entityId = communityId) + + def unapply(annotation: TweetEntityAnnotation): Option[Long] = + annotation match { + case 
TweetEntityAnnotation(`groupId`, `domainId`, entityId) => Some(entityId) + case _ => None + } + + // Returns None instead of Some(Seq()) when there are non-community annotations present + def additionalFieldsToCommunityIDs(additionalFields: Tweet): Option[Seq[Long]] = { + additionalFields.escherbirdEntityAnnotations + .map { + case EscherbirdEntityAnnotations(entityAnnotations) => + entityAnnotations.flatMap(CommunityAnnotation.unapply) + }.filter(_.nonEmpty) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala new file mode 100644 index 000000000..a455fe3d8 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.util + +import com.twitter.tweetypie.thriftscala.Communities + +object CommunityUtil { + + def communityIds(maybeCommunities: Option[Communities]): Seq[Long] = { + maybeCommunities match { + case None => + Nil + case Some(Communities(seq)) => + seq + } + } + + def hasCommunity(maybeCommunities: Option[Communities]): Boolean = { + maybeCommunities.exists(_.communityIds.nonEmpty) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala new file mode 100644 index 000000000..cb0ea84fb --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala @@ -0,0 +1,112 @@ +package com.twitter.tweetypie +package util + +import com.twitter.tweetypie.thriftscala._ + +object ConversationControls { + object Create { + def byInvitation( + inviteViaMention: Option[Boolean] = None + ): TweetCreateConversationControl.ByInvitation = TweetCreateConversationControl.ByInvitation( + TweetCreateConversationControlByInvitation(inviteViaMention = inviteViaMention) + ) + + def community( + inviteViaMention: Option[Boolean] = 
None + ): TweetCreateConversationControl.Community = TweetCreateConversationControl.Community( + TweetCreateConversationControlCommunity(inviteViaMention = inviteViaMention) + ) + + def followers( + inviteViaMention: Option[Boolean] = None + ): TweetCreateConversationControl.Followers = TweetCreateConversationControl.Followers( + TweetCreateConversationControlFollowers(inviteViaMention = inviteViaMention) + ) + } + + object Scenario { + case class CommonScenario( + createConversationControl: TweetCreateConversationControl, + descriptionSuffix: String, + expectedConversationControl: (UserId, Seq[UserId]) => ConversationControl, + inviteViaMention: Option[Boolean]) + + def mkCommunityScenario(inviteViaMention: Option[Boolean]): CommonScenario = + CommonScenario( + Create.community(inviteViaMention = inviteViaMention), + "community", + expectedConversationControl = (authorId, userIds) => { + community(userIds, authorId, inviteViaMention) + }, + inviteViaMention + ) + + def mkByInvitationScenario(inviteViaMention: Option[Boolean]): CommonScenario = + CommonScenario( + Create.byInvitation(inviteViaMention = inviteViaMention), + "invited users", + expectedConversationControl = (authorId, userIds) => { + byInvitation(userIds, authorId, inviteViaMention) + }, + inviteViaMention + ) + + def mkFollowersScenario(inviteViaMention: Option[Boolean]): CommonScenario = + CommonScenario( + Create.followers(inviteViaMention = inviteViaMention), + "followers", + expectedConversationControl = (authorId, userIds) => { + followers(userIds, authorId, inviteViaMention) + }, + inviteViaMention + ) + + val communityScenario = mkCommunityScenario(None) + val communityInviteViaMentionScenario = mkCommunityScenario(Some(true)) + + val byInvitationScenario = mkByInvitationScenario(None) + val byInvitationInviteViaMentionScenario = mkByInvitationScenario(Some(true)) + + val followersScenario = mkFollowersScenario(None) + val followersInviteViaMentionScenario = mkFollowersScenario(Some(true)) + } 
+ + def byInvitation( + invitedUserIds: Seq[UserId], + conversationTweetAuthorId: UserId, + inviteViaMention: Option[Boolean] = None + ): ConversationControl = + ConversationControl.ByInvitation( + ConversationControlByInvitation( + conversationTweetAuthorId = conversationTweetAuthorId, + invitedUserIds = invitedUserIds, + inviteViaMention = inviteViaMention + ) + ) + + def community( + invitedUserIds: Seq[UserId], + conversationTweetAuthorId: UserId, + inviteViaMention: Option[Boolean] = None + ): ConversationControl = + ConversationControl.Community( + ConversationControlCommunity( + conversationTweetAuthorId = conversationTweetAuthorId, + invitedUserIds = invitedUserIds, + inviteViaMention = inviteViaMention + ) + ) + + def followers( + invitedUserIds: Seq[UserId], + conversationTweetAuthorId: UserId, + inviteViaMention: Option[Boolean] = None + ): ConversationControl = + ConversationControl.Followers( + ConversationControlFollowers( + conversationTweetAuthorId = conversationTweetAuthorId, + invitedUserIds = invitedUserIds, + inviteViaMention = inviteViaMention + ) + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala new file mode 100644 index 000000000..7135e9538 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala @@ -0,0 +1,174 @@ +package com.twitter.tweetypie.util + +import com.twitter.servo.util.Gate +import com.twitter.tweetypie.util.TweetEditFailure.TweetEditInvalidEditControlException +import com.twitter.tweetypie.util.TweetEditFailure.TweetEditUpdateEditControlException +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.EditControlEdit +import com.twitter.tweetypie.thriftscala.EditControlInitial +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Try +import com.twitter.util.Return +import com.twitter.util.Throw +import 
com.twitter.util.Time +import com.twitter.util.Duration + +object EditControlUtil { + + val maxTweetEditsAllowed = 5 + val oldEditTimeWindow = Duration.fromMinutes(30) + val editTimeWindow = Duration.fromMinutes(60) + + def editControlEdit( + initialTweetId: TweetId, + editControlInitial: Option[EditControlInitial] = None + ): EditControl.Edit = + EditControl.Edit( + EditControlEdit(initialTweetId = initialTweetId, editControlInitial = editControlInitial)) + + // EditControl for the tweet that is not an edit, that is, any regular tweet we create + // that can, potentially, be edited later. + def makeEditControlInitial( + tweetId: TweetId, + createdAt: Time, + setEditWindowToSixtyMinutes: Gate[Unit] = Gate(_ => false) + ): EditControl.Initial = { + val editWindow = if (setEditWindowToSixtyMinutes()) editTimeWindow else oldEditTimeWindow + val initial = EditControlInitial( + editTweetIds = Seq(tweetId), + editableUntilMsecs = Some(createdAt.plus(editWindow).inMilliseconds), + editsRemaining = Some(maxTweetEditsAllowed), + isEditEligible = defaultIsEditEligible, + ) + EditControl.Initial(initial) + } + + // Returns if a given latestTweetId is the latest edit in the EditControl + def isLatestEdit( + tweetEditControl: Option[EditControl], + latestTweetId: TweetId + ): Try[Boolean] = { + tweetEditControl match { + case Some(EditControl.Initial(initial)) => + isLatestEditFromEditControlInitial(Some(initial), latestTweetId) + case Some(EditControl.Edit(edit)) => + isLatestEditFromEditControlInitial( + edit.editControlInitial, + latestTweetId + ) + case _ => Throw(TweetEditInvalidEditControlException) + } + } + + // Returns if a given latestTweetId is the latest edit in the EditControlInitial; an EditControlInitial with no editTweetIds falls through to the invalid-EditControl case instead of throwing from .last + private def isLatestEditFromEditControlInitial( + initialTweetEditControl: Option[EditControlInitial], + latestTweetId: TweetId + ): Try[Boolean] = { + 
initialTweetEditControl match { + case Some(initial) if initial.editTweetIds.nonEmpty => + Return(latestTweetId == initial.editTweetIds.last) + case _ => 
Throw(TweetEditInvalidEditControlException) + } + } + + /* Create an updated edit control for an initialTweet given the id of the new edit */ + def editControlForInitialTweet( + initialTweet: Tweet, + newEditId: TweetId + ): Try[EditControl.Initial] = { + initialTweet.editControl match { + case Some(EditControl.Initial(initial)) => + Return(EditControl.Initial(plusEdit(initial, newEditId))) + + case Some(EditControl.Edit(_)) => Throw(TweetEditUpdateEditControlException) + + case _ => + initialTweet.coreData match { + case Some(coreData) => + Return( + makeEditControlInitial( + tweetId = initialTweet.id, + createdAt = Time.fromMilliseconds(coreData.createdAtSecs * 1000), + setEditWindowToSixtyMinutes = Gate(_ => true) + ) + ) + case None => Throw(new Exception("Tweet Missing Required CoreData")) + } + } + } + + def updateEditControl(tweet: Tweet, newEditId: TweetId): Try[Tweet] = + editControlForInitialTweet(tweet, newEditId).map { editControl => + tweet.copy(editControl = Some(editControl)) + } + + def plusEdit(initial: EditControlInitial, newEditId: TweetId): EditControlInitial = { + val newEditTweetIds = (initial.editTweetIds :+ newEditId).distinct.sorted + val editsCount = newEditTweetIds.size - 1 // as there is the original tweet ID there too. 
+ initial.copy( + editTweetIds = newEditTweetIds, + editsRemaining = Some(maxTweetEditsAllowed - editsCount), + ) + } + + // The ID of the initial Tweet if this is an edit + def getInitialTweetIdIfEdit(tweet: Tweet): Option[TweetId] = tweet.editControl match { + case Some(EditControl.Edit(edit)) => Some(edit.initialTweetId) + case _ => None + } + + // If this is the first tweet in an edit chain, return the same tweet id + // otherwise return the result of getInitialTweetIdIfEdit + def getInitialTweetId(tweet: Tweet): TweetId = + getInitialTweetIdIfEdit(tweet).getOrElse(tweet.id) + + def isInitialTweet(tweet: Tweet): Boolean = + getInitialTweetId(tweet) == tweet.id + + // Extracted just so that we can easily track where the value of isEditEligible is coming from. + private def defaultIsEditEligible: Option[Boolean] = Some(true) + + // returns true if it's an edit of a Tweet, an initial Tweet that's been edited, or an unknown EditControl union field + def isEditTweet(tweet: Tweet): Boolean = + tweet.editControl match { + case Some(eci: EditControl.Initial) if eci.initial.editTweetIds.size <= 1 => false + case Some(_: EditControl.Initial) | Some(_: EditControl.Edit) | Some( + EditControl.UnknownUnionField(_)) => + true + case None => false + } + + // returns true if editControl is from an edit of a Tweet or is an unknown union field + // returns false for any other state, including edit initial.
+ def isEditControlEdit(editControl: EditControl): Boolean = { + editControl match { + case _: EditControl.Edit | EditControl.UnknownUnionField(_) => true + case _ => false + } + } + + def getEditTweetIds(editControl: Option[EditControl]): Try[Seq[TweetId]] = { + editControl match { + case Some(EditControl.Edit(EditControlEdit(_, Some(eci)))) => + Return(eci.editTweetIds) + case Some(EditControl.Initial(initial)) => + Return(initial.editTweetIds) + case _ => + Throw(new Exception(s"EditControlInitial not found in $editControl")) + } + } +} + +object TweetEditFailure { + abstract class TweetEditException(msg: String) extends Exception(msg) + + case object TweetEditGetInitialEditControlException + extends TweetEditException("Initial EditControl not found") + + case object TweetEditInvalidEditControlException + extends TweetEditException("Invalid EditControl for initial_tweet") + + case object TweetEditUpdateEditControlException + extends TweetEditException("Invalid Edit Control Update") +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala new file mode 100644 index 000000000..ce0b49079 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie.util + +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.service.RetryPolicy.RetryableWriteException +import com.twitter.servo.exception.thriftscala.ServerError +import com.twitter.util.Duration +import com.twitter.util.Throw +import com.twitter.util.TimeoutException +import com.twitter.util.Try + +object RetryPolicyBuilder { + + /** + * Retry on any exception. 
+ */ + def anyFailure[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = + RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { + case Throw(_) => true + } + + /** + * Retry on com.twitter.util.TimeoutException + */ + def timeouts[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = + RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { + case Throw(_: TimeoutException) => true + } + + /** + * Retry on com.twitter.finagle.service.RetryableWriteExceptions + */ + def writes[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = + RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { + case Throw(RetryableWriteException(_)) => true + } + + /** + * Retry on com.twitter.servo.exception.thriftscala.ServerError + */ + def servoServerError[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = + RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { + case Throw(ServerError(_)) => true + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala new file mode 100644 index 000000000..7113beed5 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala @@ -0,0 +1,54 @@ +package com.twitter.tweetypie.util + +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.stitch.Stitch + +object StitchUtils { + def trackLatency[T](latencyStat: Stat, s: => Stitch[T]): Stitch[T] = { + Stitch + .time(s) + .map { + case (res, duration) => + latencyStat.add(duration.inMillis) + res + } + .lowerFromTry + } + + def observe[T](statsReceiver: StatsReceiver, apiName: String): Stitch[T] => Stitch[T] = { + val stats = statsReceiver.scope(apiName) + + val requests = stats.counter("requests") + val success = stats.counter("success") + val latencyStat = stats.stat("latency_ms") + + val 
exceptionCounter = + new servo.util.ExceptionCounter(stats, "failures") + + stitch => + trackLatency(latencyStat, stitch) + .respond { + case Return(_) => + requests.incr() + success.incr() + + case Throw(e) => + exceptionCounter(e) + requests.incr() + } + } + + def translateExceptions[T]( + stitch: Stitch[T], + translateException: PartialFunction[Throwable, Throwable] + ): Stitch[T] = + stitch.rescue { + case t if translateException.isDefinedAt(t) => + Stitch.exception(translateException(t)) + case t => Stitch.exception(t) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala new file mode 100644 index 000000000..ccddcf540 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala @@ -0,0 +1,31 @@ +package com.twitter.tweetypie.util + +/** + * Escape a String into Java or Scala String literal syntax (adds the + * surrounding quotes.) + * + * This is primarily for printing Strings for debugging or logging. 
+ */
object StringLiteral extends (String => String) {
  // Characters from ControlLimit (space) through PrintableLimit ('~') are copied unchanged.
  private[this] val ControlLimit = ' '
  private[this] val PrintableLimit = '\u007e'

  // Characters written as a backslash followed by the mapped character.
  private[this] val Specials =
    Map('\n' -> 'n', '\r' -> 'r', '\t' -> 't', '"' -> '"', '\'' -> '\'', '\\' -> '\\')

  /**
   * Returns `str` wrapped in double quotes with its characters escaped.
   *
   * Characters listed in `Specials` become two-character backslash escapes, characters in the
   * space-to-tilde range are kept, and every other character is written as a backslash,
   * the letter u and four lowercase hex digits.
   *
   * @param str the text to render
   * @return the quoted, escaped literal
   */
  def apply(str: String): String = {
    val out = new StringBuilder(str.length)
    out.append('"')
    str.foreach { ch =>
      Specials.get(ch) match {
        case Some(escaped) => out.append('\\').append(escaped)
        case None if ch >= ControlLimit && ch <= PrintableLimit => out.append(ch)
        case None => out.append("\\u%04x".format(ch.toInt))
      }
    }
    out.append('"').toString
  }
}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala new file mode 100644 index 000000000..643971969 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala @@ -0,0 +1,49 @@
package com.twitter.tweetypie.util

import com.twitter.takedown.util.TakedownReasons
import com.twitter.takedown.util.TakedownReasons.CountryCode
import com.twitter.tseng.withholding.thriftscala.TakedownReason
import com.twitter.tseng.withholding.thriftscala.UnspecifiedReason
import com.twitter.tweetypie.thriftscala.Tweet

/**
 * Contains tweetypie-specific utils for working with TakedownReasons.
 */
object Takedowns {

  type CountryCode = String

  /**
   * Take a list of [[TakedownReason]] and return values to be saved on the [[Tweet]] in fields
   * tweetypieOnlyTakedownCountryCode and tweetypieOnlyTakedownReason.
+ * + * - tweetypieOnlyTakedownCountryCode contains the country_code of all UnspecifiedReasons + * - tweetypieOnlyTakedownReason contains all other reasons + */
  def partitionReasons(reasons: Seq[TakedownReason]): (Seq[String], Seq[TakedownReason]) = {
    val (unspecified, specified) = reasons.partition(isUnspecified)
    (unspecified.collect(TakedownReasons.reasonToCountryCode), specified)
  }

  // True for reasons of the UnspecifiedReason variant, which are stored as country codes.
  private[this] def isUnspecified(reason: TakedownReason): Boolean =
    reason match {
      case TakedownReason.UnspecifiedReason(UnspecifiedReason(_)) => true
      case _ => false
    }

  /**
   * Rebuilds the combined set of takedown reasons stored on a tweet.
   *
   * Country codes in tweetypieOnlyTakedownCountryCodes are converted with
   * `TakedownReasons.countryCodeToReason` and merged with tweetypieOnlyTakedownReasons; a
   * missing field is treated as empty.
   */
  def fromTweet(t: Tweet): Takedowns = {
    val fromCountryCodes =
      t.tweetypieOnlyTakedownCountryCodes
        .getOrElse(Nil).map(TakedownReasons.countryCodeToReason)
    val explicitReasons = t.tweetypieOnlyTakedownReasons.getOrElse(Nil)
    Takedowns((fromCountryCodes ++ explicitReasons).toSet)
  }
}

/**
 * This class is used to ensure the caller has access to both the full list of reasons as well
 * as the backwards-compatible list of country codes.
 */
case class Takedowns(reasons: Set[TakedownReason]) {
  def countryCodes: Set[CountryCode] = reasons.collect(TakedownReasons.reasonToCountryCode)
}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala new file mode 100644 index 000000000..9fa6d77a0 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala @@ -0,0 +1,17 @@
package com.twitter.tweetypie.util

import com.twitter.tweetypie.thriftscala.TransientCreateContext
import com.twitter.tweetypie.thriftscala.TweetCreateContextKey
import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeCreatorId
import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeIsLive

object TransientContextUtil {

  /**
   * Flattens the optional Periscope fields of `context` into a key/value map.
   *
   * A field that is not set contributes no entry; values are rendered with `toString`.
   */
  def toAdditionalContext(context: TransientCreateContext): Map[TweetCreateContextKey, String] = {
    // "true" or "false"
    val isLive: Option[(TweetCreateContextKey, String)] =
      context.periscopeIsLive.map(live => PeriscopeIsLive -> live.toString)
    // userId
    val creatorId: Option[(TweetCreateContextKey, String)] =
      context.periscopeCreatorId.map(id => PeriscopeCreatorId -> id.toString)

    (isLive.toSeq ++ creatorId.toSeq).toMap
  }
}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala new file mode 100644 index 000000000..06295fa25 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala @@ -0,0 +1,203 @@
package com.twitter.tweetypie.util

import com.twitter.conversions.DurationOps._
import com.twitter.logging.Logger
import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata
import com.twitter.scrooge.BinaryThriftStructSerializer
import com.twitter.servo.cache.ScopedCacheKey
import com.twitter.servo.util.Transformer
import com.twitter.tweetypie.thriftscala.PostTweetRequest
import com.twitter.util.Base64Long
import com.twitter.util.Time
import java.nio.ByteBuffer
import java.security.MessageDigest
import org.apache.commons.codec.binary.Base64
import scala.collection.immutable.SortedMap

object TweetCreationLock {

  /**
   * Cache key for a tweet-creation lock.
   *
   * `typeCode` is one of the values in [[Key.TypeCode]]; `idOrMd5` carries either an encoded
   * id or a request digest, depending on `typeCode`.
   */
  case class Key private (userId: UserId, typeCode: String, idOrMd5: String)
      extends ScopedCacheKey("t", "locker", 2, Base64Long.toBase64(userId), typeCode, idOrMd5) {

    /** The encoded uniqueness id, present only for keys of type `TypeCode.UniquenessId`. */
    def uniquenessId: Option[String] =
      Some(idOrMd5).filter(_ => typeCode == Key.TypeCode.UniquenessId)
  }

  object Key {
    private[this] val log = Logger(getClass)

    // One-letter discriminators stored in Key.typeCode.
    object TypeCode {
      val SourceTweetId = "r"
      val UniquenessId = "u"
      val PostTweetRequest = "p"
    }

    private[this] val serializer = BinaryThriftStructSerializer(PostTweetRequest)

    // normalize the representation of no media ids.
    private[util] def sanitizeMediaUploadIds(mediaUploadIds: Option[Seq[Long]]) =
      mediaUploadIds.filter(ids => ids.nonEmpty)

    /**
     * Request deduplication depends on the hash of a serialized Thrift value.
+ * + * In order to guarantee that a Map has a reproducible serialized form, + * it's necessary to fix the ordering of its keys. + */
    private[util] def sanitizeMediaMetadata(
      mediaMetadata: Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]]
    ): Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]] =
      mediaMetadata.map { unordered =>
        SortedMap(unordered.toSeq: _*)
      }

    /**
     * Make sure to sanitize request fields with map/set since serialized
     * bytes ordering is not guaranteed for same thrift values.
     *
     * Only the fields listed below are carried over; `createdVia` is always blanked and the
     * media fields go through their sanitizers.
     */
    private[util] def sanitizeRequest(request: PostTweetRequest): PostTweetRequest = {
      val normalizedUploadIds = sanitizeMediaUploadIds(request.mediaUploadIds)
      val orderedMediaMetadata = sanitizeMediaMetadata(request.mediaMetadata)

      PostTweetRequest(
        userId = request.userId,
        text = request.text,
        createdVia = "",
        inReplyToTweetId = request.inReplyToTweetId,
        geo = request.geo,
        mediaUploadIds = normalizedUploadIds,
        narrowcast = request.narrowcast,
        nullcast = request.nullcast,
        additionalFields = request.additionalFields,
        attachmentUrl = request.attachmentUrl,
        mediaMetadata = orderedMediaMetadata,
        conversationControl = request.conversationControl,
        underlyingCreativesContainerId = request.underlyingCreativesContainerId,
        editOptions = request.editOptions,
        noteTweetOptions = request.noteTweetOptions
      )
    }

    /** Key for a retweet, identified by the user and the encoded source tweet id. */
    def bySourceTweetId(userId: UserId, sourceTweetId: TweetId): Key = {
      val encodedSourceId = Base64Long.toBase64(sourceTweetId)
      Key(userId, TypeCode.SourceTweetId, encodedSourceId)
    }

    // Key derived from the Base64-encoded SHA-256 digest of the sanitized, serialized request.
    private[this] def keyFromDigest(request: PostTweetRequest): Key = {
      val canonical = sanitizeRequest(request)
      val sha256 = MessageDigest.getInstance("SHA-256").digest(serializer.toBytes(canonical))
      val digestKey =
        Key(request.userId, TypeCode.PostTweetRequest, Base64.encodeBase64String(sha256))
      log.ifDebug(s"Generated key $digestKey from request:\n${canonical}")
      digestKey
    }

    /**
     * Key for a PostTweetRequest.
     *
     * When the request carries a uniqueness id, the key is built from it; otherwise it is
     * built from a digest of the sanitized request.
     */
    def byRequest(request: PostTweetRequest): Key =
      request.uniquenessId
        .map(uqid => byUniquenessId(request.userId, uqid))
        .getOrElse(keyFromDigest(request))

    /**
     * Key for tweets that have a uniqueness
id set. There is only one + * namespace of uniqueness ids, across all clients. They are + * expected to be Snowflake ids, in order to avoid cache + * collisions. + */
    def byUniquenessId(userId: UserId, uniquenessId: Long): Key = {
      val encodedId = Base64Long.toBase64(uniquenessId)
      Key(userId, TypeCode.UniquenessId, encodedId)
    }
  }

  /**
   * The state of tweet creation for a given Key (request).
   */
  sealed trait State

  object State {

    /**
     * There is no tweet creation currently in progress. (This can
     * either be represented by no entry in the cache, or this special
     * marker. This lets us use checkAndSet for deletion to avoid
     * accidentally overwriting other process' values.)
     */
    case object Unlocked extends State

    /**
     * Some process is attempting to create the tweet.
     *
     * @param token value serialized right after the type byte (a random long, per TypeCode)
     * @param timestamp time recorded with the entry
     */
    case class InProgress(token: Long, timestamp: Time) extends State

    /**
     * The tweet has already been successfully created, and has the
     * specified id.
     *
     * @param tweetId id of the created tweet
     * @param timestamp time recorded with the entry
     */
    case class AlreadyCreated(tweetId: TweetId, timestamp: Time) extends State

    /**
     * When stored in cache, each state is prefixed by a byte
     * indicating the type of the entry.
     */
    object TypeCode {
      val Unlocked: Byte = 0
      val InProgress: Byte = 1 // + random long + timestamp
      val AlreadyCreated: Byte = 2 // + tweet id + timestamp
    }

    // type byte + 64-bit value + 64-bit timestamp
    private[this] val BufferSize = 1 + 8 + 8

    // Constant buffer to use for storing the serialized form on
    // Unlocked.
    private[this] val UnlockedBuf = Array[Byte](TypeCode.Unlocked)

    // Store the serialization function in a ThreadLocal so that we can
    // reuse the buffer between invocations.
+ private[this] val threadLocalSerialize = new ThreadLocal[State => Array[Byte]] { + override def initialValue(): State => Array[Byte] = { + // Allocate the thread-local state + val ary = new Array[Byte](BufferSize) + val buf = ByteBuffer.wrap(ary) + + { + case Unlocked => UnlockedBuf + case InProgress(token, timestamp) => + buf.clear() + buf + .put(TypeCode.InProgress) + .putLong(token) + .putLong(timestamp.sinceEpoch.inNanoseconds) + ary + case AlreadyCreated(tweetId, timestamp) => + buf.clear() + buf + .put(TypeCode.AlreadyCreated) + .putLong(tweetId) + .putLong(timestamp.sinceEpoch.inNanoseconds) + ary + } + } + } + + /** + * Convert this State to the cache representation. + */ + private[this] def toBytes(state: State): Array[Byte] = + threadLocalSerialize.get()(state) + + /** + * Convert this byte array into a LockState. + * + * @throws RuntimeException if the buffer is not of the right size + * and format + */ + private[this] def fromBytes(bytes: Array[Byte]): State = { + val buf = ByteBuffer.wrap(bytes) + val result = buf.get() match { + case TypeCode.Unlocked => Unlocked + case TypeCode.InProgress => InProgress(buf.getLong(), buf.getLong().nanoseconds.afterEpoch) + case TypeCode.AlreadyCreated => + AlreadyCreated(buf.getLong(), buf.getLong().nanoseconds.afterEpoch) + case other => throw new RuntimeException("Invalid type code: " + other) + } + if (buf.remaining != 0) { + throw new RuntimeException("Extra data in buffer: " + bytes) + } + result + } + + /** + * How to serialize the State for storage in cache. 
+ */ + val Serializer: Transformer[State, Array[Byte]] = + Transformer[State, Array[Byte]](tTo = toBytes _, tFrom = fromBytes _) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala new file mode 100644 index 000000000..6334c5d43 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala @@ -0,0 +1,506 @@ +package com.twitter.tweetypie.util + +import com.twitter.dataproducts.enrichments.thriftscala.ProfileGeoEnrichment +import com.twitter.expandodo.thriftscala._ +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.servo.data.Lens +import com.twitter.spam.rtf.thriftscala.SafetyLabel +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.unmentions.thriftscala.UnmentionData + +object TweetLenses { + import Lens.checkEq + + def requireSome[A, B](l: Lens[A, Option[B]]): Lens[A, B] = + checkEq[A, B]( + a => l.get(a).get, + (a, b) => l.set(a, Some(b)) + ) + + def tweetLens[A](get: Tweet => A, set: (Tweet, A) => Tweet): Lens[Tweet, A] = + checkEq[Tweet, A](get, set) + + val id: Lens[Tweet, TweetId] = + tweetLens[TweetId](_.id, (t, id) => t.copy(id = id)) + + val coreData: Lens[Tweet, Option[TweetCoreData]] = + tweetLens[Option[TweetCoreData]](_.coreData, (t, coreData) => t.copy(coreData = coreData)) + + val requiredCoreData: Lens[Tweet, TweetCoreData] = + requireSome(coreData) + + val optUrls: Lens[Tweet, Option[Seq[UrlEntity]]] = + tweetLens[Option[Seq[UrlEntity]]](_.urls, (t, urls) => t.copy(urls = urls)) + + val urls: Lens[Tweet, Seq[UrlEntity]] = + tweetLens[Seq[UrlEntity]](_.urls.toSeq.flatten, (t, urls) => t.copy(urls = Some(urls))) + + val optMentions: Lens[Tweet, Option[Seq[MentionEntity]]] = + tweetLens[Option[Seq[MentionEntity]]](_.mentions, 
(t, v) => t.copy(mentions = v)) + + val mentions: Lens[Tweet, Seq[MentionEntity]] = + tweetLens[Seq[MentionEntity]](_.mentions.toSeq.flatten, (t, v) => t.copy(mentions = Some(v))) + + val unmentionData: Lens[Tweet, Option[UnmentionData]] = + tweetLens[Option[UnmentionData]](_.unmentionData, (t, v) => t.copy(unmentionData = v)) + + val optHashtags: Lens[Tweet, Option[Seq[HashtagEntity]]] = + tweetLens[Option[Seq[HashtagEntity]]](_.hashtags, (t, v) => t.copy(hashtags = v)) + + val hashtags: Lens[Tweet, Seq[HashtagEntity]] = + tweetLens[Seq[HashtagEntity]](_.hashtags.toSeq.flatten, (t, v) => t.copy(hashtags = Some(v))) + + val optCashtags: Lens[Tweet, Option[Seq[CashtagEntity]]] = + tweetLens[Option[Seq[CashtagEntity]]](_.cashtags, (t, v) => t.copy(cashtags = v)) + + val cashtags: Lens[Tweet, Seq[CashtagEntity]] = + tweetLens[Seq[CashtagEntity]](_.cashtags.toSeq.flatten, (t, v) => t.copy(cashtags = Some(v))) + + val optMedia: Lens[Tweet, Option[Seq[MediaEntity]]] = + tweetLens[Option[Seq[MediaEntity]]](_.media, (t, v) => t.copy(media = v)) + + val media: Lens[Tweet, Seq[MediaEntity]] = + tweetLens[Seq[MediaEntity]](_.media.toSeq.flatten, (t, v) => t.copy(media = Some(v))) + + val mediaKeys: Lens[Tweet, Seq[MediaKey]] = + tweetLens[Seq[MediaKey]]( + _.mediaKeys.toSeq.flatten, + { + case (t, v) => t.copy(mediaKeys = Some(v)) + }) + + val place: Lens[Tweet, Option[Place]] = + tweetLens[Option[Place]]( + _.place, + { + case (t, v) => t.copy(place = v) + }) + + val quotedTweet: Lens[Tweet, Option[QuotedTweet]] = + tweetLens[Option[QuotedTweet]]( + _.quotedTweet, + { + case (t, v) => t.copy(quotedTweet = v) + }) + + val selfThreadMetadata: Lens[Tweet, Option[SelfThreadMetadata]] = + tweetLens[Option[SelfThreadMetadata]]( + _.selfThreadMetadata, + { + case (t, v) => t.copy(selfThreadMetadata = v) + }) + + val composerSource: Lens[Tweet, Option[ComposerSource]] = + tweetLens[Option[ComposerSource]]( + _.composerSource, + { + case (t, v) => t.copy(composerSource = v) + }) + + 
val deviceSource: Lens[Tweet, Option[DeviceSource]] = + tweetLens[Option[DeviceSource]]( + _.deviceSource, + { + case (t, v) => t.copy(deviceSource = v) + }) + + val perspective: Lens[Tweet, Option[StatusPerspective]] = + tweetLens[Option[StatusPerspective]]( + _.perspective, + { + case (t, v) => t.copy(perspective = v) + }) + + val cards: Lens[Tweet, Option[Seq[Card]]] = + tweetLens[Option[Seq[Card]]]( + _.cards, + { + case (t, v) => t.copy(cards = v) + }) + + val card2: Lens[Tweet, Option[Card2]] = + tweetLens[Option[Card2]]( + _.card2, + { + case (t, v) => t.copy(card2 = v) + }) + + val cardReference: Lens[Tweet, Option[CardReference]] = + tweetLens[Option[CardReference]]( + _.cardReference, + { + case (t, v) => t.copy(cardReference = v) + }) + + val spamLabel: Lens[Tweet, Option[SafetyLabel]] = + tweetLens[Option[SafetyLabel]]( + _.spamLabel, + { + case (t, v) => t.copy(spamLabel = v) + }) + + val lowQualityLabel: Lens[Tweet, Option[SafetyLabel]] = + tweetLens[Option[SafetyLabel]]( + _.lowQualityLabel, + { + case (t, v) => t.copy(lowQualityLabel = v) + }) + + val nsfwHighPrecisionLabel: Lens[Tweet, Option[SafetyLabel]] = + tweetLens[Option[SafetyLabel]]( + _.nsfwHighPrecisionLabel, + { + case (t, v) => t.copy(nsfwHighPrecisionLabel = v) + }) + + val bounceLabel: Lens[Tweet, Option[SafetyLabel]] = + tweetLens[Option[SafetyLabel]]( + _.bounceLabel, + { + case (t, v) => t.copy(bounceLabel = v) + }) + + val takedownCountryCodes: Lens[Tweet, Option[Seq[String]]] = + tweetLens[Option[Seq[String]]]( + _.takedownCountryCodes, + { + case (t, v) => t.copy(takedownCountryCodes = v) + }) + + val takedownReasons: Lens[Tweet, Option[Seq[TakedownReason]]] = + tweetLens[Option[Seq[TakedownReason]]]( + _.takedownReasons, + { + case (t, v) => t.copy(takedownReasons = v) + }) + + val contributor: Lens[Tweet, Option[Contributor]] = + tweetLens[Option[Contributor]]( + _.contributor, + { + case (t, v) => t.copy(contributor = v) + }) + + val mediaTags: Lens[Tweet, 
Option[TweetMediaTags]] = + tweetLens[Option[TweetMediaTags]]( + _.mediaTags, + { + case (t, v) => t.copy(mediaTags = v) + }) + + val mediaTagMap: Lens[Tweet, Map[MediaId, Seq[MediaTag]]] = + tweetLens[Map[MediaId, Seq[MediaTag]]]( + _.mediaTags.map { case TweetMediaTags(tagMap) => tagMap.toMap }.getOrElse(Map.empty), + (t, v) => { + val cleanMap = v.filter { case (_, tags) => tags.nonEmpty } + t.copy(mediaTags = if (cleanMap.nonEmpty) Some(TweetMediaTags(cleanMap)) else None) + } + ) + + val escherbirdEntityAnnotations: Lens[Tweet, Option[EscherbirdEntityAnnotations]] = + tweetLens[Option[EscherbirdEntityAnnotations]]( + _.escherbirdEntityAnnotations, + { + case (t, v) => t.copy(escherbirdEntityAnnotations = v) + }) + + val communities: Lens[Tweet, Option[Communities]] = + tweetLens[Option[Communities]]( + _.communities, + { + case (t, v) => t.copy(communities = v) + }) + + val tweetypieOnlyTakedownCountryCodes: Lens[Tweet, Option[Seq[String]]] = + tweetLens[Option[Seq[String]]]( + _.tweetypieOnlyTakedownCountryCodes, + { + case (t, v) => t.copy(tweetypieOnlyTakedownCountryCodes = v) + }) + + val tweetypieOnlyTakedownReasons: Lens[Tweet, Option[Seq[TakedownReason]]] = + tweetLens[Option[Seq[TakedownReason]]]( + _.tweetypieOnlyTakedownReasons, + { + case (t, v) => t.copy(tweetypieOnlyTakedownReasons = v) + }) + + val profileGeo: Lens[Tweet, Option[ProfileGeoEnrichment]] = + tweetLens[Option[ProfileGeoEnrichment]]( + _.profileGeoEnrichment, + (t, v) => t.copy(profileGeoEnrichment = v) + ) + + val visibleTextRange: Lens[Tweet, Option[TextRange]] = + tweetLens[Option[TextRange]]( + _.visibleTextRange, + { + case (t, v) => t.copy(visibleTextRange = v) + }) + + val selfPermalink: Lens[Tweet, Option[ShortenedUrl]] = + tweetLens[Option[ShortenedUrl]]( + _.selfPermalink, + { + case (t, v) => t.copy(selfPermalink = v) + }) + + val extendedTweetMetadata: Lens[Tweet, Option[ExtendedTweetMetadata]] = + tweetLens[Option[ExtendedTweetMetadata]]( + _.extendedTweetMetadata, + { + 
case (t, v) => t.copy(extendedTweetMetadata = v) + }) + + object TweetCoreData { + val userId: Lens[TweetCoreData, UserId] = checkEq[TweetCoreData, UserId]( + _.userId, + { (c, v) => + // Pleases the compiler: https://github.com/scala/bug/issues/9171 + val userId = v + c.copy(userId = userId) + }) + val text: Lens[TweetCoreData, String] = checkEq[TweetCoreData, String]( + _.text, + { (c, v) => + // Pleases the compiler: https://github.com/scala/bug/issues/9171 + val text = v + c.copy(text = text) + }) + val createdAt: Lens[TweetCoreData, TweetId] = + checkEq[TweetCoreData, Long](_.createdAtSecs, (c, v) => c.copy(createdAtSecs = v)) + val createdVia: Lens[TweetCoreData, String] = + checkEq[TweetCoreData, String]( + _.createdVia, + { + case (c, v) => c.copy(createdVia = v) + }) + val hasTakedown: Lens[TweetCoreData, Boolean] = + checkEq[TweetCoreData, Boolean]( + _.hasTakedown, + { + case (c, v) => c.copy(hasTakedown = v) + }) + val nullcast: Lens[TweetCoreData, Boolean] = + checkEq[TweetCoreData, Boolean]( + _.nullcast, + { + case (c, v) => c.copy(nullcast = v) + }) + val nsfwUser: Lens[TweetCoreData, Boolean] = + checkEq[TweetCoreData, Boolean]( + _.nsfwUser, + { + case (c, v) => c.copy(nsfwUser = v) + }) + val nsfwAdmin: Lens[TweetCoreData, Boolean] = + checkEq[TweetCoreData, Boolean]( + _.nsfwAdmin, + { + case (c, v) => c.copy(nsfwAdmin = v) + }) + val reply: Lens[TweetCoreData, Option[Reply]] = + checkEq[TweetCoreData, Option[Reply]]( + _.reply, + { + case (c, v) => c.copy(reply = v) + }) + val share: Lens[TweetCoreData, Option[Share]] = + checkEq[TweetCoreData, Option[Share]]( + _.share, + { + case (c, v) => c.copy(share = v) + }) + val narrowcast: Lens[TweetCoreData, Option[Narrowcast]] = + checkEq[TweetCoreData, Option[Narrowcast]]( + _.narrowcast, + { + case (c, v) => c.copy(narrowcast = v) + }) + val directedAtUser: Lens[TweetCoreData, Option[DirectedAtUser]] = + checkEq[TweetCoreData, Option[DirectedAtUser]]( + _.directedAtUser, + { + case (c, v) => 
c.copy(directedAtUser = v) + }) + val conversationId: Lens[TweetCoreData, Option[ConversationId]] = + checkEq[TweetCoreData, Option[ConversationId]]( + _.conversationId, + { + case (c, v) => c.copy(conversationId = v) + }) + val placeId: Lens[TweetCoreData, Option[String]] = + checkEq[TweetCoreData, Option[String]]( + _.placeId, + { + case (c, v) => c.copy(placeId = v) + }) + val geoCoordinates: Lens[TweetCoreData, Option[GeoCoordinates]] = + checkEq[TweetCoreData, Option[GeoCoordinates]]( + _.coordinates, + (c, v) => c.copy(coordinates = v) + ) + val trackingId: Lens[TweetCoreData, Option[TweetId]] = + checkEq[TweetCoreData, Option[Long]]( + _.trackingId, + { + case (c, v) => c.copy(trackingId = v) + }) + val hasMedia: Lens[TweetCoreData, Option[Boolean]] = + checkEq[TweetCoreData, Option[Boolean]]( + _.hasMedia, + { + case (c, v) => c.copy(hasMedia = v) + }) + } + + val counts: Lens[Tweet, Option[StatusCounts]] = + tweetLens[Option[StatusCounts]]( + _.counts, + { + case (t, v) => t.copy(counts = v) + }) + + object StatusCounts { + val retweetCount: Lens[StatusCounts, Option[TweetId]] = + checkEq[StatusCounts, Option[Long]]( + _.retweetCount, + (c, retweetCount) => c.copy(retweetCount = retweetCount) + ) + + val replyCount: Lens[StatusCounts, Option[TweetId]] = + checkEq[StatusCounts, Option[Long]]( + _.replyCount, + (c, replyCount) => c.copy(replyCount = replyCount) + ) + + val favoriteCount: Lens[StatusCounts, Option[TweetId]] = + checkEq[StatusCounts, Option[Long]]( + _.favoriteCount, + { + case (c, v) => c.copy(favoriteCount = v) + }) + + val quoteCount: Lens[StatusCounts, Option[TweetId]] = + checkEq[StatusCounts, Option[Long]]( + _.quoteCount, + { + case (c, v) => c.copy(quoteCount = v) + }) + } + + val userId: Lens[Tweet, UserId] = requiredCoreData andThen TweetCoreData.userId + val text: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.text + val createdVia: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.createdVia + val 
createdAt: Lens[Tweet, ConversationId] = requiredCoreData andThen TweetCoreData.createdAt + val reply: Lens[Tweet, Option[Reply]] = requiredCoreData andThen TweetCoreData.reply + val share: Lens[Tweet, Option[Share]] = requiredCoreData andThen TweetCoreData.share + val narrowcast: Lens[Tweet, Option[Narrowcast]] = + requiredCoreData andThen TweetCoreData.narrowcast + val directedAtUser: Lens[Tweet, Option[DirectedAtUser]] = + requiredCoreData andThen TweetCoreData.directedAtUser + val conversationId: Lens[Tweet, Option[ConversationId]] = + requiredCoreData andThen TweetCoreData.conversationId + val placeId: Lens[Tweet, Option[String]] = requiredCoreData andThen TweetCoreData.placeId + val geoCoordinates: Lens[Tweet, Option[GeoCoordinates]] = + requiredCoreData andThen TweetCoreData.geoCoordinates + val hasTakedown: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.hasTakedown + val nsfwAdmin: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwAdmin + val nsfwUser: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwUser + val nullcast: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nullcast + val trackingId: Lens[Tweet, Option[ConversationId]] = + requiredCoreData andThen TweetCoreData.trackingId + val hasMedia: Lens[Tweet, Option[Boolean]] = requiredCoreData andThen TweetCoreData.hasMedia + + object CashtagEntity { + val indices: Lens[CashtagEntity, (Short, Short)] = + checkEq[CashtagEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val text: Lens[CashtagEntity, String] = + checkEq[CashtagEntity, String](_.text, (t, text) => t.copy(text = text)) + } + + object HashtagEntity { + val indices: Lens[HashtagEntity, (Short, Short)] = + checkEq[HashtagEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val text: Lens[HashtagEntity, String] = + checkEq[HashtagEntity, String](_.text, (t, 
text) => t.copy(text = text)) + } + + object MediaEntity { + val indices: Lens[MediaEntity, (Short, Short)] = + checkEq[MediaEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val mediaSizes: Lens[MediaEntity, collection.Set[MediaSize]] = + checkEq[MediaEntity, scala.collection.Set[MediaSize]]( + _.sizes, + (m, sizes) => m.copy(sizes = sizes) + ) + val url: Lens[MediaEntity, String] = + checkEq[MediaEntity, String]( + _.url, + { + case (t, v) => t.copy(url = v) + }) + val mediaInfo: Lens[MediaEntity, Option[MediaInfo]] = + checkEq[MediaEntity, Option[MediaInfo]]( + _.mediaInfo, + { + case (t, v) => t.copy(mediaInfo = v) + }) + } + + object MentionEntity { + val indices: Lens[MentionEntity, (Short, Short)] = + checkEq[MentionEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val screenName: Lens[MentionEntity, String] = + checkEq[MentionEntity, String]( + _.screenName, + (t, screenName) => t.copy(screenName = screenName) + ) + } + + object UrlEntity { + val indices: Lens[UrlEntity, (Short, Short)] = + checkEq[UrlEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val url: Lens[UrlEntity, String] = + checkEq[UrlEntity, String](_.url, (t, url) => t.copy(url = url)) + } + + object Contributor { + val screenName: Lens[Contributor, Option[String]] = + checkEq[Contributor, Option[String]]( + _.screenName, + (c, screenName) => c.copy(screenName = screenName) + ) + } + + object Reply { + val inReplyToScreenName: Lens[Reply, Option[String]] = + checkEq[Reply, Option[String]]( + _.inReplyToScreenName, + (c, inReplyToScreenName) => c.copy(inReplyToScreenName = inReplyToScreenName) + ) + + val inReplyToStatusId: Lens[Reply, Option[TweetId]] = + checkEq[Reply, Option[TweetId]]( + _.inReplyToStatusId, + (c, inReplyToStatusId) => c.copy(inReplyToStatusId = inReplyToStatusId) + ) + } +} 
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala new file mode 100644 index 000000000..5a0bbcb2d --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala @@ -0,0 +1,18 @@
package com.twitter.tweetypie.util

import com.twitter.tweetutil.TweetPermalink
import com.twitter.tweetypie.thriftscala._

/**
 * Helpers for finding tweet permalinks among a tweet's URL entities.
 */
object TweetPermalinkUtil {

  /** The last URL entity of `tweet` whose expanded URL parses as a tweet permalink, if any. */
  def lastQuotedTweetPermalink(tweet: Tweet): Option[(UrlEntity, TweetPermalink)] =
    lastQuotedTweetPermalink(TweetLenses.urls.get(tweet))

  /** The last entity in `urls` whose expanded URL parses as a tweet permalink, if any. */
  def lastQuotedTweetPermalink(urls: Seq[UrlEntity]): Option[(UrlEntity, TweetPermalink)] =
    urls.flatMap(matchQuotedTweetPermalink).lastOption

  /**
   * Pairs `entity` with the permalink parsed from its expanded URL.
   *
   * @return None when the entity has no expanded URL or `TweetPermalink.parse` yields nothing
   */
  def matchQuotedTweetPermalink(entity: UrlEntity): Option[(UrlEntity, TweetPermalink)] =
    entity.expanded.flatMap { expandedUrl =>
      TweetPermalink.parse(expandedUrl).map(permalink => (entity, permalink))
    }
}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala new file mode 100644 index 000000000..a9b9c8748 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala @@ -0,0 +1,128 @@
package com.twitter.tweetypie.util

import com.twitter.tweetypie.thriftscala._

/**
 * Conversions between the [[Tweet]] and [[Status]] thrift representations.
 */
object TweetTransformer {

  /**
   * Builds a [[Status]] from `tweet`.
   *
   * The tweet must have core data (checked with `assert`). A Geo is produced when the core
   * data has coordinates, or failing that a place id; otherwise `geo` is None. Missing entity
   * lists become empty sequences.
   */
  def toStatus(tweet: Tweet): Status = {
    assert(tweet.coreData.nonEmpty, "tweet core data is missing")
    val core = tweet.coreData.get

    // Prefer full coordinates; fall back to a place-only Geo when just a place id is present.
    val geoOpt: Option[Geo] =
      core.coordinates
        .map { coords =>
          Geo(
            latitude = coords.latitude,
            longitude = coords.longitude,
            geoPrecision = coords.geoPrecision,
            entityId = if (coords.display) 2 else 0,
            name = core.placeId,
            place = tweet.place,
            placeId = core.placeId,
            coordinates = Some(coords)
          )
        }
        .orElse {
          core.placeId.map { _ =>
            Geo(name = core.placeId, place = tweet.place, placeId = core.placeId)
          }
        }

    Status(
      id = tweet.id,
      userId = core.userId,
      text = core.text,
      createdVia = core.createdVia,
      createdAt = core.createdAtSecs,
      urls = tweet.urls.getOrElse(Seq.empty),
      mentions = tweet.mentions.getOrElse(Seq.empty),
      hashtags = tweet.hashtags.getOrElse(Seq.empty),
      cashtags = tweet.cashtags.getOrElse(Seq.empty),
      media = tweet.media.getOrElse(Seq.empty),
      reply = core.reply,
      directedAtUser = core.directedAtUser,
      share = core.share,
      quotedTweet = tweet.quotedTweet,
      geo = geoOpt,
      hasTakedown = core.hasTakedown,
      nsfwUser = core.nsfwUser,
      nsfwAdmin = core.nsfwAdmin,
      counts = tweet.counts,
      deviceSource = tweet.deviceSource,
      narrowcast = core.narrowcast,
      takedownCountryCodes = tweet.takedownCountryCodes,
      perspective = tweet.perspective,
      cards = tweet.cards,
      card2 = tweet.card2,
      nullcast = core.nullcast,
      conversationId = core.conversationId,
      language = tweet.language,
      trackingId = core.trackingId,
      spamLabels = tweet.spamLabels,
      hasMedia = core.hasMedia,
      contributor = tweet.contributor,
      mediaTags = tweet.mediaTags
    )
  }

  /**
   * Builds a [[Tweet]] from `status`.
   *
   * Entity lists are always set (wrapped in Some); coordinates and place data are taken from
   * `status.geo` when it is present.
   */
  def toTweet(status: Status): Tweet = {
    val statusGeo = status.geo

    val coreData =
      TweetCoreData(
        userId = status.userId,
        text = status.text,
        createdVia = status.createdVia,
        createdAtSecs = status.createdAt,
        reply = status.reply,
        directedAtUser = status.directedAtUser,
        share = status.share,
        hasTakedown = status.hasTakedown,
        nsfwUser = status.nsfwUser,
        nsfwAdmin = status.nsfwAdmin,
        nullcast = status.nullcast,
        narrowcast = status.narrowcast,
        trackingId = status.trackingId,
        conversationId = status.conversationId,
        hasMedia = status.hasMedia,
        coordinates = toCoords(status),
        placeId = statusGeo.flatMap(_.placeId)
      )

    Tweet(
      id = status.id,
      coreData = Some(coreData),
      urls = Some(status.urls),
      mentions = Some(status.mentions),
      hashtags = Some(status.hashtags),
      cashtags = Some(status.cashtags),
      media = Some(status.media),
      place = statusGeo.flatMap(_.place),
      quotedTweet = status.quotedTweet,
      takedownCountryCodes = status.takedownCountryCodes,
      counts = status.counts,
      deviceSource = status.deviceSource,
      perspective = status.perspective,
      cards = status.cards,
      card2 = status.card2,
      language = status.language,
      spamLabels = status.spamLabels,
      contributor = status.contributor,
      mediaTags = status.mediaTags
    )
  }

  // Coordinates for `status`: the nested struct when set, otherwise rebuilt from the flat
  // Geo fields.
  private def toCoords(status: Status): Option[GeoCoordinates] =
    status.geo.map { geo =>
      // Status from monorail have the coordinates as the top level fields in Geo,
      // while the nested struct is empty. So we need to copy from the flat fields.
      geo.coordinates.getOrElse(
        GeoCoordinates(
          latitude = geo.latitude,
          longitude = geo.longitude,
          geoPrecision = geo.geoPrecision,
          display = geo.entityId == 2
        )
      )
    }
}
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala new file mode 100644 index 000000000..0dae0bfdc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala @@ -0,0 +1,41 @@
package com.twitter.tweetypie.util.logging

import ch.qos.logback.classic.spi.ILoggingEvent
import ch.qos.logback.classic.spi.ThrowableProxy
import ch.qos.logback.core.filter.Filter
import ch.qos.logback.core.spi.FilterReply
import com.twitter.tweetypie.serverutil.ExceptionCounter.isAlertable

/**
 * This class is currently being used by logback to log alertable exceptions to a separate file.
 *
 * Filters do not change the log levels of individual loggers. Filters filter out specific messages
 * for specific appenders.
This allows us to have a log file with lots of information you will + * mostly not need and a log file with only important information. This type of filtering cannot be + * accomplished by changing the log levels of loggers, because the logger levels are global. We want + * to change the semantics for specific destinations (appenders). + */ +class AlertableExceptionLoggingFilter extends Filter[ILoggingEvent] { + private[this] val IgnorableLoggers: Set[String] = + Set( + "com.github.benmanes.caffeine.cache.BoundedLocalCache", + "abdecider", + "org.apache.kafka.common.network.SaslChannelBuilder", + "com.twitter.finagle.netty4.channel.ChannelStatsHandler$" + ) + + def include(proxy: ThrowableProxy, event: ILoggingEvent): Boolean = + isAlertable(proxy.getThrowable()) && !IgnorableLoggers(event.getLoggerName) + + override def decide(event: ILoggingEvent): FilterReply = + if (!isStarted) { + FilterReply.NEUTRAL + } else { + event.getThrowableProxy() match { + case proxy: ThrowableProxy if include(proxy, event) => + FilterReply.NEUTRAL + case _ => + FilterReply.DENY + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD new file mode 100644 index 000000000..68702d3cf --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD @@ -0,0 +1,17 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = [ + "bazel-compatible", + "logging_impl_check_whitelisted_target", + ], + dependencies = [ + "3rdparty/jvm/ch/qos/logback:logback-classic", + "3rdparty/jvm/com/google/guava", + "finagle/finagle-memcached/src/main/scala", + "src/thrift/com/twitter/servo:servo-exception-java", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "util/util-stats/src/main/scala/com/twitter/finagle/stats", + ], +) diff --git 
a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala new file mode 100644 index 000000000..fe035bddf --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala @@ -0,0 +1,30 @@ +package com.twitter.tweetypie.util.logging + +import ch.qos.logback.classic.Level +import ch.qos.logback.classic.spi.ILoggingEvent +import ch.qos.logback.core.filter.Filter +import ch.qos.logback.core.spi.FilterReply + +/** + * This class is currently being used by logback to log statements from tweetypie at one level and + * log statements from other packages at another. + * + * Filters do not change the log levels of individual loggers. Filters filter out specific messages + * for specific appenders. This allows us to have a log file with lots of information you will + * mostly not need and a log file with only important information. This type of filtering cannot be + * accomplished by changing the log levels of loggers, because the logger levels are global. We want + * to change the semantics for specific destinations (appenders). 
+ */ +class OnlyImportantLogsLoggingFilter extends Filter[ILoggingEvent] { + private[this] def notImportant(loggerName: String): Boolean = + !loggerName.startsWith("com.twitter.tweetypie") + + override def decide(event: ILoggingEvent): FilterReply = + if (!isStarted || event.getLevel.isGreaterOrEqual(Level.WARN)) { + FilterReply.NEUTRAL + } else if (notImportant(event.getLoggerName())) { + FilterReply.DENY + } else { + FilterReply.NEUTRAL + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala new file mode 100644 index 000000000..c99d3afa7 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala @@ -0,0 +1,9 @@ +package com.twitter.tweetypie + +package object util { + type TweetId = Long + type UserId = Long + type MediaId = Long + type ConversationId = Long + type PlaceId = String +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD b/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD new file mode 100644 index 000000000..1ccf63deb --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD @@ -0,0 +1,353 @@ +create_thrift_libraries( + base_name = "media-entity", + sources = ["media_entity.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + "mediaservices/commons/src/main/thrift", + ], + export_roots = [ + "mediaservices/commons/src/main/thrift:thrift", + ], + generate_languages = [ + "go", + "java", + "lua", + "python", + "ruby", + "scala", + "strato", + ], + provides_java_name = "tweetypie-media-entity-thrift-java", + provides_scala_name = "tweetypie-media-entity-thrift-scala", +) + +create_thrift_libraries( + base_name = "edit-control", + sources = ["edit_control.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "go", + "java", + "lua", + "python", + "ruby", + "scala", + "strato", + ], + provides_java_name = 
"tweetypie-edit-control-thrift-java", + provides_scala_name = "tweetypie-edit-control-thrift-scala", +) + +create_thrift_libraries( + base_name = "api-fields", + sources = ["api_fields.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "go", + "java", + "lua", + "python", + "ruby", + "scala", + "strato", + ], + provides_java_name = "tweetypie-api-fields-thrift-java", + provides_scala_name = "tweetypie-api-fields-thrift-scala", +) + +create_thrift_libraries( + base_name = "note-tweet", + sources = ["note_tweet.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "go", + "java", + "lua", + "python", + "ruby", + "scala", + "strato", + ], + provides_java_name = "tweetypie-note-tweet-thrift-java", + provides_scala_name = "tweetypie-note-tweet-thrift-scala", +) + +create_thrift_libraries( + base_name = "tweet", + sources = [ + "creative-entity-enrichments/creative_entity_enrichments.thrift", + "geo/tweet_location_info.thrift", + "media/media_ref.thrift", + "tweet.thrift", + "unmentions/unmentions.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":api-fields", + ":edit-control", + ":media-entity", + ":note-tweet", + "mediaservices/commons/src/main/thrift", + "src/thrift/com/twitter/content-health/toxicreplyfilter", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo", + "src/thrift/com/twitter/escherbird:tweet-annotation", + "src/thrift/com/twitter/expandodo:cards", + "src/thrift/com/twitter/geoduck", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions", + "src/thrift/com/twitter/spam/rtf:safety-label", + "src/thrift/com/twitter/timelines/self_thread:thrift", + "src/thrift/com/twitter/tseng/withholding:thrift", + "src/thrift/com/twitter/tweet_pivots:tweet-pivots", + "strato/config/src/thrift/com/twitter/strato/columns/creative_entity_enrichments", + "unified-cards/thrift/src/main/thrift:thrift-contract", + ], + export_roots = [ + 
":api-fields", + ":edit-control", + ":media-entity", + ":note-tweet", + "mediaservices/commons/src/main/thrift:thrift", + "src/thrift/com/twitter/content-health/toxicreplyfilter", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo", + "src/thrift/com/twitter/escherbird:tweet-annotation", + "src/thrift/com/twitter/expandodo:cards", + "src/thrift/com/twitter/geoduck:geoduck", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions", + "src/thrift/com/twitter/spam/rtf:safety-label", + "src/thrift/com/twitter/timelines/self_thread:thrift", + "src/thrift/com/twitter/tseng/withholding:thrift", + "src/thrift/com/twitter/tweet_pivots:tweet-pivots", + "strato/config/src/thrift/com/twitter/strato/columns/creative_entity_enrichments", + ], + generate_languages = [ + "go", + "java", + "lua", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-tweet-thrift-java", + provides_python_name = "tweetypie-tweet-thrift-python", + provides_scala_name = "tweetypie-tweet-thrift-scala", +) + +create_thrift_libraries( + base_name = "service", + sources = [ + "deleted_tweet.thrift", + "tweet_service.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":audit", + ":transient_context", + ":tweet", + "carousel/service/thrift:service", + "incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift", + "mediaservices/commons/src/main/thrift", + "src/thrift/com/twitter/bouncer:bounce-action-thrift", + "src/thrift/com/twitter/context:feature-context", + "src/thrift/com/twitter/servo:servo-exception", + "src/thrift/com/twitter/spam/features:safety-meta-data", + "src/thrift/com/twitter/spam/rtf:safety-label", + "src/thrift/com/twitter/spam/rtf:safety-level", + "src/thrift/com/twitter/spam/rtf:safety-result", + "src/thrift/com/twitter/tseng/withholding:thrift", + ], + export_roots = [ + ":transient_context", + ":tweet", + "carousel/service/thrift:service", + 
"incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift", + "src/thrift/com/twitter/bouncer:bounce-action-thrift", + "src/thrift/com/twitter/context:feature-context", + "src/thrift/com/twitter/spam/features:safety-meta-data", + "src/thrift/com/twitter/spam/rtf:safety-level", + "src/thrift/com/twitter/spam/rtf:safety-result", + ], + generate_languages = [ + "go", + "java", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-service-thrift-java", + provides_python_name = "tweetypie-service-thrift-python", + provides_scala_name = "tweetypie-service-thrift-scala", +) + +create_thrift_libraries( + base_name = "events", + sources = [ + "retweet_archival_event.thrift", + "tweet_events.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":audit", + ":transient_context", + ":tweet", + "src/thrift/com/twitter/gizmoduck:user-thrift", + ], + export_roots = [ + ":audit", + ":transient_context", + ":tweet", + "src/thrift/com/twitter/gizmoduck:user-thrift", + ], + generate_languages = [ + "java", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-events-thrift-java", + provides_scala_name = "tweetypie-events-thrift-scala", +) + +create_thrift_libraries( + base_name = "audit", + sources = ["tweet_audit.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "go", + "java", + "lua", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-audit-thrift-java", + provides_scala_name = "tweetypie-audit-thrift-scala", +) + +create_thrift_libraries( + base_name = "deprecated", + sources = ["deprecated.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":service", + ":tweet", + "mediaservices/commons/src/main/thrift", + "src/thrift/com/twitter/expandodo:cards", + "src/thrift/com/twitter/gizmoduck:user-thrift", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity", + ], + generate_languages 
= [ + "java", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-deprecated-thrift-java", + provides_scala_name = "tweetypie-deprecated-thrift-scala", +) + +create_thrift_libraries( + base_name = "delete_location_data", + sources = ["delete_location_data.thrift"], + tags = ["bazel-compatible"], + provides_java_name = "delete-location-data-java", + provides_scala_name = "delete-location-data-scala", +) + +create_thrift_libraries( + base_name = "transient_context", + sources = ["transient_context.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":tweet", + ], + generate_languages = [ + "go", + "java", + "lua", + "python", + "scala", + "strato", + ], + provides_java_name = "transient-context-java", + provides_scala_name = "transient-context-scala", +) + +create_thrift_libraries( + base_name = "tweet_comparison_service", + sources = ["tweet_comparison_service.thrift"], + tags = ["bazel-compatible"], + dependency_roots = [ + ":service", + "src/thrift/com/twitter/context:twitter-context", + ], + generate_languages = [ + "java", + "scala", + ], + provides_java_name = "tweet-comparison-service-thrift-java", + provides_scala_name = "tweet-comparison-service-thrift-scala", +) + +create_thrift_libraries( + base_name = "tweet_service_graphql", + sources = ["tweet_service_graphql.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + "src/thrift/com/twitter/ads/callback:engagement_request", + "strato/config/src/thrift/com/twitter/strato/graphql", + ], + generate_languages = [ + "scala", + "strato", + ], + provides_scala_name = "tweet-service-graphql-scala", +) + +create_thrift_libraries( + base_name = "stored-tweet-info", + sources = [ + "stored_tweet_info.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":tweet", + ], + generate_languages = [ + "java", + "scala", + "strato", + ], + provides_java_name = 
"tweetypie-stored-tweet-info-thrift-java", + provides_scala_name = "tweetypie-stored-tweet-info-thrift-scala", +) + +create_thrift_libraries( + base_name = "tweet-service-federated", + sources = [ + "tweet_service_federated.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":stored-tweet-info", + ], + generate_languages = [ + "java", + "scala", + "strato", + ], + provides_java_name = "tweetypie-service-federated-thrift-java", + provides_scala_name = "tweetypie-service-federated-thrift-scala", +) diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift new file mode 100644 index 000000000..d48cbf171 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift @@ -0,0 +1,18 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.api_fields +namespace rb TweetyPie +// Specific namespace to avoid golang circular import +namespace go tweetypie.tweet + +// Structs used specifically for rendering through graphql. + +/** + * Perspective of a Tweet from the point of view of a User. 
+ */ +struct TweetPerspective { + 1: bool favorited + 2: bool retweeted + 3: optional bool bookmarked +}(persisted='true', hasPersonalData = 'false', strato.graphql.typename='TweetPerspective') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift new file mode 100644 index 000000000..48a50ca03 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift @@ -0,0 +1,21 @@ +namespace java com.twitter.tweetypie.creative_entity_enrichments.thriftjava +#@ namespace scala com.twitter.tweetypie.creative_entity_enrichments.thriftscala +#@ namespace strato com.twitter.tweetypie.creative_entity_enrichments +namespace py gen.twitter.tweetypie.creative_entity_enrichments + +include "com/twitter/strato/columns/creative_entity_enrichments/enrichments.thrift" + +struct CreativeEntityEnrichmentRef { + 1: required i64 enrichmentId +}(persisted='true', hasPersonalData='false') + +/** + * This struct represents a collection of enrichments applied to a tweet. + * The enrichment for a tweet is just a metadata attached to a tweet + * Each enrichment has a unique id (EnrichmentId) to uniquely identify an enrichment. + * + * enrichment_type signifies the type of an enrichment (eg: Interactive Text). 
+ */ +struct CreativeEntityEnrichments { + 1: required map enrichment_type_to_ref +}(persisted='true', hasPersonalData='false') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift new file mode 100644 index 000000000..35f68dd10 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift @@ -0,0 +1,32 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +namespace py gen.twitter.tweetypie +namespace rb TweetyPie +namespace go tweetypie + +/** + * Event that triggers deletion of the geo information on tweets created + * at timestamp_ms or earlier. + */ +struct DeleteLocationData { + /** + * The id of the user whose tweets should have their geo information + * removed. + */ + 1: required i64 user_id (personalDataType='UserId') + + /** + * The time at which this request was initiated. Tweets by this user + * whose snowflake ids contain timestamps less than or equal to this + * value will no longer be returned with geo information. + */ + 2: required i64 timestamp_ms + + /** + * The last time this user requested deletion of location data prior + * to this request. This value may be omitted, but should be included + * if available for implementation efficiency, since it eliminates the + * need to scan tweets older than this value for geo information. 
+ */ + 3: optional i64 last_timestamp_ms +}(persisted='true', hasPersonalData='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift new file mode 100644 index 000000000..cedf451d5 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift @@ -0,0 +1,86 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.deletedtweet +namespace rb TweetyPie +namespace go tweetypie + +// Structs used for response from getDeletedTweets + +struct DeletedTweetMediaEntity { + 1: required i64 id + 2: required i8 mediaType + 3: required i16 width + 4: required i16 height +} (persisted = 'true') + +struct DeletedTweetShare { + 1: required i64 sourceStatusId + 2: required i64 sourceUserId + 3: required i64 parentStatusId +} (persisted = 'true') + +/** + * A tweet that has been soft- or hard-deleted. + * + * Originally DeletedTweet used the same field ids as tbird.Status. + * This is no longer the case. + */ +struct DeletedTweet { + // Uses the same field ids as tbird.thrift so we can easily map and add fields later + 1: required i64 id + + /** + * User who created the tweet. Only available for soft-deleted tweets. + */ + 2: optional i64 userId + + /** + * Content of the tweet. Only available for soft-deleted tweets. + */ + 3: optional string text + + /** + * When the tweet was created. Only available for soft-deleted tweets. + */ + 5: optional i64 createdAtSecs + + /** + * Retweet information if the deleted tweet was a retweet. Only available + * for soft-deleted tweets. + */ + 7: optional DeletedTweetShare share + + /** + * Media metadata if the deleted tweet included media. Only available for + * soft-deleted tweets. 
+ */ + 14: optional list media + + /** + * The time when this tweet was deleted by a user, in epoch milliseconds, either normally (aka + * "softDelete") or via a bouncer flow (aka "bounceDelete"). + * + * This data is not available for all deleted tweets. + */ + 18: optional i64 deletedAtMsec + + /** + * The time when this tweet was permanently deleted, in epoch milliseconds. + * + * This data is not available for all deleted tweets. + */ + 19: optional i64 hardDeletedAtMsec + + /** + * The ID of the NoteTweet associated with this Tweet if one exists. This is used by safety tools + * to fetch the NoteTweet content when viewing soft deleted Tweets. + */ + 20: optional i64 noteTweetId + + /** + * Specifies if the Tweet can be expanded into the NoteTweet, or if they have the same text. Can + * be used to distinguish between Longer Tweets and RichText Tweets. + */ + 21: optional bool isExpandable +} (persisted = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift new file mode 100644 index 000000000..55cdde2d7 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift @@ -0,0 +1,99 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.deprecated +namespace rb TweetyPie + +include "com/twitter/expandodo/cards.thrift" +include "com/twitter/gizmoduck/user.thrift" +include "com/twitter/tweetypie/media_entity.thrift" +include "com/twitter/tweetypie/tweet.thrift" +include "com/twitter/tweetypie/tweet_service.thrift" + +/** + * @deprecated Use Place + */ +struct Geo { + /** + * @deprecated Use coordinates.latitude + */ + 1: double latitude = 0.0 (personalDataType = 'GpsCoordinates') + + /** + * @deprecated Use coordinates.longitude + */ + 2: double longitude = 0.0 (personalDataType = 'GpsCoordinates') + + /** + * 
@deprecated Use coordinates.geo_precision + */ + 3: i32 geo_precision = 0 + + /** + * 0: don't show lat/long + * 2: show + * + * @deprecated + */ + 4: i64 entity_id = 0 + + /** + * @deprecated Use place_id + */ + 5: optional string name (personalDataType = 'PublishedCoarseLocationTweet') + + 6: optional tweet.Place place // provided if StatusRequestOptions.load_places is set + 7: optional string place_id // ex: ad2f50942562790b + 8: optional tweet.GeoCoordinates coordinates +}(persisted = 'true', hasPersonalData = 'true') + +/** + * @deprecated Use Tweet and APIs that accept or return Tweet. + */ +struct Status { + 1: i64 id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') + 3: string text (personalDataType = 'PrivateTweets, PublicTweets') + 4: string created_via (personalDataType = 'ClientType') + 5: i64 created_at // in seconds + 6: list urls = [] + 7: list mentions = [] + 8: list hashtags = [] + 29: list cashtags = [] + 9: list media = [] + 10: optional tweet.Reply reply + 31: optional tweet.DirectedAtUser directed_at_user + 11: optional tweet.Share share + 32: optional tweet.QuotedTweet quoted_tweet + 12: optional tweet.Contributor contributor + 13: optional Geo geo + // has_takedown indicates if there is a takedown specifically on this tweet. + // takedown_country_codes contains takedown countries for both the tweet and the user, + // so has_takedown might be false while takedown_country_codes is non-empty. 
+ 14: bool has_takedown = 0 + 15: bool nsfw_user = 0 + 16: bool nsfw_admin = 0 + 17: optional tweet.StatusCounts counts + // 18: obsoleted + 19: optional tweet.DeviceSource device_source // not set on DB failure + 20: optional tweet.Narrowcast narrowcast + 21: optional list takedown_country_codes (personalDataType = 'ContentRestrictionStatus') + 22: optional tweet.StatusPerspective perspective // not set if no user ID or on TLS failure + 23: optional list cards // only included if StatusRequestOptions.include_cards == true + // only included when StatusRequestOptions.include_cards == true + // and StatusRequestOptions.cards_platform_key is set to valid value + 30: optional cards.Card2 card2 + 24: bool nullcast = 0 + 25: optional i64 conversation_id (personalDataType = 'TweetId') + 26: optional tweet.Language language + 27: optional i64 tracking_id (personalDataType = 'ImpressionId') + 28: optional map spam_labels + 33: optional bool has_media + // obsolete 34: optional list topic_labels + // Additional fields for flexible schema + 101: optional tweet.TweetMediaTags media_tags + 103: optional tweet.CardBindingValues binding_values + 104: optional tweet.ReplyAddresses reply_addresses + 105: optional tweet.TwitterSuggestInfo twitter_suggest_info +}(persisted = 'true', hasPersonalData = 'true') + diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift new file mode 100644 index 000000000..d1eb83a33 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift @@ -0,0 +1,71 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.edit_control +namespace rb TweetyPie +// Specific namespace to avoid golang circular import +namespace go tweetypie.tweet + +/** + * EditControlInitial is present on all new Tweets. 
Initially, edit_tweet_ids will only contain the id of the new Tweet. + * Subsequent edits will append the edited Tweet ids to edit_tweet_ids. +**/ +struct EditControlInitial { + /** + * A list of all edits of this initial Tweet, including the initial Tweet id, + * and in ascending time order (the oldest revision first). + */ + 1: required list edit_tweet_ids = [] (personalDataType = 'TweetId', strato.json.numbers.type = 'string') + /** + * Epoch timestamp in milli-seconds (UTC) after which the tweet will no longer be editable. + */ + 2: optional i64 editable_until_msecs (strato.json.numbers.type = 'string') + /** + * Number of edits that are available for this Tweet. This starts at 5 and decrements with each edit. + */ + 3: optional i64 edits_remaining (strato.json.numbers.type = 'string') + + /** + * Specifies whether the Tweet has any intrinsic properties that mean it can't be edited + * (for example, we have a business rule that poll Tweets can't be edited). + * + * If a Tweet edit expires due to time frame or number of edits, this field still is set + * to true for Tweets that could have been edited. + */ + 4: optional bool is_edit_eligible +}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='EditControlInitial') + +/** + * EditControlEdit is present for any Tweets that are an edit of another Tweet. The full list of edits can be retrieved + * from the edit_control_initial field, which will always be hydrated. +**/ +struct EditControlEdit { + /** + * The id of the initial Tweet in an edit chain + */ + 1: required i64 initial_tweet_id (personalDataType = 'TweetId', strato.json.numbers.type = 'string') + /** + * This field is only used during hydration to return the EditControl of the initial Tweet for + * a subsequently edited version. + */ + 2: optional EditControlInitial edit_control_initial +}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='EditControlEdit') + + +/** + * Tweet metadata about edits of a Tweet. 
A list of edits to a Tweet are represented as a chain of + * Tweets linked to each other using the EditControl field. + * + * EditControl can be either EditControlInitial which means that the Tweet is unedited or the first Tweet in + * an edit chain, or EditControlEdit which means it is a Tweet in the edit chain after the first + * Tweet. + */ +union EditControl { + 1: EditControlInitial initial + 2: EditControlEdit edit +}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='EditControl') + + +service FederatedServiceBase { + EditControl getEditControl(1: required i64 tweetId) +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift new file mode 100644 index 000000000..500e9ffcf --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift @@ -0,0 +1,72 @@ +namespace java com.twitter.tweetypie.geo.thriftjava +#@namespace scala com.twitter.tweetypie.geo.thriftscala +#@namespace strato com.twitter.tweetypie.geo +namespace py gen.twitter.tweetypie.geo +namespace rb TweetyPie + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// // +// This file contains type definitions to support the Geo field added to Tweet flexible schema ONLY. // +// It is unlikely to be re-usable so treat it them as private outside the subpackage defined here. // +// // +// In respect to back storage, consider it has limited capacity, provisioned to address particular use cases. // +// There is no free resources outside its current usage plus a future projection (see Storage Capacity below). // +// For example: // +// 1- Adding extra fields to TweetLocationInfo will likely require extra storage. // +// 2- Increase on front-load QPS (read or write) may require extra sharding to not impact delay percentiles. 
// +// Failure to observe these may impact Tweetypie write-path and read-path. // +// // +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Flags how a _Place_ is published into a tweet (a.k.a. geotagging). + */ +enum GeoTagPlaceSource { + /** + * Tweet is tagged to a place but it is impossible to determine its source. + * E.g.: created from non-TOO clients or legacy TOO clients + */ + UNKNOWN = 0 + /** + * Tweet is tagged to a Place by reverse geocoding its coordinates. + */ + COORDINATES = 1 + /** + * Tweet is tagged to a Place by the client application on user's behalf. + * N.B.: COORDINATES is not AUTO because the API request doesn't publish a Place + */ + AUTO = 2 + EXPLICIT = 3 + + // free to use, added for backwards compatibility on client code. + RESERVED_4 = 4 + RESERVED_5 = 5 + RESERVED_6 = 6 + RESERVED_7 = 7 +} + +/** + * Information about Tweet's Location(s). + * Designed to enable custom consumption experiences of the Tweet's location(s). + * E.g.: Tweet's perspectival view of a Location entity + * + * To guarantee user's rights of privacy: + * + * - Only include user's published location data or unpublished location data that + * is EXPLICITLY set as publicly available by the user. + * + * - Never include user's unpublished (aka shared) location data that + * is NOT EXPLICITLY set as publicly available by the user. + * + * E.g.: User is asked to share their GPS coordinates with Twitter from mobile client, + * under the guarantee it won't be made publicly available. + * + * Design notes: + * - Tweet's geotagged Place is represented by Tweet.place instead of being a field here. + */ +struct TweetLocationInfo { + /** + * Represents how the Tweet author published the "from" location in a Tweet (a.k.a geo-tagged). 
+ */ + 1: optional GeoTagPlaceSource geotag_place_source +}(persisted='true', hasPersonalData='false') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift new file mode 100644 index 000000000..f2a739094 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift @@ -0,0 +1,20 @@ +namespace java com.twitter.tweetypie.media.thriftjava +#@namespace scala com.twitter.tweetypie.media.thriftscala +#@namespace strato com.twitter.tweetypie.media +namespace py gen.twitter.tweetypie.media +namespace rb TweetyPie + + +/** +* A MediaRef represents a reference to a piece of media in MediaInfoService, along with metadata +* about the source Tweet that the media came from in case of pasted media. +**/ +struct MediaRef { + 1: string generic_media_key (personalDataType = 'MediaId') + + // For Tweets with pasted media, the id of the Tweet where this media was copied from + 2: optional i64 source_tweet_id (personalDataType = 'TweetId') + + // The author of source_tweet_id + 3: optional i64 source_user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift new file mode 100644 index 000000000..c5b411710 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift @@ -0,0 +1,135 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.media_entity +namespace rb TweetyPie.media_entity +namespace go tweetypie.media_entity + +include "com/twitter/mediaservices/commons/MediaInformation.thrift" +include "com/twitter/mediaservices/commons/MediaCommon.thrift" +include "com/twitter/mediaservices/commons/TweetMedia.thrift" + 
+/** + * DEPRECATED + * An RGB color. + * + * Each i8 should be interpreted as unsigned, ranging in value from 0 to + * 255. Borrowed from gizmoduck/user.thrift. + * + * The way in which we use ColorValue here is as metadata for a media file, + * so it needs to be annotated as having personal data. Fields that are of + * structured types cannot be annotated, so we have to put the annotation + * on the structure itself's fields even though it's more confusing to do so + * and could introduce issues if someone else reuses ColorValue outside of + * the context of a media file. + */ +struct ColorValue { + 1: i8 red (personalDataType = 'MediaFile') + 2: i8 green (personalDataType = 'MediaFile') + 3: i8 blue (personalDataType = 'MediaFile') +}(persisted = 'true', hasPersonalData = 'true') + +struct MediaEntity { + 1: i16 from_index (personalDataType = 'MediaFile') + 2: i16 to_index (personalDataType = 'MediaFile') + + /** + * The shortened t.co url found in the tweet text. + */ + 3: string url (personalDataType = 'ShortUrl') + + /** + * The text to display in place of the shortened url. + */ + 4: string display_url (personalDataType = 'LongUrl') + + /** + * The url to the media asset (a preview image in the case of a video). + */ + 5: string media_url (personalDataType = 'LongUrl') + + /** + * The https version of media_url. + */ + 6: string media_url_https (personalDataType = 'LongUrl') + + /** + * The expanded media permalink. + */ + 7: string expanded_url (personalDataType = 'LongUrl') + + 8: MediaCommon.MediaId media_id (strato.space = "Media", strato.name = "media", personalDataType = 'MediaId') + 9: bool nsfw + 10: set sizes + 11: string media_path + 12: optional bool is_protected + + /** + * The tweet that this MediaEntity was originally attached to. This value will be set if this + * MediaEntity is either on a retweet or a tweet with pasted-pic. 
+ */ + 13: optional i64 source_status_id (strato.space = "Tweet", strato.name = "sourceStatus", personalDataType = 'TweetId') + + + /** + * The user to attribute views of the media to. + * + * This field should be set when the media's attributableUserId field does not match the current + * Tweet's owner. Retweets of a Tweet with media and "managed media" are some reasons this may + * occur. When the value is None any views should be attributed to the tweet's owner. + **/ + 14: optional i64 source_user_id (strato.space = "User", strato.name = "sourceUser", personalDataType = 'UserId') + + /** + * Additional information specific to the media type. + * + * This field is optional with images (as the image information is in the + * previous fields), but required for animated GIF and native video (as, in + * this case, the previous fields only describe the preview image). + */ + 15: optional TweetMedia.MediaInfo media_info + + /** + * DEPRECATED + * The dominant color for the entire image (or keyframe for video or GIF). + * + * This can be used for placeholders while the media downloads (either a + * solid color or a gradient using the grid). + */ + 16: optional ColorValue dominant_color_overall + + /** + * DEPRECATED + * Dominant color of each quadrant of the image (keyframe for video or GIF). + * + * If present this list should have 4 elements, corresponding to + * [top_left, top_right, bottom_left, bottom_right] + */ + 17: optional list<ColorValue> dominant_color_grid + + // obsolete 18: optional map extensions + + /** + * Stratostore extension points data encoded as a Strato record. + */ + 19: optional binary extensions_reply + + /** + * Holds metadata defined by the user for the tweet-asset relationship. + */ + 20: optional MediaInformation.UserDefinedProductMetadata metadata + + /** + * Media key used to interact with the media systems. + */ + 21: optional MediaCommon.MediaKey media_key + + /** + * Flexible structure for additional media metadata.
This field is only + * included in a read-path request if specifically requested. It will + * always be included, when applicable, in write-path responses. + */ + 22: optional MediaInformation.AdditionalMetadata additional_metadata + +}(persisted='true', hasPersonalData = 'true') + diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift new file mode 100644 index 000000000..e8313a924 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift @@ -0,0 +1,13 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.tweet_note +namespace rb TweetyPie +// Specific namespace to avoid golang circular import +namespace go tweetypie.tweet + +// Struct representing a NoteTweet associated with a Tweet +struct NoteTweet { + 1: required i64 id (strato.space = 'NoteTweet', strato.name = "note_tweet", personalDataType = 'TwitterArticleID') + 2: optional bool is_expandable (strato.name = "is_expandable") +} (persisted='true', hasPersonalData = 'true', strato.graphql.typename = 'NoteTweetData') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift new file mode 100644 index 000000000..0476dbded --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift @@ -0,0 +1,30 @@ +namespace java com.twitter.tweetypie.thriftjava +namespace py gen.twitter.tweetypie.retweet_archival_event +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace rb TweetyPie +namespace go tweetypie + +/** + * This event is published to "retweet_archival_events" when Tweetypie processes an + * AsyncSetRetweetVisibilityRequest. 
+ * + * This is useful for services (Interaction Counter, Insights Track) that need to + * know when the retweet engagement count of a tweet has been modified due to the + * retweeting user being put in to or out of suspension or read-only mode. + */ +struct RetweetArchivalEvent { + // The retweet id affected by this archival event. + 1: required i64 retweet_id (personalDataType = 'TweetId') + // The source tweet id for the retweet. This tweet had its retweet count modified. + 2: required i64 src_tweet_id (personalDataType = 'TweetId') + 3: required i64 retweet_user_id (personalDataType = 'UserId') + 4: required i64 src_tweet_user_id (personalDataType = 'UserId') + // Approximate time in milliseconds for when the count modification occurred, based on + // Unix Epoch (1 January 1970 00:00:00 UTC). Tweetypie will use the time when it is + // about to send the asynchronous write request to tflock for this timestamp. + 5: required i64 timestamp_ms + // Marks if this event is for archiving(True) or unarchiving(False) action. + // Archiving indicates an engagement count decrement occurred and unarchiving indicates an increment.
+ 6: optional bool is_archiving_action +}(persisted='true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD new file mode 100644 index 000000000..c619298c4 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD @@ -0,0 +1,11 @@ +create_thrift_libraries( + base_name = "storage_internal", + sources = ["*.thrift"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + generate_languages = [ + "java", + "scala", + ], +) diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift new file mode 100644 index 000000000..f614fa762 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift @@ -0,0 +1,79 @@ +namespace java com.twitter.tweetypie.storage_internal.thriftjava +#@namespace scala com.twitter.tweetypie.storage_internal.thriftscala + +struct StoredReply { + 1: i64 in_reply_to_status_id (personalDataType = 'TweetId') + 2: i64 in_reply_to_user_id (personalDataType = 'UserId') + 3: optional i64 conversation_id (personalDataType = 'TweetId') +} (hasPersonalData = 'true', persisted='true') + +struct StoredShare { + 1: i64 source_status_id (personalDataType = 'TweetId') + 2: i64 source_user_id (personalDataType = 'UserId') + 3: i64 parent_status_id (personalDataType = 'TweetId') +} (hasPersonalData = 'true', persisted='true') + +struct StoredGeo { + 1: double latitude (personalDataType = 'GpsCoordinates') + 2: double longitude (personalDataType = 'GpsCoordinates') + 3: i32 geo_precision (personalDataType = 'GpsCoordinates') + 4: i64 entity_id (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') + 5: optional string name (personalDataType = 
'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') +} (hasPersonalData = 'true', persisted='true') + +struct StoredMediaEntity { + 1: i64 id (personalDataType = 'MediaId') + 2: i8 media_type (personalDataType = 'ContentTypeTweetMedia') + 3: i16 width + 4: i16 height +} (hasPersonalData = 'true', persisted='true') + +struct StoredNarrowcast { + 1: optional list<string> language (personalDataType = 'InferredLanguage') + 2: optional list<string> location (personalDataType = 'PublishedCoarseLocationTweet') + 3: optional list<i64> ids (personalDataType = 'TweetId') +} (hasPersonalData = 'true', persisted='true') + +struct StoredQuotedTweet { + 1: i64 tweet_id (personalDataType = 'TweetId') // the tweet id being quoted + 2: i64 user_id (personalDataType = 'UserId') // the user id being quoted + 3: string short_url (personalDataType = 'ShortUrl') // tco url - used when rendering in backwards-compat mode +} (hasPersonalData = 'true', persisted='true') + +struct StoredTweet { + 1: i64 id (personalDataType = 'TweetId') + 2: optional i64 user_id (personalDataType = 'UserId') + 3: optional string text (personalDataType = 'PrivateTweets, PublicTweets') + 4: optional string created_via (personalDataType = 'ClientType') + 5: optional i64 created_at_sec (personalDataType = 'PrivateTimestamp, PublicTimestamp') // in seconds + + 6: optional StoredReply reply + 7: optional StoredShare share + 8: optional i64 contributor_id (personalDataType = 'Contributor') + 9: optional StoredGeo geo + 11: optional bool has_takedown + 12: optional bool nsfw_user (personalDataType = 'TweetSafetyLabels') + 13: optional bool nsfw_admin (personalDataType = 'TweetSafetyLabels') + 14: optional list<StoredMediaEntity> media + 15: optional StoredNarrowcast narrowcast + 16: optional bool nullcast + 17: optional i64 tracking_id (personalDataType = 'ImpressionId') + 18: optional i64 updated_at (personalDataType = 'PrivateTimestamp, PublicTimestamp') + 19: optional StoredQuotedTweet quoted_tweet +} (hasPersonalData = 'true',
persisted='true') + +struct CoreFields { + 2: optional i64 user_id (personalDataType = 'UserId') + 3: optional string text (personalDataType = 'PrivateTweets, PublicTweets') + 4: optional string created_via (personalDataType = 'ClientType') + 5: optional i64 created_at_sec (personalDataType = 'PrivateTimestamp, PublicTimestamp') + + 6: optional StoredReply reply + 7: optional StoredShare share + 8: optional i64 contributor_id (personalDataType = 'Contributor') + 19: optional StoredQuotedTweet quoted_tweet +} (hasPersonalData = 'true', persisted='true') + +struct InternalTweet { + 1: optional CoreFields core_fields +} (hasPersonalData = 'true', persisted='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift new file mode 100644 index 000000000..4c37451fc --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift @@ -0,0 +1,52 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie + +include "com/twitter/tweetypie/tweet.thrift" + +struct HardDeleted { + 1: i64 soft_deleted_timestamp_msec + 2: i64 timestamp_msec +} + +struct SoftDeleted { + 1: i64 timestamp_msec +} + +struct BounceDeleted { + 1: i64 timestamp_msec +} + +struct Undeleted { + 1: i64 timestamp_msec +} + +struct ForceAdded { + 1: i64 timestamp_msec +} + +struct NotFound {} + +union StoredTweetState { + 1: HardDeleted hard_deleted + 2: SoftDeleted soft_deleted + 3: BounceDeleted bounce_deleted + 4: Undeleted undeleted + 5: ForceAdded force_added + 6: NotFound not_found +} + +enum StoredTweetError { + CORRUPT = 1, + SCRUBBED_FIELDS_PRESENT = 2, + FIELDS_MISSING_OR_INVALID = 3, + SHOULD_BE_HARD_DELETED = 4, + FAILED_FETCH = 5 +} + +struct StoredTweetInfo { + 1: required i64 tweet_id + 2: optional tweet.Tweet tweet + 3: optional StoredTweetState stored_tweet_state + 
+ 4: required list<StoredTweetError> errors = [] +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift new file mode 100644 index 000000000..942e42d35 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift @@ -0,0 +1,64 @@ +/** + * This file contains definitions for transient, passthrough structured data. + * + * If you need to add structured data that Tweetypie accepts in a request + * and passes the data through to one or more backends (eg. EventBus), this + * is the place to put it. Tweetypie may or may not inspect the data and + * alter the behavior based on it, but it won't change it. + */ + +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.transient_context +namespace rb TweetyPie +namespace go tweetypie + +include "com/twitter/tweetypie/tweet.thrift" + +enum BatchComposeMode { + /** + * This is the first Tweet in a batch. + */ + BATCH_FIRST = 1 + + /** + * This is any of the subsequent Tweets in a batch. + */ + BATCH_SUBSEQUENT = 2 +} + +/** + * Data supplied at Tweet creation time that is not served by Tweetypie, but + * is passed through to consumers of the tweet_events eventbus stream as part + * of TweetCreateEvent. + * This is different from additional_context in that Tweetypie + * inspects this data as well, and we prefer structs over strings. + * If adding a new field that will be passed through to eventbus, prefer this + * over additional_context. + */ +struct TransientCreateContext { + /** + * Indicates whether a Tweet was created using a batch composer, and if so + * position of a Tweet within the batch. + * + * A value of 'None' indicates that the tweet was not created in a batch.
+ * + * More info: https://docs.google.com/document/d/1dJ9K0KzXPzhk0V-Nsekt0CAdOvyVI8sH9ESEiA2eDW4/edit + */ + 1: optional BatchComposeMode batch_compose + + /** + * Indicates if the tweet contains a live Periscope streaming video. + * + * This enables Periscope LiveFollow. + */ + 2: optional bool periscope_is_live + + /** + * Indicates the userId of the live Periscope streaming video. + * + * This enables Periscope LiveFollow. + */ + 3: optional i64 periscope_creator_id (personalDataType='UserId') +}(persisted='true', hasPersonalData='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift new file mode 100644 index 000000000..bffca50c5 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift @@ -0,0 +1,1652 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.tweet +namespace rb TweetyPie +// Specific namespace to avoid golang circular import +namespace go tweetypie.tweet + +include "com/twitter/escherbird/tweet_annotation.thrift" +include "com/twitter/expandodo/cards.thrift" +include "com/twitter/content-health/toxicreplyfilter/filtered_reply_details.thrift" +include "com/twitter/dataproducts/enrichments_profilegeo.thrift" +include "com/twitter/geoduck/public/thriftv1/geoduck_common.thrift" +include "com/twitter/mediaservices/commons/MediaCommon.thrift" +include "com/twitter/mediaservices/commons/MediaInformation.thrift" +include "com/twitter/tweetypie/api_fields.thrift" +include "com/twitter/tweetypie/edit_control.thrift" +include "com/twitter/tweetypie/media_entity.thrift" +include "com/twitter/tweetypie/note_tweet.thrift" +include "com/twitter/service/scarecrow/gen/tiered_actions.thrift" +include "com/twitter/spam/rtf/safety_label.thrift" +include "com/twitter/timelines/self_thread/self_thread.thrift" +include 
"com/twitter/tseng/withholding/withholding.thrift" +include "com/twitter/tweet_pivots/tweet_pivots.thrift" +include "com/twitter/tweetypie/geo/tweet_location_info.thrift" +include "com/twitter/tweetypie/media/media_ref.thrift" +include "unified_cards_contract.thrift" +include "com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift" +include "com/twitter/tweetypie/unmentions/unmentions.thrift" + +/** + * IDs are annotated with their corresponding space for Strato. + */ + +/** + * A Reply is data about a tweet in response to another tweet or a + * user. + * + * This struct will be present if: + * 1. This tweet is a reply to another tweet, or + * 2. This tweet is directed at a user (the tweet's text begins with + * an @mention). + */ +struct Reply { + /** + * The id of the tweet that this tweet is replying to. + * + * This field will be missing for directed-at tweets (tweets whose + * text begins with an @mention) that are not replying to another + * tweet. + */ + 1: optional i64 in_reply_to_status_id (strato.space = "Tweet", strato.name = "inReplyToStatus", personalDataType = 'TweetId', tweetEditAllowed='false') + + /** + * The user to whom this tweet is directed. + * + * If in_reply_to_status_id is set, this field is the author of that tweet. + * If in_reply_to_status_id is not set, this field is the user mentioned at + * the beginning of the tweet. + */ + 2: i64 in_reply_to_user_id (strato.space = "User", strato.name = "inReplyToUser", personalDataType = 'UserId') + + /** + * The current username of in_reply_to_user_id. + * + * This field is not set when Gizmoduck returns a failure to Tweetypie. + */ + 3: optional string in_reply_to_screen_name (personalDataType = 'Username') +}(persisted='true', hasPersonalData = 'true') + +/** + * Includes information about the user a tweet is directed at (when a tweet + * begins with @mention). 
+ * + * Tweets with a DirectedAtUser are delivered to users who follow both the + * author and the DirectedAtUser. Normally the DirectedAtUser will be the same + * as Reply.in_reply_to_user_id, but will be different if the tweet's author + * rearranges the @mentions in a reply. + */ +struct DirectedAtUser { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 2: string screen_name (personalDataType = 'Username') +}(persisted='true', hasPersonalData = 'true') + +/** + * A Share is data about the source tweet of a retweet. + * + * Share was the internal name for the retweet feature. + */ +struct Share { + /** + * The id of the original tweet that was retweeted. + * + * This is always a tweet and never a retweet (unlike parent_status_id). + */ + 1: i64 source_status_id (strato.space = "Tweet", strato.name = "sourceStatus", personalDataType = 'TweetId') + + /** + * The user id of the original tweet's author. + */ + 2: i64 source_user_id (strato.space = "User", strato.name = "sourceUser", personalDataType = 'UserId') + + /** + * The id of the tweet that the user retweeted. + * + * Often this is the same as source_status_id, but it is different when a + * user retweets via another retweet. For example, user A posts tweet id 1, + * user B retweets it, creating tweet 2. If user C sees B's retweet and + * retweets it, the result is another retweet of tweet id 1, with the parent + * status id of tweet 2. + */ + 3: i64 parent_status_id (strato.space = "Tweet", strato.name = "parentStatus", personalDataType = 'TweetId') +}(persisted='true', hasPersonalData = 'true') + +/** + * A record mapping a shortened URL (usually t.co) to a long url, and a prettified + * display text. This is similar to data found in UrlEntity, and may replace that + * data in the future. + */ +struct ShortenedUrl { + /** + * Shortened t.co URL. + */ + 1: string short_url (personalDataType = 'ShortUrl') + + /** + * Original, full-length URL.
+ */ + 2: string long_url (personalDataType = 'LongUrl') + + /** + * Truncated version of expanded URL that does not include protocol and is + * limited to 27 characters. + */ + 3: string display_text (personalDataType = 'LongUrl') +}(persisted='true', hasPersonalData = 'true') + +/** + * A QuotedTweet is data about a tweet referenced within another tweet. + * + * QuotedTweet is included if Tweet.QuotedTweetField is requested, and the + * linked-to tweet is public and visible at the time that the linking tweet + * is hydrated, which can be during write-time or later after a cache-miss + * read. Since linked-to tweets can be deleted, and users can become + * suspended, deactivated, or protected, the presence of this value is not a + * guarantee that the quoted tweet is still public and visible. + * + * Because a tweet quoting another tweet may not require a permalink URL in + * the tweet's text, the URLs in ShortenedUrl may be useful to clients that + * require maintaining a legacy-rendering of the tweet's text with the permalink. + * See ShortenedUrl for details. Clients should avoid reading permalink whenever + * possible and prefer the QuotedTweet's tweet_id and user_id instead. + * + * we always populate the permalink on tweet hydration unless there are partial + * hydration errors or inner quoted tweet is filtered due to visibility rules. + * + */ +struct QuotedTweet { + 1: i64 tweet_id (strato.space = "Tweet", strato.name = "tweet", personalDataType = 'TweetId') + 2: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 3: optional ShortenedUrl permalink // URLs to access the quoted-tweet +}(persisted='true', hasPersonalData = 'true') + +/** + * A Contributor is a user who has access to another user's account. 
+ */ +struct Contributor { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 2: optional string screen_name (personalDataType = 'Username')// not set on Gizmoduck failure +}(persisted='true', hasPersonalData = 'true') + +struct GeoCoordinates { + 1: double latitude (personalDataType = 'GpsCoordinates') + 2: double longitude (personalDataType = 'GpsCoordinates') + 3: i32 geo_precision = 0 (personalDataType = 'GpsCoordinates') + + /** + * Whether or not to make the coordinates public. + * + * This parameter is needed because coordinates are not typically published + * by the author. If false, the tweet's coordinates were shared but are not made + * public. + */ + 4: bool display = 1 +}(persisted='true', hasPersonalData = 'true') + +enum PlaceType { + UNKNOWN = 0 + COUNTRY = 1 + ADMIN = 2 + CITY = 3 + NEIGHBORHOOD = 4 + POI = 5 +} + +enum PlaceNameType { + NORMAL = 0 + ABBREVIATION = 1 + SYNONYM = 2 +} + +struct PlaceName { + 1: string name + 2: string language = "" + 3: PlaceNameType type + 4: bool preferred +}(persisted='true', hasPersonalData='false') + +/** + * A Place is the physical and political properties of a location on Earth. + */ +struct Place { + /** + * Geo service identifier. + */ + 1: string id (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') + + /** + * Granularity of place. + */ + 2: PlaceType type + + /** + * The name of this place composed with its parent locations. + * + * For example, the full name for "Brooklyn" would be "Brooklyn, NY". This + * name is returned in the language specified by + * GetTweetOptions.language_tag.
+ * + * @see com.twitter.geoduck.util.primitives.bestPlaceNameMatchingFilter + */ + 4: string name (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') + + /** + * Arbitrary key/value data from the geoduck PlaceAttributes for this place. + */ + 5: map<string, string> attributes (personalDataTypeKey = 'PostalCode') + + 7: set<PlaceName> names + + /** + * The ISO 3166-1 alpha-2 code for the country containing this place. + */ + 9: optional string country_code (personalDataType = 'PublishedCoarseLocationTweet') + + /** + * The best name for the country containing this place as determined by + * geoduck heuristics. + * + * This name is returned in the language specified by + * GetTweetOptions.language_tag. + */ + 10: optional string country_name (personalDataType = 'PublishedCoarseLocationTweet') + + /** + * A simplified polygon that encompasses the place's geometry. + */ + 11: optional list<GeoCoordinates> bounding_box + + /** + * An unordered list of geo service identifiers for places that contain this + * one from the most immediate parent up to the country. + */ + 12: optional set<string> containers (personalDataType = 'PublishedCoarseLocationTweet') + + /** + * A centroid-like coordinate that is within the geometry of the place. + */ + 13: optional GeoCoordinates centroid + + /** + * Reason this place is being suppressed from display. + * + * This field is present when we previously had a place for this ID, but are + * now choosing not to hydrate it and instead providing fake place metadata + * along with a reason for not including place information. + */ + 14: optional geoduck_common.WithheldReason withheldReason +}(persisted='true', hasPersonalData='true') + +/** + * A UrlEntity is the position and content of a t.co shortened URL in the + * tweet's text. + * + * If Talon returns an error to Tweetypie during tweet hydration, the + * UrlEntity will be omitted from the response.
UrlEntities are not included + * for non-t.co-wrapped URLs found in older tweets, for spam and user safety + * reasons. +*/ +struct UrlEntity { + /** + * The position of this entity's first character, in zero-indexed Unicode + * code points. + */ + 1: i16 from_index + + /** + * The position after this entity's last character, in zero-indexed Unicode + * code points. + */ + 2: i16 to_index + + /** + * Shortened t.co URL. + */ + 3: string url (personalDataType = 'ShortUrl') + + /** + * Original, full-length URL. + * + * This field will always be present on URL entities returned by + * Tweetypie; it is optional as an implementation artifact. + */ + 4: optional string expanded (personalDataType = 'LongUrl') + + /** + * Truncated version of expanded URL that does not include protocol and is + * limited to 27 characters. + * + * This field will always be present on URL entities returned by + * Tweetypie; it is optional as an implementation artifact. + */ + 5: optional string display (personalDataType = 'LongUrl') + + 6: optional i64 click_count (personalDataType = 'CountOfTweetEntitiesClicked') +}(persisted = 'true', hasPersonalData = 'true') + +/** + * A MentionEntity is the position and content of a mention, (the "@" + * character followed by the name of another valid user) in a tweet's text. + * + * If Gizmoduck returns an error to Tweetypie during tweet hydration that + * MentionEntity will be omitted from the response. + */ +struct MentionEntity { + /** + * The position of this entity's first character ("@"), in zero-indexed + * Unicode code points. + */ + 1: i16 from_index + + /** + * The position after this entity's last character, in zero-indexed Unicode + * code points. + */ + 2: i16 to_index + + /** + * Contents of the mention without the leading "@". + */ + 3: string screen_name (personalDataType = 'Username') + + /** + * User id of the current user with the mentioned screen name. 
+ * + * In the current implementation user id does not necessarily identify the + * user who was originally mentioned when the tweet was created, only the + * user who owns the mentioned screen name at the time of hydration. If a + * mentioned user changes their screen name and a second user takes the old + * name, this field identifies the second user. + * + * This field will always be present on mention entities returned by + * Tweetypie; it is optional as an implementation artifact. + */ + 4: optional i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + + /** + * Display name of the current user with the mentioned screen name. + * + * See user_id for caveats about which user's name is used here. This field + * will always be present on mention entities returned by Tweetypie; it is + * optional as an implementation artifact. + */ + 5: optional string name (personalDataType = 'DisplayName') + + /** + * Indicates if the user referred to by this MentionEntity has been unmentioned + * from the conversation. If this field is set to true, the from_index and to_index + * fields will have a value of 0. + * + * @deprecated isUnmentioned is no longer being populated + */ + 6: optional bool isUnmentioned (personalDataType = 'ContentPrivacySettings') +}(persisted = 'true', hasPersonalData = 'true') + +/** + * A list of users that are mentioned in the tweet and have a blocking + * relationship with the tweet author. Mentions for these users will be unlinked + * in the tweet. + */ +struct BlockingUnmentions { + 1: optional list<i64> unmentioned_user_ids (strato.space = 'User', strato.name = 'users', personalDataType = 'UserId') +}(persisted = 'true', hasPersonalData = 'true', strato.graphql.typename = 'BlockingUnmentions') + +/** + * A list of users that are mentioned in the tweet and have indicated they do not want + * to be mentioned via their mention settings.
Mentions for these users will be unlinked + * in the tweet by Twitter owned and operated clients. + */ +struct SettingsUnmentions { + 1: optional list unmentioned_user_ids (strato.space = 'User', strato.name = 'users', personalDataType = 'UserId') +}(persisted = 'true', hasPersonalData = 'true', strato.graphql.typename = 'SettingsUnmentions') + +/** + * A HashtagEntity is the position and content of a hashtag (a term starting + * with "#") in a tweet's text. + */ +struct HashtagEntity { + /** + * The position of this entity's first character ("#"), in zero-indexed + * Unicode code points. + */ + 1: i16 from_index + + /** + * The position after this entity's last character, in zero-indexed Unicode + * code points. + */ + 2: i16 to_index + + /** + * Contents of the hashtag without the leading "#". + */ + 3: string text (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') +}(persisted = 'true', hasPersonalData = 'true') + +/** + * A CashtagEntity is the position and content of a cashtag (a term starting + * with "$") in a tweet's text. + */ +struct CashtagEntity { + /** + * The position of this entity's first character, in zero-indexed Unicode + * code points. + */ + 1: i16 from_index + + /** + * The position after this entity's last character, in zero-indexed Unicode + * code points. 
+ */ + 2: i16 to_index + + /** + * Contents of the cashtag without the leading "$" + */ + 3: string text (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') +}(persisted = 'true', hasPersonalData = 'true') + +enum MediaTagType { + USER = 0 + RESERVED_1 = 1 + RESERVED_2 = 2 + RESERVED_3 = 3 + RESERVED_4 = 4 +} + +struct MediaTag { + 1: MediaTagType tag_type + 2: optional i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 3: optional string screen_name (personalDataType = 'Username') + 4: optional string name (personalDataType = 'DisplayName') +}(persisted='true', hasPersonalData = 'true') + +struct TweetMediaTags { + 1: map> tag_map +}(persisted='true', hasPersonalData = 'true') + +/** + * A UserMention is a user reference not stored in the tweet text. + * + * @deprecated Was used only in ReplyAddresses + */ +struct UserMention { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 2: optional string screen_name (personalDataType = 'Username') + 3: optional string name (personalDataType = 'DisplayName') +}(persisted='true', hasPersonalData = 'true') + +/** + * ReplyAddresses is a list of reply entities which are stored outside of the + * text. + * + * @deprecated + */ +struct ReplyAddresses { + 1: list users = [] +}(persisted='true', hasPersonalData = 'true') + +/** + * SchedulingInfo is metadata about tweets created by the tweet scheduling + * service. + */ +// +struct SchedulingInfo { + /** + * Id of the corresponding scheduled tweet before it was created as a real + * tweet. 
+ */ + 1: i64 scheduled_tweet_id (personalDataType = 'TweetId') +}(persisted='true', hasPersonalData = 'true') + +/** + * @deprecated + */ +enum SuggestType { + WTF_CARD = 0 + WORLD_CUP = 1 + WTD_CARD = 2 + NEWS_CARD = 3 + RESERVED_4 = 4 + RESERVED_5 = 5 + RESERVED_6 = 6 + RESERVED_7 = 7 + RESERVED_8 = 8 + RESERVED_9 = 9 + RESERVED_10 = 10 + RESERVED_11 = 11 +} + +/** + * @deprecated + */ +enum TwitterSuggestsVisibilityType { + /** + * Always public to everyone + */ + PUBLIC = 1 + + /** + * Inherits visibility rules of personalized_for_user_id. + */ + RESTRICTED = 2 + + /** + * Only visible to personalized_for_user_id (and author). + */ + PRIVATE = 3 +} + +/** + * TwitterSuggestInfo is details about a synthetic tweet generated by an early + * version of Twitter Suggests. + * + * @deprecated + */ +struct TwitterSuggestInfo { + 1: SuggestType suggest_type + 2: TwitterSuggestsVisibilityType visibility_type + 3: optional i64 personalized_for_user_id (strato.space = "User", strato.name = "personalizedForUser", personalDataType = 'UserId') + 4: optional i64 display_timestamp_secs (personalDataType = 'PublicTimestamp') +}(persisted='true', hasPersonalData = 'true') + +/** + * A DeviceSource contains information about the client application from which + * a tweet was sent. + * + * This information is stored in Passbird. The developer that owns a client + * application provides this information on https://apps.twitter.com. + */ +struct DeviceSource { + + /** + * The id of the client in the now deprecated device_sources MySQL table. + * + * Today this value will always be 0. + * + * @deprecated Use client_app_id + */ + 1: required i64 id (personalDataType = 'AppId') + + /** + * Identifier for the client in the format "oauth:" + */ + 2: string parameter + + /** + * Identifier for the client in the format "oauth:" + */ + 3: string internal_name + + /** + * Developer-provided name of the client application. 
+ */ + 4: string name + + /** + * Developer-provided publicly accessible home page for the client + * application. + */ + 5: string url + + /** + * HTML fragment with a link to the client-provided URL + */ + 6: string display + + /** + * This field is marked optional for backwards compatibility but will always + * be populated by Tweetypie. + */ + 7: optional i64 client_app_id (personalDataType = 'AppId') +}(persisted='true', hasPersonalData = 'true') + +/** + * A Narrowcast restricts delivery of a tweet geographically. + * + * Narrowcasts allow multi-national advertisers to create geo-relevant content + * from a central handle that is only delivered to followers in a + * particular country or set of countries. + */ +struct Narrowcast { + 2: list location = [] (personalDataType = 'PublishedCoarseLocationTweet') +}(persisted='true', hasPersonalData = 'true') + +/** + * StatusCounts is a summary of engagement metrics for a tweet. + * + * These metrics are loaded from TFlock. + */ +struct StatusCounts { + + /** + * Number of times this tweet has been retweeted. + * + * This number may not match the list of users who have retweeted because it + * includes retweets from protected and suspended users who are not listed. + */ + 1: optional i64 retweet_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets', strato.json.numbers.type = 'int53') + + /** + * Number of direct replies to this tweet. + * + * This number does not include replies to replies. + */ + 2: optional i64 reply_count (personalDataType = 'CountOfPrivateReplies, CountOfPublicReplies', strato.json.numbers.type = 'int53') + + /** + * Number of favorites this tweet has received. + * + * This number may not match the list of users who have favorited a tweet + * because it includes favorites from protected and suspended users who are + * not listed.
+ */ + 3: optional i64 favorite_count (personalDataType = 'CountOfPrivateLikes, CountOfPublicLikes', strato.json.numbers.type = 'int53') + + /** + * @deprecated + */ + 4: optional i64 unique_users_impressed_count (strato.json.numbers.type = 'int53') + + /** + * Number of replies to this tweet including replies to replies. + * + * @deprecated + */ + 5: optional i64 descendent_reply_count (personalDataType = 'CountOfPrivateReplies, CountOfPublicReplies', strato.json.numbers.type = 'int53') + + /** + * Number of times this tweet has been quote tweeted. + * + * This number may not match the list of users who have quote tweeted because it + * includes quote tweets from protected and suspended users who are not listed. + */ + 6: optional i64 quote_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets', strato.json.numbers.type = 'int53') + + /** + * Number of bookmarks this tweet has received. + */ + 7: optional i64 bookmark_count (personalDataType = 'CountOfPrivateLikes', strato.json.numbers.type = 'int53') + +}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='StatusCounts') + +/** + * A StatusPerspective is a tweet's properties from one user's point of view. + */ +struct StatusPerspective { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + + /** + * Whether user_id has favorited this tweet. + */ + 2: bool favorited + + /** + * Whether user_id has retweeted this tweet. + */ + 3: bool retweeted + + /** + * If user_id has retweeted this tweet, retweet_id identifies that tweet. + */ + 4: optional i64 retweet_id (strato.space = "Tweet", strato.name = "retweet", personalDataType = 'TweetId') + + /** + * Whether user_id has reported this tweet as spam, offensive, or otherwise + * objectionable. + */ + 5: bool reported + + /** + * Whether user_id has bookmarked this tweet.
+ */ + 6: optional bool bookmarked +}(persisted='true', hasPersonalData = 'true') + +/** + * A Language is a guess about the human language of a tweet's text. + * + * Language is determined by TwitterLanguageIdentifier from the + * com.twitter.common.text package (commonly called "Penguin"). + */ +struct Language { + /** + * Language code in BCP-47 format. + */ + 1: required string language (personalDataType = 'InferredLanguage') + + /** + * Language direction. + */ + 2: bool right_to_left + + /** + * Confidence level of the detected language. + */ + 3: double confidence = 1.0 + + /** + * Other possible languages and their confidence levels. + */ + 4: optional map other_candidates +}(persisted='true', hasPersonalData = 'true') + +/** + * A SupplementalLanguage is a guess about the human language of a tweet's + * text. + * + * SupplementalLanguage is typically determined by a third-party translation + * service. It is only stored when the service detects a different language + * than TwitterLanguageIdentifier. + * + * @deprecated 2020-07-08 no longer populated. + */ +struct SupplementalLanguage { + /** + * Language code in BCP-47 format. + */ + 1: required string language (personalDataType = 'InferredLanguage') +}(persisted='true', hasPersonalData = 'true') + +/** + * A SpamLabel is a collection of spam actions for a tweet. 
+ * + * Absence of a SpamLabel indicates that no action needs to be taken + */ +struct SpamLabel { + /** + * Filter this content at render-time + * + * @deprecated 2014-05-19 Use filter_renders + */ + 1: bool spam = 0 + + 2: optional set actions; +}(persisted='true') + + +/** + * The available types of spam signal + * + * @deprecated + */ +enum SpamSignalType { + MENTION = 1 + SEARCH = 2 + STREAMING = 4 + # OBSOLETE HOME_TIMELINE = 3 + # OBSOLETE NOTIFICATION = 5 + # OBSOLETE CONVERSATION = 6 + # OBSOLETE CREATION = 7 + RESERVED_VALUE_8 = 8 + RESERVED_VALUE_9 = 9 + RESERVED_VALUE_10 = 10 +} + +/** + * @deprecated + * CardBindingValues is a collection of key-value pairs used to render a card. + */ +struct CardBindingValues { + 1: list pairs = [] +}(persisted='true') + +/** + * A CardReference is a mechanism for explicitly associating a card with a + * tweet. + */ +struct CardReference { + /** + * Link to the card to associate with a tweet. + * + * This URI may reference either a card stored in the card service, or + * another resource, such as a crawled web page URL. This value supersedes + * any URL present in tweet text. + */ + 1: string card_uri +}(persisted='true') + +/** + * A TweetPivot is a semantic entity related to a tweet. + * + * TweetPivots are used to direct the user to another related location. For + * example, a "See more about " UI element that takes the user to + * when clicked. + */ +struct TweetPivot { + 1: required tweet_annotation.TweetEntityAnnotation annotation + 2: required tweet_pivots.TweetPivotData data +}(persisted='true') + +struct TweetPivots { + 1: required list tweet_pivots +}(persisted='true') + +struct EscherbirdEntityAnnotations { + 1: list entity_annotations +}(persisted='true') + +struct TextRange { + /** + * The inclusive index of the start of the range, in zero-indexed Unicode + * code points. + */ + 1: required i32 from_index + + /** + * The exclusive index of the end of the range, in zero-indexed Unicode + * code points.
+ */ + 2: required i32 to_index +}(persisted='true') + +struct TweetCoreData { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId', tweetEditAllowed='false') + + /** + * The body of the tweet consisting of the user-supplied displayable message + * and: + * - an optional prefix list of @mentions + * - an optional suffix attachment url. + * + * The indices from visible_text_range specify the substring of text intended + * to be displayed, whose length is limited to 140 display characters. Note + * that the visible substring may be longer than 140 characters due to HTML + * entity encoding of &, <, and > . + + * For retweets the text is that of the original tweet, prepended with "RT + * @username: " and truncated to 140 characters. + */ + 2: string text (personalDataType = 'PrivateTweets, PublicTweets') + + /** + * The client from which this tweet was created + * + * The format of this value is oauth:. + */ + 3: string created_via (personalDataType = 'ClientType') + + /** + * Time this tweet was created. + * + * This value is seconds since the Unix epoch. For tweets with Snowflake IDs + * this value is redundant, since a millisecond-precision timestamp is part + * of the id. + */ + 4: i64 created_at_secs + + /** + * Present when this tweet is a reply to another tweet or another user. + */ + 5: optional Reply reply + + /** + * Present when a tweet begins with an @mention or has metadata indicating the directed-at user. + */ + 6: optional DirectedAtUser directed_at_user + + /** + * Present when this tweet is a retweet. + */ + 7: optional Share share + + /** + * Whether there is a takedown country code or takedown reason set for this specific tweet. + * + * See takedown_country_codes for the countries where the takedown is active. (deprecated) + * See takedown_reasons for a list of reasons why the tweet is taken down. + * + * has_takedown will be set to true if either this specific tweet or the author has a + * takedown active.
+ */ + 8: bool has_takedown = 0 + + /** + * Whether this tweet might be not-safe-for-work, judged by the tweet author. + * + * Users can flag their own accounts as not-safe-for-work in account + * preferences by selecting "Mark media I tweet as containing material that + * may be sensitive" and each tweet created after that point will have + * this flag set. + * + * The value can also be updated after tweet create time via the + * update_possibly_sensitive_tweet method. + */ + 9: bool nsfw_user = 0 + + /** + * Whether this tweet might be not-safe-for-work, judged by an internal Twitter + * support agent. + * + * This tweet value originates from the user's nsfw_admin flag at + * tweet create time but can be updated afterwards using the + * update_possibly_sensitive_tweet method. + */ + 10: bool nsfw_admin = 0 + + /** + * When nullcast is true a tweet is not delivered to a user's followers, not + * shown in the user's timeline, and does not appear in search results. + * + * This is primarily used to create tweets that can be used as ads without + * broadcasting them to an advertiser's followers. + */ + 11: bool nullcast = 0 (tweetEditAllowed='false') + + /** + * Narrowcast limits delivery of a tweet to followers in specific geographic + * regions. + */ + 12: optional Narrowcast narrowcast (tweetEditAllowed='false') + + /** + * The impression id of the ad from which this tweet was created. + * + * This is set when a user retweets or replies to a promoted tweet. It is + * used to attribute the "earned" exposure of an advertisement. + */ + 13: optional i64 tracking_id (personalDataType = 'ImpressionId', tweetEditAllowed='false') + + /** + * A shared identifier among all the tweets in the reply chain for a single + * tweet. + * + * The conversation id is the id of the tweet that started the conversation. 
+ */ + 14: optional i64 conversation_id (strato.space = "Tweet", strato.name = "conversation", personalDataType = 'TweetId') + + /** + * Whether this tweet has media of any type. + * + * Media can be in the form of media entities, media cards, or URLs in the + * tweet text that link to media partners. + * + * @see MediaIndexHelper + */ + 15: optional bool has_media + + /** + * Supported for legacy clients to associate a location with a Tweet. + * + * Twitter owned clients must use place_id REST API param for geo-tagging. + * + * @deprecated Use place_id REST API param + */ + 16: optional GeoCoordinates coordinates (personalDataType = 'GpsCoordinates', tweetEditAllowed='false') + + /** + * The location where a tweet was sent from. + * + * Place is either published in API request explicitly or implicitly reverse + * geocoded from API lat/lon coordinates params. + * + * Tweetypie implementation notes: + * - Currently, if both place_id and coordinates are specified, coordinates + * takes precedence in geo-tagging. I.e.: Place returned rgc(coordinates) + * sets the place_id field. + * - place_id is reverse geocoded on write-path. + */ + 17: optional string place_id (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') +}(persisted='true', hasPersonalData = 'true', tweetEditAllowed='false') + +/** + * List of community ID's the tweet belongs to. + */ +struct Communities { + 1: required list community_ids (personalDataType = 'EngagementId') +}(persisted='true') + +/** + * Tweet metadata that is present on extended tweets, a tweet whose total text length is greater + * than the classic limit of 140 characters. + */ +struct ExtendedTweetMetadata { + /** + * @deprecated was display_count + */ + 1: i32 unused1 = 0 + + /** + * The index, in unicode code points, at which the tweet text should be truncated + * for rendering in a public API backwards-compatible mode. 
Once truncated to this + * point, the text should be appended with an ellipsis, a space, and the short_url + * from self_permalink. The resulting text must conform to the 140 display glyph + * limit. + */ + 2: required i32 api_compatible_truncation_index + + /** + * @deprecated was default_display_truncation_index + */ + 3: i32 unused3 = 0 + + /** + * @deprecated was is_long_form + */ + 4: bool unused4 = 0 + + /** + * @deprecated was preview_range + */ + 5: optional TextRange unused5 + + /** + * @deprecated was extended_preview_range + */ + 6: optional TextRange unused6 +}(persisted='true') + +/** + * @deprecated use TransientCreateContext instead + */ +enum TweetCreateContextKey { + PERISCOPE_IS_LIVE = 0, + PERISCOPE_CREATOR_ID = 1 +} + +/** + * DirectedAtUserMetadata is a tweetypie-internal structure that can be used to store metadata about + * a directed-at user on the tweet. + * + * Note: absence of this field does not imply the tweet does not have a DirectedAtUser, see + * tweet.directedAtUserMetadata for more information. + */ +struct DirectedAtUserMetadata { + /** + * ID of the user a tweet is directed-at. + */ + 1: optional i64 user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +/** + * Tweet metadata that may be present on tweets in a self-thread (tweetstorm). + * + * A self-thread is a tree of self-replies that may either: + * 1. begin as a reply to another user's tweet (called a non-root self-thread) or + * 2. stand alone (called root self-thread). + * + * Note that not all self-threads have SelfThreadMetadata. + */ +struct SelfThreadMetadata { + /** + * A shared identifier among all the tweets in the self-thread (tweetstorm). + * + * The tweetstorm id is the id of the tweet that started the self thread. + * + * If the id matches the tweet's conversation_id then it is a root self-thread, otherwise it is + * a non-root self-thread. 
+ */ + 1: required i64 id (personalDataType = 'TweetId') + + /** + * Indicates if the tweet with this SelfThreadMetadata is a leaf in the self-thread tree. + * This flag might be used to encourage the author to extend their tweetstorm at the end. + */ + 2: bool isLeaf = 0 +}(persisted='true', hasPersonalData = 'true') + +/** + * Composer flow used to create this tweet. Unless using the News Camera (go/newscamera) + * flow, this should be `STANDARD`. + * + * When set to `CAMERA`, clients are expected to display the tweet with a different UI + * to emphasize attached media. + */ +enum ComposerSource { + STANDARD = 1 + CAMERA = 2 +} + + +/** + * The conversation owner and users in invited_user_ids can reply + **/ +struct ConversationControlByInvitation { + 1: required list invited_user_ids (personalDataType = 'UserId') + 2: required i64 conversation_tweet_author_id (personalDataType = 'UserId') + 3: optional bool invite_via_mention +}(persisted='true', hasPersonalData = 'true') + +/** + * The conversation owner, users in invited_user_ids, and users who the conversation owner follows can reply + **/ +struct ConversationControlCommunity { + 1: required list invited_user_ids (personalDataType = 'UserId') + 2: required i64 conversation_tweet_author_id (personalDataType = 'UserId') + 3: optional bool invite_via_mention +}(persisted='true', hasPersonalData = 'true') + +/** + * The conversation owner, users in invited_user_ids, and users who follows the conversation owner can reply + **/ +struct ConversationControlFollowers { + 1: required list invited_user_ids (personalDataType = 'UserId') + 2: required i64 conversation_tweet_author_id (personalDataType = 'UserId') + 3: optional bool invite_via_mention +}(persisted='true', hasPersonalData = 'true') + +/** +* This tweet metadata captures restrictions on who is allowed to reply in a conversation. 
+*/ +union ConversationControl { + + 1: ConversationControlCommunity community + + 2: ConversationControlByInvitation byInvitation + + 3: ConversationControlFollowers followers +}(persisted='true', hasPersonalData = 'true') + +// This tweet metadata shows the exclusivity of a tweet and is used to determine +// whether replies / visibility of a tweet is limited +struct ExclusiveTweetControl { + 1: required i64 conversation_author_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +/** + * Tweet metadata for a Trusted Friends tweet. + * + * A Trusted Friends tweet is a tweet whose visibility is restricted to members + * of an author-specified list. + * + * Replies to a Trusted Friends tweet will inherit a copy of this metadata from + * the root tweet. + */ +struct TrustedFriendsControl { + /** + * The ID of the Trusted Friends List whose members can view this tweet. + */ + 1: required i64 trusted_friends_list_id (personalDataType = 'TrustedFriendsListMetadata') +}(persisted='true', hasPersonalData = 'true') + +enum CollabInvitationStatus { + PENDING = 0 + ACCEPTED = 1 + REJECTED = 2 +} + +/** + * Represents a user who has been invited to collaborate on a CollabTweet, associated with whether + * they have accepted or rejected collaboration + */ +struct InvitedCollaborator { + 1: required i64 collaborator_user_id (personalDataType = 'UserId') + 2: required CollabInvitationStatus collab_invitation_status +}(persisted='true', hasPersonalData='true') + +/** + * Present if Tweet is a CollabInvitation awaiting publishing, stores list of invited Collaborators + */ +struct CollabInvitation { + 1: required list invited_collaborators +}(persisted='true', hasPersonalData='true') + +/** + * Present if Tweet is a published CollabTweet, stores list of Collaborators + */ +struct CollabTweet { + 1: required list collaborator_user_ids (personalDataType = 'UserId') +}(persisted='true', hasPersonalData='true') + +/** + * CollabTweets treat multiple users 
as co-authors or "Collaborators" of a single "Collab Tweet". + * + * When creating a Collab Tweet, the original author will begin by creating a CollabInvitation which + * is sent to another Collaborator to accept or reject collaboration. If and when other + * Collaborators have accepted, the CollabInvitation is replaced by a CollabTweet which is published + * publicly and fanned out to followers of all Collaborators. A CollabInvitation will be hidden from + * anyone except the list of Collaborators using VF. The CollabTweet will then be fanned out like + * a regular Tweet to the profiles and combined audiences of all Collaborators. + * + * A Tweet representing a CollabTweet or CollabInvitation is denoted by the presence of a + * CollabControl field on a Tweet. + */ +union CollabControl { + 1: CollabInvitation collab_invitation + 2: CollabTweet collab_tweet +}(persisted='true', hasPersonalData='true') + +/** + * A Tweet is a message that belongs to a Twitter user. + * + * The Tweet struct replaces the deprecated Status struct. All fields except + * id are optional. + * + * This struct supports the additional fields flexible schema. Additional fields are + * defined starting from field 101. + * + * The guidelines for adding a new Additional field: + * 1. It's required to define the additional field as an optional struct. + * Inside the struct, define optional or non-optional field(s) according + * to your needs. + * 2. If you have several immutable pieces of data that are always accessed + * together, you should define them in the same struct for better storage + * locality. + * 3. If your data model has several mutable pieces, and different pieces can + * be updated in close succession, you should group them into + * separate structs and each struct contains one mutable piece. + */ +struct Tweet { + /** + * The primary key for a tweet. + * + * A tweet's id is assigned by the tweet service at creation time.
Since + * 2010-11-04 tweet ids have been generated using Snowflake. Prior to this + * ids were assigned sequentially by MySQL AUTOINCREMENT. + */ + 1: i64 id (personalDataType = 'TweetId') + + /** + * The essential properties of a tweet. + * + * This field will always be present on tweets returned by Tweetypie. It is + * marked optional so an empty tweet can be provided to write additional + * fields. + */ + 2: optional TweetCoreData core_data + + /** + * URLs extracted from the tweet's text. + */ + 3: optional list urls + + /** + * Mentions extracted from the tweet's text. + */ + 4: optional list mentions + + /** + * Hashtags extracted from the tweet's text. + */ + 5: optional list hashtags + + /** + * Cashtags extracted from the tweet's text + */ + 6: optional list cashtags + + 7: optional list media + + /** + * Place identified by Tweet.core_data.place_id. + */ + 10: optional Place place + + 11: optional QuotedTweet quoted_tweet + + /** + * The list of countries where this tweet will not be shown. + * + * This field contains countries for both the tweet and the user, so it may + * contain values even if has_takedown is false. + * + * @deprecated, use field 30 takedown_reasons which includes the same information and more + */ + 12: optional list takedown_country_codes (personalDataType = 'ContentRestrictionStatus') + + /** + * Interaction metrics for this tweet. + * + * Included when one of GetTweetOptions.load_retweet_count, + * GetTweetOptions.load_reply_count, or GetTweetOptions.load_favorite_count + * is set. This can be missing in a PARTIAL response if the TFlock request + * fails. + */ + 13: optional StatusCounts counts + + /** + * Properties of the client from which the tweet was sent. + * + * This can be missing in a PARTIAL response if the Passbird request fails. + */ + 14: optional DeviceSource device_source + + /** + * Properties of this tweet from the point of view of + * GetTweetOptions.for_user_id. 
+ * + * This field is included only when for_user_id is provided and + * include_perspective == true. This can be missing in a PARTIAL response if + * the timeline service request fails. + */ + 15: optional StatusPerspective perspective + + /** + * Version 1 cards. + * + * This field is included only when GetTweetOptions.include_cards == true. + */ + 16: optional list cards + + /** + * Version 2 cards. + * + * This field is included only when GetTweetOptions.include_cards + * == true and GetTweetOptions.cards_platform_key is set to a valid value. + */ + 17: optional cards.Card2 card2 + + /** + * Human language of tweet text as determined by TwitterLanguageIdentifier. + */ + 18: optional Language language + + /** + * @deprecated + */ + 19: optional map spam_labels + + /** + * User responsible for creating this tweet when it is not the same as the + * core_data.user_id. + * + * This is sensitive information and must not be shared externally (via UI, + * API, or streaming) except to the owner of the tweet + * (core_data.user_id) or a contributor to the owner's account. + */ + 20: optional Contributor contributor + + // obsolete 21: optional list topic_labels + + 22: optional enrichments_profilegeo.ProfileGeoEnrichment profile_geo_enrichment + + // Maps extension name to value; only populated if the request contained an extension on tweets. + // obsolete 24: optional map extensions + + /** + * Deprecated. + * Semantic entities that are related to this tweet. + */ + 25: optional TweetPivots tweet_pivots + + /** + * @deprecated + * Strato Tweet Extensions support has moved to birdherd. + * + * Internal thrift clients should query strato columns directly and + * not rely upon ext/*.Tweet columns which are designed to serve + * client APIs. + */ + 26: optional binary extensions_reply + + /** + * Has the requesting user muted the conversation referred to by + * `conversation_id`? When this field is absent, the conversation may + * or may not be muted.
Use the `include_conversation_muted` field in + * GetTweetOptions to request this field. + * + * If this field has a value, the value applies to the user in the + * `for_user_id` field of the requesting `GetTweetOptions`. + */ + 27: optional bool conversation_muted + + /** + * The user id of the tweet referenced by conversation_id + * + * @deprecated Was conversation_owner_id. This was never implemented. + */ + 28: optional i64 unused28 + + /** + * Has this tweet been removed from its conversation by the conversation owner? + * + * @deprecated Was is_removed_from_conversation. This was never implemented. + */ + 29: optional bool unused29 + + /** + * A list of takedown reasons indicating which country and reason this tweet was taken down. + */ + 30: optional list takedown_reasons + + /** + * @obsolete, self-thread metadata is now stored in field 151, self_thread_metadata + */ + 31: optional self_thread.SelfThreadInfo self_thread_info + + // field 32 to 99 are reserved + // field 100 is used for flexible schema proof of concept + // additional fields + // these fields are stored in Manhattan flexible schema + 101: optional TweetMediaTags media_tags + 102: optional SchedulingInfo scheduling_info + + /** + * @deprecated + */ + 103: optional CardBindingValues binding_values + + /** + * @deprecated + */ + 104: optional ReplyAddresses reply_addresses + + /** + * OBSOLETE, but originally contained information about synthetic tweets created by the first + * version of Twitter Suggests. 
+ * + * @deprecated + */ + 105: optional TwitterSuggestInfo obsolete_twitter_suggest_info + + 106: optional EscherbirdEntityAnnotations escherbird_entity_annotations (personalDataType = 'AnnotationValue') + + // @deprecated 2021-07-19 + 107: optional safety_label.SafetyLabel spam_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 108: optional safety_label.SafetyLabel abusive_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 109: optional safety_label.SafetyLabel low_quality_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 110: optional safety_label.SafetyLabel nsfw_high_precision_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 111: optional safety_label.SafetyLabel nsfw_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 112: optional safety_label.SafetyLabel abusive_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 113: optional safety_label.SafetyLabel low_quality_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 114: optional safety_label.SafetyLabel persona_non_grata_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 115: optional safety_label.SafetyLabel recommendations_low_quality_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 116: optional safety_label.SafetyLabel experimentation_label (personalDataType = 'TweetSafetyLabels') + + 117: optional tweet_location_info.TweetLocationInfo tweet_location_info + 118: optional CardReference card_reference + + /** + * @deprecated 2020-07-08 no longer populated. + */ + 119: optional SupplementalLanguage supplemental_language + + // field 120, additional_media_metadata, is deprecated. 
+ // field 121, media_metadatas, is deprecated + + // under certain circumstances, including long form tweets, we create and store a self-permalink + // to this tweet. in the case of a long-form tweet, this will be used in a truncated version + // of the tweet text. + 122: optional ShortenedUrl self_permalink + + // metadata that is present on extended tweets. + 123: optional ExtendedTweetMetadata extended_tweet_metadata + + // obsolete 124: crosspost_destinations.CrosspostDestinations crosspost_destinations + + // Communities associated with a tweet + 125: optional Communities communities (personalDataType = 'PrivateTweetEntitiesAndMetadata', tweetEditAllowed='false') + + // If some text at the beginning or end of the tweet should be hidden, then this + // field indicates the range of text that should be shown in clients. + 126: optional TextRange visible_text_range + + // @deprecated 2021-07-19 + 127: optional safety_label.SafetyLabel spam_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 128: optional safety_label.SafetyLabel duplicate_content_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 129: optional safety_label.SafetyLabel live_low_quality_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 130: optional safety_label.SafetyLabel nsfa_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 131: optional safety_label.SafetyLabel pdna_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 132: optional safety_label.SafetyLabel search_blacklist_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 133: optional safety_label.SafetyLabel low_quality_mention_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 134: optional safety_label.SafetyLabel bystander_abusive_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 135: optional 
safety_label.SafetyLabel automation_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 136: optional safety_label.SafetyLabel gore_and_violence_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 137: optional safety_label.SafetyLabel untrusted_url_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 138: optional safety_label.SafetyLabel gore_and_violence_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 139: optional safety_label.SafetyLabel nsfw_video_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 140: optional safety_label.SafetyLabel nsfw_near_perfect_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 141: optional safety_label.SafetyLabel automation_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 142: optional safety_label.SafetyLabel nsfw_card_image_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 143: optional safety_label.SafetyLabel duplicate_mention_label (personalDataType = 'TweetSafetyLabels') + + // @deprecated 2021-07-19 + 144: optional safety_label.SafetyLabel bounce_label (personalDataType = 'TweetSafetyLabels') + // field 145 to 150 is reserved for safety labels + + /** + * If this tweet is part of a self_thread (tweetstorm) then this value may be set. + * See SelfThreadMetadata for details. + */ + 151: optional SelfThreadMetadata self_thread_metadata + // field 152 has been deprecated + + // The composer used to create this tweet. Either via the standard tweet creator or the + // Camera flow (go/newscamera). + // + // NOTE: this field is only set if a client passed an explicit ComposerSource in the PostTweetRequest. + // News Camera is deprecated and we no longer set ComposerSource in the PostTweetRequest so no new Tweets will + // have this field. 
+ 153: optional ComposerSource composer_source + + // Present if replies are restricted, see ConversationControl for more details + 154: optional ConversationControl conversation_control + + // Determines the super follows requirements for being able to view a tweet. + 155: optional ExclusiveTweetControl exclusive_tweet_control (tweetEditAllowed='false') + + // Present for a Trusted Friends tweet, see TrustedFriendsControl for more details. + 156: optional TrustedFriendsControl trusted_friends_control (tweetEditAllowed='false') + + // Data about edits and editability. See EditControl for more details. + 157: optional edit_control.EditControl edit_control + + // Present for a CollabTweet or CollabInvitation, see CollabControl for more details. + 158: optional CollabControl collab_control (tweetEditAllowed='false') + + // Present for a 3rd-party developer-built card. See http://go/developer-built-cards-prd + 159: optional i64 developer_built_card_id (personalDataType = 'CardId') + + // Data about enrichments attached to a tweet. + 160: optional creative_entity_enrichments.CreativeEntityEnrichments creative_entity_enrichments_for_tweet + + // This field includes summed engagements from the previous tweets in the edit chain. + 161: optional StatusCounts previous_counts + + // A list of media references, including information about the source Tweet for pasted media. + // Prefer this field to media_keys, as media_keys is not present for old Tweets or pasted media Tweets. + 162: optional list media_refs + + // Whether this tweet is a 'backend tweet' to be referenced only by the creatives containers service + // go/cea-cc-integration for more details + 163: optional bool is_creatives_container_backend_tweet + + /** + * Aggregated perspective of this tweet and all other versions from the point of view of the + * user specified in for_user_id. 
+ * + * This field is included only when for_user_id is provided and can be missing in a PARTIAL response + * if the timeline service request fails. + */ + 164: optional api_fields.TweetPerspective edit_perspective + + // Visibility controls related to Toxic Reply Filtering + // go/toxrf for more details + 165: optional filtered_reply_details.FilteredReplyDetails filtered_reply_details + + // The list of mentions that have been unmentioned from the tweet's associated conversation + 166: optional unmentions.UnmentionData unmention_data + + /** + * A list of users that were mentioned in the tweet and have a blocking + * relationship with the author. + */ + 167: optional BlockingUnmentions blocking_unmentions + + /** + * A list of users that were mentioned in the tweet and should be unmentioned + * based on their mention settings + */ + 168: optional SettingsUnmentions settings_unmentions + + /** + * A Note associated with this Tweet. + */ + 169: optional note_tweet.NoteTweet note_tweet + + // For additional fields, the next available field id is 170. + // NOTE: when adding a new additional field, please also update UnrequestedFieldScrubber.scrubKnownFields + + /** + * INTERNAL FIELDS + * + * These fields are used by tweetypie only and should not be accessed externally. + * The field ids are in descending order, starting with `32767`. + */ + + /** + * Present if tweet data is provided by the creatives container service instead of tweetypie storage, + * with encapsulated tweets or customized data. + */ + 32763: optional i64 underlying_creatives_container_id + + /** + * Stores tweetypie-internal metadata about a DirectedAtUser. + * + * A tweet's DirectedAtUser is hydrated as follows: + * 1. if this field is present, then DirectedAtUserMetadata.userId is the directed-at user + * 2. if this field is absent, then if the tweet has a reply and has a mention starting at text + * index 0 then that user is the directed-at user.
+ * + * Note: External clients should use CoreData.directed_at_user. + */ + 32764: optional DirectedAtUserMetadata directed_at_user_metadata + + // list of takedowns that are applied directly to the tweet + 32765: optional list tweetypie_only_takedown_reasons + + // Stores the media keys used to interact with the media platform systems. + // Prefer `media_refs` which will always have media data, unlike this field which is empty for + // older Tweets and Tweets with pasted media. + 32766: optional list media_keys + + // field 32767 is the list of takedowns that are applied directly to the tweet + 32767: optional list tweetypie_only_takedown_country_codes (personalDataType = 'ContentRestrictionStatus') + + + // for internal fields, the next available field id is 32762 (counting down) +}(persisted='true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift new file mode 100644 index 000000000..db8361805 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift @@ -0,0 +1,32 @@ +namespace java com.twitter.tweetypie.thriftjava +namespace py gen.twitter.tweetypie.tweet_audit +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace rb TweetyPie +namespace go tweetypie + +// Copied from UserActionReason in guano.thrift - this should be kept in sync (though upper cased) +enum AuditUserActionReason { + SPAM + CHURNING + OTHER + PHISHING + BOUNCING + + RESERVED_1 + RESERVED_2 +} + +// This struct contains all fields of DestroyStatus in guano.thrift that can be set per remove/deleteTweets invocation +// Values are passed through TweetyPie as-is to guano scribe and not used by TweetyPie.
+struct AuditDeleteTweet { + 1: optional string host (personalDataType = 'IpAddress') + 2: optional string bulk_id + 3: optional AuditUserActionReason reason + 4: optional string note + 5: optional bool done + 6: optional string run_id + // OBSOLETE 7: optional i64 id + 8: optional i64 client_application_id (personalDataType = 'AppId') + 9: optional string user_agent (personalDataType = 'UserAgent') +}(persisted = 'true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift new file mode 100644 index 000000000..4ad96e564 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift @@ -0,0 +1,28 @@ +namespace java com.twitter.tweetypiecomparison.thriftjava +#@namespace scala com.twitter.tweetypiecomparison.thriftscala +#@namespace strato com.twitter.tweetypiecomparison + +include "com/twitter/tweetypie/tweet_service.thrift" +include "com/twitter/context/viewer.thrift" + +service TweetComparisonService { + void compare_retweet( + 1: tweet_service.RetweetRequest request, + 2: optional viewer.Viewer viewer + ) + + void compare_post_tweet( + 1: tweet_service.PostTweetRequest request, + 2: optional viewer.Viewer viewer + ) + + void compare_unretweet( + 1: tweet_service.UnretweetRequest request, + 2: optional viewer.Viewer viewer + ) + + void compare_delete_tweets( + 1: tweet_service.DeleteTweetsRequest request, + 2: optional viewer.Viewer viewer + ) +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift new file mode 100644 index 000000000..a80a74bf9 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift @@ -0,0 +1,277 @@ +namespace java com.twitter.tweetypie.thriftjava +namespace py gen.twitter.tweetypie.tweet_events +#@namespace scala 
com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace rb TweetyPie +namespace go tweetypie + +include "com/twitter/tseng/withholding/withholding.thrift" +include "com/twitter/tweetypie/transient_context.thrift" +include "com/twitter/tweetypie/tweet.thrift" +include "com/twitter/tweetypie/tweet_audit.thrift" +include "com/twitter/gizmoduck/user.thrift" + +/** + * SafetyType encodes the event user's safety state in an enum so downstream + * event processors can filter events without having to load the user. + */ +enum SafetyType { + PRIVATE = 0 // user.safety.isProtected + RESTRICTED = 1 // !PRIVATE && user.safety.suspended + PUBLIC = 2 // !(PRIVATE || RESTRICTED) + RESERVED0 = 3 + RESERVED1 = 4 + RESERVED2 = 5 + RESERVED3 = 6 +} + +struct TweetCreateEvent { + /** + * The tweet that has been created. + */ + 1: tweet.Tweet tweet + + /** + * The user who owns the created tweet. + */ + 2: user.User user + + /** + * The tweet being retweeted. + */ + 3: optional tweet.Tweet source_tweet + + /** + * The user who owns source_tweet. + */ + 4: optional user.User source_user + + /** + * The user whose tweet or retweet is being retweeted. + * + * This is the id of the user who owns + * tweet.core_data.share.parent_status_id. In many cases this will be the + * same as source_user.id; it is different when the tweet is created via + * another retweet. See the explanation of source_user_id and parent_user_id + * in Share for examples. + */ + 5: optional i64 retweet_parent_user_id (personalDataType = 'UserId') + + /** + * The tweet quoted in the created tweet. + */ + 6: optional tweet.Tweet quoted_tweet + + /** + * The user who owns quoted_tweet. + */ + 7: optional user.User quoted_user + + /** + * Arbitrary passthrough metadata about tweet creation. + * + * See TweetCreateContextKey for more details about the data that may be + * present here. 
+ */ + 8: optional map additional_context (personalDataTypeValue='UserId') + + /** + * Additional request arguments passed through to consumers. + */ + 9: optional transient_context.TransientCreateContext transient_context + + /** + * Flag exposing if a quoted tweet has been quoted by the user previously. + **/ + 10: optional bool quoter_has_already_quoted_tweet +}(persisted='true', hasPersonalData = 'true') + +struct TweetDeleteEvent { + /** + * The tweet being deleted. + */ + 1: tweet.Tweet tweet + + /** + * The user who owns the deleted tweet. + */ + 2: optional user.User user + + /** + * Whether this tweet was deleted as part of user erasure (the process of deleting tweets + * belonging to deactivated accounts). + * + * These deletions occur in high volume spikes and the tweets have already been made invisible + * externally. You may wish to process them in batches or offline. + */ + 3: optional bool is_user_erasure + + /** + * Audit information from the DeleteTweetRequest that caused this deletion. + * + * This field is used to track the reason for deletion in non-user-initiated + * tweet deletions, like Twitter support agents deleting tweets or spam + * cleanup. + */ + 4: optional tweet_audit.AuditDeleteTweet audit + + /** + * Id of the user initiating this request. + * It could be either the owner of the tweet or an admin. + * It is used for scrubbing. + */ + 5: optional i64 by_user_id (personalDataType = 'UserId') + + /** + * Whether this tweet was deleted by an admin user or not + * + * It is used for scrubbing. 
+ */ + 6: optional bool is_admin_delete +}(persisted='true', hasPersonalData = 'true') + +struct TweetUndeleteEvent { + 1: tweet.Tweet tweet + 2: optional user.User user + 3: optional tweet.Tweet source_tweet + 4: optional user.User source_user + 5: optional i64 retweet_parent_user_id (personalDataType = 'UserId') + 6: optional tweet.Tweet quoted_tweet + 7: optional user.User quoted_user + // timestamp of the deletion that this undelete is reversing + 8: optional i64 deleted_at_msec +}(persisted='true', hasPersonalData = 'true') + +/** + * When a user deletes the location information for their tweets, we send one + * TweetScrubGeoEvent for every tweet from which the location is removed. + * + * Users cause this by selecting "Delete location information" in Settings -> + * Privacy. + */ +struct TweetScrubGeoEvent { + 1: i64 tweet_id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +/** + * When a user deletes the location information for their tweets, we send one + * UserScrubGeoEvent with the max tweet ID that was scrubbed (in addition to + * sending multiple TweetScrubGeoEvents as described above). + * + * Users cause this by selecting "Delete location information" in Settings -> + * Privacy. This additional event is sent to maintain backwards compatibility + * with Hosebird. + */ +struct UserScrubGeoEvent { + 1: i64 user_id (personalDataType = 'UserId') + 2: i64 max_tweet_id (personalDataType = 'TweetId') +}(persisted='true', hasPersonalData = 'true') + +struct TweetTakedownEvent { + 1: i64 tweet_id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') + // This is the complete list of takedown country codes for the tweet, + // including whatever modifications were made to trigger this event. 
+ // @deprecated Prefer takedown_reasons once TWEETYPIE-4329 deployed + 3: list takedown_country_codes = [] + // This is the complete list of takedown reasons for the tweet, + // including whatever modifications were made to trigger this event. + 4: list takedown_reasons = [] +}(persisted='true', hasPersonalData = 'true') + +struct AdditionalFieldUpdateEvent { + // Only contains the tweet id and modified or newly added fields on that tweet. + // Unchanged fields and tweet core data are omitted. + 1: tweet.Tweet updated_fields + 2: optional i64 user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +struct AdditionalFieldDeleteEvent { + // a map from tweet id to deleted field ids + // Each event will only contain one tweet. + 1: map> deleted_fields (personalDataTypeKey='TweetId') + 2: optional i64 user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +// This event is only logged to scribe not sent to EventBus +struct TweetMediaTagEvent { + 1: i64 tweet_id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') + 3: set tagged_user_ids (personalDataType = 'UserId') + 4: optional i64 timestamp_ms +}(persisted='true', hasPersonalData = 'true') + +struct TweetPossiblySensitiveUpdateEvent { + 1: i64 tweet_id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') + // The below two fields contain the results of the update. 
+ 3: bool nsfw_admin + 4: bool nsfw_user +}(persisted='true', hasPersonalData = 'true') + +struct QuotedTweetDeleteEvent { + 1: i64 quoting_tweet_id (personalDataType = 'TweetId') + 2: i64 quoting_user_id (personalDataType = 'UserId') + 3: i64 quoted_tweet_id (personalDataType = 'TweetId') + 4: i64 quoted_user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +struct QuotedTweetTakedownEvent { + 1: i64 quoting_tweet_id (personalDataType = 'TweetId') + 2: i64 quoting_user_id (personalDataType = 'UserId') + 3: i64 quoted_tweet_id (personalDataType = 'TweetId') + 4: i64 quoted_user_id (personalDataType = 'UserId') + // This is the complete list of takedown country codes for the tweet, + // including whatever modifications were made to trigger this event. + // @deprecated Prefer takedown_reasons + 5: list takedown_country_codes = [] + // This is the complete list of takedown reasons for the tweet, + // including whatever modifications were made to trigger this event. + 6: list takedown_reasons = [] +}(persisted='true', hasPersonalData = 'true') + +union TweetEventData { + 1: TweetCreateEvent tweet_create_event + 2: TweetDeleteEvent tweet_delete_event + 3: AdditionalFieldUpdateEvent additional_field_update_event + 4: AdditionalFieldDeleteEvent additional_field_delete_event + 5: TweetUndeleteEvent tweet_undelete_event + 6: TweetScrubGeoEvent tweet_scrub_geo_event + 7: TweetTakedownEvent tweet_takedown_event + 8: UserScrubGeoEvent user_scrub_geo_event + 9: TweetPossiblySensitiveUpdateEvent tweet_possibly_sensitive_update_event + 10: QuotedTweetDeleteEvent quoted_tweet_delete_event + 11: QuotedTweetTakedownEvent quoted_tweet_takedown_event +}(persisted='true', hasPersonalData = 'true') + +/** + * @deprecated + */ +struct Checksum { + 1: i32 checksum +}(persisted='true') + +struct TweetEventFlags { + /** + * @deprecated Was dark_for_service. 
+ */ + 1: list unused1 = [] + + 2: i64 timestamp_ms + + 3: optional SafetyType safety_type + + /** + * @deprecated Was checksum. + */ + 4: optional Checksum unused4 +}(persisted='true') + +/** + * A TweetEvent is a notification published to the tweet_events stream. + */ +struct TweetEvent { + 1: TweetEventData data + 2: TweetEventFlags flags +}(persisted='true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift new file mode 100644 index 000000000..3be5f3b12 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift @@ -0,0 +1,2320 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.service +namespace rb TweetyPie +namespace go tweetypie + +include "com/twitter/bouncer/bounce.thrift" +include "com/twitter/carousel/service/carousel_service.thrift" +include "com/twitter/context/feature_context.thrift" +include "com/twitter/mediaservices/commons/MediaCommon.thrift" +include "com/twitter/mediaservices/commons/MediaInformation.thrift" +include "com/twitter/servo/exceptions.thrift" +include "com/twitter/spam/features/safety_meta_data.thrift" +include "com/twitter/spam/rtf/safety_label.thrift" +include "com/twitter/spam/rtf/safety_level.thrift" +include "com/twitter/spam/rtf/safety_result.thrift" +include "com/twitter/tseng/withholding/withholding.thrift" +include "com/twitter/tweetypie/deleted_tweet.thrift" +include "com/twitter/tweetypie/transient_context.thrift" +include "com/twitter/tweetypie/tweet.thrift" +include "com/twitter/tweetypie/tweet_audit.thrift" +include "com/twitter/incentives/jiminy/jiminy.thrift" +include "unified_cards_contract.thrift" + +typedef i16 FieldId + +struct TweetGeoSearchRequestID { + 1: required string id (personalDataType = 
'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') +}(hasPersonalData = 'true') + +struct TweetCreateGeo { + 1: optional tweet.GeoCoordinates coordinates + 2: optional string place_id (personalDataType = 'InferredLocation') + 3: optional map place_metadata (personalDataTypeKey = 'InferredLocation', personalDataTypeValue = 'InferredLocation') + 4: bool auto_create_place = 1 + // deprecated; use tweet.GeoCoordinates.display + 5: bool display_coordinates = 1 + 6: bool override_user_geo_setting = 0 + 7: optional TweetGeoSearchRequestID geo_search_request_id +}(hasPersonalData = 'true') + +enum StatusState { + /** + * The tweet was found and successfully hydrated. + */ + FOUND = 0 + + /** + * The tweet was not found. It may have been deleted, or could just be an invalid or + * unused tweet id. + */ + NOT_FOUND = 1 + + /** + * The tweet was found, but there was at least one error hydrating some data on the tweet. + * GetTweetResult.missing_fields indicates which fields may have not been hydrated completely. + */ + PARTIAL = 2 + + /** + * @deprecated All failures, including time outs, are indicated by `Failed`. + */ + TIMED_OUT = 3 + + /** + * There was an upstream or internal failure reading this tweet. Usually indicates a + * transient issue that is safe to retry immediately. + */ + FAILED = 4 + + /** + * @deprecated tweets from deactivated users will soon be indicated via `Drop` with + * a `FilteredReason` of `authorAccountIsInactive`. + */ + DEACTIVATED_USER = 5 + + /** + * @deprecated tweets from suspended users will soon be indicated via `Drop` with + * a `FilteredReason` of `authorAccountIsInactive`. + */ + SUSPENDED_USER = 6 + + /** + * @deprecated tweets from protected users that the viewer can't see will soon be + * indicated via `Drop` with a `FilteredReason` of `authorIsProtected`. 
+ */ + PROTECTED_USER = 7 + /** + * @deprecated tweets that have been reported by the viewer will soon be indicated + * via `Drop` or `Suppress` with a `FilteredReason` of `reportedTweet`. + */ + REPORTED_TWEET = 8 + + // PrivateTweet was originally used for TwitterSuggest v1 but has since been removed + // obsolete: PRIVATE_TWEET = 9 + + /** + * Could not return this tweet because of backpressure, should + * not be retried immediately; try again later + */ + OVER_CAPACITY = 10 + + /** + * Returned when the requesting client is considered to not be + * able to render the tweet properly + */ + UNSUPPORTED_CLIENT = 11 + + /** + * The tweet exists, but was not returned because it should not be seen by the + * viewer. The reason for the tweet being filtered is indicated via + * GetTweetResult.filtered_reason. + */ + DROP = 12 + + /** + * The tweet exists and was returned, but should not be directly shown to the + * user without additional user intent to see the tweet, as it may be offensive. + * The reason for the suppression is indicated via GetTweetResult.filtered_reason. + */ + SUPPRESS = 13 + + /** + * The tweet once existed and has been deleted. + * When GetTweetOptions.enable_deleted_state is true, deleted tweets + * will be returned as DELETED + * When GetTweetOptions.enable_deleted_state is false, deleted tweets + * will be returned as NOT_FOUND. + */ + DELETED = 14 + + /** + * The tweet once existed, had violated Twitter Rules, and has been deleted. + * When GetTweetOptions.enable_deleted_state is true, bounce-deleted tweets + * will be returned as BOUNCE_DELETED + * When GetTweetOptions.enable_deleted_state is false, bounce-deleted tweets + * will be returned as NOT_FOUND. + */ + BOUNCE_DELETED = 15 + + RESERVED_1 = 16 + RESERVED_2 = 17 + RESERVED_3 = 18 + RESERVED_4 = 19 +} + +enum TweetCreateState { + /** + * Tweet was created successfully. + */ + OK = 0, + + /** + * The user_id field from the creation request does not correspond to a user. 
+ */ + USER_NOT_FOUND = 1, + + SOURCE_TWEET_NOT_FOUND = 2, + SOURCE_USER_NOT_FOUND = 3, + + /** + * @deprecated Users can now retweet their own tweets. + */ + CANNOT_RETWEET_OWN_TWEET = 4, + + CANNOT_RETWEET_PROTECTED_TWEET = 5, + CANNOT_RETWEET_SUSPENDED_USER = 6, + CANNOT_RETWEET_DEACTIVATED_USER = 7, + CANNOT_RETWEET_BLOCKING_USER = 8, + + ALREADY_RETWEETED = 9, + CONTRIBUTOR_NOT_SUPPORTED = 10, + + /** + * The created_via field from the creation request does not correspond to a + * known client application. + */ + DEVICE_SOURCE_NOT_FOUND = 11, + + MALWARE_URL = 12, + INVALID_URL = 13, + USER_DEACTIVATED = 14, + USER_SUSPENDED = 15, + TEXT_TOO_LONG = 16, + TEXT_CANNOT_BE_BLANK = 17, + DUPLICATE = 18, + + /** + * PostTweetRequest.in_reply_to_tweet_id was set to a tweet that cannot be found. + * + * This usually means that the tweet was recently deleted, but could also + * mean that the tweet isn't visible to the reply author. (This is the + * case for replies by blocked users.) + */ + IN_REPLY_TO_TWEET_NOT_FOUND = 19, + + INVALID_IMAGE = 20, + INVALID_ADDITIONAL_FIELD = 21, + RATE_LIMIT_EXCEEDED = 22, + INVALID_NARROWCAST = 23, + + /** + * Antispam systems (Scarecrow) denied the request. + * + * This happens for tweets that are probably spam, but there is some + * uncertainty. Tweets that Scarecrow is certain are spammy will appear to + * succeed, but will not be added to backends. + */ + SPAM = 24, + SPAM_CAPTCHA = 25, + + /** + * A provided media upload ID can't be resolved. + */ + MEDIA_NOT_FOUND = 26, + + /** + * Catch-all for when uploaded media violate some condition. + * + * For example, too many photos in a multi-photo-set, or including an + * animated gif or video in a multi-photo-set. + */ + INVALID_MEDIA = 27, + + /** + * Returned when Scarecrow tell us to rate limit a tweet request. + * + * Non verified users (i.e., phone verified, email verified) have more + * strict rate limit. 
+ */ + SAFETY_RATE_LIMIT_EXCEEDED = 28, + + /** + * Scarecrow has rejected the creation request until the user completes the + * bounce assignment. + * + * This flag indicates that PostTweetResult.bounce will contain a Bounce + * struct to be propagated to the client. + */ + BOUNCE = 29, + + /** + * Tweet creation was denied because the user is in ReadOnly mode. + * + * As with SPAM, tweets will appear to succeed but will not be actually + * created. + */ + USER_READONLY = 30, + + /** + * Maximum number of mentions allowed in a tweet was exceeded. + */ + MENTION_LIMIT_EXCEEDED = 31, + + /** + * Maximum number of URLs allowed in a tweet was exceeded. + */ + URL_LIMIT_EXCEEDED = 32, + + /** + * Maximum number of hashtags allowed in a tweet was exceeded. + */ + HASHTAG_LIMIT_EXCEEDED = 33, + + /** + * Maximum number of cashtags allowed in a tweet was exceeded. + */ + CASHTAG_LIMIT_EXCEEDED = 34, + + /** + * Maximum length of a hashtag was exceeded. + */ + HASHTAG_LENGTH_LIMIT_EXCEEDED = 35, + + /** + * Returned if a request contains more than one attachment type, which + * includes media, attachment_url, and card_reference. + */ + TOO_MANY_ATTACHMENT_TYPES = 36, + + /** + * Returned if the request contained an attachment URL that isn't allowed. + */ + INVALID_ATTACHMENT_URL = 37, + + /** + * We don't allow users without screen names to be retweeted. + */ + CANNOT_RETWEET_USER_WITHOUT_SCREEN_NAME = 38, + + /** + * Tweets may not be allowed if replying or retweeting IPI'd tweets + * See go/tp-ipi-tdd for more details + */ + DISABLED_BY_IPI_POLICY = 39, + + /** + * This state expands our transparency around which URLs are blacklisted or limited + */ + URL_SPAM = 40, + + // Conversation controls are only valid when present on a root + // conversation tweet and quoted tweets. + INVALID_CONVERSATION_CONTROL = 41, + + // Reply Tweet is limited due to conversation controls state set on + // root conversation Tweet. 
+ REPLY_TWEET_NOT_ALLOWED = 42, + + // Nudge is returned when the client provides nudgeOptions and tweetypie receives a nudge + // from the Jiminy strato column. + NUDGE = 43, + + // ApiError BadRequest (400) "Reply to a community tweet must also be a community tweet" + // -- Triggered when a user tries replying to a community tweet with a non community tweet. + COMMUNITY_REPLY_TWEET_NOT_ALLOWED = 44, + // ApiError Forbidden (403) "User is not authorized to post to this community" + // -- Triggered when a user tries posting to a public/closed community that they are not part of. + COMMUNITY_USER_NOT_AUTHORIZED = 45, + // ApiError NotFound (404) "Community does not exist" -- Triggered when: + // a) A user tries posting to a private community they are not a part of. + // b) A user tries posting to a non existent community + COMMUNITY_NOT_FOUND = 46, + // ApiError BadRequest (400) "Cannot retweet a community tweet" + // -- Triggered when a user tries to retweet a community tweet. Community tweets can not be retweeted. + COMMUNITY_RETWEET_NOT_ALLOWED = 47, + + // Attempt to tweet with Conversation Controls was rejected, e.g. due to feature switch authorization. + CONVERSATION_CONTROL_NOT_ALLOWED = 48, + + // Super follow tweets require a special permission to create. + SUPER_FOLLOWS_CREATE_NOT_AUTHORIZED = 49, + + // Not all params can go together. E.g. super follow tweets can not be community tweets. + SUPER_FOLLOWS_INVALID_PARAMS = 50, + + // ApiError Forbidden (403) "Protected user can not post to communities" + // -- Triggered when a protected user tries tweeting or replying + // to a community tweet. They are not allowed to create community tweets. + COMMUNITY_PROTECTED_USER_CANNOT_TWEET = 51, + + // ApiError Forbidden (451) "User is not permitted to engage with this exclusive tweet." + // -- Triggered when a user tries to reply to an exclusive tweet without being + // a superfollower of the tweet author. Could be used for other engagements in the future (e.g. 
favorite) + EXCLUSIVE_TWEET_ENGAGEMENT_NOT_ALLOWED = 52 + + /** + * ApiError BadRequest (400) "Invalid parameters on Trusted Friends tweet creation" + * + * Returned when either of the following occur: + * a) A user tries setting Trusted Friends Control on a reply + * b) A user tries setting Trusted Friends Control on a tweet with any of the following set: + * i) Conversation Control + * ii) Community + * iii) Exclusive Tweet Control + */ + TRUSTED_FRIENDS_INVALID_PARAMS = 53, + + /** + * ApiError Forbidden (403) + * + * Returned when a user tries to retweet a Trusted Friends tweet. + */ + TRUSTED_FRIENDS_RETWEET_NOT_ALLOWED = 54, + + /** + * ApiError Forbidden (457) + * + * Returned when a user tries to reply to a Trusted Friends tweet + * and they are not a trusted friend. + */ + TRUSTED_FRIENDS_ENGAGEMENT_NOT_ALLOWED = 55, + + /** + * ApiError BadRequest (400) "Invalid parameters for creating a CollabTweet or CollabInvitation" + * + * Returned when any of the following are true: + * a) A user tries setting Collab Control on a reply + * b) A user tries setting Collab Control on a tweet with any of the following set: + * i) Conversation Control + * ii) Community + * iii) Exclusive Tweet Control + * iv) Trusted Friends Control + **/ + COLLAB_TWEET_INVALID_PARAMS = 56, + + /** + * ApiError Forbidden (457) + * + * Returned when a user tries to create a Trusted Friends tweet but they are not allowed to tweet + * to the requested Trusted Friends list. + */ + TRUSTED_FRIENDS_CREATE_NOT_ALLOWED = 57, + + /** + * Returned when the current user is not allowed to edit in general, this might be due to missing + * roles during development, or a missing subscription. + */ + EDIT_TWEET_USER_NOT_AUTHORIZED = 58, + + /** + * Returned when a user tries to edit a Tweet which they didn't author. + */ + EDIT_TWEET_USER_NOT_AUTHOR = 59, + + /** + * Returned when a user tries to edit a stale tweet, meaning a tweet which has already been edited.
+ */ + EDIT_TWEET_NOT_LATEST_VERSION = 60, + + /** + * ApiError Forbidden (460) + * + * Returned when a user tries to create a Trusted Friends tweet that quote-tweets a Trusted + * Friends tweet. + */ + TRUSTED_FRIENDS_QUOTE_TWEET_NOT_ALLOWED = 61, + + /** + * Returned when a user tries to edit a tweet for which the editing time has already expired. + */ + EDIT_TIME_LIMIT_REACHED = 62, + + /** + * Returned when a user tries to edit a tweet which has already been edited the maximum number of times. + */ + EDIT_COUNT_LIMIT_REACHED = 63, + + /* Returned when a user tries to edit a field that is not allowed to be edited */ + FIELD_EDIT_NOT_ALLOWED = 64, + + /* Returned when the initial Tweet could not be found when trying to validate an edit */ + INITIAL_TWEET_NOT_FOUND = 65, + + /** + * ApiError Forbidden (457) + * + * Returned when a user tries to reply to a stale tweet + */ + STALE_TWEET_ENGAGEMENT_NOT_ALLOWED = 66, + + /** + * ApiError Forbidden (460) + * + * Returned when a user tries to create a tweet that quote-tweets a stale tweet + */ + STALE_TWEET_QUOTE_TWEET_NOT_ALLOWED = 67, + + /* Tweet cannot be edited because the initial tweet is + * marked as not edit eligible */ + NOT_ELIGIBLE_FOR_EDIT = 68, + + /* A stale version of an edit tweet cannot be retweeted. + * Only the latest version of an edit chain should be allowed to be retweeted. */ + STALE_TWEET_RETWEET_NOT_ALLOWED = 69, + + RESERVED_32 = 70, + RESERVED_33 = 71, + RESERVED_34 = 72, + RESERVED_35 = 73, + RESERVED_36 = 74, + RESERVED_37 = 75, +} + +enum UndeleteTweetState { + /** + * The Tweet was successfully undeleted. + */ + SUCCESS = 0, + + /** + * The Tweet was deleted and is still deleted. It cannot be undeleted + * because the tweet is no longer in the soft delete archive. + */ + SOFT_DELETE_EXPIRED = 1, + + /** + * The Tweet likely has never existed, and therefore cannot be undeleted. + */ + TWEET_NOT_FOUND = 2, + + /** + * The Tweet could not be undeleted because it was not deleted in + * the first place.
+ */ + TWEET_ALREADY_EXISTS = 3, + + /** + * The user who created the Tweet being undeleted could not be found. + */ + USER_NOT_FOUND = 4, + + /** + * The Tweet could not be undeleted because it is a retweet and the original + * tweet is gone. + */ + SOURCE_TWEET_NOT_FOUND = 5, + + /** + * The Tweet could not be undeleted because it is a retweet and the author + * of the original tweet is gone. + */ + SOURCE_USER_NOT_FOUND = 6, + + /** + * The Tweet was deleted and is still deleted. It cannot be undeleted + * because the tweet has been bounce deleted. Bounce deleted tweet + * has been found to violate Twitter Rules. go/bouncer go/bounced-tweet + */ + TWEET_IS_BOUNCE_DELETED = 7, + + /** + * This tweet cannot be undeleted because the tweet was created by a + * user when they were under 13. + **/ + TWEET_IS_U13_TWEET = 8, + + RESERVED_2 = 9, + RESERVED_3 = 10 +} + +enum TweetDeleteState { + /** + * Tweet was deleted successfully. + */ + OK = 0, + + /** + * Tweet was not deleted because of the associated user. + * + * The DeleteTweetsRequest.by_user_id must match the tweet owner or be an + * admin user. + */ + PERMISSION_ERROR = 1, + + /** + * The expected_user_id provided in DeleteTweetsRequest does not match the + * user_id of the tweet owner. + */ + EXPECTED_USER_ID_MISMATCH = 2, + + /** + * @deprecated. + * + * is_user_erasure was set in DeleteTweetsRequest but the user was not in + * the erased state. + */ + USER_NOT_IN_ERASED_STATE = 3, + + /** + * Failed to Load the source Tweet while unretweeting stale revisions in an edit chain. + */ + SOURCE_TWEET_NOT_FOUND = 4, + + RESERVED_4 = 5, + RESERVED_5 = 6, + RESERVED_6 = 7, + RESERVED_7 = 8 +} + +enum DeletedTweetState { + /** + * The tweet has been marked as deleted but has not been permanently deleted. + */ + SOFT_DELETED = 1 + + /** + * The tweet has never existed. + */ + NOT_FOUND = 2 + + /** + * The tweet has been permanently deleted. 
+ */ + HARD_DELETED = 3 + + /** + * The tweet exists and is not currently deleted. + */ + NOT_DELETED = 4 + + RESERVED1 = 5 + RESERVED2 = 6 + RESERVED3 = 7 +} + +/** + * Hydrations to perform on the Tweet returned by post_tweet and post_retweet. + */ +struct WritePathHydrationOptions { + /** + * Return cards for tweets with cards in Tweet.cards or Tweet.card2 + * + * card2 also requires setting a valid cards_platform_key + */ + 1: bool include_cards = 0 + + /** + * The card format version supported by the requesting client + */ + 2: optional string cards_platform_key + + # 3: obsolete + # 4: obsolete + + /** + * The argument passed to the Stratostore extension points mechanism. + */ + 5: optional binary extensions_args + + /** + * When returning a tweet that quotes another tweet, do not include + * the URL to the quoted tweet in the tweet text and url entities. + * This is intended for clients that use the quoted_tweet field of + * the tweet to display quoted tweets. Also see simple_quoted_tweet + * field in GetTweetOptions and GetTweetFieldsOptions + */ + 6: bool simple_quoted_tweet = 0 +} + +struct RetweetRequest { + /** + * Id of the tweet being retweeted. + */ + 1: required i64 source_status_id (personalDataType = 'TweetId') + + /** + * User creating the retweet. 
+ */ + 2: required i64 user_id (personalDataType = 'UserId') + + /** + * @see PostTweetRequest.created_via + */ + 3: required string created_via (personalDataType = 'ClientType') + 4: optional i64 contributor_user_id (personalDataType = 'UserId') // no longer supported + + /** + * @see PostTweetRequest.tracking_id + */ + 5: optional i64 tracking_id (personalDataType = 'ImpressionId') + 6: optional tweet.Narrowcast narrowcast + + /** + * @see PostTweetRequest.nullcast + */ + 7: bool nullcast = 0 + + /** + * @see PostTweetRequest.dark + */ + 8: bool dark = 0 + + // OBSOLETE 9: bool send_retweet_sms_push = 0 + + 10: optional WritePathHydrationOptions hydration_options + + /** + * @see PostTweetRequest.additional_fields + */ + 11: optional tweet.Tweet additional_fields + + /** + * @see PostTweetRequest.uniqueness_id + */ + 12: optional i64 uniqueness_id (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') + + 13: optional feature_context.FeatureContext feature_context + + 14: bool return_success_on_duplicate = 0 + + /** + * Passthrough data for Scarecrow that is used for safety checks. + */ + 15: optional safety_meta_data.SafetyMetaData safety_meta_data + + /** + * This is a unique identifier used in both the REST and GraphQL-dark + * requests that will be used to correlate the GraphQL mutation requests to the REST requests + * during a transition period when clients will be moving toward tweet creation via GraphQL. + * See also, the "Comparison Testing" section at go/tweet-create-on-graphql-tdd for additional + * context. + */ + 16: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') +}(hasPersonalData = 'true') + +/** + * A request to set or unset nsfw_admin and/or nsfw_user. + */ +struct UpdatePossiblySensitiveTweetRequest { + /** + * Id of tweet being updated + */ + 1: required i64 tweet_id (personalDataType = 'TweetId') + + /** + * Id of the user initiating this request. 
+ * + * It could be either the owner of the tweet or an admin. It is used when + * auditing the request in Guano. + */ + 2: required i64 by_user_id (personalDataType = 'UserId') + + /** + * New value for tweet.core_data.nsfw_admin. + */ + 3: optional bool nsfw_admin + + /** + * New value for tweet.core_data.nsfw_user. + */ + 4: optional bool nsfw_user + + /** + * Host or remote IP where the request originated. + * + * This data is used when auditing the request in Guano. If unset, it will + * be logged as "". + */ + 5: optional string host (personalDataType = 'IpAddress') + + /** + * Pass-through message sent to the audit service. + */ + 6: optional string note +}(hasPersonalData = 'true') + +struct UpdateTweetMediaRequest { + /** + * The tweet id that's being updated + */ + 1: required i64 tweet_id (personalDataType = 'TweetId') + + /** + * A mapping from old (existing) media ids on the tweet to new media ids. + * + * Existing tweet media not in this map will remain unchanged. + */ + 2: required map old_to_new_media_ids (personalDataTypeKey = 'MediaId', personalDataTypeValue = 'MediaId') +}(hasPersonalData = 'true') + +struct TakedownRequest { + 1: required i64 tweet_id (personalDataType = 'TweetId') + + /** + * The list of takedown country codes to add to the tweet. + * + * DEPRECATED, reasons_to_add should be used instead. + */ + 2: list countries_to_add = [] (personalDataType = 'ContentRestrictionStatus') + + /** + * This field is the list of takedown country codes to remove from the tweet. + * + * DEPRECATED, reasons_to_remove should be used instead. + */ + 3: list countries_to_remove = [] (personalDataType = 'ContentRestrictionStatus') + + /** + * This field is the list of takedown reasons to add to the tweet. + */ + 11: list reasons_to_add = [] + + /** + * This field is the list of takedown reasons to remove from the tweet. + */ + 12: list reasons_to_remove = [] + + /** + * Motivation for the takedown which is written to the audit service. 
+ * + * This data is not persisted with the takedown itself. + */ + 4: optional string audit_note (personalDataType = 'AuditMessage') + + /** + * Whether to send this request to the audit service. + */ + 5: bool scribe_for_audit = 1 + + // DEPRECATED, this field is no longer used. + 6: bool set_has_takedown = 1 + + // DEPRECATED, this field is no longer used. + 7: optional list previous_takedown_country_codes (personalDataType = 'ContentRestrictionStatus') + + /** + * Whether this request should enqueue a TweetTakedownEvent to EventBus and + * Hosebird. + */ + 8: bool eventbus_enqueue = 1 + + /** + * ID of the user who initiated the takedown. + * + * This is used when writing the takedown to the audit service. If unset, it + * will be logged as -1. + */ + 9: optional i64 by_user_id (personalDataType = 'UserId') + + /** + * Host or remote IP where the request originated. + * + * This data is used when auditing the request in Guano. If unset, it will + * be logged as "". + */ + 10: optional string host (personalDataType = 'IpAddress') +}(hasPersonalData = 'true') + +// Arguments to delete_location_data +struct DeleteLocationDataRequest { + 1: i64 user_id (personalDataType = 'UserId') +}(hasPersonalData = 'true') + +// structs for API V2 (flexible schema) + +struct GetTweetOptions { + /** + * Return the original tweet in GetTweetResult.source_tweet for retweets. + */ + 1: bool include_source_tweet = 1 + + /** + * Return the hydrated Place object in Tweet.place for tweets with geolocation. + */ + 2: bool include_places = 0 + + /** + * Language used for place names when include_places is true. Also passed to + * the cards service, if cards are hydrated for the request. 
+ */ + 3: string language_tag = "en" + + /** + * Return cards for tweets with cards in Tweet.cards or Tweet.card2 + * + * card2 also requires setting a valid cards_platform_key + */ + 4: bool include_cards = 0 + + /** + * Return the number of times a tweet has been retweeted in + * Tweet.counts.retweet_count. + */ + 5: bool include_retweet_count = 0 + + /** + * Return the number of direct replies to a tweet in + * Tweet.counts.reply_count. + */ + 6: bool include_reply_count = 0 + + /** + * Return the number of favorites a tweet has received in + * Tweet.counts.favorite_count. + */ + 7: bool include_favorite_count = 0 + + # OBSOLETE 8: bool include_unique_users_impressed_count = 0 + # OBSOLETE 9: bool include_click_count = 0 + # OBSOLETE 10: bool include_descendent_reply_count = 0 + + /** + * @deprecated Use safety_level for spam filtering. + */ + 11: optional tweet.SpamSignalType spam_signal_type + + /** + * If the requested tweet is not already in cache, do not add it. + * + * You should set do_not_cache to true if you are requesting old tweets + * (older than 30 days) and they are unlikely to be requested again. + */ + 12: bool do_not_cache = 0 + + /** + * The card format version supported by the requesting client + */ + 13: optional string cards_platform_key (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') + + /** + * The user for whose perspective this request should be processed. + * + * If you are requesting tweets on behalf of a user, set this to their user + * id. The effect of setting this option is: + * + * - Tweetypie will return protected tweets that the user is allowed to + * access, rather than filtering out protected tweets. + * + * - If this field is set *and* `include_perspectivals` is set, then the + * tweets will have the `perspective` field set to a struct with flags + * that indicate whether the user has favorited, retweeted, or reported + * the tweet in question. 
+ * + * If you have a specific need to access all protected tweets (not + * just tweets that should be accessible to the current user), see the + * documentation for `include_protected`. + */ + 14: optional i64 for_user_id (personalDataType = 'UserId') + + /** + * Do not enforce normal filtering for protected tweets, blocked quote tweets, + * contributor data, etc. This does not affect Visibility Library (http://go/vf) + * based filtering which executes when safety_level is specified, see request + * field 24 safety_level below + * + * If `bypass_visibility_filtering` is true, Tweetypie will not enforce filtering + * for protected tweets, blocked quote tweets, contributor data, etc. and your client + * will receive all tweets regardless of follow relationship. You will also be able + * to access tweets from deactivated and suspended users. This is only necessary + * for special cases, such as indexing or analyzing tweets, or administrator access. + * Since this elevated access is usually unnecessary, and is a security risk, you will + * need to get your client id whitelisted to access this feature. + * + * If you are accessing tweets on behalf of a user, set + * `bypass_visibility_filtering` to false and set `for_user_id`. This will + * allow access to exactly the set of tweets that that user is authorized to + * access, and filter out tweets the user should not be authorized to access + * (returned with a StatusState of PROTECTED_USER). + */ + 15: bool bypass_visibility_filtering = 0 + + /** + * Return the user-specific view of a tweet in Tweet.perspective + * + * for_user_id must also be set. + */ + 16: bool include_perspectivals = 0 + + // OBSOLETE media faces are always included + 17: bool include_media_faces = 0 + + /** + * The flexible schema fields of the tweet to return. + * + * Fields of tweets in the 100+ range will only be returned if they are + * explicitly requested. 
+ */ + 18: list additional_field_ids = [] + + // OBSOLETE 19: bool include_topic_labels = 0 + + /** + * Exclude user-reported tweets from this request. Only applicable if + * forUserId is set. + * + * Users can report individual tweets in the UI as uninteresting, spam, + * sensitive, or abusive. + */ + 20: bool exclude_reported = 0 + + // if set to true, disables suggested tweet visibility checks + // OBSOLETE (TwitterSuggestInfo version of suggested tweets has been removed) + 21: bool obsolete_skip_twitter_suggests_visibility_check = 0 + // OBSOLETE 22: optional set spam_signal_types + + /** + * Return the quoted tweet in GetTweetResult.quoted_tweet + */ + 23: bool include_quoted_tweet = 0 + + /** + * Content filtering policy that will be used to drop or suppress tweets + * from response. The filtering is based on the result of Visibility Library + * and does not affect filtering of tweets from blocked or non-followed protected users, see + * request field 15 bypass_visibility_filtering above + * + * If not specified SafetyLevel.FilterDefault will be used. + */ + 24: optional safety_level.SafetyLevel safety_level + + // obsolete 25: bool include_animated_gif_media_entities = 0 + 26: bool include_profile_geo_enrichment = 0 + // obsolete 27: optional set extensions + 28: bool include_tweet_pivots = 0 + + /** + * The argument passed to the Stratostore extension points mechanism. + */ + 29: optional binary extensions_args + + /** + * Return the number of times a tweet has been quoted in Tweet.counts.quote_count + */ + 30: bool include_quote_count = 0 + + /** + * Return media metadata from MediaInfoService in MediaEntity.additional_metadata + */ + 31: bool include_media_additional_metadata = 0 + + /** + * Populate the conversation_muted field of the Tweet for the requesting + * user. + * + * Setting this to true will have no effect unless for_user_id is set. 
+ */ + 32: bool include_conversation_muted = 0 + + /** + * @deprecated go/sunsetting-carousels + */ + 33: bool include_carousels = 0 + + /** + * When enable_deleted_state is true and we have evidence that the + * tweet once existed and was deleted, Tweetypie returns + * StatusState.DELETED or StatusState.BOUNCE_DELETED. (See comments + * on StatusState for details on these two states.) + * + * When enable_deleted_state is false, deleted tweets are + * returned as StatusState.NOT_FOUND. + * + * Note: even when enable_deleted_state is true, a deleted tweet may + * still be returned as StatusState.NOT_FOUND due to eventual + * consistency. + * + * This option is false by default for compatibility with clients + * expecting StatusState.NOT_FOUND. + */ + 34: bool enable_deleted_state = 0 + + /** + * Populate the conversation_owner_id field of the Tweet for the requesting + * user. This translates into is_conversation_owner in birdherd + * + */ + // obsolete 35: bool include_conversation_owner_id = 0 + + /** + * Populate the is_removed_from_conversation field of the Tweet for the requesting + * user. + * + */ + // obsolete 36: bool include_is_removed_from_conversation = 0 + + // To retrieve self-thread metadata request field Tweet.SelfThreadMetadataField + // obsolete 37: bool include_self_thread_info = 0 + + /** + * This option surfaces CardReference field (118) in Tweet thrift object. + * We use card_uri present in card reference, to get access to stored card information. + */ + 37: bool include_card_uri = 0 + + /** + * When returning a tweet that quotes another tweet, do not include + * the URL to the quoted tweet in the tweet text and url entities. + * This is intended for clients that use the quoted_tweet field of + * the tweet to display quoted tweets. + */ + 38: bool simple_quoted_tweet = 0 + + /** + * This flag is used and only takes effect if the requested tweet is a creatives container backed + * tweet.
This will suppress the tweet materialization and return tweet not found. + * + * go/creatives-containers-tdd + **/ + 39: bool disable_tweet_materialization = 0 + + + /** + * Used for load shedding. If set to true, Tweetypie service might shed the request, if the service + * is struggling. + **/ + 40: optional bool is_request_sheddable + +}(hasPersonalData = 'true') + +struct GetTweetsRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + // @deprecated unused + 2: optional list source_tweet_id_hints (personalDataType = 'TweetId') + 3: optional GetTweetOptions options + // @deprecated unused + 4: optional list quoted_tweet_id_hints (personalDataType = 'TweetId') +}(hasPersonalData = 'true') + +/** + * Can be used to reference an arbitrary nested field of some struct via + * a list of field IDs describing the path of fields to reach the referenced + * field. + */ +struct FieldByPath { + 1: required list field_id_path +} + +struct GetTweetResult { + 1: required i64 tweet_id (personalDataType = 'TweetId') + + /** + * Indicates what happened when the tweet was loaded. + */ + 2: required StatusState tweet_state + + /** + * The requested tweet when tweet_state is `FOUND`, `PARTIAL`, or `SUPPRESS`. + * + * This field will be set if the tweet exists, access is authorized, + * and enough data about the tweet is available to materialize a + * tweet. When this field is set, you should look at the tweet_state + * field to determine how to treat this tweet. + * + * If tweet_state is FOUND, then this tweet is complete and passes the + * authorization checks requested in GetTweetOptions. (See + * GetTweetOptions.for_user_id for more information about authorization.)
+ * + * If tweet_state is PARTIAL, then enough data was available to return + * a tweet, but there was an error when loading the tweet that prevented + * some data from being returned (for example, if a request to the cards + * service times out when cards were requested, then the tweet will be + * marked PARTIAL). `missing_fields` indicates which parts of the tweet + * failed to load. When you receive a PARTIAL tweet, it is up to you + * whether to proceed with the degraded tweet data or to consider it a + * failure. For example, a mobile client might choose to display a + * PARTIAL tweet to the user, but not store it in an internal cache. + * + * If tweet_state is SUPPRESS, then the tweet is complete, but soft + * filtering is enabled. This state is intended to hide potentially + * harmful tweets from user's view while not taking away the option for + * the user to override our filtering decision. See http://go/rtf + * (render-time filtering) for more information about how to treat these + * tweets. + */ + 3: optional tweet.Tweet tweet + + /** + * The tweet fields that could not be loaded when tweet_state is `PARTIAL` + * or `SUPPRESS`. + * + * This field will be set when the `tweet_state` is `PARTIAL`, and may + * be set when `tweet_state` is SUPPRESS. It indicates degraded data in + * the `tweet`. Each entry in `missing_fields` indicates a traversal of + * the `Tweet` thrift object terminating at the field that is + * missing. For most non-core fields, the path will just be the field id + * of the field that is missing. + * + * For example, if card2 failed to load for a tweet, the `tweet_state` + * will be `PARTIAL`, the `tweet` field will be set, the Tweet's `card2` + * field will be empty, and this field will be set to: + * + * Set(FieldByPath(Seq(17))) + */ + 4: optional set missing_fields + + /** + * The original tweet when `tweet` is a retweet and + * GetTweetOptions.include_source_tweet is true. 
+ */ + 5: optional tweet.Tweet source_tweet + + /** + * The retweet fields that could not be loaded when tweet_state is `PARTIAL`. + */ + 6: optional set source_tweet_missing_fields + + /** + * The quoted tweet when `tweet` is a quote tweet and + * GetTweetOptions.include_quoted_tweet is true. + */ + 7: optional tweet.Tweet quoted_tweet + + /** + * The quoted tweet fields that could not be loaded when tweet_state is `PARTIAL`. + */ + 8: optional set quoted_tweet_missing_fields + + /** + * The reason that a tweet should not be displayed when tweet_state is + * `SUPPRESS` or `DROP`. + */ + 9: optional safety_result.FilteredReason filtered_reason + + /** + * Hydrated carousel if the tweet contains a carousel URL and the + * GetTweetOptions.include_carousels is true. + * + * In this case Carousel Service is requested to hydrate the carousel, and + * the result is stored in this field. + * + * @deprecated go/sunsetting-carousels + */ + 10: optional carousel_service.GetCarouselResult carousel_result + + /** + * If a quoted tweet would be present, but it was filtered out, then + * this field will be set to the reason that it was filtered. + */ + 11: optional safety_result.FilteredReason quoted_tweet_filtered_reason +}(hasPersonalData = 'true') + +union TweetInclude { + /** + * Field ID within the `Tweet` struct to include. All fields may be optionally included + * except for the `id` field. + */ + 1: FieldId tweetFieldId + + /** + * Field ID within the `StatusCounts` struct to include. Only specifically requested + * count fields will be included. Including any `countsFieldId` values automatically + * implies including `Tweet.counts`. + * + */ + 2: FieldId countsFieldId + + /** + * Field ID within the `MediaEntity` struct to include.
Currently, only `MediaEntity.additionalMetadata` + * may be optionally included (i.e., it will not be included by default if you include + * `tweetFieldId` = `Tweet.media` without also including `mediaEntityFieldId` = + * `MediaEntity.additionalMetadata`). Including any `mediaEntityFieldId` values automatically + * implies including `Tweet.media`. + */ + 3: FieldId mediaEntityFieldId +} + +/** + * An enumeration of policy options indicating how tweets should be filtered (protected tweets, blocked quote tweets, + * contributor data, etc.). This does not affect Visibility Library (http://go/vf) based filtering. + * This is equivalent to `bypass_visibility_filtering` in the get_tweets() call. This means that + * `TweetVisibilityPolicy.NO_FILTERING` is equivalent to `bypass_visibility_filtering` = true + */ +enum TweetVisibilityPolicy { + /** + * only return tweets that should be visible to either the `forUserId` user, if specified, + * or from the perspective of a logged-out user if `forUserId` is not specified. This option + * should always be used if requesting data to be returned via the public API. + */ + USER_VISIBLE = 1, + + /** + * returns all tweets that can be found, regardless of user visibility. This option should + * never be used when gathering data to be returned in an API, and should only be used for internal + * processing. Because this option allows access to potentially sensitive data, clients + * must be whitelisted to use it. + */ + NO_FILTERING = 2 +} + +struct GetTweetFieldsOptions { + /** + * Identifies which `Tweet` or nested fields to include in the response. + */ + 1: required set tweet_includes + + /** + * If true and the requested tweet is a retweet, then a `Tweet` + * containing the requested fields for the retweeted tweet will be + * included in the response.
+ */ + 2: bool includeRetweetedTweet = 0 + + /** + * If true and the requested tweet is a quote-tweet, then the quoted + * tweet will also be queried and the result for the quoted tweet + * included in `GetTweetFieldsResult.quotedTweetResult`. + */ + 3: bool includeQuotedTweet = 0 + + /** + * If true and the requested tweet contains a carousel URL, then the + * carousel will also be queried and the result for the carousel + * included in `GetTweetFieldsResult.carouselResult`. + * + * @deprecated go/sunsetting-carousels + */ + 4: bool includeCarousel = 0 + + /** + * If you are requesting tweets on behalf of a user, set this to their + * user id. The effect of setting this option is: + * + * - Tweetypie will return protected tweets that the user is allowed + * to access, rather than filtering out protected tweets, when `visibility_policy` + * is set to `USER_VISIBLE`. + * + * - If this field is set *and* `Tweet.perspective` is requested, then + * the tweets will have the `perspective` field set to a struct with + * flags that indicate whether the user has favorited, retweeted, or + * reported the tweet in question. + */ + 10: optional i64 forUserId (personalDataType = 'UserId') + + /** + * language_tag is used when hydrating a `Place` object, to get localized names. + * Also passed to the cards service, if cards are hydrated for the request. + */ + 11: optional string languageTag (personalDataType = 'InferredLanguage') + + /** + * if requesting card2 cards, you must specify the platform key + */ + 12: optional string cardsPlatformKey (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') + + /** + * The argument passed to the Stratostore extension points mechanism. + */ + 13: optional binary extensionsArgs + + /** + * the policy to use when filtering tweets for basic visibility. 
+ */ + 20: TweetVisibilityPolicy visibilityPolicy = TweetVisibilityPolicy.USER_VISIBLE + + /** + * Content filtering policy that will be used to drop or suppress tweets from response. + * The filtering is based on the result of Visibility Library (http://go/vf) + * and does not affect filtering of tweets from blocked or non-followed protected users, see + * request field 20 visibilityPolicy above + * + * If not specified SafetyLevel.FilterNone will be used. + */ + 21: optional safety_level.SafetyLevel safetyLevel + + /** + * The tweet result won't be cached by Tweetypie if doNotCache is true. + * You should set it as true if old tweets (older than 30 days) are requested, + * and they are unlikely to be requested again. + */ + 30: bool doNotCache = 0 + + /** + * When returning a tweet that quotes another tweet, do not include + * the URL to the quoted tweet in the tweet text and url entities. + * This is intended for clients that use the quoted_tweet field of + * the tweet to display quoted tweets. + * + */ + 31: bool simple_quoted_tweet = 0 + + /** + * This flag is used and only takes effect if the requested tweet is a creatives container backed + * tweet. This will suppress the tweet materialization and return tweet not found. + * + * go/creatives-containers-tdd + **/ + 32: bool disable_tweet_materialization = 0 + + /** + * Used for load shedding. If set to true, Tweetypie service might shed the request, if the service + * is struggling. + **/ + 33: optional bool is_request_sheddable +}(hasPersonalData = 'true') + +struct GetTweetFieldsRequest { + 1: required list tweetIds (personalDataType = 'TweetId') + 2: required GetTweetFieldsOptions options +} (hasPersonalData = 'true') + +/** + * Used in `TweetFieldsResultState` when the requested tweet is found. + */ +struct TweetFieldsResultFound { + 1: required tweet.Tweet tweet + + /** + * If `tweet` is a retweet, `retweetedTweet` will be the retweeted tweet.
+ * Just like with the requested tweet, only the requested fields will be + * hydrated and set on the retweeted tweet. + */ + 2: optional tweet.Tweet retweetedTweet + + /** + * If specified, then the tweet should be soft filtered. + */ + 3: optional safety_result.FilteredReason suppressReason +} + +/** + * Used in `TweetFieldsResultState` when the requested tweet is not found. + */ +struct TweetFieldsResultNotFound { + // If this field is true, then we know that the tweet once existed and + // has since been deleted. + 1: bool deleted = 0 + + // This tweet is deleted after being bounced for violating the Twitter + // Rules and should never be rendered or undeleted. See go/bounced-tweet + // In certain timelines we render a tombstone in its place. + 2: bool bounceDeleted = 0 + + // The reason that a tweet should not be displayed. See go/vf-tombstones-in-tweetypie + // Tweets that are not found do not go through Visibility Filtering rule evaluation and thus + // are not `TweetFieldsResultFiltered`, but may still have a filtered_reason that distinguishes + // whether the unavailable tweet should be tombstoned or hard-filtered based on the Safety Level. + 3: optional safety_result.FilteredReason filtered_reason +} + +struct TweetFieldsPartial { + 1: required TweetFieldsResultFound found + + /** + * The tweet fields that could not be loaded when hydration fails + * and a backend fails with an exception. This field is populated + * when a tweet is "partially" hydrated, i.e. some fields were + * successfully fetched while others were not. + * + * It indicates degraded data in the `tweet`. Each entry in `missingFields` + * indicates a traversal of the `Tweet` thrift object terminating at + * the field that is missing. For most non-core fields, the path will + * just be the field id of the field that is missing.
+ * + * For example, if card2 failed to load for a tweet, the tweet is marked "partial", + * the `tweet` field will be set, the Tweet's `card2` + * field will be empty, and this field will be set to: + * + * Set(FieldByPath(Seq(17))) + */ + 2: required set missingFields + + /** + * Same as `missingFields` but for the source tweet in case the requested tweet + * was a retweet. + */ + 3: required set sourceTweetMissingFields +} +/** + * Used in `TweetFieldsResultState` when there was a failure loading the requested tweet. + */ +struct TweetFieldsResultFailed { + /** + * If true, the failure was the result of backpressure, which means the request + * should not be immediately retried. It is safe to retry again later. + * + * If false, the failure is probably transient and safe to retry immediately. + */ + 1: required bool overCapacity + + /** + * An optional message about the cause of the failure. + */ + 2: optional string message + + /** + * This field is populated when some tweet fields fail to load and the + * tweet is marked "partial" in tweetypie. It contains the tweet/RT + * information along with the set of tweet fields that failed to + * get populated. + */ + 3: optional TweetFieldsPartial partial +} + +/** + * Used in `TweetFieldsResultState` when the requested tweet has been filtered out. + */ +struct TweetFieldsResultFiltered { + 1: required safety_result.FilteredReason reason +} + +/** + * A union of the different possible outcomes of fetching a single tweet. + */ +union TweetFieldsResultState { + 1: TweetFieldsResultFound found + 2: TweetFieldsResultNotFound notFound + 3: TweetFieldsResultFailed failed + 4: TweetFieldsResultFiltered filtered +} + +/** + * The response to get_tweet_fields will include a GetTweetFieldsResult for each + * requested tweet id. + */ +struct GetTweetFieldsResult { + /** + * The id of the requested tweet.
+ */ + 1: required i64 tweetId (personalDataType = 'TweetId') + + /** + * the result for the requested tweet + */ + 2: required TweetFieldsResultState tweetResult + + /** + * If quoted-tweets were requested and the primary tweet was found, + * this field will contain the result state for the quoted tweet. + */ + 3: optional TweetFieldsResultState quotedTweetResult + + /** + * If the primary tweet was found, carousels were requested and there + * was a carousel URL in the primary tweet, this field will contain the + * result for the carousel. + * + * @deprecated + */ + 4: optional carousel_service.GetCarouselResult carouselResult +} + +struct TweetCreateConversationControlByInvitation { + 1: optional bool invite_via_mention +} + +struct TweetCreateConversationControlCommunity { + 1: optional bool invite_via_mention +} + +struct TweetCreateConversationControlFollowers { + 1: optional bool invite_via_mention +} + +/** + * Specify limits on user participation in a conversation. + * + * This is a union rather than a struct to support adding conversation + * controls that require carrying metadata along with them, such as a list id.
+ * + * See also: + * Tweet.conversation_control + * PostTweetRequest.conversation_control + */ +union TweetCreateConversationControl { + 1: TweetCreateConversationControlCommunity community + 2: TweetCreateConversationControlByInvitation byInvitation + 3: TweetCreateConversationControlFollowers followers +} + +/* + * Specifies the exclusivity of a tweet + * This limits the audience of the tweet to the author + * and the author's super followers + * While empty now, we are expecting to add additional fields in v1+ + */ +struct ExclusiveTweetControlOptions {} + +struct TrustedFriendsControlOptions { + 1: i64 trusted_friends_list_id = 0 (personalDataType = 'TrustedFriendsListMetadata') +}(hasPersonalData = 'true') + +struct CollabInvitationOptions { + 1: required list collaborator_user_ids (personalDataType = 'UserId') + // Note: status not sent here, will be added in TweetBuilder to set all but author as PENDING +} + +struct CollabTweetOptions { + 1: required list collaborator_user_ids (personalDataType = 'UserId') +} + +union CollabControlOptions { + 1: CollabInvitationOptions collabInvitation + 2: CollabTweetOptions collabTweet +} + +/** + * When this struct is supplied, this PostTweetRequest is interpreted as + * an edit of the Tweet whose latest version is represented by previous_tweet_id. + * If this is the first edit of a Tweet, this will be the same as the initial_tweet_id. + **/ +struct EditOptions { + /** + * The ID of the previous latest version of the Tweet that is being edited. + * If this is the first edit, this will be the same as the initial_tweet_id. + **/ + 1: required i64 previous_tweet_id (personalDataType = 'TweetId') +} + +struct NoteTweetOptions { + /** + * The ID of the NoteTweet to be associated with this Tweet. 
+ **/ + 1: required i64 note_tweet_id (personalDataType = 'TwitterArticleID') + // Deprecated + 2: optional list mentioned_screen_names (personalDataType = 'Username') + /** + * The user IDs of the mentioned users + **/ + 3: optional list mentioned_user_ids (personalDataType = 'UserId') + /** + * Specifies if the Tweet can be expanded into the NoteTweet, or if they have the same text + **/ + 4: optional bool is_expandable +} + +struct PostTweetRequest { + /** + * Id of the user creating the tweet. + */ + 1: required i64 user_id (personalDataType = 'UserId') + + /** + * The user-supplied text of the tweet. + */ + 2: required string text (personalDataType = 'PrivateTweets, PublicTweets') + + /** + * The OAuth client application from which the creation request originated. + * + * This must be in the format "oauth:". For requests + * from a user this is the application id of their client; for internal + * services this is the id of an associated application registered at + * https://apps.twitter.com. + */ + 3: required string created_via (personalDataType = 'ClientType') + + 4: optional i64 in_reply_to_tweet_id (personalDataType = 'TweetId') + 5: optional TweetCreateGeo geo + 6: optional list media_upload_ids (personalDataType = 'MediaId') + 7: optional tweet.Narrowcast narrowcast + + /** + * Do not deliver this tweet to a user's followers. + * + * When true this tweet will not be fanned out, appear in the user's + * timeline, or appear in search results. It will be distributed via the + * firehose and available in the public API. + * + * This is primarily used to create tweets that can be used as ads without + * broadcasting them to an advertiser's followers. + * + */ + 8: bool nullcast = 0 + + /** + * The impression id of the ad from which this tweet was created. + * + * This is set when a user retweets or replies to a promoted tweet. It is + * used to attribute the "earned" exposure of an advertisement. 
+ */ + 9: optional i64 tracking_id (personalDataType = 'ImpressionId') + + /** + * @deprecated. + * TOO clients don't actively use this input param, and the v2 API does not plan + * to expose this parameter. The value associated with this field that's + * stored with a tweet is obtained from the user's account preferences stored in + * `User.safety.nsfw_user`. (See go/user.thrift for more details on this field) + * + * Field indicates whether an individual tweet may contain objectionable content. + * + * If specified, tweet.core_data.nsfw_user will equal this value (otherwise, + * tweet.core_data.nsfw_user will be set to user.nsfw_user). + */ + 10: optional bool possibly_sensitive + + /** + * Do not save, index, fanout, or otherwise persist this tweet. + * + * When true, the tweet is validated, created, and returned but is not + * persisted. This can be used for dark testing or pre-validating a tweet + * scheduled for later creation. + */ + 11: bool dark = 0 + + /** + * IP address of the user making the request. + * + * This is used for logging certain kinds of actions, like attempting to + * tweet malware urls. + */ + 12: optional string remote_host (personalDataType = 'IpAddress') + + /** + * Additional fields to write with this tweet. + * + * This Tweet object should contain only additional fields to write with + * this tweet. Additional fields are tweet fields with id > 100. Set + * tweet.id to be 0; the id will be generated by Tweetypie. Any other non- + * additional fields set on this tweet will be considered an invalid + * request. + * + */ + 14: optional tweet.Tweet additional_fields + + 15: optional WritePathHydrationOptions hydration_options + + // OBSOLETE 16: optional bool bypass_rate_limit_for_xfactor + + /** + * ID to explicitly identify a creation request for the purpose of rejecting + * duplicates. + * + * If two requests are received with the same uniqueness_id, then they will + * be considered duplicates of each other.
This only applies for tweets + * created within the same datacenter. This id should be a snowflake id so + * that it's globally unique. + */ + 17: optional i64 uniqueness_id (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') + + 18: optional feature_context.FeatureContext feature_context + + /** + * Passthrough data for Scarecrow that is used for safety checks. + */ + 19: optional safety_meta_data.SafetyMetaData safety_meta_data + + // OBSOLETE 20: bool community_narrowcast = 0 + + /** + * Toggle narrowcasting behavior for leading @mentions. + * + * If in_reply_to_tweet_id is not set: + * - When this flag is true and the tweet text starts with a leading mention then the tweet + * will be narrowcasted. + * + * If in_reply_to_tweet_id is set: + * - If auto_populate_reply_metadata is true + * - Setting this flag to true will use the default narrowcast determination logic where + * most replies will be narrowcast but some special-cases of self-replies will not. + * - Setting this flag to false will disable narrowcasting and the tweet will be fanned out + * to all the author's followers. Previously users prefixed their reply text with "." to + * achieve this effect. + * - If auto_populate_reply_metadata is false, this flag will control whether a leading + * mention in the tweet text will be narrowcast (true) or broadcast (false). + */ + 21: bool enable_tweet_to_narrowcasting = 1 + + /** + * Automatically populate replies with leading mentions from tweet text. + */ + 22: bool auto_populate_reply_metadata = 0 + + /** + * Metadata at the tweet-asset relationship level. + */ + 23: optional map media_metadata + + /** + * An optional URL that identifies a resource that is treated as an attachment of the + * tweet, such as a quote-tweet permalink. + * + * When provided, it is appended to the end of the tweet text, but is not + * included in the visible_text_range.
+ */ + 24: optional string attachment_url (personalDataType = 'CardId, ShortUrl') + + /** + * Pass-through information to be published in `TweetCreateEvent`. + * + * This data is not persisted by Tweetypie. + * + * @deprecated prefer transient_context (see field 27) over this. + */ + 25: optional map additional_context + + /** + * Users to exclude from the automatic reply population behavior. + * + * When auto_populate_reply_metadata is true, screen names appearing in the + * mention prefix can be excluded by specifying a corresponding user id in + * exclude_reply_user_ids. Because the mention prefix must always include + * the leading mention to preserve directed-at addressing for the in-reply- + * to tweet author, attempting to exclude that user id will have no effect. + * Specifying a user id not in the prefix will be silently ignored. + */ + 26: optional list exclude_reply_user_ids (personalDataType = 'UserId') + + /** + * Used to pass structured data to Tweetypie and tweet_events eventbus + * stream consumers. This data is not persisted by Tweetypie. + * + * If adding a new passthrough field, prefer this over additional_context, + * as this is structured data, while additional_context is text data. + */ + 27: optional transient_context.TransientCreateContext transient_context + + /** + * Composer flow used to create this tweet. Unless using the News Camera (go/newscamera) + * flow, this should be `STANDARD`. + * + * When set to `CAMERA`, clients are expected to display the tweet with a different UI + * to emphasize attached media. 
+ */ + 28: optional tweet.ComposerSource composer_source + + /** + * present if we want to restrict replies to this tweet (go/dont-at-me-api) + * - This gets converted to Tweet.conversation_control and changes type + * - This is only valid for conversation root tweets + * - This applies to all replies to this tweet + */ + 29: optional TweetCreateConversationControl conversation_control + + // OBSOLETE 30: optional jiminy.CreateNudgeOptions nudge_options + + /** + * Provided if the client wants to have the tweet create evaluated for a nudge (e.g. to notify + * the user that they are about to create a toxic tweet). Reference: go/docbird/jiminy + */ + 31: optional jiminy.CreateTweetNudgeOptions nudge_options + + /** + * Provided for correlating requests originating from REST endpoints and GraphQL endpoints. + * Its presence or absence does not affect Tweet mutation. It is used for validation + * and debugging. The expected format is a 36-character ASCII UUIDv4. + * Please see API specification at go/graphql-tweet-mutations for more information. + */ + 32: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') + + /** + * Options that determine the shape of an exclusive tweet's restrictions. + * The existence of this object indicates that the tweet is intended to be an exclusive tweet. + * While this is an empty structure for now, it will have fields added to it in later versions. + */ + 33: optional ExclusiveTweetControlOptions exclusiveTweetControlOptions + + 34: optional TrustedFriendsControlOptions trustedFriendsControlOptions + + /** + * Provided if tweet data is backed up by a creative container, that at tweet hydration + * time, tweetypie would delegate to creative container service. + * + * go/creatives-containers-tdd + * Please note that this id is never publicly shared with clients; it is only used for + * internal purposes.
+ */ + 35: optional i64 underlying_creatives_container_id (personalDataType = 'TweetId') + + /** + * Provided if tweet is a CollabTweet or a CollabInvitation, along with a list of Collaborators + * which includes the original author. + * + * go/collab-tweets + **/ + 36: optional CollabControlOptions collabControlOptions + + /** + * When supplied, this PostTweetRequest is an edit. See [[EditOptions]] for more details. + **/ + 37: optional EditOptions editOptions + + /** + * When supplied, the NoteTweet specified is associated with the created Tweet. + **/ + 38: optional NoteTweetOptions noteTweetOptions +} (hasPersonalData = 'true') + +struct SetAdditionalFieldsRequest { + 1: required tweet.Tweet additional_fields +} + +struct DeleteAdditionalFieldsRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + 2: required list field_ids +}(hasPersonalData = 'true') + +struct DeleteTweetsRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + // DEPRECATED and moved to tweetypie_internal.thrift's CascadedDeleteTweetsRequest + 2: optional i64 cascaded_from_tweet_id (personalDataType = 'TweetId') + 3: optional tweet_audit.AuditDeleteTweet audit_passthrough + + /** + * The id of the user initiating this request. + * + * It could be either the owner of the tweet or an admin. If not specified + * we will use TwitterContext.userId. + */ + 4: optional i64 by_user_id (personalDataType = 'UserId') + + + /** + * Whether these tweets are being deleted as part of a user erasure, the process + * of deleting tweets belonging to deactivated accounts. + * + * This lets backends optimize processing of mass deletes of tweets from the + * same user. Talk to the Tweetypie team before setting this flag. + */ + 5: bool is_user_erasure = 0 + + /** + * Id to compare with the user id of the tweets being deleted. + * + * This provides extra protection against accidental deletion of tweets. + * This is required when is_user_erasure is true.
If any of the tweets + * specified in tweet_ids do not match expected_user_id a + * EXPECTED_USER_ID_MISMATCH state will be returned. + */ + 6: optional i64 expected_user_id (personalDataType = 'UserId') + + /** + * A bounced tweet is a tweet that has been found to violate Twitter Rules. + * This is represented as a tweet with its bounce_label field set. + * + * When the Tweet owner deletes their offending bounced tweet in the Bounced workflow, Bouncer + * will submit a delete request with `is_bounce_delete` set to true. If the tweet(s) being deleted + * have a bounce_label set, this request results in the tweet transitioning into the + * BounceDeleted state which means the tweet is partially deleted. + * + * Most of the normal tweet deletion side-effects occur but the tweet remains in a + * few tflock graphs, tweet cache, and a Manhattan marker is added. Other than timelines services, + * bounce deleted tweets are considered deleted and will return a StatusState.BounceDelete. + * + * After a defined grace period, tweets in this state will be fully deleted. + * + * If the tweet(s) being deleted do not have the bounce_label set, they will be deleted as usual. + * + * Other than Bouncer, no service should use `is_bounce_delete` flag. + */ + 7: bool is_bounce_delete = 0 + + /** + * This is a unique identifier used in both the REST and GraphQL-dark + * requests that will be used to correlate the GraphQL mutation requests to the REST requests + * during a transition period when clients will be moving toward tweet creation via GraphQL. + * See also, the "Comparison Testing" section at go/tweet-create-on-graphql-tdd for additional + * context. + */ + 8: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') + + /** + * When an edited tweet is deleted via daemons, we take a different action + * than if it was deleted normally. If deleted normally, we delete the + * initial tweet in the chain. 
When deleted via daemons, we delete the actual tweet. + */ + 9: optional bool cascaded_edited_tweet_deletion +}(hasPersonalData = 'true') + +struct DeleteTweetResult { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: required TweetDeleteState state +}(hasPersonalData = 'true') + +struct UnretweetResult { + /** + * Id of the retweet that was deleted if a retweet could be found. + */ + 1: optional i64 tweet_id (personalDataType = 'TweetId') + + 2: required TweetDeleteState state +}(hasPersonalData = 'true') + +struct PostTweetResult { + 1: required TweetCreateState state + + /** + * The created tweet when state is OK. + */ + 2: optional tweet.Tweet tweet + + /** + * The original tweet when state is OK and tweet is a retweet. + */ + 3: optional tweet.Tweet source_tweet + + /** + * The quoted tweet when state is OK and tweet is a quote tweet. + */ + 4: optional tweet.Tweet quoted_tweet + + /** + * The required user remediation from Scarecrow when state is BOUNCE. + */ + 5: optional bounce.Bounce bounce + + /** + * Additional information when TweetCreateState is not OK. + * + * Not all failures provide a reason. + */ + 6: optional string failure_reason + + // OBSOLETE 7: optional jiminy.Nudge nudge + + /** + * Returned when the state is NUDGE to indicate that the tweet has not been created, and that + * the client should instead display the nudge to the user. Reference: go/docbird/jiminy + */ + 8: optional jiminy.TweetNudge nudge +} (persisted = "true", hasPersonalData = "true") + +/** + * Specifies the cause of an AccessDenied error. + */ +enum AccessDeniedCause { + // obsolete: INVALID_CLIENT_ID = 0, + // obsolete: DEPRECATED = 1, + USER_DEACTIVATED = 2, + USER_SUSPENDED = 3, + + RESERVED_4 = 4, + RESERVED_5 = 5, + RESERVED_6 = 6 +} + +/** + * AccessDenied error is returned by delete_tweets endpoint when + * by_user_id is suspended or deactivated. 
+ */ +exception AccessDenied { + 1: required string message + 2: optional AccessDeniedCause errorCause +} + +struct UndeleteTweetRequest { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: optional WritePathHydrationOptions hydration_options + + /** + * Perform the side effects of undeletion even if the tweet is not deleted. + * + * This flag is useful if you know that the tweet is present in Manhattan + * but is not undeleted with respect to other services. + */ + 3: optional bool force +}(hasPersonalData = 'true') + +struct UndeleteTweetResponse { + 1: required UndeleteTweetState state + 2: optional tweet.Tweet tweet +} + +struct EraseUserTweetsRequest { + 1: required i64 user_id (personalDataType = 'UserId') +}(hasPersonalData = 'true') + +struct UnretweetRequest { + /** + * The id of the user who owns the retweet. + */ + 1: required i64 user_id (personalDataType = 'UserId') + + /** + * The source tweet that should be unretweeted. + */ + 2: required i64 source_tweet_id (personalDataType = 'TweetId') + + /** + * This is a unique identifier used in both the REST and GraphQL-dark + * requests that will be used to correlate the GraphQL mutation requests to the REST requests + * during a transition period when clients will be moving toward tweet creation via GraphQL. + * See also, the "Comparison Testing" section at go/tweet-create-on-graphql-tdd for additional + * context. + */ + 3: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') +}(hasPersonalData = 'true') + +struct GetDeletedTweetsRequest { + 1: required list tweetIds (personalDataType = 'TweetId') +}(hasPersonalData = 'true') + +struct GetDeletedTweetResult { + 1: required i64 tweetId (personalDataType = 'TweetId') + 2: required DeletedTweetState state + 4: optional deleted_tweet.DeletedTweet tweet +}(hasPersonalData = 'true') + +/** + * Flushes tweets and/or their counts from cache. 
+ * + * Typically will be used manually for testing or when a particular problem is + * found that needs to be fixed by hand. Defaults to flushing both tweet + * struct and associated counts. + */ +struct FlushRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + 2: bool flushTweets = 1 + 3: bool flushCounts = 1 +}(hasPersonalData = 'true') + +/** + * A request to retrieve counts for one or more tweets. + */ +struct GetTweetCountsRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + 2: bool include_retweet_count = 0 + 3: bool include_reply_count = 0 + 4: bool include_favorite_count = 0 + 5: bool include_quote_count = 0 + 6: bool include_bookmark_count = 0 +}(hasPersonalData = 'true') + +/** + * A response optionally indicating one or more counts for a tweet. + */ +struct GetTweetCountsResult { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: optional i64 retweet_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets') + 3: optional i64 reply_count (personalDataType = 'CountOfPrivateReplies, CountOfPublicReplies') + 4: optional i64 favorite_count (personalDataType = 'CountOfPrivateLikes, CountOfPublicLikes') + 5: optional i64 quote_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets') + 6: optional i64 bookmark_count (personalDataType = 'CountOfPrivateLikes') +}(hasPersonalData = 'true') + +/** + * A request to increment the cached favorites count for a tweet. + * + * Negative values decrement the count. This request is automatically + * replicated to other data centers. + */ +struct IncrTweetFavCountRequest { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: required i32 delta (personalDataType = 'CountOfPrivateLikes, CountOfPublicLikes') +}(hasPersonalData = 'true') + +/** + * A request to increment the cached bookmarks count for a tweet. + * + * Negative values decrement the count. This request is automatically + * replicated to other data centers. 
+ */ +struct IncrTweetBookmarkCountRequest { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: required i32 delta (personalDataType = 'CountOfPrivateLikes') +}(hasPersonalData = 'true') + +/** + * Request to scrub geolocation from 1 or more tweets, and replicates to other + * data centers. + */ +struct GeoScrub { + 1: required list status_ids (personalDataType = 'TweetId') + // OBSOLETE 2: bool write_through = 1 + 3: bool hosebird_enqueue = 0 + 4: i64 user_id = 0 (personalDataType = 'UserId') // should always be set for hosebird enqueue +}(hasPersonalData = 'true') + +/** + * Contains different indicators of a tweets "nsfw" status. + */ +struct NsfwState { + 1: required bool nsfw_user + 2: required bool nsfw_admin + 3: optional safety_label.SafetyLabel nsfw_high_precision_label + 4: optional safety_label.SafetyLabel nsfw_high_recall_label +} + +/** + * Interface to Tweetypie + */ +service TweetService { + /** + * Performs a multi-get of tweets. This endpoint is geared towards fetching + * tweets for the API, with many fields returned by default. + * + * The response list is ordered the same as the requested ids list. + */ + list get_tweets(1: GetTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs a multi-get of tweets. This endpoint is geared towards internal + * processing that needs only specific subsets of the data. + * + * The response list is ordered the same as the requested ids list. 
+ */ + list get_tweet_fields(1: GetTweetFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute a {@link GetTweetCountsRequest} and return one or more {@link GetTweetCountsResult} + */ + list get_tweet_counts(1: GetTweetCountsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Set/Update additional fields on an existing tweet + */ + void set_additional_fields(1: SetAdditionalFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Delete additional fields on a tweet + */ + void delete_additional_fields(1: DeleteAdditionalFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Creates and saves a tweet. + * + * URLs contained in the text will be shortened via Talon. Validations that are + * handled by this endpoint include: + * + * - tweet length not greater than 140 display characters, after URL shortening; + * - tweet is not a duplicate of a recently created tweet by the same user; + * - user is not suspended or deactivated; + * - text does not contain malware urls, as determined by talon; + * + * Checks that are not handled here that should be handled by the web API: + * - oauth authentication; + * - client application has narrowcasting/nullcasting privileges; + */ + PostTweetResult post_tweet(1: PostTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Creates and saves a retweet. 
+ * + * Validations that are handled by this endpoint include: + * + * - source tweet exists; + * - source-tweet user exists and is not suspended or deactivated; + * - source-tweet user is not blocking retweeter; + * - user has not already retweeted the source tweet; + * + * Checks that are not handled here that should be handled by the web API: + * - oauth authentication; + * - client application has narrowcasting/nullcasting privileges; + */ + PostTweetResult post_retweet(1: RetweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Remove tweets. It removes all associated fields of the tweets in + * cache and the persistent storage. + */ + list delete_tweets(1: DeleteTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error, + 3: AccessDenied access_denied) + + /** + * Restore a deleted Tweet. + * + * Tweets exist in a soft-deleted state for N days during which they can be + * restored by support agents following the internal restoration guidelines. + * If the undelete succeeds, the Tweet is given similar treatment to a new + * tweet e.g inserted into cache, sent to the timeline service, reindexed by + * TFlock etc. + */ + UndeleteTweetResponse undelete_tweet(1: UndeleteTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Add or remove takedown countries associated with a Tweet. + */ + void takedown(1: TakedownRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Set or unset the nsfw_admin and/or nsfw_user bit of tweet.core_data. + **/ + void update_possibly_sensitive_tweet(1: UpdatePossiblySensitiveTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error + ) + + /** + * Delete all tweets for a given user. 
Currently only called by Test User Service, but we + * can also use it ad-hoc. + * + * Note: regular user erasure is handled by the EraseUserTweets daemon. + */ + void erase_user_tweets(1: EraseUserTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Unretweet a given tweet. + * + * There are two ways to unretweet: + * - call deleteTweets() with the retweetId + * - call unretweet() with the retweeter userId and sourceTweetId + * + * This is useful if you want to be able to undo a retweet without having to + * keep track of a retweetId. + */ + UnretweetResult unretweet(1: UnretweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Get tweet content and deletion times for soft-deleted tweets. + * + * The response list is ordered the same as the requested ids list. + */ + list get_deleted_tweets(1: GetDeletedTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute a {@link FlushRequest} + */ + void flush(1: FlushRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute an {@link IncrTweetFavCountRequest} + */ + void incr_tweet_fav_count(1: IncrTweetFavCountRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute an {@link IncrTweetBookmarkCountRequest} + */ + void incr_tweet_bookmark_count(1: IncrTweetBookmarkCountRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Delete location data from all of a user's tweets. + * + * This endpoint initiates the process of deleting the user's location data + * from all of their tweets, as well as clearing the has_geotagged_statuses + * flag of the user. 
This method returns as soon as the event is enqueued, + * but the location data won't be scrubbed until the event is processed. + * Usually the latency for the whole process to complete is small, but it + * could take up to a couple of minutes if the user has a very large number + * of tweets, or if the request gets backed up behind other requests that + * need to scrub a large number of tweets. + * + * The event is processed by the Tweetypie geoscrub daemon. + * + */ + void delete_location_data(1: DeleteLocationDataRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute a {@link GeoScrub} request. + * + */ + void scrub_geo(1: GeoScrub geo_scrub) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift new file mode 100644 index 000000000..dd69a3299 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift @@ -0,0 +1,32 @@ +namespace java com.twitter.tweetypie.thriftjava.federated +#@namespace scala com.twitter.tweetypie.thriftscala.federated +#@namespace strato com.twitter.tweetypie.federated + +include "com/twitter/tweetypie/stored_tweet_info.thrift" + +typedef i16 FieldId + +struct GetStoredTweetsView { + 1: bool bypass_visibility_filtering = 0 + 2: optional i64 for_user_id + 3: list additional_field_ids = [] +} + +struct GetStoredTweetsResponse { + 1: stored_tweet_info.StoredTweetInfo stored_tweet +} + +struct GetStoredTweetsByUserView { + 1: bool bypass_visibility_filtering = 0 + 2: bool set_for_user_id = 0 + 3: optional i64 start_time_msec + 4: optional i64 end_time_msec + 5: optional i64 cursor + 6: bool start_from_oldest = 0 + 7: list additional_field_ids = [] +} + +struct GetStoredTweetsByUserResponse { + 1: required list 
stored_tweets + 2: optional i64 cursor +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift new file mode 100644 index 000000000..3aa0ada82 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift @@ -0,0 +1,391 @@ +namespace java com.twitter.tweetypie.thriftjava.graphql +#@namespace scala com.twitter.tweetypie.thriftscala.graphql +#@namespace strato com.twitter.tweetypie.graphql + +/** + * Reasons for defining "prefetch" structs: + * i) It enables GraphQL prefetch caching + * ii) All tweet mutation operations are defined to support prefetch caching for API consistency + * and future flexibility. (Populating the cache with VF results being a potential use case.) + */ +include "com/twitter/ads/callback/engagement_request.thrift" +include "com/twitter/strato/graphql/existsAndPrefetch.thrift" + +struct UnretweetRequest { + /** + * Tweet ID of the source tweet being referenced in the unretweet. + * Note: The retweet_id isn't being passed here as it will result in a + * successful response, but won't have any effect. This is due to + * how Tweetypie's unretweet endpoint works. + */ + 1: required i64 source_tweet_id ( + strato.json.numbers.type='string', + strato.description='The source tweet to be unretweeted.' + ) + 2: optional string comparison_id ( + strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints.' + ) +} (strato.graphql.typename='UnretweetRequest') + +struct UnretweetResponse { + /** + * The response contains the source tweet's ID being unretweeted. + * Reasons for this: + * i) The operation should return a non-void response to retain consistency + * with other tweet mutation APIs. + * ii) The response struct should define at least one field due to requirements + * of the GraphQL infrastructure. 
+ * iii) This allows the caller to hydrate the source tweet if required and request + * updated counts on the source tweet if desired. (since this operation decrements + * the source tweet's retweet count) + */ + 1: optional i64 source_tweet_id ( + strato.space='Tweet', + strato.graphql.fieldname='source_tweet', + strato.description='The source tweet that was unretweeted.' + ) +} (strato.graphql.typename='UnretweetResponse') + +struct UnretweetResponseWithSubqueryPrefetchItems { + 1: optional UnretweetResponse data + 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems +} + + +struct CreateRetweetRequest { + 1: required i64 tweet_id (strato.json.numbers.type='string') + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.nullcast + 2: bool nullcast = 0 ( + strato.description='Do not deliver this retweet to a user\'s followers. http://go/nullcast' + ) + + // @see com.twitter.ads.callback.thriftscala.EngagementRequest + 3: optional engagement_request.EngagementRequest engagement_request ( + strato.description='The ad engagement from which this retweet was created.' + ) + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.PostTweetRequest.comparison_id + 4: optional string comparison_id ( + strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints. UUID v4 (random) 36 character string.' + ) +} (strato.graphql.typename='CreateRetweetRequest') + +struct CreateRetweetResponse { + 1: optional i64 retweet_id ( + strato.space='Tweet', + strato.graphql.fieldname='retweet', + strato.description='The created retweet.' 
+ ) +} (strato.graphql.typename='CreateRetweetResponse') + +struct CreateRetweetResponseWithSubqueryPrefetchItems { + 1: optional CreateRetweetResponse data + 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems +} + +struct TweetReply { + //@see com.twitter.tweetypie.thriftscala.PostTweetRequest.in_reply_to_tweet_id + 1: i64 in_reply_to_tweet_id ( + strato.json.numbers.type='string', + strato.description='The id of the tweet that this tweet is replying to.' + ) + //@see com.twitter.tweetypie.thriftscala.PostTweetRequest.exclude_reply_user_ids + 2: list exclude_reply_user_ids = [] ( + strato.json.numbers.type='string', + strato.description='Screen names appearing in the mention prefix can be excluded. Because the mention prefix must always include the leading mention to preserve directed-at addressing for the in-reply-to tweet author, attempting to exclude that user id will have no effect. Specifying a user id not in the prefix will be silently ignored.' + ) +} (strato.graphql.typename='TweetReply') + +struct TweetMediaEntity { + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.media_upload_ids + 1: i64 media_id ( + strato.json.numbers.type='string', + strato.description='Media id as obtained from the User Image Service when uploaded.' + ) + + // @see com.twitter.tweetypie.thriftscala.Tweet.media_tags + 2: list tagged_users = [] ( + strato.json.numbers.type='string', + strato.description='List of user_ids to tag in this media entity. Requires Client App Privelege MEDIA_TAGS. Contributors (http://go/teams) are not supported. Tags are silently dropped when unauthorized.' + ) +} (strato.graphql.typename='TweetMediaEntity') + +struct TweetMedia { + 1: list media_entities = [] ( + strato.description='You may include up to 4 photos or 1 animated GIF or 1 video in a Tweet.' + ) + + /** + * @deprecated @see com.twitter.tweetypie.thriftscala.PostTweetRequest.possibly_sensitive for + * more details on why this field is ignored. 
+ */ + 2: bool possibly_sensitive = 0 ( + strato.description='Mark this tweet as possibly containing objectionable media.' + ) +} (strato.graphql.typename='TweetMedia') + +//This is similar to the APITweetAnnotation struct except that here all the id fields are required. +struct TweetAnnotation { + 1: i64 group_id (strato.json.numbers.type='string') + 2: i64 domain_id (strato.json.numbers.type='string') + 3: i64 entity_id (strato.json.numbers.type='string') +} (strato.graphql.typename='TweetAnnotation', strato.case.format='preserve') + +struct TweetGeoCoordinates { + 1: double latitude (strato.description='The latitude of the location this Tweet refers to. The valid range for latitude is -90.0 to +90.0 (North is positive) inclusive.') + 2: double longitude (strato.description='The longitude of the location this Tweet refers to. The valid range for longitude is -180.0 to +180.0 (East is positive) inclusive.') + 3: bool display_coordinates = 1 (strato.description='Whether or not make the coordinates public. When false, geo coordinates are persisted with the Tweet but are not shared publicly.') +} (strato.graphql.typename='TweetGeoCoordinates') + +struct TweetGeo { + 1: optional TweetGeoCoordinates coordinates ( + strato.description='The geo coordinates of the location this Tweet refers to.' + ) + 2: optional string place_id ( + strato.description='A place in the world. See also https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/geo#place' + ) + 3: optional string geo_search_request_id ( + strato.description='See https://confluence.twitter.biz/display/GEO/Passing+the+geo+search+request+ID' + ) +} ( + strato.graphql.typename='TweetGeo', + strato.description='Tweet geo location metadata. 
See https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/geo' +) + +enum BatchComposeMode { + BATCH_FIRST = 1 (strato.description='This is the first Tweet in a batch.') + BATCH_SUBSEQUENT = 2 (strato.description='This is any of the subsequent Tweets in a batch.') +}( + strato.graphql.typename='BatchComposeMode', + strato.description='Indicates whether a Tweet was created using a batch composer, and if so position of a Tweet within the batch. A value of None, indicates that the tweet was not created in a batch. More info: go/batchcompose.' +) + +/** + * Conversation Controls + * See also: + * tweet.thrift/Tweet.conversation_control + * tweet_service.thrift/TweetCreateConversationControl + * tweet_service.thrift/PostTweetRequest.conversation_control + * + * These types are isomorphic/equivalent to tweet_service.thrift/TweetCreateConversationControl* to + * avoid exposing internal service thrift types. + */ +enum ConversationControlMode { + BY_INVITATION = 1 (strato.description='Users that the conversation owner mentions by @screenname in the tweet text are invited.') + COMMUNITY = 2 (strato.description='The conversation owner, invited users, and users who the conversation owner follows can reply.') +} ( + strato.graphql.typename='ConversationControlMode' +) + +struct TweetConversationControl { + 1: ConversationControlMode mode +} ( + strato.graphql.typename='TweetConversationControl', + strato.description='Specifies limits on user participation in a conversation. See also http://go/dont-at-me. Up to one value may be provided. (Conceptually this is a union, however graphql doesn\'t support union types as inputs.)' +) + +// empty for now, but intended to be populated in later iterations of the super follows project. +struct ExclusiveTweetControlOptions {} ( + strato.description='Marks a tweet as exclusive. 
See go/superfollows.', + strato.graphql.typename='ExclusiveTweetControlOptions', +) + +struct EditOptions { + 1: optional i64 previous_tweet_id (strato.json.numbers.type='string', strato.description='previous Tweet id') +} ( + strato.description='Edit options for a Tweet.', + strato.graphql.typename='EditOptions', +) + +struct TweetPeriscopeContext { + 1: bool is_live = 0 ( + strato.description='Indicates if the tweet contains live streaming video. A value of false is equivalent to this struct being undefined in the CreateTweetRequest.' + ) + + // Note that the REST API also defines a context_periscope_creator_id param. The GraphQL + // API infers this value from the TwitterContext Viewer.userId since it should always be + // the same as the Tweet.coreData.userId which is also inferred from Viewer.userId. +} ( + strato.description='Specifies information about live video streaming. Note that the Periscope product was shut down in March 2021, however some live video streaming features remain in the Twitter app. This struct keeps the Periscope naming convention to retain parity and traceability to other areas of the codebase that also retain the Periscope name.', + strato.graphql.typename='TweetPeriscopeContext', +) + +struct TrustedFriendsControlOptions { + 1: required i64 trusted_friends_list_id ( + strato.json.numbers.type='string', + strato.description='The ID of the Trusted Friends List whose members can view this tweet.' + ) +} ( + strato.description='Specifies information for a Trusted Friends tweet. 
See go/trusted-friends', + strato.graphql.typename='TrustedFriendsControlOptions', +) + +enum CollabControlType { + COLLAB_INVITATION = 1 (strato.description='This represents a CollabInvitation.') + // Note that a CollabTweet cannot be created through external graphql request, + // rather a user can create a CollabInvitation (which is automatically nullcasted) and a + // public CollabTweet will be created when all Collaborators have accepted the CollabInvitation, + // triggering a strato column to instantiate the CollabTweet directly +}( + strato.graphql.typename='CollabControlType', +) + +struct CollabControlOptions { + 1: required CollabControlType collabControlType + 2: required list collaborator_user_ids ( + strato.json.numbers.type='string', + strato.description='A list of user ids representing all Collaborators on a CollabTweet or CollabInvitation') +}( + strato.graphql.typename='CollabControlOptions', + strato.description='Specifies information about a CollabTweet or CollabInvitation (a union is used to ensure CollabControl defines one or the other). See more at go/collab-tweets.' +) + +struct NoteTweetOptions { + 1: required i64 note_tweet_id ( + strato.json.numbers.type='string', + strato.description='The ID of the Note Tweet that has to be associated with the created Tweet.') + // Deprecated + 2: optional list mentioned_screen_names ( + strato.description = 'Screen names of the users mentioned in the NoteTweet. This is used to set conversation control on the Tweet.') + + 3: optional list mentioned_user_ids ( + strato.description = 'User ids of mentioned users in the NoteTweet. This is used to set conversation control on the Tweet, send mentioned user ids to TLS' + ) + 4: optional bool is_expandable ( + strato.description = 'Specifies if the Tweet can be expanded into the NoteTweet, or if they have the same text' + ) +} ( + strato.graphql.typename='NoteTweetOptions', + strato.description='Note Tweet options for a Tweet.' 
+) + +// NOTE: Some clients were using the dark_request directive in GraphQL to signal that a Tweet should not be persisted +// but this is not recommended, since the dark_request directive is not meant to be used for business logic. +struct UndoOptions { + 1: required bool is_undo ( + strato.description='Set to true if the Tweet is undo-able. Tweetypie will process the Tweet but will not persist it.' + ) +} ( + strato.graphql.typename='UndoOptions' +) + +struct CreateTweetRequest { + 1: string tweet_text = "" ( + strato.description='The user-supplied text of the tweet. Defaults to empty string. Leading & trailing whitespace are trimmed, remaining value may be empty if and only if one or more media entity ids are also provided.' + ) + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.nullcast + 2: bool nullcast = 0 ( + strato.description='Do not deliver this tweet to a user\'s followers. http://go/nullcast' + ) + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.PostTweetRequest.comparison_id + 3: optional string comparison_id ( + strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints. UUID v4 (random) 36 character string.' + ) + + // @see com.twitter.ads.callback.thriftscala.EngagementRequest + 4: optional engagement_request.EngagementRequest engagement_request ( + strato.description='The ad engagement from which this tweet was created.' + ) + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.attachment_url + 5: optional string attachment_url ( + strato.description='Tweet permalink (i.e. Quoted Tweet) or Direct Message deep link. This URL is not included in the visible_text_range.' + ) + + // @see com.twitter.tweetypie.thriftscala.Tweet.card_reference + 6: optional string card_uri ( + strato.description='Link to the card to associate with a tweet.' + ) + + 7: optional TweetReply reply ( + strato.description='Reply parameters.' 
+ ) + + 8: optional TweetMedia media ( + strato.description='Media parameters.' + ) + + 9: optional list semantic_annotation_ids ( + strato.description='Escherbird Annotations.' + ) + + 10: optional TweetGeo geo ( + strato.description='Tweet geo location metadata. See https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/geo' + ) + + 11: optional BatchComposeMode batch_compose ( + strato.description='Batch Compose Mode. See go/batchcompose' + ) + + 12: optional ExclusiveTweetControlOptions exclusive_tweet_control_options ( + strato.description='When defined, this tweet will be marked as exclusive. Leave undefined to signify a regular, non-exclusive tweet. See go/superfollows.' + ) + + 13: optional TweetConversationControl conversation_control ( + strato.description='Restrict replies to this tweet. See http://go/dont-at-me-api. Only valid for conversation root tweets. Applies to all replies to this tweet.' + ) + + 14: optional TweetPeriscopeContext periscope ( + strato.description='Specifies information about live video streaming. Note that the Periscope product was shut down in March 2021, however some live video streaming features remain in the Twitter app. This struct keeps the Periscope naming convention to retain parity and traceability to other areas of the codebase that also retain the Periscope name. Note: A value of periscope.isLive=false is equivalent to this struct being left undefined.' + ) + + 15: optional TrustedFriendsControlOptions trusted_friends_control_options ( + strato.description='Trusted Friends parameters.' + ) + + 16: optional CollabControlOptions collab_control_options ( + strato.description='Collab Tweet & Collab Invitation parameters.' + ) + + 17: optional EditOptions edit_options ( + strato.description='when defined, this tweet will be marked as an edit of the tweet represented by previous_tweet_id in edit_options.' 
+ ) + + 18: optional NoteTweetOptions note_tweet_options ( + strato.description='The Note Tweet that is to be associated with the created Tweet.', + strato.graphql.skip='true' + ) + + 19: optional UndoOptions undo_options ( + strato.description='If the user has Undo Tweets enabled, the Tweet is created so that it can be previewed by the client but is not persisted.', + ) +} (strato.graphql.typename='CreateTweetRequest') + +struct CreateTweetResponse { + 1: optional i64 tweet_id ( + strato.space='Tweet', + strato.graphql.fieldname='tweet', + strato.description='The created tweet.' + ) +} (strato.graphql.typename='CreateTweetResponse') + +struct CreateTweetResponseWithSubqueryPrefetchItems { + 1: optional CreateTweetResponse data + 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems +} + +// Request struct, ResponseStruct, ResponseWithPrefetchStruct +struct DeleteTweetRequest { + 1: required i64 tweet_id (strato.json.numbers.type='string') + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.PostTweetRequest.comparison_id + 2: optional string comparison_id ( + strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints. UUID v4 (random) 36 character string.' + ) +} (strato.graphql.typename='DeleteTweetRequest') + +struct DeleteTweetResponse { + 1: optional i64 tweet_id ( + strato.space='Tweet', + strato.graphql.fieldname='tweet', + strato.description='The deleted Tweet. Since the Tweet will always be not found after deletion, the TweetResult will always be empty.' 
+ ) +} (strato.graphql.typename='DeleteTweetResponse') + +struct DeleteTweetResponseWithSubqueryPrefetchItems { + 1: optional DeleteTweetResponse data + 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift new file mode 100644 index 000000000..b01ac21d3 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift @@ -0,0 +1,9 @@ +namespace java com.twitter.tweetypie.unmentions.thriftjava +#@ namespace scala com.twitter.tweetypie.unmentions.thriftscala +#@ namespace strato com.twitter.tweetypie.unmentions +namespace py gen.twitter.tweetypie.unmentions + +struct UnmentionData { + 1: optional i64 conversationId (personalDataType = 'TweetConversationId') + 2: optional list mentionedUsers (personalDataType = 'UserId') +} (strato.space = "Unmentions", persisted='true', hasPersonalData = 'true') \ No newline at end of file diff --git a/tweetypie/server/BUILD b/tweetypie/server/BUILD new file mode 100644 index 000000000..2b17d8a49 --- /dev/null +++ b/tweetypie/server/BUILD @@ -0,0 +1,16 @@ +target( + tags = ["bazel-compatible"], + dependencies = [ + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/config", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/handler", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/service", + 
"tweetypie/server/src/main/scala/com/twitter/tweetypie/store", + ], +) \ No newline at end of file diff --git a/tweetypie/server/README.md b/tweetypie/server/README.md new file mode 100644 index 000000000..49d3c7ef7 --- /dev/null +++ b/tweetypie/server/README.md @@ -0,0 +1,45 @@ +# Tweetypie + +## Overview + +Tweetypie is the core Tweet service that handles the reading and writing of Tweet data. It is called by the Twitter clients (through GraphQL), as well as various internal Twitter services, to fetch, create, delete, and edit Tweets. Tweetypie calls several backends to hydrate Tweet related data to return to callers. + +## How It Works + +The next sections describe the layers involved in the read and create paths for Tweets. + +### Read Path + +In the read path, Tweetypie fetches the Tweet data from [Manhattan](https://blog.twitter.com/engineering/en_us/a/2014/manhattan-our-real-time-multi-tenant-distributed-database-for-twitter-scale) or [Twemcache](https://blog.twitter.com/engineering/en_us/a/2012/caching-with-twemcache), and hydrates data about the Tweet from various other backend services. + +#### Relevant Packages + +- [backends](src/main/scala/com/twitter/tweetypie/backends/): A "backend" is a wrapper around a thrift service that Tweetypie calls. For example [Talon.scala](src/main/scala/com/twitter/tweetypie/backends/Talon.scala) is the backend for Talon, the URL shortener. +- [repository](src/main/scala/com/twitter/tweetypie/repository/): A "repository" wraps a backend and provides a structured interface for retrieving data from the backend. [UrlRepository.scala](src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala) is the repository for the Talon backend. +- [hydrator](src/main/scala/com/twitter/tweetypie/hydrator/): Tweetypie doesn't store all the data associated with Tweets. For example, it doesn't store User objects, but it stores screennames in the Tweet text (as mentions). It stores media IDs, but it doesn't store the media metadata. 
Hydrators take the raw Tweet data from Manhattan or Cache and return it with some additional information, along with hydration metadata that says whether the hydration took place. This information is usually fetched using a repository. For example, during the hydration process, the [UrlEntityHydrator](src/main/scala/com/twitter/tweetypie/hydrator/UrlEntityHydrator.scala) calls Talon using the [UrlRepository](src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala) and fetches the expanded URLs for the t.co links in the Tweet. +- [handler](src/main/scala/com/twitter/tweetypie/handler/): A handler is a function that handles requests to one of the Tweetypie endpoints. The [GetTweetsHandler](src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala) handles requests to `get_tweets`, one of the endpoints used to fetch Tweets. + +#### Through the Read Path + +At a high level, the path a `get_tweets` request takes is as follows. + +- The request is handled by [GetTweetsHandler](src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala). +- GetTweetsHandler uses the TweetResultRepository (defined in [LogicalRepositories.scala](src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala#L301)). The TweetResultRepository has at its core a [ManhattanTweetRepository](src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala) (that fetches the Tweet data from Manhattan), wrapped in a [CachingTweetRepository](src/main/scala/com/twitter/tweetypie/repository/CachingTweetRepository.scala) (that applies caching using Twemcache). Finally, the caching repository is wrapped in a hydration layer (provided by [TweetHydration.hydrateRepo](src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala#L789)). Essentially, the TweetResultRepository fetches the Tweet data from cache or Manhattan, and passes it through the hydration pipeline.
+- The hydration pipeline is described in [TweetHydration.scala](src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala), where all the hydrators are combined together. + +### Write Path + +The write path follows different patterns to the read path, but reuses some of the code. + +#### Relevant Packages + +- [store](src/main/scala/com/twitter/tweetypie/store/): The store package includes the code for updating backends on write, and the coordination code for describing which backends need to be updated for which endpoints. There are two types of file in this package: stores and store modules. Files that end in Store are stores and define the logic for updating a backend, for example [ManhattanTweetStore](src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala) writes Tweets to Manhattan. Most of the files that don't end in Store are store modules and define the logic for handling a write endpoint, and describe which stores are called, for example [InsertTweet](src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala) which handles the `post_tweet` endpoint. Modules define which stores they call, and stores define which modules they handle. + +#### Through the Write Path + +The path a `post_tweet` request takes is as follows. + +- The request is handled in [PostTweet.scala](src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala#L338). +- [TweetBuilder](src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala) creates a Tweet from the request, after performing text processing, validation, URL shortening, media processing, checking for duplicates, etc. +- [WritePathHydration.hydrateInsertTweet](src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala#L54) passes the Tweet through the hydration pipeline to return to the caller. +- The Tweet data is written to various stores as described in [InsertTweet.scala](src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala#L84).
diff --git a/tweetypie/server/config/BUILD b/tweetypie/server/config/BUILD new file mode 100644 index 000000000..45a90181c --- /dev/null +++ b/tweetypie/server/config/BUILD @@ -0,0 +1,7 @@ +resources( + sources = [ + "!**/*.pyc", + "!BUILD*", + "**/*", + ], +) diff --git a/tweetypie/server/config/decider.yml b/tweetypie/server/config/decider.yml new file mode 100644 index 000000000..6d3042fbc --- /dev/null +++ b/tweetypie/server/config/decider.yml @@ -0,0 +1,313 @@ +stratofed_forward_dark_traffic: + comment: + Forward Federated Strato traffic to DarkTrafficProxy (DTP). + Note, this decider is not overrideable via Tweetypie tooling, + as it is only used by the StratoFedServer frameworkDecider instance. + Note, you cannot use this decider as a TweetypieDeciderGates. + default_availability: 0 + +tweetypie_enable_community_tweet_creates: + comment: When enabled, it allows the creation of community tweets + default_availability: 10000 + +tweetypie_hydrate_scrub_engagements: + comment: Redact Tweet engagement related data (StatusCounts) from Interstitial Public Interest (IPI) Tweets. + default_availability: 10000 + +tweetypie_check_spam_on_retweet: + comment: Enable Scarecrow spam check during retweet creation + default_availability: 10000 + +tweetypie_check_spam_on_tweet: + comment: Enable Scarecrow spam check during tweet creation + default_availability: 10000 + +tweetypie_conversation_control_use_feature_switch_results: + comment: + Controls whether Tweetypie uses feature switch results during conversation control parameter validation. This decider supports migration of feature switches from macaw-tweets to tweetypie. + default_availability: 0 + +tweetypie_conversation_control_tweet_create_enabled: + comment: + Controls whether we will enforce conversation control policy on tweet create.
http://go/dont-at-me-backend-tdd + default_availability: 0 + +tweetypie_enable_exclusive_tweet_control_validation: + comment: + Controls whether we will restrict the exclusiveTweetControlOptions parameter to only be usable by creators. http://go/superfollows + default_availability: 0 + +tweetypie_delay_erase_user_tweets: + comment: sleep for a fixed number of seconds before deleting a page of tweets during user erasure. Used as a rate limiting mechanism. + default_availability: 5000 + +tweetypie_deny_non_tweet_permalinks: + comment: + Right now we would create a quote-tweet which would contain a non-working permalink + whereas with latest QT + media changes we're blocking the tweet creation. If tweet + from permalink is not found, we would throw an exception if this decider is on else + return a default pass-through response. + default_availability: 0 + +tweetypie_enable_trusted_friends_control_validation: + comment: Controls whether we will enforce trusted friends control policy on replies + default_availability: 0 + +tweetypie_enforce_rate_limited_clients: + comment: + Controls whether we will rate-limit low-priority clients based on per-instance requests per sec. + We enable clients for rate-limiting by setting the rate_limit to true in clients.yml + default_availability: 10000 + +tweetypie_fail_closed_in_vf: + comment: Propagate failure from backends such as Strato when running Visibility Filtering + default_availability: 0 + +tweetypie_fork_dark_traffic: + comment: Forks traffic to Darkhub + default_availability: 0 + +tweetypie_hydrate_conversation_muted: + comment: Hydrates the conversationMuted field if requested. This calls timelineservice. + default_availability: 10000 + +tweetypie_hydrate_counts: + comment: Hydrate status counts, if asked for. This calls TFlock. 
+ default_availability: 10000 + +tweetypie_hydrate_previous_counts: + comment: Hydrate previous engagements on a tweet in an edit chain + default_availability: 0 + +tweetypie_hydrate_device_sources: + comment: Hydrate device sources. This reads from DBs. + default_availability: 10000 + +tweetypie_hydrate_escherbird_annotations: + comment: Hydrate the escherbirdEntityAnnotations additional field on tweet create. This calls the Escherbird Annotation Service. + default_availability: 10000 + +tweetypie_hydrate_gnip_profile_geo_enrichment: + comment: Hydrates each tweet with the profile geo enrichment. + default_availability: 10000 + +tweetypie_hydrate_has_media: + comment: Hydrate the hasMedia field based on whether the tweet has a media entity, a media card, or a URL that matches partner media regexes + default_availability: 10000 + +tweetypie_hydrate_media: + comment: Hydrate media entities. This calls MediaInfo Service. + default_availability: 10000 + +tweetypie_hydrate_media_refs: + comment: Hydrate MediaRefs. Calls Tweetypie for pasted media. + default_availability: 0 + +tweetypie_hydrate_media_tags: + comment: Hydrate media tags. This calls Gizmoduck for user view. + default_availability: 10000 + +tweetypie_hydrate_pasted_media: + comment: Copies media entities and media cards from tweets referenced by a media permalink url entity. + default_availability: 10000 + +tweetypie_hydrate_perspectives: + comment: Hydrate perspectival attributes, if asked for. This calls TLS, which may call TFlock if items are not in cache. + default_availability: 10000 + +tweetypie_hydrate_perspectives_edits_for_timelines: + comment: + Hydrated perspectival attributes across versions of tweet edit, + for timelines safety levels if asked for. + This results in more TLS calls (one for each version of tweet). 
+ default_availability: 0 + +tweetypie_hydrate_perspectives_edits_for_tweet_details: + comment: + Hydrated perspectival attributes across versions of tweet edit, + for tweet detail safety levels if asked for. + This results in more TLS calls (one for each version of tweet). + default_availability: 0 + +tweetypie_hydrate_perspectives_edits_for_other_levels: + comment: + Hydrated perspectival attributes across versions of tweet edit, + for safety levels not mentioned in other deciders, if asked for. + This results in more TLS calls (one for each version of tweet). + default_availability: 0 + +tweetypie_hydrate_places: + comment: Hydrate place data, if asked for. This calls geoduck. + default_availability: 10000 + +tweetypie_jiminy_dark_requests: + comment: Enables dark requests to the Jiminy backend for the specified % of traffic + default_availability: 0 + +tweetypie_log_cache_exceptions: + comment: Enables logging of cache exceptions to loglens. + default_availability: 0 + +tweetypie_log_reads: + comment: Enables logging of reads. + default_availability: 50 + +tweetypie_log_tweet_cache_writes: + comment: Scribe a record for each cache write for tweets that pass this decider. + default_availability: 0 + +tweetypie_log_writes: + comment: Enables logging of status writes. + default_availability: 10000 + +tweetypie_log_young_tweet_cache_writes: + comment: + Scribe a record of cache writes for recently-created tweets that + pass this decider. + default_availability: 0 + +tweetypie_log_vf_dark_read_mismatches: + comment: Log mismatches from the tweetypie_dark_read_safety_labels_from_strato code path. + default_availability: 0 + +tweetypie_max_request_width_enabled: + comment: + Controls whether the max request width limit is enabled or not. + 0 means the limit is disabled, 10000 means it is turned on and + request widths > maxSize will be rejected. 
+ default_availability: 0 + +tweetypie_media_refs_hydrator_include_pasted_media: + comment: + For debugging MediaRefsHydrator - determine if extra calls to pasted media are causing the GC issues. + default_availability: 0 + +tweetypie_prefer_forwarded_service_identifier_for_client_id: + comment: Effective client ID logic prefers forwarded service identifier to service identifier if available. + default_availability: 0 + +tweetypie_rate_limit_by_limiter_service: + comment: + Controls whether we will consult limiter service to see whether a + user is allowed to create more tweets. + default_availability: 10000 + +tweetypie_rate_limit_tweet_creation_failure: + comment: + Controls whether we rate limit tweet creation based on failed + attempts to create tweets via limiter service. This is separate + from the limit of created tweets. It is intended to prevent + unforeseen abuse by failing tweet creation attempts immediately if + the user has had too many recent tweet creation + failures. Disabling this decider will reduce traffic to limiter + service, but will remove the per-user abuse protection. + default_availability: 10000 + +tweetypie_replicate_reads_to_atla: + comment: Send reads to deferredrpc for replication to atla. We can use this to warm caches in atla. + default_availability: 0 + +tweetypie_replicate_reads_to_pdxa: + comment: Send reads to deferredrpc for replication to pdxa. We can use this to warm caches in pdxa. + default_availability: 0 + +tweetypie_disable_invite_via_mention: + comment: + Disables invite via mention field in the conversation control struct of + root tweets + reply tweets. + default_availability: 0 + +tweetypie_shed_read_traffic_voluntarily: + comment: + Preferred way to reject read requests during an incident from a subset of clients that have + volunteered to shed load. These clients have load_shed_envs set in clients.yml, often for + staging environments. 
Although this decider is available and clients have volunteered, it should + still only be used in an emergency. + default_availability: 0 + +tweetypie_validate_card_ref_attachment_android: + comment: + When enabled, tweet creates from Android consider CardReference for the TooManyAttachmentTypes error when creating tweets with more than one attachment type. + default_availability: 0 + +tweetypie_validate_card_ref_attachment_non_android: + comment: + When enabled, tweet creates from non-Android consider CardReference for the TooManyAttachmentTypes error when creating tweets with more than one attachment type. + default_availability: 0 + +# Additional Fields + +tweetypie_short_circuit_likely_partial_tweet_reads_ms: + comment: + Specifies a number of milliseconds before which we short circuit likely + partial reads from MH and return NotFound tweet response state. + After experimenting, we decided to go with 1500 ms. + default_availability: 0 + +tweetypie_populate_quoted_tweet_results_as_contextual_tweet_ref: + comment: + CreateTweet and CreateReTweet column to return the 'quoted_tweet_results' PrefetchedItem as + ContextualTweetRef type instead of just tweetId. + This will be used during the quotedTweet.Tweet column migration (see http://go/qt-col-migration). + Post-migration, this will be removed. + default_availability: 0 +tweetypie_enable_unmentions_timeline_warmup: + comment: + When enabled, read-path calls execute an async call to the getUnmentionedUsersFromConverstion + strato column to warm the unmentioned NH/haplolite cache.
+ http://go/unmention-me-onepager + default_availability: 0 + +tweetypie_tweet_visibility_library_enable_parity_test: + comment: measure TVL parity against VF federated service, for a fraction of traffic + default_availability: 0 + +tweetypie_enable_vf_feature_hydration_in_quoted_tweet_visibility_library_shim: + comment: when enabled, all features are hydrated in QuotedTweetVisibilityLibrary shim + default_availability: 0 + +tweetypie_enable_remove_unmentioned_implicit_mentions: + comment: + When enabled, implicit mentions are filtered based on users that have unmentioned themselves + from the tweet's conversation. http://go/unmention-me-onepager + default_availability: 0 + +tweetypie_enable_stale_tweet_validation: + comment: Controls whether we will enforce stale tweet policy on replies and QT + default_availability: 0 + +tweetypie_disable_promoted_tweet_edit: + comment: + Controls whether we will disable edits on promoted tweets + default_availability: 0 + +tweetypie_should_materialize_containers: + comment: + When enabled, Creatives Container Service will be called to materialize container-backed tweets. + Otherwise, TP will not call CCS and return a StatusState of NotFound. + default_availability: 0 + +tweetypie_check_twitter_blue_subscription_for_edit: + comment: + Controls whether we check if the User is subscribed to Twitter Blue when editing a Tweet. + default_availability: 0 + +tweetypie_hydrate_bookmarks_count: + comment: + Controls whether we hydrate bookmarks count for a Tweet + default_availability: 0 + +tweetypie_hydrate_bookmarks_perspective: + comment: + Controls whether we request the Bookmarked perspective from TLS + default_availability: 0 + +tweetypie_set_edit_time_window_to_sixty_minutes: + comment: + Set time window in which Tweets are editable to 60 minutes + default_availability: 0 + +tweetypie_enable_federated_column_dark_traffic: + comment: + Enable dark traffic for federated column. 
+ default_availability: 0 diff --git a/tweetypie/server/config/decider_staging.yml b/tweetypie/server/config/decider_staging.yml new file mode 100644 index 000000000..e69de29bb diff --git a/tweetypie/server/config/logging/logback-all-include.xml b/tweetypie/server/config/logging/logback-all-include.xml new file mode 100644 index 000000000..269cb149f --- /dev/null +++ b/tweetypie/server/config/logging/logback-all-include.xml @@ -0,0 +1,23 @@ + + + tweetypie-all.log + true + + tweetypie-all-%i.log + 1 + 20 + + + 800MB + + + + %date [%thread] %-5level %logger{36} - %msg%n + + + + + + + + diff --git a/tweetypie/server/config/logging/logback-without-loglens.xml b/tweetypie/server/config/logging/logback-without-loglens.xml new file mode 100644 index 000000000..b2e6c4d6a --- /dev/null +++ b/tweetypie/server/config/logging/logback-without-loglens.xml @@ -0,0 +1,12 @@ + + + + true + + + + + + + + diff --git a/tweetypie/server/config/logging/logback.xml b/tweetypie/server/config/logging/logback.xml new file mode 100644 index 000000000..04d686c20 --- /dev/null +++ b/tweetypie/server/config/logging/logback.xml @@ -0,0 +1,146 @@ + + + + + true + + + + + + + + tweetypie-important.log + true + + tweetypie-important-%i.log + 1 + 20 + + + 800MB + + + + + %date [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + true + ${log.lens.tag} + ${log.lens.index} + loglens + + %msg + + + + + + + + + alertable-exception.log + true + + alertable-exception-%i.log + 1 + 17 + + + 100MB + + + + + %date [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + true + ${log.lens.tag} + ${log.lens.index} + loglens + + ${ALERTABLE_MESSAGE_FORMAT} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tweetypie/server/config/partner_media.yml b/tweetypie/server/config/partner_media.yml new file mode 100644 index 000000000..f737dd7c1 --- /dev/null +++ b/tweetypie/server/config/partner_media.yml @@ -0,0 +1,30 @@ +http_or_https: + - 'vine\.co/v/[a-zA-Z0-9]+' 
+ - 'amp\.twimg\.com/' + - '(www\.)?dailymotion\.com/video/[a-zA-Z0-9_\-/]+' + - '(www\.)?dai\.ly/[a-zA-Z0-9_\-/]+' + - '(www\.)?youtu\.be/[a-zA-Z0-9_\-\?\&\=/]+' + - '(www\.)?youtube\.com/watch[a-zA-Z0-9_\-\?\&\=/]+' + - '(www\.)?ustream\.tv/recorded/\d+' + - '(www\.)?vevo\.com/watch/[\w-]+/[\w-]+/[a-zA-Z0-9_]+' + - '(www\.)?flickr\.com/photos/[\w\@\-]+/\d+/?' + - '(www\.)?flic\.kr/p/[A-Z0-9a-z\-]+' + - '([\w\-]+\.)deviantart\.com/(art|deviation|view)/[\w\@-]+' + - '(www\.)?vimeo\.com/\d+' + - '(www\.)?photozou\.(com|jp)/photo/show/\d+/\d+' + - '(www\.)?twitpic\.com/(?!(place|photos|events)/)([a-zA-Z0-9\?\=\-]+)' + - '(www\.)?mtv\.com/videos/([a-z0-9\-\_]+/)+[0-9]+/[a-z0-9\-\_]+\.jhtml(#[a-z0-9\=\&]+)?' + - '([\w\-\_]+\.)?washingtonpost\.com/wp-dyn/content/video/\d{4}/\d{2}/\d{2}/VI\d+\.html([a-zA-Z0-9_#\.\-\?\&\=/]+)?' + - '([\w\-\_]+\.)?msnbc\.msn\.com/id/\d{1,8}/vp/\d{1,8}([a-zA-Z0-9_#\.\-\?\&\=/]+)?' + - '((www|edition|us)\.)?cnn\.com/video/[\?|#]/[a-zA-Z0-9_#\.\-\?\&\=/]+' + - 'itunes\.apple\.com(/[a-z][a-z])?/(music-)?video/' + - '(www\.)?blip\.tv/((file/[\w-]+)|(([\w-]+/)?[\w-]+-\d+))/?' 
+ - 'online\.wsj\.com/video/[A-Z0-9a-z\-]+/[A-Z0-9a-z\-]+\.html' + - '(www\.)?hulu\.com/w(atch)?/[a-zA-Z0-9]+' + - 'video\.([a-z]{4,11}\.)?nhl\.com/videocenter/console\?(((catid=-?\d+&)?id=\d+)|(hlg=\d{8},\d,\d{1,4}(&event=[A-Z0-9]{4,6})?)|(hlp=\d{5,10}(&event=[A-Z0-9]{4,6})?))' + - '([a-zA-Z0-9\-]+\.)*grabyo\.com/((g/v/[a-zA-Z0-9]{11})|((studio/)?studiotimeline\.jsp\?shareId=[a-zA-Z0-9]{11}))[a-zA-Z0-9_?&=#:%/\.\-]*' + +http_only: + - 'on\.nba\.com/[a-zA-Z0-9]+' + - 'on\.nfl\.com/[a-zA-Z0-9]+' + - 'snpy\.tv/[a-zA-Z0-9]+' diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD new file mode 100644 index 000000000..261fcb099 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD @@ -0,0 +1,19 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "mediaservices/commons/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "snowflake:id", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "util/util-slf4j-api", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD new file mode 100644 index 000000000..ab03f48f3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD @@ -0,0 +1,48 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + 
"3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/org/apache/thrift:libthrift", + "configbus/client/src/main/scala/com/twitter/configbus/client", + "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", + "finagle/finagle-core/src/main", + "finagle/finagle-thriftmux/src/main/scala", + # "finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/producers", + "finatra-internal/messaging/kafka/src/main/scala", + "finatra-internal/thrift/src/main/thrift:thrift-scala", + "flock-client/src/main/scala", + "flock-client/src/main/thrift:thrift-scala", + # "kafka/finagle-kafka/finatra-kafka/src/main/scala", + "limiter/thrift-only/src/main/thrift:thrift-scala", + "mediaservices/mediainfo-server/thrift/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "src/thrift/com/twitter/dataproducts:service-scala", + "src/thrift/com/twitter/escherbird:annotation-service-scala", + "src/thrift/com/twitter/escherbird:tweet-annotation-scala", + "src/thrift/com/twitter/escherbird/metadata:metadata-service-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions-scala", + "src/thrift/com/twitter/service/talon/gen:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-scala", + "src/thrift/com/twitter/socialgraph:thrift-scala", + "src/thrift/com/twitter/timelineservice:thrift-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "storage/clients/manhattan/client/src/main/scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + 
"tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "user-image-service/thrift/src/main/thrift:thrift-scala", + "util/util-stats/src/main/scala", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala new file mode 100644 index 000000000..2daa79e87 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala @@ -0,0 +1,172 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.context.Deadline +import com.twitter.finagle.service.RetryBudget +import com.twitter.finagle.service.RetryPolicy +import com.twitter.servo.util.FutureArrow +import com.twitter.servo.util.RetryHandler +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.util.Timer +import com.twitter.util.TimeoutException + +object Backend { + val log: Logger = Logger(getClass) + + /** + * Common stuff that is needed as part of the configuration of all + * of the backends. + */ + case class Context(val timer: Timer, val stats: StatsReceiver) + + /** + * All backend operations are encapsulated in the FutureArrow type. The Builder type + * represents functions that can decorate the FutureArrow, typically by calling the various + * combinator methods on FutureArrow. + */ + type Builder[A, B] = FutureArrow[A, B] => FutureArrow[A, B] + + /** + * A Policy defines some behavior to apply to a FutureArrow that wraps an endpoint. + */ + trait Policy { + + /** + * Using an endpoint name and Context, returns a Builder that does the actual + * application of the policy to the FutureArrow. + */ + def apply[A, B](name: String, ctx: Context): Builder[A, B] + + /** + * Sequentially combines policies, first applying this policy and then applying + * the next policy. Order matters! 
For example, to retry on timeouts, the FailureRetryPolicy + * needs to be applied after the TimeoutPolicy: + * + * TimeoutPolicy(100.milliseconds) >>> FailureRetryPolicy(retryPolicy) + */ + def andThen(next: Policy): Policy = { + val first = this + new Policy { + def apply[A, B](name: String, ctx: Context): Builder[A, B] = + first(name, ctx).andThen(next(name, ctx)) + + override def toString = s"$first >>> $next" + } + } + + /** + * An alias for `andThen`. + */ + def >>>(next: Policy): Policy = andThen(next) + } + + /** + * Applies a timeout to the underlying FutureArrow. + */ + case class TimeoutPolicy(timeout: Duration) extends Policy { + def apply[A, B](name: String, ctx: Context): Builder[A, B] = { + val stats = ctx.stats.scope(name) + val ex = new TimeoutException(name + ": " + timeout) + (_: FutureArrow[A, B]).raiseWithin(ctx.timer, timeout, ex) + } + } + + /** + * Attaches a RetryHandler with the given RetryPolicy to retry failures. + */ + case class FailureRetryPolicy( + retryPolicy: RetryPolicy[Try[Nothing]], + retryBudget: RetryBudget = RetryBudget()) + extends Policy { + def apply[A, B](name: String, ctx: Context): Builder[A, B] = { + val stats = ctx.stats.scope(name) + (_: FutureArrow[A, B]) + .retry(RetryHandler.failuresOnly(retryPolicy, ctx.timer, stats, retryBudget)) + } + } + + /** + * This policy applies standardized endpoint metrics. This should be used with every endpoint. + */ + case object TrackPolicy extends Policy { + def apply[A, B](name: String, ctx: Context): Builder[A, B] = { + val stats = ctx.stats.scope(name) + (_: FutureArrow[A, B]) + .onFailure(countOverCapacityExceptions(stats)) + .trackOutcome(ctx.stats, (_: A) => name) + .trackLatency(ctx.stats, (_: A) => name) + } + } + + /** + * The default "policy" for timeouts, retries, exception counting, latency tracking, etc. to + * apply to each backend operation. 
This returns a Builder type (an endofunction on FutureArrow), + * which can be composed with other Builders via simple function composition. + */ + def defaultPolicy[A, B]( + name: String, + requestTimeout: Duration, + retryPolicy: RetryPolicy[Try[B]], + ctx: Context, + retryBudget: RetryBudget = RetryBudget(), + totalTimeout: Duration = Duration.Top, + exceptionCategorizer: Throwable => Option[String] = _ => None + ): Builder[A, B] = { + val scopedStats = ctx.stats.scope(name) + val requestTimeoutException = new TimeoutException( + s"$name: hit request timeout of $requestTimeout" + ) + val totalTimeoutException = new TimeoutException(s"$name: hit total timeout of $totalTimeout") + base => + base + .raiseWithin( + ctx.timer, + // We defer to a per-request deadline. When the deadline is missing or wasn't toggled, + // 'requestTimeout' is used instead. This mimics the behavior happening within a standard + // Finagle client stack and its 'TimeoutFilter'. + Deadline.currentToggled.fold(requestTimeout)(_.remaining), + requestTimeoutException + ) + .retry(RetryHandler(retryPolicy, ctx.timer, scopedStats, retryBudget)) + .raiseWithin(ctx.timer, totalTimeout, totalTimeoutException) + .onFailure(countOverCapacityExceptions(scopedStats)) + .trackOutcome(ctx.stats, (_: A) => name, exceptionCategorizer) + .trackLatency(ctx.stats, (_: A) => name) + } + + /** + * An onFailure FutureArrow callback that counts OverCapacity exceptions to a special counter. + * These will also be counted as failures and by exception class name, but having a special + * counter for this is easier to use in success rate computations where you want to factor out + * backpressure responses. 
+ */ + def countOverCapacityExceptions[A](scopedStats: StatsReceiver): (A, Throwable) => Unit = { + val overCapacityCounter = scopedStats.counter("over_capacity") + + { + case (_, ex: OverCapacity) => overCapacityCounter.incr() + case _ => () + } + } + + /** + * Provides a simple mechanism for applying a Policy to an endpoint FutureArrow from + * an underlying service interface. + */ + class PolicyAdvocate[S](backendName: String, ctx: Backend.Context, svc: S) { + + /** + * Tacks on the TrackPolicy to the given base policy, and then applies the policy to + * a FutureArrow. This is more of a convenience method that every Backend can use to + * build the fully configured FutureArrow. + */ + def apply[A, B]( + endpointName: String, + policy: Policy, + endpoint: S => FutureArrow[A, B] + ): FutureArrow[A, B] = { + log.info(s"applying policy to $backendName.$endpointName: $policy") + policy.andThen(TrackPolicy)(endpointName, ctx)(endpoint(svc)) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala new file mode 100644 index 000000000..f77ad3d77 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala @@ -0,0 +1,50 @@ +package com.twitter.tweetypie.backends + +import com.twitter.configbus.client.ConfigbusClientException +import com.twitter.configbus.client.file.PollingConfigSourceBuilder +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging.Logger +import com.twitter.util.Activity +import com.twitter.util.Activity._ +import com.twitter.conversions.DurationOps._ +import com.twitter.io.Buf + +trait ConfigBus { + def file(path: String): Activity[String] +} + +object ConfigBus { + private[this] val basePath = "appservices/tweetypie" + private[this] val log = Logger(getClass) + + def apply(stats: StatsReceiver, instanceId: Int, instanceCount: Int): ConfigBus = { + + val client = 
PollingConfigSourceBuilder() + .statsReceiver(stats) + .pollPeriod(30.seconds) + .instanceId(instanceId) + .numberOfInstances(instanceCount) + .build() + + val validBuffer = stats.counter("valid_buffer") + + def subscribe(path: String) = + client.subscribe(s"$basePath/$path").map(_.configs).map { + case Buf.Utf8(string) => + validBuffer.incr() + string + } + + new ConfigBus { + def file(path: String): Activity[String] = { + val changes = subscribe(path).run.changes.dedupWith { + case (Failed(e1: ConfigbusClientException), Failed(e2: ConfigbusClientException)) => + e1.getMessage == e2.getMessage + case other => + false + } + Activity(changes) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala new file mode 100644 index 000000000..781e2ad81 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala @@ -0,0 +1,71 @@ +package com.twitter.tweetypie.backends + +import com.twitter.container.{thriftscala => ccs} +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finatra.thrift.thriftscala.ServerError +import com.twitter.finatra.thrift.thriftscala.ServerErrorCause +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.Duration +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.Try +import com.twitter.tweetypie.util.RetryPolicyBuilder +import com.twitter.tweetypie.{thriftscala => tp} +import com.twitter.util.Throw + +object CreativesContainerService { + import Backend._ + + type MaterializeAsTweet = FutureArrow[ccs.MaterializeAsTweetRequests, Seq[tp.GetTweetResult]] + type MaterializeAsTweetFields = + FutureArrow[ccs.MaterializeAsTweetFieldsRequests, Seq[tp.GetTweetFieldsResult]] + + def fromClient( + client: ccs.CreativesContainerService.MethodPerEndpoint + ): 
CreativesContainerService = + new CreativesContainerService { + val materializeAsTweet: MaterializeAsTweet = FutureArrow(client.materializeAsTweets) + val materializeAsTweetFields: MaterializeAsTweetFields = FutureArrow( + client.materializeAsTweetFields) + + def ping(): Future[Unit] = client.materializeAsTweets(ccs.MaterializeAsTweetRequests()).unit + } + + case class Config( + requestTimeout: Duration, + timeoutBackoffs: Stream[Duration], + serverErrorBackoffs: Stream[Duration]) { + def apply(svc: CreativesContainerService, ctx: Backend.Context): CreativesContainerService = + new CreativesContainerService { + override val materializeAsTweet: MaterializeAsTweet = + policy("materializeAsTweets", ctx)(svc.materializeAsTweet) + + override val materializeAsTweetFields: MaterializeAsTweetFields = + policy("materializeAsTweetFields", ctx)(svc.materializeAsTweetFields) + + override def ping(): Future[Unit] = svc.ping() + } + + private[this] def policy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicy.combine[Try[B]]( + RetryPolicyBuilder.timeouts[B](timeoutBackoffs), + RetryPolicy.backoff(Backoff.fromStream(serverErrorBackoffs)) { + case Throw(ex: ServerError) if ex.errorCause != ServerErrorCause.NotImplemented => true + } + ) + + implicit val warmup: Warmup[CreativesContainerService] = + Warmup[CreativesContainerService]("creativesContainerService")(_.ping()) + } +} + +trait CreativesContainerService { + import CreativesContainerService._ + + val materializeAsTweet: MaterializeAsTweet + val materializeAsTweetFields: MaterializeAsTweetFields + def ping(): Future[Unit] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala new file mode 100644 index 000000000..fc9e1acc3 --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala @@ -0,0 +1,43 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.escherbird.thriftscala.TweetEntityAnnotation +import com.twitter.escherbird.{thriftscala => escherbird} +import com.twitter.finagle.service.RetryPolicy +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object Escherbird { + import Backend._ + + type Annotate = FutureArrow[Tweet, Seq[TweetEntityAnnotation]] + + def fromClient(client: escherbird.TweetEntityAnnotationService.MethodPerEndpoint): Escherbird = + new Escherbird { + val annotate = FutureArrow(client.annotate) + } + + case class Config(requestTimeout: Duration, timeoutBackoffs: Stream[Duration]) { + + def apply(svc: Escherbird, ctx: Backend.Context): Escherbird = + new Escherbird { + val annotate: FutureArrow[Tweet, Seq[TweetEntityAnnotation]] = + policy("annotate", requestTimeout, ctx)(svc.annotate) + } + + private[this] def policy[A, B]( + name: String, + requestTimeout: Duration, + ctx: Context + ): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) + } +} + +trait Escherbird { + import Escherbird._ + val annotate: Annotate +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala new file mode 100644 index 000000000..10cdc28e1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala @@ -0,0 +1,83 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.expandodo.thriftscala.AttachmentEligibilityRequest +import com.twitter.expandodo.thriftscala.AttachmentEligibilityResponses +import com.twitter.expandodo.thriftscala.Card2Request +import 
com.twitter.expandodo.thriftscala.Card2RequestOptions +import com.twitter.expandodo.thriftscala.Card2Responses +import com.twitter.expandodo.thriftscala.CardsResponse +import com.twitter.expandodo.thriftscala.GetCardUsersRequests +import com.twitter.expandodo.thriftscala.GetCardUsersResponses +import com.twitter.expandodo.{thriftscala => expandodo} +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object Expandodo { + import Backend._ + + type GetCards = FutureArrow[Set[String], collection.Map[String, expandodo.CardsResponse]] + type GetCards2 = FutureArrow[ + (Seq[expandodo.Card2Request], expandodo.Card2RequestOptions), + expandodo.Card2Responses + ] + type GetCardUsers = FutureArrow[expandodo.GetCardUsersRequests, expandodo.GetCardUsersResponses] + type CheckAttachmentEligibility = + FutureArrow[Seq[ + expandodo.AttachmentEligibilityRequest + ], expandodo.AttachmentEligibilityResponses] + + def fromClient(client: expandodo.CardsService.MethodPerEndpoint): Expandodo = + new Expandodo { + val getCards = FutureArrow(client.getCards _) + val getCards2 = FutureArrow((client.getCards2 _).tupled) + val getCardUsers = FutureArrow(client.getCardUsers _) + val checkAttachmentEligibility = FutureArrow(client.checkAttachmentEligibility _) + } + + case class Config( + requestTimeout: Duration, + timeoutBackoffs: Stream[Duration], + serverErrorBackoffs: Stream[Duration]) { + def apply(svc: Expandodo, ctx: Backend.Context): Expandodo = + new Expandodo { + val getCards: FutureArrow[Set[String], collection.Map[String, CardsResponse]] = + policy("getCards", ctx)(svc.getCards) + val getCards2: FutureArrow[(Seq[Card2Request], Card2RequestOptions), Card2Responses] = + policy("getCards2", ctx)(svc.getCards2) + val getCardUsers: FutureArrow[GetCardUsersRequests, GetCardUsersResponses] = + policy("getCardUsers", ctx)(svc.getCardUsers) + val 
checkAttachmentEligibility: FutureArrow[Seq[ + AttachmentEligibilityRequest + ], AttachmentEligibilityResponses] = + policy("checkAttachmentEligibility", ctx)(svc.checkAttachmentEligibility) + } + + private[this] def policy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicy.combine[Try[B]]( + RetryPolicyBuilder.timeouts[B](timeoutBackoffs), + RetryPolicy.backoff(Backoff.fromStream(serverErrorBackoffs)) { + case Throw(ex: expandodo.InternalServerError) => true + } + ) + } + + implicit val warmup: Warmup[Expandodo] = + Warmup[Expandodo]("expandodo")( + _.getCards2((Seq.empty, expandodo.Card2RequestOptions("iPhone-13"))) + ) +} + +trait Expandodo { + import Expandodo._ + + val getCards: GetCards + val getCards2: GetCards2 + val getCardUsers: GetCardUsers + val checkAttachmentEligibility: CheckAttachmentEligibility +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala new file mode 100644 index 000000000..e05d9950e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala @@ -0,0 +1,84 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.servo.util.FutureArrow +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.bijections.Bijections._ +import com.twitter.storage.client.manhattan.kv._ +import com.twitter.storage.client.manhattan.kv.impl._ +import com.twitter.util.Time + +/** + * Read and write the timestamp of the last delete_location_data request + * for a user. This is used as a safeguard to prevent leaking geo data + * with tweets that have not yet been scrubbed or were missed during the + * geo scrubbing process. 
+ */ +object GeoScrubEventStore { + type GetGeoScrubTimestamp = UserId => Stitch[Option[Time]] + type SetGeoScrubTimestamp = FutureArrow[(UserId, Time), Unit] + + private[this] val KeyDesc = + KeyDescriptor( + Component(LongInjection), + Component(LongInjection, StringInjection) + ).withDataset("geo_scrub") + + private[this] val ValDesc = ValueDescriptor(LongInjection) + + // This modulus determines how user ids get assigned to PKeys, and + // thus to shards within the MH cluster. The origin of the specific + // value has been lost to time, but it's important that we don't + // change it, or else the existing data will be inaccessible. + private[this] val PKeyModulus: Long = 25000L + + private[this] def toKey(userId: Long) = + KeyDesc + .withPkey(userId % PKeyModulus) + .withLkey(userId, "_last_scrub") + + def apply(client: ManhattanKVClient, config: Config, ctx: Backend.Context): GeoScrubEventStore = { + new GeoScrubEventStore { + val getGeoScrubTimestamp: UserId => Stitch[Option[Time]] = { + val endpoint = config.read.endpoint(client) + + (userId: UserId) => { + endpoint + .get(toKey(userId), ValDesc) + .map(_.map(value => Time.fromMilliseconds(value.contents))) + } + } + + val setGeoScrubTimestamp: SetGeoScrubTimestamp = { + val endpoint = config.write.endpoint(client) + + FutureArrow { + case (userId, timestamp) => + val key = toKey(userId) + + // Use the geo scrub timestamp as the MH entry timestamp. This + // ensures that whatever timestamp is highest will win any + // update races. 
+ val value = ValDesc.withValue(timestamp.inMilliseconds, timestamp) + Stitch.run(endpoint.insert(key, value)) + } + } + } + } + + case class EndpointConfig(requestTimeout: Duration, maxRetryCount: Int) { + def endpoint(client: ManhattanKVClient): ManhattanKVEndpoint = + ManhattanKVEndpointBuilder(client) + .defaultMaxTimeout(requestTimeout) + .maxRetryCount(maxRetryCount) + .build() + } + + case class Config(read: EndpointConfig, write: EndpointConfig) +} + +trait GeoScrubEventStore { + import GeoScrubEventStore._ + val getGeoScrubTimestamp: GetGeoScrubTimestamp + val setGeoScrubTimestamp: SetGeoScrubTimestamp +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala new file mode 100644 index 000000000..79f519250 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala @@ -0,0 +1,93 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.gizmoduck.thriftscala.CountsUpdateField +import com.twitter.gizmoduck.thriftscala.LookupContext +import com.twitter.gizmoduck.thriftscala.ModifiedUser +import com.twitter.gizmoduck.thriftscala.UserResult +import com.twitter.gizmoduck.{thriftscala => gd} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object Gizmoduck { + import Backend._ + + type GetById = FutureArrow[(gd.LookupContext, Seq[UserId], Set[UserField]), Seq[gd.UserResult]] + type GetByScreenName = + FutureArrow[(gd.LookupContext, Seq[String], Set[UserField]), Seq[gd.UserResult]] + type IncrCount = FutureArrow[(UserId, gd.CountsUpdateField, Int), Unit] + type ModifyAndGet = FutureArrow[(gd.LookupContext, UserId, gd.ModifiedUser), gd.User] + + def fromClient(client: gd.UserService.MethodPerEndpoint): Gizmoduck = + 
new Gizmoduck { + val getById = FutureArrow((client.get _).tupled) + val getByScreenName = FutureArrow((client.getByScreenName _).tupled) + val incrCount = FutureArrow((client.incrCount _).tupled) + val modifyAndGet = FutureArrow((client.modifyAndGet _).tupled) + def ping(): Future[Unit] = client.get(gd.LookupContext(), Seq.empty, Set.empty).unit + } + + case class Config( + readTimeout: Duration, + writeTimeout: Duration, + modifyAndGetTimeout: Duration, + modifyAndGetTimeoutBackoffs: Stream[Duration], + defaultTimeoutBackoffs: Stream[Duration], + gizmoduckExceptionBackoffs: Stream[Duration]) { + + def apply(svc: Gizmoduck, ctx: Backend.Context): Gizmoduck = + new Gizmoduck { + val getById: FutureArrow[(LookupContext, Seq[UserId], Set[UserField]), Seq[UserResult]] = + policy("getById", readTimeout, ctx)(svc.getById) + val getByScreenName: FutureArrow[(LookupContext, Seq[String], Set[UserField]), Seq[ + UserResult + ]] = policy("getByScreenName", readTimeout, ctx)(svc.getByScreenName) + val incrCount: FutureArrow[(UserId, CountsUpdateField, Int), Unit] = + policy("incrCount", writeTimeout, ctx)(svc.incrCount) + val modifyAndGet: FutureArrow[(LookupContext, UserId, ModifiedUser), User] = policy( + "modifyAndGet", + modifyAndGetTimeout, + ctx, + timeoutBackoffs = modifyAndGetTimeoutBackoffs + )(svc.modifyAndGet) + def ping(): Future[Unit] = svc.ping() + } + + private[this] def policy[A, B]( + name: String, + requestTimeout: Duration, + ctx: Context, + timeoutBackoffs: Stream[Duration] = defaultTimeoutBackoffs + ): Builder[A, B] = + translateExceptions andThen + defaultPolicy(name, requestTimeout, retryPolicy(timeoutBackoffs), ctx) + + private[this] def translateExceptions[A, B]: Builder[A, B] = + _.translateExceptions { + case gd.OverCapacity(msg) => OverCapacity(s"gizmoduck: $msg") + } + + private[this] def retryPolicy[B](timeoutBackoffs: Stream[Duration]): RetryPolicy[Try[B]] = + RetryPolicy.combine[Try[B]]( + RetryPolicyBuilder.timeouts[B](timeoutBackoffs), + 
RetryPolicy.backoff(Backoff.fromStream(gizmoduckExceptionBackoffs)) { + case Throw(ex: gd.InternalServerError) => true + } + ) + } + + implicit val warmup: Warmup[Gizmoduck] = + Warmup[Gizmoduck]("gizmoduck")(_.ping()) +} + +trait Gizmoduck { + import Gizmoduck._ + val getById: GetById + val getByScreenName: GetByScreenName + val incrCount: IncrCount + val modifyAndGet: ModifyAndGet + def ping(): Future[Unit] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala new file mode 100644 index 000000000..3b716c5b1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.conversions.PercentOps._ +import com.twitter.conversions.DurationOps._ +import com.twitter.dataproducts.enrichments.thriftscala._ +import com.twitter.dataproducts.enrichments.thriftscala.Enricherator +import com.twitter.finagle.thriftmux.MethodBuilder +import com.twitter.servo.util.FutureArrow + +object GnipEnricherator { + + type HydrateProfileGeo = FutureArrow[ProfileGeoRequest, Seq[ProfileGeoResponse]] + + private def methodPerEndpoint(methodBuilder: MethodBuilder) = + Enricherator.MethodPerEndpoint( + methodBuilder + .servicePerEndpoint[Enricherator.ServicePerEndpoint] + .withHydrateProfileGeo( + methodBuilder + .withTimeoutTotal(300.milliseconds) + .withTimeoutPerRequest(100.milliseconds) + .idempotent(maxExtraLoad = 1.percent) + .servicePerEndpoint[Enricherator.ServicePerEndpoint](methodName = "hydrateProfileGeo") + .hydrateProfileGeo + ) + ) + + def fromMethod(methodBuilder: MethodBuilder): GnipEnricherator = { + val mpe = methodPerEndpoint(methodBuilder) + + new GnipEnricherator { + override val hydrateProfileGeo: HydrateProfileGeo = + FutureArrow(mpe.hydrateProfileGeo) + } + } +} + +trait GnipEnricherator { + import 
GnipEnricherator._ + val hydrateProfileGeo: HydrateProfileGeo +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala new file mode 100644 index 000000000..3bfe1a682 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala @@ -0,0 +1,55 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.service.RetryPolicy +import com.twitter.limiter.thriftscala.FeatureRequest +import com.twitter.limiter.thriftscala.Usage +import com.twitter.limiter.{thriftscala => ls} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object LimiterBackend { + import Backend._ + + type IncrementFeature = FutureArrow[(ls.FeatureRequest, Int), Unit] + type GetFeatureUsage = FutureArrow[ls.FeatureRequest, ls.Usage] + + def fromClient(client: ls.LimitService.MethodPerEndpoint): LimiterBackend = + new LimiterBackend { + val incrementFeature: IncrementFeature = + FutureArrow { + case (featureReq, amount) => client.incrementFeature(featureReq, amount).unit + } + + val getFeatureUsage: GetFeatureUsage = + FutureArrow(featureReq => client.getLimitUsage(None, Some(featureReq))) + } + + case class Config(requestTimeout: Duration, timeoutBackoffs: Stream[Duration]) { + + def apply(client: LimiterBackend, ctx: Backend.Context): LimiterBackend = + new LimiterBackend { + val incrementFeature: FutureArrow[(FeatureRequest, Int), Unit] = + policy("incrementFeature", requestTimeout, ctx)(client.incrementFeature) + val getFeatureUsage: FutureArrow[FeatureRequest, Usage] = + policy("getFeatureUsage", requestTimeout, ctx)(client.getFeatureUsage) + } + + private[this] def policy[A, B]( + name: String, + requestTimeout: Duration, + ctx: Context + ): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx) + + private[this] def retryPolicy[B]: 
RetryPolicy[Try[B]] = + RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) + } +} + +trait LimiterBackend { + import LimiterBackend._ + + val incrementFeature: IncrementFeature + val getFeatureUsage: GetFeatureUsage +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala new file mode 100644 index 000000000..289c92c0b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala @@ -0,0 +1,193 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.limiter.thriftscala.FeatureRequest +import com.twitter.tweetypie.backends.LimiterBackend.GetFeatureUsage +import com.twitter.tweetypie.backends.LimiterBackend.IncrementFeature +import com.twitter.tweetypie.backends.LimiterService.Feature + +/** + * Why does LimiterService exist? + * + * The underlying Limiter thrift service doesn't support batching. This trait and implementation + * basically exist to allow a batch like interface to the Limiter. This keeps us from having to + * spread batching throughout our code base. + * + * Why is LimiterService in the backends package? + * + * In some ways it is like a backend if the backend supports batching. There is a modest amount of + * business logic LimiterService, but that logic exists here to allow easier consumption throughout + * the tweetypie code base. We did look at moving LimiterService to another package, but all likely + * candidates (service, serverutil) caused circular dependencies. + * + * When I need to add functionality, should I add it to LimiterBackend or LimiterService? + * + * LimiterBackend is used as a simple wrapper around the Limiter thrift client. The LimiterBackend + * should be kept as dumb as possible. You will most likely want to add the functionality in + * LimiterService. 
+ */ +object LimiterService { + type MinRemaining = (UserId, Option[UserId]) => Future[Int] + type HasRemaining = (UserId, Option[UserId]) => Future[Boolean] + type Increment = (UserId, Option[UserId], Int) => Future[Unit] + type IncrementByOne = (UserId, Option[UserId]) => Future[Unit] + + sealed abstract class Feature(val name: String, val hasPerApp: Boolean = false) { + def forUser(userId: UserId): FeatureRequest = FeatureRequest(name, userId = Some(userId)) + def forApp(appId: AppId): Option[FeatureRequest] = + if (hasPerApp) { + Some( + FeatureRequest( + s"${name}_per_app", + applicationId = Some(appId), + identifier = Some(appId.toString) + ) + ) + } else { + None + } + } + object Feature { + case object Updates extends Feature("updates", hasPerApp = true) + case object MediaTagCreate extends Feature("media_tag_create") + case object TweetCreateFailure extends Feature("tweet_creation_failure") + } + + def fromBackend( + incrementFeature: IncrementFeature, + getFeatureUsage: GetFeatureUsage, + getAppId: => Option[ + AppId + ], // the call-by-name here to invoke per request to get the current request's app id + stats: StatsReceiver = NullStatsReceiver + ): LimiterService = + new LimiterService { + def increment( + feature: Feature + )( + userId: UserId, + contributorUserId: Option[UserId], + amount: Int + ): Future[Unit] = { + Future.when(amount > 0) { + def increment(req: FeatureRequest): Future[Unit] = incrementFeature((req, amount)) + + val incrementUser: Option[Future[Unit]] = + Some(increment(feature.forUser(userId))) + + val incrementContributor: Option[Future[Unit]] = + for { + id <- contributorUserId + if id != userId + } yield increment(feature.forUser(id)) + + val incrementPerApp: Option[Future[Unit]] = + for { + appId <- getAppId + req <- feature.forApp(appId) + } yield increment(req) + + Future.collect(Seq(incrementUser, incrementContributor, incrementPerApp).flatten) + } + } + + def minRemaining( + feature: Feature + )( + userId: UserId, + 
contributorUserId: Option[UserId] + ): Future[Int] = { + def getRemaining(req: FeatureRequest): Future[Int] = getFeatureUsage(req).map(_.remaining) + + val getUserRemaining: Option[Future[Int]] = + Some(getRemaining(feature.forUser(userId))) + + val getContributorRemaining: Option[Future[Int]] = + contributorUserId.map(id => getRemaining(feature.forUser(id))) + + val getPerAppRemaining: Option[Future[Int]] = + for { + appId <- getAppId + req <- feature.forApp(appId) + } yield getRemaining(req) + + Future + .collect(Seq(getUserRemaining, getContributorRemaining, getPerAppRemaining).flatten) + .map(_.min) + } + } +} + +trait LimiterService { + + /** + * Increment the feature count for both the user and the contributor. If either increment fails, + * the resulting future will be the first exception encountered. + * + * @param feature The feature that is incremented + * @param userId The current user tied to the current request + * @param contributorUserId The contributor, if one exists, tied to the current request + * @param amount The amount that each feature should be incremented. + */ + def increment( + feature: Feature + )( + userId: UserId, + contributorUserId: Option[UserId], + amount: Int + ): Future[Unit] + + /** + * Increment the feature count, by one, for both the user and the contributor. If either + * increment fails, the resulting future will be the first exception encountered. + * + * @param feature The feature that is incremented + * @param userId The current user tied to the current request + * @param contributorUserId The contributor, if one exists, tied to the current request + * + * @see [[increment]] if you want to increment a feature by a specified amount + */ + def incrementByOne( + feature: Feature + )( + userId: UserId, + contributorUserId: Option[UserId] + ): Future[Unit] = + increment(feature)(userId, contributorUserId, 1) + + /** + * The minimum remaining limit between the user and contributor. 
If an exception occurs, then the + * resulting Future will be the first exception encountered. + * + * @param feature The feature that is queried + * @param userId The current user tied to the current request + * @param contributorUserId The contributor, if one exists, tied to the current request + * + * @return a `Future[Int]` with the minimum limit left between the user and contributor + */ + def minRemaining(feature: Feature)(userId: UserId, contributorUserId: Option[UserId]): Future[Int] + + /** + * Can the user and contributor increment the given feature. If the result cannot be determined + * because of an exception, then we assume they can increment. This will allow us to continue + * servicing requests even if the limiter service isn't responding. + * + * @param feature The feature that is queried + * @param userId The current user tied to the current request + * @param contributorUserId The contributor, if one exists, tied to the current request + * @return a `Future[Boolean]` with true if both the user and contributor have remaining limit + * cap. 
+ * + * @see [[minRemaining]] if you would like to handle any exceptions that occur on your own + */ + def hasRemaining( + feature: Feature + )( + userId: UserId, + contributorUserId: Option[UserId] + ): Future[Boolean] = + minRemaining(feature)(userId, contributorUserId) + .map(_ > 0) + .handle { case _ => true } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala new file mode 100644 index 000000000..ce4e0838e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala @@ -0,0 +1,46 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.servo.exception.thriftscala +import com.twitter.servo.exception.thriftscala.ClientErrorCause +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.TimeoutManhattanException +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.storage.TweetStorageClient.Ping +import com.twitter.tweetypie.storage.ClientError +import com.twitter.tweetypie.storage.RateLimited +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.util.StitchUtils +import com.twitter.util.TimeoutException + +object Manhattan { + def fromClient(underlying: TweetStorageClient): TweetStorageClient = + new TweetStorageClient { + val addTweet = translateExceptions(underlying.addTweet) + val deleteAdditionalFields = translateExceptions(underlying.deleteAdditionalFields) + val getDeletedTweets = translateExceptions(underlying.getDeletedTweets) + val getTweet = translateExceptions(underlying.getTweet) + val getStoredTweet = translateExceptions(underlying.getStoredTweet) + val scrub = translateExceptions(underlying.scrub) + val softDelete = translateExceptions(underlying.softDelete) + val undelete = translateExceptions(underlying.undelete) + val updateTweet = translateExceptions(underlying.updateTweet) + val 
hardDeleteTweet = translateExceptions(underlying.hardDeleteTweet) + val ping: Ping = underlying.ping + val bounceDelete = translateExceptions(underlying.bounceDelete) + } + + private[backends] object translateExceptions { + private[this] def pf: PartialFunction[Throwable, Throwable] = { + case e: RateLimited => OverCapacity(s"storage: ${e.getMessage}") + case e: TimeoutManhattanException => new TimeoutException(e.getMessage) + case e: ClientError => thriftscala.ClientError(ClientErrorCause.BadRequest, e.message) + } + + def apply[A, B](f: A => Stitch[B]): A => Stitch[B] = + a => StitchUtils.translateExceptions(f(a), pf) + + def apply[A, B, C](f: (A, B) => Stitch[C]): (A, B) => Stitch[C] = + (a, b) => StitchUtils.translateExceptions(f(a, b), pf) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala new file mode 100644 index 000000000..a355507cf --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala @@ -0,0 +1,43 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.service.RetryPolicy +import com.twitter.mediainfo.server.thriftscala.GetTweetMediaInfoRequest +import com.twitter.mediainfo.server.thriftscala.GetTweetMediaInfoResponse +import com.twitter.mediainfo.server.{thriftscala => mis} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object MediaInfoService { + import Backend._ + + type GetTweetMediaInfo = FutureArrow[mis.GetTweetMediaInfoRequest, mis.GetTweetMediaInfoResponse] + + def fromClient(client: mis.MediaInfoService.MethodPerEndpoint): MediaInfoService = + new MediaInfoService { + val getTweetMediaInfo = FutureArrow(client.getTweetMediaInfo) + } + + case class Config( + requestTimeout: Duration, + totalTimeout: Duration, + timeoutBackoffs: Stream[Duration]) { + + def apply(svc: 
MediaInfoService, ctx: Backend.Context): MediaInfoService = + new MediaInfoService { + val getTweetMediaInfo: FutureArrow[GetTweetMediaInfoRequest, GetTweetMediaInfoResponse] = + policy("getTweetMediaInfo", ctx)(svc.getTweetMediaInfo) + } + + private[this] def policy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx, totalTimeout = totalTimeout) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) + } +} + +trait MediaInfoService { + import MediaInfoService._ + val getTweetMediaInfo: GetTweetMediaInfo +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala new file mode 100644 index 000000000..d8df2beb5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala @@ -0,0 +1,73 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.service.gen.scarecrow.thriftscala.CheckTweetResponse +import com.twitter.service.gen.scarecrow.thriftscala.Retweet +import com.twitter.service.gen.scarecrow.thriftscala.TieredAction +import com.twitter.service.gen.scarecrow.thriftscala.TweetContext +import com.twitter.service.gen.scarecrow.thriftscala.TweetNew +import com.twitter.service.gen.scarecrow.{thriftscala => scarecrow} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object Scarecrow { + import Backend._ + + type CheckTweet2 = + FutureArrow[(scarecrow.TweetNew, scarecrow.TweetContext), scarecrow.CheckTweetResponse] + type CheckRetweet = FutureArrow[scarecrow.Retweet, scarecrow.TieredAction] + + def fromClient(client: scarecrow.ScarecrowService.MethodPerEndpoint): Scarecrow = + new Scarecrow { + val checkTweet2 = 
FutureArrow((client.checkTweet2 _).tupled) + val checkRetweet = FutureArrow(client.checkRetweet _) + def ping(): Future[Unit] = client.ping() + } + + case class Config( + readTimeout: Duration, + writeTimeout: Duration, + timeoutBackoffs: Stream[Duration], + scarecrowExceptionBackoffs: Stream[Duration]) { + def apply(svc: Scarecrow, ctx: Backend.Context): Scarecrow = + new Scarecrow { + val checkTweet2: FutureArrow[(TweetNew, TweetContext), CheckTweetResponse] = + writePolicy("checkTweet2", ctx)(svc.checkTweet2) + val checkRetweet: FutureArrow[Retweet, TieredAction] = + writePolicy("checkRetweet", ctx)(svc.checkRetweet) + def ping(): Future[Unit] = svc.ping() + } + + private[this] def readPolicy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, readTimeout, readRetryPolicy, ctx) + + private[this] def writePolicy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, writeTimeout, nullRetryPolicy, ctx) + + private[this] def readRetryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicy.combine[Try[B]]( + RetryPolicyBuilder.timeouts[B](timeoutBackoffs), + RetryPolicy.backoff(Backoff.fromStream(scarecrowExceptionBackoffs)) { + case Throw(ex: scarecrow.InternalServerError) => true + } + ) + + private[this] def nullRetryPolicy[B]: RetryPolicy[Try[B]] = + // retry policy that runs once, and will not retry on any exception + RetryPolicy.backoff(Backoff.fromStream(Stream(0.milliseconds))) { + case Throw(_) => false + } + } + + implicit val warmup: Warmup[Scarecrow] = Warmup[Scarecrow]("scarecrow")(_.ping()) +} + +trait Scarecrow { + import Scarecrow._ + val checkTweet2: CheckTweet2 + val checkRetweet: CheckRetweet + def ping(): Future[Unit] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala new file mode 100644 index 000000000..37ac1243d --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala
@@ -0,0 +1,69 @@
+package com.twitter.tweetypie
+package backends
+
+import com.twitter.finagle.service.RetryPolicy
+import com.twitter.servo.util.FutureArrow
+import com.twitter.socialgraph.thriftscala.ExistsRequest
+import com.twitter.socialgraph.thriftscala.ExistsResult
+import com.twitter.socialgraph.thriftscala.RequestContext
+import com.twitter.socialgraph.{thriftscala => sg}
+import com.twitter.tweetypie.util.RetryPolicyBuilder
+
+/**
+ * Adapters around the SocialGraphService thrift client: `exists` is exposed as a
+ * FutureArrow and `ping()` is used as the warmup action.
+ */
+object SocialGraphService {
+  import Backend._
+
+  /** Takes a batch of exists requests plus an optional request context; yields the results. */
+  type Exists =
+    FutureArrow[(Seq[sg.ExistsRequest], Option[sg.RequestContext]), Seq[sg.ExistsResult]]
+
+  /** Adapts a raw thrift client to [[SocialGraphService]]; no policy is applied here. */
+  def fromClient(client: sg.SocialGraphService.MethodPerEndpoint): SocialGraphService =
+    new SocialGraphService {
+      val exists = FutureArrow((client.exists _).tupled)
+      // Declared with `()` exactly as in the trait: ping calls the backend, so it is
+      // side-effecting and must not be overridden as a parameterless accessor.
+      def ping(): Future[Unit] = client.ping().unit
+    }
+
+  /**
+   * Policy settings for the `exists` endpoint.
+   *
+   * @param socialGraphTimeout request timeout passed to `defaultPolicy` for `exists`
+   * @param timeoutBackoffs backoffs passed to `RetryPolicyBuilder.timeouts`
+   */
+  case class Config(socialGraphTimeout: Duration, timeoutBackoffs: Stream[Duration]) {
+
+    /** Applies the configured policy to `svc.exists`; `ping()` is delegated as-is. */
+    def apply(svc: SocialGraphService, ctx: Backend.Context): SocialGraphService =
+      new SocialGraphService {
+        val exists: FutureArrow[(Seq[ExistsRequest], Option[RequestContext]), Seq[ExistsResult]] =
+          policy("exists", socialGraphTimeout, ctx)(svc.exists)
+        def ping(): Future[Unit] = svc.ping()
+      }
+
+    private[this] def policy[A, B](
+      name: String,
+      requestTimeout: Duration,
+      ctx: Context
+    ): Builder[A, B] =
+      defaultPolicy(name, requestTimeout, retryPolicy, ctx)
+
+    private[this] def retryPolicy[B]: RetryPolicy[Try[B]] =
+      RetryPolicyBuilder.timeouts[Any](timeoutBackoffs)
+  }
+
+  /** Warmup action: pings the backend. */
+  implicit val warmup: Warmup[SocialGraphService] =
+    Warmup[SocialGraphService]("socialgraphservice")(_.ping())
+}
+
+/** A SocialGraphService backend: the `exists` arrow plus the `ping()` used for warmup. */
+trait SocialGraphService {
+  import SocialGraphService._
+  val exists: Exists
+  def ping(): Future[Unit]
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala new file mode 100644
index 000000000..e056db8c9
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala
@@ -0,0 +1,127 @@
+package com.twitter.tweetypie
+package backends
+
+import com.twitter.finagle.Backoff
+import com.twitter.finagle.service.RetryPolicy
+import com.twitter.flockdb.client.{thriftscala => flockdb, _}
+import com.twitter.servo
+import com.twitter.servo.util.RetryHandler
+import com.twitter.tweetypie.core.OverCapacity
+import com.twitter.tweetypie.util.RetryPolicyBuilder
+import com.twitter.util.Future
+import com.twitter.util.TimeoutException
+
+/**
+ * Builds [[TFlockClient]] instances on top of a FlockDB thrift client, adding a per-call
+ * timeout, FlockException logging, exception translation, failure counting and retries.
+ */
+object TFlock {
+  // Used by logFlockExceptions below.
+  val log = Logger(this.getClass)
+
+  /**
+   * Settings for wrapping a FlockDB client.
+   *
+   * @param requestTimeout time limit applied to every wrapped call via `raiseWithin`
+   * @param timeoutBackoffs backoffs handed to [[retryPolicy]] for timeouts
+   * @param flockExceptionBackoffs backoffs handed to [[retryPolicy]] for non-overcapacity
+   *                               FlockExceptions
+   * @param overCapacityBackoffs backoffs handed to [[retryPolicy]] for over-capacity failures
+   * @param defaultPageSize passed as the second constructor argument of TFlockClient
+   */
+  case class Config(
+    requestTimeout: Duration,
+    timeoutBackoffs: Stream[Duration],
+    flockExceptionBackoffs: Stream[Duration],
+    overCapacityBackoffs: Stream[Duration],
+    defaultPageSize: Int = 1000) {
+    /**
+     * Wraps `svc` so that each call is time-limited with `raiseWithin`, has FlockExceptions
+     * logged, has failures matched by [[translateExceptions]] replaced, and is then passed
+     * through `exceptionCounter` and `retryHandler` (outermost).
+     */
+    def apply(svc: flockdb.FlockDB.MethodPerEndpoint, ctx: Backend.Context): TFlockClient = {
+      val retryHandler =
+        RetryHandler[Any](
+          retryPolicy(timeoutBackoffs, flockExceptionBackoffs, overCapacityBackoffs),
+          ctx.timer,
+          ctx.stats
+        )
+      // Partial: only exceptions that translateExceptions is defined for get rescued.
+      val rescueHandler = translateExceptions.andThen(Future.exception)
+      val exceptionCounter = new servo.util.ExceptionCounter(ctx.stats, "failures")
+      // A single exception instance is created here and reused for every timed-out call.
+      val timeoutException = new TimeoutException(s"tflock: $requestTimeout")
+      val wrapper =
+        new WrappingFunction {
+          def apply[T](f: => Future[T]): Future[T] =
+            retryHandler {
+              exceptionCounter {
+                f.raiseWithin(ctx.timer, requestTimeout, timeoutException)
+                  .onFailure(logFlockExceptions)
+                  .rescue(rescueHandler)
+              }
+            }
+        }
+
+      // The same wrapper is supplied for both wrapper arguments of WrappingFlockClient.
+      val wrappedClient = new WrappingFlockClient(svc, wrapper, wrapper)
+      val statsClient = new StatsCollectingFlockService(wrappedClient, ctx.stats)
+      new TFlockClient(statsClient, defaultPageSize)
+    }
+  }
+
+  /** True when `ex.errorCode` is the read or the write over-capacity constant. */
+  def isOverCapacity(ex: flockdb.FlockException): Boolean =
+    ex.errorCode match {
+      case Some(flockdb.Constants.READ_OVERCAPACITY_ERROR) => true
+      case Some(flockdb.Constants.WRITE_OVERCAPACITY_ERROR) => true
+      case _ => false
+    }
+
+  /**
+   * Builds a RetryPolicy for tflock operations that will retry timeouts with the specified
+   * timeout backoffs, and will retry non-overcapacity FlockExceptions with the
+   * specified flockExceptionBackoffs backoffs, and will retry over-capacity exceptions with
+   * the specified overCapacityBackoffs.
+   *
+   * FlockQuotaException and tweetypie's OverCapacity are handled by the over-capacity policy;
+   * the FlockException policy explicitly declines FlockQuotaException.
+   */
+  def retryPolicy(
+    timeoutBackoffs: Stream[Duration],
+    flockExceptionBackoffs: Stream[Duration],
+    overCapacityBackoffs: Stream[Duration]
+  ): RetryPolicy[Try[Any]] =
+    RetryPolicy.combine[Try[Any]](
+      RetryPolicyBuilder.timeouts[Any](timeoutBackoffs),
+      RetryPolicy.backoff(Backoff.fromStream(flockExceptionBackoffs)) {
+        case Throw(ex: flockdb.FlockException) if !isOverCapacity(ex) => true
+        case Throw(_: flockdb.FlockQuotaException) => false
+      },
+      RetryPolicy.backoff(Backoff.fromStream(overCapacityBackoffs)) {
+        case Throw(ex: flockdb.FlockException) if isOverCapacity(ex) => true
+        case Throw(_: flockdb.FlockQuotaException) => true
+        case Throw(_: OverCapacity) => true
+      }
+    )
+
+  /** Logs FlockExceptions at info level; every other Throwable is ignored. */
+  val logFlockExceptions: Throwable => Unit = {
+    case t: flockdb.FlockException => {
+      log.info("FlockException from TFlock", t)
+    }
+    case _ =>
+  }
+
+  /**
+   * Converts FlockQuotaExceptions and FlockExceptions with overcapacity codes into
+   * tweetypie's OverCapacity. Not defined for any other Throwable.
+   */
+  val translateExceptions: PartialFunction[Throwable, Throwable] = {
+    case t: flockdb.FlockQuotaException =>
+      OverCapacity(s"tflock: throttled ${t.description}")
+    case t: flockdb.FlockException if isOverCapacity(t) =>
+      OverCapacity(s"tflock: ${t.description}")
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala
new file mode 100644
index 000000000..95385b510
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala
@@ -0,0 +1,121 @@
+package com.twitter.tweetypie
+package backends
+
+import com.twitter.finagle.Backoff
+import com.twitter.finagle.service.RetryPolicy
+import com.twitter.service.talon.thriftscala.ExpandRequest
+import com.twitter.service.talon.thriftscala.ExpandResponse
+import com.twitter.service.talon.thriftscala.ResponseCode
+import com.twitter.service.talon.thriftscala.ShortenRequest
+import com.twitter.service.talon.thriftscala.ShortenResponse
+import com.twitter.service.talon.{thriftscala => talon}
+import com.twitter.servo.util.FutureArrow
+import com.twitter.tweetypie.core.OverCapacity
+import com.twitter.tweetypie.util.RetryPolicyBuilder
+
+/**
+ * Backend wrapper for the Talon thrift service: exposes `shorten` and `expand` as
+ * FutureArrows and turns error response codes into failed Futures.
+ */
+object Talon {
+  import Backend._
+
+  type Expand = FutureArrow[talon.ExpandRequest, talon.ExpandResponse]
+  type Shorten = FutureArrow[talon.ShortenRequest, talon.ShortenResponse]
+
+  /** Failure produced for ResponseCode.TransientError; retried by Config's retry policy. */
+  case object TransientError extends Exception()
+  /** Failure produced for ResponseCode.PermanentError. */
+  case object PermanentError extends Exception()
+
+  /** Adapts a raw thrift client to [[Talon]]; `ping()` is implemented with `serviceInfo()`. */
+  def fromClient(client: talon.Talon.MethodPerEndpoint): Talon =
+    new Talon {
+      val shorten = FutureArrow(client.shorten _)
+      val expand = FutureArrow(client.expand _)
+      def ping(): Future[Unit] = client.serviceInfo().unit
+    }
+
+  /**
+   * Policy settings for the Talon endpoints.
+   *
+   * @param shortenTimeout request timeout passed to `defaultPolicy` for `shorten`
+   * @param expandTimeout request timeout passed to `defaultPolicy` for `expand`
+   * @param timeoutBackoffs backoffs passed to `RetryPolicyBuilder.timeouts`
+   * @param transientErrorBackoffs backoffs used when retrying [[TransientError]]
+   */
+  case class Config(
+    shortenTimeout: Duration,
+    expandTimeout: Duration,
+    timeoutBackoffs: Stream[Duration],
+    transientErrorBackoffs: Stream[Duration]) {
+    /** Applies response-code handling and the default policy to both endpoints of `svc`. */
+    def apply(svc: Talon, ctx: Backend.Context): Talon =
+      new Talon {
+        val shorten: FutureArrow[ShortenRequest, ShortenResponse] =
+          policy("shorten", shortenTimeout, shortenResponseCode, ctx)(svc.shorten)
+        val expand: FutureArrow[ExpandRequest, ExpandResponse] =
+          policy("expand", expandTimeout, expandResponseCode, ctx)(svc.expand)
+        def ping(): Future[Unit] = svc.ping()
+      }
+
+    // NOTE(review): assuming Builder is a plain function on FutureArrows (it is applied to
+    // svc.shorten / svc.expand above), `andThen` means handleResponseCodes wraps the raw arrow
+    // first and defaultPolicy then wraps that result — confirm against Backend.
+    private[this] def policy[A, B](
+      name: String,
+      requestTimeout: Duration,
+      getResponseCode: B => talon.ResponseCode,
+      ctx: Context
+    ): Builder[A, B] =
+      handleResponseCodes(name, getResponseCode, ctx) andThen
+        defaultPolicy(name, requestTimeout, retryPolicy, ctx)
+
+    /** Retries timeouts per `timeoutBackoffs` and [[TransientError]] per `transientErrorBackoffs`. */
+    private[this] def retryPolicy[B]: RetryPolicy[Try[B]] =
+      RetryPolicy.combine[Try[B]](
+        RetryPolicyBuilder.timeouts[B](timeoutBackoffs),
+        RetryPolicy.backoff(Backoff.fromStream(transientErrorBackoffs)) {
+          case Throw(TransientError) => true
+        }
+      )
+
+    /**
+     * Appends a step to the arrow that increments a counter named after the response code
+     * (under the `name` / "response_code" stats scopes) and fails the Future for
+     * TransientError, PermanentError and ServerOverloaded; any other code passes `res` through.
+     */
+    private[this] def handleResponseCodes[A, B](
+      name: String,
+      extract: B => talon.ResponseCode,
+      ctx: Context
+    ): Builder[A, B] = {
+      val scopedStats = ctx.stats.scope(name)
+      val responseCodeStats = scopedStats.scope("response_code")
+      _ andThen FutureArrow[B, B] { res =>
+        val responseCode = extract(res)
+        responseCodeStats.counter(responseCode.toString).incr()
+        responseCode match {
+          case talon.ResponseCode.TransientError => Future.exception(TransientError)
+          case talon.ResponseCode.PermanentError => Future.exception(PermanentError)
+          case talon.ResponseCode.ServerOverloaded => Future.exception(OverCapacity("talon"))
+          case _ => Future.value(res)
+        }
+      }
+    }
+  }
+
+  /** Response-code extractors handed to `policy` by Config.apply. */
+  def shortenResponseCode(res: talon.ShortenResponse): ResponseCode = res.responseCode
+  def expandResponseCode(res: talon.ExpandResponse): ResponseCode = res.responseCode
+
+  /** Warmup action: pings the backend. */
+  implicit val warmup: Warmup[Talon] = Warmup[Talon]("talon")(_.ping())
+}
+
+trait Talon {
+  import Talon._
+  val shorten: Shorten
+  val expand: Expand
+  def ping(): Future[Unit]
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala
new file mode 100644
index 000000000..a8c9b74db
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala
@@ -0,0 +1,106 @@
+package com.twitter.tweetypie
+package backends
+
+import com.twitter.finagle.Backoff
+import com.twitter.finagle.service.RetryPolicy
+import com.twitter.servo.util.FutureArrow
+import com.twitter.timelineservice.thriftscala.Event
+import com.twitter.timelineservice.thriftscala.PerspectiveQuery
+import com.twitter.timelineservice.thriftscala.PerspectiveResult
+import com.twitter.timelineservice.thriftscala.ProcessEventResult
+import com.twitter.timelineservice.thriftscala.StatusTimelineResult
+import com.twitter.timelineservice.thriftscala.TimelineQuery
+import com.twitter.timelineservice.{thriftscala => tls}
+import com.twitter.tweetypie.util.RetryPolicyBuilder
+
+/**
+ * Backend wrapper for the TimelineService thrift client: exposes `processEvent2`,
+ * `getStatusTimeline` and `getPerspectives` as FutureArrows, plus a `ping()` for warmup.
+ */
+object TimelineService {
+  import Backend._
+
+  type GetStatusTimeline = FutureArrow[Seq[tls.TimelineQuery], Seq[tls.StatusTimelineResult]]
+  type GetPerspectives = FutureArrow[Seq[tls.PerspectiveQuery], Seq[tls.PerspectiveResult]]
+  type ProcessEvent2 = FutureArrow[tls.Event, tls.ProcessEventResult]
+
+  private val warmupQuery =
+    // we need a non-empty query, since tls treats empty queries as an error
+    tls.TimelineQuery(
+      timelineType = tls.TimelineType.User,
+      timelineId = 620530287L, // same user id that timelineservice-api uses for warmup
+      maxCount = 1
+    )
+
+  /**
+   * Adapts a raw thrift client to [[TimelineService]]. `ping()` sends `touchTimeline` with
+   * `warmupQuery` and turns a tls.InternalServerError failure into a successful result.
+   */
+  def fromClient(client: tls.TimelineService.MethodPerEndpoint): TimelineService =
+    new TimelineService {
+      val processEvent2 = FutureArrow(client.processEvent2 _)
+      val getStatusTimeline = FutureArrow(client.getStatusTimeline _)
+      val getPerspectives = FutureArrow(client.getPerspectives _)
+      def ping(): Future[Unit] =
+        client.touchTimeline(Seq(warmupQuery)).handle { case _: tls.InternalServerError => }
+    }
+
+  /**
+   * Policy settings for the backend.
+   *
+   * @param writeRequestPolicy policy applied to `processEvent2`
+   * @param readRequestPolicy policy applied to `getStatusTimeline` and `getPerspectives`
+   */
+  case class Config(writeRequestPolicy: Policy, readRequestPolicy: Policy) {
+
+    /** Builds each arrow through a PolicyAdvocate; `ping()` is delegated as-is. */
+    def apply(svc: TimelineService, ctx: Backend.Context): TimelineService = {
+      val build = new PolicyAdvocate("TimelineService", ctx, svc)
+      new TimelineService {
+        val processEvent2: FutureArrow[Event, ProcessEventResult] =
+          build("processEvent2", writeRequestPolicy, _.processEvent2)
+        val getStatusTimeline: FutureArrow[Seq[TimelineQuery], Seq[StatusTimelineResult]] =
+          build("getStatusTimeline", readRequestPolicy, _.getStatusTimeline)
+        val getPerspectives: FutureArrow[Seq[PerspectiveQuery], Seq[PerspectiveResult]] =
+          build("getPerspectives", readRequestPolicy, _.getPerspectives)
+        def ping(): Future[Unit] = svc.ping()
+      }
+    }
+  }
+
+  /**
+   * A [[Policy]] whose retry behavior is: timeouts per `timeoutBackoffs`, and
+   * tls.InternalServerError per `tlsExceptionBackoffs`. Both default to an empty stream.
+   */
+  case class FailureBackoffsPolicy(
+    timeoutBackoffs: Stream[Duration] = Stream.empty,
+    tlsExceptionBackoffs: Stream[Duration] = Stream.empty)
+      extends Policy {
+    /** The combined retry policy wrapped in a FailureRetryPolicy. */
+    def toFailureRetryPolicy: FailureRetryPolicy =
+      FailureRetryPolicy(
+        RetryPolicy.combine(
+          RetryPolicyBuilder.timeouts(timeoutBackoffs),
+          RetryPolicy.backoff(Backoff.fromStream(tlsExceptionBackoffs)) {
+            case Throw(ex: tls.InternalServerError) => true
+          }
+        )
+      )
+
+    /** Builds the policy for `name` by applying [[toFailureRetryPolicy]] to `(name, ctx)`. */
+    def apply[A, B](name: String, ctx: Context): Builder[A, B] =
+      toFailureRetryPolicy(name, ctx)
+  }
+
+  /** Warmup action: pings the backend. */
+  implicit val warmup: Warmup[TimelineService] =
+    Warmup[TimelineService]("timelineservice")(_.ping())
+}
+
+trait TimelineService {
+  import TimelineService._
+  val processEvent2: ProcessEvent2
+  val getStatusTimeline: GetStatusTimeline
+  val getPerspectives: GetPerspectives
+  def ping(): Future[Unit]
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala new file mode 100644 index 000000000..e756d5202 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala @@ -0,0 +1,71 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.service.RetryPolicy +import com.twitter.servo.util.FutureArrow +import
com.twitter.tweetypie.util.RetryPolicyBuilder +import com.twitter.user_image_service.thriftscala.ProcessTweetMediaRequest +import com.twitter.user_image_service.thriftscala.ProcessTweetMediaResponse +import com.twitter.user_image_service.thriftscala.UpdateProductMetadataRequest +import com.twitter.user_image_service.thriftscala.UpdateTweetMediaRequest +import com.twitter.user_image_service.thriftscala.UpdateTweetMediaResponse +import com.twitter.user_image_service.{thriftscala => uis} + +object UserImageService { + import Backend._ + + type ProcessTweetMedia = FutureArrow[uis.ProcessTweetMediaRequest, uis.ProcessTweetMediaResponse] + type UpdateProductMetadata = FutureArrow[uis.UpdateProductMetadataRequest, Unit] + type UpdateTweetMedia = FutureArrow[uis.UpdateTweetMediaRequest, uis.UpdateTweetMediaResponse] + + def fromClient(client: uis.UserImageService.MethodPerEndpoint): UserImageService = + new UserImageService { + val processTweetMedia = FutureArrow(client.processTweetMedia) + val updateProductMetadata: FutureArrow[UpdateProductMetadataRequest, Unit] = FutureArrow( + client.updateProductMetadata).unit + val updateTweetMedia = FutureArrow(client.updateTweetMedia) + } + + case class Config( + processTweetMediaTimeout: Duration, + updateTweetMediaTimeout: Duration, + timeoutBackoffs: Stream[Duration]) { + + def apply(svc: UserImageService, ctx: Backend.Context): UserImageService = + new UserImageService { + val processTweetMedia: FutureArrow[ProcessTweetMediaRequest, ProcessTweetMediaResponse] = + policy("processTweetMedia", processTweetMediaTimeout, ctx)(svc.processTweetMedia) + val updateProductMetadata: FutureArrow[UpdateProductMetadataRequest, Unit] = + policy("updateProductMetadata", processTweetMediaTimeout, ctx)(svc.updateProductMetadata) + val updateTweetMedia: FutureArrow[UpdateTweetMediaRequest, UpdateTweetMediaResponse] = + policy("updateTweetMedia", updateTweetMediaTimeout, ctx)(svc.updateTweetMedia) + } + + private[this] def policy[A, B]( + name: 
String, + requestTimeout: Duration, + ctx: Context + ): Builder[A, B] = + defaultPolicy( + name = name, + requestTimeout = requestTimeout, + retryPolicy = retryPolicy, + ctx = ctx, + exceptionCategorizer = { + case _: uis.BadRequest => Some("success") + case _ => None + } + ) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) + } +} + +trait UserImageService { + import UserImageService._ + + val processTweetMedia: ProcessTweetMedia + val updateProductMetadata: UpdateProductMetadata + val updateTweetMedia: UpdateTweetMedia +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala new file mode 100644 index 000000000..06c61934e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala @@ -0,0 +1,266 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.concurrent.AsyncSemaphore +import com.twitter.util.Timer +import com.twitter.util.Promise +import scala.util.control.NoStackTrace + +/** + * Tools for building warmup actions on backend clients. The basic + * idea is to make requests to backends repeatedly until they succeed. + */ +object Warmup { + + /** + * Signals that a warmup action was aborted because warmup is + * complete. + */ + object WarmupComplete extends Exception with NoStackTrace + + /** + * Configuration for warmup actions. + * + * @param maxOutstandingRequests: Limit on total number of outstanding warmup requests. + * @param maxWarmupDuration: Total amount of time warmup is allowed to take. + * @param requestTimeouts: Time limit for individual warmup actions. + * @param reliability: Criteria for how many times each warmup should be run. 
+ */ + case class Settings( + maxOutstandingRequests: Int, + maxWarmupDuration: Duration, + requestTimeouts: Map[String, Duration], + reliability: Reliably) { + def toRunner(logger: Logger, timer: Timer): Runner = + new WithTimeouts(requestTimeouts, timer) + .within(new Logged(logger)) + .within(new LimitedConcurrency(maxOutstandingRequests)) + .within(reliability) + + def apply[A: Warmup](value: A, logger: Logger, timer: Timer): Future[Unit] = + toRunner(logger, timer) + .run(value) + .raiseWithin(maxWarmupDuration)(timer) + .handle { case _ => } + } + + /** + * Strategy for running Warmup actions. + */ + trait Runner { self => + + /** + * Run one single warmup action. + */ + def runOne(name: String, action: => Future[Unit]): Future[Unit] + + /** + * Compose these two Runners by calling this Runner's runOne + * inside of other's runOne. + */ + final def within(other: Runner): Runner = + new Runner { + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = + other.runOne(name, self.runOne(name, action)) + } + + /** + * Execute all of the warmup actions for the given value using + * this runner. + */ + final def run[T](t: T)(implicit w: Warmup[T]): Future[Unit] = + Future.join(w.actions.toSeq.map { case (name, f) => runOne(name, f(t).unit) }) + } + + /** + * Set a ceiling on the amount of time each kind of warmup action is + * allowed to take. + */ + class WithTimeouts(timeouts: Map[String, Duration], timer: Timer) extends Runner { + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = + timeouts.get(name).map(action.raiseWithin(_)(timer)).getOrElse(action) + } + + /** + * Execute each action until its reliability is estimated to be + * above the given threshold. The reliability is initially assumed + * to be zero. 
The reliability is estimated as an exponential moving + * average, with the new data point given the appropriate weight so + * that a single data point will no longer be able to push the + * average below the threshold. + * + * The warmup action is considered successful if it does not throw + * an exception. No timeouts are applied. + * + * The threshold must be in the interval [0, 1). + * + * The concurrency level determines how many outstanding requests + * to maintain until the threshold is reached. This allows warmup + * to happen more rapidly when individual requests have high + * latency. + * + * maxAttempts limits the total number of tries that we are allowed + * to try to reach the reliability threshold. This is a safety + * measure to prevent overloading whatever subsystem we are + * attempting to warm up. + */ + case class Reliably(reliabilityThreshold: Double, concurrency: Int, maxAttempts: Int) + extends Runner { + require(reliabilityThreshold < 1) + require(reliabilityThreshold >= 0) + require(concurrency > 0) + require(maxAttempts > 0) + + // Find the weight at which one failure will not push us under the + // reliabilityThreshold. The formula is NaN at 0 (1^Infinity), so use its limit there. + val weight: Double = + if (reliabilityThreshold == 0) 1 - math.exp(-1) + else + 1 - math.pow(1 - reliabilityThreshold, (1 - reliabilityThreshold) / reliabilityThreshold) + + // Make sure that rounding error did not cause weight to become zero. + require(weight > 0) + require(weight <= 1) + + // On each iteration, we discount the current reliability by this + // factor before adding in the new reliability data point. + val decay: Double = 1 - weight + + // Make sure that rounding error did not cause decay to become one. + require(decay < 1) + + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = { + def go(attempts: Int, reliability: Double, outstanding: Seq[Future[Unit]]): Future[Unit] = + if (reliability >= reliabilityThreshold || (attempts == 0 && outstanding.isEmpty)) { + // We hit the threshold or ran out of tries. 
Don't cancel any + // outstanding requests, just wait for them all to complete. + Future.join(outstanding.map(_.handle { case _ => })) + } else if (attempts > 0 && outstanding.length < concurrency) { + // We have not yet hit the reliability threshold, and we + // still have available concurrency, so make a new request. + go(attempts - 1, reliability, action +: outstanding) + } else { + val sel = Future.select(outstanding) + + // We need this promise wrapper because if the select is + // interrupted, it relays the interrupt to the outstanding + // requests but does not itself return with a + // failure. Wrapping in a promise lets us differentiate + // between an interrupt coming from above and the created + // Future failing for another reason. + val p = new Promise[(Try[Unit], Seq[Future[Unit]])] + p.setInterruptHandler { + case e => + // Interrupt the outstanding requests. + sel.raise(e) + // Halt the computation with a failure. + p.updateIfEmpty(Throw(e)) + } + + // When the select finishes, update the promise with the value. + sel.respond(p.updateIfEmpty) + p.flatMap { + case (tryRes, remaining) => + val delta = if (tryRes.isReturn) weight else 0 + go(attempts, reliability * decay + delta, remaining) + } + } + + go(maxAttempts, 0, Seq.empty) + } + } + + /** + * Write a log message recording each invocation of each warmup + * action. The log message is comma-separated, with the following + * fields: + * + * name: + * The supplied name. + * + * start time: + * The number of milliseconds since the start of the Unix + * epoch. + * + * duration: + * How long this warmup action took, in milliseconds. + * + * result: + * "passed" or "failed" depending on whether the Future + * returned an exception. + * + * exception type: + * If the result "failed", then this will be the name of + * the exception that caused the failure. If it "passed", + * it will be the empty string. 
+ * + * These messages should be sufficient to get a picture of how + * warmup proceeded, since they allow the messages to be ordered + * and sorted by type. You can use this information to tune the + * warmup parameters. + */ + class Logged(logger: Logger) extends Runner { + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = { + val start = Time.now + val startStr = start.sinceEpoch.inMilliseconds.toString + + action.respond { + case Throw(WarmupComplete) => + // Don't log anything for computations that we abandoned + // because warmup is complete. + + case r => + val duration = (Time.now - start).inMilliseconds + val result = r match { + case Throw(e) => "failed," + e.toString.takeWhile(_ != '\n') + case _ => "passed," + } + logger.info(s"$name,${startStr}ms,${duration}ms,$result") + } + } + } + + /** + * Ensure that no more than the specified number of invocations of a + * warmup action are happening at one time. + */ + class LimitedConcurrency(limit: Int) extends Runner { + private[this] val sem = new AsyncSemaphore(limit) + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = + sem.acquireAndRun(action) + } + + /** + * Create a new Warmup that performs this single action. + */ + def apply[A](name: String)(f: A => Future[_]): Warmup[A] = new Warmup(Map(name -> f)) + + /** + * Create a Warmup that does nothing. This is useful in concert with + * warmField. + */ + def empty[A]: Warmup[A] = new Warmup[A](Map.empty) +} + +/** + * A set of independent warmup actions. Each action should be the + * minimum work that can be done in order to exercise a code + * path. Runners can be used to e.g. run the actions repeatedly or + * with timeouts. + */ +class Warmup[A](val actions: Map[String, A => Future[_]]) { + def ++(other: Warmup[A]) = new Warmup[A](actions ++ other.actions) + + /** + * The names of the individual warmup actions that this warmup is + * composed of. 
+ */ + def names: Set[String] = actions.keySet + + /** + * Create a new Warmup that does all of the actions of this warmup + * and additionally does warmup on the value specified by `f`. + */ + def warmField[B](f: A => B)(implicit w: Warmup[B]): Warmup[A] = + new Warmup[A](actions ++ (w.actions.mapValues(f.andThen))) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD new file mode 100644 index 000000000..7dc0a6379 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD @@ -0,0 +1,135 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/fasterxml/jackson/dataformat:jackson-dataformat-yaml", + "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala", + "3rdparty/jvm/io/netty:netty4-tcnative-boringssl-static", + "3rdparty/jvm/org/apache/kafka:kafka-clients", + "3rdparty/jvm/org/apache/thrift:libthrift", + "ads-common/loggingclient/src/main/scala", + "core-app-services/failed_task/src/scala/com/twitter/coreservices/failed_task/writer", + "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", + "decider", + "deferredrpc/client", + "deferredrpc/client/src/main/thrift:thrift-scala", + "eventbus/client", + "fanoutservice/thrift/src/main/thrift:thrift-scala", + "featureswitches/featureswitches-core:v2", + "featureswitches/featureswitches-core/src/main/scala", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization/server", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/client", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/server", + 
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/transport", + "finagle/finagle-core/src/main", + "finagle/finagle-http/src/main/scala", + "finagle/finagle-memcached/src/main/scala", + "finagle/finagle-mux/src/main/scala", + "finagle/finagle-stats", + "finagle/finagle-thrift", + "finagle/finagle-thrift/src/main/java", + "finagle/finagle-thriftmux", + "finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/producers", + "finatra/inject/inject-slf4j/src/main/scala/com/twitter/inject", + "flock-client/src/main/scala", + "flock-client/src/main/thrift:thrift-scala", + "geoduck/service/src/main/scala/com/twitter/geoduck/service/common/clientmodules", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/service", + "kafka/finagle-kafka/finatra-kafka/src/main/scala", + "limiter-client", + "limiter/thrift-only/src/main/thrift:thrift-scala", + "mediaservices/mediainfo-server/thrift/src/main/thrift:thrift-scala", + "passbird/thrift-only/src/main/thrift:thrift-scala", + "quill/capture", + "quill/core/src/main/thrift:thrift-scala", + "scrooge/scrooge-core", + "tweetypie/servo/repo/src/main/scala", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/request/src/main/scala", + "tweetypie/servo/util", + "snowflake:client", + "snowflake/src/main/scala/com/twitter/snowflake/id", + "snowflake/src/main/thrift:thrift-scala", + "src/scala/com/twitter/ads/internal/pcl/service", + "src/scala/com/twitter/search/blender/services/strato", + "src/thrift/com/twitter/ads/adserver:adserver_rpc-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/escherbird:annotation-service-scala", + "src/thrift/com/twitter/escherbird/metadata:metadata-service-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/geoduck:geoduck-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/merlin:thrift-scala", + 
"src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", + "src/thrift/com/twitter/service/talon/gen:thrift-scala", + "src/thrift/com/twitter/socialgraph:thrift-scala", + "src/thrift/com/twitter/spam/rtf:tweet-rtf-event-scala", + "src/thrift/com/twitter/timelineservice:thrift-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:events-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_comparison_service-scala", + "stitch/stitch-core", + "stitch/stitch-repo/src/main/scala", + "stitch/stitch-timelineservice/src/main/scala", + "storage/clients/manhattan/client/src/main/scala", + "strato/src/main/scala/com/twitter/strato/catalog", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/fed/server", + "strato/src/main/scala/com/twitter/strato/rpc", + "strato/src/main/scala/com/twitter/strato/server", + "strato/src/main/scala/com/twitter/strato/util", + "stringcenter/client/src/main/scala", + "tweetypie/server/config", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/handler", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + 
"tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/service", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/store", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/caching", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/context", + "tweetypie/common/src/scala/com/twitter/tweetypie/decider", + "tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie", + "tweetypie/common/src/scala/com/twitter/tweetypie/matching", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + "tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/tflock", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities", + "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-config/yaml", + "twitter-context", + "twitter-server-internal", + "twitter-server/server/src/main/scala", + "user-image-service/thrift/src/main/thrift:thrift-scala", + "util/util-app", + "util/util-hashing/src/main/scala", + "util/util-slf4j-api/src/main/scala", + "util/util-stats/src/main/scala", + "visibility/common/src/main/scala/com/twitter/visibility/common", + "visibility/common/src/main/scala/com/twitter/visibility/common/tflock", + "visibility/lib:tweets", + "visibility/lib/src/main/scala/com/twitter/visibility/util", + "visibility/writer/src/main/scala/com/twitter/visibility/writer", + "visibility/writer/src/main/scala/com/twitter/visibility/writer/interfaces/tweets", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala new file mode 100644 index 000000000..044177438 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala @@ -0,0 +1,796 @@ +package com.twitter.tweetypie +package config + +import com.twitter.ads.internal.pcl.service.CallbackPromotedContentLogger +import com.twitter.ads.loggingclient.AdsLoggingClient +import com.twitter.adserver.thriftscala.AdCallbackEvent +import com.twitter.conversions.DurationOps._ +import com.twitter.conversions.PercentOps._ +import com.twitter.container.{thriftscala => ccs} +import com.twitter.deferredrpc.client.DeferredThriftService +import com.twitter.deferredrpc.thrift.Datacenter +import com.twitter.deferredrpc.thrift.DeferredRPC +import com.twitter.deferredrpc.thrift.Target +import com.twitter.escherbird.thriftscala.TweetEntityAnnotationService$FinagleClient +import com.twitter.escherbird.thriftscala.{ + TweetEntityAnnotationService => TweetEntityAnnotationScroogeIface +} +import com.twitter.eventbus.client.EventBusPublisher +import com.twitter.eventbus.client.EventBusPublisherBuilder +import com.twitter.expandodo.thriftscala.CardsService$FinagleClient +import com.twitter.expandodo.thriftscala.{CardsService => CardsScroogeIface} +import com.twitter.finagle._ +import com.twitter.finagle.builder.ClientBuilder +import com.twitter.finagle.client.Transporter +import com.twitter.finagle.factory.TimeoutFactory +import com.twitter.finagle.liveness.FailureAccrualFactory +import com.twitter.finagle.loadbalancer.Balancers +import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.finagle.mtls.client.MtlsClientBuilder._ +import com.twitter.finagle.mtls.client.MtlsStackClient._ +import com.twitter.finagle.partitioning.param +import com.twitter.finagle.service.TimeoutFilter.PropagateDeadlines +import com.twitter.finagle.service._ +import com.twitter.finagle.ssl.OpportunisticTls +import 
com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.ThriftClientRequest +import com.twitter.finagle.thriftmux.MethodBuilder +import com.twitter.finagle.tracing.DefaultTracer +import com.twitter.flockdb.client.thriftscala.FlockDB +import com.twitter.flockdb.client.FlockResponse +import com.twitter.flockdb.client.TFlockClient +import com.twitter.flockdb.client.UserTimelineGraph +import com.twitter.geoduck.backend.hydration.thriftscala.{Hydration => GeoduckHydration} +import com.twitter.geoduck.backend.relevance.thriftscala.Relevance +import com.twitter.geoduck.backend.relevance.thriftscala.Relevance$FinagleClient +import com.twitter.geoduck.backend.relevance.thriftscala.RelevanceContext +import com.twitter.geoduck.service.common.clientmodules.GeoduckGeohashLocate +import com.twitter.geoduck.thriftscala.ReverseGeocoder +import com.twitter.geoduck.util.service.GeoduckLocate +import com.twitter.gizmoduck.thriftscala.UserService +import com.twitter.hashing.KeyHasher +import com.twitter.limiter.client.LimiterClientFactory +import com.twitter.mediainfo.server.thriftscala.MediaInfoService$FinagleClient +import com.twitter.mediainfo.server.thriftscala.{MediaInfoService => MediaInfoScroogeIface} +import com.twitter.merlin.thriftscala.UserRolesService +import com.twitter.passbird.thriftscala.PassbirdService +import com.twitter.passbird.thriftscala.PassbirdService$FinagleClient +import com.twitter.service.gen.scarecrow.thriftscala.ScarecrowService$FinagleClient +import com.twitter.service.gen.scarecrow.thriftscala.{ScarecrowService => ScarecrowScroogeIface} +import com.twitter.service.talon.thriftscala.Talon$FinagleClient +import com.twitter.service.talon.thriftscala.{Talon => TalonScroogeIface} +import com.twitter.snowflake.client.SnowflakeClient +import com.twitter.snowflake.thriftscala.Snowflake +import com.twitter.socialgraph.thriftscala.SocialGraphService$FinagleClient +import com.twitter.socialgraph.thriftscala.{SocialGraphService => 
SocialGraphScroogeIface} +import com.twitter.storage.client.manhattan.kv.Experiments +import com.twitter.storage.client.manhattan.kv.ManhattanKVClient +import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams +import com.twitter.storage.client.manhattan.kv.NoMtlsParams +import com.twitter.strato.client.Strato +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.timelineservice.fanout.thriftscala.FanoutService +import com.twitter.timelineservice.fanout.thriftscala.FanoutService$FinagleClient +import com.twitter.timelineservice.{thriftscala => tls} +import com.twitter.tweetypie.backends._ +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.media.MediaClient +import com.twitter.tweetypie.service.ReplicatingTweetService.GatedReplicationClient +import com.twitter.tweetypie.storage.ManhattanTweetStorageClient +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.thriftscala.DeleteLocationData +import com.twitter.tweetypie.thriftscala.RetweetArchivalEvent +import com.twitter.tweetypie.thriftscala.TweetEvent +import com.twitter.tweetypie.thriftscala.TweetServiceInternal$FinagleClient +import com.twitter.user_image_service.thriftscala.UserImageService$FinagleClient +import com.twitter.user_image_service.thriftscala.{UserImageService => UserImageScroogeIface} +import com.twitter.util.Throw +import com.twitter.util.Timer +import com.twitter.util.{TimeoutException => UtilTimeoutException} +import scala.util.Random + +trait BackendClients { + + /** returns all the finagle.Names created while building clients */ + def referencedNames: Seq[Name] + + val asyncRetryTweetService: ThriftTweetService + val asyncTweetDeletionService: ThriftTweetService + val asyncTweetService: ThriftTweetService + val configBus: ConfigBus + val creativesContainerService: CreativesContainerService + val darkTrafficClient: Service[Array[Byte], Array[Byte]] + 
val deleteLocationDataPublisher: EventBusPublisher[DeleteLocationData] + val escherbird: Escherbird + val expandodo: Expandodo + val fanoutServiceClient: FanoutService.MethodPerEndpoint + val geoHydrationLocate: GeoduckLocate + val geoRelevance: Relevance.MethodPerEndpoint + val geoScrubEventStore: GeoScrubEventStore + val geoduckGeohashLocate: GeoduckGeohashLocate + val gizmoduck: Gizmoduck + val gnipEnricherator: GnipEnricherator + val guano: Guano + val limiterService: LimiterService + val lowQoSReplicationClients: Seq[GatedReplicationClient] + val mediaClient: MediaClient + val mediaInfoService: MediaInfoService + val memcacheClient: memcached.Client + val merlin: UserRolesService.MethodPerEndpoint + val passbirdClient: PassbirdService.MethodPerEndpoint + val replicationClient: ThriftTweetService + val retweetArchivalEventPublisher: EventBusPublisher[RetweetArchivalEvent] + val scarecrow: Scarecrow + val snowflakeClient: SnowflakeClient.SnowflakeClient + val socialGraphService: SocialGraphService + val stratoserverClient: StratoClient + val talon: Talon + val tflockReadClient: TFlockClient + val tflockWriteClient: TFlockClient + val timelineService: TimelineService + val tweetEventsPublisher: EventBusPublisher[TweetEvent] + val tweetStorageClient: TweetStorageClient + val userImageService: UserImageService + val callbackPromotedContentLogger: CallbackPromotedContentLogger +} + +/** + * default implementation of BackendClients that connects to real, remote + * backend services. + */ +object BackendClients { + // for most services, tweetypie typically maintains only a single connection to + // each host in the cluster, and that is enough for normal steady-state work. + // to prevent ddos'ing backends during unusual traffic influxes, we set the host + // connection limit to be 2-3x the steady-state daily peak, giving plenty of head + // room but without allowing an excessive number of connections. 
+ private val defaultHostConnectionLimit = 3 + + // 100ms is greater than most gc pauses; smaller values cause more timeouts + private val defaultConnectTimeout = 100.milliseconds + // tcpConnect timeout is less than half of defaultConnectTimeout, to allow at least + // two tries (except when there is a GC pause) + private val defaultTcpConnectTimeout = 20.milliseconds + + private val WriteExceptionsOnly: PartialFunction[Try[Nothing], Boolean] = + RetryPolicy.WriteExceptionsOnly + + private val ClosedExceptionsOnly: PartialFunction[Try[Nothing], Boolean] = { + case Throw(_: ChannelClosedException) => true + } + + private val TimeoutExceptionsOnly: PartialFunction[Try[Nothing], Boolean] = { + case Throw(_: TimeoutException) => true + case Throw(_: UtilTimeoutException) => true + } + + private val NoBackoff = Backoff.const(0.second) + + private def retry(writeExceptions: Int = 100, closedExceptions: Int = 2, timeouts: Int = 0) = + RetryPolicy.combine( + RetryPolicy.backoff(NoBackoff.take(writeExceptions))(WriteExceptionsOnly), + RetryPolicy.backoff(NoBackoff.take(closedExceptions))(ClosedExceptionsOnly), + RetryPolicy.backoff(NoBackoff.take(timeouts))(TimeoutExceptionsOnly) + ) + + implicit val warmup: Warmup[BackendClients] = { + // Use a random string so that the keys are likely to hash to + // different memcache instances. Request multiple keys at a time so + // that we don't consider the backend warm just because we can get a + // bunch of successful responses to one cache. 
+ val cacheGet = (_: memcached.Client).get(Seq.fill(20)(Random.nextLong.toString)) + + Warmup + .empty[BackendClients] + .warmField(_.expandodo) + .warmField(_.gizmoduck) + .warmField(_.memcacheClient)(Warmup("memcache")(cacheGet)) + .warmField(_.talon) + .warmField(_.tweetStorageClient)(Warmup("tweetstorage")(_.ping())) + .warmField(_.tflockReadClient)(Warmup("tflock")(_.contains(UserTimelineGraph, 0, 0))) + .warmField(_.scarecrow) + .warmField(_.socialGraphService) + .warmField(_.timelineService) + .warmField(_.geoRelevance)(Warmup("geo_relevance")(_.placeSearch(RelevanceContext()))) + } + + def apply( + settings: TweetServiceSettings, + deciderGates: TweetypieDeciderGates, + statsReceiver: StatsReceiver, + hostStatsReceiver: StatsReceiver, + timer: Timer, + clientIdHelper: ClientIdHelper, + ): BackendClients = { + val thriftClientId = settings.thriftClientId + val tracer = DefaultTracer + + val env = settings.env.toString + val zone = settings.zone + val log = Logger(getClass) + val backendsScope = statsReceiver.scope("backends") + + /** a Seq builder of finagle.Names loaded via getName */ + val referencedNamesBuilder = Seq.newBuilder[Name] + + /** the default set of exceptions we believe are safe for Tweetypie to retry */ + val defaultResponseClassifier: ResponseClassifier = + ResponseClassifier.RetryOnChannelClosed.orElse(ResponseClassifier.RetryOnTimeout) + + /** + * Resolve a string into a Finagle Name and record it + * in referencedNames. + */ + def eval(address: String): Name = { + val name = Resolver.eval(address) + referencedNamesBuilder += name + name + } + + def backendContext(name: String) = + Backend.Context(timer, backendsScope.scope(name)) + + // by default, retries on most exceptions (see defaultRetryExceptions). if an rpc is not + // idempotent, it should use a different retry policy. 
+ def clientBuilder(name: String) = { + ClientBuilder() + .name(name) + .reportTo(statsReceiver) + .reportHostStats(hostStatsReceiver) + .tracer(tracer) + .daemon(true) + .tcpConnectTimeout(defaultTcpConnectTimeout) + .connectTimeout(defaultConnectTimeout) + .retryPolicy(retry()) + } + + def thriftMuxClientBuilder(name: String, address: String, clazz: Class[_]) = { + clientBuilder(name) + .stack( + ThriftMux.client + .withClientId(thriftClientId) + .withOpportunisticTls(OpportunisticTls.Required) + .withServiceClass(clazz)) + .loadBalancer(balancer()) + .dest(eval(address)) + .mutualTls(settings.serviceIdentifier) + } + + // Our base ThriftMux.Client + // Prefer using thriftMuxMethodBuilder below but + // can be used to build custom clients (re: darkTrafficClient) + def thriftMuxClient(name: String, propagateDeadlines: Boolean = true): ThriftMux.Client = { + ThriftMux.client + .withClientId(thriftClientId) + .withLabel(name) + .withStatsReceiver(statsReceiver) + .withTracer(tracer) + .withTransport.connectTimeout(defaultTcpConnectTimeout) + .withSession.acquisitionTimeout(defaultConnectTimeout) + .withMutualTls(settings.serviceIdentifier) + .withOpportunisticTls(OpportunisticTls.Required) + .configured(PropagateDeadlines(enabled = propagateDeadlines)) + } + + // If an endpoint is non-idempotent you should add .nonidempotent and + // leave off any ResponseClassifiers (it will remove any placed before but not after) + // If it is unequivocally idempotent you should add .idempotent and + // leave off any ResponseClassifiers (it will retry on all Throws). 
This will also + // enable backup requests + def thriftMuxMethodBuilder( + name: String, + dest: String, + ): MethodBuilder = { + thriftMuxClient(name) + .withLoadBalancer(balancer(minAperture = 2)) + .methodBuilder(dest) + .withRetryForClassifier(defaultResponseClassifier) + .withTimeoutTotal(2.seconds) // total timeout including 1st attempt and up to 2 retries + } + + def balancer(minAperture: Int = 2) = Balancers.aperture(minAperture = minAperture) + + val eventBusPublisherBuilder = + EventBusPublisherBuilder() + .dest(eval("/s/eventbus/provisioning")) + .clientId(settings.thriftClientId) + // eventbus stats are further scoped by stream, so put all + // publishers under the same stats namespace + .statsReceiver(backendsScope.scope("event_bus")) + // This makes the underlying kps-client to be resolved over WilyNs vs DNS + .serviceIdentifier(settings.serviceIdentifier) + + new BackendClients { + def referencedNames: Seq[Name] = referencedNamesBuilder.result() + + val memcacheClient: memcached.Client = + Memcached.client + .withMutualTls(settings.serviceIdentifier) + .connectionsPerEndpoint(2) + .configured(param.KeyHasher(KeyHasher.FNV1_32)) + .configured(Transporter.ConnectTimeout(100.milliseconds)) + .configured(TimeoutFilter.Param(200.milliseconds)) + .configured(TimeoutFactory.Param(200.milliseconds)) + .configured(param.EjectFailedHost(false)) + .configured(FailureAccrualFactory.Param(numFailures = 20, markDeadFor = 30.second)) + .configured( + PendingRequestFilter.Param(limit = Some(settings.cacheClientPendingRequestLimit)) + ) + .filtered(new MemcacheExceptionLoggingFilter) + .newRichClient(dest = eval(settings.twemcacheDest), label = "memcache") + + /* clients */ + val tweetStorageClient: TweetStorageClient = + Manhattan.fromClient( + new ManhattanTweetStorageClient( + settings.tweetStorageConfig, + statsReceiver = backendsScope.scope("tweet_storage"), + clientIdHelper = clientIdHelper, + ) + ) + + val socialGraphService: SocialGraphService = { + val 
finagleClient = + new SocialGraphService$FinagleClient( + thriftMuxClientBuilder( + "socialgraph", + "/s/socialgraph/socialgraph", + classOf[SocialGraphScroogeIface.MethodPerEndpoint] + ).loadBalancer(Balancers.aperturePeakEwma(minAperture = 16)) + .build() + ) + + settings.socialGraphSeviceConfig( + SocialGraphService.fromClient(finagleClient), + backendContext("socialgraph") + ) + } + + val tflockClient = + new FlockDB.FinagledClient( + thriftMuxClientBuilder("tflock", "/s/tflock/tflock", classOf[FlockDB.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 5)) + .responseClassifier(FlockResponse.classifier) + .build(), + serviceName = "tflock", + stats = statsReceiver + ) + + val tflockReadClient: TFlockClient = + settings.tflockReadConfig(tflockClient, backendContext("tflock")) + + val tflockWriteClient: TFlockClient = + settings.tflockWriteConfig(tflockClient, backendContext("tflock")) + + val gizmoduck: Gizmoduck = { + val clientBuilder = + thriftMuxClientBuilder( + "gizmoduck", + "/s/gizmoduck/gizmoduck", + classOf[UserService.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 63)) + val mb = MethodBuilder + .from(clientBuilder) + .idempotent(maxExtraLoad = 1.percent) + .servicePerEndpoint[UserService.ServicePerEndpoint] + + val gizmoduckClient = ThriftMux.Client.methodPerEndpoint(mb) + settings.gizmoduckConfig(Gizmoduck.fromClient(gizmoduckClient), backendContext("gizmoduck")) + } + + val merlin: UserRolesService.MethodPerEndpoint = { + val thriftClient = thriftMuxMethodBuilder("merlin", "/s/merlin/merlin") + .withTimeoutPerRequest(100.milliseconds) + .withTimeoutTotal(400.milliseconds) + .idempotent(0.01) + .servicePerEndpoint[UserRolesService.ServicePerEndpoint] + + ThriftMux.Client.methodPerEndpoint(thriftClient) + } + + val talon: Talon = { + val talonClient = + new Talon$FinagleClient( + thriftMuxClientBuilder( + "talon", + "/s/talon/backend", + classOf[TalonScroogeIface.MethodPerEndpoint]) + .build() + ) + + 
settings.talonConfig(Talon.fromClient(talonClient), backendContext("talon")) + } + + val guano = Guano() + + val mediaInfoService: MediaInfoService = { + val finagleClient = + new MediaInfoService$FinagleClient( + thriftMuxClientBuilder( + "mediainfo", + "/s/photurkey/mediainfo", + classOf[MediaInfoScroogeIface.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 75)) + .build() + ) + + settings.mediaInfoServiceConfig( + MediaInfoService.fromClient(finagleClient), + backendContext("mediainfo") + ) + } + + val userImageService: UserImageService = { + val finagleClient = + new UserImageService$FinagleClient( + thriftMuxClientBuilder( + "userImage", + "/s/user-image-service/uis", + classOf[UserImageScroogeIface.MethodPerEndpoint]) + .build() + ) + + settings.userImageServiceConfig( + UserImageService.fromClient(finagleClient), + backendContext("userImage") + ) + } + + val mediaClient: MediaClient = + MediaClient.fromBackends( + userImageService = userImageService, + mediaInfoService = mediaInfoService + ) + + val timelineService: TimelineService = { + val timelineServiceClient = + new tls.TimelineService$FinagleClient( + thriftMuxClientBuilder( + "timelineService", + "/s/timelineservice/timelineservice", + classOf[tls.TimelineService.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 13)) + .build() + ) + + settings.timelineServiceConfig( + TimelineService.fromClient(timelineServiceClient), + backendContext("timelineService") + ) + } + + val expandodo: Expandodo = { + val cardsServiceClient = + new CardsService$FinagleClient( + thriftMuxClientBuilder( + "expandodo", + "/s/expandodo/server", + classOf[CardsScroogeIface.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 6)) + .build() + ) + + settings.expandodoConfig( + Expandodo.fromClient(cardsServiceClient), + backendContext("expandodo") + ) + } + + val creativesContainerService: CreativesContainerService = { + val mb = thriftMuxMethodBuilder( + "creativesContainerService", + 
"/s/creatives-container/creatives-container", + ).withTimeoutTotal(300.milliseconds) + .idempotent(maxExtraLoad = 1.percent) + .servicePerEndpoint[ccs.CreativesContainerService.ServicePerEndpoint] + + settings.creativesContainerServiceConfig( + CreativesContainerService.fromClient(ccs.CreativesContainerService.MethodPerEndpoint(mb)), + backendContext("creativesContainerService") + ) + } + + val scarecrow: Scarecrow = { + val scarecrowClient = new ScarecrowService$FinagleClient( + thriftMuxClientBuilder( + "scarecrow", + "/s/abuse/scarecrow", + classOf[ScarecrowScroogeIface.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 6)) + .build(), + serviceName = "scarecrow", + stats = statsReceiver + ) + + settings.scarecrowConfig(Scarecrow.fromClient(scarecrowClient), backendContext("scarecrow")) + } + + val snowflakeClient: Snowflake.MethodPerEndpoint = { + eval("/s/snowflake/snowflake") // eagerly resolve the serverset + val mb = thriftMuxMethodBuilder( + "snowflake", + "/s/snowflake/snowflake" + ).withTimeoutTotal(300.milliseconds) + .withTimeoutPerRequest(100.milliseconds) + .idempotent(maxExtraLoad = 1.percent) + + SnowflakeClient.snowflakeClient(mb) + } + + val deferredRpcClient = + new DeferredRPC.FinagledClient( + thriftMuxClientBuilder( + "deferredrpc", + "/s/kafka-shared/krpc-server-main", + classOf[DeferredRPC.MethodPerEndpoint]) + .requestTimeout(200.milliseconds) + .retryPolicy(retry(timeouts = 3)) + .build(), + serviceName = "deferredrpc", + stats = statsReceiver + ) + + def deferredTweetypie(target: Target): ThriftTweetService = { + // When deferring back to the local datacenter, preserve the finagle + // context and dtabs. This will ensure that developer dtabs are honored + // and that context is preserved in eventbus. (eventbus enqueues only + // happen in async requests within the same datacenter.) 
+ // + // Effectively, this means we consider deferredrpc requests within the + // same datacenter to be part of the same request, but replicated + // requests are not. + val isLocal: Boolean = target.datacenter == Datacenter.Local + + val deferredThriftService: Service[ThriftClientRequest, Array[Byte]] = + new DeferredThriftService( + deferredRpcClient, + target, + serializeFinagleContexts = isLocal, + serializeFinagleDtabs = isLocal + ) + + new TweetServiceInternal$FinagleClient(deferredThriftService) + } + + val replicationClient: ThriftTweetService = + deferredTweetypie(Target(Datacenter.AllOthers, "tweetypie-replication")) + + // used for read endpoints replication + val lowQoSReplicationClients: Seq[GatedReplicationClient] = { + val rampUpGate = Gate.linearRampUp(Time.now, settings.forkingRampUp) + + // Gates to avoid sending replicated reads from a cluster to itself + val inATLA = if (settings.zone == "atla") Gate.True else Gate.False + val inPDXA = if (settings.zone == "pdxa") Gate.True else Gate.False + + Seq( + GatedReplicationClient( + client = deferredTweetypie(Target(Datacenter.Atla, "tweetypie-lowqos")), + gate = rampUpGate & deciderGates.replicateReadsToATLA & !inATLA + ), + GatedReplicationClient( + client = deferredTweetypie(Target(Datacenter.Pdxa, "tweetypie-lowqos")), + gate = rampUpGate & deciderGates.replicateReadsToPDXA & !inPDXA + ) + ) + } + + // used for async operations in the write path + val asyncTweetService: ThriftTweetService = + deferredTweetypie(Target(Datacenter.Local, "tweetypie")) + + // used to trigger asyncEraseUserTweetsRequest + val asyncTweetDeletionService: ThriftTweetService = + deferredTweetypie(Target(Datacenter.Local, "tweetypie-retweet-deletion")) + + // used for async retries + val asyncRetryTweetService: ThriftTweetService = + deferredTweetypie(Target(Datacenter.Local, "tweetypie-async-retry")) + + val darkTrafficClient: Service[Array[Byte], Array[Byte]] = { + val thriftService = + thriftMuxClient( + 
"tweetypie.dark", + propagateDeadlines = false + ).withRequestTimeout(100.milliseconds) + .newService("/s/tweetypie/proxy") + + val transformer = + new Filter[Array[Byte], Array[Byte], ThriftClientRequest, Array[Byte]] { + override def apply( + request: Array[Byte], + service: Service[ThriftClientRequest, Array[Byte]] + ): Future[Array[Byte]] = + service(new ThriftClientRequest(request, false)) + } + + transformer andThen thriftService + } + + val geoHydrationClient: GeoduckHydration.MethodPerEndpoint = { + val mb = thriftMuxMethodBuilder("geoduck_hydration", "/s/geo/hydration") + .withTimeoutPerRequest(100.millis) + .idempotent(maxExtraLoad = 1.percent) + ThriftMux.Client.methodPerEndpoint( + mb.servicePerEndpoint[GeoduckHydration.ServicePerEndpoint]) + } + + val geoHydrationLocate: GeoduckLocate = geoHydrationClient.locate + + val geoReverseGeocoderClient: ReverseGeocoder.MethodPerEndpoint = { + val mb = thriftMuxMethodBuilder("geoduck_reversegeocoder", "/s/geo/geoduck_reversegeocoder") + .withTimeoutPerRequest(100.millis) + .idempotent(maxExtraLoad = 1.percent) + ThriftMux.Client.methodPerEndpoint( + mb.servicePerEndpoint[ReverseGeocoder.ServicePerEndpoint]) + } + + val geoduckGeohashLocate: GeoduckGeohashLocate = { + new GeoduckGeohashLocate( + reverseGeocoderClient = geoReverseGeocoderClient, + hydrationClient = geoHydrationClient, + classScopedStatsReceiver = statsReceiver.scope("geo_geohash_locate")) + } + + val geoRelevance = + new Relevance$FinagleClient( + thriftMuxClientBuilder( + "geoduck_relevance", + "/s/geo/relevance", + classOf[Relevance.MethodPerEndpoint]) + .requestTimeout(100.milliseconds) + .retryPolicy(retry(timeouts = 1)) + .build(), + stats = statsReceiver + ) + + val fanoutServiceClient = + new FanoutService$FinagleClient( + new DeferredThriftService(deferredRpcClient, Target(Datacenter.Local, "fanoutservice")), + serviceName = "fanoutservice", + stats = statsReceiver + ) + + val limiterService: LimiterService = { + val limiterClient = + new 
LimiterClientFactory( + name = "limiter", + clientId = thriftClientId, + tracer = tracer, + statsReceiver = statsReceiver, + serviceIdentifier = settings.serviceIdentifier, + opportunisticTlsLevel = OpportunisticTls.Required, + daemonize = true + )(eval("/s/limiter/limiter")) + + val limiterBackend = settings.limiterBackendConfig( + LimiterBackend.fromClient(limiterClient), + backendContext("limiter") + ) + + LimiterService.fromBackend( + limiterBackend.incrementFeature, + limiterBackend.getFeatureUsage, + getAppId, + backendsScope.scope("limiter") + ) + } + + val passbirdClient = + new PassbirdService$FinagleClient( + thriftMuxClientBuilder( + "passbird", + "/s/passbird/passbird", + classOf[PassbirdService.MethodPerEndpoint]) + .requestTimeout(100.milliseconds) + .retryPolicy(retry(timeouts = 1)) + .build(), + serviceName = "passbird", + stats = statsReceiver + ) + + val escherbird: Escherbird = { + val escherbirdClient = + new TweetEntityAnnotationService$FinagleClient( + thriftMuxClientBuilder( + "escherbird", + "/s/escherbird/annotationservice", + classOf[TweetEntityAnnotationScroogeIface.MethodPerEndpoint]) + .build() + ) + settings.escherbirdConfig( + Escherbird.fromClient(escherbirdClient), + backendContext("escherbird") + ) + } + + val geoScrubEventStore: GeoScrubEventStore = { + val mhMtlsParams = + if (settings.serviceIdentifier == EmptyServiceIdentifier) NoMtlsParams + else + ManhattanKVClientMtlsParams( + serviceIdentifier = settings.serviceIdentifier, + opportunisticTls = OpportunisticTls.Required) + + val mhClient = + new ManhattanKVClient( + appId = "geoduck_scrub_datastore", + dest = "/s/manhattan/omega.native-thrift", + mtlsParams = mhMtlsParams, + label = "mh_omega", + Seq(Experiments.ApertureLoadBalancer) + ) + + GeoScrubEventStore( + mhClient, + settings.geoScrubEventStoreConfig, + backendContext("geoScrubEventStore") + ) + } + + val tweetEventsPublisher: EventBusPublisher[TweetEvent] = + eventBusPublisherBuilder + .streamName("tweet_events") + 
.thriftStruct(TweetEvent) + .publishTimeout(500.milliseconds) + .serializeFinagleDtabs(true) + .build() + + val deleteLocationDataPublisher: EventBusPublisher[DeleteLocationData] = + eventBusPublisherBuilder + .streamName("tweetypie_delete_location_data_prod") + .thriftStruct(DeleteLocationData) + // deleteLocationData is relatively rare, and publishing to + // eventbus is all that the endpoint does. This means that it + // is much more likely that we will have to make a connection, + // which has much greater latency, and also makes us more + // tolerant of slow requests, so we choose a long timeout. + .publishTimeout(2.seconds) + .build() + + val retweetArchivalEventPublisher: EventBusPublisher[RetweetArchivalEvent] = + eventBusPublisherBuilder + .streamName("retweet_archival_events") + .thriftStruct(RetweetArchivalEvent) + .publishTimeout(500.milliseconds) + .build() + + val gnipEnricherator: GnipEnricherator = { + val gnipEnricherator = + thriftMuxMethodBuilder( + "enricherator", + "/s/datadelivery-enrichments/enricherator" + ) + GnipEnricherator.fromMethod(gnipEnricherator) + } + + val stratoserverClient: StratoClient = Strato.client + .withMutualTls( + serviceIdentifier = settings.serviceIdentifier, + opportunisticLevel = OpportunisticTls.Required) + .withLabel("stratoserver") + .withRequestTimeout(100.milliseconds) + .build() + + val configBus: ConfigBus = + ConfigBus(backendsScope.scope("config_bus"), settings.instanceId, settings.instanceCount) + + val callbackPromotedContentLogger: CallbackPromotedContentLogger = { + val publisher = + eventBusPublisherBuilder + .streamName(settings.adsLoggingClientTopicName) + .thriftStruct(AdCallbackEvent) + .publishTimeout(500.milliseconds) + .serializeFinagleDtabs(true) + .maxQueuedEvents(1000) + .kafkaDest("/s/kafka/ads-callback:kafka-tls") + .build() + + val stats = backendsScope.scope("promoted_content") + val adsLoggingClient = AdsLoggingClient(publisher, stats, "Tweetypie") + new 
CallbackPromotedContentLogger(adsLoggingClient, stats) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala new file mode 100644 index 000000000..4a670f4d2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala @@ -0,0 +1,281 @@ +package com.twitter.tweetypie +package config + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.Backoff +import com.twitter.finagle.memcached +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo.cache.{Serializer => CacheSerializer, _} +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.handler.CacheBasedTweetCreationLock +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.serverutil._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util._ +import com.twitter.util.Timer + +/** + * Provides configured caches (most backed by memcached) wrapped with appropriate metrics and locks. 
+ * + * All memcached-backed caches share: + * - one Finagle memcached client from backends.memcacheClient + * - one in memory caffeine cache + * - one Twemcache pool + * + * Each memcached-backed cache specialization provides its own: + * - key prefix or "namespace" + * - value serializer/deserializer + * - stats scope + * - log name + */ +trait Caches { + val memcachedClientWithInProcessCaching: memcached.Client + val tweetCache: LockingCache[TweetKey, Cached[CachedTweet]] + val tweetResultCache: LockingCache[TweetId, Cached[TweetResult]] + val tweetDataCache: LockingCache[TweetId, Cached[TweetData]] + val tweetCreateLockerCache: Cache[TweetCreationLock.Key, TweetCreationLock.State] + val tweetCountsCache: LockingCache[TweetCountKey, Cached[Count]] + val deviceSourceInProcessCache: LockingCache[String, Cached[DeviceSource]] + val geoScrubCache: LockingCache[UserId, Cached[Time]] +} + +object Caches { + object NoCache extends Caches { + override val memcachedClientWithInProcessCaching: memcached.Client = new NullMemcacheClient() + private val toLockingCache: LockingCacheFactory = NonLockingCacheFactory + val tweetCache: LockingCache[TweetKey, Cached[CachedTweet]] = + toLockingCache(new NullCache) + val tweetResultCache: LockingCache[TweetId, Cached[TweetResult]] = + toLockingCache(new NullCache) + val tweetDataCache: LockingCache[TweetId, Cached[TweetData]] = + toLockingCache(new NullCache) + val tweetCreateLockerCache: Cache[TweetCreationLock.Key, TweetCreationLock.State] = + new NullCache + val tweetCountsCache: LockingCache[TweetCountKey, Cached[Count]] = + toLockingCache(new NullCache) + val deviceSourceInProcessCache: LockingCache[String, Cached[DeviceSource]] = + toLockingCache(new NullCache) + val geoScrubCache: LockingCache[UserId, Cached[Time]] = + toLockingCache(new NullCache) + } + + def apply( + settings: TweetServiceSettings, + stats: StatsReceiver, + timer: Timer, + clients: BackendClients, + tweetKeyFactory: TweetKeyFactory, + deciderGates: 
TweetypieDeciderGates, + clientIdHelper: ClientIdHelper, + ): Caches = { + val cachesStats = stats.scope("caches") + val cachesInprocessStats = cachesStats.scope("inprocess") + val cachesMemcacheStats = cachesStats.scope("memcache") + val cachesMemcacheObserver = new StatsReceiverCacheObserver(cachesStats, 10000, "memcache") + val cachesMemcacheTweetStats = cachesMemcacheStats.scope("tweet") + val cachesInprocessDeviceSourceStats = cachesInprocessStats.scope("device_source") + val cachesMemcacheCountStats = cachesMemcacheStats.scope("count") + val cachesMemcacheTweetCreateStats = cachesMemcacheStats.scope("tweet_create") + val cachesMemcacheGeoScrubStats = cachesMemcacheStats.scope("geo_scrub") + val memcacheClient = clients.memcacheClient + + val caffieneMemcachedClient = settings.inProcessCacheConfigOpt match { + case Some(inProcessCacheConfig) => + new CaffeineMemcacheClient( + proxyClient = memcacheClient, + inProcessCacheConfig.maximumSize, + inProcessCacheConfig.ttl, + cachesMemcacheStats.scope("caffeine") + ) + case None => + memcacheClient + } + + val observedMemcacheWithCaffeineClient = + new ObservableMemcache( + new FinagleMemcache( + caffieneMemcachedClient + ), + cachesMemcacheObserver + ) + + def observeCache[K, V]( + cache: Cache[K, V], + stats: StatsReceiver, + logName: String, + windowSize: Int = 10000 + ) = + ObservableCache( + cache, + stats, + windowSize, + // Need to use an old-school c.t.logging.Logger because that's what servo needs + com.twitter.logging.Logger(s"com.twitter.tweetypie.cache.$logName") + ) + + def mkCache[K, V]( + ttl: Duration, + serializer: CacheSerializer[V], + perCacheStats: StatsReceiver, + logName: String, + windowSize: Int = 10000 + ): Cache[K, V] = { + observeCache( + new MemcacheCache[K, V]( + observedMemcacheWithCaffeineClient, + ttl, + serializer + ), + perCacheStats, + logName, + windowSize + ) + } + + def toLockingCache[K, V]( + cache: Cache[K, V], + stats: StatsReceiver, + backoffs: Stream[Duration] = 
settings.lockingCacheBackoffs + ): LockingCache[K, V] = + new OptimisticLockingCache( + underlyingCache = cache, + backoffs = Backoff.fromStream(backoffs), + observer = new OptimisticLockingCacheObserver(stats), + timer = timer + ) + + def mkLockingCache[K, V]( + ttl: Duration, + serializer: CacheSerializer[V], + stats: StatsReceiver, + logName: String, + windowSize: Int = 10000, + backoffs: Stream[Duration] = settings.lockingCacheBackoffs + ): LockingCache[K, V] = + toLockingCache( + mkCache(ttl, serializer, stats, logName, windowSize), + stats, + backoffs + ) + + def trackTimeInCache[K, V]( + cache: Cache[K, Cached[V]], + stats: StatsReceiver + ): Cache[K, Cached[V]] = + new CacheWrapper[K, Cached[V]] { + val ageStat: Stat = stats.stat("time_in_cache_ms") + val underlyingCache: Cache[K, Cached[V]] = cache + + override def get(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = + underlyingCache.get(keys).onSuccess(record) + + private def record(res: KeyValueResult[K, Cached[V]]): Unit = { + val now = Time.now + for (c <- res.found.values) { + ageStat.add(c.cachedAt.until(now).inMilliseconds) + } + } + } + + new Caches { + override val memcachedClientWithInProcessCaching: memcached.Client = caffieneMemcachedClient + + private val observingTweetCache: Cache[TweetKey, Cached[CachedTweet]] = + trackTimeInCache( + mkCache( + ttl = settings.tweetMemcacheTtl, + serializer = Serializer.CachedTweet.CachedCompact, + perCacheStats = cachesMemcacheTweetStats, + logName = "MemcacheTweetCache" + ), + cachesMemcacheTweetStats + ) + + // Wrap the tweet cache with a wrapper that will scribe the cache writes + // that happen to a fraction of tweets. This was added as part of the + // investigation into missing place ids and cache inconsistencies that + // were discovered by the additional fields hydrator. 
+ private[this] val writeLoggingTweetCache = + new ScribeTweetCacheWrites( + underlyingCache = observingTweetCache, + logYoungTweetCacheWrites = deciderGates.logYoungTweetCacheWrites, + logTweetCacheWrites = deciderGates.logTweetCacheWrites + ) + + val tweetCache: LockingCache[TweetKey, Cached[CachedTweet]] = + toLockingCache( + cache = writeLoggingTweetCache, + stats = cachesMemcacheTweetStats + ) + + val tweetDataCache: LockingCache[TweetId, Cached[TweetData]] = + toLockingCache( + cache = TweetDataCache(tweetCache, tweetKeyFactory.fromId), + stats = cachesMemcacheTweetStats + ) + + val tweetResultCache: LockingCache[TweetId, Cached[TweetResult]] = + toLockingCache( + cache = TweetResultCache(tweetDataCache), + stats = cachesMemcacheTweetStats + ) + + val tweetCountsCache: LockingCache[TweetCountKey, Cached[Count]] = + mkLockingCache( + ttl = settings.tweetCountsMemcacheTtl, + serializer = Serializers.CachedLong.Compact, + stats = cachesMemcacheCountStats, + logName = "MemcacheTweetCountCache", + windowSize = 1000, + backoffs = Backoff.linear(0.millis, 2.millis).take(2).toStream + ) + + val tweetCreateLockerCache: Cache[TweetCreationLock.Key, TweetCreationLock.State] = + observeCache( + new TtlCacheToCache( + underlyingCache = new KeyValueTransformingTtlCache( + underlyingCache = observedMemcacheWithCaffeineClient, + transformer = TweetCreationLock.State.Serializer, + underlyingKey = (_: TweetCreationLock.Key).toString + ), + ttl = CacheBasedTweetCreationLock.ttlChooser( + shortTtl = settings.tweetCreateLockingMemcacheTtl, + longTtl = settings.tweetCreateLockingMemcacheLongTtl + ) + ), + stats = cachesMemcacheTweetCreateStats, + logName = "MemcacheTweetCreateLockingCache", + windowSize = 1000 + ) + + val deviceSourceInProcessCache: LockingCache[String, Cached[DeviceSource]] = + toLockingCache( + observeCache( + new ExpiringLruCache( + ttl = settings.deviceSourceInProcessTtl, + maximumSize = settings.deviceSourceInProcessCacheMaxSize + ), + stats = 
cachesInprocessDeviceSourceStats, + logName = "InprocessDeviceSourceCache" + ), + stats = cachesInprocessDeviceSourceStats + ) + + val geoScrubCache: LockingCache[UserId, Cached[Time]] = + toLockingCache[UserId, Cached[Time]]( + new KeyTransformingCache( + mkCache[GeoScrubTimestampKey, Cached[Time]]( + ttl = settings.geoScrubMemcacheTtl, + serializer = Serializer.toCached(CacheSerializer.Time), + perCacheStats = cachesMemcacheGeoScrubStats, + logName = "MemcacheGeoScrubCache" + ), + (userId: UserId) => GeoScrubTimestampKey(userId) + ), + cachesMemcacheGeoScrubStats + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala new file mode 100644 index 000000000..5ebca25c2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala @@ -0,0 +1,126 @@ +package com.twitter.tweetypie.config + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.util.Try + +case object EmptyConfigException extends Exception + +case class ServiceIdentifierPattern( + role: Option[String], + service: Option[String], + environment: Option[String], +) { + // Service identifier matches if the fields of service identifier + // match all the defined fields of pattern. + def matches(id: ServiceIdentifier): Boolean = + Seq( + role.map(_ == id.role), + service.map(_ == id.service), + environment.map(_ == id.environment), + ) + .flatten + .forall(identity) + + // True if this is the kind of pattern that only specifies environment. + // This should be used in rare cases, for example letting all devel clients + // use permitted methods - like get_tweet_fields. 
+ def onlyEnv: Boolean = + role.isEmpty && service.isEmpty && environment.isDefined +} + +case class Client( + clientId: String, + serviceIdentifiers: Seq[ServiceIdentifierPattern], + tpsLimit: Option[Int], + environments: Seq[String], + loadShedEnvs: Seq[String], + permittedMethods: Set[String], + accessAllMethods: Boolean, + bypassVisibilityFiltering: Boolean, + enforceRateLimit: Boolean) { + + // Client matches a service identifier if any of its patterns + // match. + def matches(id: ServiceIdentifier): Boolean = + serviceIdentifiers.exists(_.matches(id)) +} + +object ClientsParser { + + // Case classes for parsing yaml - should match the structure of clients.yml + private case class YamlServiceIdentifier( + role: Option[String], + service: Option[String], + environment: Option[String], + ) + private case class YamlClient( + client_id: String, + service_identifiers: Option[Seq[YamlServiceIdentifier]], + service_name: String, + tps_quota: String, + contact_email: String, + environments: Seq[String], + load_shed_envs: Option[ + Seq[String] + ], // list of environments we can reject requests from when load shedding + comment: Option[String], + permitted_methods: Option[Seq[String]], + access_all_methods: Boolean, + bypass_visibility_filtering: Boolean, + bypass_visibility_filtering_reason: Option[String], + rate_limit: Boolean) { + def toClient: Client = { + + // we provision tps_quota for both DCs during white-listing, to account for full fail-over.
+ val tpsLimit: Option[Int] = Try(tps_quota.replaceAll("[^0-9]", "").toInt * 1000).toOption + + Client( + clientId = client_id, + serviceIdentifiers = service_identifiers.getOrElse(Nil).flatMap { id => + if (id.role.isDefined || id.service.isDefined || id.environment.isDefined) { + Seq(ServiceIdentifierPattern( + role = id.role, + service = id.service, + environment = id.environment, + )) + } else { + Seq() + } + }, + tpsLimit = tpsLimit, + environments = environments, + loadShedEnvs = load_shed_envs.getOrElse(Nil), + permittedMethods = permitted_methods.getOrElse(Nil).toSet, + accessAllMethods = access_all_methods, + bypassVisibilityFiltering = bypass_visibility_filtering, + enforceRateLimit = rate_limit + ) + } + } + + private val mapper: ObjectMapper = new ObjectMapper(new YAMLFactory()) + mapper.registerModule(DefaultScalaModule) + + private val yamlClientTypeFactory = + mapper + .getTypeFactory() + .constructCollectionLikeType( + classOf[Seq[YamlClient]], + classOf[YamlClient] + ) + + def apply(yamlString: String): Seq[Client] = { + val parsed = + mapper + .readValue[Seq[YamlClient]](yamlString, yamlClientTypeFactory) + .map(_.toClient) + + if (parsed.isEmpty) + throw EmptyConfigException + else + parsed + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala new file mode 100644 index 000000000..8d29cdc72 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala @@ -0,0 +1,100 @@ +package com.twitter.tweetypie.config + +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.tweetypie.Gate +import com.twitter.tweetypie.backends.ConfigBus +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.util.Activity + +case class DynamicConfig( + // A map of fully-qualified client ID (including the 
environment suffix, e.g. tweetypie.prod) to Client case class + clientsByFullyQualifiedId: Option[Map[String, Client]], + // Clients by service identifier parts. + clientsByRole: Option[Map[String, Seq[Client]]] = None, + clientsByService: Option[Map[String, Seq[Client]]] = None, + onlyEnvClients: Option[Seq[Client]] = None, + // These endpoints do not need permissions to be accessed + unprotectedEndpoints: Set[String] = Set("get_tweet_counts", "get_tweet_fields", "get_tweets")) { + + /** + * Function that takes a fully qualified client id and says whether it is included in the allowList (every client id is allowed when no client list is loaded) + */ + val isAllowListedClient: String => Boolean = + clientsByFullyQualifiedId.map(clients => clients.contains _).getOrElse(_ => true) + + def byServiceIdentifier(serviceIdentifier: ServiceIdentifier): Set[Client] = + Iterable.concat( + get(clientsByRole, serviceIdentifier.role), + get(clientsByService, serviceIdentifier.service), + onlyEnvClients.getOrElse(Seq()), + ) + .filter(_.matches(serviceIdentifier)) + .toSet + + private def get(clientsByKey: Option[Map[String, Seq[Client]]], key: String): Seq[Client] = + clientsByKey match { + case Some(map) => map.getOrElse(key, Seq()) + case None => Seq() + } + + /** + * Takes a fully qualified client id and says whether the client has offered to shed reads when tweetypie + * is in an emergency + */ + val loadShedEligible: Gate[String] = Gate { (clientId: String) => + val env = ClientIdHelper.getClientIdEnv(clientId) + clientsByFullyQualifiedId.flatMap(clients => clients.get(clientId)).exists { c => + c.loadShedEnvs.contains(env) + } + } +} + +/** + * DynamicConfig uses ConfigBus to update Tweetypie with configuration changes + * dynamically. Every time the config changes, the Activity[DynamicConfig] is + * updated, and anything relying on that config will be reinitialized.
+ */ +object DynamicConfig { + def fullyQualifiedClientIds(client: Client): Seq[String] = { + val clientId = client.clientId + client.environments match { + case Nil => Seq(clientId) + case envs => envs.map(env => s"$clientId.$env") + } + } + + // Make a Map of fully qualified client id to Client + def byClientId(clients: Seq[Client]): Map[String, Client] = + clients.flatMap { client => + fullyQualifiedClientIds(client).map { fullClientId => fullClientId -> client } + }.toMap + + def by(get: ServiceIdentifierPattern => Option[String])(clients: Seq[Client]): Map[String, Seq[Client]] = + clients.flatMap { c => + c.serviceIdentifiers.collect { + case s if get(s).isDefined => (get(s).get, c) + } + }.groupBy(_._1).mapValues(_.map(_._2)) + + private[this] val clientsPath = "config/clients.yml" + + def apply( + stats: StatsReceiver, + configBus: ConfigBus, + settings: TweetServiceSettings + ): Activity[DynamicConfig] = + DynamicConfigLoader(configBus.file) + .apply(clientsPath, stats.scope("client_allowlist"), ClientsParser.apply) + .map(fromClients) + + def fromClients(clients: Option[Seq[Client]]): DynamicConfig = + DynamicConfig( + clientsByFullyQualifiedId = clients.map(byClientId), + clientsByRole = clients.map(by(_.role)), + clientsByService = clients.map(by(_.service)), + onlyEnvClients = clients.map(_.filter { client => + client.serviceIdentifiers.exists(_.onlyEnv) + }), + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala new file mode 100644 index 000000000..724f97644 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala @@ -0,0 +1,69 @@ +package com.twitter.tweetypie.config + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo.util.ExceptionCounter +import com.twitter.tweetypie.serverutil.ActivityUtil +import com.twitter.util.{Activity, Return, 
Try}
+import com.twitter.util.logging.Logger
+
+/**
+ * Reads the file at `path`, parses it with `parse`, and exposes the result as an
+ * Activity of Option[T].
+ */
+trait DynamicConfigLoader {
+  def apply[T](path: String, stats: StatsReceiver, parse: String => T): Activity[Option[T]]
+}
+
+object DynamicConfigLoader {
+
+  /**
+   * Creates a loader on top of `read`, the function that turns a path into an Activity
+   * of the file contents.
+   */
+  def apply(read: String => Activity[String]): DynamicConfigLoader =
+    new DynamicConfigLoader {
+      val logger = Logger(getClass)
+
+      // Passes every state of `a` through unchanged while bumping the matching
+      // "success" / "pending" / "failure" counter on `stats`.
+      private def snoopState[T](stats: StatsReceiver)(a: Activity[T]): Activity[T] = {
+        val pendingCount = stats.counter("pending")
+        val failureCount = stats.counter("failure")
+        val successCount = stats.counter("success")
+
+        a.mapState[T] { state =>
+          state match {
+            case Activity.Ok(_) => successCount.incr()
+            case Activity.Pending => pendingCount.incr()
+            case Activity.Failed(_) => failureCount.incr()
+          }
+          state
+        }
+      }
+
+      def apply[T](path: String, stats: StatsReceiver, parse: String => T): Activity[Option[T]] = {
+        val exceptionCounter = new ExceptionCounter(stats)
+
+        // Raw view: file contents parsed as-is. Failures are counted and logged, then
+        // rethrown so the activity still fails.
+        val rawStats = stats.scope("raw")
+        val parsed: Activity[T] =
+          ActivityUtil.strict(read(path)).map(parse).handle {
+            case error =>
+              exceptionCounter(error)
+              logger.error(s"Invalid config in $path", error)
+              throw error
+          }
+        val rawActivity: Activity[T] = snoopState(rawStats)(parsed)
+
+        // Stabilized view: an Ok state becomes Some(value); any other state is reported
+        // to callers as Ok(None).
+        val stabilizedStats = stats.scope("stabilized")
+        val stableActivity: Activity[Option[T]] =
+          snoopState(stabilizedStats)(rawActivity.stabilize).mapState[Option[T]] {
+            case Activity.Ok(config) => Activity.Ok(Some(config))
+            case _ => Activity.Ok(None)
+          }
+
+        // Gauge value is derived from the hash code of the current config, or 0 when
+        // there is none or sampling throws.
+        stats.provideGauge("config_state") {
+          Try(stableActivity.sample()) match {
+            case Return(Some(config)) => config.hashCode.abs
+            case _ => 0
+          }
+        }
+
+        stableActivity
+      }
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala
new file mode 100644
index 000000000..0f8206ffa
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala
@@ -0,0 +1,182 @@
+package com.twitter.tweetypie
+package config
+
+import com.twitter.flockdb.client.StatusGraph
+import com.twitter.servo.util.FutureArrow
+import
com.twitter.stitch.timelineservice.TimelineService.GetPerspectives
+import com.twitter.tweetypie.client_id.ClientIdHelper
+import com.twitter.tweetypie.repository.DeviceSourceRepository.Type
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.serverutil._
+import com.twitter.visibility.common.tflock.UserIsInvitedToConversationRepository
+
+/**
+ * Tweetypie's read path composes results from many data sources. This
+ * trait is a collection of repositories for external data access.
+ * These repositories should not have (within-Tweetypie) caches,
+ * deciders, etc. applied to them, since that is done when the
+ * repositories are composed together. They should be the minimal
+ * wrapping of the external clients in order to expose an Arrow-based
+ * interface.
+ */
+trait ExternalRepositories {
+  def card2Repo: Card2Repository.Type
+  def cardRepo: CardRepository.Type
+  def cardUsersRepo: CardUsersRepository.Type
+  def conversationIdRepo: ConversationIdRepository.Type
+  def containerAsTweetRepo: CreativesContainerMaterializationRepository.GetTweetType
+  def containerAsTweetFieldsRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType
+  def deviceSourceRepo: DeviceSourceRepository.Type
+  def escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type
+  def stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type
+  def stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type
+  def stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type
+  def stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type
+  def stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type
+  def stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type
+  def stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type
+  def unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type
+  def geoScrubTimestampRepo: GeoScrubTimestampRepository.Type
+  def mediaMetadataRepo: MediaMetadataRepository.Type
+  def perspectiveRepo: PerspectiveRepository.Type
+  def placeRepo: PlaceRepository.Type
+  def profileGeoRepo: ProfileGeoRepository.Type
+  def quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type
+  def lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type
+  def relationshipRepo: RelationshipRepository.Type
+  def retweetSpamCheckRepo: RetweetSpamCheckRepository.Type
+  def tweetCountsRepo: TweetCountsRepository.Type
+  def tweetResultRepo: TweetResultRepository.Type
+  def tweetSpamCheckRepo: TweetSpamCheckRepository.Type
+  def urlRepo: UrlRepository.Type
+  def userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type
+  def userRepo: UserRepository.Type
+}
+
+/**
+ * [[ExternalRepositories]] implementation in which every repository is a `lazy val`
+ * built directly from one of the supplied backend clients.
+ *
+ * @param clients        backend clients that the repositories wrap
+ * @param statsReceiver  scoped as `manhattan_tweet_repo` and `talon_url_repo` for the two
+ *                       repositories that take a stats receiver
+ * @param settings       supplies `tweetCountsRepoChunkSize`,
+ *                       `shortCircuitLikelyPartialTweetReads` and `thriftClientId`
+ * @param clientIdHelper passed to the tweet-result and url repositories
+ */
+class ExternalServiceRepositories(
+  clients: BackendClients,
+  statsReceiver: StatsReceiver,
+  settings: TweetServiceSettings,
+  clientIdHelper: ClientIdHelper)
+    extends ExternalRepositories {
+
+  lazy val card2Repo: Card2Repository.Type =
+    Card2Repository(clients.expandodo.getCards2, maxRequestSize = 5)
+
+  lazy val cardRepo: CardRepository.Type =
+    CardRepository(clients.expandodo.getCards, maxRequestSize = 5)
+
+  lazy val cardUsersRepo: CardUsersRepository.Type =
+    CardUsersRepository(clients.expandodo.getCardUsers)
+
+  lazy val conversationIdRepo: ConversationIdRepository.Type =
+    ConversationIdRepository(clients.tflockReadClient.multiSelectOne)
+
+  lazy val containerAsTweetRepo: CreativesContainerMaterializationRepository.GetTweetType =
+    CreativesContainerMaterializationRepository(
+      clients.creativesContainerService.materializeAsTweet)
+
+  lazy val containerAsTweetFieldsRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType =
+    CreativesContainerMaterializationRepository.materializeAsTweetFields(
+      clients.creativesContainerService.materializeAsTweetFields)
+
+  // Annotated with the qualified alias, like every other member, instead of the bare
+  // `Type` brought in by the DeviceSourceRepository.Type import.
+  lazy val deviceSourceRepo: DeviceSourceRepository.Type = {
+    DeviceSourceRepository(
+      DeviceSourceParser.parseAppId,
+      FutureArrow(clients.passbirdClient.getClientApplications(_))
+    )
+  }
+
+  lazy val escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type =
+    EscherbirdAnnotationRepository(clients.escherbird.annotate)
+
+  lazy val quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type =
+    QuoterHasAlreadyQuotedRepository(clients.tflockReadClient)
+
+  lazy val lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type =
+    LastQuoteOfQuoterRepository(clients.tflockReadClient)
+
+  lazy val stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type =
+    StratoSafetyLabelsRepository(clients.stratoserverClient)
+
+  lazy val stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type =
+    StratoCommunityMembershipRepository(clients.stratoserverClient)
+
+  lazy val stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type =
+    StratoCommunityAccessRepository(clients.stratoserverClient)
+
+  lazy val stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type =
+    StratoSuperFollowEligibleRepository(clients.stratoserverClient)
+
+  lazy val stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type =
+    StratoSuperFollowRelationsRepository(clients.stratoserverClient)
+
+  lazy val stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type =
+    StratoPromotedTweetRepository(clients.stratoserverClient)
+
+  lazy val stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type =
+    StratoSubscriptionVerificationRepository(clients.stratoserverClient)
+
+  lazy val geoScrubTimestampRepo: GeoScrubTimestampRepository.Type =
+    GeoScrubTimestampRepository(clients.geoScrubEventStore.getGeoScrubTimestamp)
+
+  lazy val mediaMetadataRepo: MediaMetadataRepository.Type =
+    MediaMetadataRepository(clients.mediaClient.getMediaMetadata)
+
+  // NOTE(review): declared as GetPerspectives while the trait member is
+  // PerspectiveRepository.Type; presumably the alias resolves to the same type. Confirm
+  // before normalizing this annotation.
+  lazy val perspectiveRepo: GetPerspectives =
+    GetPerspectives(clients.timelineService.getPerspectives)
+
+  lazy val placeRepo: PlaceRepository.Type =
+    GeoduckPlaceRepository(clients.geoHydrationLocate)
+
+  lazy val profileGeoRepo: ProfileGeoRepository.Type =
+    ProfileGeoRepository(clients.gnipEnricherator.hydrateProfileGeo)
+
+  lazy val relationshipRepo: RelationshipRepository.Type =
+    RelationshipRepository(clients.socialGraphService.exists, maxRequestSize = 6)
+
+  lazy val retweetSpamCheckRepo: RetweetSpamCheckRepository.Type =
+    RetweetSpamCheckRepository(clients.scarecrow.checkRetweet)
+
+  lazy val tweetCountsRepo: TweetCountsRepository.Type =
+    TweetCountsRepository(
+      clients.tflockReadClient,
+      maxRequestSize = settings.tweetCountsRepoChunkSize
+    )
+
+  lazy val tweetResultRepo: TweetResultRepository.Type =
+    ManhattanTweetRepository(
+      clients.tweetStorageClient.getTweet,
+      clients.tweetStorageClient.getStoredTweet,
+      settings.shortCircuitLikelyPartialTweetReads,
+      statsReceiver.scope("manhattan_tweet_repo"),
+      clientIdHelper,
+    )
+
+  lazy val tweetSpamCheckRepo: TweetSpamCheckRepository.Type =
+    TweetSpamCheckRepository(clients.scarecrow.checkTweet2)
+
+  lazy val unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type =
+    UnmentionedEntitiesRepository(clients.stratoserverClient)
+
+  lazy val urlRepo: UrlRepository.Type =
+    UrlRepository(
+      clients.talon.expand,
+      settings.thriftClientId.name,
+      statsReceiver.scope("talon_url_repo"),
+      clientIdHelper,
+    )
+
+  lazy val userRepo: UserRepository.Type =
+    GizmoduckUserRepository(
+      clients.gizmoduck.getById,
+      clients.gizmoduck.getByScreenName,
+      maxRequestSize = 100
+    )
+
+  lazy val userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type =
+    UserIsInvitedToConversationRepository(
+      FutureArrow(clients.tflockReadClient.multiSelectOne(_)),
+      FutureArrow((clients.tflockReadClient.contains(_: StatusGraph, _: Long, _: Long)).tupled))
+
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala
new file mode 100644
index 000000000..2060e7bcc
--- /dev/null
+++
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala @@ -0,0 +1,807 @@ +package com.twitter.tweetypie +package config + +import com.twitter.abdecider.ABDeciderFactory +import com.twitter.config.yaml.YamlConfig +import com.twitter.decider.Decider +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.finagle.memcached +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo.cache._ +import com.twitter.servo.cache.{KeyValueResult => _} +import com.twitter.servo.repository._ +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.stitch.repo.Repo +import com.twitter.stitch.timelineservice.TimelineService +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.stringcenter.client.ExternalStringRegistry +import com.twitter.stringcenter.client.MultiProjectStringCenter +import com.twitter.translation.Languages +import com.twitter.translation.YamlConfigLanguages +import com.twitter.tweetypie.caching.CacheOperations +import com.twitter.tweetypie.caching.Expiry +import com.twitter.tweetypie.caching.ServoCachedValueSerializer +import com.twitter.tweetypie.caching.StitchCaching +import com.twitter.tweetypie.caching.ValueSerializer +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core.TweetResult +import com.twitter.tweetypie.hydrator.TextRepairer +import com.twitter.tweetypie.hydrator.TweetHydration +import com.twitter.tweetypie.hydrator.TweetQueryOptionsExpander +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.repository.UserRepository +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.serverutil.BoringStackTrace +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala.DeviceSource +import com.twitter.tweetypie.thriftscala.Place +import 
com.twitter.tweetypie.thriftscala.entities.EntityExtractor +import com.twitter.tweetypie.util.StitchUtils +import com.twitter.util.Duration +import com.twitter.util.FuturePool +import com.twitter.util.Timer +import com.twitter.visibility.VisibilityLibrary +import com.twitter.visibility.common.KeywordMatcher +import com.twitter.visibility.common.LocalizationSource +import com.twitter.visibility.common.TweetMediaMetadataSource +import com.twitter.visibility.common.TweetPerspectiveSource +import com.twitter.visibility.common.UserRelationshipSource +import com.twitter.visibility.common.UserSource +import com.twitter.visibility.common.tflock.UserIsInvitedToConversationRepository +import com.twitter.visibility.configapi.configs.VisibilityDeciderGates +import com.twitter.visibility.generators.CountryNameGenerator +import com.twitter.visibility.generators.LocalizedInterstitialGenerator +import com.twitter.visibility.generators.TombstoneGenerator +import com.twitter.visibility.interfaces.tweets.DeletedTweetVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.QuotedTweetVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.TweetVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.UserUnavailableStateVisibilityLibrary +import com.twitter.visibility.util.DeciderUtil +import com.twitter.visibility.util.FeatureSwitchUtil +import java.util.concurrent.Executors + +/** + * LogicalRepositories is a layer above ExternalRepositories. These repos may have additional + * logic layered in, such as memcache-caching, hot-key caching, etc. There may + * also be multiple logical repositories mapped to a single external repository. + * + * These repositories are used in tweet hydration and tweet creation. 
+ */
+trait LogicalRepositories {
+
+  def card2Repo: Card2Repository.Type
+  def cardRepo: CardRepository.Type
+  def cardUsersRepo: CardUsersRepository.Type
+  def conversationIdRepo: ConversationIdRepository.Type
+  def conversationControlRepo: ConversationControlRepository.Type
+  def conversationMutedRepo: ConversationMutedRepository.Type
+  def containerAsGetTweetResultRepo: CreativesContainerMaterializationRepository.GetTweetType
+  def containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType
+  def deviceSourceRepo: DeviceSourceRepository.Type
+  def escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type
+  def geoScrubTimestampRepo: GeoScrubTimestampRepository.Type
+  def languageRepo: LanguageRepository.Type
+  def mediaMetadataRepo: MediaMetadataRepository.Type
+  def pastedMediaRepo: PastedMediaRepository.Type
+  def perspectiveRepo: PerspectiveRepository.Type
+  def placeRepo: PlaceRepository.Type
+  def profileGeoRepo: ProfileGeoRepository.Type
+  def quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type
+  def lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type
+  def relationshipRepo: RelationshipRepository.Type
+  def stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type
+  def stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type
+  def stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type
+  def stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type
+  def stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type
+  def stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type
+  def stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type
+  def takedownRepo: UserTakedownRepository.Type
+  def tweetSpamCheckRepo: TweetSpamCheckRepository.Type
+  def retweetSpamCheckRepo: RetweetSpamCheckRepository.Type
+  def tweetCountsRepo: TweetCountsRepository.Type
+  def tweetVisibilityRepo: TweetVisibilityRepository.Type
+  def quotedTweetVisibilityRepo: QuotedTweetVisibilityRepository.Type
+  def deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type
+  def unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type
+  def urlRepo: UrlRepository.Type
+  def userRepo: UserRepository.Type
+  def optionalUserRepo: UserRepository.Optional
+  def userIdentityRepo: UserIdentityRepository.Type
+  def userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type
+  def userProtectionRepo: UserProtectionRepository.Type
+  def userViewRepo: UserViewRepository.Type
+  def userVisibilityRepo: UserVisibilityRepository.Type
+
+  // Tweet repositories: the result-level repo plus the plain and optional tweet variants.
+  def tweetResultRepo: TweetResultRepository.Type
+  def tweetRepo: TweetRepository.Type
+  def optionalTweetRepo: TweetRepository.Optional
+
+  /**
+   * Not actually repositories, but intimately intertwined.
+   */
+  def tweetHydrators: TweetHydrators
+}
+
+/**
+ * Holds the middleware type aliases, the stats/logging wrapper (`ObserveStitch`), the
+ * `RepoConfig` builder, and the `apply` factory that returns a [[LogicalRepositories]].
+ */
+object LogicalRepositories {
+
+  /**
+   * Middleware is a function that takes a stitch repo and returns a new stitch repo.
+   */
+  type Middleware[K, V] = (K => Stitch[V]) => K => Stitch[V]
+
+  // Middleware2 is a function that takes a two-arg stitch repo and returns a new two-arg stitch repo.
+  type Middleware2[K, C, V] = ((K, C) => Stitch[V]) => ((K, C) => Stitch[V])
+  // Logger that ObserveStitch uses to record exceptions thrown by observed repos.
+  val exceptionLog: Logger = Logger(getClass)
+
+  // Converts a Middleware2 to a Middleware for use with withMiddleware.
+  def tupledMiddleware[K, C, V](middleware2: Middleware2[K, C, V]): Middleware[(K, C), V] =
+    // Untuple the incoming repo, apply the two-arg middleware, then re-tuple the result.
+    repo => middleware2(Function.untupled(repo)).tupled
+
+  /**
+   * Wraps a repo so that every call has its latency tracked under `latency_ms`, successes
+   * and NotFound results are counted, and any other failure is logged with
+   * `repoName` and the key, then passed to the exception counter.
+   */
+  object ObserveStitch {
+    def apply[K, V](
+      repo: K => Stitch[V],
+      repoName: String,
+      stats: StatsReceiver
+    ): K => Stitch[V] = {
+      val successCounter = stats.counter("success")
+      val notFoundCounter = stats.counter("not_found")
+      val latencyStat = stats.stat("latency_ms")
+
+      val exceptionCounter =
+        ExceptionCounter(
+          stats,
+          // don't count FilteredState exceptions
+          FilteredState.ignoringCategorizer(ExceptionCounter.defaultCategorizer)
+        )
+
+      (key: K) =>
+        StitchUtils.trackLatency(latencyStat, repo(key)).respond {
+          case Return(_) => successCounter.incr()
+          case Throw(NotFound) => notFoundCounter.incr()
+          case Throw(t) =>
+            val message = s"$repoName: $key"
+            // Exceptions classified as boring are logged at debug, everything else at warn.
+            if (BoringStackTrace.isBoring(t)) {
+              exceptionLog.debug(message, t)
+            } else {
+              exceptionLog.warn(message, t)
+            }
+
+            exceptionCounter(t)
+        }
+    }
+  }
+
+  /**
+   * Add middleware to configure a repository. The stats receiver is
+   * scoped for the currently-configured repository. The `toRepo` field
+   * is the repository with some set of middleware applied. Each method
+   * adds a new middleware to the current repo, and returns it as a
+   * `RepoConfig`, allowing method chaining.
+   *
+   * Since each method call applies a new middleware, the final middleware is
+   * the outermost middleware, and thus the one that sees the arguments
+   * first.
+   */
+  class RepoConfig[K, V](
+    val toRepo: K => Stitch[V],
+    stats: StatsReceiver,
+    name: String,
+    memcachedClientWithInProcessCaching: memcached.Client) {
+
+    /**
+     * Returns a new RepoConfig whose repo is `middleware(toRepo)`; the stats receiver,
+     * name and memcached client are carried over unchanged.
+     */
+    def withMiddleware(middleware: Middleware[K, V]): RepoConfig[K, V] =
+      new RepoConfig[K, V](middleware(toRepo), stats, name, memcachedClientWithInProcessCaching)
+
+    /**
+     * Wraps a repo with success/failure/latency stats tracking and logs
+     * exceptions. This will be applied to every repository.
+     *
+     * @param repoName Used when logging exceptions thrown by the underlying repo.
+     */
+    def observe(repoName: String = s"${name}_repo"): RepoConfig[K, V] = {
+      withMiddleware { repo => ObserveStitch[K, V](repo, repoName, stats) }
+    }
+
+    /**
+     * Use the supplied cache to wrap the repository with a read-through
+     * caching layer.
+     *
+     * @param cache               cache wrapped by the StitchLockingCache
+     * @param partialHandler      combined with `CachedResult.failuresAreDoNotCache` via
+     *                            `PartialHandler.orElse`
+     * @param maxCacheRequestSize passed to the StitchLockingCache as `maxRequestSize`
+     */
+    def caching(
+      cache: LockingCache[K, Cached[V]],
+      partialHandler: CachedResult.PartialHandler[K, V],
+      maxCacheRequestSize: Int = Int.MaxValue
+    ): RepoConfig[K, V] = {
+      val stitchLockingCache = StitchLockingCache(
+        underlying = cache,
+        picker = new PreferNewestCached[V],
+        maxRequestSize = maxCacheRequestSize
+      )
+
+      val handler: CachedResult.Handler[K, V] =
+        CachedResult.Handler(
+          CachedResult.PartialHandler.orElse(
+            partialHandler,
+            CachedResult.failuresAreDoNotCache
+          )
+        )
+
+      withMiddleware { repo =>
+        CacheStitch[K, K, V](
+          repo = repo,
+          cache = stitchLockingCache,
+          // NOTE(review): third positional argument; presumably the query-to-cache-key
+          // mapping, which is why query and key share type K. Confirm against CacheStitch.
+          identity,
+          handler = handler,
+          cacheable = CacheStitch.cacheFoundAndNotFound
+        )
+      }
+    }
+
+    /**
+     * Wraps the repository with a StitchCaching layer built on CacheOperations and
+     * `memcachedClientWithInProcessCaching`. Results are lifted to Try before entering
+     * the caching layer and lowered again on the way out, so the value serializer works
+     * on Try[V].
+     *
+     * @param keySerializer   turns a key into the string handed to CacheOperations
+     * @param valueSerializer serializer for the Try-wrapped result
+     */
+    def newCaching(
+      keySerializer: K => String,
+      valueSerializer: ValueSerializer[Try[V]]
+    ): RepoConfig[K, V] =
+      withMiddleware { repo =>
+        val logger = Logger(s"com.twitter.tweetypie.config.LogicalRepositories.$name")
+
+        val cacheOperations: CacheOperations[K, Try[V]] =
+          new CacheOperations(
+            keySerializer = keySerializer,
+            valueSerializer = valueSerializer,
+            memcachedClient = memcachedClientWithInProcessCaching,
+            statsReceiver = stats.scope("caching"),
+            logger = logger
+          )
+
+        val tryRepo: K => Stitch[Try[V]] = repo.andThen(_.liftToTry)
+        val cachingTryRepo: K => Stitch[Try[V]] = new StitchCaching(cacheOperations, tryRepo)
+        cachingTryRepo.andThen(_.lowerFromTry)
+      }
+
+    /**
+     * Exposes the repo as a two-argument function, given evidence that the pair
+     * `(K1, C)` conforms to the key type `K`.
+     */
+    def toRepo2[K1, C](implicit tupleToK: ((K1, C)) <:< K): (K1, C) => Stitch[V] =
+      (k1, c) => toRepo(tupleToK((k1, c)))
+  }
+
+  /**
+   * Builds a PartialHandler from `CachedResult.softTtlExpiration`, using the supplied
+   * soft-TTL function and `CachedResult.randomExpiry(softTtlPerturbationFactor)`.
+   *
+   * @param softTtl                   soft TTL as a function of the optional cached value
+   * @param softTtlPerturbationFactor argument to `CachedResult.randomExpiry`; defaults to 0.05
+   */
+  def softTtlPartialHandler[K, V](
+    softTtl: Option[V] => Duration,
+    softTtlPerturbationFactor: Float = 0.05f
+  ): CachedResult.PartialHandler[K, V] =
+    CachedResult
+      .softTtlExpiration[K, V](softTtl, CachedResult.randomExpiry(softTtlPerturbationFactor))
+
+  def apply(
+    settings: TweetServiceSettings,
+    stats: StatsReceiver,
+    timer: Timer,
+    deciderGates: TweetypieDeciderGates,
+    external: ExternalRepositories,
+    caches: Caches,
+    stratoClient: StratoClient,
+    hasMedia: Tweet => Boolean,
+    clientIdHelper: ClientIdHelper,
+    featureSwitchesWithoutExperiments: FeatureSwitches,
+  ): LogicalRepositories = {
+    val repoStats = stats.scope("repositories")
+
+    // Starts a RepoConfig for `repo`, with stats scoped under repositories/<name>.
+    def repoConfig[K, V](name: String, repo: K => Stitch[V]): RepoConfig[K, V] =
+      new RepoConfig[K, V](
+        name = name,
+        toRepo = repo,
+        stats = repoStats.scope(name),
+        memcachedClientWithInProcessCaching = caches.memcachedClientWithInProcessCaching)
+
+    // Same as repoConfig, for a two-argument repo: the repo is tupled so it fits RepoConfig.
+    def repo2Config[K, C, V](name: String, repo: (K, C) => Stitch[V]): RepoConfig[(K, C), V] =
+      repoConfig[(K, C), V](name, repo.tupled)
+
+    new LogicalRepositories {
+      // the final tweetResultRepo has a circular dependency, where it depends on hydrators
+      // that in turn depend on the tweetResultRepo, so we create a `tweetResultRepo` function
+      // that proxies to `var finalTweetResultRepo`, which gets set at the end of this block.
+ var finalTweetResultRepo: TweetResultRepository.Type = null + val tweetResultRepo: TweetResultRepository.Type = + (tweetId, opts) => finalTweetResultRepo(tweetId, opts) + val tweetRepo: TweetRepository.Type = TweetRepository.fromTweetResult(tweetResultRepo) + + val optionalTweetRepo: TweetRepository.Optional = TweetRepository.optional(tweetRepo) + + val userRepo: UserRepository.Type = + repo2Config(repo = external.userRepo, name = "user") + .observe() + .toRepo2 + + val optionalUserRepo: UserRepository.Optional = UserRepository.optional(userRepo) + + private[this] val tweetVisibilityStatsReceiver: StatsReceiver = + repoStats.scope("tweet_visibility_library") + private[this] val userUnavailableVisibilityStatsReceiver: StatsReceiver = + repoStats.scope("user_unavailable_visibility_library") + private[this] val quotedTweetVisibilityStatsReceiver: StatsReceiver = + repoStats.scope("quoted_tweet_visibility_library") + private[this] val deletedTweetVisibilityStatsReceiver: StatsReceiver = + repoStats.scope("deleted_tweet_visibility_library") + // TweetVisibilityLibrary still uses the old c.t.logging.Logger + private[this] val tweetVisibilityLogger = + com.twitter.logging.Logger("com.twitter.tweetypie.TweetVisibility") + private[this] val visibilityDecider: Decider = DeciderUtil.mkDecider( + deciderOverlayPath = settings.vfDeciderOverlayFilename, + useLocalDeciderOverrides = true) + private[this] val visibilityDeciderGates = VisibilityDeciderGates(visibilityDecider) + + private[this] def visibilityLibrary(statsReceiver: StatsReceiver) = VisibilityLibrary + .Builder( + log = tweetVisibilityLogger, + statsReceiver = statsReceiver, + memoizeSafetyLevelParams = visibilityDeciderGates.enableMemoizeSafetyLevelParams + ) + .withDecider(visibilityDecider) + .withDefaultABDecider(isLocal = false) + .withCaptureDebugStats(Gate.True) + .withEnableComposableActions(Gate.True) + .withEnableFailClosed(Gate.True) + 
.withEnableShortCircuiting(visibilityDeciderGates.enableShortCircuitingTVL) + .withSpecialLogging(visibilityDeciderGates.enableSpecialLogging) + .build() + + def countryNameGenerator(statsReceiver: StatsReceiver) = { + // TweetVisibilityLibrary, DeletedTweetVisibilityLibrary, and + // UserUnavailableVisibilityLibrary do not evaluate any Rules + // that require the display of country names in copy + CountryNameGenerator.providesWithCustomMap(Map.empty, statsReceiver) + } + + def tombstoneGenerator( + countryNameGenerator: CountryNameGenerator, + statsReceiver: StatsReceiver + ) = + TombstoneGenerator( + visibilityLibrary(statsReceiver).visParams, + countryNameGenerator, + statsReceiver) + + private[this] val userUnavailableVisibilityLibrary = + UserUnavailableStateVisibilityLibrary( + visibilityLibrary(userUnavailableVisibilityStatsReceiver), + visibilityDecider, + tombstoneGenerator( + countryNameGenerator(userUnavailableVisibilityStatsReceiver), + userUnavailableVisibilityStatsReceiver + ), + LocalizedInterstitialGenerator(visibilityDecider, userUnavailableVisibilityStatsReceiver) + ) + + val userIdentityRepo: UserIdentityRepository.Type = + repoConfig(repo = UserIdentityRepository(userRepo), name = "user_identity") + .observe() + .toRepo + + val userProtectionRepo: UserProtectionRepository.Type = + repoConfig(repo = UserProtectionRepository(userRepo), name = "user_protection") + .observe() + .toRepo + + val userViewRepo: UserViewRepository.Type = + repoConfig(repo = UserViewRepository(userRepo), name = "user_view") + .observe() + .toRepo + + val userVisibilityRepo: UserVisibilityRepository.Type = + repoConfig( + repo = UserVisibilityRepository(userRepo, userUnavailableVisibilityLibrary), + name = "user_visibility" + ).observe().toRepo + + val urlRepo: UrlRepository.Type = + repoConfig(repo = external.urlRepo, name = "url") + .observe() + .toRepo + + val profileGeoRepo: ProfileGeoRepository.Type = + repoConfig(repo = external.profileGeoRepo, name = "profile_geo") 
+ .observe() + .toRepo + + val quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type = + repo2Config(repo = external.quoterHasAlreadyQuotedRepo, name = "quoter_has_already_quoted") + .observe() + .toRepo2 + + val lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type = + repo2Config(repo = external.lastQuoteOfQuoterRepo, name = "last_quote_of_quoter") + .observe() + .toRepo2 + + val mediaMetadataRepo: MediaMetadataRepository.Type = + repoConfig(repo = external.mediaMetadataRepo, name = "media_metadata") + .observe() + .toRepo + + val perspectiveRepo: PerspectiveRepository.Type = + repoConfig(repo = external.perspectiveRepo, name = "perspective") + .observe() + .toRepo + + val conversationMutedRepo: ConversationMutedRepository.Type = + TimelineService.GetPerspectives.getConversationMuted(perspectiveRepo) + + // Because observe is applied before caching, only cache misses + // (i.e. calls to the underlying repo) are observed. + // Note that `newCaching` has stats around cache hit/miss but `caching` does not. + val deviceSourceRepo: DeviceSourceRepository.Type = + repoConfig(repo = external.deviceSourceRepo, name = "device_source") + .observe() + .newCaching( + keySerializer = appIdStr => DeviceSourceKey(appIdStr).toString, + valueSerializer = ServoCachedValueSerializer( + codec = DeviceSource, + expiry = Expiry.byAge(settings.deviceSourceMemcacheTtl), + softTtl = settings.deviceSourceMemcacheSoftTtl + ) + ) + .caching( + cache = caches.deviceSourceInProcessCache, + partialHandler = softTtlPartialHandler(_ => settings.deviceSourceInProcessSoftTtl) + ) + .toRepo + + // Because observe is applied before caching, only cache misses + // (i.e. calls to the underlying repo) are observed + // Note that `newCaching` has stats around cache hit/miss but `caching` does not. 
+ val placeRepo: PlaceRepository.Type = + repoConfig(repo = external.placeRepo, name = "place") + .observe() + .newCaching( + keySerializer = placeKey => placeKey.toString, + valueSerializer = ServoCachedValueSerializer( + codec = Place, + expiry = Expiry.byAge(settings.placeMemcacheTtl), + softTtl = settings.placeMemcacheSoftTtl + ) + ) + .toRepo + + val cardRepo: CardRepository.Type = + repoConfig(repo = external.cardRepo, name = "cards") + .observe() + .toRepo + + val card2Repo: Card2Repository.Type = + repo2Config(repo = external.card2Repo, name = "card2") + .observe() + .toRepo2 + + val cardUsersRepo: CardUsersRepository.Type = + repo2Config(repo = external.cardUsersRepo, name = "card_users") + .observe() + .toRepo2 + + val relationshipRepo: RelationshipRepository.Type = + repoConfig(repo = external.relationshipRepo, name = "relationship") + .observe() + .toRepo + + val conversationIdRepo: ConversationIdRepository.Type = + repoConfig(repo = external.conversationIdRepo, name = "conversation_id") + .observe() + .toRepo + + val conversationControlRepo: ConversationControlRepository.Type = + repo2Config( + repo = ConversationControlRepository(tweetRepo, stats.scope("conversation_control")), + name = "conversation_control" + ).observe().toRepo2 + + val containerAsGetTweetResultRepo: CreativesContainerMaterializationRepository.GetTweetType = + repo2Config( + repo = external.containerAsTweetRepo, + name = "container_as_tweet" + ).observe().toRepo2 + + val containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType = + repo2Config( + repo = external.containerAsTweetFieldsRepo, + name = "container_as_tweet_fields" + ).observe().toRepo2 + + val languageRepo: LanguageRepository.Type = { + val pool = FuturePool(Executors.newFixedThreadPool(settings.numPenguinThreads)) + repoConfig(repo = PenguinLanguageRepository(pool), name = "language") + .observe() + .toRepo + } + + // Because observe is applied before caching, only cache misses 
+ // (i.e. calls to the underlying repo) are observed + // Note that `newCaching` has stats around cache hit/miss but `caching` does not. + val tweetCountsRepo: TweetCountsRepository.Type = + repoConfig(repo = external.tweetCountsRepo, name = "counts") + .observe() + .caching( + cache = caches.tweetCountsCache, + partialHandler = softTtlPartialHandler { + case Some(0) => settings.tweetCountsMemcacheZeroSoftTtl + case _ => settings.tweetCountsMemcacheNonZeroSoftTtl + }, + maxCacheRequestSize = settings.tweetCountsCacheChunkSize + ) + .toRepo + + val pastedMediaRepo: PastedMediaRepository.Type = + repo2Config(repo = PastedMediaRepository(tweetRepo), name = "pasted_media") + .observe() + .toRepo2 + + val escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type = + repoConfig(repo = external.escherbirdAnnotationRepo, name = "escherbird_annotations") + .observe() + .toRepo + + val stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type = + repo2Config(repo = external.stratoSafetyLabelsRepo, name = "strato_safety_labels") + .observe() + .toRepo2 + + val stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type = + repoConfig( + repo = external.stratoCommunityMembershipRepo, + name = "strato_community_memberships") + .observe() + .toRepo + + val stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type = + repoConfig(repo = external.stratoCommunityAccessRepo, name = "strato_community_access") + .observe() + .toRepo + + val stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type = + repoConfig( + repo = external.stratoSuperFollowEligibleRepo, + name = "strato_super_follow_eligible") + .observe() + .toRepo + + val stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type = + repo2Config( + repo = external.stratoSuperFollowRelationsRepo, + name = "strato_super_follow_relations") + .observe() + .toRepo2 + + val stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type = + repoConfig(repo = 
external.stratoPromotedTweetRepo, name = "strato_promoted_tweet") + .observe() + .toRepo + + val stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type = + repo2Config( + repo = external.stratoSubscriptionVerificationRepo, + name = "strato_subscription_verification") + .observe() + .toRepo2 + + val unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type = + repo2Config(repo = external.unmentionedEntitiesRepo, name = "unmentioned_entities") + .observe() + .toRepo2 + + private[this] val userSource = + UserSource.fromRepo( + Repo { (k, _) => + val opts = UserQueryOptions(k.fields, UserVisibility.All) + userRepo(UserKey(k.id), opts) + } + ) + + private[this] val userRelationshipSource = + UserRelationshipSource.fromRepo( + Repo[UserRelationshipSource.Key, Unit, Boolean] { (key, _) => + relationshipRepo( + RelationshipKey(key.subjectId, key.objectId, key.relationship) + ) + } + ) + + private[this] val tweetPerspectiveSource = + TweetPerspectiveSource.fromGetPerspectives(perspectiveRepo) + private[this] val tweetMediaMetadataSource = + TweetMediaMetadataSource.fromFunction(mediaMetadataRepo) + + val userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type = + repo2Config( + repo = external.userIsInvitedToConversationRepo, + name = "user_is_invited_to_conversation") + .observe() + .toRepo2 + + private[this] val stringCenterClient: MultiProjectStringCenter = { + val stringCenterProjects = settings.flags.stringCenterProjects().toList + + val languages: Languages = new YamlConfigLanguages( + new YamlConfig(settings.flags.languagesConfig())) + + val loggingAbDecider = ABDeciderFactory("/usr/local/config/abdecider/abdecider.yml") + .withEnvironment("production") + .buildWithLogging() + + MultiProjectStringCenter( + projects = stringCenterProjects, + defaultBundlePath = MultiProjectStringCenter.StandardDefaultBundlePath, + refreshingBundlePath = MultiProjectStringCenter.StandardRefreshingBundlePath, + refreshingInterval = 
MultiProjectStringCenter.StandardRefreshingInterval, + requireDefaultBundleExists = true, + languages = languages, + statsReceiver = tweetVisibilityStatsReceiver, + loggingABDecider = loggingAbDecider + ) + } + private[this] val stringRegistry: ExternalStringRegistry = new ExternalStringRegistry() + private[this] val localizationSource: LocalizationSource = + LocalizationSource.fromMultiProjectStringCenterClient(stringCenterClient, stringRegistry) + + val tweetVisibilityRepo: TweetVisibilityRepository.Type = { + val tweetVisibilityLibrary: TweetVisibilityLibrary.Type = + TweetVisibilityLibrary( + visibilityLibrary(tweetVisibilityStatsReceiver), + userSource = userSource, + userRelationshipSource = userRelationshipSource, + keywordMatcher = KeywordMatcher.defaultMatcher(stats), + stratoClient = stratoClient, + localizationSource = localizationSource, + decider = visibilityDecider, + invitedToConversationRepo = userIsInvitedToConversationRepo, + tweetPerspectiveSource = tweetPerspectiveSource, + tweetMediaMetadataSource = tweetMediaMetadataSource, + tombstoneGenerator = tombstoneGenerator( + countryNameGenerator(tweetVisibilityStatsReceiver), + tweetVisibilityStatsReceiver + ), + interstitialGenerator = + LocalizedInterstitialGenerator(visibilityDecider, tweetVisibilityStatsReceiver), + limitedActionsFeatureSwitches = + FeatureSwitchUtil.mkLimitedActionsFeatureSwitches(tweetVisibilityStatsReceiver), + enableParityTest = deciderGates.tweetVisibilityLibraryEnableParityTest + ) + + val underlying = + TweetVisibilityRepository( + tweetVisibilityLibrary, + visibilityDeciderGates, + tweetVisibilityLogger, + repoStats.scope("tweet_visibility_repo") + ) + + repoConfig(repo = underlying, name = "tweet_visibility") + .observe() + .toRepo + } + + val quotedTweetVisibilityRepo: QuotedTweetVisibilityRepository.Type = { + val quotedTweetVisibilityLibrary: QuotedTweetVisibilityLibrary.Type = + QuotedTweetVisibilityLibrary( + visibilityLibrary(quotedTweetVisibilityStatsReceiver), + 
userSource = userSource, + userRelationshipSource = userRelationshipSource, + visibilityDecider, + userStateVisibilityLibrary = userUnavailableVisibilityLibrary, + enableVfFeatureHydration = deciderGates.enableVfFeatureHydrationInQuotedTweetVLShim + ) + + val underlying = + QuotedTweetVisibilityRepository(quotedTweetVisibilityLibrary, visibilityDeciderGates) + + repoConfig(repo = underlying, name = "quoted_tweet_visibility") + .observe() + .toRepo + } + + val deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type = { + val deletedTweetVisibilityLibrary: DeletedTweetVisibilityLibrary.Type = + DeletedTweetVisibilityLibrary( + visibilityLibrary(deletedTweetVisibilityStatsReceiver), + visibilityDecider, + tombstoneGenerator( + countryNameGenerator(deletedTweetVisibilityStatsReceiver), + deletedTweetVisibilityStatsReceiver + ) + ) + + val underlying = DeletedTweetVisibilityRepository.apply( + deletedTweetVisibilityLibrary + ) + + repoConfig(repo = underlying, name = "deleted_tweet_visibility") + .observe() + .toRepo + } + + val takedownRepo: UserTakedownRepository.Type = + repoConfig(repo = UserTakedownRepository(userRepo), name = "takedowns") + .observe() + .toRepo + + val tweetSpamCheckRepo: TweetSpamCheckRepository.Type = + repo2Config(repo = external.tweetSpamCheckRepo, name = "tweet_spam_check") + .observe() + .toRepo2 + + val retweetSpamCheckRepo: RetweetSpamCheckRepository.Type = + repoConfig(repo = external.retweetSpamCheckRepo, name = "retweet_spam_check") + .observe() + .toRepo + + // Because observe is applied before caching, only cache misses + // (i.e. calls to the underlying repo) are observed + // Note that `newCaching` has stats around cache hit/miss but `caching` does not. 
+ val geoScrubTimestampRepo: GeoScrubTimestampRepository.Type = + repoConfig(repo = external.geoScrubTimestampRepo, name = "geo_scrub") + .observe() + .caching( + cache = caches.geoScrubCache, + partialHandler = (_ => None) + ) + .toRepo + + val tweetHydrators: TweetHydrators = + TweetHydrators( + stats = stats, + deciderGates = deciderGates, + repos = this, + tweetDataCache = caches.tweetDataCache, + hasMedia = hasMedia, + featureSwitchesWithoutExperiments = featureSwitchesWithoutExperiments, + clientIdHelper = clientIdHelper, + ) + + val queryOptionsExpander: TweetQueryOptionsExpander.Type = + TweetQueryOptionsExpander.threadLocalMemoize( + TweetQueryOptionsExpander.expandDependencies + ) + + // mutations to tweets that we only need to apply when reading from the external + // repository, and not when reading from cache + val tweetMutation: Mutation[Tweet] = + Mutation + .all( + Seq( + EntityExtractor.mutationAll, + TextRepairer.BlankLineCollapser, + TextRepairer.CoreTextBugPatcher + ) + ).onlyIf(_.coreData.isDefined) + + val cachingTweetRepo: TweetResultRepository.Type = + repo2Config(repo = external.tweetResultRepo, name = "saved_tweet") + .observe() + .withMiddleware { repo => + // applies tweetMutation to the results of TweetResultRepository + val mutateResult = TweetResult.mutate(tweetMutation) + repo.andThen(stitchResult => stitchResult.map(mutateResult)) + } + .withMiddleware( + tupledMiddleware( + CachingTweetRepository( + caches.tweetResultCache, + settings.tweetTombstoneTtl, + stats.scope("saved_tweet", "cache"), + clientIdHelper, + deciderGates.logCacheExceptions, + ) + ) + ) + .toRepo2 + + finalTweetResultRepo = repo2Config(repo = cachingTweetRepo, name = "tweet") + .withMiddleware( + tupledMiddleware( + TweetHydration.hydrateRepo( + tweetHydrators.hydrator, + tweetHydrators.cacheChangesEffect, + queryOptionsExpander + ) + ) + ) + .observe() + .withMiddleware(tupledMiddleware(TweetResultRepository.shortCircuitInvalidIds)) + .toRepo2 + } + } +} diff 
--git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala new file mode 100644 index 000000000..22623039b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala @@ -0,0 +1,314 @@ +package com.twitter.tweetypie +package config + +import com.twitter.app.Flag +import com.twitter.app.Flaggable +import com.twitter.app.Flags +import com.twitter.finagle.http.HttpMuxer +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.mtls.authorization.server.MtlsServerSessionTrackerFilter +import com.twitter.finagle.mtls.server.MtlsStackServer._ +import com.twitter.finagle.param.Reporter +import com.twitter.finagle.ssl.OpportunisticTls +import com.twitter.finagle.util.NullReporterFactory +import com.twitter.finagle.Thrift +import com.twitter.finagle.ThriftMux +import com.twitter.flockdb.client.thriftscala.Priority +import com.twitter.inject.Injector +import com.twitter.inject.annotations.{Flags => InjectFlags} +import com.twitter.scrooge.ThriftEnum +import com.twitter.scrooge.ThriftEnumObject +import com.twitter.server.handler.IndexHandler +import com.twitter.strato.catalog.Catalog +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.fed.server.StratoFedServer +import com.twitter.strato.util.Ref +import com.twitter.strato.warmup.Warmer +import com.twitter.tweetypie.federated.StratoCatalogBuilder +import com.twitter.tweetypie.federated.warmups.StratoCatalogWarmups +import com.twitter.tweetypie.serverutil.ActivityService +import java.net.InetSocketAddress +import scala.reflect.ClassTag + +object Env extends Enumeration { + val dev: Env.Value = Value + val staging: Env.Value = Value + val prod: Env.Value = Value +} + +class TweetServiceFlags(flag: Flags, injector: => Injector) { + implicit object EnvFlaggable extends Flaggable[Env.Value] { + def parse(s: String): Env.Value = + s match { + // Handle 
Aurora env names that are different from tweetypie's names + case "devel" => Env.dev + case "test" => Env.staging + // Handle Tweetypie env names + case other => Env.withName(other) + } + } + + val zone: Flag[String] = + flag("zone", "localhost", "One of: atla, pdxa, localhost, etc.") + + val env: Flag[Env.Value] = + flag("env", Env.dev, "One of: testbox, dev, staging, prod") + + val twemcacheDest: Flag[String] = + flag( + "twemcacheDest", + "/s/cache/tweetypie:twemcaches", + "The Name for the tweetypie cache cluster." + ) + + val deciderOverrides: Flag[Map[String, Boolean]] = + flag( + "deciderOverrides", + Map.empty[String, Boolean], + "Set deciders to constant values, overriding decider configuration files." + )( + // Unfortunately, the implicit Flaggable[Boolean] has a default + // value and Flaggable.ofMap[K, V] requires that the implicit + // Flaggable[V] not have a default. Even less fortunately, it + // doesn't say why. We're stuck with this. + Flaggable.ofMap(implicitly, Flaggable.mandatory(_.toBoolean)) + ) + + // "/decider.yml" comes from the resources included at + // "tweetypie/server/config", so you should not normally need to + // override this value. This flag is defined as a step toward making + // our command-line usage more similar to the standard + // twitter-server-internal flags. + def deciderBase(): String = + injector.instance[String](InjectFlags.named("decider.base")) + + // Omitting a value for decider overlay flag causes the server to use + // only the static decider. + def deciderOverlay(): String = + injector.instance[String](InjectFlags.named("decider.overlay")) + + // Omitting a value for the VF decider overlay flag causes the server + // to use only the static decider. + val vfDeciderOverlay: Flag[String] = + flag( + "vf.decider.overlay", + "The location of the overlay decider configuration for Visibility Filtering") + + /** + * Warmup Requests happen as part of the initialization process, before any real requests are + * processed. 
This prevents real requests from ever being served from a completely cold state + */ + val enableWarmupRequests: Flag[Boolean] = + flag( + "enableWarmupRequests", + true, + """| warms up Tweetypie service by generating random requests + | to Tweetypie that are processed prior to the actual client requests """.stripMargin + ) + + val grayListRateLimit: Flag[Double] = + flag("graylistRateLimit", 5.0, "rate-limit for non-allowlisted clients") + + val servicePort: Flag[InetSocketAddress] = + flag("service.port", "port for tweet-service thrift interface") + + val clientId: Flag[String] = + flag("clientId", "tweetypie.staging", "clientId to send in requests") + + val allowlist: Flag[Boolean] = + flag("allowlist", true, "enforce client allowlist") + + val clientHostStats: Flag[Boolean] = + flag("clientHostStats", false, "enable per client host stats") + + val withCache: Flag[Boolean] = + flag("withCache", true, "if set to false, Tweetypie will launch without memcache") + + /** + * Make any [[ThriftEnum]] value parseable as a [[Flag]] value. This + * will parse case-insensitive values that match the unqualified + * names of the values of the enumeration, in the manner of + * [[ThriftEnum]]'s `valueOf` method. + * + * Consider a [[ThriftEnum]] generated from the following Thrift IDL snippet: + * + * {{{ + * enum Priority { + * Low = 1 + * Throttled = 2 + * High = 3 + * } + * }}} + * + * To enable defining flags that specify one of these enum values: + * + * {{{ + * implicit val flaggablePriority: Flaggable[Priority] = flaggableThriftEnum(Priority) + * }}} + * + * In this example, the enumeration value `Priority.Low` can be + * represented as the string "Low", "low", or "LOW". 
+ */ + def flaggableThriftEnum[T <: ThriftEnum: ClassTag](enum: ThriftEnumObject[T]): Flaggable[T] = + Flaggable.mandatory[T] { stringValue: String => + enum + .valueOf(stringValue) + .getOrElse { + val validValues = enum.list.map(_.name).mkString(", ") + throw new IllegalArgumentException( + s"Invalid value ${stringValue}. Valid values include: ${validValues}" + ) + } + } + + implicit val flaggablePriority: Flaggable[Priority] = flaggableThriftEnum(Priority) + + val backgroundIndexingPriority: Flag[Priority] = + flag( + "backgroundIndexingPriority", + Priority.Low, + "specifies the queue to use for \"background\" tflock operations, such as removing edges " + + "for deleted Tweets. This exists for testing scenarios, when it is useful to see the " + + "effects of background indexing operations sooner. In production, this should always be " + + "set to \"low\" (the default)." + ) + + val tflockPageSize: Flag[Int] = + flag("tflockPageSize", 1000, "Number of items to return in each page when querying tflock") + + val enableInProcessCache: Flag[Boolean] = + flag( + "enableInProcessCache", + true, + "if set to false, Tweetypie will not use the in-process cache" + ) + + val inProcessCacheSize: Flag[Int] = + flag("inProcessCacheSize", 1700, "maximum items in in-process cache") + + val inProcessCacheTtlMs: Flag[Int] = + flag("inProcessCacheTtlMs", 10000, "milliseconds that hot keys are stored in memory") + + val memcachePendingRequestLimit: Flag[Int] = + flag( + "memcachePendingRequestLimit", + 100, + "Number of requests that can be queued on a single memcache connection (4 per cache server)" + ) + + val instanceId: Flag[Int] = + flag( + "configbus.instanceId", + -1, + "InstanceId of the tweetypie service instance for staged configuration distribution" + ) + + val instanceCount: Flag[Int] = + flag( + "configbus.instanceCount", + -1, + "Total number of tweetypie service instances for staged configuration distribution" + ) + + def serviceIdentifier(): ServiceIdentifier = + 
injector.instance[ServiceIdentifier] + + val enableReplication: Flag[Boolean] = + flag( + "enableReplication", + true, + "Enable replication of reads (configurable via tweetypie_replicate_reads decider) and writes (100%) via DRPC" + ) + + val simulateDeferredrpcCallbacks: Flag[Boolean] = + flag( + "simulateDeferredrpcCallbacks", + false, + """|For async write path, call back into current instance instead of via DRPC. + |This is used for test and devel instances so we can ensure the test traffic + |is going to the test instance.""".stripMargin + ) + + val shortCircuitLikelyPartialTweetReadsMs: Flag[Int] = + flag( + "shortCircuitLikelyPartialTweetReadsMs", + 1500, + """|Specifies a number of milliseconds before which we will short-circuit likely + |partial reads from MH and return a NotFound tweet response state. After + |experimenting we went with 1500 ms.""".stripMargin + ) + + val stringCenterProjects: Flag[Seq[String]] = + flag( + "stringcenter.projects", + Seq.empty[String], + "String Center project names, comma separated")(Flaggable.ofSeq(Flaggable.ofString)) + + val languagesConfig: Flag[String] = + flag("international.languages", "Supported languages config file") +} + +class TweetypieMain extends StratoFedServer { + override def dest: String = "/s/tweetypie/tweetypie:federated" + + val tweetServiceFlags: TweetServiceFlags = new TweetServiceFlags(flag, injector) + + // display all the registered HttpMuxer handlers + HttpMuxer.addHandler("", new IndexHandler) + + private[this] lazy val serverBuilder = { + val settings = new TweetServiceSettings(tweetServiceFlags) + val serverBuilder = new TweetServerBuilder(settings) + + val mtlsSessionTrackerFilter = + new MtlsServerSessionTrackerFilter[Array[Byte], Array[Byte]](statsReceiver) + + val mtlsTrackedService = mtlsSessionTrackerFilter.andThen(ActivityService(serverBuilder.build)) + + val thriftMuxServer = ThriftMux.server + // by default, finagle logs exceptions to chickadee, which is deprecated and + // basically 
unused. to avoid wasted overhead, we explicitly disable the reporter. + .configured(Reporter(NullReporterFactory)) + .withLabel("tweetypie") + .withMutualTls(tweetServiceFlags.serviceIdentifier()) + .withOpportunisticTls(OpportunisticTls.Required) + .configured(Thrift.param.ServiceClass(Some(classOf[ThriftTweetService]))) + .serve(tweetServiceFlags.servicePort(), mtlsTrackedService) + + closeOnExit(thriftMuxServer) + await(thriftMuxServer) + + serverBuilder + } + + override def configureRefCatalog( + catalog: Ref[Catalog[StratoFed.Column]] + ): Ref[Catalog[StratoFed.Column]] = + catalog + .join { + Ref( + serverBuilder.stratoTweetService.flatMap { tweetService => + StratoCatalogBuilder.catalog( + tweetService, + serverBuilder.backendClients.stratoserverClient, + serverBuilder.backendClients.gizmoduck.getById, + serverBuilder.backendClients.callbackPromotedContentLogger, + statsReceiver, + serverBuilder.deciderGates.enableCommunityTweetCreates, + ) + } + ) + } + .map { case (l, r) => l ++ r } + + override def configureWarmer(warmer: Warmer): Unit = { + new TweetServiceSettings(tweetServiceFlags).warmupRequestsSettings.foreach { warmupSettings => + warmer.add( + "tweetypie strato catalog", + () => StratoCatalogWarmups.warmup(warmupSettings, composedOps) + ) + } + } +} + +object Main extends TweetypieMain diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala new file mode 100644 index 000000000..04746792b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie +package config + +import com.twitter.io.Buf +import com.twitter.finagle.{Service, SimpleFilter} +import com.twitter.finagle.memcached.protocol._ + +class MemcacheExceptionLoggingFilter extends SimpleFilter[Command, Response] { + // Using a custom 
logger name so that we can target logging rules specifically + // for memcache exception logging. + val logger: Logger = Logger(getClass) + + def apply(command: Command, service: Service[Command, Response]): Future[Response] = { + service(command).respond { + case Return(Error(e)) => + log(command, e) + case Return(ValuesAndErrors(_, errors)) if errors.nonEmpty => + errors.foreach { + case (Buf.Utf8(keyStr), e) => + log(command.name, keyStr, e) + } + case Throw(e) => + log(command, e) + + case _ => + } + } + + private def log(command: Command, e: Throwable): Unit = { + log(command.name, getKey(command), e) + } + + private def log(commandName: String, keyStr: String, e: Throwable): Unit = { + logger.debug( + s"CACHE_EXCEPTION command: ${commandName} key: ${keyStr} exception: ${e.getClass.getName}", + e, + ) + } + + private def getKey(command: Command): String = command match { + case Get(keys) => toKeyStr(keys) + case Gets(keys) => toKeyStr(keys) + + case Set(Buf.Utf8(key), _, _, _) => key + case Add(Buf.Utf8(key), _, _, _) => key + case Cas(Buf.Utf8(key), _, _, _, _) => key + case Delete(Buf.Utf8(key)) => key + case Replace(Buf.Utf8(key), _, _, _) => key + case Append(Buf.Utf8(key), _, _, _) => key + case Prepend(Buf.Utf8(key), _, _, _) => key + + case Incr(Buf.Utf8(key), _) => key + case Decr(Buf.Utf8(key), _) => key + case Stats(keys) => toKeyStr(keys) + case Quit() => "quit" + case Upsert(Buf.Utf8(key), _, _, _, _) => key + case Getv(keys) => toKeyStr(keys) + } + + private def toKeyStr(keys: Seq[Buf]): String = + keys.map { case Buf.Utf8(key) => key }.mkString(",") +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala new file mode 100644 index 000000000..d1f4721dc --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala @@ -0,0 +1,15 @@ +package com.twitter.tweetypie.config + +import 
com.twitter.config.yaml.YamlMap +import com.twitter.tweetypie.serverutil.PartnerMedia +import scala.util.matching.Regex + +/** + * Helpers for loading resources bundled with Tweetypie. We load them + * through this API in order to be able to unit test the resource + * loading code. + */ +object Resources { + def loadPartnerMediaRegexes(): Seq[Regex] = + PartnerMedia.load(YamlMap.load("/partner_media.yml")) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala new file mode 100644 index 000000000..523c9be1c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala @@ -0,0 +1,102 @@ +package com.twitter.tweetypie.config + +import com.twitter.servo.cache.{Cache, Cached, CachedValue, CachedValueStatus} +import com.twitter.servo.util.Scribe +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.repository.TweetKey +import com.twitter.tweetypie.serverutil.logcachewrites.WriteLoggingCache +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.thriftscala.{CachedTweet, ComposerSource, TweetCacheWrite} +import com.twitter.util.Time + +class ScribeTweetCacheWrites( + val underlyingCache: Cache[TweetKey, Cached[CachedTweet]], + logYoungTweetCacheWrites: TweetId => Boolean, + logTweetCacheWrites: TweetId => Boolean) + extends WriteLoggingCache[TweetKey, Cached[CachedTweet]] { + private[this] lazy val scribe = Scribe(TweetCacheWrite, "tweetypie_tweet_cache_writes") + + private[this] def mkTweetCacheWrite( + id: Long, + action: String, + cachedValue: CachedValue, + cachedTweet: Option[CachedTweet] = None + ): TweetCacheWrite = { + /* + * If the Tweet id is a Snowflake id, calculate the offset since Tweet creation. + * If it is not a Snowflake id, then the offset should be 0. See [[TweetCacheWrite]]'s Thrift + * documentation for more details. 
+ */ + val timestampOffset = + if (SnowflakeId.isSnowflakeId(id)) { + SnowflakeId(id).unixTimeMillis.asLong + } else { + 0 + } + + TweetCacheWrite( + tweetId = id, + timestamp = Time.now.inMilliseconds - timestampOffset, + action = action, + cachedValue = cachedValue, + cachedTweet = cachedTweet + ) + } + + /** + * Scribe a TweetCacheWrite record to tweetypie_tweet_cache_writes. We scribe the + * messages instead of writing them to the regular log file because the + * primary use of this logging is to get a record over time of the cache + * actions that affected a tweet, so we need a durable log that we can + * aggregate. + */ + override def log(action: String, k: TweetKey, v: Option[Cached[CachedTweet]]): Unit = + v match { + case Some(cachedTweet) => { + val cachedValue = CachedValue( + status = cachedTweet.status, + cachedAtMsec = cachedTweet.cachedAt.inMilliseconds, + readThroughAtMsec = cachedTweet.readThroughAt.map(_.inMilliseconds), + writtenThroughAtMsec = cachedTweet.writtenThroughAt.map(_.inMilliseconds), + doNotCacheUntilMsec = cachedTweet.doNotCacheUntil.map(_.inMilliseconds), + ) + scribe(mkTweetCacheWrite(k.id, action, cachedValue, cachedTweet.value)) + } + // `v` is only None if the action is a "delete" so set CachedValue with a status `Deleted` + case None => { + val cachedValue = + CachedValue(status = CachedValueStatus.Deleted, cachedAtMsec = Time.now.inMilliseconds) + scribe(mkTweetCacheWrite(k.id, action, cachedValue)) + } + } + + private[this] val YoungTweetThresholdMs = 3600 * 1000 + + private[this] def isYoungTweet(tweetId: TweetId): Boolean = + (SnowflakeId.isSnowflakeId(tweetId) && + ((Time.now.inMilliseconds - SnowflakeId(tweetId).unixTimeMillis.asLong) <= + YoungTweetThresholdMs)) + + /** + * Select all tweets for which the log_tweet_cache_writes decider returns + * true and "young" tweets for which the log_young_tweet_cache_writes decider + * returns true. 
+ */ + override def selectKey(k: TweetKey): Boolean = + // When the tweet is young, we log it if it passes either decider. This is + // because the deciders will (by design) select a different subset of + // tweets. We do this so that we have a full record for all tweets for which + // log_tweet_cache_writes is on, but also cast a wider net for tweets that + // are more likely to be affected by replication lag, race conditions + // between different writes, or other consistency issues + logTweetCacheWrites(k.id) || (isYoungTweet(k.id) && logYoungTweetCacheWrites(k.id)) + + /** + * Log newscamera tweets as well as any tweets for which selectKey returns + * true. Note that for newscamera tweets, we will possibly miss "delete" + * actions since those do not have access to the value, and so do not call + * this method. + */ + override def select(k: TweetKey, v: Cached[CachedTweet]): Boolean = + v.value.exists(_.tweet.composerSource.contains(ComposerSource.Camera)) || selectKey(k) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala new file mode 100644 index 000000000..eafd02eaa --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala @@ -0,0 +1,300 @@ +package com.twitter.tweetypie +package config + +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.stitch.repo.Repo +import com.twitter.tweetypie.backends.LimiterService.Feature +import com.twitter.tweetypie.handler._ +import com.twitter.tweetypie.jiminy.tweetypie.NudgeBuilder +import com.twitter.tweetypie.repository.RelationshipKey +import com.twitter.tweetypie.store.TotalTweetStore +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.tweettext.TweetText +import com.twitter.visibility.common.TrustedFriendsSource +import com.twitter.visibility.common.UserRelationshipSource +import 
com.twitter.visibility.writer.interfaces.tweets.TweetWriteEnforcementLibrary + +trait TweetBuilders { + val retweetBuilder: RetweetBuilder.Type + val tweetBuilder: TweetBuilder.Type +} + +object TweetBuilders { + + def validateCardRefAttachmentByUserAgentGate( + android: Gate[Unit], + nonAndroid: Gate[Unit] + ): Gate[Option[String]] = + Gate[Option[String]] { (userAgent: Option[String]) => + if (userAgent.exists(_.startsWith("TwitterAndroid"))) { + android() + } else { + nonAndroid() + } + } + + def apply( + settings: TweetServiceSettings, + statsReceiver: StatsReceiver, + deciderGates: TweetypieDeciderGates, + featureSwitchesWithExperiments: FeatureSwitches, + clients: BackendClients, + caches: Caches, + repos: LogicalRepositories, + tweetStore: TotalTweetStore, + hasMedia: Tweet => Boolean, + unretweetEdits: TweetDeletePathHandler.UnretweetEdits, + ): TweetBuilders = { + val urlShortener = + UrlShortener.scribeMalware(clients.guano) { + UrlShortener.fromTalon(clients.talon.shorten) + } + + val urlEntityBuilder = UrlEntityBuilder.fromShortener(urlShortener) + + val geoBuilder = + GeoBuilder( + repos.placeRepo, + ReverseGeocoder.fromGeoduck(clients.geoduckGeohashLocate), + statsReceiver.scope("geo_builder") + ) + + val replyCardUsersFinder: CardUsersFinder.Type = CardUsersFinder(repos.cardUsersRepo) + + val selfThreadBuilder = SelfThreadBuilder(statsReceiver.scope("self_thread_builder")) + + val replyBuilder = + ReplyBuilder( + repos.userIdentityRepo, + repos.optionalTweetRepo, + replyCardUsersFinder, + selfThreadBuilder, + repos.relationshipRepo, + repos.unmentionedEntitiesRepo, + deciderGates.enableRemoveUnmentionedImplicitMentions, + statsReceiver.scope("reply_builder"), + TweetText.MaxMentions + ) + + val mediaBuilder = + MediaBuilder( + clients.mediaClient.processMedia, + CreateMediaTco(urlShortener), + statsReceiver.scope("media_builder") + ) + + val validateAttachments = + AttachmentBuilder.validateAttachments( + statsReceiver, + 
validateCardRefAttachmentByUserAgentGate( + android = deciderGates.validateCardRefAttachmentAndroid, + nonAndroid = deciderGates.validateCardRefAttachmentNonAndroid + ) + ) + + val attachmentBuilder = + AttachmentBuilder( + repos.optionalTweetRepo, + urlShortener, + validateAttachments, + statsReceiver.scope("attachment_builder"), + deciderGates.denyNonTweetPermalinks + ) + + val validatePostTweetRequest: FutureEffect[PostTweetRequest] = + TweetBuilder.validateAdditionalFields[PostTweetRequest] + + val validateRetweetRequest = + TweetBuilder.validateAdditionalFields[RetweetRequest] + + val tweetIdGenerator = + () => clients.snowflakeClient.get() + + val retweetSpamChecker = + Spam.gated(deciderGates.checkSpamOnRetweet) { + Spam.allowOnException( + ScarecrowRetweetSpamChecker( + statsReceiver.scope("retweet_builder").scope("spam"), + repos.retweetSpamCheckRepo + ) + ) + } + + val tweetSpamChecker = + Spam.gated(deciderGates.checkSpamOnTweet) { + Spam.allowOnException( + ScarecrowTweetSpamChecker.fromSpamCheckRepository( + statsReceiver.scope("tweet_builder").scope("spam"), + repos.tweetSpamCheckRepo + ) + ) + } + + val duplicateTweetFinder = + DuplicateTweetFinder( + settings = settings.duplicateTweetFinderSettings, + tweetSource = DuplicateTweetFinder.TweetSource.fromServices( + tweetRepo = repos.optionalTweetRepo, + getStatusTimeline = clients.timelineService.getStatusTimeline + ) + ) + + val validateUpdateRateLimit = + RateLimitChecker.validate( + clients.limiterService.hasRemaining(Feature.Updates), + statsReceiver.scope("rate_limits", Feature.Updates.name), + deciderGates.rateLimitByLimiterService + ) + + val tweetBuilderStats = statsReceiver.scope("tweet_builder") + + val updateUserCounts = + TweetBuilder.updateUserCounts(hasMedia) + + val filterInvalidData = + TweetBuilder.filterInvalidData( + validateTweetMediaTags = TweetBuilder.validateTweetMediaTags( + tweetBuilderStats.scope("media_tags_filter"), + RateLimitChecker.getMaxMediaTags( + 
clients.limiterService.minRemaining(Feature.MediaTagCreate), + TweetBuilder.MaxMediaTagCount + ), + repos.optionalUserRepo + ), + cardReferenceBuilder = TweetBuilder.cardReferenceBuilder( + CardReferenceValidationHandler(clients.expandodo.checkAttachmentEligibility), + urlShortener + ) + ) + + val rateLimitFailures = + PostTweet.RateLimitFailures( + validateLimit = RateLimitChecker.validate( + clients.limiterService.hasRemaining(Feature.TweetCreateFailure), + statsReceiver.scope("rate_limits", Feature.TweetCreateFailure.name), + deciderGates.rateLimitTweetCreationFailure + ), + clients.limiterService.incrementByOne(Feature.Updates), + clients.limiterService.incrementByOne(Feature.TweetCreateFailure) + ) + + val countFailures = + PostTweet.CountFailures[TweetBuilderResult](statsReceiver) + + val tweetBuilderFilter: PostTweet.Filter[TweetBuilderResult] = + rateLimitFailures.andThen(countFailures) + + val conversationControlBuilder = ConversationControlBuilder.fromUserIdentityRepo( + statsReceiver = statsReceiver.scope("conversation_control_builder"), + userIdentityRepo = repos.userIdentityRepo + ) + + val conversationControlValidator = ConversationControlBuilder.Validate( + useFeatureSwitchResults = deciderGates.useConversationControlFeatureSwitchResults, + statsReceiver = statsReceiver + ) + + val communitiesValidator: CommunitiesValidator.Type = CommunitiesValidator() + + val collabControlBuilder: CollabControlBuilder.Type = CollabControlBuilder() + + val userRelationshipSource = UserRelationshipSource.fromRepo( + Repo[UserRelationshipSource.Key, Unit, Boolean] { (key, _) => + repos.relationshipRepo( + RelationshipKey(key.subjectId, key.objectId, key.relationship) + ) + } + ) + + val trustedFriendsSource = + TrustedFriendsSource.fromStrato(clients.stratoserverClient, statsReceiver) + + val validateTweetWrite = TweetWriteValidator( + convoCtlRepo = repos.conversationControlRepo, + tweetWriteEnforcementLibrary = TweetWriteEnforcementLibrary( + userRelationshipSource, 
+ trustedFriendsSource, + repos.userIsInvitedToConversationRepo, + repos.stratoSuperFollowEligibleRepo, + repos.tweetRepo, + statsReceiver.scope("tweet_write_enforcement_library") + ), + enableExclusiveTweetControlValidation = deciderGates.enableExclusiveTweetControlValidation, + enableTrustedFriendsControlValidation = deciderGates.enableTrustedFriendsControlValidation, + enableStaleTweetValidation = deciderGates.enableStaleTweetValidation + ) + + val nudgeBuilder = NudgeBuilder( + clients.stratoserverClient, + deciderGates.jiminyDarkRequests, + statsReceiver.scope("nudge_builder") + ) + + val editControlBuilder = EditControlBuilder( + tweetRepo = repos.tweetRepo, + card2Repo = repos.card2Repo, + promotedTweetRepo = repos.stratoPromotedTweetRepo, + subscriptionVerificationRepo = repos.stratoSubscriptionVerificationRepo, + disablePromotedTweetEdit = deciderGates.disablePromotedTweetEdit, + checkTwitterBlueSubscription = deciderGates.checkTwitterBlueSubscriptionForEdit, + setEditWindowToSixtyMinutes = deciderGates.setEditTimeWindowToSixtyMinutes, + stats = statsReceiver, + ) + + val validateEdit = EditValidator(repos.optionalTweetRepo) + + // TweetBuilders builds two distinct TweetBuilders (Tweet and Retweet builders). 
+ new TweetBuilders { + val tweetBuilder: TweetBuilder.Type = + tweetBuilderFilter[PostTweetRequest]( + TweetBuilder( + stats = tweetBuilderStats, + validateRequest = validatePostTweetRequest, + validateEdit = validateEdit, + validateUpdateRateLimit = validateUpdateRateLimit, + tweetIdGenerator = tweetIdGenerator, + userRepo = repos.userRepo, + deviceSourceRepo = repos.deviceSourceRepo, + communityMembershipRepo = repos.stratoCommunityMembershipRepo, + communityAccessRepo = repos.stratoCommunityAccessRepo, + urlShortener = urlShortener, + urlEntityBuilder = urlEntityBuilder, + geoBuilder = geoBuilder, + replyBuilder = replyBuilder, + mediaBuilder = mediaBuilder, + attachmentBuilder = attachmentBuilder, + duplicateTweetFinder = duplicateTweetFinder, + spamChecker = tweetSpamChecker, + filterInvalidData = filterInvalidData, + updateUserCounts = updateUserCounts, + validateConversationControl = conversationControlValidator, + conversationControlBuilder = conversationControlBuilder, + validateTweetWrite = validateTweetWrite, + nudgeBuilder = nudgeBuilder, + communitiesValidator = communitiesValidator, + collabControlBuilder = collabControlBuilder, + editControlBuilder = editControlBuilder, + featureSwitches = featureSwitchesWithExperiments, + ) + ) + + val retweetBuilder: RetweetBuilder.Type = + tweetBuilderFilter[RetweetRequest]( + RetweetBuilder( + validateRequest = validateRetweetRequest, + tweetIdGenerator = tweetIdGenerator, + tweetRepo = repos.tweetRepo, + userRepo = repos.userRepo, + tflock = clients.tflockWriteClient, + deviceSourceRepo = repos.deviceSourceRepo, + validateUpdateRateLimit = validateUpdateRateLimit, + spamChecker = retweetSpamChecker, + updateUserCounts = updateUserCounts, + superFollowRelationsRepo = repos.stratoSuperFollowRelationsRepo, + unretweetEdits = unretweetEdits, + setEditWindowToSixtyMinutes = deciderGates.setEditTimeWindowToSixtyMinutes + ) + ) + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala new file mode 100644 index 000000000..af71bf89d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala @@ -0,0 +1,341 @@ +package com.twitter.tweetypie +package config + +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.servo.cache.Cached +import com.twitter.servo.cache.LockingCache +import com.twitter.servo.util.ExceptionCategorizer +import com.twitter.servo.util.ExceptionCounter +import com.twitter.servo.util.FutureEffect +import com.twitter.servo.util.Scribe +import com.twitter.stitch.NotFound +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core.TweetData +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.hydrator._ +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.serverutil.{ExceptionCounter => TpExceptionCounter} +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.client_id.ClientIdHelper + +trait TweetHydrators { + + /** + * Hydrator that has all the Tweet hydrators (entire "pipeline") configured + * and wired up. + * This hydrator is used both on the read and write path and is + * customized by different TweetQuery.Options. + * Modifications are not automatically written back to cache. + * `cacheChangesEffect` must be applied for that. + */ + def hydrator: TweetDataValueHydrator + + /** + * The `Effect` to use to write modified tweets back to cache. + */ + def cacheChangesEffect: Effect[ValueState[TweetData]] +} + +object TweetHydrators { + + /** + * Creates all the hydrators and calls TweetHydration to wire them up.
+ */ + def apply( + stats: StatsReceiver, + deciderGates: TweetypieDeciderGates, + repos: LogicalRepositories, + tweetDataCache: LockingCache[TweetId, Cached[TweetData]], + hasMedia: Tweet => Boolean, + featureSwitchesWithoutExperiments: FeatureSwitches, + clientIdHelper: ClientIdHelper + ): TweetHydrators = { + import repos._ + + val repairStats = stats.scope("repairs") + val hydratorStats = stats.scope("hydrators") + + def scoped[A](stats: StatsReceiver, name: String)(f: StatsReceiver => A): A = { + val scopedStats = stats.scope(name) + f(scopedStats) + } + + val isFailureException: Throwable => Boolean = { + case _: FilteredState => false + case NotFound => false + case _ => true + } + + def hydratorExceptionCategorizer(failureScope: String) = + ExceptionCategorizer.const("filtered").onlyIf(_.isInstanceOf[FilteredState]) ++ + ExceptionCategorizer.const("not_found").onlyIf(_ == NotFound) ++ + TpExceptionCounter.defaultCategorizer(failureScope).onlyIf(isFailureException) + + val hydratorExceptionCounter: (StatsReceiver, String) => ExceptionCounter = + (stats, scope) => TpExceptionCounter(stats, hydratorExceptionCategorizer(scope)) + + val tweetHydrator = + TweetHydration( + hydratorStats = hydratorStats, + hydrateFeatureSwitchResults = + FeatureSwitchResultsHydrator(featureSwitchesWithoutExperiments, clientIdHelper), + hydrateMentions = MentionEntitiesHydrator + .once(MentionEntityHydrator(userIdentityRepo)) + .observe(hydratorStats.scope("mentions"), hydratorExceptionCounter), + hydrateLanguage = LanguageHydrator(languageRepo) + .observe(hydratorStats.scope("language"), hydratorExceptionCounter), + hydrateUrls = scoped(hydratorStats, "url") { stats => + UrlEntitiesHydrator + .once(UrlEntityHydrator(urlRepo, stats)) + .observe(stats, hydratorExceptionCounter) + }, + hydrateQuotedTweetRef = QuotedTweetRefHydrator + .once( + QuotedTweetRefHydrator(tweetRepo) + ) + .observe(hydratorStats.scope("quoted_tweet_ref"), hydratorExceptionCounter), + 
hydrateQuotedTweetRefUrls = QuotedTweetRefUrlsHydrator(userIdentityRepo) + .observe(hydratorStats.scope("quoted_tweet_ref_urls"), hydratorExceptionCounter), + hydrateMediaCacheable = MediaEntitiesHydrator.Cacheable + .once( + MediaEntityHydrator.Cacheable( + hydrateMediaUrls = MediaUrlFieldsHydrator() + .observe(hydratorStats.scope("media_urls"), hydratorExceptionCounter), + hydrateMediaIsProtected = MediaIsProtectedHydrator(userProtectionRepo) + .observe(hydratorStats.scope("media_is_protected"), hydratorExceptionCounter) + ) + ) + .observe(hydratorStats.scope("media_cacheable"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateMedia), + hydrateReplyScreenName = ReplyScreenNameHydrator + .once(ReplyScreenNameHydrator(userIdentityRepo)) + .observe(hydratorStats.scope("in_reply_to_screen_name"), hydratorExceptionCounter), + hydrateConvoId = ConversationIdHydrator(conversationIdRepo) + .observe(hydratorStats.scope("conversation_id"), hydratorExceptionCounter), + hydratePerspective = // Don't cache with the tweet because it depends on the request + PerspectiveHydrator( + repo = perspectiveRepo, + shouldHydrateBookmarksPerspective = deciderGates.hydrateBookmarksPerspective, + stats = hydratorStats.scope("perspective_by_safety_label") + ).observe(hydratorStats.scope("perspective"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydratePerspectives), + hydrateEditPerspective = EditPerspectiveHydrator( + repo = perspectiveRepo, + timelinesGate = deciderGates.hydratePerspectivesEditsForTimelines, + tweetDetailsGate = deciderGates.hydratePerspectivesEditsForTweetDetail, + otherSafetyLevelsGate = deciderGates.hydratePerspectivesEditsForOtherSafetyLevels, + bookmarksGate = deciderGates.hydrateBookmarksPerspective, + stats = hydratorStats + ).observe(hydratorStats.scope("edit_perspective"), hydratorExceptionCounter), + hydrateConversationMuted = // Don't cache because it depends on the request. 
If + // possible, this hydrator should be in the same stage as + // PerspectiveHydrator, so that the calls can be batched + // together. + ConversationMutedHydrator(conversationMutedRepo) + .observe(hydratorStats.scope("conversation_muted"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateConversationMuted), + hydrateContributor = ContributorHydrator + .once(ContributorHydrator(userIdentityRepo)) + .observe(hydratorStats.scope("contributors"), hydratorExceptionCounter), + hydrateTakedowns = TakedownHydrator(takedownRepo) + .observe(hydratorStats.scope("takedowns"), hydratorExceptionCounter), + hydrateDirectedAt = scoped(hydratorStats, "directed_at") { stats => + DirectedAtHydrator + .once(DirectedAtHydrator(userIdentityRepo, stats)) + .observe(stats, hydratorExceptionCounter) + }, + hydrateGeoScrub = GeoScrubHydrator( + geoScrubTimestampRepo, + Scribe("test_tweetypie_read_time_geo_scrubs") + .contramap[TweetId](_.toString) + ).observe(hydratorStats.scope("geo_scrub"), hydratorExceptionCounter), + hydrateCacheableRepairs = ValueHydrator + .fromMutation[Tweet, TweetQuery.Options]( + RepairMutation( + repairStats.scope("on_read"), + "created_at" -> + new CreatedAtRepairer(Scribe("test_tweetypie_bad_created_at")), + "retweet_media" -> RetweetMediaRepairer, + "parent_status_id" -> RetweetParentStatusIdRepairer.tweetMutation, + "visible_text_range" -> NegativeVisibleTextRangeRepairer.tweetMutation + ) + ) + .lensed(TweetData.Lenses.tweet) + .onlyIf((td, opts) => opts.cause.reading(td.tweet.id)), + hydrateMediaUncacheable = MediaEntityHydrator + .Uncacheable( + hydrateMediaKey = MediaKeyHydrator() + .observe(hydratorStats.scope("media_key"), hydratorExceptionCounter), + hydrateMediaInfo = scoped(hydratorStats, "media_info") { stats => + MediaInfoHydrator(mediaMetadataRepo, stats) + .observe(stats, hydratorExceptionCounter) + } + ) + .observe(hydratorStats.scope("media_uncacheable"), hydratorExceptionCounter) + .liftSeq + .ifEnabled(deciderGates.hydrateMedia), + 
hydratePostCacheRepairs = + // clean-up partially hydrated entities before any of the hydrators that look at + // url and media entities run, so that they never see bad entities. + ValueHydrator.fromMutation[TweetData, TweetQuery.Options]( + RepairMutation( + repairStats.scope("on_read"), + "partial_entity_cleanup" -> PartialEntityCleaner(repairStats), + "strip_not_display_coords" -> StripHiddenGeoCoordinates + ).lensed(TweetData.Lenses.tweet) + ), + hydrateTweetLegacyFormat = scoped(hydratorStats, "tweet_legacy_formatter") { stats => + TweetLegacyFormatter(stats) + .observe(stats, hydratorExceptionCounter) + .onlyIf((td, opts) => opts.cause.reading(td.tweet.id)) + }, + hydrateQuoteTweetVisibility = QuoteTweetVisibilityHydrator(quotedTweetVisibilityRepo) + .observe(hydratorStats.scope("quote_tweet_visibility"), hydratorExceptionCounter), + hydrateQuotedTweet = QuotedTweetHydrator(tweetResultRepo) + .observe(hydratorStats.scope("quoted_tweet"), hydratorExceptionCounter), + hydratePastedMedia = + // Don't cache with the tweet because we want to automatically drop this media if + // the referenced tweet is deleted or becomes non-public. 
+ PastedMediaHydrator(pastedMediaRepo) + .observe(hydratorStats.scope("pasted_media")) + .ifEnabled(deciderGates.hydratePastedMedia), + hydrateMediaRefs = MediaRefsHydrator( + optionalTweetRepo, + deciderGates.mediaRefsHydratorIncludePastedMedia + ).observe(hydratorStats.scope("media_refs")) + .ifEnabled(deciderGates.hydrateMediaRefs), + hydrateMediaTags = // depends on AdditionalFieldsHydrator + MediaTagsHydrator(userViewRepo) + .observe(hydratorStats.scope("media_tags"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateMediaTags), + hydrateClassicCards = CardHydrator(cardRepo) + .observe(hydratorStats.scope("cards"), hydratorExceptionCounter), + hydrateCard2 = Card2Hydrator(card2Repo) + .observe(hydratorStats.scope("card2")), + hydrateContributorVisibility = + // Filter out contributors field for all but the user who owns the tweet + ContributorVisibilityFilter() + .observe(hydratorStats.scope("contributor_visibility"), hydratorExceptionCounter), + hydrateHasMedia = + // Sets hasMedia. Comes after PastedMediaHydrator in order to include pasted + // pics as well as other media & urls. 
+ HasMediaHydrator(hasMedia) + .observe(hydratorStats.scope("has_media"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateHasMedia), + hydrateTweetCounts = // Don't cache counts with the tweet because it has its own cache with + // a different TTL + TweetCountsHydrator(tweetCountsRepo, deciderGates.hydrateBookmarksCount) + .observe(hydratorStats.scope("tweet_counts"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateCounts), + hydratePreviousTweetCounts = // previous counts are not cached + scoped(hydratorStats, "previous_counts") { stats => + PreviousTweetCountsHydrator(tweetCountsRepo, deciderGates.hydrateBookmarksCount) + .observe(stats, hydratorExceptionCounter) + .ifEnabled(deciderGates.hydratePreviousCounts) + }, + hydratePlace = + // Don't cache with the tweet because Place has its own tweetypie cache keyspace + // with a different TTL, and it's more efficient to store separately. + // See com.twitter.tweetypie.repository.PlaceKey + PlaceHydrator(placeRepo) + .observe(hydratorStats.scope("place"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydratePlaces), + hydrateDeviceSource = // Don't cache with the tweet because it has its own cache, + // and it's more efficient to cache it separately + DeviceSourceHydrator(deviceSourceRepo) + .observe(hydratorStats.scope("device_source"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateDeviceSources), + hydrateProfileGeo = + // Don't cache gnip profile geo as read request volume is expected to be low + ProfileGeoHydrator(profileGeoRepo) + .observe(hydratorStats.scope("profile_geo"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateGnipProfileGeoEnrichment), + hydrateSourceTweet = scoped(hydratorStats, "source_tweet") { stats => + SourceTweetHydrator( + tweetResultRepo, + stats, + FutureEffect + .inParallel( + Scribe(DetachedRetweet, "tweetypie_detached_retweets"), + Scribe(DetachedRetweet, "test_tweetypie_detached_retweets"), + ) + ).observe(stats, 
hydratorExceptionCounter) + }, + hydrateIM1837State = IM1837FilterHydrator() + .observe(hydratorStats.scope("im1837_filter"), hydratorExceptionCounter) + .onlyIf { (_, ctx) => + ctx.opts.forExternalConsumption && ctx.opts.cause.reading(ctx.tweetId) + }, + hydrateIM2884State = scoped(hydratorStats, "im2884_filter") { stats => + IM2884FilterHydrator(stats) + .observe(stats, hydratorExceptionCounter) + .onlyIf { (_, ctx) => + ctx.opts.forExternalConsumption && ctx.opts.cause.reading(ctx.tweetId) + } + }, + hydrateIM3433State = scoped(hydratorStats, "im3433_filter") { stats => + IM3433FilterHydrator(stats) + .observe(stats, hydratorExceptionCounter) + .onlyIf { (_, ctx) => + ctx.opts.forExternalConsumption && ctx.opts.cause.reading(ctx.tweetId) + } + }, + hydrateTweetAuthorVisibility = TweetAuthorVisibilityHydrator(userVisibilityRepo) + .observe(hydratorStats.scope("tweet_author_visibility"), hydratorExceptionCounter) + .onlyIf((_, ctx) => ctx.opts.cause.reading(ctx.tweetId)), + hydrateReportedTweetVisibility = ReportedTweetFilter() + .observe(hydratorStats.scope("reported_tweet_filter"), hydratorExceptionCounter), + scrubSuperfluousUrlEntities = ValueHydrator + .fromMutation[Tweet, TweetQuery.Options](SuperfluousUrlEntityScrubber.mutation) + .lensed(TweetData.Lenses.tweet), + copyFromSourceTweet = CopyFromSourceTweet.hydrator + .observe(hydratorStats.scope("copy_from_source_tweet"), hydratorExceptionCounter), + hydrateTweetVisibility = scoped(hydratorStats, "tweet_visibility") { stats => + TweetVisibilityHydrator( + tweetVisibilityRepo, + deciderGates.failClosedInVF, + stats + ).observe(stats, hydratorExceptionCounter) + }, + hydrateEscherbirdAnnotations = EscherbirdAnnotationHydrator(escherbirdAnnotationRepo) + .observe(hydratorStats.scope("escherbird_annotations"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateEscherbirdAnnotations), + hydrateScrubEngagements = ScrubEngagementHydrator() + .observe(hydratorStats.scope("scrub_engagements"), 
hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateScrubEngagements), + hydrateConversationControl = scoped(hydratorStats, "tweet_conversation_control") { stats => + ConversationControlHydrator( + conversationControlRepo, + deciderGates.disableInviteViaMention, + stats + ).observe(stats, hydratorExceptionCounter) + }, + hydrateEditControl = scoped(hydratorStats, "tweet_edit_control") { stats => + EditControlHydrator( + tweetRepo, + deciderGates.setEditTimeWindowToSixtyMinutes, + stats + ).observe(stats, hydratorExceptionCounter) + }, + hydrateUnmentionData = UnmentionDataHydrator(), + hydrateNoteTweetSuffix = NoteTweetSuffixHydrator().observe(hydratorStats.scope("note_tweet_suffix"), hydratorExceptionCounter) + ) + + new TweetHydrators { + val hydrator: TweetDataValueHydrator = + tweetHydrator.onlyIf { (tweetData, opts) => + // When the caller requests fetchStoredTweets and Tweets are fetched from Manhattan + // irrespective of state, the stored data for some Tweets may be incomplete. + // We skip the hydration of those Tweets.
+ !opts.fetchStoredTweets || + tweetData.storedTweetResult.exists(_.canHydrate) + } + + val cacheChangesEffect: Effect[ValueState[TweetData]] = + TweetHydration.cacheChanges( + tweetDataCache, + hydratorStats.scope("tweet_caching") + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala new file mode 100644 index 000000000..795e1b300 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala @@ -0,0 +1,300 @@ +package com.twitter.tweetypie.config + +import com.twitter.decider.Decider +import com.twitter.decider.DeciderFactory +import com.twitter.decider.LocalOverrides +import com.twitter.featureswitches.v2.builder.FeatureSwitchesBuilder +import com.twitter.finagle.filter.DarkTrafficFilter +import com.twitter.finagle.stats.DefaultStatsReceiver +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.Protocols +import com.twitter.finagle.util.DefaultTimer +import com.twitter.finagle.Filter +import com.twitter.finagle.Service +import com.twitter.finagle.SimpleFilter +import com.twitter.quill.capture._ +import com.twitter.servo.util.MemoizingStatsReceiver +import com.twitter.servo.util.WaitForServerSets +import com.twitter.tweetypie.ThriftTweetService +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.client_id.ConditionalServiceIdentifierStrategy +import com.twitter.tweetypie.client_id.PreferForwardedServiceIdentifierForStrato +import com.twitter.tweetypie.client_id.UseTransportServiceIdentifier +import com.twitter.tweetypie.context.TweetypieContext +import com.twitter.tweetypie.matching.Tokenizer +import com.twitter.tweetypie.service._ +import com.twitter.tweetypie.thriftscala.TweetServiceInternal$FinagleService +import com.twitter.util._ +import 
com.twitter.util.logging.Logger +import scala.util.control.NonFatal + +class TweetServerBuilder(settings: TweetServiceSettings) { + + /** + * A logger used by some of the built-in initializers. + */ + val log: Logger = Logger(getClass) + + /** + * The top-level stats receiver. Defaults to the default StatsReceiver + * embedded in Finagle. + */ + val statsReceiver: StatsReceiver = + new MemoizingStatsReceiver(DefaultStatsReceiver) + + val hostStatsReceiver: StatsReceiver = + if (settings.clientHostStats) + statsReceiver + else + NullStatsReceiver + + /** + * A timer for scheduling various things. + */ + val timer: Timer = DefaultTimer + + /** + * Creates a decider instance by looking up the decider configuration information + * from the settings object. + */ + val decider: Decider = { + val fileBased = DeciderFactory(settings.deciderBaseFilename, settings.deciderOverlayFilename)() + + // Use the tweetypie decider dashboard name for propagating decider overrides. + LocalOverrides.decider("tweetypie").orElse(fileBased) + } + + val deciderGates: TweetypieDeciderGates = { + val deciderGates = TweetypieDeciderGates(decider, settings.deciderOverrides) + + // Write out the configuration overrides to the log so that it's + // easy to confirm how this instance has been customized. + deciderGates.overrides.foreach { + case (overrideName, overrideValue) => + log.info("Decider feature " + overrideName + " overridden to " + overrideValue) + if (deciderGates.unusedOverrides.contains(overrideName)) { + log.error("Unused decider override flag: " + overrideName) + } + } + + val scopedReceiver = statsReceiver.scope("decider_values") + + deciderGates.availabilityMap.foreach { + case (feature, value) => + scopedReceiver.provideGauge(feature) { + // Default value of -1 indicates error state. 
value.getOrElse(-1).toFloat + } + } + + deciderGates + } + + val featureSwitchesWithExperiments = FeatureSwitchesBuilder + .createWithExperiments("/features/tweetypie/main") + .build() + + val featureSwitchesWithoutExperiments = FeatureSwitchesBuilder + .createWithNoExperiments("/features/tweetypie/main", Some(statsReceiver)) + .build() + + // ********* initializer ********** + + private[this] def warmupTextTokenization(logger: Logger): Unit = { + logger.info("Warming up text tokenization") + val watch = Stopwatch.start() + Tokenizer.warmUp() + logger.info(s"Warmed up text tokenization in ${watch()}") + } + + private[this] def runWarmup(tweetService: Activity[ThriftTweetService]): Unit = { + val tokenizationLogger = Logger("com.twitter.tweetypie.TweetServerBuilder.TokenizationWarmup") + warmupTextTokenization(tokenizationLogger) + + val warmupLogger = Logger("com.twitter.tweetypie.TweetServerBuilder.BackendWarmup") + // #1 warmup backends + Await.ready(settings.backendWarmupSettings(backendClients, warmupLogger, timer)) + + // #2 warmup Tweet Service + Await.ready { + tweetService.values.toFuture.map(_.get).map { service => + settings.warmupRequestsSettings.foreach(new TweetServiceWarmer(_)(service)) + } + } + } + + private[this] def waitForServerSets(): Unit = { + val names = backendClients.referencedNames + val startTime = Time.now + log.info("will wait for serversets: " + names.mkString("\n\t", "\n\t", "")) + + try { + Await.result(WaitForServerSets.ready(names, settings.waitForServerSetsTimeout, timer)) + val duration = Time.now.since(startTime) + log.info("resolved all serversets in " + duration) + } catch { + case NonFatal(ex) => log.warn("failed to resolve all serversets", ex) + } + } + + private[this] def initialize(tweetService: Activity[ThriftTweetService]): Unit = { + waitForServerSets() + runWarmup(tweetService) + + // try to force a GC before starting to serve requests; this may or may not do anything + System.gc() + } + + // ********* builders
********** + + val clientIdHelper = new ClientIdHelper( + new ConditionalServiceIdentifierStrategy( + condition = deciderGates.preferForwardedServiceIdentifierForClientId, + ifTrue = PreferForwardedServiceIdentifierForStrato, + ifFalse = UseTransportServiceIdentifier, + ), + ) + + val backendClients: BackendClients = + BackendClients( + settings = settings, + deciderGates = deciderGates, + statsReceiver = statsReceiver, + hostStatsReceiver = hostStatsReceiver, + timer = timer, + clientIdHelper = clientIdHelper, + ) + + val tweetService: Activity[ThriftTweetService] = + TweetServiceBuilder( + settings = settings, + statsReceiver = statsReceiver, + timer = timer, + deciderGates = deciderGates, + featureSwitchesWithExperiments = featureSwitchesWithExperiments, + featureSwitchesWithoutExperiments = featureSwitchesWithoutExperiments, + backendClients = backendClients, + clientIdHelper = clientIdHelper, + ) + + // Strato columns should use this tweetService + def stratoTweetService: Activity[ThriftTweetService] = + tweetService.map { service => + // Add quill functionality to the strato tweet service only + val quillCapture = QuillCaptureBuilder(settings, deciderGates) + new QuillTweetService(quillCapture, service) + } + + def build: Activity[Service[Array[Byte], Array[Byte]]] = { + + val quillCapture = QuillCaptureBuilder(settings, deciderGates) + + val darkTrafficFilter: SimpleFilter[Array[Byte], Array[Byte]] = + if (!settings.trafficForkingEnabled) { + Filter.identity + } else { + new DarkTrafficFilter( + backendClients.darkTrafficClient, + _ => deciderGates.forkDarkTraffic(), + statsReceiver + ) + } + + val serviceFilter = + quillCapture + .getServerFilter(ThriftProto.server) + .andThen(TweetypieContext.Local.filter[Array[Byte], Array[Byte]]) + .andThen(darkTrafficFilter) + + initialize(tweetService) + + // tweetService is an Activity[ThriftTweetService], so this callback + // is called every time that Activity updates (on ConfigBus changes). 
+ tweetService.map { service => + val finagleService = + new TweetServiceInternal$FinagleService( + service, + protocolFactory = Protocols.binaryFactory(), + stats = NullStatsReceiver, + maxThriftBufferSize = settings.maxThriftBufferSize + ) + + serviceFilter andThen finagleService + } + } +} + +object QuillCaptureBuilder { + val tweetServiceWriteMethods: Set[String] = + Set( + "async_delete", + "async_delete_additional_fields", + "async_erase_user_tweets", + "async_incr_fav_count", + "async_insert", + "async_set_additional_fields", + "async_set_retweet_visibility", + "async_takedown", + "async_undelete_tweet", + "async_update_possibly_sensitive_tweet", + "cascaded_delete_tweet", + "delete_additional_fields", + "delete_retweets", + "delete_tweets", + "erase_user_tweets", + "flush", + "incr_fav_count", + "insert", + "post_retweet", + "post_tweet", + "remove", + "replicated_delete_additional_fields", + "replicated_delete_tweet", + "replicated_delete_tweet2", + "replicated_incr_fav_count", + "replicated_insert_tweet2", + "replicated_scrub_geo", + "replicated_set_additional_fields", + "replicated_set_has_safety_labels", + "replicated_set_retweet_visibility", + "replicated_takedown", + "replicated_undelete_tweet2", + "replicated_update_possibly_sensitive_tweet", + "scrub_geo", + "scrub_geo_update_user_timestamp", + "set_additional_fields", + "set_has_safety_labels", + "set_retweet_visibility", + "set_tweet_user_takedown", + "takedown", + "undelete_tweet" + ) + + val tweetServiceReadMethods: Set[String] = + Set( + "get_tweet_counts", + "get_tweet_fields", + "get_tweets", + "replicated_get_tweet_counts", + "replicated_get_tweet_fields", + "replicated_get_tweets" + ) + + def apply(settings: TweetServiceSettings, deciderGates: TweetypieDeciderGates): QuillCapture = { + val writesStore = SimpleScribeMessageStore("tweetypie_writes") + .enabledBy(deciderGates.logWrites) + + val readsStore = SimpleScribeMessageStore("tweetypie_reads") + .enabledBy(deciderGates.logReads) + + val 
messageStore = + MessageStore.selected { + case msg if tweetServiceWriteMethods.contains(msg.name) => writesStore + case msg if tweetServiceReadMethods.contains(msg.name) => readsStore + case _ => writesStore + } + + new QuillCapture(Store.legacyStore(messageStore), Some(settings.thriftClientId.name)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala new file mode 100644 index 000000000..765a608a2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala @@ -0,0 +1,399 @@ +package com.twitter.tweetypie +package config + +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.mtls.transport.S2STransport +import com.twitter.servo.gate.RateLimitingGate +import com.twitter.servo.request.ClientRequestAuthorizer.UnauthorizedException +import com.twitter.servo.request.{ClientRequestAuthorizer, ClientRequestObserver} +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.client_id.PreferForwardedServiceIdentifierForStrato +import com.twitter.tweetypie.core.RateLimited +import com.twitter.tweetypie.service.MethodAuthorizer +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Future + +/** + * Compose a ClientRequestAuthorizer for + * ClientHandlingTweetService + */ +object ClientHandlingTweetServiceAuthorizer { + private val RateLimitExceeded = + RateLimited("Your ClientId has exceeded the rate limit for non-allowListed clients.") + + def apply( + settings: TweetServiceSettings, + dynamicConfig: DynamicConfig, + statsReceiver: StatsReceiver, + getServiceIdentifier: () => ServiceIdentifier = S2STransport.peerServiceIdentifier _ + ): ClientRequestAuthorizer = { + val authorizer = + if (settings.allowlistingRequired) { + val limitingGate = 
RateLimitingGate.uniform(settings.nonAllowListedClientRateLimitPerSec) + allowListedOrRateLimitedAuthorizer(dynamicConfig, limitingGate) + .andThen(rejectNonAllowListedProdAuthorizer(dynamicConfig)) + .andThen(permittedMethodsAuthorizer(dynamicConfig)) + .andThen(allowProductionAuthorizer(settings.allowProductionClients)) + } else { + ClientRequestAuthorizer.withClientId + } + + val alternativeClientIdHelper = new ClientIdHelper(PreferForwardedServiceIdentifierForStrato) + // pass the authorizer into an observed authorizer for stats tracking. + // (observed authorizers can't be composed with andThen) + ClientRequestAuthorizer.observed( + authorizer, + new ClientRequestObserver(statsReceiver) { + override def apply( + methodName: String, + clientIdScopesOpt: Option[Seq[String]] + ): Future[Unit] = { + // Monitor for the migration taking into account forwarded service identifier + // as effective client ID for strato. + val alternativeClientIdScopes = alternativeClientIdHelper.effectiveClientId.map(Seq(_)) + if (clientIdScopesOpt != alternativeClientIdScopes) { + scopedReceiver.scope(methodName) + .scope("before_migration") + .scope(clientIdScopesOpt.getOrElse(Seq(ClientIdHelper.UnknownClientId)): _*) + .scope("after_migration") + .counter(alternativeClientIdScopes.getOrElse(Seq(ClientIdHelper.UnknownClientId)): _*) + .incr() + } else { + scopedReceiver.scope(methodName).counter("migration_indifferent").incr() + } + super.apply(methodName, clientIdScopesOpt) + } + + override def authorized(methodName: String, clientIdStr: String): Unit = { + // Monitor for the migration of using service identifier + // as identity instead of client ID. 
+ val serviceIdentifier = getServiceIdentifier() + scopedReceiver.counter( + "authorized_request", + clientIdStr, + serviceIdentifier.role, + serviceIdentifier.service, + serviceIdentifier.environment + ).incr() + val status = dynamicConfig.byServiceIdentifier(serviceIdentifier).toSeq match { + case Seq() => "none" + case Seq(client) if client.clientId == clientIdStr => "equal" + case Seq(_) => "other" + case _ => "ambiguous" + } + scopedReceiver.counter( + "service_id_match_client_id", + clientIdStr, + serviceIdentifier.role, + serviceIdentifier.service, + serviceIdentifier.environment, + status + ).incr() + } + } + ) + } + + /** + * @return A ClientRequestAuthorizer that allows unlimited requests for allowlisted client ids and + * rate-limited requests for unknown clients. + */ + def allowListedOrRateLimitedAuthorizer( + dynamicConfig: DynamicConfig, + nonAllowListedLimiter: Gate[Unit] + ): ClientRequestAuthorizer = + ClientRequestAuthorizer.filtered( + { (_, clientId) => + dynamicConfig.isAllowListedClient(clientId) || nonAllowListedLimiter() + }, + RateLimitExceeded) + + /** + * @return A ClientRequestAuthorizer that rejects requests from non-allowListed prod clients. + */ + def rejectNonAllowListedProdAuthorizer(dynamicConfig: DynamicConfig): ClientRequestAuthorizer = { + object UnallowlistedException + extends UnauthorizedException( + "Traffic is only allowed from allow-listed *.prod clients." + + " Please create a ticket to register your clientId to enable production traffic using http://go/tp-new-client." 
+ ) + + def isProdClient(clientId: String): Boolean = + clientId.endsWith(".prod") || clientId.endsWith(".production") + + ClientRequestAuthorizer.filtered( + { (_, clientId) => + !isProdClient(clientId) || dynamicConfig.isAllowListedClient(clientId) + }, + UnallowlistedException) + } + + /** + * @return A ClientRequestAuthorizer that checks if a given client's + * permittedMethods field includes the method they are calling + */ + def permittedMethodsAuthorizer(dynamicConfig: DynamicConfig): ClientRequestAuthorizer = + dynamicConfig.clientsByFullyQualifiedId match { + case Some(clientsById) => permittedMethodsAuthorizer(dynamicConfig, clientsById) + case None => ClientRequestAuthorizer.permissive + } + + private def permittedMethodsAuthorizer( + dynamicConfig: DynamicConfig, + clientsByFullyQualifiedId: Map[String, Client] + ): ClientRequestAuthorizer = { + ClientRequestAuthorizer.filtered { (methodName, clientId) => + dynamicConfig.unprotectedEndpoints(methodName) || + (clientsByFullyQualifiedId.get(clientId) match { + case Some(client) => + client.accessAllMethods || + client.permittedMethods.contains(methodName) + case None => + false // If client id is unknown, don't allow access + }) + } + } + + /** + * @return A ClientRequestAuthorizer that fails the + * request if it is coming from a production client + * and allowProductionClients is false + */ + def allowProductionAuthorizer(allowProductionClients: Boolean): ClientRequestAuthorizer = + ClientRequestAuthorizer.filtered { (_, clientId) => + allowProductionClients || !(clientId.endsWith(".prod") || clientId.endsWith(".production")) + } +} + +/** + * Compose a MethodAuthorizer for the `getTweets` endpoint. 
+ */
+object GetTweetsAuthorizer {
+  import ProtectedTweetsAuthorizer.IncludeProtected
+
+  /**
+   * Combines, in this order, the protected-tweets check, the request-width check and the
+   * per-client rate limiter. The width and the rate-limit weight are both the number of
+   * tweet ids in the request.
+   */
+  def apply(
+    config: DynamicConfig,
+    maxRequestSize: Int,
+    instanceCount: Int,
+    enforceRateLimitedClients: Gate[Unit],
+    maxRequestWidthEnabled: Gate[Unit],
+    statsReceiver: StatsReceiver,
+  ): MethodAuthorizer[GetTweetsRequest] = {
+    val protectedTweets =
+      ProtectedTweetsAuthorizer(config.clientsByFullyQualifiedId)
+        .contramap[GetTweetsRequest] { request =>
+          IncludeProtected(request.options.exists(_.bypassVisibilityFiltering))
+        }
+    val requestWidth =
+      RequestSizeAuthorizer(maxRequestSize, maxRequestWidthEnabled)
+        .contramap[GetTweetsRequest](_.tweetIds.size)
+    val rateLimit =
+      RateLimiterAuthorizer(config, instanceCount, enforceRateLimitedClients, statsReceiver)
+        .contramap[GetTweetsRequest](_.tweetIds.size)
+
+    MethodAuthorizer.all(Seq(protectedTweets, requestWidth, rateLimit))
+  }
+}
+
+/**
+ * Compose a MethodAuthorizer for the `getTweetFields` endpoint.
+ */
+object GetTweetFieldsAuthorizer {
+  import ProtectedTweetsAuthorizer.IncludeProtected
+
+  /**
+   * Same composition as the `getTweets` authorizer; here a request counts as asking to
+   * bypass visibility filtering when its visibility policy is NoFiltering.
+   */
+  def apply(
+    config: DynamicConfig,
+    maxRequestSize: Int,
+    instanceCount: Int,
+    enforceRateLimitedClients: Gate[Unit],
+    maxRequestWidthEnabled: Gate[Unit],
+    statsReceiver: StatsReceiver
+  ): MethodAuthorizer[GetTweetFieldsRequest] = {
+    val protectedTweets =
+      ProtectedTweetsAuthorizer(config.clientsByFullyQualifiedId)
+        .contramap[GetTweetFieldsRequest] { request =>
+          IncludeProtected(request.options.visibilityPolicy == TweetVisibilityPolicy.NoFiltering)
+        }
+    val requestWidth =
+      RequestSizeAuthorizer(maxRequestSize, maxRequestWidthEnabled)
+        .contramap[GetTweetFieldsRequest](_.tweetIds.size)
+    val rateLimit =
+      RateLimiterAuthorizer(config, instanceCount, enforceRateLimitedClients, statsReceiver)
+        .contramap[GetTweetFieldsRequest](_.tweetIds.size)
+
+    MethodAuthorizer.all(Seq(protectedTweets, requestWidth, rateLimit))
+  }
+}
+
+object ProtectedTweetsAuthorizer {
+  case class IncludeProtected(include: Boolean) extends AnyVal
+
+  class BypassVisibilityFilteringNotAuthorizedException(message: String)
+      extends UnauthorizedException(message)
+
+  /**
+   * Derives the allowlist from client configuration: only clients whose
+   * bypassVisibilityFiltering flag is set may bypass. When no client configuration
+   * is supplied (None), every client id is allowed.
+   */
+  def apply(optClientsById: Option[Map[String, Client]]): MethodAuthorizer[IncludeProtected] = {
+    val isAllowlisted: String => Boolean = optClientsById match {
+      case Some(clientsByFullyQualifiedId) =>
+        val bypassClientIds: Set[String] =
+          clientsByFullyQualifiedId.collect {
+            case (id, client) if client.bypassVisibilityFiltering => id
+          }.toSet
+        clientId => bypassClientIds.contains(clientId)
+
+      case None =>
+        _ => true
+    }
+    apply(isAllowlisted)
+  }
+
+  /**
+   * A MethodAuthorizer that fails the request if a client requests to bypass visibility
+   * filtering but doesn't have BypassVisibilityFiltering
+   */
+  def apply(protectedTweetsAllowlist: String => Boolean): MethodAuthorizer[IncludeProtected] =
+    MethodAuthorizer { (includeProtected, clientId) =>
+      // The single failure case: protected tweets were requested by a client that the
+      // allowlist does not accept.
+      val notAuthorized = includeProtected.include && !protectedTweetsAllowlist(clientId)
+      Future.when(notAuthorized) {
+        val message = s"$clientId is not authorized to bypass visibility filtering"
+        Future.exception(new BypassVisibilityFilteringNotAuthorizedException(message))
+      }
+    }
+}
+
+/**
+ * A MethodAuthorizer[Int] that fails large requests.
+ */
+object RequestSizeAuthorizer {
+  class ExceededMaxWidthException(message: String) extends UnauthorizedException(message)
+
+  /**
+   * Fails a request whose size is greater than `maxRequestSize`, but only while the
+   * `maxWidthLimitEnabled` gate is open; the gate is consulted only for oversized requests.
+   */
+  def apply(
+    maxRequestSize: Int,
+    maxWidthLimitEnabled: Gate[Unit] = Gate.False
+  ): MethodAuthorizer[Int] =
+    MethodAuthorizer { (requestSize, clientId) =>
+      val tooWide = requestSize > maxRequestSize && maxWidthLimitEnabled()
+      Future.when(tooWide) {
+        val message =
+          s"$requestSize exceeds bulk request size limit. $clientId can request at most $maxRequestSize items per request"
+        Future.exception(new ExceededMaxWidthException(message))
+      }
+    }
+}
+
+object RateLimiterAuthorizer {
+
+  type ClientId = String
+
+  /**
+   * @return client ID to weighted RateLimitingGate map
+   *
+   * We want to rate-limit based on requests per sec for every instance.
+   * When we allowlist new clients to Tweetypie, we assign tweets per sec quota.
+   * That's why we compute perInstanceQuota [1] and create a weighted rate-limiting gate [2]
+   * which returns true if acquiring requestSize number of permits is successful. [3]
+   *
+   * [1] tps quota during allowlisting is for both DCs and instanceCount is for one DC.
+   * Therefore, we are over-compensating perInstanceQuota for all low-priority clients.
+   * This acts as a fudge factor to account for cluster-wide traffic imbalances.
+   *
+   * val perInstanceQuota : Double = math.max(1.0, math.ceil(tpsLimit.toFloat / instanceCount))
+   *
+   * We have some clients like deferredRPC with 0K tps quota and rate limiter expects > 0 permits.
+   *
+   * [2] If a client has multiple environments (staging, devel, prod), we provision the
+   * same rate limits for all envs instead of distributing the tps quota across envs.
+   *
+   * Example:
+   *
+   * val c = Client(..., limit = 10k, ...)
+   * Map("foo.prod" -> c, "foo.staging" -> c, "foo.devel" -> c)
+   *
+   * The above client config turns into 3 separate RateLimitingGate.weighted() gates, each with 10k
+   *
+   * [3] RateLimitingGate will always give a permit to the initial request that exceeds
+   * the limit, e.g. starting with a rate limit of 1 tps per instance, the first request with
+   * a batch size of 100 is allowed.
+   *
+   * RateLimitFudgeFactor is a multiplier for per-instance quota to account for:
+   *
+   * a) High likelihood of concurrent batches hitting the same tweetypie shard due to
+   * non-uniform load distribution (this can be alleviated by using Deterministic Aperture)
+   * b) Clients with no retry backoffs and custom batching/concurrency.
+   *
+   * We add the default stitch batch size to the per-instance quota to give more headroom to low-tps clients.
+   * https://cgit.twitter.biz/source/tree/stitch/stitch-tweetypie/src/main/scala/com/twitter/stitch/tweetypie/TweetyPie.scala#n47
+   *
+   */
+  case class RateLimiterConfig(limitingGate: Gate[Int], enforceRateLimit: Boolean)
+
+  /**
+   * Builds one weighted rate-limiting gate per client that has a `tpsLimit` configured.
+   *
+   * The per-instance quota is
+   * `max(1.0, ceil(tpsLimit / instanceCount)) * RateLimitFudgeFactor + DefaultStitchBatchSize`.
+   *
+   * @param dynamicConfig source of the client definitions, keyed by fully qualified client id
+   * @param instanceCount divisor used to turn a client's tps quota into a per-instance quota
+   * @return client id -> gate plus that client's `enforceRateLimit` flag. Clients without a
+   *         `tpsLimit` are omitted; the map is empty when `clientsByFullyQualifiedId` is None.
+   */
+  def perClientRateLimiters(
+    dynamicConfig: DynamicConfig,
+    instanceCount: Int
+  ): Map[ClientId, RateLimiterConfig] = {
+    val RateLimitFudgeFactor: Double = 1.5
+    val DefaultStitchBatchSize: Double = 25.0
+    dynamicConfig.clientsByFullyQualifiedId match {
+      case Some(clients) =>
+        clients.flatMap {
+          case (clientId, client) =>
+            // Mapping over the Option skips clients without a limit and avoids `.get`.
+            client.tpsLimit.map { tpsLimit =>
+              // The max(1.0, ...) floor keeps clients with a zero quota at a positive quota.
+              val perInstanceQuota: Double =
+                math.max(
+                  1.0,
+                  math.ceil(
+                    tpsLimit.toFloat / instanceCount)) * RateLimitFudgeFactor + DefaultStitchBatchSize
+              clientId -> RateLimiterConfig(
+                RateLimitingGate.weighted(perInstanceQuota),
+                client.enforceRateLimit
+              )
+            }
+        }
+      case None => Map.empty
+    }
+  }
+
+  /*
+    enforce rate-limiting on get_tweets and get_tweet_fields requests
+    given enable_rate_limited_clients decider is true and rate limiting gate
+    is not giving any more permits.
+   */
+  /**
+   * @param config unused by this overload; kept so the signature stays unchanged
+   * @param limiters per-client gate and enforcement flag; clients absent from the map are
+   *        never rate limited
+   * @param instanceCount unused by this overload; kept so the signature stays unchanged
+   * @param enforceRateLimitedClients gate that must be open for a request to be rejected
+   * @param statsReceiver receives per-client "exceeded" and "rejected" counters
+   * @return a MethodAuthorizer over the request size that fails with RateLimited when the
+   *         client's gate denies the permits, the client has enforceRateLimit set and the
+   *         enforcement gate is open
+   */
+  def apply(
+    config: DynamicConfig,
+    limiters: Map[ClientId, RateLimiterConfig],
+    instanceCount: Int,
+    enforceRateLimitedClients: Gate[Unit],
+    statsReceiver: StatsReceiver
+  ): MethodAuthorizer[Int] = {
+
+    val tpsExceededScope = statsReceiver.scope("tps_exceeded")
+    val tpsRejectedScope = statsReceiver.scope("tps_rejected")
+    val qpsExceededScope = statsReceiver.scope("qps_exceeded")
+    val qpsRejectedScope = statsReceiver.scope("qps_rejected")
+
+    MethodAuthorizer { (requestSize, clientId) =>
+      // Always ask the gate for at least one permit, even for an empty request.
+      val positiveRequestSize = math.max(1, requestSize)
+      // Named `limiter` so it no longer shadows the `config: DynamicConfig` parameter.
+      val shouldRateLimit: Boolean = limiters.get(clientId).exists { limiter =>
+        val exceededLimit = !limiter.limitingGate(positiveRequestSize)
+        if (exceededLimit) {
+          // Recorded whenever the quota is exceeded, whether or not the request is rejected.
+          qpsExceededScope.counter(clientId).incr()
+          tpsExceededScope.counter(clientId).incr(positiveRequestSize)
+        }
+        exceededLimit && limiter.enforceRateLimit
+      }
+
+      Future.when(shouldRateLimit && enforceRateLimitedClients()) {
+        qpsRejectedScope.counter(clientId).incr()
+        tpsRejectedScope.counter(clientId).incr(positiveRequestSize)
+        Future.exception(
+          RateLimited(s"Your client ID $clientId has exceeded its reserved tps quota.")
+        )
+      }
+    }
+  }
+
+  /**
+   * Convenience overload that first builds the per-client limiters from `config` and
+   * `instanceCount` via perClientRateLimiters, then delegates to the overload taking them.
+   */
+  def apply(
+    config: DynamicConfig,
+    instanceCount: Int,
+    enforceRateLimitedClients: Gate[Unit],
+    statsReceiver: StatsReceiver
+  ): MethodAuthorizer[Int] = {
+    val limiters = perClientRateLimiters(config, instanceCount)
+    apply(config, limiters, instanceCount, enforceRateLimitedClients, statsReceiver)
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala
new file mode 100644
index 000000000..518d0edcd
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala
@@ -0,0 +1,683 @@
+package com.twitter.tweetypie
+package config
+
+import com.twitter.coreservices.failed_task.writer.FailedTaskWriter
+import
com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.flockdb.client._ +import com.twitter.servo.forked +import com.twitter.servo.util.FutureArrow +import com.twitter.servo.util.Scribe +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.handler._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.service.ReplicatingTweetService +import com.twitter.tweetypie.service._ +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Activity +import com.twitter.util.Timer + +/** + * Builds a fully configured ThriftTweetService instance. + * + * The core of the tweet service is a DispatchingTweetService, which is responsible + * for dispatching requests to underlying handlers and stores. + * The DispatchingTweetService instance is wrapped in: + * - ObservedTweetService (adds stats counting) + * - ClientHandlingTweetService (authentication, exception handling, etc) + * - ReplicatingTweetService (replicates some reads) + * + * TweetServiceBuilder returns an Activity[ThriftTweetService] which updates + * on config changes. See DynamicConfig.scala for more details. 
+ */ +object TweetServiceBuilder { + def apply( + settings: TweetServiceSettings, + statsReceiver: StatsReceiver, + timer: Timer, + deciderGates: TweetypieDeciderGates, + featureSwitchesWithExperiments: FeatureSwitches, + featureSwitchesWithoutExperiments: FeatureSwitches, + backendClients: BackendClients, + clientIdHelper: ClientIdHelper, + ): Activity[ThriftTweetService] = { + // a forward reference, will be set to the DispatchingTweetService once created + val syncTweetService = new MutableTweetServiceProxy(null) + + val tweetServiceScope = statsReceiver.scope("tweet_service") + + val dispatchingTweetService = + DispatchingTweetServiceBuilder( + settings, + statsReceiver, + tweetServiceScope, + syncTweetService, + timer, + deciderGates, + featureSwitchesWithExperiments, + featureSwitchesWithoutExperiments, + backendClients, + clientIdHelper, + ) + + val failureLoggingTweetService = + // Add the failure writing inside of the authorization filter so + // that we don't write out the failures when authorization fails. + new FailureLoggingTweetService( + failedTaskWriter = FailedTaskWriter("tweetypie_service_failures", identity), + underlying = dispatchingTweetService + ) + + val observedTweetService = + new ObservedTweetService(failureLoggingTweetService, tweetServiceScope, clientIdHelper) + + // Every time config is updated, create a new tweet service. Only + // ClientHandlingTweetService and ReplicatingTweetService need to + // be recreated, as the underlying TweetServices above don't depend + // on the config. 
+ DynamicConfig( + statsReceiver.scope("dynamic_config"), + backendClients.configBus, + settings + ).map { dynamicConfig => + val clientHandlingTweetService = + new ClientHandlingTweetService( + observedTweetService, + tweetServiceScope, + dynamicConfig.loadShedEligible, + deciderGates.shedReadTrafficVoluntarily, + ClientHandlingTweetServiceAuthorizer( + settings = settings, + dynamicConfig = dynamicConfig, + statsReceiver = statsReceiver + ), + GetTweetsAuthorizer( + config = dynamicConfig, + maxRequestSize = settings.maxGetTweetsRequestSize, + instanceCount = settings.instanceCount, + enforceRateLimitedClients = deciderGates.enforceRateLimitedClients, + maxRequestWidthEnabled = deciderGates.maxRequestWidthEnabled, + statsReceiver = tweetServiceScope.scope("get_tweets"), + ), + GetTweetFieldsAuthorizer( + config = dynamicConfig, + maxRequestSize = settings.maxGetTweetsRequestSize, + instanceCount = settings.instanceCount, + enforceRateLimitedClients = deciderGates.enforceRateLimitedClients, + maxRequestWidthEnabled = deciderGates.maxRequestWidthEnabled, + statsReceiver = tweetServiceScope.scope("get_tweet_fields"), + ), + RequestSizeAuthorizer(settings.maxRequestSize, deciderGates.maxRequestWidthEnabled), + clientIdHelper, + ) + + syncTweetService.underlying = clientHandlingTweetService + + val replicatingService = + if (!settings.enableReplication) + clientHandlingTweetService + else { + new ReplicatingTweetService( + underlying = clientHandlingTweetService, + replicationTargets = backendClients.lowQoSReplicationClients, + executor = new forked.QueueExecutor( + 100, + statsReceiver.scope("replicating_tweet_service") + ), + ) + } + + replicatingService + } + } +} + +object DispatchingTweetServiceBuilder { + val hasMedia: Tweet => Boolean = MediaIndexHelper(Resources.loadPartnerMediaRegexes()) + + def apply( + settings: TweetServiceSettings, + statsReceiver: StatsReceiver, + tweetServiceScope: StatsReceiver, + syncTweetService: ThriftTweetService, + timer: Timer, + 
deciderGates: TweetypieDeciderGates, + featureSwitchesWithExperiments: FeatureSwitches, + featureSwitchesWithoutExperiments: FeatureSwitches, + backendClients: BackendClients, + clientIdHelper: ClientIdHelper, + ): ThriftTweetService = { + val (syncInvocationBuilder, asyncInvocationBuilder) = { + val b = + new ServiceInvocationBuilder(syncTweetService, settings.simulateDeferredrpcCallbacks) + (b.withClientId(settings.thriftClientId), b.withClientId(settings.deferredrpcClientId)) + } + + val tweetKeyFactory = TweetKeyFactory(settings.tweetKeyCacheVersion) + + val caches = + if (!settings.withCache) + Caches.NoCache + else + Caches( + settings = settings, + stats = statsReceiver, + timer = timer, + clients = backendClients, + tweetKeyFactory = tweetKeyFactory, + deciderGates = deciderGates, + clientIdHelper = clientIdHelper, + ) + + val logicalRepos = + LogicalRepositories( + settings = settings, + stats = statsReceiver, + timer = timer, + deciderGates = deciderGates, + external = new ExternalServiceRepositories( + clients = backendClients, + statsReceiver = statsReceiver, + settings = settings, + clientIdHelper = clientIdHelper, + ), + caches = caches, + stratoClient = backendClients.stratoserverClient, + hasMedia = hasMedia, + clientIdHelper = clientIdHelper, + featureSwitchesWithoutExperiments = featureSwitchesWithoutExperiments, + ) + + val tweetCreationLock = + new CacheBasedTweetCreationLock( + cache = caches.tweetCreateLockerCache, + maxTries = 3, + stats = statsReceiver.scope("tweet_save").scope("locker"), + logUniquenessId = + if (settings.scribeUniquenessIds) CacheBasedTweetCreationLock.ScribeUniquenessId + else CacheBasedTweetCreationLock.LogUniquenessId + ) + + val tweetStores = + TweetStores( + settings = settings, + statsReceiver = statsReceiver, + timer = timer, + deciderGates = deciderGates, + tweetKeyFactory = tweetKeyFactory, + clients = backendClients, + caches = caches, + asyncBuilder = asyncInvocationBuilder, + hasMedia = hasMedia, + 
clientIdHelper = clientIdHelper, + ) + + val tweetDeletePathHandler = + new DefaultTweetDeletePathHandler( + tweetServiceScope, + logicalRepos.tweetResultRepo, + logicalRepos.optionalUserRepo, + logicalRepos.stratoSafetyLabelsRepo, + logicalRepos.lastQuoteOfQuoterRepo, + tweetStores, + getPerspectives = backendClients.timelineService.getPerspectives, + ) + + val tweetBuilders = + TweetBuilders( + settings = settings, + statsReceiver = statsReceiver, + deciderGates = deciderGates, + featureSwitchesWithExperiments = featureSwitchesWithExperiments, + clients = backendClients, + caches = caches, + repos = logicalRepos, + tweetStore = tweetStores, + hasMedia = hasMedia, + unretweetEdits = tweetDeletePathHandler.unretweetEdits, + ) + + val hydrateTweetForInsert = + WritePathHydration.hydrateTweet( + logicalRepos.tweetHydrators.hydrator, + statsReceiver.scope("insert_tweet") + ) + + val defaultTweetQueryOptions = TweetQuery.Options(include = GetTweetsHandler.BaseInclude) + + val parentUserIdRepo: ParentUserIdRepository.Type = + ParentUserIdRepository( + tweetRepo = logicalRepos.tweetRepo + ) + + val undeleteTweetHandler = + UndeleteTweetHandlerBuilder( + backendClients.tweetStorageClient, + logicalRepos, + tweetStores, + parentUserIdRepo, + statsReceiver + ) + + val eraseUserTweetsHandler = + EraseUserTweetsHandlerBuilder( + backendClients, + asyncInvocationBuilder, + deciderGates, + settings, + timer, + tweetDeletePathHandler, + tweetServiceScope + ) + + val setRetweetVisibilityHandler = + SetRetweetVisibilityHandler( + tweetGetter = + TweetRepository.tweetGetter(logicalRepos.optionalTweetRepo, defaultTweetQueryOptions), + tweetStores.setRetweetVisibility + ) + + val takedownHandler = + TakedownHandlerBuilder( + logicalRepos = logicalRepos, + tweetStores = tweetStores + ) + + val updatePossiblySensitiveTweetHandler = + UpdatePossiblySensitiveTweetHandler( + HandlerError.getRequired( + TweetRepository.tweetGetter(logicalRepos.optionalTweetRepo, defaultTweetQueryOptions), 
+ HandlerError.tweetNotFoundException + ), + HandlerError.getRequired( + FutureArrow( + UserRepository + .userGetter( + logicalRepos.optionalUserRepo, + UserQueryOptions(Set(UserField.Safety), UserVisibility.All) + ) + .compose(UserKey.byId) + ), + HandlerError.userNotFoundException + ), + tweetStores.updatePossiblySensitiveTweet + ) + + val userTakedownHandler = + UserTakedownHandlerBuilder( + logicalRepos = logicalRepos, + tweetStores = tweetStores, + stats = tweetServiceScope + ) + + val getDeletedTweetsHandler = + GetDeletedTweetsHandler( + getDeletedTweets = backendClients.tweetStorageClient.getDeletedTweets, + tweetsExist = + GetDeletedTweetsHandler.tweetsExist(backendClients.tweetStorageClient.getTweet), + stats = tweetServiceScope.scope("get_deleted_tweets_handler") + ) + + val hydrateQuotedTweet = + WritePathHydration.hydrateQuotedTweet( + logicalRepos.optionalTweetRepo, + logicalRepos.optionalUserRepo, + logicalRepos.quoterHasAlreadyQuotedRepo + ) + + val deleteLocationDataHandler = + DeleteLocationDataHandler( + backendClients.geoScrubEventStore.getGeoScrubTimestamp, + Scribe(DeleteLocationData, "tweetypie_delete_location_data"), + backendClients.deleteLocationDataPublisher + ) + + val getStoredTweetsHandler = GetStoredTweetsHandler(logicalRepos.tweetResultRepo) + + val getStoredTweetsByUserHandler = GetStoredTweetsByUserHandler( + getStoredTweetsHandler = getStoredTweetsHandler, + getStoredTweet = backendClients.tweetStorageClient.getStoredTweet, + selectPage = FutureArrow { select => + backendClients.tflockReadClient + .selectPage(select, Some(settings.getStoredTweetsByUserPageSize)) + }, + maxPages = settings.getStoredTweetsByUserMaxPages + ) + + val getTweetsHandler = + GetTweetsHandler( + logicalRepos.tweetResultRepo, + logicalRepos.containerAsGetTweetResultRepo, + logicalRepos.deletedTweetVisibilityRepo, + statsReceiver.scope("read_path"), + deciderGates.shouldMaterializeContainers + ) + + val getTweetFieldsHandler = + GetTweetFieldsHandler( + 
logicalRepos.tweetResultRepo, + logicalRepos.deletedTweetVisibilityRepo, + logicalRepos.containerAsGetTweetFieldsResultRepo, + statsReceiver.scope("read_path"), + deciderGates.shouldMaterializeContainers + ) + + val unretweetHandler = + UnretweetHandler( + tweetDeletePathHandler.deleteTweets, + backendClients.timelineService.getPerspectives, + tweetDeletePathHandler.unretweetEdits, + logicalRepos.tweetRepo, + ) + + val hydrateInsertEvent = + WritePathHydration.hydrateInsertTweetEvent( + hydrateTweet = hydrateTweetForInsert, + hydrateQuotedTweet = hydrateQuotedTweet + ) + + val scrubGeoUpdateUserTimestampBuilder = + ScrubGeoEventBuilder.UpdateUserTimestamp( + stats = tweetServiceScope.scope("scrub_geo_update_user_timestamp"), + userRepo = logicalRepos.optionalUserRepo + ) + + val scrubGeoScrubTweetsBuilder = + ScrubGeoEventBuilder.ScrubTweets( + stats = tweetServiceScope.scope("scrub_geo"), + userRepo = logicalRepos.optionalUserRepo + ) + + val handlerFilter = + PostTweet + .DuplicateHandler( + tweetCreationLock = tweetCreationLock, + getTweets = getTweetsHandler, + stats = statsReceiver.scope("duplicate") + ) + .andThen(PostTweet.RescueTweetCreateFailure) + .andThen(PostTweet.LogFailures) + + val postTweetHandler = + handlerFilter[PostTweetRequest]( + PostTweet.Handler( + tweetBuilder = tweetBuilders.tweetBuilder, + hydrateInsertEvent = hydrateInsertEvent, + tweetStore = tweetStores, + ) + ) + + val postRetweetHandler = + handlerFilter[RetweetRequest]( + PostTweet.Handler( + tweetBuilder = tweetBuilders.retweetBuilder, + hydrateInsertEvent = hydrateInsertEvent, + tweetStore = tweetStores, + ) + ) + + val quotedTweetDeleteBuilder: QuotedTweetDeleteEventBuilder.Type = + QuotedTweetDeleteEventBuilder(logicalRepos.optionalTweetRepo) + + val quotedTweetTakedownBuilder: QuotedTweetTakedownEventBuilder.Type = + QuotedTweetTakedownEventBuilder(logicalRepos.optionalTweetRepo) + + val setAdditionalFieldsBuilder: SetAdditionalFieldsBuilder.Type = + SetAdditionalFieldsBuilder( 
+ tweetRepo = logicalRepos.tweetRepo + ) + + val asyncSetAdditionalFieldsBuilder: AsyncSetAdditionalFieldsBuilder.Type = + AsyncSetAdditionalFieldsBuilder( + userRepo = logicalRepos.userRepo + ) + + val deleteAdditionalFieldsBuilder: DeleteAdditionalFieldsBuilder.Type = + DeleteAdditionalFieldsBuilder( + tweetRepo = logicalRepos.tweetRepo + ) + + val asyncDeleteAdditionalFieldsBuilder: AsyncDeleteAdditionalFieldsBuilder.Type = + AsyncDeleteAdditionalFieldsBuilder( + userRepo = logicalRepos.userRepo + ) + + new DispatchingTweetService( + asyncDeleteAdditionalFieldsBuilder = asyncDeleteAdditionalFieldsBuilder, + asyncSetAdditionalFieldsBuilder = asyncSetAdditionalFieldsBuilder, + deleteAdditionalFieldsBuilder = deleteAdditionalFieldsBuilder, + deleteLocationDataHandler = deleteLocationDataHandler, + deletePathHandler = tweetDeletePathHandler, + eraseUserTweetsHandler = eraseUserTweetsHandler, + getDeletedTweetsHandler = getDeletedTweetsHandler, + getStoredTweetsHandler = getStoredTweetsHandler, + getStoredTweetsByUserHandler = getStoredTweetsByUserHandler, + getTweetsHandler = getTweetsHandler, + getTweetFieldsHandler = getTweetFieldsHandler, + getTweetCountsHandler = GetTweetCountsHandler(logicalRepos.tweetCountsRepo), + postTweetHandler = postTweetHandler, + postRetweetHandler = postRetweetHandler, + quotedTweetDeleteBuilder = quotedTweetDeleteBuilder, + quotedTweetTakedownBuilder = quotedTweetTakedownBuilder, + scrubGeoUpdateUserTimestampBuilder = scrubGeoUpdateUserTimestampBuilder, + scrubGeoScrubTweetsBuilder = scrubGeoScrubTweetsBuilder, + setAdditionalFieldsBuilder = setAdditionalFieldsBuilder, + setRetweetVisibilityHandler = setRetweetVisibilityHandler, + statsReceiver = statsReceiver, + takedownHandler = takedownHandler, + tweetStore = tweetStores, + undeleteTweetHandler = undeleteTweetHandler, + unretweetHandler = unretweetHandler, + updatePossiblySensitiveTweetHandler = updatePossiblySensitiveTweetHandler, + userTakedownHandler = userTakedownHandler, + 
clientIdHelper = clientIdHelper, + ) + } +} + +object TakedownHandlerBuilder { + type Type = FutureArrow[TakedownRequest, Unit] + + def apply(logicalRepos: LogicalRepositories, tweetStores: TotalTweetStore) = + TakedownHandler( + getTweet = HandlerError.getRequired( + tweetGetter(logicalRepos), + HandlerError.tweetNotFoundException + ), + getUser = HandlerError.getRequired( + userGetter(logicalRepos), + HandlerError.userNotFoundException + ), + writeTakedown = tweetStores.takedown + ) + + def tweetGetter(logicalRepos: LogicalRepositories): FutureArrow[TweetId, Option[Tweet]] = + FutureArrow( + TweetRepository.tweetGetter( + logicalRepos.optionalTweetRepo, + TweetQuery.Options( + include = GetTweetsHandler.BaseInclude.also( + tweetFields = Set( + Tweet.TweetypieOnlyTakedownCountryCodesField.id, + Tweet.TweetypieOnlyTakedownReasonsField.id + ) + ) + ) + ) + ) + + def userGetter(logicalRepos: LogicalRepositories): FutureArrow[UserId, Option[User]] = + FutureArrow( + UserRepository + .userGetter( + logicalRepos.optionalUserRepo, + UserQueryOptions( + Set(UserField.Roles, UserField.Safety, UserField.Takedowns), + UserVisibility.All + ) + ) + .compose(UserKey.byId) + ) +} + +object UserTakedownHandlerBuilder { + def apply( + logicalRepos: LogicalRepositories, + tweetStores: TotalTweetStore, + stats: StatsReceiver + ): UserTakedownHandler.Type = + UserTakedownHandler( + getTweet = TakedownHandlerBuilder.tweetGetter(logicalRepos), + tweetTakedown = tweetStores.takedown, + ) +} + +object EraseUserTweetsHandlerBuilder { + def apply( + backendClients: BackendClients, + asyncInvocationBuilder: ServiceInvocationBuilder, + deciderGates: TweetypieDeciderGates, + settings: TweetServiceSettings, + timer: Timer, + tweetDeletePathHandler: DefaultTweetDeletePathHandler, + tweetServiceScope: StatsReceiver + ): EraseUserTweetsHandler = + EraseUserTweetsHandler( + selectPage(backendClients, settings), + deleteTweet(tweetDeletePathHandler), + eraseUserTweets(backendClients, 
asyncInvocationBuilder), + tweetServiceScope.scope("erase_user_tweets"), + sleep(deciderGates, settings, timer) + ) + + def selectPage( + backendClients: BackendClients, + settings: TweetServiceSettings + ): FutureArrow[Select[StatusGraph], PageResult[Long]] = + FutureArrow( + backendClients.tflockWriteClient.selectPage(_, Some(settings.eraseUserTweetsPageSize)) + ) + + def deleteTweet( + tweetDeletePathHandler: DefaultTweetDeletePathHandler + ): FutureEffect[(TweetId, UserId)] = + FutureEffect[(TweetId, UserId)] { + case (tweetId, expectedUserId) => + tweetDeletePathHandler + .internalDeleteTweets( + request = DeleteTweetsRequest( + Seq(tweetId), + isUserErasure = true, + expectedUserId = Some(expectedUserId) + ), + byUserId = None, + authenticatedUserId = None, + validate = tweetDeletePathHandler.validateTweetsForUserErasureDaemon + ) + .unit + } + + def eraseUserTweets( + backendClients: BackendClients, + asyncInvocationBuilder: ServiceInvocationBuilder + ): FutureArrow[AsyncEraseUserTweetsRequest, Unit] = + asyncInvocationBuilder + .asyncVia(backendClients.asyncTweetDeletionService) + .method(_.asyncEraseUserTweets) + + def sleep( + deciderGates: TweetypieDeciderGates, + settings: TweetServiceSettings, + timer: Timer + ): () => Future[Unit] = + () => + if (deciderGates.delayEraseUserTweets()) { + Future.sleep(settings.eraseUserTweetsDelay)(timer) + } else { + Future.Unit + } +} + +object UndeleteTweetHandlerBuilder { + def apply( + tweetStorage: TweetStorageClient, + logicalRepos: LogicalRepositories, + tweetStores: TotalTweetStore, + parentUserIdRepo: ParentUserIdRepository.Type, + statsReceiver: StatsReceiver + ): UndeleteTweetHandler.Type = + UndeleteTweetHandler( + undelete = tweetStorage.undelete, + tweetExists = tweetExists(tweetStorage), + getUser = FutureArrow( + UserRepository + .userGetter( + logicalRepos.optionalUserRepo, + UserQueryOptions( + // ExtendedProfile is needed to view a user's birthday to + // guarantee we are not undeleting tweets from 
when a user was < 13 + TweetBuilder.userFields ++ Set(UserField.ExtendedProfile), + UserVisibility.All, + filteredAsFailure = false + ) + ) + .compose(UserKey.byId) + ), + getDeletedTweets = tweetStorage.getDeletedTweets, + parentUserIdRepo = parentUserIdRepo, + save = save( + logicalRepos, + tweetStores, + statsReceiver + ) + ) + + private def tweetExists(tweetStorage: TweetStorageClient): FutureArrow[TweetId, Boolean] = + FutureArrow { id => + Stitch + .run(tweetStorage.getTweet(id)) + .map { + case _: GetTweet.Response.Found => true + case _ => false + } + } + + // 1. hydrates the undeleted tweet + // 2. hands a UndeleteTweetEvent to relevant stores. + // 3. return the hydrated tweet + def save( + logicalRepos: LogicalRepositories, + tweetStores: TotalTweetStore, + statsReceiver: StatsReceiver + ): FutureArrow[UndeleteTweet.Event, Tweet] = { + + val hydrateTweet = + WritePathHydration.hydrateTweet( + logicalRepos.tweetHydrators.hydrator, + statsReceiver.scope("undelete_tweet") + ) + + val hydrateQuotedTweet = + WritePathHydration.hydrateQuotedTweet( + logicalRepos.optionalTweetRepo, + logicalRepos.optionalUserRepo, + logicalRepos.quoterHasAlreadyQuotedRepo + ) + + val hydrateUndeleteEvent = + WritePathHydration.hydrateUndeleteTweetEvent( + hydrateTweet = hydrateTweet, + hydrateQuotedTweet = hydrateQuotedTweet + ) + + FutureArrow[UndeleteTweet.Event, Tweet] { event => + for { + hydratedEvent <- hydrateUndeleteEvent(event) + _ <- tweetStores.undeleteTweet(hydratedEvent) + } yield hydratedEvent.tweet + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala new file mode 100644 index 000000000..d4d6e054f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie.config + +import 
com.twitter.finagle.thrift.ClientId
+import com.twitter.servo.util.FutureArrow
+import com.twitter.tweetypie._
+import com.twitter.tweetypie.service.{ClientIdSettingTweetServiceProxy, TweetServiceProxy}
+
+/**
+ * Builder for ThriftTweetService instances and FutureArrows whose target depends on the
+ * simulateDeferredrpcCallbacks setting: when it is true, `asyncVia` keeps routing calls to
+ * this builder's own `service` instead of the supplied asynchronous service.
+ *
+ * Each builder method returns a new ServiceInvocationBuilder carrying the same
+ * simulateDeferredrpcCallbacks value.
+ */
+class ServiceInvocationBuilder(
+  val service: ThriftTweetService,
+  simulateDeferredrpcCallbacks: Boolean) {
+
+  /** Creates a builder around `target` that keeps this builder's simulation setting. */
+  private def rebuild(target: ThriftTweetService): ServiceInvocationBuilder =
+    new ServiceInvocationBuilder(target, simulateDeferredrpcCallbacks)
+
+  /** @return a builder whose service is wrapped in a ClientIdSettingTweetServiceProxy for `clientId` */
+  def withClientId(clientId: ClientId): ServiceInvocationBuilder =
+    rebuild(new ClientIdSettingTweetServiceProxy(clientId, service))
+
+  /**
+   * @return a builder whose service proxies to `asyncService`, or to the current `service`
+   * when simulateDeferredrpcCallbacks is true
+   */
+  def asyncVia(asyncService: ThriftTweetService): ServiceInvocationBuilder = {
+    // Both candidates and the flag are immutable, so the target can be picked once.
+    val target: ThriftTweetService =
+      if (simulateDeferredrpcCallbacks) service else asyncService
+    rebuild(new TweetServiceProxy {
+      override def underlying: ThriftTweetService = target
+    })
+  }
+
+  /** @return a FutureArrow for the method that `op` selects on this builder's `service` */
+  def method[A, B](op: ThriftTweetService => A => Future[B]): FutureArrow[A, B] = {
+    val invoke: A => Future[B] = op(service)
+    FutureArrow(invoke)
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala
new file mode 100644
index 000000000..08592c16d
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala
@@ -0,0 +1,475 @@
+package com.twitter.tweetypie
+package config
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.finagle.Backoff
+import com.twitter.finagle.memcached.exp.localMemcachedPort
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.finagle.ssl.OpportunisticTls
+import com.twitter.finagle.thrift.ClientId
+import com.twitter.flockdb.client.thriftscala.Priority
+import com.twitter.servo.repository.CachedResult
+import com.twitter.servo.util.Availability +import com.twitter.tweetypie.backends._ +import com.twitter.tweetypie.caching.SoftTtl +import com.twitter.tweetypie.handler.DuplicateTweetFinder +import com.twitter.tweetypie.repository.TombstoneTtl +import com.twitter.tweetypie.service._ +import com.twitter.tweetypie.storage.ManhattanTweetStorageClient +import com.twitter.util.Duration + +case class InProcessCacheConfig(ttl: Duration, maximumSize: Int) + +class TweetServiceSettings(val flags: TweetServiceFlags) { + + /** + * Convert a Boolean to an Option + * > optional(true, "my value") + * res: Some(my value) + * + * > optional(false, "my value") + * res: None + */ + def optional[T](b: Boolean, a: => T): Option[T] = if (b) Some(a) else None + + /** atla, localhost, etc. */ + val zone: String = flags.zone() + + /** dc is less specific than zone, zone=atla, dc=atl */ + val dc: String = zone.dropRight(1) + + /** one of: prod, staging, dev, testbox */ + val env: Env.Value = flags.env() + + /** instanceId of this aurora instance */ + lazy val instanceId: Int = flags.instanceId() + + /** total number of tweetypie aurora instances */ + val instanceCount: Int = flags.instanceCount() + + /** The Name to resolve to find the memcached cluster */ + val twemcacheDest: String = + // If twemcacheDest is explicitly set, always prefer that to + // localMemcachedPort. + flags.twemcacheDest.get + // Testbox uses this global flag to specify the location of the + // local memcached instance. + .orElse(localMemcachedPort().map("/$/inet/localhost/" + _)) + // If no explicit Name is specified, use the default. + .getOrElse(flags.twemcacheDest()) + + /** Read/write data through Cache */ + val withCache: Boolean = flags.withCache() + + /** + * The TFlock queue to use for background indexing operations. For + * production, this should always be the low priority queue, to + * allow foreground operations to be processed first. 
+ */ + val backgroundIndexingPriority: Priority = flags.backgroundIndexingPriority() + + /** Set certain decider gates to this overridden value */ + val deciderOverrides: Map[String, Boolean] = + flags.deciderOverrides() + + /** use per host stats? */ + val clientHostStats: Boolean = + flags.clientHostStats() + + /** Some(WarmupQueriesSettings()) when flags.enableWarmupRequests() is true, None otherwise */ + val warmupRequestsSettings: Option[WarmupQueriesSettings] = + optional(flags.enableWarmupRequests(), WarmupQueriesSettings()) + + /** + * enables request authorization via an allowlist; falls back to `env == Env.prod` when + * `flags.allowlist.get` is empty + */ + val allowlistingRequired: Boolean = + flags.allowlist.get.getOrElse(env == Env.prod) + + /** read rate limit for unknown clients (when allowlistingRequired is enabled) */ + val nonAllowListedClientRateLimitPerSec: Double = + flags.grayListRateLimit() + + /** enables requests from production clients */ + val allowProductionClients: Boolean = + env == Env.prod + + /** enables replication via DRPC */ + val enableReplication: Boolean = flags.enableReplication() + + /** enables forking of some traffic to configured target */ + val trafficForkingEnabled: Boolean = + env == Env.prod + + /** enabled only when env is prod */ + val scribeUniquenessIds: Boolean = + env == Env.prod + + /** + * ClientId to send to backend services. Uses `flags.clientId` when it yields a value, + * otherwise a default chosen by `env`. + * + * NOTE(review): the fallback match only covers dev, staging and prod. The doc on `env` also + * lists testbox; if Env has such a value and no clientId flag is given, this throws a + * MatchError -- confirm against the Env definition. + */ + val thriftClientId: ClientId = + flags.clientId.get.map(ClientId(_)).getOrElse { + env match { + case Env.dev | Env.staging => ClientId("tweetypie.staging") + case Env.prod => ClientId("tweetypie.prod") + } + } + + /** + * Instead of using DRPC for calling into the async code path, call back into the + * current instance. Used for development and test to ensure logic in the current + * instance is being tested. + */ + val simulateDeferredrpcCallbacks: Boolean = flags.simulateDeferredrpcCallbacks() + + /** + * ClientId to set in 'asynchronous' requests when simulateDeferredrpcCallbacks is + * true and Tweetypie ends up just calling itself synchronously.
+ */ + val deferredrpcClientId: ClientId = ClientId("deferredrpc.prod") + + /** + * ServiceIdentifier used to enable mTLS + */ + val serviceIdentifier: ServiceIdentifier = flags.serviceIdentifier() + + /** + * Decider settings + */ + val deciderBaseFilename: Option[String] = Option(flags.deciderBase()) + val deciderOverlayFilename: Option[String] = Option(flags.deciderOverlay()) + val vfDeciderOverlayFilename: Option[String] = flags.vfDeciderOverlay.get + + /** + * Used to determine whether we should fail requests for Tweets that are likely too young + * to return a non-partial response. We return NotFound for Tweets that are deemed too young. + * Used by [[com.twitter.tweetypie.repository.ManhattanTweetRepository]]. + */ + val shortCircuitLikelyPartialTweetReads: Gate[Duration] = { + // interpret the flag as a duration in milliseconds + val ageCeiling: Duration = flags.shortCircuitLikelyPartialTweetReadsMs().milliseconds + Gate(tweetAge => tweetAge < ageCeiling) + } + + // tweet-service internal settings + + val tweetKeyCacheVersion = 1 + + /** how often to flush aggregated count updates for tweet counts */ + val aggregatedTweetCountsFlushInterval: Duration = 5.seconds + + /** maximum number of keys for which aggregated cached count updates may be cached */ + val maxAggregatedCountsSize = 1000 + + /** ramp up period for decidering up forked traffic (if enabled) to the full decidered value */ + val forkingRampUp: Duration = 3.minutes + + /** how long to wait after startup for serversets to resolve before giving up and moving on */ + val waitForServerSetsTimeout: Duration = 120.seconds + + /** number of threads to use in thread pool for language identification */ + val numPenguinThreads = 4 + + /** maximum number of tweets that clients can request per getTweets RPC call */ + val maxGetTweetsRequestSize = 200 + + /** maximum batch size for any batched request (getTweets is exempt, it has its own limiting) */ + val maxRequestSize = 200 + + /** + * maximum size to 
allow the thrift response buffer to grow before resetting it. this is set to + * approximately the current value of `srv/thrift/response_payload_bytes.p999`, meaning roughly + * 1 out of 1000 requests will cause the buffer to be reset. + */ + val maxThriftBufferSize: Int = 200 * 1024 + + // ********* timeouts and backoffs ********** + + /** backoffs for OptimisticLockingCache lockAndSet operations */ + val lockingCacheBackoffs: Stream[Duration] = + Backoff.exponentialJittered(10.millisecond, 50.milliseconds).take(3).toStream + + /** retry once on timeout with no backoff (a single zero-length backoff entry) */ + val defaultTimeoutBackoffs: Stream[Duration] = Stream(0.milliseconds) + + /** backoffs when user view is missing */ + val gizmoduckMissingUserViewBackoffs: Stream[Duration] = Backoff.const(10.millis).take(3).toStream + + /** backoffs for retrying failed async-write actions after first retry failure */ + val asyncWriteRetryBackoffs: Stream[Duration] = + Backoff.exponential(10.milliseconds, 2).take(9).toStream.map(_ min 1.second) + + /** backoffs for retrying failed deferredrpc enqueues */ + val deferredrpcBackoffs: Stream[Duration] = + Backoff.exponential(10.milliseconds, 2).take(3).toStream + + /** backoffs for retrying failed cache updates for replicated events */ + val replicatedEventCacheBackoffs: Stream[Duration] = + Backoff.exponential(100.milliseconds, 2).take(10).toStream + + /** Escherbird backend settings: 200ms request timeout, timeouts retried per defaultTimeoutBackoffs */ + val escherbirdConfig: Escherbird.Config = + Escherbird.Config( + requestTimeout = 200.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + /** Expandodo backend settings: 300ms request timeout, three zero-delay backoffs for server errors */ + val expandodoConfig: Expandodo.Config = + Expandodo.Config( + requestTimeout = 300.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + serverErrorBackoffs = Backoff.const(0.millis).take(3).toStream + ) + + val creativesContainerServiceConfig: CreativesContainerService.Config = + CreativesContainerService.Config( + requestTimeout = 300.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + serverErrorBackoffs =
Backoff.const(0.millis).take(3).toStream + ) + + val geoScrubEventStoreConfig: GeoScrubEventStore.Config = + GeoScrubEventStore.Config( + read = GeoScrubEventStore.EndpointConfig( + requestTimeout = 200.milliseconds, + maxRetryCount = 1 + ), + write = GeoScrubEventStore.EndpointConfig( + requestTimeout = 1.second, + maxRetryCount = 1 + ) + ) + + val gizmoduckConfig: Gizmoduck.Config = + Gizmoduck.Config( + readTimeout = 300.milliseconds, + writeTimeout = 300.milliseconds, + // We bump the timeout value to 800ms because modifyAndGet is called only in async request path in GeoScrub daemon + // and we do not expect sync/realtime apps calling this thrift method + modifyAndGetTimeout = 800.milliseconds, + modifyAndGetTimeoutBackoffs = Backoff.const(0.millis).take(3).toStream, + defaultTimeoutBackoffs = defaultTimeoutBackoffs, + gizmoduckExceptionBackoffs = Backoff.const(0.millis).take(3).toStream + ) + + val limiterBackendConfig: LimiterBackend.Config = + LimiterBackend.Config( + requestTimeout = 300.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + val mediaInfoServiceConfig: MediaInfoService.Config = + MediaInfoService.Config( + requestTimeout = 300.milliseconds, + totalTimeout = 500.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + val scarecrowConfig: Scarecrow.Config = + Scarecrow.Config( + readTimeout = 100.milliseconds, + writeTimeout = 400.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + scarecrowExceptionBackoffs = Backoff.const(0.millis).take(3).toStream + ) + + val socialGraphSeviceConfig: SocialGraphService.Config = + SocialGraphService.Config( + socialGraphTimeout = 250.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + val talonConfig: Talon.Config = + Talon.Config( + shortenTimeout = 500.milliseconds, + expandTimeout = 150.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + transientErrorBackoffs = Backoff.const(0.millis).take(3).toStream + ) + + /** + * page size when retrieving tflock pages 
for tweet deletion and undeletion + * tweet erasures have their own page size eraseUserTweetsPageSize + */ + val tflockPageSize: Int = flags.tflockPageSize() + + val tflockReadConfig: TFlock.Config = + TFlock.Config( + requestTimeout = 300.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + flockExceptionBackoffs = Backoff.const(0.millis).take(3).toStream, + overCapacityBackoffs = Stream.empty, + defaultPageSize = tflockPageSize + ) + + val tflockWriteConfig: TFlock.Config = + TFlock.Config( + requestTimeout = 400.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + flockExceptionBackoffs = Backoff.const(0.millis).take(3).toStream, + overCapacityBackoffs = Backoff.exponential(10.millis, 2).take(3).toStream + ) + + val timelineServiceConfig: TimelineService.Config = { + val tlsExceptionBackoffs = Backoff.const(0.millis).take(3).toStream + TimelineService.Config( + writeRequestPolicy = + Backend.TimeoutPolicy(4.seconds) >>> + TimelineService.FailureBackoffsPolicy( + timeoutBackoffs = defaultTimeoutBackoffs, + tlsExceptionBackoffs = tlsExceptionBackoffs + ), + readRequestPolicy = + Backend.TimeoutPolicy(400.milliseconds) >>> + TimelineService.FailureBackoffsPolicy( + timeoutBackoffs = defaultTimeoutBackoffs, + tlsExceptionBackoffs = tlsExceptionBackoffs + ) + ) + } + + /** + * Manhattan tweet storage client settings. The remote destination is derived from `zone`: + * atla and pdxa point at each other and localhost points at atla. Any other zone fails + * construction of this class with an IllegalArgumentException. + */ + val tweetStorageConfig: ManhattanTweetStorageClient.Config = { + val remoteZone = zone match { + case "atla" => "pdxa" + case "pdxa" => "atla" + // "atla" is already handled by the first case, so only localhost needs this mapping + case "localhost" => "atla" + case _ => + throw new IllegalArgumentException(s"Cannot configure remote DC for unknown zone '$zone'") + } + ManhattanTweetStorageClient.Config( + applicationId = "tbird_mh", + localDestination = "/s/manhattan/cylon.native-thrift", + localTimeout = 290.milliseconds, + remoteDestination = s"/srv#/prod/$remoteZone/manhattan/cylon.native-thrift", + remoteTimeout = 1.second, + maxRequestsPerBatch = 25, + serviceIdentifier = serviceIdentifier, + opportunisticTlsLevel = OpportunisticTls.Required + ) + } + + val
userImageServiceConfig: UserImageService.Config = + UserImageService.Config( + processTweetMediaTimeout = 5.seconds, + updateTweetMediaTimeout = 2.seconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + /** topic name selected by env: the prod topic in prod, the staging topic in dev and staging */ + val adsLoggingClientTopicName = env match { + case Env.prod => "ads_client_callback_prod" + case Env.dev | Env.staging => "ads_client_callback_staging" + } + + /** Delay between successive cascadedDeleteTweet calls when deleting retweets. Applied via decider. */ + val retweetDeletionDelay: Duration = 20.milliseconds + + /** + * Delay to sleep before each tweet deletion of an eraseUserTweets request. + * This is a simple rate limiting mechanism. The long term solution is + * to move async endpoints like user erasures and retweet deletions out + * of the main tweetypie cluster and into an async cluster with first class + * rate limiting support + */ + val eraseUserTweetsDelay: Duration = 100.milliseconds + + /** page size for tweet erasures (kept separate from tflockPageSize) */ + val eraseUserTweetsPageSize = 100 + + val getStoredTweetsByUserPageSize = 20 + val getStoredTweetsByUserMaxPages = 30 + + // ********* ttls ********** + + // Unfortunately, this tombstone TTL applies equally to the case + // where the tweet was deleted and the case that the tweet does not + // exist or is unavailable. If we could differentiate between those + // cases, we'd cache deleted for a long time and not + // found/unavailable for a short time. We chose 100 + // milliseconds for the minimum TTL because there are known cases in + // which a not found result can be erroneously written to cache on + // tweet creation. This minimum TTL is a trade-off between a + // thundering herd of database requests from clients that just got + // the fanned-out tweet and the window for which these inconsistent + // results will be available.
+ val tweetTombstoneTtl: CachedResult.CachedNotFound[TweetId] => Duration = + TombstoneTtl.linear(min = 100.milliseconds, max = 1.day, from = 5.minutes, to = 5.hours) + + val tweetMemcacheTtl: Duration = 14.days + val urlMemcacheTtl: Duration = 1.hour + val urlMemcacheSoftTtl: Duration = 1.hour + val deviceSourceMemcacheTtl: Duration = 12.hours + val deviceSourceMemcacheSoftTtl: SoftTtl.ByAge[Nothing] = + SoftTtl.ByAge(softTtl = 1.hour, jitter = 1.minute) + val deviceSourceInProcessTtl: Duration = 8.hours + val deviceSourceInProcessSoftTtl: Duration = 30.minutes + val placeMemcacheTtl: Duration = 1.day + val placeMemcacheSoftTtl: SoftTtl.ByAge[Nothing] = + SoftTtl.ByAge(softTtl = 3.hours, jitter = 1.minute) + val cardMemcacheTtl: Duration = 20.minutes + val cardMemcacheSoftTtl: Duration = 30.seconds + val tweetCreateLockingMemcacheTtl: Duration = 10.seconds + val tweetCreateLockingMemcacheLongTtl: Duration = 12.hours + val geoScrubMemcacheTtl: Duration = 30.minutes + + val tweetCountsMemcacheTtl: Duration = 24.hours + val tweetCountsMemcacheNonZeroSoftTtl: Duration = 3.hours + val tweetCountsMemcacheZeroSoftTtl: Duration = 7.hours + + val cacheClientPendingRequestLimit: Int = flags.memcachePendingRequestLimit() + + val deviceSourceInProcessCacheMaxSize = 10000 + + val inProcessCacheConfigOpt: Option[InProcessCacheConfig] = + if (flags.enableInProcessCache()) { + Some( + InProcessCacheConfig( + ttl = flags.inProcessCacheTtlMs().milliseconds, + maximumSize = flags.inProcessCacheSize() + ) + ) + } else { + None + } + + // Begin returning OverCapacity for tweet repo when cache SR falls below 95%, + // Scale to rejecting 95% of requests when cache SR <= 80% + val tweetCacheAvailabilityFromSuccessRate: Double => Double = + Availability.linearlyScaled(0.95, 0.80, 0.05) + + // ******* repository chunking size ******** + + val tweetCountsRepoChunkSize = 6 + // n times `tweetCountsRepoChunkSize`, so chunking at higher level does not + // generate small batches at lower 
level. + val tweetCountsCacheChunkSize = 18 + + val duplicateTweetFinderSettings: DuplicateTweetFinder.Settings = + DuplicateTweetFinder.Settings(numTweetsToCheck = 10, maxDuplicateAge = 12.hours) + + val backendWarmupSettings: Warmup.Settings = + Warmup.Settings( + // Try for twenty seconds to warm up the backends before giving + // up. + maxWarmupDuration = 20.seconds, + // Only allow up to 50 warmup requests of any kind to be + // outstanding at a time. + maxOutstandingRequests = 50, + // These timeouts are just over the p999 latency observed in ATLA + // for requests to these backends. + requestTimeouts = Map( + "expandodo" -> 120.milliseconds, + "geo_relevance" -> 50.milliseconds, + "gizmoduck" -> 200.milliseconds, + "memcache" -> 50.milliseconds, + "scarecrow" -> 120.milliseconds, + "socialgraphservice" -> 180.milliseconds, + "talon" -> 70.milliseconds, + "tflock" -> 320.milliseconds, + "timelineservice" -> 200.milliseconds, + "tweetstorage" -> 50.milliseconds + ), + reliability = Warmup.Reliably( + // Consider a backend warmed up if 99% of requests are succeeding. + reliabilityThreshold = 0.99, + // When performing warmup, use a maximum of 10 concurrent + // requests to each backend. + concurrency = 10, + // Do not allow more than this many attempts to perform the + // warmup action before giving up.
+ maxAttempts = 1000 + ) + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala new file mode 100644 index 000000000..49cc53fb5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala @@ -0,0 +1,577 @@ +package com.twitter.tweetypie +package config + +import com.twitter.servo.util.FutureArrow +import com.twitter.servo.util.RetryHandler +import com.twitter.servo.util.Scribe +import com.twitter.tweetypie.backends.LimiterService.Feature.MediaTagCreate +import com.twitter.tweetypie.backends.LimiterService.Feature.Updates +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.handler.TweetBuilder +import com.twitter.tweetypie.repository.TweetKeyFactory +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.tflock.TFlockIndexer +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.RetryPolicyBuilder +import com.twitter.util.Timer + +/** + * Builds the [[TotalTweetStore]]: constructs each individual store (Manhattan, cache, indexing, + * timeline, event bus, replication, ...) with its stats scope and retry behaviour, composes them + * into the per-operation stores, and exposes each operation's FutureEffect on the result. + */ +object TweetStores { + // NOTE(review): `deciderGates` is not referenced anywhere in the body of `apply`. + def apply( + settings: TweetServiceSettings, + statsReceiver: StatsReceiver, + timer: Timer, + deciderGates: TweetypieDeciderGates, + tweetKeyFactory: TweetKeyFactory, + clients: BackendClients, + caches: Caches, + asyncBuilder: ServiceInvocationBuilder, + hasMedia: Tweet => Boolean, + clientIdHelper: ClientIdHelper, + ): TotalTweetStore = { + + val deferredrpcRetryPolicy = + // retry all application exceptions for now. however, in the future, deferredrpc + // may throw a backpressure exception that should not be retried. + RetryPolicyBuilder.anyFailure(settings.deferredrpcBackoffs) + + val asyncWriteRetryPolicy = + // currently retries all failures with the same back-off times. might need + // to update to handle backpressure exceptions differently.
+ RetryPolicyBuilder.anyFailure(settings.asyncWriteRetryBackoffs) + + val replicatedEventRetryPolicy = + RetryPolicyBuilder.anyFailure(settings.replicatedEventCacheBackoffs) + + val logLensStore = + LogLensStore( + tweetCreationsLogger = Logger("com.twitter.tweetypie.store.TweetCreations"), + tweetDeletionsLogger = Logger("com.twitter.tweetypie.store.TweetDeletions"), + tweetUndeletionsLogger = Logger("com.twitter.tweetypie.store.TweetUndeletions"), + tweetUpdatesLogger = Logger("com.twitter.tweetypie.store.TweetUpdates"), + clientIdHelper = clientIdHelper, + ) + + val tweetStoreStats = statsReceiver.scope("tweet_store") + + val tweetStatsStore = TweetStatsStore(tweetStoreStats.scope("stats")) + + val asyncRetryConfig = + new TweetStore.AsyncRetry( + asyncWriteRetryPolicy, + deferredrpcRetryPolicy, + timer, + clients.asyncRetryTweetService, + Scribe(FailedAsyncWrite, "tweetypie_failed_async_writes") + )(_, _) + + val manhattanStore = { + val scopedStats = tweetStoreStats.scope("base") + ManhattanTweetStore(clients.tweetStorageClient) + .tracked(scopedStats) + .asyncRetry(asyncRetryConfig(scopedStats, ManhattanTweetStore.Action)) + } + + val cachingTweetStore = { + val cacheStats = tweetStoreStats.scope("caching") + CachingTweetStore( + tweetKeyFactory = tweetKeyFactory, + tweetCache = caches.tweetCache, + stats = cacheStats + ).tracked(cacheStats) + .asyncRetry(asyncRetryConfig(cacheStats, CachingTweetStore.Action)) + .replicatedRetry(RetryHandler.failuresOnly(replicatedEventRetryPolicy, timer, cacheStats)) + } + + val indexingStore = { + val indexingStats = tweetStoreStats.scope("indexing") + TweetIndexingStore( + new TFlockIndexer( + tflock = clients.tflockWriteClient, + hasMedia = hasMedia, + backgroundIndexingPriority = settings.backgroundIndexingPriority, + stats = indexingStats + ) + ).tracked(indexingStats) + .asyncRetry(asyncRetryConfig(indexingStats, TweetIndexingStore.Action)) + } + + val timelineUpdatingStore = { + val tlsScope = 
tweetStoreStats.scope("timeline_updating") + TlsTimelineUpdatingStore( + processEvent2 = clients.timelineService.processEvent2, + hasMedia = hasMedia, + stats = tlsScope + ).tracked(tlsScope) + .asyncRetry(asyncRetryConfig(tlsScope, TlsTimelineUpdatingStore.Action)) + } + + val guanoServiceStore = { + val guanoStats = tweetStoreStats.scope("guano") + GuanoServiceStore(clients.guano, guanoStats) + .tracked(guanoStats) + .asyncRetry(asyncRetryConfig(guanoStats, GuanoServiceStore.Action)) + } + + val mediaServiceStore = { + val mediaStats = tweetStoreStats.scope("media") + MediaServiceStore(clients.mediaClient.deleteMedia, clients.mediaClient.undeleteMedia) + .tracked(mediaStats) + .asyncRetry(asyncRetryConfig(mediaStats, MediaServiceStore.Action)) + } + + val userCountsUpdatingStore = { + val userCountsStats = tweetStoreStats.scope("user_counts") + GizmoduckUserCountsUpdatingStore(clients.gizmoduck.incrCount, hasMedia) + .tracked(userCountsStats) + .ignoreFailures + } + + val tweetCountsUpdatingStore = { + val cacheScope = statsReceiver.scope("tweet_counts_cache") + val tweetCountsStats = tweetStoreStats.scope("tweet_counts") + + val memcacheCountsStore = { + val lockingCacheCountsStore = + CachedCountsStore.fromLockingCache(caches.tweetCountsCache) + + new AggregatingCachedCountsStore( + lockingCacheCountsStore, + timer, + settings.aggregatedTweetCountsFlushInterval, + settings.maxAggregatedCountsSize, + cacheScope + ) + } + + TweetCountsCacheUpdatingStore(memcacheCountsStore) + .tracked(tweetCountsStats) + .ignoreFailures + } + + val replicatingStore = { + val replicateStats = tweetStoreStats.scope("replicate_out") + ReplicatingTweetStore( + clients.replicationClient + ).tracked(replicateStats) + .retry(RetryHandler.failuresOnly(deferredrpcRetryPolicy, timer, replicateStats)) + .asyncRetry(asyncRetryConfig(replicateStats, ReplicatingTweetStore.Action)) + .enabledBy(Gate.const(settings.enableReplication)) + } + + val scribeMediaTagStore = + ScribeMediaTagStore() + 
.tracked(tweetStoreStats.scope("scribe_media_tag_store")) + + val limiterStore = + LimiterStore( + clients.limiterService.incrementByOne(Updates), + clients.limiterService.increment(MediaTagCreate) + ).tracked(tweetStoreStats.scope("limiter_store")) + + val geoSearchRequestIDStore = { + val statsScope = tweetStoreStats.scope("geo_search_request_id") + GeoSearchRequestIDStore(FutureArrow(clients.geoRelevance.reportConversion _)) + .tracked(statsScope) + .asyncRetry(asyncRetryConfig(statsScope, GeoSearchRequestIDStore.Action)) + } + + val userGeotagUpdateStore = { + val geotagScope = tweetStoreStats.scope("gizmoduck_user_geotag_updating") + GizmoduckUserGeotagUpdateStore( + clients.gizmoduck.modifyAndGet, + geotagScope + ).tracked(geotagScope) + .asyncRetry(asyncRetryConfig(geotagScope, GizmoduckUserGeotagUpdateStore.Action)) + } + + val fanoutServiceStore = { + val fanoutStats = tweetStoreStats.scope("fanout_service_delivery") + FanoutServiceStore(clients.fanoutServiceClient, fanoutStats) + .tracked(fanoutStats) + .asyncRetry(asyncRetryConfig(fanoutStats, FanoutServiceStore.Action)) + } + + /** + * A store that converts Tweetypie TweetEvents to EventBus TweetEvents and sends each event to + * the underlying FutureEffect[eventbus.TweetEvent] + */ + val eventBusEnqueueStore = { + val enqueueStats = tweetStoreStats.scope("event_bus_enqueueing") + val enqueueEffect = FutureEffect[TweetEvent](clients.tweetEventsPublisher.publish) + + TweetEventBusStore( + enqueueEffect + ).tracked(enqueueStats) + .asyncRetry(asyncRetryConfig(enqueueStats, AsyncWriteAction.EventBusEnqueue)) + } + + val retweetArchivalEnqueueStore = { + val enqueueStats = tweetStoreStats.scope("retweet_archival_enqueueing") + val enqueueEffect = FutureEffect(clients.retweetArchivalEventPublisher.publish) + + RetweetArchivalEnqueueStore(enqueueEffect) + .tracked(enqueueStats) + .asyncRetry(asyncRetryConfig(enqueueStats, AsyncWriteAction.RetweetArchivalEnqueue)) + } + + val asyncEnqueueStore = { + val 
asyncEnqueueStats = tweetStoreStats.scope("async_enqueueing") + AsyncEnqueueStore( + asyncBuilder.asyncVia(clients.asyncTweetService).service, + TweetBuilder.scrubUserInAsyncInserts, + TweetBuilder.scrubSourceTweetInAsyncInserts, + TweetBuilder.scrubSourceUserInAsyncInserts + ).tracked(asyncEnqueueStats) + .retry(RetryHandler.failuresOnly(deferredrpcRetryPolicy, timer, asyncEnqueueStats)) + } + + val insertTweetStore = + InsertTweet.Store( + logLensStore = logLensStore, + manhattanStore = manhattanStore, + tweetStatsStore = tweetStatsStore, + cachingTweetStore = cachingTweetStore, + limiterStore = limiterStore, + asyncEnqueueStore = asyncEnqueueStore, + userCountsUpdatingStore = userCountsUpdatingStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val asyncInsertStore = + AsyncInsertTweet.Store( + replicatingStore = replicatingStore, + indexingStore = indexingStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore, + timelineUpdatingStore = timelineUpdatingStore, + eventBusEnqueueStore = eventBusEnqueueStore, + fanoutServiceStore = fanoutServiceStore, + scribeMediaTagStore = scribeMediaTagStore, + userGeotagUpdateStore = userGeotagUpdateStore, + geoSearchRequestIDStore = geoSearchRequestIDStore + ) + + val replicatedInsertTweetStore = + ReplicatedInsertTweet.Store( + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val deleteTweetStore = + DeleteTweet.Store( + cachingTweetStore = cachingTweetStore, + asyncEnqueueStore = asyncEnqueueStore, + userCountsUpdatingStore = userCountsUpdatingStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore, + logLensStore = logLensStore + ) + + val asyncDeleteTweetStore = + AsyncDeleteTweet.Store( + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + replicatingStore = replicatingStore, + indexingStore = indexingStore, + eventBusEnqueueStore = eventBusEnqueueStore, + timelineUpdatingStore = timelineUpdatingStore, + 
tweetCountsUpdatingStore = tweetCountsUpdatingStore, + guanoServiceStore = guanoServiceStore, + mediaServiceStore = mediaServiceStore + ) + + val replicatedDeleteTweetStore = + ReplicatedDeleteTweet.Store( + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val incrBookmarkCountStore = + IncrBookmarkCount.Store( + asyncEnqueueStore = asyncEnqueueStore, + replicatingStore = replicatingStore + ) + + val asyncIncrBookmarkCountStore = + AsyncIncrBookmarkCount.Store( + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val replicatedIncrBookmarkCountStore = + ReplicatedIncrBookmarkCount.Store( + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val incrFavCountStore = + IncrFavCount.Store( + asyncEnqueueStore = asyncEnqueueStore, + replicatingStore = replicatingStore + ) + + val asyncIncrFavCountStore = + AsyncIncrFavCount.Store( + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val replicatedIncrFavCountStore = + ReplicatedIncrFavCount.Store( + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val scrubGeoStore = + ScrubGeo.Store( + logLensStore = logLensStore, + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + eventBusEnqueueStore = eventBusEnqueueStore, + replicatingStore = replicatingStore + ) + + val replicatedScrubGeoStore = + ReplicatedScrubGeo.Store( + cachingTweetStore = cachingTweetStore + ) + + val takedownStore = + Takedown.Store( + logLensStore = logLensStore, + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + asyncEnqueueStore = asyncEnqueueStore + ) + + val asyncTakedownStore = + AsyncTakedown.Store( + replicatingStore = replicatingStore, + guanoStore = guanoServiceStore, + eventBusEnqueueStore = eventBusEnqueueStore + ) + + val replicatedTakedownStore = + ReplicatedTakedown.Store( + cachingTweetStore = cachingTweetStore + ) + + val updatePossiblySensitiveTweetStore = + UpdatePossiblySensitiveTweet.Store( + 
manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + logLensStore = logLensStore, + asyncEnqueueStore = asyncEnqueueStore + ) + + val asyncUpdatePossiblySensitiveTweetStore = + AsyncUpdatePossiblySensitiveTweet.Store( + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + replicatingStore = replicatingStore, + guanoStore = guanoServiceStore, + eventBusStore = eventBusEnqueueStore + ) + + val replicatedUpdatePossiblySensitiveTweetStore = + ReplicatedUpdatePossiblySensitiveTweet.Store( + cachingTweetStore = cachingTweetStore + ) + + val setAdditionalFieldsStore = + SetAdditionalFields.Store( + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + asyncEnqueueStore = asyncEnqueueStore, + logLensStore = logLensStore + ) + + val asyncSetAdditionalFieldsStore = + AsyncSetAdditionalFields.Store( + replicatingStore = replicatingStore, + eventBusEnqueueStore = eventBusEnqueueStore + ) + + val replicatedSetAdditionalFieldsStore = + ReplicatedSetAdditionalFields.Store( + cachingTweetStore = cachingTweetStore + ) + + val setRetweetVisibilityStore = + SetRetweetVisibility.Store(asyncEnqueueStore = asyncEnqueueStore) + + val asyncSetRetweetVisibilityStore = + AsyncSetRetweetVisibility.Store( + tweetIndexingStore = indexingStore, + tweetCountsCacheUpdatingStore = tweetCountsUpdatingStore, + replicatingTweetStore = replicatingStore, + retweetArchivalEnqueueStore = retweetArchivalEnqueueStore + ) + + val replicatedSetRetweetVisibilityStore = + ReplicatedSetRetweetVisibility.Store( + tweetCountsCacheUpdatingStore = tweetCountsUpdatingStore + ) + + val deleteAdditionalFieldsStore = + DeleteAdditionalFields.Store( + cachingTweetStore = cachingTweetStore, + asyncEnqueueStore = asyncEnqueueStore, + logLensStore = logLensStore + ) + + val asyncDeleteAdditionalFieldsStore = + AsyncDeleteAdditionalFields.Store( + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + replicatingStore = 
replicatingStore, + eventBusEnqueueStore = eventBusEnqueueStore + ) + + val replicatedDeleteAdditionalFieldsStore = + ReplicatedDeleteAdditionalFields.Store( + cachingTweetStore = cachingTweetStore + ) + + /* + * This composed store handles all synchronous side effects of an undelete + * but does not execute the undeletion. + * + * This store is executed after the actual undelete request succeeds. + * The undeletion request is initiated by Undelete.apply() + */ + val undeleteTweetStore = + UndeleteTweet.Store( + logLensStore = logLensStore, + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore, + asyncEnqueueStore = asyncEnqueueStore + ) + + val asyncUndeleteTweetStore = + AsyncUndeleteTweet.Store( + cachingTweetStore = cachingTweetStore, + eventBusEnqueueStore = eventBusEnqueueStore, + indexingStore = indexingStore, + replicatingStore = replicatingStore, + mediaServiceStore = mediaServiceStore, + timelineUpdatingStore = timelineUpdatingStore + ) + + val replicatedUndeleteTweetStore = + ReplicatedUndeleteTweet.Store( + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val flushStore = + Flush.Store( + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val scrubGeoUpdateUserTimestampStore = + ScrubGeoUpdateUserTimestamp.Store( + cache = caches.geoScrubCache, + setInManhattan = clients.geoScrubEventStore.setGeoScrubTimestamp, + geotagUpdateStore = userGeotagUpdateStore, + tweetEventBusStore = eventBusEnqueueStore + ) + + val quotedTweetDeleteStore = + QuotedTweetDelete.Store( + eventBusEnqueueStore = eventBusEnqueueStore + ) + + val quotedTweetTakedownStore = + QuotedTweetTakedown.Store( + eventBusEnqueueStore = eventBusEnqueueStore + ) + + new TotalTweetStore { + val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = + asyncDeleteAdditionalFieldsStore.asyncDeleteAdditionalFields + val 
asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + asyncDeleteTweetStore.asyncDeleteTweet + val asyncIncrBookmarkCount: FutureEffect[AsyncIncrBookmarkCount.Event] = + asyncIncrBookmarkCountStore.asyncIncrBookmarkCount + val asyncIncrFavCount: FutureEffect[AsyncIncrFavCount.Event] = + asyncIncrFavCountStore.asyncIncrFavCount + val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = asyncInsertStore.asyncInsertTweet + val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] = + asyncSetAdditionalFieldsStore.asyncSetAdditionalFields + val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = + asyncSetRetweetVisibilityStore.asyncSetRetweetVisibility + val asyncTakedown: FutureEffect[AsyncTakedown.Event] = asyncTakedownStore.asyncTakedown + val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + asyncUndeleteTweetStore.asyncUndeleteTweet + val asyncUpdatePossiblySensitiveTweet: FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event] = + asyncUpdatePossiblySensitiveTweetStore.asyncUpdatePossiblySensitiveTweet + val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = + deleteAdditionalFieldsStore.deleteAdditionalFields + val deleteTweet: FutureEffect[DeleteTweet.Event] = deleteTweetStore.deleteTweet + val flush: FutureEffect[Flush.Event] = flushStore.flush + val incrBookmarkCount: FutureEffect[IncrBookmarkCount.Event] = + incrBookmarkCountStore.incrBookmarkCount + val incrFavCount: FutureEffect[IncrFavCount.Event] = incrFavCountStore.incrFavCount + val insertTweet: FutureEffect[InsertTweet.Event] = insertTweetStore.insertTweet + val quotedTweetDelete: FutureEffect[QuotedTweetDelete.Event] = + quotedTweetDeleteStore.quotedTweetDelete + val quotedTweetTakedown: FutureEffect[QuotedTweetTakedown.Event] = + quotedTweetTakedownStore.quotedTweetTakedown + val replicatedDeleteAdditionalFields: FutureEffect[ReplicatedDeleteAdditionalFields.Event] = + 
replicatedDeleteAdditionalFieldsStore.replicatedDeleteAdditionalFields + val replicatedDeleteTweet: FutureEffect[ReplicatedDeleteTweet.Event] = + replicatedDeleteTweetStore.replicatedDeleteTweet + val replicatedIncrBookmarkCount: FutureEffect[ReplicatedIncrBookmarkCount.Event] = + replicatedIncrBookmarkCountStore.replicatedIncrBookmarkCount + val replicatedIncrFavCount: FutureEffect[ReplicatedIncrFavCount.Event] = + replicatedIncrFavCountStore.replicatedIncrFavCount + val replicatedInsertTweet: FutureEffect[ReplicatedInsertTweet.Event] = + replicatedInsertTweetStore.replicatedInsertTweet + val replicatedScrubGeo: FutureEffect[ReplicatedScrubGeo.Event] = + replicatedScrubGeoStore.replicatedScrubGeo + val replicatedSetAdditionalFields: FutureEffect[ReplicatedSetAdditionalFields.Event] = + replicatedSetAdditionalFieldsStore.replicatedSetAdditionalFields + val replicatedSetRetweetVisibility: FutureEffect[ReplicatedSetRetweetVisibility.Event] = + replicatedSetRetweetVisibilityStore.replicatedSetRetweetVisibility + val replicatedTakedown: FutureEffect[ReplicatedTakedown.Event] = + replicatedTakedownStore.replicatedTakedown + val replicatedUndeleteTweet: FutureEffect[ReplicatedUndeleteTweet.Event] = + replicatedUndeleteTweetStore.replicatedUndeleteTweet + val replicatedUpdatePossiblySensitiveTweet: FutureEffect[ + ReplicatedUpdatePossiblySensitiveTweet.Event + ] = + replicatedUpdatePossiblySensitiveTweetStore.replicatedUpdatePossiblySensitiveTweet + val retryAsyncDeleteAdditionalFields: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] + ] = + asyncDeleteAdditionalFieldsStore.retryAsyncDeleteAdditionalFields + val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[AsyncDeleteTweet.Event]] = + asyncDeleteTweetStore.retryAsyncDeleteTweet + val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[AsyncInsertTweet.Event]] = + asyncInsertStore.retryAsyncInsertTweet + val retryAsyncSetAdditionalFields: FutureEffect[ + 
TweetStoreRetryEvent[AsyncSetAdditionalFields.Event] + ] = + asyncSetAdditionalFieldsStore.retryAsyncSetAdditionalFields + val retryAsyncSetRetweetVisibility: FutureEffect[ + TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] + ] = + asyncSetRetweetVisibilityStore.retryAsyncSetRetweetVisibility + val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = + asyncTakedownStore.retryAsyncTakedown + val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[AsyncUndeleteTweet.Event]] = + asyncUndeleteTweetStore.retryAsyncUndeleteTweet + val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] + ] = + asyncUpdatePossiblySensitiveTweetStore.retryAsyncUpdatePossiblySensitiveTweet + val scrubGeo: FutureEffect[ScrubGeo.Event] = scrubGeoStore.scrubGeo + val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = + setAdditionalFieldsStore.setAdditionalFields + val setRetweetVisibility: FutureEffect[SetRetweetVisibility.Event] = + setRetweetVisibilityStore.setRetweetVisibility + val takedown: FutureEffect[Takedown.Event] = takedownStore.takedown + val undeleteTweet: FutureEffect[UndeleteTweet.Event] = undeleteTweetStore.undeleteTweet + val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = + updatePossiblySensitiveTweetStore.updatePossiblySensitiveTweet + val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] = + scrubGeoUpdateUserTimestampStore.scrubGeoUpdateUserTimestamp + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala new file mode 100644 index 000000000..a20def18a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala @@ -0,0 +1,91 @@ +package com.twitter.tweetypie +package config + +import 
com.twitter.decider.Decider +import com.twitter.tweetypie.decider.DeciderGates + +object TweetypieDeciderGates { + def apply( + _decider: Decider, + _overrides: Map[String, Boolean] = Map.empty + ): TweetypieDeciderGates = + new TweetypieDeciderGates { + override def decider: Decider = _decider + override def overrides: Map[String, Boolean] = _overrides + override def prefix: String = "tweetypie" + } +} + +trait TweetypieDeciderGates extends DeciderGates { + val checkSpamOnRetweet: Gate[Unit] = linear("check_spam_on_retweet") + val checkSpamOnTweet: Gate[Unit] = linear("check_spam_on_tweet") + val delayEraseUserTweets: Gate[Unit] = linear("delay_erase_user_tweets") + val denyNonTweetPermalinks: Gate[Unit] = linear("deny_non_tweet_permalinks") + val enableCommunityTweetCreates: Gate[Unit] = linear("enable_community_tweet_creates") + val useConversationControlFeatureSwitchResults: Gate[Unit] = linear( + "conversation_control_use_feature_switch_results") + val enableExclusiveTweetControlValidation: Gate[Unit] = linear( + "enable_exclusive_tweet_control_validation") + val enableTrustedFriendsControlValidation: Gate[Unit] = linear( + "enable_trusted_friends_control_validation" + ) + val enableStaleTweetValidation: Gate[Unit] = linear( + "enable_stale_tweet_validation" + ) + val enforceRateLimitedClients: Gate[Unit] = linear("enforce_rate_limited_clients") + val failClosedInVF: Gate[Unit] = linear("fail_closed_in_vf") + val forkDarkTraffic: Gate[Unit] = linear("fork_dark_traffic") + val hydrateConversationMuted: Gate[Unit] = linear("hydrate_conversation_muted") + val hydrateCounts: Gate[Unit] = linear("hydrate_counts") + val hydratePreviousCounts: Gate[Unit] = linear("hydrate_previous_counts") + val hydrateDeviceSources: Gate[Unit] = linear("hydrate_device_sources") + val hydrateEscherbirdAnnotations: Gate[Unit] = linear("hydrate_escherbird_annotations") + val hydrateGnipProfileGeoEnrichment: Gate[Unit] = linear("hydrate_gnip_profile_geo_enrichment") + val 
hydrateHasMedia: Gate[Unit] = linear("hydrate_has_media") + val hydrateMedia: Gate[Unit] = linear("hydrate_media") + val hydrateMediaRefs: Gate[Unit] = linear("hydrate_media_refs") + val hydrateMediaTags: Gate[Unit] = linear("hydrate_media_tags") + val hydratePastedMedia: Gate[Unit] = linear("hydrate_pasted_media") + val hydratePerspectives: Gate[Unit] = linear("hydrate_perspectives") + val hydratePerspectivesEditsForTimelines: Gate[Unit] = linear( + "hydrate_perspectives_edits_for_timelines") + val hydratePerspectivesEditsForTweetDetail: Gate[Unit] = linear( + "hydrate_perspectives_edits_for_tweet_details") + val hydratePerspectivesEditsForOtherSafetyLevels: Gate[Unit] = + linear("hydrate_perspectives_edits_for_other_levels") + val hydratePlaces: Gate[Unit] = linear("hydrate_places") + val hydrateScrubEngagements: Gate[Unit] = linear("hydrate_scrub_engagements") + val jiminyDarkRequests: Gate[Unit] = linear("jiminy_dark_requests") + val logCacheExceptions: Gate[Unit] = linear("log_cache_exceptions") + val logReads: Gate[Unit] = linear("log_reads") + val logTweetCacheWrites: Gate[TweetId] = byId("log_tweet_cache_writes") + val logWrites: Gate[Unit] = linear("log_writes") + val logYoungTweetCacheWrites: Gate[TweetId] = byId("log_young_tweet_cache_writes") + val maxRequestWidthEnabled: Gate[Unit] = linear("max_request_width_enabled") + val mediaRefsHydratorIncludePastedMedia: Gate[Unit] = linear( + "media_refs_hydrator_include_pasted_media") + val rateLimitByLimiterService: Gate[Unit] = linear("rate_limit_by_limiter_service") + val rateLimitTweetCreationFailure: Gate[Unit] = linear("rate_limit_tweet_creation_failure") + val replicateReadsToATLA: Gate[Unit] = linear("replicate_reads_to_atla") + val replicateReadsToPDXA: Gate[Unit] = linear("replicate_reads_to_pdxa") + val disableInviteViaMention: Gate[Unit] = linear("disable_invite_via_mention") + val shedReadTrafficVoluntarily: Gate[Unit] = linear("shed_read_traffic_voluntarily") + val 
preferForwardedServiceIdentifierForClientId: Gate[Unit] = + linear("prefer_forwarded_service_identifier_for_client_id") + val enableRemoveUnmentionedImplicitMentions: Gate[Unit] = linear( + "enable_remove_unmentioned_implicit_mentions") + val validateCardRefAttachmentAndroid: Gate[Unit] = linear("validate_card_ref_attachment_android") + val validateCardRefAttachmentNonAndroid: Gate[Unit] = linear( + "validate_card_ref_attachment_non_android") + val tweetVisibilityLibraryEnableParityTest: Gate[Unit] = linear( + "tweet_visibility_library_enable_parity_test") + val enableVfFeatureHydrationInQuotedTweetVLShim: Gate[Unit] = linear( + "enable_vf_feature_hydration_in_quoted_tweet_visibility_library_shim") + val disablePromotedTweetEdit: Gate[Unit] = linear("disable_promoted_tweet_edit") + val shouldMaterializeContainers: Gate[Unit] = linear("should_materialize_containers") + val checkTwitterBlueSubscriptionForEdit: Gate[Unit] = linear( + "check_twitter_blue_subscription_for_edit") + val hydrateBookmarksCount: Gate[Long] = byId("hydrate_bookmarks_count") + val hydrateBookmarksPerspective: Gate[Long] = byId("hydrate_bookmarks_perspective") + val setEditTimeWindowToSixtyMinutes: Gate[Unit] = linear("set_edit_time_window_to_sixty_minutes") +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala new file mode 100644 index 000000000..621bb8148 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala @@ -0,0 +1,223 @@ +package com.twitter.tweetypie +package config + +import com.twitter.servo.util.FutureArrow +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.handler.TweetBuilder +import com.twitter.tweetypie.handler.WritePathQueryOptions +import com.twitter.tweetypie.hydrator.EscherbirdAnnotationHydrator +import 
com.twitter.tweetypie.hydrator.LanguageHydrator +import com.twitter.tweetypie.hydrator.PlaceHydrator +import com.twitter.tweetypie.hydrator.ProfileGeoHydrator +import com.twitter.tweetypie.hydrator.TweetDataValueHydrator +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.store.InsertTweet +import com.twitter.tweetypie.store.UndeleteTweet +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.EditControlUtil + +object WritePathHydration { + type HydrateQuotedTweet = + FutureArrow[(User, QuotedTweet, WritePathHydrationOptions), Option[QuoteTweetMetadata]] + + case class QuoteTweetMetadata( + quotedTweet: Tweet, + quotedUser: User, + quoterHasAlreadyQuotedTweet: Boolean) + + private val log = Logger(getClass) + + val UserFieldsForInsert: Set[UserField] = + TweetBuilder.userFields + + val AllowedMissingFieldsOnWrite: Set[FieldByPath] = + Set( + EscherbirdAnnotationHydrator.hydratedField, + LanguageHydrator.hydratedField, + PlaceHydrator.HydratedField, + ProfileGeoHydrator.hydratedField + ) + + /** + * Builds a FutureArrow that performs the necessary hydration in the write-path for an + * InsertTweet.Event. There are two separate hydration steps, pre-cache and post-cache. + * The pre-cache hydration step performs the hydration which is safe to cache, while the + * post-cache hydration step performs the hydration whose results we don't want to cache + * on the tweet. + * + * InsertTweet.Event contains two tweet fields, `tweet` and `internalTweet`. `tweet` is + * the input value used for hydration, and in the updated InsertTweet.Event returned by the + * FutureArrow, `tweet` contains the post-cache hydrated tweet while `internalTweet` contains + * the pre-cache hydrated tweet.
+ */ + def hydrateInsertTweetEvent( + hydrateTweet: FutureArrow[(TweetData, TweetQuery.Options), TweetData], + hydrateQuotedTweet: HydrateQuotedTweet + ): FutureArrow[InsertTweet.Event, InsertTweet.Event] = + FutureArrow { event => + val cause = TweetQuery.Cause.Insert(event.tweet.id) + val hydrationOpts = event.hydrateOptions + val isEditControlEdit = event.tweet.editControl.exists(EditControlUtil.isEditControlEdit) + val queryOpts: TweetQuery.Options = + WritePathQueryOptions.insert(cause, event.user, hydrationOpts, isEditControlEdit) + + val initTweetData = + TweetData( + tweet = event.tweet, + sourceTweetResult = event.sourceTweet.map(TweetResult(_)) + ) + + for { + tweetData <- hydrateTweet((initTweetData, queryOpts)) + hydratedTweet = tweetData.tweet + internalTweet = + tweetData.cacheableTweetResult + .map(_.value.toCachedTweet) + .getOrElse( + throw new IllegalStateException(s"expected cacheableTweetResult, e=${event}")) + + optQt = getQuotedTweet(hydratedTweet) + .orElse(event.sourceTweet.flatMap(getQuotedTweet)) + + hydratedQT <- optQt match { + case None => Future.value(None) + case Some(qt) => hydrateQuotedTweet((event.user, qt, hydrationOpts)) + } + } yield { + event.copy( + tweet = hydratedTweet, + _internalTweet = Some(internalTweet), + quotedTweet = hydratedQT.map { case QuoteTweetMetadata(t, _, _) => t }, + quotedUser = hydratedQT.map { case QuoteTweetMetadata(_, u, _) => u }, + quoterHasAlreadyQuotedTweet = hydratedQT.exists { case QuoteTweetMetadata(_, _, b) => b } + ) + } + } + + /** + * Builds a FutureArrow for retrieving the QuoteTweetMetadata for a + * QuotedTweet struct. If either the quoted tweet or the quoted user + * isn't visible to the tweeting user, the FutureArrow will return None.
+ */ + def hydrateQuotedTweet( + tweetRepo: TweetRepository.Optional, + userRepo: UserRepository.Optional, + quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type + ): HydrateQuotedTweet = { + FutureArrow { + case (tweetingUser, qt, hydrateOptions) => + val tweetQueryOpts = WritePathQueryOptions.quotedTweet(tweetingUser, hydrateOptions) + val userQueryOpts = + UserQueryOptions( + UserFieldsForInsert, + UserVisibility.Visible, + forUserId = Some(tweetingUser.id) + ) + + Stitch.run( + Stitch + .join( + tweetRepo(qt.tweetId, tweetQueryOpts), + userRepo(UserKey.byId(qt.userId), userQueryOpts), + // We're failing open here on tflock exceptions since this should not + // affect the ability to quote tweet if tflock goes down. (although if + // this call doesn't succeed, quote counts may be inaccurate for a brief + // period of time) + quoterHasAlreadyQuotedRepo(qt.tweetId, tweetingUser.id).liftToTry + ) + .map { + case (Some(tweet), Some(user), isAlreadyQuoted) => + Some(QuoteTweetMetadata(tweet, user, isAlreadyQuoted.getOrElse(false))) + case _ => None + } + ) + } + } + + /** + * Builds a FutureArrow that performs any additional hydration on an UndeleteTweet.Event before + * being passed to a TweetStore. 
+ */ + def hydrateUndeleteTweetEvent( + hydrateTweet: FutureArrow[(TweetData, TweetQuery.Options), TweetData], + hydrateQuotedTweet: HydrateQuotedTweet + ): FutureArrow[UndeleteTweet.Event, UndeleteTweet.Event] = + FutureArrow { event => + val cause = TweetQuery.Cause.Undelete(event.tweet.id) + val hydrationOpts = event.hydrateOptions + val isEditControlEdit = event.tweet.editControl.exists(EditControlUtil.isEditControlEdit) + val queryOpts = WritePathQueryOptions.insert(cause, event.user, hydrationOpts, isEditControlEdit) + + // when undeleting a retweet, don't set sourceTweetResult to enable SourceTweetHydrator to + // hydrate it + val initTweetData = TweetData(tweet = event.tweet) + + for { + tweetData <- hydrateTweet((initTweetData, queryOpts)) + hydratedTweet = tweetData.tweet + internalTweet = + tweetData.cacheableTweetResult + .map(_.value.toCachedTweet) + .getOrElse( + throw new IllegalStateException(s"expected cacheableTweetResult, e=${event}")) + + optQt = getQuotedTweet(hydratedTweet) + .orElse(tweetData.sourceTweetResult.map(_.value.tweet).flatMap(getQuotedTweet)) + + hydratedQt <- optQt match { + case None => Future.value(None) + case Some(qt) => hydrateQuotedTweet((event.user, qt, hydrationOpts)) + } + } yield { + event.copy( + tweet = hydratedTweet, + _internalTweet = Some(internalTweet), + sourceTweet = tweetData.sourceTweetResult.map(_.value.tweet), + quotedTweet = hydratedQt.map { case QuoteTweetMetadata(t, _, _) => t }, + quotedUser = hydratedQt.map { case QuoteTweetMetadata(_, u, _) => u }, + quoterHasAlreadyQuotedTweet = hydratedQt.exists { case QuoteTweetMetadata(_, _, b) => b } + ) + } + } + + /** + * Converts a TweetDataValueHydrator into a FutureArrow that hydrates a tweet for the write-path. 
+ */ + def hydrateTweet( + hydrator: TweetDataValueHydrator, + stats: StatsReceiver, + allowedMissingFields: Set[FieldByPath] = AllowedMissingFieldsOnWrite + ): FutureArrow[(TweetData, TweetQuery.Options), TweetData] = { + val hydrationStats = stats.scope("hydration") + val missingFieldsStats = hydrationStats.scope("missing_fields") + + FutureArrow[(TweetData, TweetQuery.Options), TweetData] { + case (td, opts) => + Stitch + .run(hydrator(td, opts)) + .rescue { + case ex => + log.warn("Hydration failed with exception", ex) + Future.exception( + TweetHydrationError("Hydration failed with exception: " + ex, Some(ex)) + ) + } + .flatMap { r => + // Record every missing field (allowed or not), even if the request succeeds + for (missingField <- r.state.failedFields) + missingFieldsStats.counter(missingField.fieldIdPath.mkString(".")).incr() + + if ((r.state.failedFields -- allowedMissingFields).nonEmpty) { + Future.exception( + TweetHydrationError( + "Failed to hydrate. Missing Fields: " + r.state.failedFields.mkString(",") + ) + ) + } else { + Future.value(r.value) + } + } + } + }.trackOutcome(stats, (_: Any) => "hydration") +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala new file mode 100644 index 000000000..a452abbd8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie + +import com.twitter.context.thriftscala.Viewer + +package object config { + // Bring Tweetypie permitted TwitterContext into scope + private[config] val TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + def getAppId: Option[AppId] = TwitterContext().getOrElse(Viewer()).clientApplicationId +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD new file mode 100644 index
000000000..a1e3cab2d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD @@ -0,0 +1,19 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/twitter/bijection:core", + "featureswitches/featureswitches-core/src/main/scala", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/repo", + "tweetypie/servo/util", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/thrift:compiled-scala", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala new file mode 100644 index 000000000..ca185eb4a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala @@ -0,0 +1,32 @@ +package com.twitter.tweetypie +package core + +import com.twitter.tweetypie.thriftscala.CardReference +import java.net.URI + +sealed trait CardUri +object Tombstone extends CardUri +case class NonTombstone(uri: String) extends CardUri + +object CardReferenceUriExtractor { + + private def parseAsUri(cardRef: CardReference) = Try(new URI(cardRef.cardUri)).toOption + private def isTombstone(uri: URI) = uri.getScheme == "tombstone" + + /** + * Parses a CardReference to return Option[CardUri] to differentiate among: + * - Some(NonTombstone): hydrate card2 with provided uri + * - Some(Tombstone): don't hydrate card2 + * - None: fallback and attempt to use url entities uris + */ + def unapply(cardRef: CardReference): Option[CardUri] = + parseAsUri(cardRef) match { + case Some(uri) if !isTombstone(uri) => Some(NonTombstone(uri.toString)) 
+ case Some(uri) => Some(Tombstone) + + // If a cardReference is set, but does not parse as a URI, it's likely a https? URL with + // incorrectly encoded query params. Since these occur frequently in the wild, we'll + // attempt a card2 hydration with it + case None => Some(NonTombstone(cardRef.cardUri)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala new file mode 100644 index 000000000..8766675cb --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala @@ -0,0 +1,48 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.data.Mutation + +/** + * An EditState is a function that changes a value and may generate + * some state about what was modified. For instance, it may record + * whether an item was changed, or whether there was an error. + * EditStates are useful because they are first-class values that can + * be composed. In particular, it is useful to concurrently access + * external data to build edits and then apply them. + * + * @tparam A The type of the value that is being edited (for instance, + * having fields hydrated with data from another service) + */ +final case class EditState[A](run: A => ValueState[A]) { + + /** + * Composes two EditStates in sequence + */ + def andThen(other: EditState[A]): EditState[A] = + EditState[A] { a0: A => + val ValueState(a1, s1) = run(a0) + val ValueState(a2, s2) = other.run(a1) + ValueState(a2, s1 ++ s2) + } +} + +object EditState { + + /** + * Creates a "passthrough" EditState: + * Leaves A unchanged and produces empty state S + */ + def unit[A]: EditState[A] = + EditState[A](ValueState.unit[A]) + + /** + * Creates an `EditState[A]` using a `Mutation[A]`. 
+ */ + def fromMutation[A](mut: Mutation[A]): EditState[A] = + EditState[A] { a => + mut(a) match { + case None => ValueState.unmodified(a) + case Some(a2) => ValueState.modified(a2) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala new file mode 100644 index 000000000..b4a9fe157 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala @@ -0,0 +1,17 @@ +package com.twitter.tweetypie +package core + +import scala.util.control.NoStackTrace + +case class InternalServerError(message: String) extends Exception(message) with NoStackTrace + +case class OverCapacity(message: String) extends Exception(message) with NoStackTrace + +case class RateLimited(message: String) extends Exception(message) with NoStackTrace + +/** + * Raised when tweet hydration fails. A `cause`, when given, becomes the exception's cause. + */ +case class TweetHydrationError(message: String, cause: Option[Throwable] = None) + extends Exception(message, cause.orNull) + with NoStackTrace diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala new file mode 100644 index 000000000..0685446f3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala @@ -0,0 +1,96 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.util.ExceptionCategorizer +import com.twitter.spam.rtf.thriftscala.FilteredReason +import scala.util.control.NoStackTrace + +sealed trait FilteredState + +object FilteredState { + + /** + * The tweet exists and the filtered state was due to business rules + * (e.g. safety label filtering, or protected accounts). Note that + * Suppress and Unavailable can both have a FilteredReason. + */ + sealed trait HasFilteredReason extends FilteredState { + def filteredReason: FilteredReason + } + + /** + * The only FilteredState that is not an exception.
It indicates that + * the tweet should be returned along with a suppress reason. This is + * sometimes known as "soft filtering". Only used by VF. + */ + case class Suppress(filteredReason: FilteredReason) extends FilteredState with HasFilteredReason + + /** + * FilteredStates that cause the tweet to be unavailable are modeled + * as an [[Exception]]. (Suppressed filtered states cannot be used as + * exceptions because they should not prevent the tweet from being + * returned.) This is sometimes known as "hard filtering". + */ + sealed abstract class Unavailable extends Exception with FilteredState with NoStackTrace + + object Unavailable { + // Used for Tweets that should be dropped because of VF rules + case class Drop(filteredReason: FilteredReason) extends Unavailable with HasFilteredReason + + // Used for Tweets that should be dropped and replaced with their preview because of VF rules + case class Preview(filteredReason: FilteredReason) extends Unavailable with HasFilteredReason + + // Used for Tweets that should be dropped because of Tweetypie business logic + case object DropUnspecified extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.UnspecifiedReason(true) + } + + // Represents a Deleted tweet (NotFound is represented with stitch.NotFound) + case object TweetDeleted extends Unavailable + + // Represents a Deleted tweet that violated Twitter Rules (see go/bounced-tweet) + case object BounceDeleted extends Unavailable + + // Represents both Deleted and NotFound source tweets + case class SourceTweetNotFound(deleted: Boolean) extends Unavailable + + // Used by the [[ReportedTweetFilter]] to signal that a Tweet has a "reported" perspective from TLS + case object Reported extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.ReportedTweet(true) + } + + // The following objects are used by the [[UserRepository]] to signal problems with the Tweet author + object Author 
{ + case object NotFound extends Unavailable + + case object Deactivated extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorIsDeactivated(true) + } + + case object Offboarded extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorAccountIsInactive(true) + } + + case object Suspended extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorIsSuspended(true) + } + + case object Protected extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorIsProtected(true) + } + + case object Unsafe extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorIsUnsafe(true) + } + } + } + + /** + * Creates a new ExceptionCategorizer which returns an empty category for any + * Unavailable value, and forwards to `underlying` for anything else. + */ + def ignoringCategorizer(underlying: ExceptionCategorizer): ExceptionCategorizer = + ExceptionCategorizer { + case _: Unavailable => Set.empty + case t => underlying(t) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala new file mode 100644 index 000000000..fae6377dd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala @@ -0,0 +1,3 @@ +package com.twitter.tweetypie.core + +case class GeoSearchRequestId(requestID: String) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala new file mode 100644 index 000000000..8c6a05a84 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala @@ -0,0 +1,122 @@ +package com.twitter.tweetypie.core + +import 
com.twitter.tweetypie.thriftscala.FieldByPath +import com.twitter.tweetypie.thriftscala.HydrationType + +/** + * HydrationState is used to record whether a particular piece of data was modified as a result + * of hydration, and/or if there was a failure to hydrate the data. + */ +sealed trait HydrationState { + def isEmpty: Boolean + def modified: Boolean + def completedHydrations: Set[HydrationType] = Set.empty + def failedFields: Set[FieldByPath] = Set.empty + def cacheErrorEncountered: Boolean = false + def ++(that: HydrationState): HydrationState +} + +object HydrationState { + + /** + * Base `HydrationState`. It acts as an identity value when combined with any other + * `HydrationState`. + */ + case object Empty extends HydrationState { + def isEmpty = true + def modified = false + def ++(that: HydrationState): HydrationState = that + } + + /** + * A `HydrationState` with metadata indicating a non-fatal hydration operation. + */ + case class Success( + override val modified: Boolean = false, + override val completedHydrations: Set[HydrationType] = Set.empty, + override val failedFields: Set[FieldByPath] = Set.empty, + override val cacheErrorEncountered: Boolean = false) + extends HydrationState { + + def isEmpty: Boolean = !modified && failedFields.isEmpty && !cacheErrorEncountered + + def ++(that: HydrationState): HydrationState = + that match { + case Empty => this + case that: Success => + HydrationState( + modified || that.modified, + completedHydrations ++ that.completedHydrations, + failedFields ++ that.failedFields, + cacheErrorEncountered || that.cacheErrorEncountered + ) + } + + /** + * An implementation of `copy` that avoids unnecessary allocations, by + * using the constant `HydrationState.empty` and `HydrationState.modified` + * values when possible.
+ */ + def copy( + modified: Boolean = this.modified, + completedHydrations: Set[HydrationType] = this.completedHydrations, + failedFields: Set[FieldByPath] = this.failedFields, + cacheErrorEncountered: Boolean = this.cacheErrorEncountered + ): HydrationState = + HydrationState(modified, completedHydrations, failedFields, cacheErrorEncountered) + } + + val empty: HydrationState = Empty + val modified: HydrationState = Success(true) + + def modified(completedHydration: HydrationType): HydrationState = + modified(Set(completedHydration)) + + def modified(completedHydrations: Set[HydrationType]): HydrationState = + Success(modified = true, completedHydrations = completedHydrations) + + def partial(failedField: FieldByPath): HydrationState = + partial(Set(failedField)) + + def partial(failedFields: Set[FieldByPath]): HydrationState = + Success(modified = false, failedFields = failedFields) + + def apply( + modified: Boolean, + completedHydrations: Set[HydrationType] = Set.empty, + failedFields: Set[FieldByPath] = Set.empty, + cacheErrorEncountered: Boolean = false + ): HydrationState = + if (completedHydrations.nonEmpty || failedFields.nonEmpty || cacheErrorEncountered) { + Success(modified, completedHydrations, failedFields, cacheErrorEncountered) + } else if (modified) { + HydrationState.modified + } else { + HydrationState.empty + } + + /** + * Creates a new HydrationState with modified set to true if `next` and `prev` are different, + * or false if they are the same. + */ + def delta[A](prev: A, next: A): HydrationState = + if (next != prev) modified else empty + + /** + * Join a list of HydrationStates into a single HydrationState. + * + * Note: this could just be a reduce over the HydrationStates but that would allocate + * _N_ HydrationStates. This approach also allows for shortcircuiting over the boolean + * fields. 
+ */ + def join(states: HydrationState*): HydrationState = { + val statesSet = states.toSet + + HydrationState( + modified = states.exists(_.modified), + completedHydrations = statesSet.flatMap(_.completedHydrations), + failedFields = statesSet.flatMap(_.failedFields), + cacheErrorEncountered = states.exists(_.cacheErrorEncountered) + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala new file mode 100644 index 000000000..28f38a807 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala @@ -0,0 +1,46 @@ +package com.twitter.tweetypie.core + +import com.twitter.spam.rtf.thriftscala.FilteredReason +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Try + +/** + * The data about a quoted tweet that needs to be carried forward to + * Tweetypie clients. + */ +sealed trait QuotedTweetResult { + def filteredReason: Option[FilteredReason] + def toOption: Option[TweetResult] + def map(f: TweetResult => TweetResult): QuotedTweetResult +} + +object QuotedTweetResult { + case object NotFound extends QuotedTweetResult { + def filteredReason: None.type = None + def toOption: None.type = None + def map(f: TweetResult => TweetResult): NotFound.type = this + } + case class Filtered(state: FilteredState.Unavailable) extends QuotedTweetResult { + def filteredReason: Option[FilteredReason] = + state match { + case st: FilteredState.HasFilteredReason => Some(st.filteredReason) + case _ => None + } + def toOption: None.type = None + def map(f: TweetResult => TweetResult): Filtered = this + } + case class Found(result: TweetResult) extends QuotedTweetResult { + def filteredReason: Option[FilteredReason] = result.value.suppress.map(_.filteredReason) + def toOption: Option[TweetResult] = Some(result) + def map(f: TweetResult => TweetResult): QuotedTweetResult = 
Found(f(result)) + } + + def fromTry(tryResult: Try[TweetResult]): Try[QuotedTweetResult] = + tryResult match { + case Return(result) => Return(Found(result)) + case Throw(state: FilteredState.Unavailable) => Return(Filtered(state)) + case Throw(com.twitter.stitch.NotFound) => Return(NotFound) + case Throw(e) => Throw(e) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala new file mode 100644 index 000000000..e367a8481 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala @@ -0,0 +1,31 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.cache +import com.twitter.servo.cache.CachedSerializer +import com.twitter.tweetypie.thriftscala +import com.twitter.tweetypie.thriftscala.CachedTweet +import com.twitter.tweetypie.thriftscala.Tweet +import org.apache.thrift.protocol.TCompactProtocol + +/** + * A container object for serializers. 
+ * Creates a serializer for every object type cached by the tweetypie service + */ +object Serializer { + lazy val CompactProtocolFactory: TCompactProtocol.Factory = new TCompactProtocol.Factory + + def toCached[T](underlying: cache.Serializer[T]): cache.CachedSerializer[T] = + new cache.CachedSerializer(underlying, CompactProtocolFactory) + + object Tweet { + lazy val Compact: cache.ThriftSerializer[thriftscala.Tweet] = + new cache.ThriftSerializer(thriftscala.Tweet, CompactProtocolFactory) + lazy val CachedCompact: CachedSerializer[Tweet] = toCached(Compact) + } + + object CachedTweet { + lazy val Compact: cache.ThriftSerializer[thriftscala.CachedTweet] = + new cache.ThriftSerializer(thriftscala.CachedTweet, CompactProtocolFactory) + lazy val CachedCompact: CachedSerializer[CachedTweet] = toCached(Compact) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala new file mode 100644 index 000000000..ab81552e9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie.core + +sealed trait StoredTweetResult { + def canHydrate: Boolean +} + +object StoredTweetResult { + sealed trait Error + object Error { + case object Corrupt extends Error + case object ScrubbedFieldsPresent extends Error + case object FieldsMissingOrInvalid extends Error + case object ShouldBeHardDeleted extends Error + } + + case class Present(errors: Seq[Error], canHydrate: Boolean) extends StoredTweetResult + + case class HardDeleted(softDeletedAtMsec: Long, hardDeletedAtMsec: Long) + extends StoredTweetResult { + override def canHydrate: Boolean = false + } + + case class SoftDeleted(softDeletedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) + extends StoredTweetResult + + case class BounceDeleted(deletedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) + 
extends StoredTweetResult + + case class Undeleted(undeletedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) + extends StoredTweetResult + + case class ForceAdded(addedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) + extends StoredTweetResult + + case class Failed(errors: Seq[Error]) extends StoredTweetResult { + override def canHydrate: Boolean = false + } + + object NotFound extends StoredTweetResult { + override def canHydrate: Boolean = false + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala new file mode 100644 index 000000000..bc4402fa2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala @@ -0,0 +1,39 @@ +package com.twitter.tweetypie.core + +import com.twitter.bouncer.thriftscala.Bounce +import com.twitter.tweetypie.TweetId +import com.twitter.incentives.jiminy.thriftscala.TweetNudge +import com.twitter.tweetypie.thriftscala.PostTweetResult +import com.twitter.tweetypie.thriftscala.TweetCreateState + +sealed abstract class TweetCreateFailure extends Exception { + def toPostTweetResult: PostTweetResult +} + +object TweetCreateFailure { + case class Bounced(bounce: Bounce) extends TweetCreateFailure { + override def toPostTweetResult: PostTweetResult = + PostTweetResult(state = TweetCreateState.Bounce, bounce = Some(bounce)) + } + + case class AlreadyRetweeted(retweetId: TweetId) extends TweetCreateFailure { + override def toPostTweetResult: PostTweetResult = + PostTweetResult(state = TweetCreateState.AlreadyRetweeted) + } + + case class Nudged(nudge: TweetNudge) extends TweetCreateFailure { + override def toPostTweetResult: PostTweetResult = + PostTweetResult(state = TweetCreateState.Nudge, nudge = Some(nudge)) + } + + case class State(state: TweetCreateState, reason: Option[String] = None) + extends TweetCreateFailure { + require(state != 
TweetCreateState.Bounce) + require(state != TweetCreateState.Ok) + require(state != TweetCreateState.Nudge) + + override def toPostTweetResult: PostTweetResult = + PostTweetResult(state = state, failureReason = reason) + override def toString: String = s"TweetCreateFailure$$State($state, $reason)" + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala new file mode 100644 index 000000000..8e72f1e89 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala @@ -0,0 +1,86 @@ +package com.twitter.tweetypie +package core + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.tweetypie.thriftscala._ + +object TweetData { + object Lenses { + val tweet: Lens[TweetData, Tweet] = Lens[TweetData, Tweet](_.tweet, _.copy(_)) + + val suppress: Lens[TweetData, Option[FilteredState.Suppress]] = + Lens[TweetData, Option[FilteredState.Suppress]]( + _.suppress, + (td, suppress) => td.copy(suppress = suppress) + ) + + val sourceTweetResult: Lens[TweetData, Option[TweetResult]] = + Lens[TweetData, Option[TweetResult]]( + _.sourceTweetResult, + (td, sourceTweetResult) => td.copy(sourceTweetResult = sourceTweetResult) + ) + + val quotedTweetResult: Lens[TweetData, Option[QuotedTweetResult]] = + Lens[TweetData, Option[QuotedTweetResult]]( + _.quotedTweetResult, + (td, quotedTweetResult) => td.copy(quotedTweetResult = quotedTweetResult) + ) + + val cacheableTweetResult: Lens[TweetData, Option[TweetResult]] = + Lens[TweetData, Option[TweetResult]]( + _.cacheableTweetResult, + (td, cacheableTweetResult) => td.copy(cacheableTweetResult = cacheableTweetResult) + ) + + val tweetCounts: Lens[TweetData, Option[StatusCounts]] = + Lens[TweetData, Option[StatusCounts]]( + _.tweet.counts, + (td, tweetCounts) => td.copy(tweet = td.tweet.copy(counts = tweetCounts)) + ) + } + + def fromCachedTweet(cachedTweet: CachedTweet, 
cachedAt: Time): TweetData = + TweetData( + tweet = cachedTweet.tweet, + completedHydrations = cachedTweet.completedHydrations.toSet, + cachedAt = Some(cachedAt), + isBounceDeleted = cachedTweet.isBounceDeleted.contains(true) + ) +} + +/** + * Encapsulates a tweet and some hydration metadata in the hydration pipeline. + * + * @param cachedAt if the tweet was read from cache, `cachedAt` contains the time at which + * the tweet was written to cache. + */ +case class TweetData( + tweet: Tweet, + suppress: Option[FilteredState.Suppress] = None, + completedHydrations: Set[HydrationType] = Set.empty, + cachedAt: Option[Time] = None, + sourceTweetResult: Option[TweetResult] = None, + quotedTweetResult: Option[QuotedTweetResult] = None, + cacheableTweetResult: Option[TweetResult] = None, + storedTweetResult: Option[StoredTweetResult] = None, + featureSwitchResults: Option[FeatureSwitchResults] = None, + // The isBounceDeleted flag is only used when reading from an underlying + // tweet repo and caching records for not-found tweets. It only exists + // as a flag on TweetData to marshal bounce-deleted through the layered + // transforming caches injected into CachingTweetRepository, ultimately + // storing this flag in thrift on CachedTweet. + // + // During tweet hydration, TweetData.isBounceDeleted is unused and + // should always be false. 
+ isBounceDeleted: Boolean = false) { + + def addHydrated(fieldIds: Set[HydrationType]): TweetData = + copy(completedHydrations = completedHydrations ++ fieldIds) + + def toCachedTweet: CachedTweet = + CachedTweet( + tweet = tweet, + completedHydrations = completedHydrations, + isBounceDeleted = if (isBounceDeleted) Some(true) else None + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala new file mode 100644 index 000000000..317309be2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala @@ -0,0 +1,39 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.data.Lens +import com.twitter.tweetypie.Mutation +import com.twitter.tweetypie.thriftscala.Tweet + +/** + * Helper class for building instances of `TweetResult`, which is a type alias + * for `ValueState[TweetData]`. + */ +object TweetResult { + object Lenses { + val value: Lens[TweetResult, TweetData] = + Lens[TweetResult, TweetData](_.value, (r, value) => r.copy(value = value)) + val state: Lens[TweetResult, HydrationState] = + Lens[TweetResult, HydrationState](_.state, (r, state) => r.copy(state = state)) + val tweet: Lens[TweetResult, Tweet] = value.andThen(TweetData.Lenses.tweet) + } + + def apply(value: TweetData, state: HydrationState = HydrationState.empty): TweetResult = + ValueState(value, state) + + def apply(tweet: Tweet): TweetResult = + apply(TweetData(tweet = tweet)) + + /** + * Apply this mutation to the tweet contained in the result, updating the modified flag if the mutation modifies the tweet. 
+ */ + def mutate(mutation: Mutation[Tweet]): TweetResult => TweetResult = + (result: TweetResult) => + mutation(result.value.tweet) match { + case None => result + case Some(updatedTweet) => + TweetResult( + result.value.copy(tweet = updatedTweet), + result.state ++ HydrationState.modified + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala new file mode 100644 index 000000000..3acc2b2d8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala @@ -0,0 +1,37 @@ +package com.twitter.tweetypie.core + +import scala.util.control.NoStackTrace + +/** + * Parent exception class for failures while talking to upstream services. These will + * be counted and then converted to servo.ServerError.DependencyError + */ +sealed abstract class UpstreamFailure(msg: String) extends Exception(msg) with NoStackTrace + +object UpstreamFailure { + case class SnowflakeFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case object UserProfileEmptyException extends UpstreamFailure("User.profile is empty") + + case object UserViewEmptyException extends UpstreamFailure("User.view is empty") + + case object UserSafetyEmptyException extends UpstreamFailure("User.safety is empty") + + case class TweetLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case class UserLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case class DeviceSourceLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case class TFlockLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case class UrlShorteningFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case object MediaShortenUrlMalformedFailure + extends UpstreamFailure("Media shortened url is malformed") + + case object MediaExpandedUrlNotValidFailure + extends UpstreamFailure("Talon returns badInput on 
media expanded url") + + case class MediaServiceServerError(t: Throwable) extends UpstreamFailure(t.toString) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala new file mode 100644 index 000000000..e1b9ec0a4 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala @@ -0,0 +1,452 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.data.Lens +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.thriftscala.FieldByPath +import com.twitter.tweetypie.thriftscala.HydrationType + +/** + * Encapsulates a value and associated HydrationState. This class is intended to be used + * with `ValueHydrator`, as the result type for hydrators that directly produce updated values, + * in contrast with EditHydrator which uses `EditState` as a result type to produce update functions. + * + * @tparam A The type of the enclosed value, which is the result of hydration. + */ +final case class ValueState[+A](value: A, state: HydrationState) { + + /** + * Applies a function to the enclosed value and produces a new `ValueState` instance. + */ + def map[B](f: A => B): ValueState[B] = + ValueState(f(value), state) + + /** + * Produces a new `ValueState` that contains the value generated by `f`, but with state that is + * the sum of the state from this `ValueState` and the one produced by `f`. + */ + def flatMap[B](f: A => ValueState[B]): ValueState[B] = { + val ValueState(value2, state2) = f(value) + ValueState(value2, state ++ state2) + } + + /** + * Applies a function to the enclosed state and produces a new `ValueState` instance. + */ + def mapState[T](f: HydrationState => HydrationState): ValueState[A] = + ValueState(value, f(state)) + + /** + * Converts a `ValueState[A]` to an `EditState[B]`, using a lens. The resulting `EditState` + * will overwrite the lensed field with the value from this `ValueState`. 
+ */ + def edit[B, A2 >: A](lens: Lens[B, A2]): EditState[B] = + EditState[B](b => ValueState(lens.set(b, value), state)) +} + +object ValueState { + val UnmodifiedNone: ValueState[None.type] = unmodified(None) + val StitchUnmodifiedNone: Stitch[ValueState[None.type]] = Stitch.value(UnmodifiedNone) + + val UnmodifiedUnit: ValueState[Unit] = unmodified(()) + val StitchUnmodifiedUnit: Stitch[ValueState[Unit]] = Stitch.value(UnmodifiedUnit) + + val UnmodifiedNil: ValueState[Nil.type] = unmodified(Nil) + val StitchUnmodifiedNil: Stitch[ValueState[Nil.type]] = Stitch.value(UnmodifiedNil) + + /** + * Produces a ValueState instance with the given value and an empty HydrationState. + * Equivalent to `unmodified`. + */ + def unit[A](value: A): ValueState[A] = + ValueState[A](value, HydrationState.empty) + + def unmodified[A](value: A): ValueState[A] = + ValueState(value, HydrationState.empty) + + def modified[A](value: A): ValueState[A] = + ValueState(value, HydrationState.modified) + + def modified[A](value: A, hydrationType: HydrationType): ValueState[A] = + ValueState(value, HydrationState.modified(hydrationType)) + + def success[A](value: A, modified: Boolean): ValueState[A] = + ValueState(value, HydrationState(modified)) + + def delta[A](prev: A, next: A): ValueState[A] = + ValueState(next, HydrationState.delta(prev, next)) + + def partial[A](value: A, field: FieldByPath): ValueState[A] = + ValueState(value, HydrationState.partial(field)) + + def partial[A](value: A, fields: Set[FieldByPath]): ValueState[A] = + ValueState(value, HydrationState.partial(fields)) + + /** + * Converts a `Seq` of `ValueState[A]` to a `ValueState` of `Seq[A]`. 
+ */ + def sequence[A](seq: Seq[ValueState[A]]): ValueState[Seq[A]] = { + ValueState( + value = seq.map(_.value), + state = HydrationState.join(seq.map(_.state): _*) + ) + } + + def join[A, B](va: ValueState[A], vb: ValueState[B]): ValueState[(A, B)] = { + val state = + HydrationState.join( + va.state, + vb.state + ) + + val value = ( + va.value, + vb.value + ) + + ValueState(value, state) + } + + def join[A, B, C]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C] + ): ValueState[(A, B, C)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state + ) + + val value = ( + va.value, + vb.value, + vc.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D] + ): ValueState[(A, B, C, D)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E] + ): ValueState[(A, B, C, D, E)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F] + ): ValueState[(A, B, C, D, E, F)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: 
ValueState[G] + ): ValueState[(A, B, C, D, E, F, G)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H] + ): ValueState[(A, B, C, D, E, F, G, H)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value, + vh.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H, I]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H], + vi: ValueState[I] + ): ValueState[(A, B, C, D, E, F, G, H, I)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state, + vi.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value, + vh.value, + vi.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H, I, J]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H], + vi: ValueState[I], + vj: ValueState[J] + ): ValueState[(A, B, C, D, E, F, G, H, I, J)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state, + vi.state, + vj.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, 
+ vg.value, + vh.value, + vi.value, + vj.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H, I, J, K]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H], + vi: ValueState[I], + vj: ValueState[J], + vk: ValueState[K] + ): ValueState[(A, B, C, D, E, F, G, H, I, J, K)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state, + vi.state, + vj.state, + vk.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value, + vh.value, + vi.value, + vj.value, + vk.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H, I, J, K, L]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H], + vi: ValueState[I], + vj: ValueState[J], + vk: ValueState[K], + vl: ValueState[L] + ): ValueState[(A, B, C, D, E, F, G, H, I, J, K, L)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state, + vi.state, + vj.state, + vk.state, + vl.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value, + vh.value, + vi.value, + vj.value, + vk.value, + vl.value + ) + + ValueState(value, state) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala new file mode 100644 index 000000000..50952e98a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala @@ -0,0 +1,5 @@ +package com.twitter.tweetypie + +package object core { + type TweetResult = ValueState[TweetData] +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD new file mode 100644 index 000000000..967226ca4 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD @@ -0,0 +1,35 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "merlin/util/src/main/scala", + "tweetypie/servo/util/src/main/scala", + "tweetypie/servo/util/src/main/scala:exception", + "src/scala/com/twitter/ads/internal/pcl/service", + "src/thrift/com/twitter/ads/adserver:adserver_rpc-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/merlin:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "strato/src/main/scala/com/twitter/strato/access/filters", + "strato/src/main/scala/com/twitter/strato/catalog", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/config", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/server/context", + "strato/src/main/scala/com/twitter/strato/thrift", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + 
"tweetypie/server/src/main/thrift:compiled-scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala new file mode 100644 index 000000000..42841d393 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala @@ -0,0 +1,128 @@ +package com.twitter.tweetypie.federated + +import com.twitter.ads.internal.pcl.service.CallbackPromotedContentLogger +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.scrooge.ThriftStructFieldInfo +import com.twitter.servo.util.Gate +import com.twitter.strato.catalog.Catalog +import com.twitter.strato.client.Client +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.ThriftTweetService +import com.twitter.tweetypie.Tweet +import com.twitter.tweetypie.backends.Gizmoduck +import com.twitter.tweetypie.federated.columns._ +import com.twitter.tweetypie.federated.context.GetRequestContext +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataRepositoryBuilder +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger +import com.twitter.tweetypie.repository.UnmentionInfoRepository +import com.twitter.tweetypie.repository.VibeRepository +import com.twitter.util.Activity +import com.twitter.util.logging.Logger + +object StratoCatalogBuilder { + + def catalog( + thriftTweetService: ThriftTweetService, + stratoClient: Client, + getUserResultsById: Gizmoduck.GetById, + callbackPromotedContentLogger: CallbackPromotedContentLogger, + statsReceiver: StatsReceiver, + enableCommunityTweetCreatesDecider: Gate[Unit], + ): Activity[Catalog[StratoFed.Column]] = { + val log = Logger(getClass) + + val getRequestContext = new GetRequestContext() + val prefetchedDataRepository = + 
PrefetchedDataRepositoryBuilder(getUserResultsById, statsReceiver) + val unmentionInfoRepository = UnmentionInfoRepository(stratoClient) + val vibeRepository = VibeRepository(stratoClient) + + val tweetPromotedContentLogger = + TweetPromotedContentLogger(callbackPromotedContentLogger) + + // A stitch group builder to be used for Federated Field Column requests. The handler must be the same across + // all Federated Field Columns to ensure requests are batched across columns for different fields + val federatedFieldGroupBuilder: FederatedFieldGroupBuilder.Type = FederatedFieldGroupBuilder( + thriftTweetService.getTweetFields) + + val columns: Seq[StratoFed.Column] = Seq( + new UnretweetColumn( + thriftTweetService.unretweet, + getRequestContext, + ), + new CreateRetweetColumn( + thriftTweetService.postRetweet, + getRequestContext, + prefetchedDataRepository, + tweetPromotedContentLogger, + statsReceiver + ), + new CreateTweetColumn( + thriftTweetService.postTweet, + getRequestContext, + prefetchedDataRepository, + unmentionInfoRepository, + vibeRepository, + tweetPromotedContentLogger, + statsReceiver, + enableCommunityTweetCreatesDecider, + ), + new DeleteTweetColumn( + thriftTweetService.deleteTweets, + getRequestContext, + ), + new GetTweetFieldsColumn(thriftTweetService.getTweetFields, statsReceiver), + new GetStoredTweetsColumn(thriftTweetService.getStoredTweets), + new GetStoredTweetsByUserColumn(thriftTweetService.getStoredTweetsByUser) + ) + + // Gather tweet field ids that are eligible to be federated field columns + val federatedFieldInfos = + Tweet.fieldInfos + .filter((info: ThriftStructFieldInfo) => + FederatedFieldColumn.isFederatedField(info.tfield.id)) + + // Instantiate the federated field columns + val federatedFieldColumns: Seq[FederatedFieldColumn] = + federatedFieldInfos.map { fieldInfo: ThriftStructFieldInfo => + val path = FederatedFieldColumn.makeColumnPath(fieldInfo.tfield) + val stratoType = ScroogeConv.typeOfFieldInfo(fieldInfo) + 
log.info(f"creating federated column: $path") + new FederatedFieldColumn( + federatedFieldGroupBuilder, + thriftTweetService.setAdditionalFields, + stratoType, + fieldInfo.tfield, + ) + } + + // Instantiate the federated V1 field columns + val federatedV1FieldColumns: Seq[FederatedFieldColumn] = + federatedFieldInfos + .filter(f => FederatedFieldColumn.isMigrationFederatedField(f.tfield)) + .map { fieldInfo: ThriftStructFieldInfo => + val v1Path = FederatedFieldColumn.makeV1ColumnPath(fieldInfo.tfield) + val stratoType = ScroogeConv.typeOfFieldInfo(fieldInfo) + log.info(f"creating V1 federated column: $v1Path") + new FederatedFieldColumn( + federatedFieldGroupBuilder, + thriftTweetService.setAdditionalFields, + stratoType, + fieldInfo.tfield, + Some(v1Path) + ) + } + + // Combine the dynamic and hard coded federated columns + val allColumns: Seq[StratoFed.Column] = + columns ++ federatedFieldColumns ++ federatedV1FieldColumns + + Activity.value( + Catalog( + allColumns.map { column => + column.path -> column + }: _* + )) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala new file mode 100644 index 000000000..af3ee9fd2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala @@ -0,0 +1,41 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.passbird.bitfield.clientprivileges.thriftscala.{Constants => ClientAppPrivileges} +import com.twitter.strato.access.Access.AuthenticatedTwitterUserNotSuspended +import com.twitter.strato.access.Access.ClientApplicationPrivilege +import com.twitter.strato.access.Access.TwitterUserNotSuspended +import com.twitter.strato.access.ClientApplicationPrivilegeVariant +import com.twitter.strato.config._ + +object AccessPolicy { + + /** + * All Tweet Mutation operations require all of: + * - Twitter user 
authentication + * - Twitter user is not suspended + * - Contributor user, if provided, is not suspended + * - "Teams Access": user is acting on their own behalf, or is a + * contributor using a client with ClientAppPrivileges.CONTRIBUTORS + * - Write privileges + */ + val TweetMutationCommonAccessPolicies: Policy = + AllOf( + Seq( + AllowTwitterUserId, + Has( + TwitterUserNotSuspended + ), + Has( + AuthenticatedTwitterUserNotSuspended + ), + AnyOf( + Seq( + TwitterUserContributingAsSelf, + Has(principal = ClientApplicationPrivilege(ClientApplicationPrivilegeVariant + .byId(ClientAppPrivileges.CONTRIBUTORS.toShort).get)) + )), + AllowWritableAccessToken + ) + ) + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala new file mode 100644 index 000000000..82550e2c5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala @@ -0,0 +1,110 @@ +package com.twitter.tweetypie.federated +package columns + +import com.twitter.bouncer.thriftscala.Bounce +import com.twitter.finagle.http.Status +import com.twitter.finatra.api11 +import com.twitter.finatra.api11.ApiError +import com.twitter.strato.response.Err + +object ApiErrors { + // Errs ported from StatusesRetweetController + val GenericAccessDeniedErr = toErr(ApiError.GenericAccessDenied) + val AlreadyRetweetedErr = toErr(ApiError.AlreadyRetweeted) + val DuplicateStatusErr = toErr(ApiError.DuplicateStatusError) + val InvalidRetweetForStatusErr = toErr(ApiError.InvalidRetweetForStatus) + val StatusNotFoundErr = toErr(ApiError.StatusNotFound) + val BlockedUserErr = + toErr(ApiError.BlockedUserError, "retweeting this user's tweets at their request") + val ClientNotPrivilegedErr = toErr(ApiError.ClientNotPrivileged) + val UserDeniedRetweetErr = toErr(ApiError.CurrentUserSuspended) + + // Errs ported from StatusesUpdateController + val 
RateLimitExceededErr = toErr(ApiError.OverStatusUpdateLimit, "User") + val TweetUrlSpamErr = toErr(ApiError.TieredActionTweetUrlSpam) + val TweetSpammerErr = toErr(ApiError.TieredActionTweetSpammer) + val CaptchaChallengeErr = toErr(ApiError.TieredActionChallengeCaptcha) + val SafetyRateLimitExceededErr = toErr(ApiError.UserActionRateLimitExceeded, "User") + val TweetCannotBeBlankErr = // was MissingRequiredParameterException + toErr(ApiError.ForbiddenMissingParameter, "tweet_text or media") + val TweetTextTooLongErr = toErr(ApiError.StatusTooLongError) + val MalwareTweetErr = toErr(ApiError.StatusMalwareError) + val DuplicateTweetErr = toErr(ApiError.DuplicateStatusError) + val CurrentUserSuspendedErr = toErr(ApiError.CurrentUserSuspended) + val MentionLimitExceededErr = toErr(ApiError.MentionLimitInTweetExceeded) + val UrlLimitExceededErr = toErr(ApiError.UrlLimitInTweetExceeded) + val HashtagLimitExceededErr = toErr(ApiError.HashtagLimitInTweetExceeded) + val CashtagLimitExceededErr = toErr(ApiError.CashtagLimitInTweetExceeded) + val HashtagLengthLimitExceededErr = toErr(ApiError.HashtagLengthLimitInTweetExceeded) + val TooManyAttachmentTypesErr = toErr(ApiError.AttachmentTypesLimitInTweetExceeded) + val InvalidAttachmentUrlErr = toErr(ApiError.InvalidParameter("attachment_url")) + val InReplyToTweetNotFoundErr = toErr(ApiError.InReplyToTweetNotFound) + val InvalidAdditionalFieldErr = toErr(ApiError.GenericBadRequest) + def invalidAdditionalFieldWithReasonErr(failureReason: String) = + toErr(ApiError.GenericBadRequest.copy(message = failureReason)) + val InvalidUrlErr = toErr(ApiError.InvalidUrl) + val InvalidCoordinatesErr = toErr(ApiError.InvalidCoordinates) + val InvalidGeoSearchRequestIdErr = + toErr(ApiError.InvalidParameter("geo_search_request_id")) + val ConversationControlNotAuthorizedErr = toErr(ApiError.ConversationControlNotAuthorized) + val ConversationControlInvalidErr = toErr(ApiError.ConversationControlInvalid) + val 
ConversationControlReplyRestricted = toErr(ApiError.ConversationControlReplyRestricted) + + // Errors ported from StatusesDestroyController + val DeletePermissionErr = toErr(ApiError.StatusActionPermissionError("delete")) + + // See StatusesUpdateController#GenericErrorException + val GenericTweetCreateErr = toErr(ApiError.UnknownInterpreterError, "Tweet creation failed") + val InvalidBatchModeParameterErr = toErr(ApiError.InvalidParameter("batch_mode")) + val CannotConvoControlAndCommunitiesErr = + toErr(ApiError.CommunityInvalidParams, "conversation_control") + val TooManyCommunitiesErr = toErr(ApiError.CommunityInvalidParams, "communities") + val CommunityReplyTweetNotAllowedErr = toErr(ApiError.CommunityReplyTweetNotAllowed) + val ConversationControlNotSupportedErr = toErr(ApiError.ConversationControlNotSupported) + val CommunityUserNotAuthorizedErr = toErr(ApiError.CommunityUserNotAuthorized) + val CommunityNotFoundErr = toErr(ApiError.CommunityNotFound) + val CommunityProtectedUserCannotTweetErr = toErr(ApiError.CommunityProtectedUserCannotTweet) + + val SuperFollowCreateNotAuthorizedErr = toErr(ApiError.SuperFollowsCreateNotAuthorized) + val SuperFollowInvalidParamsErr = toErr(ApiError.SuperFollowsInvalidParams) + val ExclusiveTweetEngagementNotAllowedErr = toErr(ApiError.ExclusiveTweetEngagementNotAllowed) + + val SafetyLevelMissingErr = toErr(ApiError.MissingParameter("safety_level")) + + def accessDeniedByBouncerErr(bounce: Bounce) = + toErr(ApiError.AccessDeniedByBouncer, bounce.errorMessage.getOrElse(Seq.empty)) + + def tweetEngagementLimitedErr(failureReason: String) = + toErr(ApiError.TweetEngagementsLimited(failureReason)) + + def invalidMediaErr(failureReason: Option[String]) = + toErr(ApiError.invalidMediaId(failureReason)) + + val TrustedFriendsInvalidParamsErr = toErr(ApiError.TrustedFriendsInvalidParams) + val TrustedFriendsRetweetNotAllowedErr = toErr(ApiError.TrustedFriendsRetweetNotAllowed) + val TrustedFriendsEngagementNotAllowedErr = 
toErr(ApiError.TrustedFriendsEngagementNotAllowed) + val TrustedFriendsCreateNotAllowedErr = toErr(ApiError.TrustedFriendsCreateNotAllowed) + val TrustedFriendsQuoteTweetNotAllowedErr = toErr(ApiError.TrustedFriendsQuoteTweetNotAllowed) + + val StaleTweetEngagementNotAllowedErr = toErr(ApiError.StaleTweetEngagementNotAllowed) + val StaleTweetQuoteTweetNotAllowedErr = toErr(ApiError.StaleTweetQuoteTweetNotAllowed) + val StaleTweetRetweetNotAllowedErr = toErr(ApiError.StaleTweetRetweetNotAllowed) + + val CollabTweetInvalidParamsErr = toErr(ApiError.CollabTweetInvalidParams) + + val FieldEditNotAllowedErr = toErr(ApiError.FieldEditNotAllowed) + val NotEligibleForEditErr = toErr(ApiError.NotEligibleForEdit) + + def toErr(apiError: api11.ApiError, args: Any*): Err = { + val errCode = apiError.status match { + case Status.Forbidden => Err.Authorization + case Status.Unauthorized => Err.Authentication + case Status.NotFound => Err.BadRequest + case Status.BadRequest => Err.BadRequest + case _ => Err.BadRequest + } + val errMessage = s"${apiError.message.format(args.mkString(","))} (${apiError.code})" + val errContext = Some(Err.Context.Api11Error(apiError.code)) + Err(errCode, errMessage, errContext) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD new file mode 100644 index 000000000..7148dfa4b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD @@ -0,0 +1,43 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "decider", + "finagle/finagle-base-http/src/main", + "finatra-internal/api11/src/main/scala/com/twitter/finatra/api11:errors", + "geo/model/src/main/scala/com/twitter/geo/model", + "passbird/bitfields-thrift/src/main/thrift:thrift-scala", + 
"tweetypie/servo/util/src/main/scala:exception", + "src/scala/com/twitter/accounts/util:safety-meta", + "src/thrift/com/twitter/ads/adserver:ad_engagement_details-scala", + "src/thrift/com/twitter/ads/adserver:preroll_metadata-scala", + "src/thrift/com/twitter/ads/callback:engagement_request-scala", + "src/thrift/com/twitter/bouncer:bounce-action-thrift-scala", + "src/thrift/com/twitter/consumer_privacy/mention_controls:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-service-federated-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql-scala", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/response", + "strato/src/main/scala/com/twitter/strato/thrift", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "vibes/src/main/thrift/com/twitter/vibes:vibes-scala", + 
"weaverbird/common/src/main/scala/com/twitter/weaverbird/common", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala new file mode 100644 index 000000000..0acf695d2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala @@ -0,0 +1,184 @@ +package com.twitter.tweetypie +package federated.columns + +import com.twitter.accounts.util.SafetyMetadataUtils +import com.twitter.ads.callback.thriftscala.EngagementRequest +import com.twitter.bouncer.thriftscala.{Bounce => BouncerBounce} +import com.twitter.stitch.Stitch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AllOf +import com.twitter.strato.config.BouncerAccess +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.response.Err +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.federated.columns.ApiErrors._ +import com.twitter.tweetypie.federated.columns.CreateRetweetColumn.toCreateRetweetErr +import com.twitter.tweetypie.federated.context.GetRequestContext +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataRequest +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataResponse +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger.RetweetEngagement +import com.twitter.tweetypie.thriftscala.TweetCreateState._ +import com.twitter.tweetypie.thriftscala.{graphql => gql} +import com.twitter.tweetypie.{thriftscala 
=> thrift}
+import com.twitter.weaverbird.common.{GetRequestContext => WGetRequestContext}
+
+/**
+ * Strato federated column (see [[CreateRetweetColumn.Path]]) that creates a retweet.
+ *
+ * `execute` validates the request context, builds a thrift RetweetRequest, calls the
+ * injected `retweet` function and, when the result state is Ok, asks
+ * `prefetchedDataRepository` for the prefetch items returned alongside the new tweet id.
+ * Any other result state is converted with [[CreateRetweetColumn.toCreateRetweetErr]]
+ * and thrown.
+ *
+ * @param retweet function that performs the retweet
+ * @param getRequestContext extracts the request context from the Strato OpContext
+ * @param prefetchedDataRepository fetches prefetched data for the created tweet
+ * @param logTweetPromotedContent logger invoked when the request has an engagementRequest
+ * @param statsReceiver passed to TrackingId.parse
+ */
+class CreateRetweetColumn(
+  retweet: thrift.RetweetRequest => Future[thrift.PostTweetResult],
+  getRequestContext: GetRequestContext,
+  prefetchedDataRepository: PrefetchedDataRequest => Stitch[PrefetchedDataResponse],
+  logTweetPromotedContent: TweetPromotedContentLogger.Type,
+  statsReceiver: StatsReceiver,
+) extends StratoFed.Column(CreateRetweetColumn.Path)
+    with StratoFed.Execute.StitchWithContext
+    with StratoFed.HandleDarkRequests {
+
+  override val policy: Policy = AllOf(
+    Seq(AccessPolicy.TweetMutationCommonAccessPolicies, BouncerAccess()))
+
+  // The underlying call to thriftTweetService.postRetweet is not idempotent
+  override val isIdempotent: Boolean = false
+
+  override type Arg = gql.CreateRetweetRequest
+  override type Result = gql.CreateRetweetResponseWithSubqueryPrefetchItems
+
+  override val argConv: Conv[Arg] = ScroogeConv.fromStruct
+  override val resultConv: Conv[Result] = ScroogeConv.fromStruct
+
+  override val contactInfo: ContactInfo = TweetypieContactInfo
+  override val metadata: OpMetadata = OpMetadata(
+    Some(Production),
+    Some(PlainText("Creates a retweet by the calling Twitter user of the given source tweet.")))
+
+  private val getWeaverbirdCtx = new WGetRequestContext()
+
+  /**
+   * Validates the request, issues the retweet and assembles the GraphQL response.
+   *
+   * Validation failures (missing safety level, missing privilege for an engagement
+   * request, missing device source) are thrown directly from this method before the
+   * retweet function is invoked.
+   */
+  override def execute(request: Arg, opContext: OpContext): Stitch[Result] = {
+    val ctx = getRequestContext(opContext)
+
+    // First, do any request parameter validation that can result in an error
+    // prior to calling into thriftTweetService.retweet.
+    val safetyLevel = ctx.safetyLevel.getOrElse(throw SafetyLevelMissingErr)
+
+    // Macaw-tweets returns ApiError.ClientNotPrivileged if the caller provides
+    // an impression_id but lacks the PROMOTED_TWEETS_IN_TIMELINE privilege.
+    val trackingId = request.engagementRequest match {
+      case Some(engagementRequest: EngagementRequest) if ctx.hasPrivilegePromotedTweetsInTimeline =>
+        TrackingId.parse(engagementRequest.impressionId, statsReceiver)
+      case Some(e: EngagementRequest) =>
+        throw ClientNotPrivilegedErr
+      case None =>
+        None
+    }
+
+    // DeviceSource is an oauth string computed from the ClientApplicationId.
+    // Macaw-tweets allows non-oauth callers, but GraphQL does not. An undefined
+    // ClientApplicationId is similar to TweetCreateState.DeviceSourceNotFound,
+    // which Macaw-tweets handles via a catch-all that returns
+    // ApiError.GenericAccessDenied
+    val deviceSource = ctx.deviceSource.getOrElse(throw GenericAccessDeniedErr)
+
+    // Macaw-tweets doesn't perform any parameter validation for the components
+    // used as input to makeSafetyMetaData.
+    val safetyMetadata = SafetyMetadataUtils.makeSafetyMetaData(
+      sessionHash = ctx.sessionHash,
+      knownDeviceToken = ctx.knownDeviceToken,
+      contributorId = ctx.contributorId
+    )
+
+    val thriftRetweetRequest = thrift.RetweetRequest(
+      sourceStatusId = request.tweetId,
+      userId = ctx.twitterUserId,
+      contributorUserId = None, // no longer supported, per tweet_service.thrift
+      createdVia = deviceSource,
+      nullcast = request.nullcast,
+      trackingId = trackingId,
+      dark = ctx.isDarkRequest,
+      hydrationOptions = Some(HydrationOptions.writePathHydrationOptions(ctx.cardsPlatformKey)),
+      safetyMetaData = Some(safetyMetadata),
+    )
+
+    val stitchRetweet = Stitch.callFuture(retweet(thriftRetweetRequest))
+
+    // This logging call is made here, outside the flatMap on stitchRetweet below, so it
+    // does not depend on the state of the retweet result.
+    request.engagementRequest.foreach { engagement =>
+      logTweetPromotedContent(engagement, RetweetEngagement, ctx.isDarkRequest)
+    }
+
+    stitchRetweet.flatMap { result: thrift.PostTweetResult =>
+      result.state match {
+        case thrift.TweetCreateState.Ok =>
+          // NOTE(review): `result.tweet.get` assumes the tweet is always defined when the
+          // state is Ok; it throws NoSuchElementException otherwise -- confirm with the
+          // service contract.
+          val r = PrefetchedDataRequest(
+            tweet = result.tweet.get,
+            sourceTweet = result.sourceTweet,
+            quotedTweet = result.quotedTweet,
+            safetyLevel = safetyLevel,
+            requestContext = getWeaverbirdCtx()
+          )
+
+          // liftToOption presumably turns a failed prefetch into None so that the
+          // mutation response is still returned -- TODO confirm against Stitch docs.
+          prefetchedDataRepository(r)
+            .liftToOption()
+            .map((prefetchedData: Option[PrefetchedDataResponse]) => {
+              gql.CreateRetweetResponseWithSubqueryPrefetchItems(
+                data = Some(gql.CreateRetweetResponse(result.tweet.map(_.id))),
+                subqueryPrefetchItems = prefetchedData.map(_.value)
+              )
+            })
+        case errState =>
+          throw toCreateRetweetErr(errState, result.bounce, result.failureReason)
+      }
+    }
+  }
+}
+
+object CreateRetweetColumn {
+  val Path = "tweetypie/createRetweet.Tweet"
+
+  /**
+   * Ported from:
+   *   StatusesRetweetController#retweetStatus rescue block
+   *   TweetyPieStatusRepository.toRetweetException
+   *
+   * Maps a non-Ok TweetCreateState to the Err thrown by the column.
+   *
+   * @param errState the create state reported in the PostTweetResult
+   * @param bounce bouncer payload; only used when errState is Bounce and it is defined
+   * @param failureReason only used for DisabledByIpiPolicy; when absent that state
+   *                      maps to GenericAccessDeniedErr
+   * @return the matching Err; any state without a dedicated case (including Bounce
+   *         with no bounce payload) maps to GenericAccessDeniedErr
+   */
+  def toCreateRetweetErr(
+    errState: thrift.TweetCreateState,
+    bounce: Option[BouncerBounce],
+    failureReason: Option[String]
+  ): Err = errState match {
+    case CannotRetweetBlockingUser =>
+      BlockedUserErr
+    case AlreadyRetweeted =>
+      AlreadyRetweetedErr
+    case Duplicate =>
+      DuplicateStatusErr
+    case CannotRetweetOwnTweet | CannotRetweetProtectedTweet | CannotRetweetSuspendedUser =>
+      InvalidRetweetForStatusErr
+    case UserNotFound | SourceTweetNotFound | SourceUserNotFound | CannotRetweetDeactivatedUser =>
+      StatusNotFoundErr
+    case UserDeactivated | UserSuspended =>
+      UserDeniedRetweetErr
+    case RateLimitExceeded =>
+      RateLimitExceededErr
+    case UrlSpam =>
+      TweetUrlSpamErr
+    case Spam | UserReadonly =>
+      TweetSpammerErr
+    case SafetyRateLimitExceeded =>
+      SafetyRateLimitExceededErr
+    case Bounce if bounce.isDefined =>
+      accessDeniedByBouncerErr(bounce.get)
+    case DisabledByIpiPolicy =>
+      failureReason
+        .map(tweetEngagementLimitedErr)
+        .getOrElse(GenericAccessDeniedErr)
+    case TrustedFriendsRetweetNotAllowed =>
+      TrustedFriendsRetweetNotAllowedErr
+    case StaleTweetRetweetNotAllowed =>
+      StaleTweetRetweetNotAllowedErr
+    case _ =>
+      GenericAccessDeniedErr
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala new file mode 100644 index 000000000..3530d68d8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala @@ -0,0 +1,546 @@ +package com.twitter.tweetypie +package federated.columns + +import com.twitter.accounts.util.SafetyMetadataUtils +import com.twitter.ads.callback.thriftscala.EngagementRequest +import com.twitter.bouncer.thriftscala.{Bounce => BouncerBounce} +import com.twitter.escherbird.thriftscala.TweetEntityAnnotation +import com.twitter.geo.model.LatitudeLongitude +import com.twitter.stitch.Stitch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AllOf +import com.twitter.strato.config.BouncerAccess +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.response.Err +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.decider.overrides.TweetyPieDeciderOverrides +import com.twitter.tweetypie.federated.columns.ApiErrors._ +import com.twitter.tweetypie.federated.columns.CreateTweetColumn.toCreateTweetErr +import com.twitter.tweetypie.federated.context.GetRequestContext +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataRequest +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataResponse +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger._ +import com.twitter.tweetypie.repository.UnmentionInfoRepository +import com.twitter.tweetypie.repository.VibeRepository +import 
com.twitter.tweetypie.thriftscala.TransientCreateContext +import com.twitter.tweetypie.thriftscala.TweetCreateContextKey +import com.twitter.tweetypie.thriftscala.TweetCreateState._ +import com.twitter.tweetypie.thriftscala.{graphql => gql} +import com.twitter.tweetypie.util.CommunityAnnotation +import com.twitter.tweetypie.util.ConversationControls +import com.twitter.tweetypie.util.TransientContextUtil +import com.twitter.tweetypie.{thriftscala => thrift} +import com.twitter.util.Throwables +import com.twitter.weaverbird.common.{GetRequestContext => WGetRequestContext} + +class CreateTweetColumn( + postTweet: thrift.PostTweetRequest => Future[thrift.PostTweetResult], + getRequestContext: GetRequestContext, + prefetchedDataRepository: PrefetchedDataRequest => Stitch[PrefetchedDataResponse], + unmentionInfoRepository: UnmentionInfoRepository.Type, + vibeRepository: VibeRepository.Type, + logTweetPromotedContent: TweetPromotedContentLogger.Type, + statsReceiver: StatsReceiver, + enableCommunityTweetCreatesDecider: Gate[Unit], +) extends StratoFed.Column(CreateTweetColumn.Path) + with StratoFed.Execute.StitchWithContext + with StratoFed.HandleDarkRequests { + + override val policy: Policy = AllOf( + Seq(AccessPolicy.TweetMutationCommonAccessPolicies, BouncerAccess())) + + // The underlying call to thriftTweetService.postRetweet is not idempotent + override val isIdempotent: Boolean = false + + override type Arg = gql.CreateTweetRequest + override type Result = gql.CreateTweetResponseWithSubqueryPrefetchItems + + override val argConv: Conv[Arg] = ScroogeConv.fromStruct + override val resultConv: Conv[Result] = ScroogeConv.fromStruct + + override val contactInfo: ContactInfo = TweetypieContactInfo + override val metadata: OpMetadata = + OpMetadata( + Some(Production), + Some( + PlainText( + """ + Creates a tweet using the calling authenticated Twitter user as author. + NOTE, not all Tweet space fields are GraphQL queryable in the CreateTweet mutation response. 
+ See http://go/missing-create-tweet-fields. + """)) + ) + + private val getWeaverbirdCtx = new WGetRequestContext() + + override def execute(request: Arg, opContext: OpContext): Stitch[Result] = { + + val ctx = getRequestContext(opContext) + + // First, do any request parameter validation that can result in an error + // prior to calling into thriftTweetService.postTweet. + val safetyLevel = ctx.safetyLevel.getOrElse(throw SafetyLevelMissingErr) + + val trackingId = request.engagementRequest match { + case Some(engagementRequest: EngagementRequest) if ctx.hasPrivilegePromotedTweetsInTimeline => + TrackingId.parse(engagementRequest.impressionId, statsReceiver) + case Some(e: EngagementRequest) => + throw ClientNotPrivilegedErr + case None => + None + } + + val deviceSource = ctx.deviceSource.getOrElse(throw GenericAccessDeniedErr) + + if (request.nullcast && !ctx.hasPrivilegeNullcastingAccess) { + throw GenericAccessDeniedErr + } + + val safetyMetadata = SafetyMetadataUtils.makeSafetyMetaData( + sessionHash = ctx.sessionHash, + knownDeviceToken = ctx.knownDeviceToken, + contributorId = ctx.contributorId + ) + + val cardReference: Option[thrift.CardReference] = + request.cardUri.filter(_.nonEmpty).map(thrift.CardReference(_)) + + val escherbirdEntityAnnotations: Option[thrift.EscherbirdEntityAnnotations] = + request.semanticAnnotationIds + .filter(_.nonEmpty) + .map((seq: Seq[gql.TweetAnnotation]) => seq.map(parseTweetEntityAnnotation)) + .map(thrift.EscherbirdEntityAnnotations(_)) + + val mediaEntities = request.media.map(_.mediaEntities) + val mediaUploadIds = mediaEntities.map(_.map(_.mediaId)).filter(_.nonEmpty) + + val mediaTags: Option[thrift.TweetMediaTags] = { + val mediaTagsAuthorized = !ctx.isContributorRequest + + val tagMap: Map[MediaId, Seq[thrift.MediaTag]] = + mediaEntities + .getOrElse(Nil) + .filter(_ => mediaTagsAuthorized) + .filter(_.taggedUsers.nonEmpty) + .map(mediaEntity => + mediaEntity.mediaId -> + mediaEntity.taggedUsers + .map(user_id => 
thrift.MediaTag(thrift.MediaTagType.User, Some(user_id)))) + .toMap + + Option(tagMap) + .filter(_.nonEmpty) + .map(thrift.TweetMediaTags(_)) + } + + // Can not have both conversation controls and communities defined for a tweet + // as they have conflicting permissions on who can reply to the tweet. + val communities = parseCommunityIds(escherbirdEntityAnnotations) + if (request.conversationControl.isDefined && communities.nonEmpty) { + throw CannotConvoControlAndCommunitiesErr + } + + // Currently we do not support posting to multiple communities. + if (communities.length > 1) { + throw TooManyCommunitiesErr + } + + // Kill switch for community tweets in case we need to disable them for app security. + if (communities.nonEmpty && !enableCommunityTweetCreatesDecider()) { + throw CommunityUserNotAuthorizedErr + } + + // additionalFields is used to marshal multiple input params and + // should only be defined if one or more of those params are defined. + val additionalFields: Option[Tweet] = + cardReference + .orElse(escherbirdEntityAnnotations) + .orElse(mediaTags) + .map(_ => + thrift.Tweet( + 0L, + cardReference = cardReference, + escherbirdEntityAnnotations = escherbirdEntityAnnotations, + mediaTags = mediaTags + )) + + val transientContext: Option[TransientCreateContext] = + parseTransientContext( + request.batchCompose, + request.periscope, + ctx.twitterUserId, + ) + + // PostTweetRequest.additionalContext is marked as deprecated in favor of .transientContext, + // but the REST API still supports it and it is still passed along through Tweetypie, and + // FanoutService and Notifications still depend on it. 
+ val additionalContext: Option[Map[TweetCreateContextKey, String]] = + transientContext.map(TransientContextUtil.toAdditionalContext) + + val thriftPostTweetRequest = thrift.PostTweetRequest( + userId = ctx.twitterUserId, + text = request.tweetText, + createdVia = deviceSource, + inReplyToTweetId = request.reply.map(_.inReplyToTweetId), + geo = request.geo.flatMap(parseTweetCreateGeo), + autoPopulateReplyMetadata = request.reply.isDefined, + excludeReplyUserIds = request.reply.map(_.excludeReplyUserIds).filter(_.nonEmpty), + nullcast = request.nullcast, + // Send a dark request to Tweetypie if the dark_request directive is set or + // if the Tweet is undo-able. + dark = ctx.isDarkRequest || request.undoOptions.exists(_.isUndo), + hydrationOptions = Some(HydrationOptions.writePathHydrationOptions(ctx.cardsPlatformKey)), + remoteHost = ctx.remoteHost, + safetyMetaData = Some(safetyMetadata), + attachmentUrl = request.attachmentUrl, + mediaUploadIds = mediaUploadIds, + mediaMetadata = None, + transientContext = transientContext, + additionalContext = additionalContext, + conversationControl = request.conversationControl.map(parseTweetCreateConversationControl), + exclusiveTweetControlOptions = request.exclusiveTweetControlOptions.map { _ => + thrift.ExclusiveTweetControlOptions() + }, + trustedFriendsControlOptions = + request.trustedFriendsControlOptions.map(parseTrustedFriendsControlOptions), + editOptions = request.editOptions.flatMap(_.previousTweetId.map(thrift.EditOptions(_))), + collabControlOptions = request.collabControlOptions.map(parseCollabControlOptions), + additionalFields = additionalFields, + trackingId = trackingId, + noteTweetOptions = request.noteTweetOptions.map(options => + thrift.NoteTweetOptions( + options.noteTweetId, + options.mentionedScreenNames, + options.mentionedUserIds, + options.isExpandable)) + ) + + val stitchPostTweet = + Stitch.callFuture { + TweetyPieDeciderOverrides.ConversationControlUseFeatureSwitchResults.On { + 
postTweet(thriftPostTweetRequest) + } + } + + for { + engagement <- request.engagementRequest + if !request.reply.exists(_.inReplyToTweetId == 0) // no op per go/rb/845242 + engagementType = if (request.reply.isDefined) ReplyEngagement else TweetEngagement + } logTweetPromotedContent(engagement, engagementType, ctx.isDarkRequest) + + stitchPostTweet.flatMap { result: thrift.PostTweetResult => + result.state match { + + case thrift.TweetCreateState.Ok => + val unmentionSuccessCounter = statsReceiver.counter("unmention_info_success") + val unmentionFailuresCounter = statsReceiver.counter("unmention_info_failures") + val unmentionFailuresScope = statsReceiver.scope("unmention_info_failures") + + val unmentionInfoStitch = result.tweet match { + case Some(tweet) => + unmentionInfoRepository(tweet) + .onFailure { t => + unmentionFailuresCounter.incr() + unmentionFailuresScope.counter(Throwables.mkString(t): _*).incr() + } + .onSuccess { _ => + unmentionSuccessCounter.incr() + } + .rescue { + case _ => + Stitch.None + } + case _ => + Stitch.None + } + + val vibeSuccessCounter = statsReceiver.counter("vibe_success") + val vibeFailuresCounter = statsReceiver.counter("vibe_failures") + val vibeFailuresScope = statsReceiver.scope("vibe_failures") + + val vibeStitch = result.tweet match { + case Some(tweet) => + vibeRepository(tweet) + .onSuccess { _ => + vibeSuccessCounter.incr() + } + .onFailure { t => + vibeFailuresCounter.incr() + vibeFailuresScope.counter(Throwables.mkString(t): _*).incr() + } + .rescue { + case _ => + Stitch.None + } + case _ => + Stitch.None + } + + Stitch + .join(unmentionInfoStitch, vibeStitch) + .liftToOption() + .flatMap { prefetchFields => + val r = PrefetchedDataRequest( + tweet = result.tweet.get, + sourceTweet = result.sourceTweet, + quotedTweet = result.quotedTweet, + safetyLevel = safetyLevel, + unmentionInfo = prefetchFields.flatMap(params => params._1), + vibe = prefetchFields.flatMap(params => params._2), + requestContext = 
getWeaverbirdCtx() + ) + + prefetchedDataRepository(r) + .liftToOption() + .map((prefetchedData: Option[PrefetchedDataResponse]) => { + gql.CreateTweetResponseWithSubqueryPrefetchItems( + data = Some(gql.CreateTweetResponse(result.tweet.map(_.id))), + subqueryPrefetchItems = prefetchedData.map(_.value) + ) + }) + } + + case errState => + throw toCreateTweetErr(errState, result.bounce, result.failureReason) + } + } + } + + private[this] def parseTweetCreateGeo(gqlGeo: gql.TweetGeo): Option[thrift.TweetCreateGeo] = { + val coordinates: Option[thrift.GeoCoordinates] = + gqlGeo.coordinates.map { coords => + LatitudeLongitude.of(coords.latitude, coords.longitude) match { + case Return(latlon: LatitudeLongitude) => + thrift.GeoCoordinates( + latitude = latlon.latitudeDegrees, + longitude = latlon.longitudeDegrees, + geoPrecision = latlon.precision, + display = coords.displayCoordinates + ) + case Throw(_) => + throw InvalidCoordinatesErr + } + } + + val geoSearchRequestId = gqlGeo.geoSearchRequestId.map { id => + if (id.isEmpty) { + throw InvalidGeoSearchRequestIdErr + } + thrift.TweetGeoSearchRequestID(id) + } + + if (coordinates.isEmpty && gqlGeo.placeId.isEmpty) { + None + } else { + Some( + thrift.TweetCreateGeo( + coordinates = coordinates, + placeId = gqlGeo.placeId, + geoSearchRequestId = geoSearchRequestId + )) + } + } + + private[this] def parseTweetCreateConversationControl( + gqlCC: gql.TweetConversationControl + ): thrift.TweetCreateConversationControl = + gqlCC.mode match { + case gql.ConversationControlMode.ByInvitation => + ConversationControls.Create.byInvitation() + case gql.ConversationControlMode.Community => + ConversationControls.Create.community() + case gql.ConversationControlMode.EnumUnknownConversationControlMode(_) => + throw ConversationControlNotSupportedErr + } + + private[this] def parseTweetEntityAnnotation( + gqlTweetAnnotation: gql.TweetAnnotation + ): TweetEntityAnnotation = + TweetEntityAnnotation( + gqlTweetAnnotation.groupId, + 
gqlTweetAnnotation.domainId, + gqlTweetAnnotation.entityId + ) + + private[this] def parseCommunityIds( + escherbirdAnnotations: Option[thrift.EscherbirdEntityAnnotations] + ): Seq[Long] = + escherbirdAnnotations + .map(_.entityAnnotations).getOrElse(Nil) + .flatMap { + case CommunityAnnotation(id) => Seq(id) + case _ => Nil + } + + private[this] def parseBatchMode( + gqlBatchComposeMode: gql.BatchComposeMode + ): thrift.BatchComposeMode = { + + gqlBatchComposeMode match { + case gql.BatchComposeMode.BatchFirst => + thrift.BatchComposeMode.BatchFirst + case gql.BatchComposeMode.BatchSubsequent => + thrift.BatchComposeMode.BatchSubsequent + case gql.BatchComposeMode.EnumUnknownBatchComposeMode(_) => + throw InvalidBatchModeParameterErr + } + } + + private[this] def parseTransientContext( + gqlBatchComposeMode: Option[gql.BatchComposeMode], + gqlPeriscope: Option[gql.TweetPeriscopeContext], + twitterUserId: UserId, + ): Option[TransientCreateContext] = { + val batchComposeMode = gqlBatchComposeMode.map(parseBatchMode) + + // Per c.t.fanoutservice.model.Tweet#deviceFollowType, isLive=None and Some(false) are + // equivalent and the creatorId is discarded in both cases. 
+ val periscopeIsLive = gqlPeriscope.map(_.isLive).filter(_ == true) + val periscopeCreatorId = if (periscopeIsLive.isDefined) Some(twitterUserId) else None + + if (batchComposeMode.isDefined || periscopeIsLive.isDefined) { + Some( + thrift.TransientCreateContext( + batchCompose = batchComposeMode, + periscopeIsLive = periscopeIsLive, + periscopeCreatorId = periscopeCreatorId + ) + ) + } else { + None + } + } + + private[this] def parseTrustedFriendsControlOptions( + gqlTrustedFriendsControlOptions: gql.TrustedFriendsControlOptions + ): thrift.TrustedFriendsControlOptions = { + thrift.TrustedFriendsControlOptions( + trustedFriendsListId = gqlTrustedFriendsControlOptions.trustedFriendsListId + ) + } + + private[this] def parseCollabControlOptions( + gqlCollabControlOptions: gql.CollabControlOptions + ): thrift.CollabControlOptions = { + gqlCollabControlOptions.collabControlType match { + case gql.CollabControlType.CollabInvitation => + thrift.CollabControlOptions.CollabInvitation( + thrift.CollabInvitationOptions( + collaboratorUserIds = gqlCollabControlOptions.collaboratorUserIds + ) + ) + case gql.CollabControlType.EnumUnknownCollabControlType(_) => + throw CollabTweetInvalidParamsErr + } + } +} + +object CreateTweetColumn { + val Path = "tweetypie/createTweet.Tweet" + + def toCreateTweetErr( + errState: thrift.TweetCreateState, + bounce: Option[BouncerBounce], + failureReason: Option[String] + ): Err = errState match { + case TextCannotBeBlank => + TweetCannotBeBlankErr + case TextTooLong => + TweetTextTooLongErr + case Duplicate => + DuplicateStatusErr + case MalwareUrl => + MalwareTweetErr + case UserDeactivated | UserSuspended => + // should not occur since this condition is caught by access policy filters + CurrentUserSuspendedErr + case RateLimitExceeded => + RateLimitExceededErr + case UrlSpam => + TweetUrlSpamErr + case Spam | UserReadonly => + TweetSpammerErr + case SpamCaptcha => + CaptchaChallengeErr + case SafetyRateLimitExceeded => + 
SafetyRateLimitExceededErr + case Bounce if bounce.isDefined => + accessDeniedByBouncerErr(bounce.get) + case MentionLimitExceeded => + MentionLimitExceededErr + case UrlLimitExceeded => + UrlLimitExceededErr + case HashtagLimitExceeded => + HashtagLimitExceededErr + case CashtagLimitExceeded => + CashtagLimitExceededErr + case HashtagLengthLimitExceeded => + HashtagLengthLimitExceededErr + case TooManyAttachmentTypes => + TooManyAttachmentTypesErr + case InvalidUrl => + InvalidUrlErr + case DisabledByIpiPolicy => + failureReason + .map(tweetEngagementLimitedErr) + .getOrElse(GenericTweetCreateErr) + case InvalidAdditionalField => + failureReason + .map(invalidAdditionalFieldWithReasonErr) + .getOrElse(InvalidAdditionalFieldErr) + // InvalidImage has been deprecated by tweetypie. Use InvalidMedia instead. + case InvalidMedia | InvalidImage | MediaNotFound => + invalidMediaErr(failureReason) + case InReplyToTweetNotFound => + InReplyToTweetNotFoundErr + case InvalidAttachmentUrl => + InvalidAttachmentUrlErr + case ConversationControlNotAllowed => + ConversationControlNotAuthorizedErr + case InvalidConversationControl => + ConversationControlInvalidErr + case ReplyTweetNotAllowed => + ConversationControlReplyRestricted + case ExclusiveTweetEngagementNotAllowed => + ExclusiveTweetEngagementNotAllowedErr + case CommunityReplyTweetNotAllowed => + CommunityReplyTweetNotAllowedErr + case CommunityUserNotAuthorized => + CommunityUserNotAuthorizedErr + case CommunityNotFound => + CommunityNotFoundErr + case SuperFollowsInvalidParams => + SuperFollowInvalidParamsErr + case SuperFollowsCreateNotAuthorized => + SuperFollowCreateNotAuthorizedErr + case CommunityProtectedUserCannotTweet => + CommunityProtectedUserCannotTweetErr + case TrustedFriendsInvalidParams => + TrustedFriendsInvalidParamsErr + case TrustedFriendsEngagementNotAllowed => + TrustedFriendsEngagementNotAllowedErr + case TrustedFriendsCreateNotAllowed => + TrustedFriendsCreateNotAllowedErr + case 
/**
 * Strato federated column that deletes a single tweet on behalf of the
 * calling user and reports the outcome as a GraphQL response.
 */
class DeleteTweetColumn(
  deleteTweet: thrift.DeleteTweetsRequest => Future[Seq[thrift.DeleteTweetResult]],
  getRequestContext: GetRequestContext,
) extends StratoFed.Column(DeleteTweetColumn.Path)
    with StratoFed.Execute.StitchWithContext
    with StratoFed.HandleDarkRequests {

  override val policy: Policy = AccessPolicy.TweetMutationCommonAccessPolicies

  override val isIdempotent: Boolean = true

  override type Arg = gql.DeleteTweetRequest
  override type Result = gql.DeleteTweetResponseWithSubqueryPrefetchItems

  override val argConv: Conv[Arg] = ScroogeConv.fromStruct
  override val resultConv: Conv[Result] = ScroogeConv.fromStruct

  override val contactInfo: ContactInfo = TweetypieContactInfo
  override val metadata: OpMetadata =
    OpMetadata(Some(Production), Some(PlainText("Deletes a tweet by the calling Twitter user.")))

  /**
   * Issues the delete for `request.tweetId` (or a canned Ok result for dark
   * requests) and converts the first returned result into the GraphQL response.
   */
  override def execute(request: Arg, opContext: OpContext): Stitch[Result] = {
    val requestContext = getRequestContext(opContext)

    // byUserId is picked up by the context in tweetypie.deleteTweet,
    // but it is passed here as well to be explicit.
    val deleteRequest = thrift.DeleteTweetsRequest(
      tweetIds = Seq(request.tweetId),
      byUserId = Some(requestContext.twitterUserId),
    )

    // The light/dark arguments are deliberately kept inline: hoisting the light
    // call into a val would send traffic to tweetypie even for dark requests.
    val deleteResults = handleDarkRequest(opContext)(
      light = Stitch.callFuture(deleteTweet(deleteRequest)),
      // Dark requests must not reach tweetypie. The response is the same
      // regardless of the request, so a no-op Ok result is used instead.
      dark = Stitch.value(Seq(thrift.DeleteTweetResult(request.tweetId, TweetDeleteState.Ok)))
    )

    deleteResults.map(toGqlResponse)
  }

  /**
   * Maps the first delete result to the GraphQL response.
   *
   * Throws `ApiErrors.DeletePermissionErr` on a permission error, and
   * `ApiErrors.GenericAccessDeniedErr` for any other state or an empty result.
   */
  private def toGqlResponse(results: Seq[thrift.DeleteTweetResult]): Result =
    results.headOption match {
      case Some(thrift.DeleteTweetResult(deletedId, TweetDeleteState.Ok)) =>
        gql.DeleteTweetResponseWithSubqueryPrefetchItems(
          data = Some(gql.DeleteTweetResponse(Some(deletedId))),
          // Prefetch data is always NotFound to prevent subqueries from hydrating via
          // weaverbird and possibly returning inconsistent results, i.e. a Found tweet.
          subqueryPrefetchItems = Some(PrefetchedDataResponse.notFound(deletedId).value)
        )
      case Some(thrift.DeleteTweetResult(_, TweetDeleteState.PermissionError)) =>
        throw ApiErrors.DeletePermissionErr
      case _ =>
        throw ApiErrors.GenericAccessDeniedErr
    }
}

object DeleteTweetColumn {
  val Path = "tweetypie/deleteTweet.Tweet"
}
/**
 * Federated strato column to fetch or write a single tweet field.
 *
 * @param federatedFieldsGroup Group to be used for Stitch batching.
 *   This is a function that takes a GroupOptions and returns a FederatedFieldGroup.
 *   Using a function that accepts a GroupOptions allows for Stitch to handle a new
 *   group for distinct GroupOptions.
 * @param setAdditionalFields Handler to set additional fields on tweets.
 * @param stratoValueType Type to be returned by the strato column.
 * @param tfield Tweet thrift field to be stored
 * @param pathOverride Optional strato catalog path; when absent the path is derived
 *   from `tfield` via [[FederatedFieldColumn.makeColumnPath]].
 */
class FederatedFieldColumn(
  federatedFieldsGroup: FederatedFieldGroupBuilder.Type,
  setAdditionalFields: SetAdditionalFieldsRequest => Future[Unit],
  stratoValueType: Type,
  tfield: TField,
  pathOverride: Option[String] = None)
    extends StratoFed.Column(pathOverride.getOrElse(FederatedFieldColumn.makeColumnPath(tfield)))
    with StratoFed.Fetch.StitchWithContext
    with StratoFed.Put.Stitch {

  type Key = Long
  type View = Unit
  type Value = Val.T

  override val keyConv: Conv[Key] = Conv.ofType
  override val viewConv: Conv[View] = Conv.ofType
  override val valueConv: Conv[Value] = Conv(stratoValueType, identity, identity)

  override val policy: Policy = AllowAll

  /*
   * A fetch that proxies GetTweetFieldsColumn.fetch but only requests and
   * returns one specific field. Yields `missing` when the tweet is not Found
   * or when the tweet carries no blob for this field.
   */
  override def fetch(tweetId: Key, view: View, opContext: OpContext): Stitch[Result[Value]] = {

    // Access.getTwitterUserId should return a value when the request is made on
    // behalf of a user and will not return a value otherwise.
    val twitterUserId: Option[UserId] = Access.getTwitterUserId.map(_.id)

    val stitchGroup = federatedFieldsGroup(GroupOptions(twitterUserId))

    Stitch
      .call(FederatedFieldReq(tweetId, tfield.id), stitchGroup).map {
        result: GetTweetFieldsResult =>
          result.tweetResult match {
            case Found(f) =>
              f.tweet.getFieldBlob(tfield.id) match {
                case Some(v: TFieldBlob) =>
                  found(blobToVal(v))
                case None => missing
              }
            case _ => missing
          }
      }

  }

  /*
   * A strato put interface for writing a single additional field to a tweet
   */
  override def put(tweetId: Key, value: Val.T): Stitch[Unit] = {
    val tweet: Tweet = Tweet(id = tweetId).setField(valToBlob(value))
    val request: SetAdditionalFieldsRequest = SetAdditionalFieldsRequest(tweet)
    Stitch.callFuture(setAdditionalFields(request))
  }

  /** Codec used to convert between strato values and thrift field blobs. */
  val mval: Thrift.Codec = MVal.codec(stratoValueType).thrift(4)

  /** Serializes a strato value into a blob for this column's thrift field. */
  def valToBlob(value: Val.T): TFieldBlob =
    TFieldBlob(tfield, mval.write[Buf](value, Thrift.compactProto))

  /** Deserializes the content of a thrift field blob into a strato value. */
  def blobToVal(thriftFieldBlob: TFieldBlob): Val.T =
    mval.read(thriftFieldBlob.content, Thrift.compactProto)

  override val contactInfo: ContactInfo = TweetypieContactInfo
  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Production),
    description = Some(PlainText(s"A federated column for the field tweet.$stratoValueType"))
  )
}

object FederatedFieldColumn {

  /** Field ids outside the [ID_START, ID_END) range that are still served as federated fields. */
  val idAllowlist: Seq[Short] = Seq(
    Tweet.CoreDataField.id,
    Tweet.LanguageField.id,
    Tweet.ConversationMutedField.id
  )
  val ID_START = 157
  val ID_END = 32000

  private val MigrationFields: Seq[Short] = Seq(157)

  /** True when `id` lies in [ID_START, ID_END) or is explicitly allowlisted. */
  def isFederatedField(id: Short): Boolean =
    (id >= ID_START && id < ID_END) || idAllowlist.contains(id)

  def isMigrationFederatedField(tField: TField): Boolean = MigrationFields.contains(tField.id)

  /** Camel-cased field name (with a trailing "id" stripped) shared by both path schemes. */
  private def columnName(tField: TField): String =
    Strings.toCamelCase(tField.name.stripSuffix("id"))

  /* federated field column strato configs must conform to this
   * path name scheme for tweetypie to pick them up
   */
  def makeColumnPath(tField: TField): String =
    s"tweetypie/fields/${columnName(tField)}.Tweet"

  /** Same scheme as [[makeColumnPath]], with a "-V1" suffix on the column name. */
  def makeV1ColumnPath(tField: TField): String =
    s"tweetypie/fields/${columnName(tField)}-V1.Tweet"
}
/**
 * Stitch group that batches federated field requests into a single
 * GetTweetFields call per batch.
 *
 * @param getTweetFieldsHandler handler invoked once per batch
 * @param options per-group options; requests with distinct options form distinct groups
 */
case class FederatedFieldGroup(
  getTweetFieldsHandler: GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]],
  options: GroupOptions)
    extends MapGroup[FederatedFieldReq, GetTweetFieldsResult] {

  /**
   * Requests the union of all field ids in `reqs` for the distinct tweet ids in
   * `reqs`, and returns a lookup from request to the result for its tweet id.
   * Requests whose tweet id is absent from the response resolve to a
   * `Throw(NoSuchElementException)`.
   */
  override protected def run(
    reqs: Seq[FederatedFieldReq]
  ): Future[FederatedFieldReq => Try[GetTweetFieldsResult]] = {

    // requesting the field ids of the requested additional field ids in this group
    val fieldIncludes: Set[TweetInclude] = reqs.map { req: FederatedFieldReq =>
      TweetInclude.TweetFieldId(req.fieldId)
    }.toSet

    val allIncludes: Set[TweetInclude] = if (fieldIncludes.intersect(countTweetFields).nonEmpty) {
      // if counts are being requested we include all count fields by default
      // because there is no way to specify them individually with federated fields,
      fieldIncludes ++ allCountFields
    } else {
      fieldIncludes
    }

    val gtfOptions = GetTweetFieldsOptions(
      tweetIncludes = allIncludes,
      forUserId = options.twitterUserId,
      // visibility filtering happens at the api layer / tweet top level
      // and therefore is not required at individual field level
      safetyLevel = Some(SafetyLevel.FilterNone)
    )

    getTweetFieldsHandler(
      GetTweetFieldsRequest(
        tweetIds = reqs.map(_.tweetId).distinct,
        options = gtfOptions
      )
    ).map { response =>
      // Index the response once instead of scanning it linearly for every request.
      // Iterating in reverse keeps the FIRST result for a tweet id when the response
      // contains duplicates, matching the previous `find` semantics.
      val resultByTweetId =
        response.reverseIterator.map(result => result.tweetId -> result).toMap

      (req: FederatedFieldReq) =>
        resultByTweetId.get(req.tweetId) match {
          case Some(result) => Try(result)
          case None =>
            Throw(new NoSuchElementException(s"response not found for tweet: ${req.tweetId}"))
        }
    }
  }
}
/**
 * Strato federated column that fetches the stored tweets of a user by
 * delegating to the GetStoredTweetsByUser handler.
 */
class GetStoredTweetsByUserColumn(
  handler: thrift.GetStoredTweetsByUserRequest => Future[thrift.GetStoredTweetsByUserResult])
    extends StratoFed.Column(GetStoredTweetsByUserColumn.Path)
    with StratoFed.Fetch.Stitch {

  override val contactInfo: ContactInfo = TweetypieContactInfo
  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Production),
    description =
      Some(PlainText("Fetches hydrated Tweets for a particular User regardless of Tweet state."))
  )

  // Access is granted to calls originating from the listed column, or to
  // members of the listed LDAP group.
  override val policy: Policy = AnyOf(
    Seq(
      FromColumns(Set(Path("tweetypie/data-provider/storedTweets.User"))),
      Has(LdapGroup("tweetypie-team"))
    ))

  override type Key = UserId
  override type View = GetStoredTweetsByUserView
  override type Value = GetStoredTweetsByUserResponse

  override val keyConv: Conv[Key] = Conv.ofType
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[GetStoredTweetsByUserView]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[GetStoredTweetsByUserResponse]

  /**
   * Fetches stored tweets for user `key` using the options carried by `view`.
   * Any failure of the call is surfaced as `Err(Err.Internal)`.
   */
  override def fetch(key: Key, view: View): Stitch[Result[Value]] = {
    val thriftRequest = toThriftRequest(key, view)

    Stitch
      .callFuture(handler(thriftRequest))
      .map { handlerResult =>
        Fetch.Result.found(
          GetStoredTweetsByUserResponse(
            storedTweets = handlerResult.storedTweets,
            cursor = handlerResult.cursor
          ))
      }
      .rescue {
        case _ => Stitch.exception(Err(Err.Internal))
      }
  }

  /** Copies every option of the strato view onto the thrift request for `userId`. */
  private def toThriftRequest(
    userId: Key,
    view: View
  ): thrift.GetStoredTweetsByUserRequest = {
    val requestOptions = thrift.GetStoredTweetsByUserOptions(
      bypassVisibilityFiltering = view.bypassVisibilityFiltering,
      setForUserId = view.setForUserId,
      startTimeMsec = view.startTimeMsec,
      endTimeMsec = view.endTimeMsec,
      cursor = view.cursor,
      startFromOldest = view.startFromOldest,
      additionalFieldIds = view.additionalFieldIds
    )
    thrift.GetStoredTweetsByUserRequest(
      userId = userId,
      options = Some(requestOptions)
    )
  }

}

object GetStoredTweetsByUserColumn {
  val Path = "tweetypie/internal/getStoredTweets.User"
}
/**
 * Stitch group batching stored-tweet lookups that share the same view into
 * one GetStoredTweets request.
 */
private case class Group(view: GetStoredTweetsView)
    extends MapGroup[TweetId, Fetch.Result[GetStoredTweetsResponse]] {

  /**
   * Sends all `keys` in a single request built from the view's options.
   * If the call fails, every key resolves to `Throw(Err(Err.Internal))`.
   */
  override protected def run(
    keys: Seq[TweetId]
  ): Future[TweetId => Try[Result[GetStoredTweetsResponse]]] = {
    val request = thrift.GetStoredTweetsRequest(
      keys,
      Some(
        thrift.GetStoredTweetsOptions(
          bypassVisibilityFiltering = view.bypassVisibilityFiltering,
          forUserId = view.forUserId,
          additionalFieldIds = view.additionalFieldIds
        ))
    )

    getStoredTweets(request)
      .map(transformAndGroupByTweetId)
      .handle {
        case _ =>
          _ => Throw[Result[GetStoredTweetsResponse]](Err(Err.Internal))
      }
  }

  /**
   * Wraps each result in a GetStoredTweetsResponse and keys it by tweet id.
   * A tweet id with exactly one result maps to a found result; a tweet id
   * with several results maps to a BadRequest error.
   */
  private def transformAndGroupByTweetId(
    results: Seq[thrift.GetStoredTweetsResult]
  ): Map[TweetId, Try[Fetch.Result[GetStoredTweetsResponse]]] = {
    val responsesByTweetId = results
      .map(storedResult => GetStoredTweetsResponse(storedResult.storedTweet))
      .groupBy(_.storedTweet.tweetId)

    responsesByTweetId.map {
      case (tweetId, Seq(onlyResponse)) =>
        (tweetId, Return(Fetch.Result.found(onlyResponse)))
      case (tweetId, multipleResults) =>
        (
          tweetId,
          Throw(Err(Err.BadRequest, s"Got ${multipleResults.size} results for $tweetId")))
    }
  }

}
/**
 * Strato federated column implementing GetTweetFields as a Fetch.
 *
 * @param handler GetTweetFields handler invoked once per Stitch batch
 * @param stats receiver for the safety-level comparison counters
 */
class GetTweetFieldsColumn(
  handler: GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]],
  stats: StatsReceiver)
    extends StratoFed.Column(GetTweetFieldsColumn.Path)
    with StratoFed.Fetch.StitchWithContext {

  /**
   * At this point, this fetch op will reject any requests that specify
   * visibilityPolicy other than USER_VISIBLE, so no access control is needed.
   */
  override val policy: Policy = AllowAll

  override type Key = TweetId
  override type View = GetTweetFieldsOptions
  override type Value = GetTweetFieldsResult

  override val keyConv: Conv[Key] = Conv.ofType
  override val viewConv: Conv[View] = ScroogeConv.fromStruct[GetTweetFieldsOptions]
  override val valueConv: Conv[Value] = ScroogeConv.fromStruct[GetTweetFieldsResult]

  override val contactInfo: ContactInfo = TweetypieContactInfo
  override val metadata: OpMetadata = OpMetadata(
    lifecycle = Some(Production),
    description =
      Some(PlainText("Get of tweets that allows fetching only specific subsets of the data.")),
  )

  val safetyOpContextOnlyCounter = stats.counter("safety_op_context_only")
  val safetyOpContextOnlyValueScope = stats.scope("safety_op_context_only_value")
  val safetyOpContextOnlyCallerScope = stats.scope("safety_op_context_only_caller")

  val safetyViewOnlyCounter = stats.counter("safety_view_only")
  val safetyViewOnlyValueScope = stats.scope("safety_view_only_value")
  val safetyViewOnlyCallerScope = stats.scope("safety_view_only_caller")

  val safetyLevelInconsistencyCounter = stats.counter("safety_level_inconsistency")
  val safetyLevelInconsistencyValueScope = stats.scope("safety_level_inconsistency_value")
  val safetyLevelInconsistencyCallerScope = stats.scope("safety_level_inconsistency_caller")

  /**
   * Records safety-level stats, rejects unsupported visibility policies, and
   * then fetches `key` through the Stitch batching group for `view`.
   */
  override def fetch(key: Key, view: View, ctx: OpContext): Stitch[Result[Value]] = {
    compareSafetyLevel(view, ctx)
    checkVisibilityPolicyUserVisible(view).flatMap { _ =>
      Stitch.call(key, Group(view))
    }
  }

  /**
   * Only allow [[TweetVisibilityPolicy.UserVisible]] visibilityPolicy.
   *
   * This column requires access policy in order to serve requests with visibilityPolicy
   * other than [[TweetVisibilityPolicy.UserVisible]]. Before we support access control,
   * reject all requests that are not safe.
   */
  private def checkVisibilityPolicyUserVisible(view: View): Stitch[Unit] =
    view.visibilityPolicy match {
      // Pass the unit value `()`; `Unit` alone names the companion object and only
      // type-checks through value discarding.
      case TweetVisibilityPolicy.UserVisible => Stitch.value(())
      case otherValue =>
        Stitch.exception(
          Err(
            Err.BadRequest,
            "GetTweetFields does not support access control on Strato yet. "
              + s"Hence visibilityPolicy can only take the default ${TweetVisibilityPolicy.UserVisible} value, "
              + s"got: ${otherValue}."
          ))
    }

  /** Increments a counter named after the calling service, when one is known. */
  private def recordCaller(callerScope: StatsReceiver): Unit =
    PreferForwardedServiceIdentifierForStrato.serviceIdentifier
      .foreach(serviceId => callerScope.counter(serviceId.toString).incr())

  /**
   * Compare the SafetyLevels in the View and OpContext.
   *
   * Purely observational: counts requests where only one side carries a safety
   * level, or where both carry one but the names differ (ignoring case and
   * surrounding whitespace). Nothing is rejected here.
   */
  private def compareSafetyLevel(view: View, ctx: OpContext): Unit =
    (view.safetyLevel, ctx.safetyLevel) match {
      case (None, None) =>
      case (Some(viewSafety), None) =>
        safetyViewOnlyCounter.incr()
        safetyViewOnlyValueScope.counter(viewSafety.name).incr()
        recordCaller(safetyViewOnlyCallerScope)
      case (None, Some(ctxSafety)) =>
        safetyOpContextOnlyCounter.incr()
        safetyOpContextOnlyValueScope.counter(ctxSafety.name).incr()
        recordCaller(safetyOpContextOnlyCallerScope)
      case (Some(viewSafety), Some(ctxSafety)) =>
        def safeStringEquals(a: String, b: String) =
          a.toLowerCase().trim().equals(b.toLowerCase().trim())
        if (!safeStringEquals(viewSafety.name, ctxSafety.name)) {
          safetyLevelInconsistencyCounter.incr()
          safetyLevelInconsistencyValueScope.counter(viewSafety.name + '-' + ctxSafety.name).incr()
          recordCaller(safetyLevelInconsistencyCallerScope)
        }
    }

  /**
   * Means of batching of [[GetTweetFieldsColumn]] calls.
   *
   * Only calls issued against the same instance of [[GetTweetFieldsColumn]]
   * are batched as Stitch clusters group objects based on equality,
   * and nested case class implicitly captures [[GetTweetFieldsColumn]] reference.
   */
  private case class Group(view: GetTweetFieldsOptions)
      extends MapGroup[TweetId, Fetch.Result[GetTweetFieldsResult]] {

    /**
     * Batches given [[TweetId]] lookups in a single [[GetTweetFieldsRequest]]
     * and returns a result mapped by [[TweetId]].
     */
    override protected def run(
      keys: Seq[TweetId]
    ): Future[TweetId => Try[Fetch.Result[GetTweetFieldsResult]]] =
      handler(
        GetTweetFieldsRequest(
          // Sorting the keys makes for simpler matchers in the tests
          // as matching on a Seq needs to be in order.
          tweetIds = keys.sorted,
          options = view,
        )).map(groupByTweetId)

    /**
     * Groups given [[GetTweetFieldsResult]] objects by [[TweetId]] and returns the mapping.
     *
     * A tweet id with exactly one result maps to a found result; a tweet id with
     * several results maps to a failed Try carrying an `Err.Dependency` error.
     */
    private def groupByTweetId(
      allResults: Seq[GetTweetFieldsResult]
    ): TweetId => Try[Fetch.Result[GetTweetFieldsResult]] = {
      // Built strictly with `map` rather than `mapValues`: in Scala 2.12 `mapValues`
      // returns a lazy view that re-runs the function on every lookup, and it is
      // deprecated in 2.13.
      val resultsByTweetId: Map[TweetId, Try[Fetch.Result[GetTweetFieldsResult]]] =
        allResults
          .groupBy(_.tweetId)
          .map {
            case (tweetId, Seq(result)) =>
              (tweetId, Try(Fetch.Result.found(result)))
            case (tweetId, manyResults) =>
              (
                tweetId,
                Try {
                  throw Err(
                    Err.Dependency,
                    s"Expected one result per tweeet ID, got ${manyResults.length}")
                })
          }
      resultsByTweetId
    }
  }
}

object GetTweetFieldsColumn {
  val Path = "tweetypie/getTweetFields.Tweet"
}
/** Parser for tracking ids supplied as hexadecimal strings. */
object TrackingId {
  private[this] val log = Logger(getClass)

  /**
   * Parses `s` as a base-16 signed long.
   *
   * Returns None when `s` is null, blank after trimming, or not parseable.
   * A "parsed" or "parse_failed" counter under the "tracking_id_parser" scope is
   * incremented for each non-blank input, and a parse failure is logged as a warning.
   */
  def parse(s: String, statsReceiver: StatsReceiver = NullStatsReceiver): Option[Long] = {
    val parserStats = statsReceiver.scope("tracking_id_parser")

    val successCounter = parserStats.scope("parsed").counter("count")
    val failureCounter = parserStats.scope("parse_failed").counter("count")

    // Parses an already-trimmed, non-empty candidate and records the outcome.
    def parseHex(candidate: String): Option[Long] =
      try {
        val parsed = java.lang.Long.parseLong(candidate, 16)
        successCounter.incr()
        Some(parsed)
      } catch {
        case _: NumberFormatException =>
          failureCounter.incr()
          log.warn(s"invalid tracking ID: '$s'")
          None
      }

    for {
      raw <- Option(s)
      trimmed = raw.trim
      if trimmed.nonEmpty
      id <- parseHex(trimmed)
    } yield id
  }
}
/**
 * Strato federated column that removes the calling user's retweets of a
 * given source tweet.
 */
class UnretweetColumn(
  unretweet: thrift.UnretweetRequest => Future[thrift.UnretweetResult],
  getRequestContext: GetRequestContext,
  // Use the companion's Path constant rather than repeating the literal, so the
  // registered path and UnretweetColumn.Path cannot drift apart.
) extends StratoFed.Column(UnretweetColumn.Path)
    with StratoFed.Execute.StitchWithContext
    with StratoFed.HandleDarkRequests {

  override val policy: Policy = AccessPolicy.TweetMutationCommonAccessPolicies

  // It's acceptable to retry or reapply an unretweet operation,
  // as multiple calls result in the same end state.
  override val isIdempotent: Boolean = true

  override type Arg = gql.UnretweetRequest
  override type Result = gql.UnretweetResponseWithSubqueryPrefetchItems

  override val argConv: Conv[Arg] = ScroogeConv.fromStruct
  override val resultConv: Conv[Result] = ScroogeConv.fromStruct

  override val contactInfo: ContactInfo = TweetypieContactInfo
  override val metadata: OpMetadata =
    OpMetadata(
      Some(Production),
      Some(PlainText("Removes any retweets by the calling user of the given source tweet.")))

  /**
   * Issues the unretweet for the calling user (or a canned Ok result for dark
   * requests) and responds with the source tweet id from the request.
   */
  override def execute(gqlRequest: Arg, opContext: OpContext): Stitch[Result] = {
    val ctx: RequestContext = getRequestContext(opContext)
    val req = thrift.UnretweetRequest(
      ctx.twitterUserId,
      gqlRequest.sourceTweetId,
    )

    val stitchUnretweet = handleDarkRequest(opContext)(
      light = Stitch.callFuture(unretweet(req)),
      // For dark requests, we don't want to send traffic to tweetypie.
      // Since the response is the same regardless of the request, we take a no-op
      // action instead.
      dark = Stitch.value(thrift.UnretweetResult(state = thrift.TweetDeleteState.Ok))
    )

    // NOTE(review): the returned UnretweetResult state is not inspected, so every
    // completed call is reported as a success - confirm this is intended.
    stitchUnretweet.map { _ =>
      gql.UnretweetResponseWithSubqueryPrefetchItems(
        data = Some(gql.UnretweetResponse(Some(gqlRequest.sourceTweetId)))
      )
    }
  }
}

object UnretweetColumn {
  val Path = "tweetypie/unretweet.Tweet"
}
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.scala new file mode 100644 index 000000000..170ba3c5c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.scala @@ -0,0 +1,131 @@ +package com.twitter.tweetypie +package federated.context + +import com.twitter.common.ip_address_utils.ClientIpAddressUtils +import com.twitter.context.thriftscala.Viewer +import com.twitter.context.TwitterContext +import com.twitter.finagle.core.util.InetAddressUtil +import com.twitter.passbird.bitfield.clientprivileges.thriftscala.{Constants => ClientAppPrivileges} +import com.twitter.finatra.tfe.HttpHeaderNames +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.strato.access.Access.ClientApplicationPrivilege +import com.twitter.strato.access.Access +import com.twitter.strato.access.ClientApplicationPrivilegeVariant +import com.twitter.strato.context.StratoContext +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.response.Err +import com.twitter.weaverbird.common.GetPlatformKey + +/** + * [[RequestContext]] exists to avoid wiring the federated column + * implementations directly to the request data that is derived from the + * contextual environment. Columns should not directly reference + * TwitterContext, StratoContext, strato.access.Access, HTTP headers, etc. + * Each column operation operates on two input parameters: a request (i.e. + * a column operation's Arg) and a [[RequestContext]]. 
+ */ +private[federated] case class RequestContext( + clientApplicationId: Option[AppId] = None, + deviceSource: Option[String] = None, + knownDeviceToken: Option[KnownDeviceToken] = None, + remoteHost: Option[String] = None, + twitterUserId: UserId, + contributorId: Option[UserId] = None, + isDarkRequest: Boolean = false, + hasPrivilegeNullcastingAccess: Boolean = false, + hasPrivilegePromotedTweetsInTimeline: Boolean = false, + sessionHash: Option[String] = None, + cardsPlatformKey: Option[String] = None, + safetyLevel: Option[SafetyLevel] = None, +) { + def isContributorRequest = contributorId.exists(_ != twitterUserId) +} + +/** + * Provides a single place to derive request data from the contextual + * environment. Defined as a sealed class (vs an object) to allow mocking + * in unit tests. + */ +private[federated] sealed class GetRequestContext() { + // Bring Tweetypie permitted TwitterContext into scope + private[this] val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + /** + * When TwitterUserIdNotDefined is thrown, it's likely that the column + * access control configuration lacks `AllowTwitterUserId` or other + * Policy that ensures the caller is authenticated. 
+ */ + private[federated] val TwitterUserIdNotDefined = + Err(Err.Authentication, "User authentication is required for this operation.") + + private[this] val SessionHashHeaderName = "x-tfe-session-hash" + private[this] def hasClientApplicationPrivilege(id: Int): Boolean = + Access.getPrincipals.contains( + ClientApplicationPrivilege( + ClientApplicationPrivilegeVariant + .byId(id.toShort).get)) + + private[this] def getRequestHeader(headerName: String): Option[String] = + StratoContext + .current() + .propagatedHeaders + .flatMap(_.get(headerName)) + + def apply(opContext: OpContext): RequestContext = { + val twitterUserId = Access.getTwitterUserId match { + // Access.getTwitterUserId should return a value as long as the column + // policy includes AllowTwitterUserId, which guarantees the presence of + // the value. + case Some(twitterUser) => twitterUser.id + case None => throw TwitterUserIdNotDefined + } + + // contributorId should only be defined when the authenticated user differs + // from the "Twitter user" + val contributorId = + Access.getAuthenticatedTwitterUserId.map(_.id).filter(_ != twitterUserId) + + val twitterContext = TwitterContext().getOrElse(Viewer()) + + val deviceSource = twitterContext.clientApplicationId.map("oauth:" + _) + + // Ported from StatusesUpdateController#getBirdherdOptions and + // BirdherdOption.UserIp(request.clientHost) + val remoteHost: Option[String] = + getRequestHeader(HttpHeaderNames.X_TWITTER_AUDIT_IP_THRIFT.toLowerCase) // use the new header + .flatMap(ClientIpAddressUtils.decodeClientIpAddress(_)) + .flatMap(ClientIpAddressUtils.getString(_)) + .orElse( + getRequestHeader( + HttpHeaderNames.X_TWITTER_AUDIT_IP.toLowerCase + ) // fallback to old way before migration is completed + .map(h => InetAddressUtil.getByName(h.trim).getHostAddress) + ) + + val isDarkRequest = opContext.darkRequest.isDefined + + val sessionHash = getRequestHeader(SessionHashHeaderName) + + val cardsPlatformKey = 
twitterContext.clientApplicationId.map(GetPlatformKey(_)) + + val safetyLevel = opContext.safetyLevel + + RequestContext( + clientApplicationId = twitterContext.clientApplicationId, + deviceSource = deviceSource, + knownDeviceToken = twitterContext.knownDeviceToken, + remoteHost = remoteHost, + twitterUserId = twitterUserId, + contributorId = contributorId, + isDarkRequest = isDarkRequest, + hasPrivilegeNullcastingAccess = + hasClientApplicationPrivilege(ClientAppPrivileges.NULLCASTING_ACCESS), + hasPrivilegePromotedTweetsInTimeline = + hasClientApplicationPrivilege(ClientAppPrivileges.PROMOTED_TWEETS_IN_TIMELINE), + sessionHash = sessionHash, + cardsPlatformKey = cardsPlatformKey, + safetyLevel = safetyLevel, + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD new file mode 100644 index 000000000..06a2a8c10 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD @@ -0,0 +1,32 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle/finagle-core/src/main", + "tweetypie/servo/util/src/main/scala", + "src/thrift/com/twitter/consumer_privacy/mention_controls:thrift-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "src/thrift/com/twitter/spam/rtf:safety-result-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-compat/src/main/scala/com/twitter/stitch/compat", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "stitch/stitch-gizmoduck", + "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql-scala", + 
"strato/config/src/thrift/com/twitter/strato/graphql:graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql-scala", + "strato/src/main/scala/com/twitter/strato/rpc", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "vibes/src/main/thrift/com/twitter/vibes:vibes-scala", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/common", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/converters/common", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/converters/tweet", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/hydrators", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/mappers", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/repositories", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala new file mode 100644 index 000000000..d829955db --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala @@ -0,0 +1,166 @@ +package com.twitter.tweetypie +package federated +package prefetcheddata + +import com.twitter.consumer_privacy.mention_controls.thriftscala.UnmentionInfo +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.gizmoduck.thriftscala.LookupContext +import com.twitter.gizmoduck.thriftscala.QueryFields +import com.twitter.gizmoduck.thriftscala.UserResult +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.strato.graphql.thriftscala.CacheMissStrategy +import com.twitter.strato.graphql.thriftscala.PrefetchedData +import com.twitter.strato.graphql.thriftscala.TweetResult +import 
com.twitter.tweetypie.backends.Gizmoduck +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Throwables +import com.twitter.vibes.thriftscala.VibeV2 +import com.twitter.weaverbird.common.GetRequestContext +import com.twitter.weaverbird.common.PerTOOAppCallerStats +import com.twitter.weaverbird.common.RequestContext +import com.twitter.weaverbird.converters.tweet.WeaverbirdEntitySetMutations +import com.twitter.weaverbird.converters.tweet.WeaverbirdTweetMutations +import com.twitter.weaverbird.hydrators._ +import com.twitter.weaverbird.mappers.ApiTweetPrefetchedMapper +import com.twitter.weaverbird.repositories.UserRepository +import com.twitter.weaverbird.converters.common.EntityRenderingOptions + +private[federated] final case class PrefetchedDataRequest( + tweet: Tweet, + sourceTweet: Option[Tweet], + quotedTweet: Option[Tweet], + unmentionInfo: Option[UnmentionInfo] = None, + vibe: Option[VibeV2] = None, + safetyLevel: SafetyLevel, + requestContext: RequestContext) + +private[federated] final case class PrefetchedDataResponse(value: PrefetchedData) + +private[federated] object PrefetchedDataResponse { + // For NotFound, there is no subsequent result or quoted_tweet_results field, so both + // settings are false here. These deciders will be removed post migration. 
+ private[this] val prefetchedMapper = new ApiTweetPrefetchedMapper( + skipTweetResultPrefetchItem = () => false + ) + def notFound(tweetId: Long): PrefetchedDataResponse = + PrefetchedDataResponse( + value = prefetchedMapper.getPrefetchedData( + tweetId = tweetId, + apiTweet = None, + tweetResult = None + ) + ) +} + +private[federated] object PrefetchedDataRepository { + def apply( + thriftTweetToApiTweet: ThriftTweetToApiTweet, + prefetchedMapper: ApiTweetPrefetchedMapper, + statsReceiver: StatsReceiver, + ): PrefetchedDataRequest => Stitch[PrefetchedDataResponse] = + (request: PrefetchedDataRequest) => { + val thriftTweetToApiTweetRequest = ThriftTweetToApiTweetRequest( + tweet = request.tweet, + sourceTweet = request.sourceTweet, + quotedTweet = request.quotedTweet, + // For Tweet writes, filteredReason will always be None. + filteredReason = None, + safetyLevel = request.safetyLevel, + requestContext = request.requestContext, + entityRenderingOptions = EntityRenderingOptions() + ) + + val successCounter = statsReceiver.counter("success") + val failuresCounter = statsReceiver.counter("failures") + val failuresScope = statsReceiver.scope("failures") + + thriftTweetToApiTweet + .arrow(thriftTweetToApiTweetRequest) + .onSuccess(_ => successCounter.incr()) + .onFailure { t => + failuresCounter.incr() + failuresScope.counter(Throwables.mkString(t): _*).incr() + } + .map((resp: ThriftTweetToApiTweetResponse) => { + val prefetchedData: PrefetchedData = prefetchedMapper.getPrefetchedData( + tweetId = request.tweet.id, + apiTweet = Some(resp.apiTweet), + // since ApiTweet was hydrate, we can fabricate a TweetResult.Tweet + tweetResult = Some(TweetResult.Tweet(request.tweet.id)), + unmentionInfo = request.unmentionInfo, + editControl = request.tweet.editControl, + previousCounts = request.tweet.previousCounts, + vibe = request.vibe, + editPerspective = request.tweet.editPerspective, + noteTweet = request.tweet.noteTweet + ) + + // Notify GraphQL API to not attempt 
hydration for missing + // ApiTweet/TweetResult fields. This is only needed on the + // Tweet write path since the newly created Tweet may not + // be fully persisted yet in tbird Manhattan. + val shortCircuitedPrefetchedData = prefetchedData.copy( + onCacheMiss = CacheMissStrategy.ShortCircuitExisting + ) + + PrefetchedDataResponse(shortCircuitedPrefetchedData) + }) + } +} + +private[federated] object PrefetchedDataRepositoryBuilder { + def apply( + getUserResultsById: Gizmoduck.GetById, + statsReceiver: StatsReceiver + ): PrefetchedDataRequest => Stitch[PrefetchedDataResponse] = { + val repoStats = statsReceiver.scope("repositories") + + case class GetUserResultById( + queryFields: Set[QueryFields], + lookupContext: LookupContext, + ) extends SeqGroup[UserId, UserResult] { + override def run(keys: Seq[UserId]): Future[Seq[Try[UserResult]]] = + LegacySeqGroup.liftToSeqTry(getUserResultsById((lookupContext, keys, queryFields))) + + override def maxSize: Int = 100 + } + + val stitchGetUserResultById: UserRepository.GetUserResultById = + (userId: UserId, queryFields: Set[QueryFields], lookupContext: LookupContext) => + Stitch.call(userId, GetUserResultById(queryFields, lookupContext)) + + val userRepository = new UserRepository(stitchGetUserResultById, repoStats) + + // Note, this is weaverbird.common.GetRequestContext + val getRequestContext = new GetRequestContext() + + // TwiggyUserHydrator is needed to hydrate TwiggyUsers for CWC and misc. 
logic + val twiggyUserHydrator = new TwiggyUserHydrator(userRepository, getRequestContext) + + val weaverbirdMutations = new WeaverbirdTweetMutations( + new WeaverbirdEntitySetMutations( + new PerTOOAppCallerStats(statsReceiver, getRequestContext) + ) + ) + + val prefetchedMapper = new ApiTweetPrefetchedMapper( + // do not skip this in mutation path as we depends on it + skipTweetResultPrefetchItem = () => false + ) + + val thriftTweetToApiTweet: ThriftTweetToApiTweet = + new FoundThriftTweetToApiTweet( + statsReceiver, + twiggyUserHydrator, + weaverbirdMutations + ) + PrefetchedDataRepository( + thriftTweetToApiTweet, + prefetchedMapper, + repoStats.scope("prefetched_data_repo") + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD new file mode 100644 index 000000000..f0ed3efd0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD @@ -0,0 +1,18 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "ads-common/loggingclient/src/main/scala", + "src/scala/com/twitter/ads/internal/pcl/service", + "src/scala/com/twitter/ads/internal/pcl/strato_adaptor", + "src/thrift/com/twitter/ads/adserver:ads_shared_types-scala", + "src/thrift/com/twitter/ads/callback:engagement_request-scala", + "src/thrift/com/twitter/ads/internal/pcl:promoted_content_input-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", + "strato/src/main/scala/com/twitter/strato/server/context", + "twitter-context/src/main/scala", + "util/util-stats/src/main/scala/com/twitter/finagle/stats", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala new file mode 100644 index 000000000..f3a285d65 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala @@ -0,0 +1,40 @@ +package com.twitter.tweetypie +package federated +package promotedcontent + +import com.twitter.ads.callback.thriftscala.EngagementRequest +import com.twitter.ads.internal.pcl.service.CallbackPromotedContentLogger +import com.twitter.ads.internal.pcl.strato_adaptor.PromotedContentInputProvider +import com.twitter.ads.internal.pcl.thriftscala.PromotedContentInput +import com.twitter.adserver.thriftscala.EngagementType +import com.twitter.util.Future + +object TweetPromotedContentLogger { + sealed abstract class TweetEngagementType(val engagementType: EngagementType) + case object TweetEngagement extends TweetEngagementType(EngagementType.Send) + case object ReplyEngagement extends TweetEngagementType(EngagementType.Reply) + case object RetweetEngagement extends TweetEngagementType(EngagementType.Retweet) + + type Type = (EngagementRequest, TweetEngagementType, Boolean) => Future[Unit] + + private[this] val TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + def apply(callbackPromotedContentLogger: CallbackPromotedContentLogger): Type = + ( + engagementRequest: EngagementRequest, + tweetEngagementType: TweetEngagementType, + isDark: Boolean + ) => { + val pci: PromotedContentInput = + PromotedContentInputProvider(TwitterContext, engagementRequest) + + // The real logging is fire-and-forget, so we can create the Future and ignore returning it. 
+ Future.when(!isDark) { + callbackPromotedContentLogger.logNonTrendEngagement( + pci, + tweetEngagementType.engagementType, + pci.impressionId) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD new file mode 100644 index 000000000..0bf98375c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD @@ -0,0 +1,43 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "decider", + "finagle/finagle-base-http/src/main", + "finatra-internal/api11/src/main/scala/com/twitter/finatra/api11:errors", + "geo/model/src/main/scala/com/twitter/geo/model", + "passbird/bitfields-thrift/src/main/thrift:thrift-scala", + "tweetypie/servo/util/src/main/scala", + "tweetypie/servo/util/src/main/scala:exception", + "src/scala/com/twitter/accounts/util:safety-meta", + "src/thrift/com/twitter/ads/adserver:ad_engagement_details-scala", + "src/thrift/com/twitter/ads/adserver:preroll_metadata-scala", + "src/thrift/com/twitter/ads/callback:engagement_request-scala", + "src/thrift/com/twitter/bouncer:bounce-action-thrift-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql-scala", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/context", + "strato/src/main/scala/com/twitter/strato/fed", + 
"strato/src/main/scala/com/twitter/strato/response", + "strato/src/main/scala/com/twitter/strato/test/config/bouncer", + "strato/src/main/scala/com/twitter/strato/thrift", + "strato/src/main/thrift/com/twitter/strato/context:thrift-scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/service", + "tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context/src/main/scala", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala new file mode 100644 index 000000000..a020bdd3e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala @@ -0,0 +1,140 @@ +package com.twitter.tweetypie +package federated +package warmups + +import com.twitter.context.TwitterContext +import com.twitter.context.thriftscala.Viewer +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.strato.access.Access +import com.twitter.strato.access.Access.AccessToken +import com.twitter.strato.access.Access.AuthenticatedTwitterUserId +import com.twitter.strato.access.Access.AuthenticatedTwitterUserNotSuspended +import com.twitter.strato.access.Access.TwitterUserId +import com.twitter.strato.access.Access.TwitterUserNotSuspended +import com.twitter.strato.catalog.Ops +import com.twitter.strato.client.StaticClient +import 
com.twitter.strato.context.StratoContext +import com.twitter.strato.opcontext.DarkRequest +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.test.config.bouncer.TestPrincipals +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.tweetypie.federated.columns.CreateRetweetColumn +import com.twitter.tweetypie.federated.columns.CreateTweetColumn +import com.twitter.tweetypie.federated.columns.DeleteTweetColumn +import com.twitter.tweetypie.federated.columns.UnretweetColumn +import com.twitter.tweetypie.service.WarmupQueriesSettings +import com.twitter.tweetypie.thriftscala.graphql._ +import com.twitter.util.logging.Logger +import com.twitter.util.Future +import com.twitter.util.Stopwatch + +object StratoCatalogWarmups { + private[this] val log = Logger(getClass) + + // Performs warmup queries, failing after 30 seconds + def warmup( + warmupSettings: WarmupQueriesSettings, + catalog: PartialFunction[String, Ops] + ): Future[Unit] = { + val elapsed = Stopwatch.start() + // note: we need to supply bouncer principals here, because the + // columns are gated by a bouncer policy + Access + .withPrincipals(WarmupPrincipals) { + StratoContext.withOpContext(WarmupOpContext) { + TwitterContext.let(viewer = WarmupViewer) { + warmupSettings.clientId.asCurrent { + Stitch.run(executeDarkly(catalog)) + } + } + } + } + .onSuccess { _ => log.info("warmup completed in %s".format(elapsed())) } + .onFailure { t => log.error("could not complete warmup queries before startup.", t) } + } + + private val WarmupTwitterUserId = 0L + + private val WarmupPrincipals = Set( + TestPrincipals.normalStratoBouncerAccessPrincipal, + AuthenticatedTwitterUserId(WarmupTwitterUserId), + TwitterUserId(WarmupTwitterUserId), + TwitterUserNotSuspended, + AuthenticatedTwitterUserNotSuspended, + AccessToken(isWritable = true) + ) + + private[this] val RwebClientId = 0L + + private[this] val WarmupViewer = Viewer( + userId = Some(WarmupTwitterUserId), + 
authenticatedUserId = Some(WarmupTwitterUserId), + clientApplicationId = Some(RwebClientId), + ) + + private[this] val WarmupOpContext = + OpContext + .safetyLevel(SafetyLevel.TweetWritesApi.name) + .copy(darkRequest = Some(DarkRequest())) + .toThrift() + + private[this] val EllenOscarSelfie = 440322224407314432L + + private[this] val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + private[this] def executeDarkly(catalog: PartialFunction[String, Ops]): Stitch[Unit] = { + val stratoClient = new StaticClient(catalog) + val tweetCreator = + stratoClient.executer[CreateTweetRequest, CreateTweetResponseWithSubqueryPrefetchItems]( + CreateTweetColumn.Path) + + val tweetDeletor = + stratoClient + .executer[DeleteTweetRequest, DeleteTweetResponseWithSubqueryPrefetchItems]( + DeleteTweetColumn.Path) + + val retweetCreator = + stratoClient + .executer[CreateRetweetRequest, CreateRetweetResponseWithSubqueryPrefetchItems]( + CreateRetweetColumn.Path) + + val unretweetor = + stratoClient + .executer[UnretweetRequest, UnretweetResponseWithSubqueryPrefetchItems]( + UnretweetColumn.Path) + + val stitchCreateTweet = + tweetCreator + .execute(CreateTweetRequest("getting warmer")) + .onSuccess(_ => log.info(s"${CreateTweetColumn.Path} warmup success")) + .onFailure(e => log.info(s"${CreateTweetColumn.Path} warmup fail: $e")) + + val stitchDeleteTweet = + tweetDeletor + .execute(DeleteTweetRequest(-1L)) + .onSuccess(_ => log.info(s"${DeleteTweetColumn.Path} warmup success")) + .onFailure(e => log.info(s"${DeleteTweetColumn.Path} warmup fail: $e")) + + val stitchCreateRetweet = + retweetCreator + .execute(CreateRetweetRequest(EllenOscarSelfie)) + .onSuccess(_ => log.info(s"${CreateRetweetColumn.Path} warmup success")) + .onFailure(e => log.info(s"${CreateRetweetColumn.Path} warmup fail: $e")) + + val stitchUnretweet = + unretweetor + .execute(UnretweetRequest(EllenOscarSelfie)) + .onSuccess(_ => 
log.info(s"${UnretweetColumn.Path} warmup success")) + .onFailure(e => log.info(s"${UnretweetColumn.Path} warmup fail: $e")) + + Stitch + .join( + stitchCreateTweet, + stitchDeleteTweet, + stitchCreateRetweet, + stitchUnretweet, + ).unit + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala new file mode 100644 index 000000000..b9c3c8616 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala @@ -0,0 +1,185 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetutil.DmDeepLink +import com.twitter.tweetutil.TweetPermalink +import com.twitter.tweetypie.core.CardReferenceUriExtractor +import com.twitter.tweetypie.core.NonTombstone +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.thriftscala.CardReference +import com.twitter.tweetypie.thriftscala.DeviceSource +import com.twitter.tweetypie.thriftscala.QuotedTweet +import com.twitter.tweetypie.thriftscala.ShortenedUrl +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.thriftscala.TweetCreateState + +case class AttachmentBuilderRequest( + tweetId: TweetId, + user: User, + mediaUploadIds: Option[Seq[Long]], + cardReference: Option[CardReference], + attachmentUrl: Option[String], + remoteHost: Option[String], + darkTraffic: Boolean, + deviceSource: DeviceSource) { + val ctx: ValidationContext = ValidationContext( + user = user, + mediaUploadIds = mediaUploadIds, + cardReference = cardReference + ) + val passThroughResponse: AttachmentBuilderResult = + AttachmentBuilderResult(attachmentUrl = attachmentUrl, validationContext = ctx) +} + +case class ValidationContext( + user: User, + mediaUploadIds: 
Option[Seq[Long]], + cardReference: Option[CardReference]) + +case class AttachmentBuilderResult( + attachmentUrl: Option[String] = None, + quotedTweet: Option[QuotedTweet] = None, + extraChars: Int = 0, + validationContext: ValidationContext) + +object AttachmentBuilder { + + private[this] val log = Logger(getClass) + private[this] val attachmentCountLogger = Logger( + "com.twitter.tweetypie.handler.CreateAttachmentCount" + ) + + type Type = FutureArrow[AttachmentBuilderRequest, AttachmentBuilderResult] + type ValidationType = FutureEffect[AttachmentBuilderResult] + + def validateAttachmentUrl(attachmentUrl: Option[String]): Unit.type = + attachmentUrl match { + case None => Unit + case Some(TweetPermalink(_, _)) => Unit + case Some(DmDeepLink(_)) => Unit + case _ => throw TweetCreateFailure.State(TweetCreateState.InvalidAttachmentUrl) + } + + def validateAttachments( + stats: StatsReceiver, + validateCardRef: Gate[Option[String]] + ): AttachmentBuilder.ValidationType = + FutureEffect { result: AttachmentBuilderResult => + validateAttachmentUrl(result.attachmentUrl) + + val ctx = result.validationContext + + val cardRef = ctx.cardReference.filter { + case CardReferenceUriExtractor(NonTombstone(_)) => true + case _ => false + } + + if (result.quotedTweet.isDefined && cardRef.isEmpty) { + Future.Unit + } else { + val attachmentCount = + Seq( + ctx.mediaUploadIds, + result.attachmentUrl, + result.quotedTweet + ).count(_.nonEmpty) + + val userAgent = TwitterContext().flatMap(_.userAgent) + if (attachmentCount + cardRef.count(_ => true) > 1) { + attachmentCountLogger.warn( + s"Too many attachment types on tweet create from user: ${ctx.user.id}, " + + s"agent: '${userAgent}', media: ${ctx.mediaUploadIds}, " + + s"attachmentUrl: ${result.attachmentUrl}, cardRef: $cardRef" + ) + stats.counter("too_many_attachment_types_with_cardref").incr() + } + Future.when(attachmentCount + cardRef.count(_ => validateCardRef(userAgent)) > 1) { + 
Future.exception(TweetCreateFailure.State(TweetCreateState.TooManyAttachmentTypes)) + } + } + } + + private val queryInclude = TweetQuery.Include(Set(Tweet.CoreDataField.id)) + + private val queryOptions = TweetQuery.Options(include = queryInclude) + + def buildUrlShortenerCtx(request: AttachmentBuilderRequest): UrlShortener.Context = + UrlShortener.Context( + tweetId = request.tweetId, + userId = request.user.id, + createdAt = SnowflakeId(request.tweetId).time, + userProtected = request.user.safety.get.isProtected, + clientAppId = request.deviceSource.clientAppId, + remoteHost = request.remoteHost, + dark = request.darkTraffic + ) + + def asQuotedTweet(tweet: Tweet, shortenedUrl: ShortenedUrl): QuotedTweet = + getShare(tweet) match { + case None => QuotedTweet(tweet.id, getUserId(tweet), Some(shortenedUrl)) + case Some(share) => QuotedTweet(share.sourceStatusId, share.sourceUserId, Some(shortenedUrl)) + } + + def tweetPermalink(request: AttachmentBuilderRequest): Option[TweetPermalink] = + request.attachmentUrl.collectFirst { + // prevent tweet-quoting cycles + case TweetPermalink(screenName, quotedTweetId) if request.tweetId > quotedTweetId => + TweetPermalink(screenName, quotedTweetId) + } + + def apply( + tweetRepo: TweetRepository.Optional, + urlShortener: UrlShortener.Type, + validateAttachments: AttachmentBuilder.ValidationType, + stats: StatsReceiver, + denyNonTweetPermalinks: Gate[Unit] = Gate.False + ): Type = { + val tweetGetter = TweetRepository.tweetGetter(tweetRepo, queryOptions) + val attachmentNotPermalinkCounter = stats.counter("attachment_url_not_tweet_permalink") + val quotedTweetFoundCounter = stats.counter("quoted_tweet_found") + val quotedTweetNotFoundCounter = stats.counter("quoted_tweet_not_found") + + def buildAttachmentResult(request: AttachmentBuilderRequest) = + tweetPermalink(request) match { + case Some(qtPermalink) => + tweetGetter(qtPermalink.tweetId).flatMap { + case Some(tweet) => + quotedTweetFoundCounter.incr() + val ctx = 
buildUrlShortenerCtx(request) + urlShortener((qtPermalink.url, ctx)).map { shortenedUrl => + AttachmentBuilderResult( + quotedTweet = Some(asQuotedTweet(tweet, shortenedUrl)), + extraChars = shortenedUrl.shortUrl.length + 1, + validationContext = request.ctx + ) + } + case None => + quotedTweetNotFoundCounter.incr() + log.warn( + s"unable to extract quote tweet from attachment builder request: $request" + ) + if (denyNonTweetPermalinks()) { + throw TweetCreateFailure.State( + TweetCreateState.SourceTweetNotFound, + Some(s"quoted tweet is not found from given permalink: $qtPermalink") + ) + } else { + Future.value(request.passThroughResponse) + } + } + case _ => + attachmentNotPermalinkCounter.incr() + Future.value(request.passThroughResponse) + } + + FutureArrow { request => + for { + result <- buildAttachmentResult(request) + () <- validateAttachments(result) + } yield result + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD new file mode 100644 index 000000000..2475b5f1a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD @@ -0,0 +1,88 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/joda-time", + "3rdparty/jvm/org/apache/thrift:libthrift", + "3rdparty/jvm/org/geotools:gt-referencing", + "3rdparty/jvm/org/locationtech/spatial4j", + "compliance/user-consent/src/main/scala/com/twitter/userconsent/compliance/birthdate", + "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", + "diffshow", + "eventbus/client", + "featureswitches/featureswitches-core/src/main/scala", + "finatra/inject/inject-slf4j/src/main/scala/com/twitter/inject", + "flock-client", + "flock-client/src/main/thrift:thrift-scala", + 
"geoduck/service/src/main/scala/com/twitter/geoduck/service/common/clientmodules", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/primitives", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/service", + "gizmoduck/common/src/main/scala/com/twitter/gizmoduck/util:scala", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge-internal/scrooge-schema/src/main/scala/com/twitter/scrooge/schema", + "scrooge-internal/scrooge-schema/src/main/scala/com/twitter/scrooge/schema/scrooge/scala", + "scrooge-internal/scrooge-schema/src/main/scala/com/twitter/scrooge/schema/tree", + "scrooge-internal/src/main/scala/com/twitter/scrooge_internal/linter/known_annotations", + "scrooge/scrooge-core", + "tweetypie/servo/repo", + "tweetypie/servo/util", + "snowflake:id", + "src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/botmaker:botmaker-scala", + "src/thrift/com/twitter/bouncer:bounce-action-thrift-scala", + "src/thrift/com/twitter/context:testing-signals-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/geoduck:geoduck-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-type-thrift-scala", + "src/thrift/com/twitter/relevance/feature_store:feature_store-scala", + "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions-scala", + "src/thrift/com/twitter/service/talon/gen:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-scala", + "src/thrift/com/twitter/spam/features:safety-meta-data-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "src/thrift/com/twitter/spam/rtf:tweet-rtf-event-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + 
"tweetypie/common/src/thrift/com/twitter/tweetypie:audit-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "tco-util", + "tweet-util/src/main/scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/store", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + "tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities", + "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context", + "twitter-text/lib/java/src/main/java/com/twitter/twittertext", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats", + "visibility/common/src/main/scala/com/twitter/visibility/common", + "visibility/lib/src/main/scala/com/twitter/visibility/builder", + "visibility/lib/src/main/scala/com/twitter/visibility/generators", + "visibility/lib/src/main/scala/com/twitter/visibility/models", + "visibility/writer/src/main/scala/com/twitter/visibility/writer", + 
"visibility/writer/src/main/scala/com/twitter/visibility/writer/interfaces/tweets", + "visibility/writer/src/main/scala/com/twitter/visibility/writer/models", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala new file mode 100644 index 000000000..5a04c611f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala @@ -0,0 +1,74 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.expandodo.thriftscala.AttachmentEligibilityResponses +import com.twitter.expandodo.{thriftscala => expandodo} +import com.twitter.tweetypie.backends.Expandodo +import com.twitter.twittertext.Extractor +import scala.util.control.NoStackTrace +import scala.util.control.NonFatal +import java.net.URI + +object CardReferenceValidationFailedException extends Exception with NoStackTrace + +object CardReferenceValidationHandler { + type Type = FutureArrow[(UserId, CardUri), CardUri] + + def apply(checkEligibility: Expandodo.CheckAttachmentEligibility): Type = { + def validateAttachmentForUser(userId: UserId, cardUri: CardUri): Future[CardUri] = { + val request = Seq(expandodo.AttachmentEligibilityRequest(cardUri, userId)) + checkEligibility(request) + .flatMap(validatedCardUri) + .rescue { + case NonFatal(_) => Future.exception(CardReferenceValidationFailedException) + } + } + + FutureArrow { + case (userId, cardUri) => + if (shouldSkipValidation(cardUri)) { + Future.value(cardUri) + } else { + validateAttachmentForUser(userId, cardUri) + } + } + } + + private[this] def validatedCardUri(responses: AttachmentEligibilityResponses) = { + responses.results.headOption match { + case Some( + expandodo.AttachmentEligibilityResult + .Success(expandodo.ValidCardUri(validatedCardUri)) + ) => + Future.value(validatedCardUri) + case _ => + 
Future.exception(CardReferenceValidationFailedException) + } + } + + // We're not changing state between calls, so it's safe to share among threads + private[this] val extractor = { + val extractor = new Extractor + extractor.setExtractURLWithoutProtocol(false) + extractor + } + + // Card References with these URIs don't need validation since cards referenced by URIs in these + // schemes are public and hence not subject to restrictions. + private[handler] val isWhitelistedSchema = Set("http", "https", "tombstone") + + // NOTE: http://www.ietf.org/rfc/rfc2396.txt + private[this] def hasWhitelistedScheme(cardUri: CardUri) = + Try(new URI(cardUri)).toOption + .map(_.getScheme) + .exists(isWhitelistedSchema) + + // Even though the URI spec is technically a superset of http:// and https:// URLs, we have to + // resort to using a Regex based parser here as a fallback because many URLs found in the wild + // have unescaped components that would fail java.net.URI parsing, yet are still considered acceptable. 
+ private[this] def isTwitterUrlEntity(cardUri: CardUri) = + extractor.extractURLs(cardUri).size == 1 + + private[this] def shouldSkipValidation(cardUri: CardUri) = + hasWhitelistedScheme(cardUri) || isTwitterUrlEntity(cardUri) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala new file mode 100644 index 000000000..da483cef5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala @@ -0,0 +1,52 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.CardReferenceUriExtractor +import com.twitter.tweetypie.core.NonTombstone +import com.twitter.tweetypie.core.Tombstone +import com.twitter.tweetypie.repository.CardUsersRepository +import com.twitter.tweetypie.repository.CardUsersRepository.Context +import com.twitter.tweetypie.thriftscala.CardReference + +/** + * Finds a set of UserId that may be mentioned when replying to a tweet that has a card. + * + * Replies created without 'auto_populate_reply_metadata' include both 'site' and 'author' users to + * have a more exhaustive list of mentions to match against. This is needed because iOS and Android + * have had different implementations client-side for years. + */ +object CardUsersFinder { + + case class Request( + cardReference: Option[CardReference], + urls: Seq[String], + perspectiveUserId: UserId) { + val uris: Seq[String] = cardReference match { + case Some(CardReferenceUriExtractor(cardUri)) => + cardUri match { + case NonTombstone(uri) => Seq(uri) + case Tombstone => Nil + } + case _ => urls + } + + val context: CardUsersRepository.Context = Context(perspectiveUserId) + } + + type Type = Request => Stitch[Set[UserId]] + + /** + * From the card-related arguments in [[Request]], select the set of user ids associated with the + * card. 
+ * + * Note that this uses the same "which card do I use?" logic from Card2Hydrator which + * prioritizes CardReferenceUri and then falls back to the last resolvable (non-None) url entity. + */ + def apply(cardUserRepo: CardUsersRepository.Type): Type = + request => + Stitch + .traverse(request.uris) { uri => cardUserRepo(uri, request.context) } + // select the last, non-None Set of users ids + .map(r => r.flatten.reverse.headOption.getOrElse(Set.empty)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala new file mode 100644 index 000000000..058bcbce5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala @@ -0,0 +1,109 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.thriftscala.CollabControl +import com.twitter.tweetypie.thriftscala.CollabControlOptions +import com.twitter.tweetypie.thriftscala.CollabInvitation +import com.twitter.tweetypie.thriftscala.CollabInvitationOptions +import com.twitter.tweetypie.thriftscala.CollabInvitationStatus +import com.twitter.tweetypie.thriftscala.CollabTweet +import com.twitter.tweetypie.thriftscala.CollabTweetOptions +import com.twitter.tweetypie.thriftscala.Communities +import com.twitter.tweetypie.thriftscala.ExclusiveTweetControl +import com.twitter.tweetypie.thriftscala.InvitedCollaborator +import com.twitter.tweetypie.thriftscala.TrustedFriendsControl +import com.twitter.tweetypie.thriftscala.TweetCreateConversationControl +import com.twitter.tweetypie.thriftscala.TweetCreateState.CollabTweetInvalidParams +import com.twitter.tweetypie.util.CommunityUtil + +object CollabControlBuilder { + type Type = Request => Future[Option[CollabControl]] + + case class Request( + collabControlOptions: Option[CollabControlOptions], + replyResult: 
Option[ReplyBuilder.Result], + communities: Option[Communities], + trustedFriendsControl: Option[TrustedFriendsControl], + conversationControl: Option[TweetCreateConversationControl], + exclusiveTweetControl: Option[ExclusiveTweetControl], + userId: UserId) + + def apply(): Type = { request => + val collabControl = convertToCollabControl(request.collabControlOptions, request.userId) + + validateCollabControlParams( + collabControl, + request.replyResult, + request.communities, + request.trustedFriendsControl, + request.conversationControl, + request.exclusiveTweetControl, + request.userId + ) map { _ => collabControl } + } + + def convertToCollabControl( + collabTweetOptions: Option[CollabControlOptions], + authorId: UserId + ): Option[CollabControl] = { + collabTweetOptions flatMap { + case CollabControlOptions.CollabInvitation( + collabInvitationOptions: CollabInvitationOptions) => + Some( + CollabControl.CollabInvitation( + CollabInvitation( + invitedCollaborators = collabInvitationOptions.collaboratorUserIds.map(userId => { + InvitedCollaborator( + collaboratorUserId = userId, + collabInvitationStatus = + if (userId == authorId) + CollabInvitationStatus.Accepted + else CollabInvitationStatus.Pending + ) + }) + ) + ) + ) + case CollabControlOptions.CollabTweet(collabTweetOptions: CollabTweetOptions) => + Some( + CollabControl.CollabTweet( + CollabTweet( + collaboratorUserIds = collabTweetOptions.collaboratorUserIds + ) + ) + ) + case _ => None + } + } + + def validateCollabControlParams( + collabControl: Option[CollabControl], + replyResult: Option[ReplyBuilder.Result], + communities: Option[Communities], + trustedFriendsControl: Option[TrustedFriendsControl], + conversationControl: Option[TweetCreateConversationControl], + exclusiveTweetControl: Option[ExclusiveTweetControl], + userId: UserId + ): Future[Unit] = { + val isInReplyToTweet = replyResult.exists(_.reply.inReplyToStatusId.isDefined) + + collabControl match { + case Some(_: CollabControl) + if 
(isInReplyToTweet || + CommunityUtil.hasCommunity(communities) || + exclusiveTweetControl.isDefined || + trustedFriendsControl.isDefined || + conversationControl.isDefined) => + Future.exception(TweetCreateFailure.State(CollabTweetInvalidParams)) + case Some(CollabControl.CollabInvitation(collab_invitation)) + if !collab_invitation.invitedCollaborators.headOption.exists(_.collaboratorUserId == userId) => // first invitee must be the author; headOption rejects an empty list instead of throwing + Future.exception(TweetCreateFailure.State(CollabTweetInvalidParams)) + case Some(CollabControl.CollabTweet(collab_tweet)) + if !collab_tweet.collaboratorUserIds.headOption.contains(userId) => // first collaborator must be the author; headOption rejects an empty list instead of throwing + Future.exception(TweetCreateFailure.State(CollabTweetInvalidParams)) + case _ => + Future.Unit + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala new file mode 100644 index 000000000..220a6e1dd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala @@ -0,0 +1,40 @@ +package com.twitter.tweetypie.handler + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.servo.util.Gate +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.thriftscala.Communities +import com.twitter.tweetypie.thriftscala.TweetCreateState.CommunityProtectedUserCannotTweet +import com.twitter.tweetypie.util.CommunityUtil + +object CommunitiesValidator { + case class Request( + matchedResults: Option[FeatureSwitchResults], + isProtected: Boolean, + community: Option[Communities]) + + type Type = Request => Future[Unit] + + val CommunityProtectedCanCreateTweet = "communities_protected_community_tweet_creation_enabled" + + val communityProtectedCanCreateTweetGate: Gate[Request] = Gate { request: Request => + request.matchedResults + .flatMap(_.getBoolean(CommunityProtectedCanCreateTweet, shouldLogImpression = true)) + .contains(false) + } + + 
def apply(): Type = + (request: Request) => { + // Order is important: the feature-switch gate is checked only when the + // request is both protected & community so that the FS experiment measurements + // are based only on data from requests that are subject to rejection by this validator. + if (request.isProtected && + CommunityUtil.hasCommunity(request.community) && + communityProtectedCanCreateTweetGate(request)) { + Future.exception(TweetCreateFailure.State(CommunityProtectedUserCannotTweet)) + } else { + Future.Unit + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala new file mode 100644 index 000000000..6eeea01f9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala @@ -0,0 +1,272 @@ +package com.twitter.tweetypie.handler + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.UserId +import com.twitter.tweetypie._ +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.UserIdentityRepository +import com.twitter.tweetypie.repository.UserKey +import com.twitter.tweetypie.thriftscala.ConversationControl +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.thriftscala.TweetCreateConversationControl +import com.twitter.tweetypie.thriftscala.TweetCreateState.ConversationControlNotAllowed +import com.twitter.tweetypie.thriftscala.TweetCreateState.InvalidConversationControl +import com.twitter.tweetypie.util.ConversationControls +import com.twitter.util.logging.Logging + +/** + * Process request parameters into a ConversationControl value. 
+ */ +object ConversationControlBuilder extends Logging { + type Type = Request => Stitch[Option[ConversationControl]] + + type ScreenName = String + + /** + * The fields necessary to create a [[ConversationControl]]. + * + * This is a trait rather than a case class to avoid running the + * code to extract the mentions in the cases where handling the + * request doesn't need to use them (the common case where + * tweetCreateConversationControl is None). + */ + trait Request { + def tweetCreateConversationControl: Option[TweetCreateConversationControl] + def tweetAuthorId: UserId + def mentionedUserScreenNames: Set[String] + + def noteTweetMentionedUserIds: Option[Set[Long]] + } + + object Request { + + /** + * Extract the data necessary to create a [[ConversationControl]] + * for a new [[Tweet]]. This is intended for use when creating + * Tweets. It must be called after the Tweet has had its entities + * extracted. + */ + def fromTweet( + tweet: Tweet, + tweetCreateConversationControl: Option[TweetCreateConversationControl], + noteTweetMentionedUserIdsList: Option[Seq[Long]] + ): Request = { + val cctl = tweetCreateConversationControl + new Request { + def tweetCreateConversationControl: Option[TweetCreateConversationControl] = cctl + def mentionedUserScreenNames: Set[ScreenName] = + tweet.mentions + // Enforce that the Tweet's mentions have already been + // extracted from the text. (Mentions will be None if they + // have not yet been extracted.) + .getOrElse( + throw new RuntimeException( + "Mentions must be extracted before applying ConversationControls")) + .map(_.screenName) + .toSet + + def tweetAuthorId: UserId = tweet.coreData.get.userId + def noteTweetMentionedUserIds: Option[Set[Long]] = + noteTweetMentionedUserIdsList.map(_.toSet) + } + } + } + + /** + * Create a ConversationControlBuilder that looks up user ids for + * screen names using the specified UserIdentityRepository. 
+ */ + def fromUserIdentityRepo( + statsReceiver: StatsReceiver, + userIdentityRepo: UserIdentityRepository.Type + ): Request => Stitch[Option[ConversationControl]] = + ConversationControlBuilder( + getUserId = screenName => userIdentityRepo(UserKey.byScreenName(screenName)).map(_.id), + statsReceiver = statsReceiver + ) + + /** + * Extract the inviteViaMention value which does not exist on the TweetCreateConversationControl + * itself but does exist on the structures it unions. + */ + def inviteViaMention(tccc: TweetCreateConversationControl): Boolean = + tccc match { + case TweetCreateConversationControl.ByInvitation(c) => c.inviteViaMention.contains(true) + case TweetCreateConversationControl.Community(c) => c.inviteViaMention.contains(true) + case TweetCreateConversationControl.Followers(c) => c.inviteViaMention.contains(true) + case _ => false + } + + /** + * Translates the TweetCreateConversationControl into + * ConversationControl using the context from the rest of the tweet + * creation. For the most part, this is just a direct translation, + * plus filling in the contextual user ids (mentioned users and tweet + * author). + */ + def apply( + statsReceiver: StatsReceiver, + getUserId: ScreenName => Stitch[UserId] + ): Request => Stitch[Option[ConversationControl]] = { + val userIdLookupsCounter = statsReceiver.counter("user_id_lookups") + val conversationControlPresentCounter = statsReceiver.counter("conversation_control_present") + val conversationControlInviteViaMentionPresentCounter = + statsReceiver.counter("conversation_control_invite_via_mention_present") + val failureCounter = statsReceiver.counter("failures") + + // Get the user ids for these screen names. Any users who do not + // exist will be silently dropped. 
+ def getExistingUserIds( + screenNames: Set[ScreenName], + mentionedUserIds: Option[Set[Long]] + ): Stitch[Set[UserId]] = { + mentionedUserIds match { + case Some(userIds) => Stitch.value(userIds) + case _ => + Stitch + .traverse(screenNames.toSeq) { screenName => + getUserId(screenName).liftNotFoundToOption + .ensure(userIdLookupsCounter.incr()) + } + .map(userIdOptions => userIdOptions.flatten.toSet) + } + } + + // This is broken out just to make it syntactically nicer to add + // the stats handling + def process(request: Request): Stitch[Option[ConversationControl]] = + request.tweetCreateConversationControl match { + case None => Stitch.None + case Some(cctl) => + cctl match { + case TweetCreateConversationControl.ByInvitation(byInvitationControl) => + for { + invitedUserIds <- getExistingUserIds( + request.mentionedUserScreenNames, + request.noteTweetMentionedUserIds) + } yield Some( + ConversationControls.byInvitation( + invitedUserIds = invitedUserIds.toSeq.filterNot(_ == request.tweetAuthorId), + conversationTweetAuthorId = request.tweetAuthorId, + byInvitationControl.inviteViaMention + ) + ) + + case TweetCreateConversationControl.Community(communityControl) => + for { + invitedUserIds <- getExistingUserIds( + request.mentionedUserScreenNames, + request.noteTweetMentionedUserIds) + } yield Some( + ConversationControls.community( + invitedUserIds = invitedUserIds.toSeq.filterNot(_ == request.tweetAuthorId), + conversationTweetAuthorId = request.tweetAuthorId, + communityControl.inviteViaMention + ) + ) + case TweetCreateConversationControl.Followers(followersControl) => + for { + invitedUserIds <- getExistingUserIds( + request.mentionedUserScreenNames, + request.noteTweetMentionedUserIds) + } yield Some( + ConversationControls.followers( + invitedUserIds = invitedUserIds.toSeq.filterNot(_ == request.tweetAuthorId), + conversationTweetAuthorId = request.tweetAuthorId, + followersControl.inviteViaMention + ) + ) + // This should only ever happen if a new 
value is added to the + // union and we don't update this code. + case TweetCreateConversationControl.UnknownUnionField(fld) => + throw new RuntimeException(s"Unexpected TweetCreateConversationControl: $fld") + } + } + + (request: Request) => { + // Wrap in Stitch to encapsulate any exceptions that happen + // before making a Stitch call inside of process. + Stitch(process(request)).flatten.respond { response => + // If we count this before doing the work, and the stats are + // collected before the RPC completes, then any failures + // will get counted in a different minute than the request + // that caused it. + request.tweetCreateConversationControl.foreach { cc => + conversationControlPresentCounter.incr() + if (inviteViaMention(cc)) conversationControlInviteViaMentionPresentCounter.incr() + } + + response.onFailure { e => + error(message = "Failed to create conversation control", cause = e) + // Don't bother counting individual exceptions, because + // the cost of keeping those stats is probably not worth + // the convenience of not having to look in the logs. + failureCounter.incr() + } + } + } + } + + /** + * Validates if a conversation control request is allowed by feature switches + * and is only requested on a root tweet. 
+ */ + object Validate { + case class Request( + matchedResults: Option[FeatureSwitchResults], + conversationControl: Option[TweetCreateConversationControl], + inReplyToTweetId: Option[TweetId]) + + type Type = FutureEffect[Request] + + val ExInvalidConversationControl = TweetCreateFailure.State(InvalidConversationControl) + val ExConversationControlNotAllowed = TweetCreateFailure.State(ConversationControlNotAllowed) + val ConversationControlStatusUpdateEnabledKey = "conversation_control_status_update_enabled" + val ConversationControlFollowersEnabledKey = "conversation_control_my_followers_enabled" + + def apply( + useFeatureSwitchResults: Gate[Unit], + statsReceiver: StatsReceiver + ): Type = request => { + def fsDenied(fsKey: String): Boolean = { + val featureEnabledOpt: Option[Boolean] = + // Do not log impressions, which would interfere with shared client experiment data. + request.matchedResults.flatMap(_.getBoolean(fsKey, shouldLogImpression = false)) + val fsEnabled = featureEnabledOpt.contains(true) + if (!fsEnabled) { + statsReceiver.counter(s"check_conversation_control/unauthorized/fs/$fsKey").incr() + } + !fsEnabled + } + + val isCcRequest: Boolean = request.conversationControl.isDefined + + val isCcInvalidParams = isCcRequest && { + val isRootTweet = request.inReplyToTweetId.isEmpty + if (!isRootTweet) { + statsReceiver.counter("check_conversation_control/invalid").incr() + } + !isRootTweet + } + + val isCcDeniedByFs = isCcRequest && { + val isFollower = request.conversationControl.exists { + case _: TweetCreateConversationControl.Followers => true + case _ => false + } + + fsDenied(ConversationControlStatusUpdateEnabledKey) || + (isFollower && fsDenied(ConversationControlFollowersEnabledKey)) + } + + if (isCcDeniedByFs && useFeatureSwitchResults()) { + Future.exception(ExConversationControlNotAllowed) + } else if (isCcInvalidParams) { + Future.exception(ExInvalidConversationControl) + } else { + Future.Unit + } + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala new file mode 100644 index 000000000..c6b1fd0e9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala @@ -0,0 +1,66 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.repository.UserKey +import com.twitter.tweetypie.repository.UserQueryOptions +import com.twitter.tweetypie.repository.UserRepository +import com.twitter.tweetypie.repository.UserVisibility +import com.twitter.tweetypie.store.AsyncDeleteAdditionalFields +import com.twitter.tweetypie.store.DeleteAdditionalFields +import com.twitter.tweetypie.store.TweetStoreEventOrRetry +import com.twitter.tweetypie.thriftscala.AsyncDeleteAdditionalFieldsRequest +import com.twitter.tweetypie.thriftscala.DeleteAdditionalFieldsRequest + +object DeleteAdditionalFieldsBuilder { + type Type = DeleteAdditionalFieldsRequest => Future[Seq[DeleteAdditionalFields.Event]] + + val tweetQueryOptions = TweetQuery.Options(include = GetTweetsHandler.BaseInclude) + + def apply(tweetRepo: TweetRepository.Type): Type = { + def getTweet(tweetId: TweetId) = + Stitch.run( + tweetRepo(tweetId, tweetQueryOptions) + .rescue(HandlerError.translateNotFoundToClientError(tweetId)) + ) + + request => { + Future.collect( + request.tweetIds.map { tweetId => + getTweet(tweetId).map { tweet => + DeleteAdditionalFields.Event( + tweetId = tweetId, + fieldIds = request.fieldIds, + userId = getUserId(tweet), + timestamp = Time.now + ) + } + } + ) + } + } +} + +object AsyncDeleteAdditionalFieldsBuilder { + type Type = AsyncDeleteAdditionalFieldsRequest => Future[ + 
TweetStoreEventOrRetry[AsyncDeleteAdditionalFields.Event] + ] + + val userQueryOpts: UserQueryOptions = UserQueryOptions(Set(UserField.Safety), UserVisibility.All) + + def apply(userRepo: UserRepository.Type): Type = { + def getUser(userId: UserId): Future[User] = + Stitch.run( + userRepo(UserKey.byId(userId), userQueryOpts) + .rescue { case NotFound => Stitch.exception(HandlerError.userNotFound(userId)) } + ) + + request => + getUser(request.userId).map { user => + AsyncDeleteAdditionalFields.Event.fromAsyncRequest(request, user) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala new file mode 100644 index 000000000..34b588a17 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.eventbus.client.EventBusPublisher +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.backends.GeoScrubEventStore.GetGeoScrubTimestamp +import com.twitter.tweetypie.thriftscala.DeleteLocationData +import com.twitter.tweetypie.thriftscala.DeleteLocationDataRequest + +/** + * Initiates the process of removing the geo information from a user's + * tweets. + */ +object DeleteLocationDataHandler { + type Type = DeleteLocationDataRequest => Future[Unit] + + def apply( + getLastScrubTime: GetGeoScrubTimestamp, + scribe: DeleteLocationData => Future[Unit], + eventbus: EventBusPublisher[DeleteLocationData] + ): Type = + request => { + // Attempt to bound the time range of the tweets that need to be + // scrubbed by finding the most recent scrub time on record. This + // is an optimization that prevents scrubbing already-scrubbed + // tweets, so it is OK if the value that we find is occasionally + // stale or if the lookup fails. 
Primarily, this is intended to + // protect against intentional abuse by enqueueing multiple + // delete_location_data events that have to traverse a very long + // timeline. + Stitch + .run(getLastScrubTime(request.userId)) + // If there is no timestamp or the lookup failed, continue with + // an unchanged request. + .handle { case _ => None } + .flatMap { lastScrubTime => + // Due to clock skew, it's possible for the last scrub + // timestamp to be larger than the timestamp from the request, + // but we ignore that so that we keep a faithful record of + // user requests. The execution of such events will end up a + // no-op. + val event = + DeleteLocationData( + userId = request.userId, + timestampMs = Time.now.inMilliseconds, + lastTimestampMs = lastScrubTime.map(_.inMilliseconds) + ) + + Future.join( + Seq( + // Scribe the event so that we can reprocess events if + // there is a bug or operational issue that causes some + // events to be lost. + scribe(event), + // The actual deletion process is handled by the TweetyPie + // geoscrub daemon. 
+ eventbus.publish(event) + ) + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala new file mode 100644 index 000000000..168dde9c6 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala @@ -0,0 +1,254 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.timelineservice.{thriftscala => tls} +import com.twitter.tweetypie.backends.TimelineService +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.thriftscala.CardReference +import com.twitter.tweetypie.thriftscala.ConversationControl +import com.twitter.tweetypie.thriftscala.ConversationControlByInvitation +import com.twitter.tweetypie.thriftscala.ConversationControlCommunity +import com.twitter.tweetypie.thriftscala.ConversationControlFollowers +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.EditOptions +import com.twitter.tweetypie.thriftscala.NoteTweetOptions +import com.twitter.tweetypie.thriftscala.PostTweetRequest +import com.twitter.tweetypie.thriftscala.TweetCreateConversationControl +import com.twitter.tweetypie.util.ConversationControls +import com.twitter.tweetypie.util.EditControlUtil +import com.twitter.util.Time + +/** + * Used at tweet creation time to determine whether the tweet creation + * request should be considered a duplicate of an existing tweet. + */ +object DuplicateTweetFinder { + + /** + * Return the id of the first candidate tweet found to be a duplicate + * of this request, or None if there is no duplicate.
+ */ + type Type = RequestInfo => Future[Option[TweetId]] + + final case class Settings( + // The number of tweets that are loaded from the user's timeline + // for the heuristic duplicate check + numTweetsToCheck: Int, + // The oldest that a tweet can be to still be considered a + // duplicate by the heuristic duplicate check + maxDuplicateAge: Duration) + + // Takes a ConversationControl from a Tweet and converts to the equivalent + // TweetCreateConversationControl. Note: this is a lossy conversion because the + // ConversationControl contains additional data from the Tweet. + def toTweetCreateConversationControl( + conversationControl: ConversationControl + ): TweetCreateConversationControl = + conversationControl match { + case ConversationControl.ByInvitation( + ConversationControlByInvitation(_, _, inviteViaMention)) => + ConversationControls.Create.byInvitation(inviteViaMention) + case ConversationControl.Community(ConversationControlCommunity(_, _, inviteViaMention)) => + ConversationControls.Create.community(inviteViaMention) + case ConversationControl.Followers(ConversationControlFollowers(_, _, inviteViaMention)) => + ConversationControls.Create.followers(inviteViaMention) + case _ => throw new IllegalArgumentException + } + + /** + * The parts of the request that we need in order to perform + * duplicate detection. 
+ */ + final case class RequestInfo( + userId: UserId, + isNarrowcast: Boolean, + isNullcast: Boolean, + text: String, + replyToTweetId: Option[TweetId], + mediaUploadIds: Seq[MediaId], + cardReference: Option[CardReference], + conversationControl: Option[TweetCreateConversationControl], + underlyingCreativesContainer: Option[CreativesContainerId], + editOptions: Option[EditOptions] = None, + noteTweetOptions: Option[NoteTweetOptions] = None) { + + def isDuplicateOf(tweet: Tweet, oldestAcceptableTimestamp: Time): Boolean = { + val createdAt = getTimestamp(tweet) + val isDuplicateText = text == getText(tweet) + val isDuplicateReplyToTweetId = replyToTweetId == getReply(tweet).flatMap(_.inReplyToStatusId) + val isDuplicateMedia = getMedia(tweet).map(_.mediaId) == mediaUploadIds + val isDuplicateCardReference = getCardReference(tweet) == cardReference + val isDuplicateConversationControl = + tweet.conversationControl.map(toTweetCreateConversationControl) == conversationControl + val isDuplicateConversationContainerId = { + tweet.underlyingCreativesContainerId == underlyingCreativesContainer + } + + val isDuplicateIfEditRequest = if (editOptions.isDefined) { + // We do not count an incoming edit request as creating a duplicate tweet if: + // 1) The tweet that is considered a duplicate is a previous version of this tweet OR + // 2) The tweet that is considered a duplicate is otherwise stale. 
+ val tweetEditChain = tweet.editControl match { + case Some(EditControl.Initial(initial)) => + initial.editTweetIds + case Some(EditControl.Edit(edit)) => + edit.editControlInitial.map(_.editTweetIds).getOrElse(Nil) + case _ => Nil + } + val tweetIsAPreviousVersion = + editOptions.map(_.previousTweetId).exists(tweetEditChain.contains) + + val tweetIsStale = EditControlUtil.isLatestEdit(tweet.editControl, tweet.id) match { + case Return(false) => true + case _ => false + } + + !(tweetIsStale || tweetIsAPreviousVersion) + } else { + // If not an edit request, this condition is true as duplication checking is not blocked + true + } + + // Note that this does not prevent you from tweeting the same + // image twice with different text, or the same text twice with + // different images, because if you upload the same media twice, + // we will store two copies of it, each with a different media + // URL and thus different t.co URL, and since the text that + // we're checking here has that t.co URL added to it already, it + // is necessarily different. + // + // We shouldn't have to check the user id or whether it's a + // retweet, because we loaded the tweets from the user's + // (non-retweet) timelines, but it doesn't hurt and protects + // against possible future changes. + (oldestAcceptableTimestamp <= createdAt) && + getShare(tweet).isEmpty && + (getUserId(tweet) == userId) && + isDuplicateText && + isDuplicateReplyToTweetId && + isDuplicateMedia && + isDuplicateCardReference && + isDuplicateConversationControl && + isDuplicateConversationContainerId && + isDuplicateIfEditRequest && + noteTweetOptions.isEmpty // Skip duplicate checks for NoteTweets + } + } + + object RequestInfo { + + /** + * Extract the information relevant to the DuplicateTweetFinder + * from the PostTweetRequest. 
+ */ + def fromPostTweetRequest(req: PostTweetRequest, processedText: String): RequestInfo = + RequestInfo( + userId = req.userId, + isNarrowcast = req.narrowcast.nonEmpty, + isNullcast = req.nullcast, + text = processedText, + replyToTweetId = req.inReplyToTweetId, + mediaUploadIds = req.mediaUploadIds.getOrElse[Seq[MediaId]](Seq.empty), + cardReference = req.additionalFields.flatMap(_.cardReference), + conversationControl = req.conversationControl, + underlyingCreativesContainer = req.underlyingCreativesContainerId, + editOptions = req.editOptions, + noteTweetOptions = req.noteTweetOptions + ) + } + + /** + * Encapsulates the external interactions that we need to do for + * duplicate checking. + */ + trait TweetSource { + def loadTweets(tweetIds: Seq[TweetId]): Future[Seq[Tweet]] + def loadUserTimelineIds(userId: UserId, maxCount: Int): Future[Seq[TweetId]] + def loadNarrowcastTimelineIds(userId: UserId, maxCount: Int): Future[Seq[TweetId]] + } + + object TweetSource { + + /** + * Use the provided services to access tweets. 
+ */ + def fromServices( + tweetRepo: TweetRepository.Optional, + getStatusTimeline: TimelineService.GetStatusTimeline + ): TweetSource = + new TweetSource { + // only fields needed by RequestInfo.isDuplicateOf(). NOTE(review): isDuplicateOf also reads getCardReference(tweet) and tweet.underlyingCreativesContainerId; confirm this include set hydrates them. + private[this] val tweetQueryOption = + TweetQuery.Options( + TweetQuery.Include( + tweetFields = Set( + Tweet.CoreDataField.id, + Tweet.MediaField.id, + Tweet.ConversationControlField.id, + Tweet.EditControlField.id + ), + pastedMedia = true + ) + ) + + private[this] def loadTimeline(query: tls.TimelineQuery): Future[Seq[Long]] = + getStatusTimeline(Seq(query)).map(_.head.entries.map(_.statusId)) + + override def loadUserTimelineIds(userId: UserId, maxCount: Int): Future[Seq[Long]] = + loadTimeline( + tls.TimelineQuery( + timelineType = tls.TimelineType.User, + timelineId = userId, + maxCount = maxCount.toShort + ) + ) + + override def loadNarrowcastTimelineIds(userId: UserId, maxCount: Int): Future[Seq[Long]] = + loadTimeline( + tls.TimelineQuery( + timelineType = tls.TimelineType.Narrowcasted, + timelineId = userId, + maxCount = maxCount.toShort + ) + ) + + override def loadTweets(tweetIds: Seq[TweetId]): Future[Seq[Tweet]] = + if (tweetIds.isEmpty) { + Future.value(Seq[Tweet]()) + } else { + Stitch + .run( + Stitch.traverse(tweetIds) { tweetId => tweetRepo(tweetId, tweetQueryOption) } + ) + .map(_.flatten) + } + } + } + + def apply(settings: Settings, tweetSource: TweetSource): Type = { reqInfo => + if (reqInfo.isNullcast) { + // iff nullcast, we bypass duplication logic altogether + Future.None + } else { + val oldestAcceptableTimestamp = Time.now - settings.maxDuplicateAge + val userTweetIdsFut = + tweetSource.loadUserTimelineIds(reqInfo.userId, settings.numTweetsToCheck) + + // Check the narrowcast timeline iff this is a narrowcasted tweet + val narrowcastTweetIdsFut = + if (reqInfo.isNarrowcast) { + tweetSource.loadNarrowcastTimelineIds(reqInfo.userId, settings.numTweetsToCheck) + } else { + Future.value(Seq.empty) + } + + for { + userTweetIds
<- userTweetIdsFut + narrowcastTweetIds <- narrowcastTweetIdsFut + candidateTweets <- tweetSource.loadTweets(userTweetIds ++ narrowcastTweetIds) + } yield candidateTweets.find(reqInfo.isDuplicateOf(_, oldestAcceptableTimestamp)).map(_.id) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala new file mode 100644 index 000000000..d3baa0ae0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala @@ -0,0 +1,361 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.expandodo.thriftscala.Card2RequestOptions +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.gizmoduck.util.UserUtil +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.Card2Repository +import com.twitter.tweetypie.repository.StratoPromotedTweetRepository +import com.twitter.tweetypie.repository.StratoSubscriptionVerificationRepository +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.repository.UrlCard2Key +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.EditOptions +import com.twitter.tweetypie.thriftscala.TweetCreateState +import com.twitter.tweetypie.util.EditControlUtil._ +import com.twitter.tweetypie.thriftscala.CardReference +import com.twitter.tweetypie.thriftscala.EditControlInitial +import com.twitter.tweetypie.thriftscala.PostTweetRequest +import com.twitter.tweetypie.util.CommunityAnnotation +import com.twitter.tweetypie.util.EditControlUtil +import com.twitter.util.Future + +object EditControlBuilder { + type Type = Request => Future[Option[EditControl]] + + val editTweetCountStat = "edit_tweet_count" + val editControlQueryOptions = 
TweetQuery.Options( + TweetQuery.Include(Set(Tweet.CoreDataField.id, Tweet.EditControlField.id)) + ) + val TweetEditCreationEnabledKey = "tweet_edit_creation_enabled" + val TweetEditCreationEnabledForTwitterBlueKey = "tweet_edit_creation_enabled_for_twitter_blue" + + val pollCardNames: Set[String] = Set( + "poll2choice_text_only", + "poll3choice_text_only", + "poll4choice_text_only", + "poll2choice_image", + "poll3choice_image", + "poll4choice_image", + "poll2choice_video", + "poll3choice_video", + "poll4choice_video", + ) + + /** Used just for checking card name for poll check in case cards platform key not provided. */ + val defaultCardsPlatformKey = "iPhone-13" + + /** + * Do we assume a Tweet has a poll (which makes it not editable) when it has a card + * that could be a poll, and it cannot be resolved at create. + */ + val isPollCardAssumption = true + + val tweetEditSubscriptionResource = "feature/tweet_edit" + + val log: Logger = Logger(getClass) + + case class Request( + postTweetRequest: PostTweetRequest, + tweet: Tweet, + matchedResults: Option[FeatureSwitchResults]) { + def editOptions: Option[EditOptions] = postTweetRequest.editOptions + + def authorId: UserId = postTweetRequest.userId + + def createdAt: Time = Time.fromMilliseconds(tweet.coreData.get.createdAtSecs * 1000L) + + def tweetId: TweetId = tweet.id + + def cardReference: Option[CardReference] = + postTweetRequest.additionalFields.flatMap(_.cardReference) + + def cardsPlatformKey: Option[String] = + postTweetRequest.hydrationOptions.flatMap(_.cardsPlatformKey) + } + + def apply( + tweetRepo: TweetRepository.Type, + card2Repo: Card2Repository.Type, + promotedTweetRepo: StratoPromotedTweetRepository.Type, + subscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type, + disablePromotedTweetEdit: Gate[Unit], + checkTwitterBlueSubscription: Gate[Unit], + setEditWindowToSixtyMinutes: Gate[Unit], + stats: StatsReceiver + ): Type = { + + // Nullcast tweets not allowed, except if the 
tweet has a community annotation + def isNullcastedButNotCommunityTweet(request: Request): Boolean = { + + val isNullcasted: Boolean = request.tweet.coreData.get.nullcast + + val communityIds: Option[Seq[CommunityId]] = + request.postTweetRequest.additionalFields + .flatMap(CommunityAnnotation.additionalFieldsToCommunityIDs) + + isNullcasted && !(communityIds.exists(_.nonEmpty)) + } + + def isSuperFollow(tweet: Tweet): Boolean = tweet.exclusiveTweetControl.isDefined + + def isCollabTweet(tweet: Tweet): Boolean = tweet.collabControl.isDefined + + def isReplyToTweet(tweet: Tweet): Boolean = + getReply(tweet).flatMap(_.inReplyToStatusId).isDefined + + // When card is tombstone, tweet is not considered a poll, and therefore can be edit eligible. + val cardReferenceUriIsTombstone = stats.counter("edit_control_builder_card_tombstoned") + // We check whether tweets are polls since these are not edit eligible. + // If we are not sure due to lookup failure, we take an `isPollCardAssumption`. + def isPoll( + card2Repo: Card2Repository.Type, + cardReference: CardReference, + cardsPlatformKey: String, + ): Stitch[Boolean] = { + if (cardReference.cardUri == "tombstone://card") { + cardReferenceUriIsTombstone.incr() + Stitch.value(false) + } else { + val key = UrlCard2Key(cardReference.cardUri) + // `allowNonTcoUrls = true` This allows us to check if non-tco urls (e.g. apple.com) have a card + // at this point in tweet builder urls can be in their original form and not tcoified. 
+ val options = Card2RequestOptions( + platformKey = cardsPlatformKey, + allowNonTcoUrls = true + ) + card2Repo(key, options) + .map(card2 => pollCardNames.contains(card2.name)) + } + } + + def isFeatureSwitchEnabled(matchedResults: Option[FeatureSwitchResults], key: String): Boolean = + matchedResults.flatMap(_.getBoolean(key, shouldLogImpression = false)).contains(true) + + def wrapInitial(initial: EditControlInitial): Option[EditControl.Initial] = + Some(EditControl.Initial(initial = initial)) + + // Checks for validity of an edit are implemented as procedures + // that throw an error in case a check fails. This composes way better than + // returning a Try/Future/Stitch because: + // 1. We do not need to decide which of the aforementioned containers to use. + // 2. The checks as below compose with callbacks in all the aforementioned containers. + + val editRequestOutsideOfAllowlist = stats.counter("edit_control_builder_rejected", "allowlist") + + // This method uses two feature switches: + // - TweetEditCreationEnabledKey authorizes the user to edit tweets directly + // - TweetEditCreationEnabledForTwitterBlueKey authorizes the user to edit tweets if they have + // a Twitter Blue subscription + // + // Test users are always authorized to edit tweets. + def checkUserEligibility( + authorId: UserId, + matchedResults: Option[FeatureSwitchResults] + ): Stitch[Unit] = { + val isTestUser = UserUtil.isTestUserId(authorId) + val authorizedWithoutTwitterBlue = + isFeatureSwitchEnabled(matchedResults, TweetEditCreationEnabledKey) + + if (isTestUser || authorizedWithoutTwitterBlue) { + // If the editing user is a test user or is authorized by the non-Twitter Blue feature + // switch, allow editing. + Stitch.Done + } else { + // Otherwise, check if they're authorized by the Twitter Blue feature switch and if they're + // subscribed to Twitter Blue. 
+ val authorizedWithTwitterBlue: Stitch[Boolean] = + if (checkTwitterBlueSubscription() && + isFeatureSwitchEnabled(matchedResults, TweetEditCreationEnabledForTwitterBlueKey)) { + subscriptionVerificationRepo(authorId, tweetEditSubscriptionResource) + } else Stitch.value(false) + + authorizedWithTwitterBlue.flatMap { authorized => + if (!authorized) { + log.error(s"User ${authorId} unauthorized to edit") + editRequestOutsideOfAllowlist.incr() + Stitch.exception(TweetCreateFailure.State(TweetCreateState.EditTweetUserNotAuthorized)) + } else Stitch.Done + } + } + } + + val editRequestByNonAuthor = stats.counter("edit_control_builder_rejected", "not_author") + def checkAuthor( + authorId: UserId, + previousTweetAuthorId: UserId + ): Unit = { + if (authorId != previousTweetAuthorId) { + editRequestByNonAuthor.incr() + throw TweetCreateFailure.State(TweetCreateState.EditTweetUserNotAuthor) + } + } + + val tweetEditForStaleTweet = stats.counter("edit_control_builder_rejected", "stale") + def checkLatestEdit( + previousTweetId: TweetId, + initial: EditControlInitial, + ): Unit = { + if (previousTweetId != initial.editTweetIds.last) { + tweetEditForStaleTweet.incr() + throw TweetCreateFailure.State(TweetCreateState.EditTweetNotLatestVersion) + } + } + + val tweetEditForLimitReached = stats.counter("edit_control_builder_rejected", "edits_limit") + def checkEditsRemaining(initial: EditControlInitial): Unit = { + initial.editsRemaining match { + case Some(number) if number > 0 => // OK + case _ => + tweetEditForLimitReached.incr() + throw TweetCreateFailure.State(TweetCreateState.EditCountLimitReached) + } + } + + val editTweetExpired = stats.counter("edit_control_builder_rejected", "expired") + val editTweetExpiredNoEditControl = + stats.counter("edit_control_builder_rejected", "expired", "no_edit_control") + def checkEditTimeWindow(initial: EditControlInitial): Unit = { + initial.editableUntilMsecs match { + case Some(millis) if Time.now < Time.fromMilliseconds(millis) => 
// OK + case Some(_) => + editTweetExpired.incr() + throw TweetCreateFailure.State(TweetCreateState.EditTimeLimitReached) + case editable => + editTweetExpired.incr() + if (editable.isEmpty) { + editTweetExpiredNoEditControl.incr() + } + throw TweetCreateFailure.State(TweetCreateState.EditTimeLimitReached) + } + } + + val tweetEditNotEligible = stats.counter("edit_control_builder_rejected", "not_eligible") + def checkIsEditEligible(initial: EditControlInitial): Unit = { + initial.isEditEligible match { + case Some(true) => // OK + case _ => + tweetEditNotEligible.incr() + throw TweetCreateFailure.State(TweetCreateState.NotEligibleForEdit) + } + } + + val editControlInitialMissing = + stats.counter("edit_control_builder_rejected", "initial_missing") + def findEditControlInitial(previousTweet: Tweet): EditControlInitial = { + previousTweet.editControl match { + case Some(EditControl.Initial(initial)) => initial + case Some(EditControl.Edit(edit)) => + edit.editControlInitial.getOrElse { + editControlInitialMissing.incr() + throw new IllegalStateException( + "Encountered edit tweet with missing editControlInitial.") + } + case _ => + throw TweetCreateFailure.State(TweetCreateState.EditTimeLimitReached) + } + } + + val editPromotedTweet = stats.counter("tweet_edit_for_promoted_tweet") + def checkPromotedTweet( + previousTweetId: TweetId, + promotedTweetRepo: StratoPromotedTweetRepository.Type, + disablePromotedTweetEdit: Gate[Unit] + ): Stitch[Unit] = { + if (disablePromotedTweetEdit()) { + promotedTweetRepo(previousTweetId).flatMap { + case false => + Stitch.Done + case true => + editPromotedTweet.incr() + Stitch.exception(TweetCreateFailure.State(TweetCreateState.EditTweetUserNotAuthorized)) + } + } else { + Stitch.Done + } + } + + // Each time edit is made, count how many versions a tweet already has. + // Value should be always between 1 and 4. 
+ val editTweetCount = 0 + .to(EditControlUtil.maxTweetEditsAllowed) + .map(i => i -> stats.counter("edit_control_builder_edits_count", i.toString)) + .toMap + // Overall counter and failures of card resolution for poll lookups. Needed because polls are not editable. + val pollCardResolutionTotal = stats.counter("edit_control_builder_card_resolution", "total") + val pollCardResolutionFailure = + stats.counter("edit_control_builder_card_resolution", "failures") + // Edit of initial tweet requested, and all edit checks successful. + val initialEditTweet = stats.counter("edit_control_builder_initial_edit") + request => + Stitch.run { + request.editOptions match { + case None => + val editControl = + makeEditControlInitial( + tweetId = request.tweetId, + createdAt = request.createdAt, + setEditWindowToSixtyMinutes = setEditWindowToSixtyMinutes + ).initial.copy( + isEditEligible = Some( + !isNullcastedButNotCommunityTweet(request) + && !isSuperFollow(request.tweet) + && !isCollabTweet(request.tweet) + && !isReplyToTweet(request.tweet) + ), + ) + (editControl.isEditEligible, request.cardReference) match { + case (Some(true), Some(reference)) => + pollCardResolutionTotal.incr() + isPoll( + card2Repo = card2Repo, + cardReference = reference, + cardsPlatformKey = request.cardsPlatformKey.getOrElse(defaultCardsPlatformKey), + ).rescue { + // Revert to the assumed value if card cannot be resolved. 
+ case _ => + pollCardResolutionFailure.incr() + Stitch.value(isPollCardAssumption) + } + .map { tweetIsAPoll => + wrapInitial(editControl.copy(isEditEligible = Some(!tweetIsAPoll))) + } + case _ => Stitch.value(wrapInitial(editControl)) + } + case Some(editOptions) => + for { + (previousTweet, _, _) <- Stitch.join( + tweetRepo(editOptions.previousTweetId, editControlQueryOptions), + checkPromotedTweet( + editOptions.previousTweetId, + promotedTweetRepo, + disablePromotedTweetEdit), + checkUserEligibility( + authorId = request.authorId, + matchedResults = request.matchedResults) + ) + } yield { + val initial = findEditControlInitial(previousTweet) + checkAuthor( + authorId = request.authorId, + previousTweetAuthorId = getUserId(previousTweet)) + editTweetCount + .get(initial.editTweetIds.size) + .orElse(editTweetCount.get(EditControlUtil.maxTweetEditsAllowed)) + .foreach(counter => counter.incr()) + checkLatestEdit(previousTweet.id, initial) + checkEditsRemaining(initial) + checkEditTimeWindow(initial) + checkIsEditEligible(initial) + if (initial.editTweetIds == Seq(previousTweet.id)) { + initialEditTweet.incr() + } + Some(editControlEdit(initialTweetId = initial.editTweetIds.head)) + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala new file mode 100644 index 000000000..0177996ec --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala @@ -0,0 +1,137 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.scrooge.schema.scrooge.scala.CompiledScroogeDefBuilder +import com.twitter.scrooge.schema.scrooge.scala.CompiledScroogeValueExtractor +import com.twitter.scrooge.schema.tree.DefinitionTraversal +import com.twitter.scrooge.schema.tree.FieldPath +import com.twitter.scrooge.schema.{ThriftDefinitions => DEF} +import 
com.twitter.scrooge_internal.linter.known_annotations.AllowedAnnotationKeys.TweetEditAllowed +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.TweetQuery.Options +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.thriftscala.ConversationControl +import com.twitter.tweetypie.thriftscala.TweetCreateState.FieldEditNotAllowed +import com.twitter.tweetypie.thriftscala.TweetCreateState.InitialTweetNotFound +import com.twitter.tweetypie.thriftscala.EditOptions +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Future +import com.twitter.util.logging.Logger + +/** + * This object constructs a validator `(Tweet, Option[EditOptions]) => Future[Unit]` which + * takes a new edit tweet and performs some validations. Specifically, it + * + * 1) ensures that no uneditable fields were edited. Uneditable fields are marked + * on the tweet.thrift using the thrift annotation "tweetEditAllowed=false". + * By default, fields with no annotation are treated as editable. + * + * 2) ensures that the conversationControl field (which is editable) remains the + * same type, e.g. a ConversationControl.ByInvitation doesn't change to a + * ConversationControl.Community. + * + * If either of these validations fails, the validator fails with a `FieldEditNotAllowed` + * tweet create state. + */ +object EditValidator { + type Type = (Tweet, Option[EditOptions]) => Future[Unit] + + val log: Logger = Logger(getClass) + + // An object that describes the tweet thrift, used to walk a tweet object looking + // for annotated fields. + val TweetDef = CompiledScroogeDefBuilder.build[Tweet].asInstanceOf[DEF.StructDef] + + // Collect the `FieldPath` for any nested tweet field whose edit-allowed annotation + // is set to false. These are the fields that this validator ensures cannot be edited.
+ val uneditableFieldPaths: Seq[FieldPath] = { + DefinitionTraversal().collect(TweetDef) { + case (d: DEF.FieldDef, path) if (d.annotations.get(TweetEditAllowed).contains("false")) => + path + } + } + + // A tweet query options which includes + // - any top level tweet field which either is an uneditable field, or contains an uneditable + // subfield. + // - the conversationControl field + // These fields must be present on the initial tweet in order for us to compare them against the + // edit tweet. + val previousTweetQueryOptions = { + // A set of the top level field ids for each (potentially nested) uneditable field. + val topLevelUneditableTweetFields = uneditableFieldPaths.map(_.ids.head).toSet + Options( + TweetQuery.Include( + tweetFields = topLevelUneditableTweetFields + Tweet.ConversationControlField.id + )) + } + + def validateUneditableFields(previousTweet: Tweet, editTweet: Tweet): Unit = { + // Collect uneditable fields that were edited + val invalidEditedFields = uneditableFieldPaths.flatMap { fieldPath => + val previousValue = + FieldPath.lensGet(CompiledScroogeValueExtractor, previousTweet, fieldPath) + val editValue = FieldPath.lensGet(CompiledScroogeValueExtractor, editTweet, fieldPath) + + if (previousValue != editValue) { + Some(fieldPath.toString) + } else { + None + } + } + + if (invalidEditedFields.nonEmpty) { + // If any inequalities are found, log them and return an exception. 
+ val msg = "uneditable fields were edited: " + invalidEditedFields.mkString(",") + log.error(msg) + throw TweetCreateFailure.State(FieldEditNotAllowed, Some(msg)) + } + } + + def validateConversationControl( + previous: Option[ConversationControl], + edit: Option[ConversationControl] + ): Unit = { + import ConversationControl.ByInvitation + import ConversationControl.Community + import ConversationControl.Followers + + (previous, edit) match { + case (None, None) => () + case (Some(ByInvitation(_)), Some(ByInvitation(_))) => () + case (Some(Community(_)), Some(Community(_))) => () + case (Some(Followers(_)), Some(Followers(_))) => () + case (_, _) => + val msg = "conversationControl type was edited" + log.error(msg) + throw TweetCreateFailure.State(FieldEditNotAllowed, Some(msg)) + } + } + + def apply(tweetRepo: TweetRepository.Optional): Type = { (tweet, editOptions) => + Stitch.run( + editOptions match { + case Some(EditOptions(previousTweetId)) => { + // Query for the previous tweet so that we can compare the + // fields between the two tweets. + tweetRepo(previousTweetId, previousTweetQueryOptions).map { + case Some(previousTweet) => + validateUneditableFields(previousTweet, tweet) + validateConversationControl( + previousTweet.conversationControl, + tweet.conversationControl) + case _ => + // If the previous tweet is not found we cannot perform validations that + // compare tweet fields and we have to fail tweet creation. + throw TweetCreateFailure.State(InitialTweetNotFound) + } + } + // This is the case where this isn't an edit tweet (since editOptions = None) + // Since this tweet is not an edit there are no fields to validate. 
+ case _ => Stitch.Unit + } + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala new file mode 100644 index 000000000..64441439b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala @@ -0,0 +1,102 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.finagle.stats.Stat +import com.twitter.flockdb.client._ +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.thriftscala._ + +trait EraseUserTweetsHandler { + + val eraseUserTweetsRequest: FutureArrow[EraseUserTweetsRequest, Unit] + + val asyncEraseUserTweetsRequest: FutureArrow[AsyncEraseUserTweetsRequest, Unit] +} + +/** + * This library allows you to erase all of a user's tweets. It's used to clean up + * tweets after a user deletes their account. + */ +object EraseUserTweetsHandler { + + /** + * Build an EraseUserTweetsHandler whose async arrow, when called, deletes one page worth of tweets at the + * specified flock cursor. When the page of tweets has been deleted another asyncEraseUserTweets + * request is made with the updated cursor location so that the next page of tweets can be processed.
+ */ + def apply( + selectPage: FutureArrow[Select[StatusGraph], PageResult[Long]], + deleteTweet: FutureEffect[(TweetId, UserId)], + asyncEraseUserTweets: FutureArrow[AsyncEraseUserTweetsRequest, Unit], + stats: StatsReceiver, + sleep: () => Future[Unit] = () => Future.Unit + ): EraseUserTweetsHandler = + new EraseUserTweetsHandler { + val latencyStat: Stat = stats.stat("latency_ms") + val deletedTweetsStat: Stat = stats.stat("tweets_deleted_for_erased_user") + + val selectUserTweets: AsyncEraseUserTweetsRequest => Select[StatusGraph] = + (request: AsyncEraseUserTweetsRequest) => + UserTimelineGraph + .from(request.userId) + .withCursor(Cursor(request.flockCursor)) + + // For a provided list of tweetIds, delete each one sequentially, sleeping between each call + // This is a rate limiting mechanism to slow down deletions. + def deletePage(page: PageResult[Long], expectedUserId: UserId): Future[Unit] = + page.entries.foldLeft(Future.Unit) { (previousFuture, nextId) => + for { + _ <- previousFuture + _ <- sleep() + _ <- deleteTweet((nextId, expectedUserId)) + } yield () + } + + /** + * If we aren't on the last page, make another EraseUserTweets request to delete + * the next page of tweets + */ + val nextRequestOrEnd: (AsyncEraseUserTweetsRequest, PageResult[Long]) => Future[Unit] = + (request: AsyncEraseUserTweetsRequest, page: PageResult[Long]) => + if (page.nextCursor.isEnd) { + latencyStat.add(Time.fromMilliseconds(request.startTimestamp).untilNow.inMillis) + deletedTweetsStat.add(request.tweetCount + page.entries.size) + Future.Unit + } else { + asyncEraseUserTweets( + request.copy( + flockCursor = page.nextCursor.value, + tweetCount = request.tweetCount + page.entries.size + ) + ) + } + + override val eraseUserTweetsRequest: FutureArrow[EraseUserTweetsRequest, Unit] = + FutureArrow { request => + asyncEraseUserTweets( + AsyncEraseUserTweetsRequest( + userId = request.userId, + flockCursor = Cursor.start.value, + startTimestamp = Time.now.inMillis, + tweetCount 
= 0L + ) + ) + } + + override val asyncEraseUserTweetsRequest: FutureArrow[AsyncEraseUserTweetsRequest, Unit] = + FutureArrow { request => + for { + _ <- sleep() + + // get one page of tweets + page <- selectPage(selectUserTweets(request)) + + // delete tweets + _ <- deletePage(page, request.userId) + + // make call to delete the next page of tweets + _ <- nextRequestOrEnd(request, page) + } yield () + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala new file mode 100644 index 000000000..19cbbded0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala @@ -0,0 +1,137 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.finagle.stats.Counter +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository.PlaceKey +import com.twitter.tweetypie.repository.PlaceRepository +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala._ + +object GeoStats { + val topTenCountryCodes: Set[PlaceLanguage] = + Set("US", "JP", "GB", "ID", "BR", "SA", "TR", "MX", "ES", "CA") + + def apply(stats: StatsReceiver): Effect[Option[Place]] = { + val totalCount = stats.counter("total") + val notFoundCount = stats.counter("not_found") + val countryStats: Map[String, Counter] = + topTenCountryCodes.map(cc => cc -> stats.scope("with_country_code").counter(cc)).toMap + + val placeTypeStats: Map[PlaceType, Counter] = + Map( + PlaceType.Admin -> stats.counter("admin"), + PlaceType.City -> stats.counter("city"), + PlaceType.Country -> stats.counter("country"), + PlaceType.Neighborhood -> stats.counter("neighborhood"), + PlaceType.Poi -> stats.counter("poi"), + PlaceType.Unknown -> stats.counter("unknown") + ) + + Effect.fromPartial { + case Some(place) => { + totalCount.incr() + placeTypeStats(place.`type`).incr() + place.countryCode.foreach(cc => 
countryStats.get(cc).foreach(_.incr())) + } + case None => notFoundCount.incr() + } + } +} + +object GeoBuilder { + case class Request(createGeo: TweetCreateGeo, userGeoEnabled: Boolean, language: String) + + case class Result(geoCoordinates: Option[GeoCoordinates], placeId: Option[PlaceId]) + + type Type = FutureArrow[Request, Result] + + def apply(placeRepo: PlaceRepository.Type, rgc: ReverseGeocoder, stats: StatsReceiver): Type = { + val exceptionCounters = ExceptionCounter(stats) + + def ignoreFailures[A](future: Future[Option[A]]): Future[Option[A]] = + exceptionCounters(future).handle { case _ => None } + + def isValidPlaceId(placeId: String) = PlaceIdRegex.pattern.matcher(placeId).matches + + def isValidLatLon(latitude: Double, longitude: Double): Boolean = + latitude >= -90.0 && latitude <= 90.0 && + longitude >= -180.0 && longitude <= 180.0 && + // some clients send (0.0, 0.0) for unknown reasons, but this is highly unlikely to be + // valid and should be treated as if no coordinates were sent. if a place Id is provided, + // that will still be used. + (latitude != 0.0 || longitude != 0.0) + + // Count the number of times we erase geo information based on user preferences. + val geoErasedCounter = stats.counter("geo_erased") + // Count the number of times we override a user's preferences and add geo anyway. 
+ val geoOverriddenCounter = stats.counter("geo_overridden") + + val geoScope = stats.scope("create_geotagged_tweet") + + // Counter for geo tweets with neither lat lon nor place id data + val noGeoCounter = geoScope.counter("no_geo_info") + val invalidCoordinates = geoScope.counter("invalid_coordinates") + val inValidPlaceId = geoScope.counter("invalid_place_id") + val latlonStatsEffect = GeoStats(geoScope.scope("from_latlon")) + val placeIdStatsEffect = GeoStats(geoScope.scope("from_place_id")) + + def validateCoordinates(coords: GeoCoordinates): Option[GeoCoordinates] = + if (isValidLatLon(coords.latitude, coords.longitude)) Some(coords) + else { + invalidCoordinates.incr() + None + } + + def validatePlaceId(placeId: String): Option[String] = + if (isValidPlaceId(placeId)) Some(placeId) + else { + inValidPlaceId.incr() + None + } + + def getPlaceByRGC(coordinates: GeoCoordinates, language: String): Future[Option[Place]] = + ignoreFailures( + rgc((coordinates, language)).onSuccess(latlonStatsEffect) + ) + + def getPlaceById(placeId: String, language: String): Future[Option[Place]] = + ignoreFailures( + Stitch + .run(placeRepo(PlaceKey(placeId, language)).liftNotFoundToOption) + .onSuccess(placeIdStatsEffect) + ) + + FutureArrow[Request, Result] { request => + val createGeo = request.createGeo + val allowGeo = createGeo.overrideUserGeoSetting || request.userGeoEnabled + val overrideGeo = createGeo.overrideUserGeoSetting && !request.userGeoEnabled + + if (createGeo.placeId.isEmpty && createGeo.coordinates.isEmpty) { + noGeoCounter.incr() + Future.value(Result(None, None)) + } else if (!allowGeo) { + // Record that we had geo information but had to erase it based on user preferences. 
+ geoErasedCounter.incr() + Future.value(Result(None, None)) + } else { + if (overrideGeo) geoOverriddenCounter.incr() + + // treat invalid coordinates the same as no-coordinates + val validatedCoordinates = createGeo.coordinates.flatMap(validateCoordinates) + val validatedPlaceId = createGeo.placeId.flatMap(validatePlaceId) + + for { + place <- (createGeo.placeId, validatedPlaceId, validatedCoordinates) match { + // if the request contains an invalid place id, we want to return None for the + // place instead of reverse-geocoding the coordinates + case (Some(_), None, _) => Future.None + case (_, Some(placeId), _) => getPlaceById(placeId, request.language) + case (_, _, Some(coords)) => getPlaceByRGC(coords, request.language) + case _ => Future.None + } + } yield Result(validatedCoordinates, place.map(_.id)) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetDeletedTweetsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetDeletedTweetsHandler.scala new file mode 100644 index 000000000..b74acf94d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetDeletedTweetsHandler.scala @@ -0,0 +1,119 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.InternalServerError +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.storage.Response.TweetResponseCode +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.storage.DeleteState +import com.twitter.tweetypie.storage.DeletedTweetResponse +import com.twitter.tweetypie.storage.RateLimited +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.thriftscala._ + +/** + * Allow access to raw, unhydrated deleted tweet fields from storage backends (currently Manhattan) + */ +object GetDeletedTweetsHandler { + + type Type = FutureArrow[GetDeletedTweetsRequest,
Seq[GetDeletedTweetResult]] + type TweetsExist = Seq[TweetId] => Stitch[Set[TweetId]] + + def processTweetResponse(response: Try[GetTweet.Response]): Stitch[Option[Tweet]] = { + import GetTweet.Response._ + + response match { + case Return(Found(tweet)) => Stitch.value(Some(tweet)) + case Return(Deleted | NotFound | BounceDeleted(_)) => Stitch.None + case Throw(_: RateLimited) => Stitch.exception(OverCapacity("manhattan")) + case Throw(exception) => Stitch.exception(exception) + } + } + + def convertDeletedTweetResponse( + r: DeletedTweetResponse, + extantIds: Set[TweetId] + ): GetDeletedTweetResult = { + val id = r.tweetId + if (extantIds.contains(id) || r.deleteState == DeleteState.NotDeleted) { + GetDeletedTweetResult(id, DeletedTweetState.NotDeleted) + } else { + r.overallResponse match { + case TweetResponseCode.Success => + GetDeletedTweetResult(id, convertState(r.deleteState), r.tweet) + case TweetResponseCode.OverCapacity => throw OverCapacity("manhattan") + case _ => + throw InternalServerError( + s"Unhandled response ${r.overallResponse} from getDeletedTweets for tweet $id" + ) + } + } + } + + def convertState(d: DeleteState): DeletedTweetState = d match { + case DeleteState.NotFound => DeletedTweetState.NotFound + case DeleteState.NotDeleted => DeletedTweetState.NotDeleted + case DeleteState.SoftDeleted => DeletedTweetState.SoftDeleted + // Callers of this endpoint treat BounceDeleted tweets the same as SoftDeleted + case DeleteState.BounceDeleted => DeletedTweetState.SoftDeleted + case DeleteState.HardDeleted => DeletedTweetState.HardDeleted + } + + /** + * Converts [[TweetStorageClient.GetTweet]] into a FutureArrow that returns extant tweet ids from + * the original list. This method is used to check underlying storage against cache, preferring + * cache if a tweet exists there.
+ */ + def tweetsExist(getTweet: TweetStorageClient.GetTweet): TweetsExist = + (tweetIds: Seq[TweetId]) => + for { + response <- Stitch.traverse(tweetIds) { tweetId => getTweet(tweetId).liftToTry } + tweets <- Stitch.collect(response.map(processTweetResponse)) + } yield tweets.flatten.map(_.id).toSet.filter(tweetIds.contains) + + def apply( + getDeletedTweets: TweetStorageClient.GetDeletedTweets, + tweetsExist: TweetsExist, + stats: StatsReceiver + ): Type = { + + val notFound = stats.counter("not_found") + val notDeleted = stats.counter("not_deleted") + val softDeleted = stats.counter("soft_deleted") + val hardDeleted = stats.counter("hard_deleted") + val unknown = stats.counter("unknown") + + def trackState(results: Seq[GetDeletedTweetResult]): Unit = + results.foreach { r => + r.state match { + case DeletedTweetState.NotFound => notFound.incr() + case DeletedTweetState.NotDeleted => notDeleted.incr() + case DeletedTweetState.SoftDeleted => softDeleted.incr() + case DeletedTweetState.HardDeleted => hardDeleted.incr() + case _ => unknown.incr() + } + } + + FutureArrow { request => + Stitch.run { + Stitch + .join( + getDeletedTweets(request.tweetIds), + tweetsExist(request.tweetIds) + ) + .map { + case (deletedTweetResponses, extantIds) => + val responseIds = deletedTweetResponses.map(_.tweetId) + assert( + responseIds == request.tweetIds, + s"getDeletedTweets response does not match order of request: Request ids " + + s"(${request.tweetIds.mkString(", ")}) != response ids (${responseIds + .mkString(", ")})" + ) + deletedTweetResponses.map { r => convertDeletedTweetResponse(r, extantIds) } + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsByUserHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsByUserHandler.scala new file mode 100644 index 000000000..c9b096e0f --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsByUserHandler.scala @@ -0,0 +1,188 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.flockdb.client.Cursor +import com.twitter.flockdb.client.PageResult +import com.twitter.flockdb.client.Select +import com.twitter.flockdb.client.StatusGraph +import com.twitter.flockdb.client.UserTimelineGraph +import com.twitter.flockdb.client.thriftscala.EdgeState +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserOptions +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserRequest +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserResult +import com.twitter.tweetypie.thriftscala.GetStoredTweetsOptions +import com.twitter.tweetypie.thriftscala.GetStoredTweetsRequest + +object GetStoredTweetsByUserHandler { + type Type = FutureArrow[GetStoredTweetsByUserRequest, GetStoredTweetsByUserResult] + + def apply( + getStoredTweetsHandler: GetStoredTweetsHandler.Type, + getStoredTweet: TweetStorageClient.GetStoredTweet, + selectPage: FutureArrow[Select[StatusGraph], PageResult[Long]], + maxPages: Int + ): Type = { + FutureArrow { request => + val options = request.options.getOrElse(GetStoredTweetsByUserOptions()) + + val startTimeMsec: Long = options.startTimeMsec.getOrElse(0L) + val endTimeMsec: Long = options.endTimeMsec.getOrElse(Time.now.inMillis) + val cursor = options.cursor.map(Cursor(_)).getOrElse { + if (options.startFromOldest) Cursor.lowest else Cursor.highest + } + + getNextTweetIdsInTimeRange( + request.userId, + startTimeMsec, + endTimeMsec, + cursor, + selectPage, + getStoredTweet, + maxPages, + numTries = 0 + ).flatMap { + case (tweetIds, cursor) => + val getStoredTweetsRequest = toGetStoredTweetsRequest(tweetIds, request.userId, 
options) + + getStoredTweetsHandler(getStoredTweetsRequest) + .map { getStoredTweetsResults => + GetStoredTweetsByUserResult( + storedTweets = getStoredTweetsResults.map(_.storedTweet), + cursor = if (cursor.isEnd) None else Some(cursor.value) + ) + } + } + } + } + + private def toGetStoredTweetsRequest( + tweetIds: Seq[TweetId], + userId: UserId, + getStoredTweetsByUserOptions: GetStoredTweetsByUserOptions + ): GetStoredTweetsRequest = { + + val options: GetStoredTweetsOptions = GetStoredTweetsOptions( + bypassVisibilityFiltering = getStoredTweetsByUserOptions.bypassVisibilityFiltering, + forUserId = if (getStoredTweetsByUserOptions.setForUserId) Some(userId) else None, + additionalFieldIds = getStoredTweetsByUserOptions.additionalFieldIds + ) + + GetStoredTweetsRequest( + tweetIds = tweetIds, + options = Some(options) + ) + } + + private def getNextTweetIdsInTimeRange( + userId: UserId, + startTimeMsec: Long, + endTimeMsec: Long, + cursor: Cursor, + selectPage: FutureArrow[Select[StatusGraph], PageResult[Long]], + getStoredTweet: TweetStorageClient.GetStoredTweet, + maxPages: Int, + numTries: Int + ): Future[(Seq[TweetId], Cursor)] = { + val select = Select( + sourceId = userId, + graph = UserTimelineGraph, + stateIds = + Some(Seq(EdgeState.Archived.value, EdgeState.Positive.value, EdgeState.Removed.value)) + ).withCursor(cursor) + + def inTimeRange(timestamp: Long): Boolean = + timestamp >= startTimeMsec && timestamp <= endTimeMsec + def pastTimeRange(timestamps: Seq[Long]) = { + if (cursor.isAscending) { + timestamps.max > endTimeMsec + } else { + timestamps.min < startTimeMsec + } + } + + val pageResultFuture: Future[PageResult[Long]] = selectPage(select) + + pageResultFuture.flatMap { pageResult => + val groupedIds = pageResult.entries.groupBy(SnowflakeId.isSnowflakeId) + val nextCursor = if (cursor.isAscending) pageResult.previousCursor else pageResult.nextCursor + + // Timestamps for the creation of Tweets with snowflake IDs can be calculated from the IDs + 
// themselves. + val snowflakeIdsTimestamps: Seq[(Long, Long)] = groupedIds.getOrElse(true, Seq()).map { id => + val snowflakeTimeMillis = SnowflakeId.unixTimeMillisFromId(id) + (id, snowflakeTimeMillis) + } + + // For non-snowflake Tweets, we need to fetch the Tweet data from Manhattan to see when the + // Tweet was created. + val nonSnowflakeIdsTimestamps: Future[Seq[(Long, Long)]] = Stitch.run( + Stitch + .traverse(groupedIds.getOrElse(false, Seq()))(getStoredTweet) + .map { + _.flatMap { + case GetStoredTweet.Response.FoundAny(tweet, _, _, _, _) => { + if (tweet.coreData.exists(_.createdAtSecs > 0)) { + Some((tweet.id, tweet.coreData.get.createdAtSecs)) + } else None + } + case _ => None + } + }) + + nonSnowflakeIdsTimestamps.flatMap { nonSnowflakeList => + val allTweetIdsAndTimestamps = snowflakeIdsTimestamps ++ nonSnowflakeList + val filteredTweetIds = allTweetIdsAndTimestamps + .filter { + case (_, ts) => inTimeRange(ts) + } + .map(_._1) + + if (nextCursor.isEnd) { + // We've considered the last Tweet for this User. There are no more Tweets to return. + Future.value((filteredTweetIds, Cursor.end)) + } else if (allTweetIdsAndTimestamps.nonEmpty && + pastTimeRange(allTweetIdsAndTimestamps.map(_._2))) { + // At least one Tweet returned from Tflock has a timestamp past our time range, i.e. + // greater than the end time (if we're fetching in an ascending order) or lower than the + // start time (if we're fetching in a descending order). There is no point in looking at + // any more Tweets from this User as they'll all be outside the time range. + Future.value((filteredTweetIds, Cursor.end)) + } else if (filteredTweetIds.isEmpty) { + // We're here because one of two things happened: + // 1. allTweetIdsAndTimestamps is empty: Either Tflock has returned an empty page of Tweets + // or we weren't able to fetch timestamps for any of the Tweets Tflock returned. In this + // case, we fetch the next page of Tweets. + // 2. 
allTweetIdsAndTimestamps is non-empty but filteredTweetIds is empty: The current page + // has no Tweets inside the requested time range. We fetch the next page of Tweets and + // try again. + // If we hit the limit for the maximum number of pages from tflock to be requested, we + // return an empty list of Tweets with the cursor for the caller to try again. + + if (numTries == maxPages) { + Future.value((filteredTweetIds, nextCursor)) + } else { + getNextTweetIdsInTimeRange( + userId = userId, + startTimeMsec = startTimeMsec, + endTimeMsec = endTimeMsec, + cursor = nextCursor, + selectPage = selectPage, + getStoredTweet = getStoredTweet, + maxPages = maxPages, + numTries = numTries + 1 + ) + } + } else { + // filteredTweetIds is non-empty: There are some Tweets in this page that are within the + // requested time range, and we aren't out of the time range yet. We return the Tweets we + // have and set the cursor forward for the next request. + Future.value((filteredTweetIds, nextCursor)) + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsHandler.scala new file mode 100644 index 000000000..ab8bfb4ad --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsHandler.scala @@ -0,0 +1,161 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.StoredTweetResult._ +import com.twitter.tweetypie.core.StoredTweetResult +import com.twitter.tweetypie.core.TweetResult +import com.twitter.tweetypie.FieldId +import com.twitter.tweetypie.FutureArrow +import com.twitter.tweetypie.repository.CacheControl +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetResultRepository +import com.twitter.tweetypie.thriftscala.{BounceDeleted => BounceDeletedState} +import 
com.twitter.tweetypie.thriftscala.{ForceAdded => ForceAddedState} +import com.twitter.tweetypie.thriftscala.GetStoredTweetsRequest +import com.twitter.tweetypie.thriftscala.GetStoredTweetsOptions +import com.twitter.tweetypie.thriftscala.GetStoredTweetsResult +import com.twitter.tweetypie.thriftscala.{HardDeleted => HardDeletedState} +import com.twitter.tweetypie.thriftscala.{NotFound => NotFoundState} +import com.twitter.tweetypie.thriftscala.{SoftDeleted => SoftDeletedState} +import com.twitter.tweetypie.thriftscala.StatusCounts +import com.twitter.tweetypie.thriftscala.StoredTweetError +import com.twitter.tweetypie.thriftscala.StoredTweetInfo +import com.twitter.tweetypie.thriftscala.StoredTweetState +import com.twitter.tweetypie.thriftscala.{Undeleted => UndeletedState} + +object GetStoredTweetsHandler { + type Type = FutureArrow[GetStoredTweetsRequest, Seq[GetStoredTweetsResult]] + + def apply(tweetRepo: TweetResultRepository.Type): Type = { + FutureArrow[GetStoredTweetsRequest, Seq[GetStoredTweetsResult]] { request => + val requestOptions: GetStoredTweetsOptions = + request.options.getOrElse(GetStoredTweetsOptions()) + val queryOptions = toTweetQueryOptions(requestOptions) + + val result = Stitch + .traverse(request.tweetIds) { tweetId => + tweetRepo(tweetId, queryOptions) + .map(toStoredTweetInfo) + .map(GetStoredTweetsResult(_)) + .handle { + case _ => + GetStoredTweetsResult( + StoredTweetInfo( + tweetId = tweetId, + errors = Seq(StoredTweetError.FailedFetch) + ) + ) + } + } + + Stitch.run(result) + } + } + + private def toTweetQueryOptions(options: GetStoredTweetsOptions): TweetQuery.Options = { + val countsFields: Set[FieldId] = Set( + StatusCounts.FavoriteCountField.id, + StatusCounts.ReplyCountField.id, + StatusCounts.RetweetCountField.id, + StatusCounts.QuoteCountField.id + ) + + TweetQuery.Options( + include = GetTweetsHandler.BaseInclude.also( + tweetFields = Set(Tweet.CountsField.id) ++ options.additionalFieldIds, + countsFields = countsFields + ), 
+ cacheControl = CacheControl.NoCache, + enforceVisibilityFiltering = !options.bypassVisibilityFiltering, + forUserId = options.forUserId, + requireSourceTweet = false, + fetchStoredTweets = true + ) + } + + private def toStoredTweetInfo(tweetResult: TweetResult): StoredTweetInfo = { + def translateErrors(errors: Seq[StoredTweetResult.Error]): Seq[StoredTweetError] = { + errors.map { + case StoredTweetResult.Error.Corrupt => StoredTweetError.Corrupt + case StoredTweetResult.Error.FieldsMissingOrInvalid => + StoredTweetError.FieldsMissingOrInvalid + case StoredTweetResult.Error.ScrubbedFieldsPresent => StoredTweetError.ScrubbedFieldsPresent + case StoredTweetResult.Error.ShouldBeHardDeleted => StoredTweetError.ShouldBeHardDeleted + } + } + + val tweetData = tweetResult.value + + tweetData.storedTweetResult match { + case Some(storedTweetResult) => { + val (tweet, storedTweetState, errors) = storedTweetResult match { + case Present(errors, _) => (Some(tweetData.tweet), None, translateErrors(errors)) + case HardDeleted(softDeletedAtMsec, hardDeletedAtMsec) => + ( + Some(tweetData.tweet), + Some( + StoredTweetState.HardDeleted( + HardDeletedState(softDeletedAtMsec, hardDeletedAtMsec))), + Seq() + ) + case SoftDeleted(softDeletedAtMsec, errors, _) => + ( + Some(tweetData.tweet), + Some(StoredTweetState.SoftDeleted(SoftDeletedState(softDeletedAtMsec))), + translateErrors(errors) + ) + case BounceDeleted(deletedAtMsec, errors, _) => + ( + Some(tweetData.tweet), + Some(StoredTweetState.BounceDeleted(BounceDeletedState(deletedAtMsec))), + translateErrors(errors) + ) + case Undeleted(undeletedAtMsec, errors, _) => + ( + Some(tweetData.tweet), + Some(StoredTweetState.Undeleted(UndeletedState(undeletedAtMsec))), + translateErrors(errors) + ) + case ForceAdded(addedAtMsec, errors, _) => + ( + Some(tweetData.tweet), + Some(StoredTweetState.ForceAdded(ForceAddedState(addedAtMsec))), + translateErrors(errors) + ) + case Failed(errors) => (None, None, translateErrors(errors)) + 
case NotFound => (None, Some(StoredTweetState.NotFound(NotFoundState())), Seq()) + } + + StoredTweetInfo( + tweetId = tweetData.tweet.id, + tweet = tweet.map(sanitizeNullMediaFields), + storedTweetState = storedTweetState, + errors = errors + ) + } + + case None => + StoredTweetInfo( + tweetId = tweetData.tweet.id, + tweet = Some(sanitizeNullMediaFields(tweetData.tweet)) + ) + } + } + + private def sanitizeNullMediaFields(tweet: Tweet): Tweet = { + // Some media fields are initialized as `null` at the storage layer. + // If the Tweet is meant to be hard deleted, or is not hydrated for + // some other reason but the media entities still exist, we sanitize + // these fields to allow serialization. + tweet.copy(media = tweet.media.map(_.map { mediaEntity => + mediaEntity.copy( + url = Option(mediaEntity.url).getOrElse(""), + mediaUrl = Option(mediaEntity.mediaUrl).getOrElse(""), + mediaUrlHttps = Option(mediaEntity.mediaUrlHttps).getOrElse(""), + displayUrl = Option(mediaEntity.displayUrl).getOrElse(""), + expandedUrl = Option(mediaEntity.expandedUrl).getOrElse(""), + ) + })) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetCountsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetCountsHandler.scala new file mode 100644 index 000000000..4100a76dc --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetCountsHandler.scala @@ -0,0 +1,44 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.util.FutureArrow +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +/** + * Handler for the `getTweetCounts` endpoint. 
+ */ +object GetTweetCountsHandler { + type Type = FutureArrow[GetTweetCountsRequest, Seq[GetTweetCountsResult]] + + def apply(repo: TweetCountsRepository.Type): Type = { + + def idToResult(id: TweetId, req: GetTweetCountsRequest): Stitch[GetTweetCountsResult] = + Stitch + .join( + // .liftToOption() converts any failures to None result + if (req.includeRetweetCount) repo(RetweetsKey(id)).liftToOption() else Stitch.None, + if (req.includeReplyCount) repo(RepliesKey(id)).liftToOption() else Stitch.None, + if (req.includeFavoriteCount) repo(FavsKey(id)).liftToOption() else Stitch.None, + if (req.includeQuoteCount) repo(QuotesKey(id)).liftToOption() else Stitch.None, + if (req.includeBookmarkCount) repo(BookmarksKey(id)).liftToOption() else Stitch.None + ).map { + case (retweetCount, replyCount, favoriteCount, quoteCount, bookmarkCount) => + GetTweetCountsResult( + tweetId = id, + retweetCount = retweetCount, + replyCount = replyCount, + favoriteCount = favoriteCount, + quoteCount = quoteCount, + bookmarkCount = bookmarkCount + ) + } + + FutureArrow[GetTweetCountsRequest, Seq[GetTweetCountsResult]] { request => + Stitch.run( + Stitch.traverse(request.tweetIds)(idToResult(_, request)) + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetFieldsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetFieldsHandler.scala new file mode 100644 index 000000000..55ab6cb18 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetFieldsHandler.scala @@ -0,0 +1,395 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.container.thriftscala.MaterializeAsTweetFieldsRequest +import com.twitter.context.TestingSignalsContext +import com.twitter.servo.util.FutureArrow +import com.twitter.spam.rtf.thriftscala.FilteredReason +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import 
com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.DeletedTweetVisibilityRepository +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala.TweetFieldsResultState +import com.twitter.tweetypie.thriftscala._ + +/** + * Handler for the `getTweetFields` endpoint. + */ +object GetTweetFieldsHandler { + type Type = GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]] + + def apply( + tweetRepo: TweetResultRepository.Type, + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Type = { + FutureArrow[GetTweetFieldsRequest, Seq[GetTweetFieldsResult]] { request => + val queryOptions = toTweetQueryOptions(request.options) + + Stitch.run( + Stitch.traverse(request.tweetIds) { id => + tweetRepo(id, queryOptions).liftToTry.flatMap { tweetResult => + toGetTweetFieldsResult( + id, + tweetResult, + request.options, + deletedTweetVisibilityRepo, + containerAsGetTweetFieldsResultRepo, + stats, + shouldMaterializeContainers + ) + } + } + ) + } + } + + /** + * Converts a `GetTweetFieldsOptions` into an internal `TweetQuery.Options`. 
+ */ + def toTweetQueryOptions(options: GetTweetFieldsOptions): TweetQuery.Options = { + val includes = options.tweetIncludes + val shouldSkipCache = TestingSignalsContext().flatMap(_.simulateBackPressure).nonEmpty + val cacheControl = + if (shouldSkipCache) CacheControl.NoCache + else if (options.doNotCache) CacheControl.ReadOnlyCache + else CacheControl.ReadWriteCache + + TweetQuery.Options( + include = TweetQuery + .Include( + tweetFields = includes.collect { + case TweetInclude.TweetFieldId(id) => id + case TweetInclude.CountsFieldId(_) => Tweet.CountsField.id + case TweetInclude.MediaEntityFieldId(_) => Tweet.MediaField.id + }.toSet, + countsFields = includes.collect { case TweetInclude.CountsFieldId(id) => id }.toSet, + mediaFields = includes.collect { case TweetInclude.MediaEntityFieldId(id) => id }.toSet, + quotedTweet = options.includeQuotedTweet, + pastedMedia = true + ).also( + /** + * Always fetching underlying creatives container id. see + * [[hydrateCreativeContainerBackedTweet]] for more detail. 
+ */ + tweetFields = Seq(Tweet.UnderlyingCreativesContainerIdField.id) + ), + cacheControl = cacheControl, + enforceVisibilityFiltering = options.visibilityPolicy == TweetVisibilityPolicy.UserVisible, + safetyLevel = options.safetyLevel.getOrElse(SafetyLevel.FilterNone), + forUserId = options.forUserId, + languageTag = options.languageTag.getOrElse("en"), + cardsPlatformKey = options.cardsPlatformKey, + extensionsArgs = options.extensionsArgs, + forExternalConsumption = true, + simpleQuotedTweet = options.simpleQuotedTweet + ) + } + + def toGetTweetFieldsResult( + tweetId: TweetId, + res: Try[TweetResult], + options: GetTweetFieldsOptions, + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetFieldsResult] = { + val measureRacyReads: TweetId => Unit = trackLossyReadsAfterWrite( + stats.stat("racy_reads", "get_tweet_fields"), + Duration.fromSeconds(3) + ) + + res match { + case Throw(NotFound) => + measureRacyReads(tweetId) + Stitch.value(GetTweetFieldsResult(tweetId, NotFoundResultState)) + + case Throw(ex) => + val resultStateStitch = failureResultState(ex) match { + case notFoundResultState @ TweetFieldsResultState.NotFound(_) => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + ex, + tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = false + ) + ).map(withVisibilityFilteredReason(notFoundResultState, _)) + case res => Stitch.value(res) + } + resultStateStitch.map(res => GetTweetFieldsResult(tweetId, res)) + case Return(r) => + toTweetFieldsResult( + r, + options, + deletedTweetVisibilityRepo, + containerAsGetTweetFieldsResultRepo, + stats, + shouldMaterializeContainers + ).flatMap { getTweetFieldsResult => + hydrateCreativeContainerBackedTweet( + r.value.tweet.underlyingCreativesContainerId, + 
getTweetFieldsResult, + options, + containerAsGetTweetFieldsResultRepo, + tweetId, + stats, + shouldMaterializeContainers + ) + } + } + } + + private def failureResultState(ex: Throwable): TweetFieldsResultState = + ex match { + case FilteredState.Unavailable.TweetDeleted => DeletedResultState + case FilteredState.Unavailable.BounceDeleted => BounceDeletedResultState + case FilteredState.Unavailable.SourceTweetNotFound(d) => notFoundResultState(deleted = d) + case FilteredState.Unavailable.Author.NotFound => NotFoundResultState + case fs: FilteredState.HasFilteredReason => toFilteredState(fs.filteredReason) + case OverCapacity(_) => toFailedState(overcapacity = true, None) + case _ => toFailedState(overcapacity = false, Some(ex.toString)) + } + + private val NotFoundResultState = TweetFieldsResultState.NotFound(TweetFieldsResultNotFound()) + + private val DeletedResultState = TweetFieldsResultState.NotFound( + TweetFieldsResultNotFound(deleted = true) + ) + + private val BounceDeletedResultState = TweetFieldsResultState.NotFound( + TweetFieldsResultNotFound(deleted = true, bounceDeleted = true) + ) + + def notFoundResultState(deleted: Boolean): TweetFieldsResultState.NotFound = + if (deleted) DeletedResultState else NotFoundResultState + + private def toFailedState( + overcapacity: Boolean, + message: Option[String] + ): TweetFieldsResultState = + TweetFieldsResultState.Failed(TweetFieldsResultFailed(overcapacity, message)) + + private def toFilteredState(reason: FilteredReason): TweetFieldsResultState = + TweetFieldsResultState.Filtered( + TweetFieldsResultFiltered(reason = reason) + ) + + /** + * Converts a `TweetResult` into a `GetTweetFieldsResult`. For retweets, missing or filtered source + * tweets cause the retweet to be treated as missing or filtered. 
+ */ + private def toTweetFieldsResult( + tweetResult: TweetResult, + options: GetTweetFieldsOptions, + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetFieldsResult] = { + val primaryResultState = toTweetFieldsResultState(tweetResult, options) + val quotedResultStateStitch = primaryResultState match { + case TweetFieldsResultState.Found(_) if options.includeQuotedTweet => + val tweetData = tweetResult.value.sourceTweetResult + .getOrElse(tweetResult) + .value + tweetData.quotedTweetResult + .map { + case QuotedTweetResult.NotFound => Stitch.value(NotFoundResultState) + case QuotedTweetResult.Filtered(state) => + val resultState = failureResultState(state) + + (tweetData.tweet.quotedTweet, resultState) match { + //When QT exists => contribute VF filtered reason to result state + case (Some(qt), notFoundResultState @ TweetFieldsResultState.NotFound(_)) => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + state, + qt.tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = true + ) + ).map(withVisibilityFilteredReason(notFoundResultState, _)) + //When QT is absent => result state without filtered reason + case _ => Stitch.value(resultState) + } + case QuotedTweetResult.Found(res) => + Stitch + .value(toTweetFieldsResultState(res, options)) + .flatMap { resultState => + hydrateCreativeContainerBackedTweet( + creativesContainerId = res.value.tweet.underlyingCreativesContainerId, + originalGetTweetFieldsResult = GetTweetFieldsResult( + tweetId = res.value.tweet.id, + tweetResult = resultState, + ), + getTweetFieldsRequestOptions = options, + creativesContainerRepo = creativesContainerRepo, + res.value.tweet.id, + stats, + shouldMaterializeContainers + ) + } + .map(_.tweetResult) + } + //Quoted tweet result not requested + 
case _ => None + } + + quotedResultStateStitch + .map(qtStitch => qtStitch.map(Some(_))) + .getOrElse(Stitch.None) + .map(qtResult => + GetTweetFieldsResult( + tweetId = tweetResult.value.tweet.id, + tweetResult = primaryResultState, + quotedTweetResult = qtResult + )) + } + + /** + * @return a copy of resultState with filtered reason when @param filteredReasonOpt is present + */ + private def withVisibilityFilteredReason( + resultState: TweetFieldsResultState.NotFound, + filteredReasonOpt: Option[FilteredReason] + ): TweetFieldsResultState.NotFound = { + filteredReasonOpt match { + case Some(fs) => + resultState.copy( + notFound = resultState.notFound.copy( + filteredReason = Some(fs) + )) + case _ => resultState + } + } + + private def toTweetFieldsResultState( + tweetResult: TweetResult, + options: GetTweetFieldsOptions + ): TweetFieldsResultState = { + val tweetData = tweetResult.value + val suppressReason = tweetData.suppress.map(_.filteredReason) + val tweetFailedFields = tweetResult.state.failedFields + val sourceTweetFailedFields = + tweetData.sourceTweetResult.map(_.state.failedFields).getOrElse(Set()) + val sourceTweetOpt = tweetData.sourceTweetResult.map(_.value.tweet) + val sourceTweetSuppressReason = + tweetData.sourceTweetResult.flatMap(_.value.suppress.map(_.filteredReason)) + val isTweetPartial = tweetFailedFields.nonEmpty || sourceTweetFailedFields.nonEmpty + + val tweetFoundResult = tweetData.sourceTweetResult match { + case None => + // if `sourceTweetResult` is empty, this isn't a retweet + TweetFieldsResultFound( + tweet = tweetData.tweet, + suppressReason = suppressReason + ) + case Some(r) => + // if the source tweet result state is Found, merge that into the primary result + TweetFieldsResultFound( + tweet = tweetData.tweet, + retweetedTweet = sourceTweetOpt.filter(_ => options.includeRetweetedTweet), + suppressReason = suppressReason.orElse(sourceTweetSuppressReason) + ) + } + + if (isTweetPartial) { + TweetFieldsResultState.Failed( + 
TweetFieldsResultFailed( + overCapacity = false, + message = Some( + "Failed to load: " + (tweetFailedFields ++ sourceTweetFailedFields).mkString(", ")), + partial = Some( + TweetFieldsPartial( + found = tweetFoundResult, + missingFields = tweetFailedFields, + sourceTweetMissingFields = sourceTweetFailedFields + ) + ) + ) + ) + } else { + TweetFieldsResultState.Found( + tweetFoundResult + ) + } + } + + /** + * if tweet data is backed by creatives container, it'll be hydrated from creatives + * container service. + */ + private def hydrateCreativeContainerBackedTweet( + creativesContainerId: Option[Long], + originalGetTweetFieldsResult: GetTweetFieldsResult, + getTweetFieldsRequestOptions: GetTweetFieldsOptions, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType, + tweetId: Long, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetFieldsResult] = { + // creatives container backed tweet stats + val ccTweetMaterialized = stats.scope("creatives_container", "get_tweet_fields") + val ccTweetMaterializeRequests = ccTweetMaterialized.counter("requests") + val ccTweetMaterializeSuccess = ccTweetMaterialized.counter("success") + val ccTweetMaterializeFailed = ccTweetMaterialized.counter("failed") + val ccTweetMaterializeFiltered = ccTweetMaterialized.scope("filtered") + + ( + creativesContainerId, + originalGetTweetFieldsResult.tweetResult, + getTweetFieldsRequestOptions.disableTweetMaterialization, + shouldMaterializeContainers() + ) match { + // 1. creatives container backed tweet is determined by `underlyingCreativesContainerId` field presence. + // 2. if the frontend tweet is suppressed by any reason, respect that and not do this hydration. 
+ // (this logic can be revisited and improved further) + case (None, _, _, _) => + Stitch.value(originalGetTweetFieldsResult) + case (Some(_), _, _, false) => + ccTweetMaterializeFiltered.counter("decider_suppressed").incr() + Stitch.value { + GetTweetFieldsResult( + tweetId = tweetId, + tweetResult = TweetFieldsResultState.NotFound(TweetFieldsResultNotFound()) + ) + } + case (Some(containerId), TweetFieldsResultState.Found(_), false, _) => + ccTweetMaterializeRequests.incr() + val materializationRequest = + MaterializeAsTweetFieldsRequest(containerId, tweetId, Some(originalGetTweetFieldsResult)) + creativesContainerRepo( + materializationRequest, + getTweetFieldsRequestOptions + ).onSuccess(_ => ccTweetMaterializeSuccess.incr()) + .onFailure(_ => ccTweetMaterializeFailed.incr()) + .handle { + case ex => + GetTweetFieldsResult( + tweetId = tweetId, + tweetResult = failureResultState(ex) + ) + } + case (Some(_), _, true, _) => + ccTweetMaterializeFiltered.counter("suppressed").incr() + Stitch.value( + GetTweetFieldsResult( + tweetId = tweetId, + tweetResult = TweetFieldsResultState.NotFound(TweetFieldsResultNotFound()) + ) + ) + case (Some(_), state, _, _) => + ccTweetMaterializeFiltered.counter(state.getClass.getName).incr() + Stitch.value(originalGetTweetFieldsResult) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala new file mode 100644 index 000000000..f0f144dd5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala @@ -0,0 +1,415 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.container.thriftscala.MaterializeAsTweetRequest +import com.twitter.context.TestingSignalsContext +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.exception.thriftscala.ClientErrorCause +import com.twitter.servo.util.FutureArrow 
+import com.twitter.spam.rtf.thriftscala.FilteredReason +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +/** + * Handler for the `getTweets` endpoint. + */ +object GetTweetsHandler { + type Type = FutureArrow[GetTweetsRequest, Seq[GetTweetResult]] + + /** + * A `TweetQuery.Include` instance with options set as the default base options + * for the `getTweets` endpoint. + */ + val BaseInclude: TweetQuery.Include = + TweetQuery.Include( + tweetFields = Set( + Tweet.CoreDataField.id, + Tweet.UrlsField.id, + Tweet.MentionsField.id, + Tweet.MediaField.id, + Tweet.HashtagsField.id, + Tweet.CashtagsField.id, + Tweet.TakedownCountryCodesField.id, + Tweet.TakedownReasonsField.id, + Tweet.DeviceSourceField.id, + Tweet.LanguageField.id, + Tweet.ContributorField.id, + Tweet.QuotedTweetField.id, + Tweet.UnderlyingCreativesContainerIdField.id, + ), + pastedMedia = true + ) + + def apply( + tweetRepo: TweetResultRepository.Type, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetType, + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Type = { + FutureArrow[GetTweetsRequest, Seq[GetTweetResult]] { request => + val requestOptions = request.options.getOrElse(GetTweetOptions()) + + val invalidAdditionalFields = + requestOptions.additionalFieldIds.filter(!AdditionalFields.isAdditionalFieldId(_)) + + if (invalidAdditionalFields.nonEmpty) { + Future.exception( + ClientError( + ClientErrorCause.BadRequest, + "Requested additional fields contain invalid field id " + + s"${invalidAdditionalFields.mkString(", ")}. Additional fields ids must be greater than 100." 
+ ) + ) + } else { + val opts = toTweetQueryOptions(requestOptions) + val measureRacyReads: TweetId => Unit = trackLossyReadsAfterWrite( + stats.stat("racy_reads", "get_tweets"), + Duration.fromSeconds(3) + ) + + Stitch.run( + Stitch.traverse(request.tweetIds) { id => + tweetRepo(id, opts).liftToTry + .flatMap { + case Throw(NotFound) => + measureRacyReads(id) + + Stitch.value(GetTweetResult(id, StatusState.NotFound)) + case Throw(ex) => + failureResult(deletedTweetVisibilityRepo, id, requestOptions, ex) + case Return(r) => + toGetTweetResult( + deletedTweetVisibilityRepo, + creativesContainerRepo, + requestOptions, + tweetResult = r, + includeSourceTweet = requestOptions.includeSourceTweet, + includeQuotedTweet = requestOptions.includeQuotedTweet, + stats, + shouldMaterializeContainers + ) + }.flatMap { getTweetResult => + // check if tweet data is backed by creatives container and needs to be hydrated from creatives + // container service. + hydrateCreativeContainerBackedTweet( + getTweetResult, + requestOptions, + creativesContainerRepo, + stats, + shouldMaterializeContainers + ) + } + } + ) + } + } + } + + def toTweetQueryOptions(options: GetTweetOptions): TweetQuery.Options = { + val shouldSkipCache = TestingSignalsContext().flatMap(_.simulateBackPressure).nonEmpty + val cacheControl = + if (shouldSkipCache) CacheControl.NoCache + else if (options.doNotCache) CacheControl.ReadOnlyCache + else CacheControl.ReadWriteCache + + val countsFields = toCountsFields(options) + val mediaFields = toMediaFields(options) + + TweetQuery.Options( + include = BaseInclude.also( + tweetFields = toTweetFields(options, countsFields), + countsFields = countsFields, + mediaFields = mediaFields, + quotedTweet = Some(options.includeQuotedTweet) + ), + cacheControl = cacheControl, + cardsPlatformKey = options.cardsPlatformKey, + excludeReported = options.excludeReported, + enforceVisibilityFiltering = !options.bypassVisibilityFiltering, + safetyLevel = 
options.safetyLevel.getOrElse(SafetyLevel.FilterDefault), + forUserId = options.forUserId, + languageTag = options.languageTag, + extensionsArgs = options.extensionsArgs, + forExternalConsumption = true, + simpleQuotedTweet = options.simpleQuotedTweet + ) + } + + private def toTweetFields(opts: GetTweetOptions, countsFields: Set[FieldId]): Set[FieldId] = { + val bldr = Set.newBuilder[FieldId] + + bldr ++= opts.additionalFieldIds + + if (opts.includePlaces) bldr += Tweet.PlaceField.id + if (opts.forUserId.nonEmpty) { + if (opts.includePerspectivals) bldr += Tweet.PerspectiveField.id + if (opts.includeConversationMuted) bldr += Tweet.ConversationMutedField.id + } + if (opts.includeCards && opts.cardsPlatformKey.isEmpty) bldr += Tweet.CardsField.id + if (opts.includeCards && opts.cardsPlatformKey.nonEmpty) bldr += Tweet.Card2Field.id + if (opts.includeProfileGeoEnrichment) bldr += Tweet.ProfileGeoEnrichmentField.id + + if (countsFields.nonEmpty) bldr += Tweet.CountsField.id + + if (opts.includeCardUri) bldr += Tweet.CardReferenceField.id + + bldr.result() + } + + private def toCountsFields(opts: GetTweetOptions): Set[FieldId] = { + val bldr = Set.newBuilder[FieldId] + + if (opts.includeRetweetCount) bldr += StatusCounts.RetweetCountField.id + if (opts.includeReplyCount) bldr += StatusCounts.ReplyCountField.id + if (opts.includeFavoriteCount) bldr += StatusCounts.FavoriteCountField.id + if (opts.includeQuoteCount) bldr += StatusCounts.QuoteCountField.id + + bldr.result() + } + + private def toMediaFields(opts: GetTweetOptions): Set[FieldId] = { + if (opts.includeMediaAdditionalMetadata) + Set(MediaEntity.AdditionalMetadataField.id) + else + Set.empty + } + + /** + * Converts a `TweetResult` into a `GetTweetResult`. 
+ */ + def toGetTweetResult( + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetType, + options: GetTweetOptions, + tweetResult: TweetResult, + includeSourceTweet: Boolean, + includeQuotedTweet: Boolean, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetResult] = { + val tweetData = tweetResult.value + + // only include missing fields if non empty + def asMissingFields(set: Set[FieldByPath]): Option[Set[FieldByPath]] = + if (set.isEmpty) None else Some(set) + + val missingFields = asMissingFields(tweetResult.state.failedFields) + + val sourceTweetResult = + tweetData.sourceTweetResult + .filter(_ => includeSourceTweet) + + val sourceTweetData = tweetData.sourceTweetResult + .getOrElse(tweetResult) + .value + val quotedTweetResult: Option[QuotedTweetResult] = sourceTweetData.quotedTweetResult + .filter(_ => includeQuotedTweet) + + val qtFilteredReasonStitch = + ((sourceTweetData.tweet.quotedTweet, quotedTweetResult) match { + case (Some(quotedTweet), Some(QuotedTweetResult.Filtered(filteredState))) => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + filteredState, + quotedTweet.tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = true + ) + ) + case _ => Stitch.None + }) + //Use quotedTweetResult filtered reason when VF filtered reason is not present + .map(fsOpt => fsOpt.orElse(quotedTweetResult.flatMap(_.filteredReason))) + + val suppress = tweetData.suppress.orElse(tweetData.sourceTweetResult.flatMap(_.value.suppress)) + + val quotedTweetStitch: Stitch[Option[Tweet]] = + quotedTweetResult match { + // check if quote tweet is backed by creatives container and needs to be hydrated from creatives + // container service. 
detail see go/creatives-containers-tdd + case Some(QuotedTweetResult.Found(tweetResult)) => + hydrateCreativeContainerBackedTweet( + originalGetTweetResult = GetTweetResult( + tweetId = tweetResult.value.tweet.id, + tweetState = StatusState.Found, + tweet = Some(tweetResult.value.tweet) + ), + getTweetRequestOptions = options, + creativesContainerRepo = creativesContainerRepo, + stats = stats, + shouldMaterializeContainers + ).map(_.tweet) + case _ => + Stitch.value( + quotedTweetResult + .flatMap(_.toOption) + .map(_.value.tweet) + ) + } + + Stitch.join(qtFilteredReasonStitch, quotedTweetStitch).map { + case (qtFilteredReason, quotedTweet) => + GetTweetResult( + tweetId = tweetData.tweet.id, + tweetState = + if (suppress.nonEmpty) StatusState.Suppress + else if (missingFields.nonEmpty) StatusState.Partial + else StatusState.Found, + tweet = Some(tweetData.tweet), + missingFields = missingFields, + filteredReason = suppress.map(_.filteredReason), + sourceTweet = sourceTweetResult.map(_.value.tweet), + sourceTweetMissingFields = sourceTweetResult + .map(_.state.failedFields) + .flatMap(asMissingFields), + quotedTweet = quotedTweet, + quotedTweetMissingFields = quotedTweetResult + .flatMap(_.toOption) + .map(_.state.failedFields) + .flatMap(asMissingFields), + quotedTweetFilteredReason = qtFilteredReason + ) + } + } + + private[this] val AuthorAccountIsInactive = FilteredReason.AuthorAccountIsInactive(true) + + def failureResult( + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + tweetId: TweetId, + options: GetTweetOptions, + ex: Throwable + ): Stitch[GetTweetResult] = { + def deletedState(deleted: Boolean, statusState: StatusState) = + if (deleted && options.enableDeletedState) { + statusState + } else { + StatusState.NotFound + } + + ex match { + case FilteredState.Unavailable.Author.Deactivated => + Stitch.value(GetTweetResult(tweetId, StatusState.DeactivatedUser)) + case FilteredState.Unavailable.Author.NotFound => + 
Stitch.value(GetTweetResult(tweetId, StatusState.NotFound)) + case FilteredState.Unavailable.Author.Offboarded => + Stitch.value( + GetTweetResult(tweetId, StatusState.Drop, filteredReason = Some(AuthorAccountIsInactive))) + case FilteredState.Unavailable.Author.Suspended => + Stitch.value(GetTweetResult(tweetId, StatusState.SuspendedUser)) + case FilteredState.Unavailable.Author.Protected => + Stitch.value(GetTweetResult(tweetId, StatusState.ProtectedUser)) + case FilteredState.Unavailable.Author.Unsafe => + Stitch.value(GetTweetResult(tweetId, StatusState.Drop)) + //Handle delete state with optional FilteredReason + case FilteredState.Unavailable.TweetDeleted => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + ex, + tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = false + ) + ).map(filteredReasonOpt => { + val deleteState = deletedState(deleted = true, StatusState.Deleted) + GetTweetResult(tweetId, deleteState, filteredReason = filteredReasonOpt) + }) + + case FilteredState.Unavailable.BounceDeleted => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + ex, + tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = false + ) + ).map(filteredReasonOpt => { + val deleteState = deletedState(deleted = true, StatusState.BounceDeleted) + GetTweetResult(tweetId, deleteState, filteredReason = filteredReasonOpt) + }) + + case FilteredState.Unavailable.SourceTweetNotFound(d) => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + ex, + tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = false + ) + ).map(filteredReasonOpt => { + val deleteState = deletedState(d, StatusState.Deleted) + GetTweetResult(tweetId, deleteState, filteredReason = filteredReasonOpt) + }) + case FilteredState.Unavailable.Reported => + Stitch.value(GetTweetResult(tweetId, StatusState.ReportedTweet)) + case fs: 
FilteredState.HasFilteredReason => + Stitch.value( + GetTweetResult(tweetId, StatusState.Drop, filteredReason = Some(fs.filteredReason))) + case OverCapacity(_) => Stitch.value(GetTweetResult(tweetId, StatusState.OverCapacity)) + case _ => Stitch.value(GetTweetResult(tweetId, StatusState.Failed)) + } + } + + private def hydrateCreativeContainerBackedTweet( + originalGetTweetResult: GetTweetResult, + getTweetRequestOptions: GetTweetOptions, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetType, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetResult] = { + // creatives container backed tweet stats + val ccTweetMaterialized = stats.scope("creatives_container", "get_tweets") + val ccTweetMaterializeFiltered = ccTweetMaterialized.scope("filtered") + val ccTweetMaterializeSuccess = ccTweetMaterialized.counter("success") + val ccTweetMaterializeFailed = ccTweetMaterialized.counter("failed") + val ccTweetMaterializeRequests = ccTweetMaterialized.counter("requests") + + val tweetId = originalGetTweetResult.tweetId + val tweetState = originalGetTweetResult.tweetState + val underlyingCreativesContainerId = + originalGetTweetResult.tweet.flatMap(_.underlyingCreativesContainerId) + ( + tweetState, + underlyingCreativesContainerId, + getTweetRequestOptions.disableTweetMaterialization, + shouldMaterializeContainers() + ) match { + // 1. creatives container backed tweet is determined by `underlyingCreativesContainerId` field presence. + // 2. if the frontend tweet is suppressed by any reason, respect that and not do this hydration. 
+ // (this logic can be revisited and improved further) + case (_, None, _, _) => + Stitch.value(originalGetTweetResult) + case (_, Some(_), _, false) => + ccTweetMaterializeFiltered.counter("decider_suppressed").incr() + Stitch.value(GetTweetResult(tweetId, StatusState.NotFound)) + case (StatusState.Found, Some(containerId), false, _) => + ccTweetMaterializeRequests.incr() + val materializationRequest = + MaterializeAsTweetRequest(containerId, tweetId, Some(originalGetTweetResult)) + creativesContainerRepo( + materializationRequest, + Some(getTweetRequestOptions) + ).onSuccess(_ => ccTweetMaterializeSuccess.incr()) + .onFailure(_ => ccTweetMaterializeFailed.incr()) + .handle { + case _ => GetTweetResult(tweetId, StatusState.Failed) + } + case (_, Some(_), true, _) => + ccTweetMaterializeFiltered.counter("suppressed").incr() + Stitch.value(GetTweetResult(tweetId, StatusState.NotFound)) + case (state, Some(_), _, _) => + ccTweetMaterializeFiltered.counter(state.name).incr() + Stitch.value(originalGetTweetResult) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/HandlerError.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/HandlerError.scala new file mode 100644 index 000000000..6ec0fc611 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/HandlerError.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.exception.thriftscala.ClientErrorCause +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.FilteredState.Unavailable._ + +private[tweetypie] object HandlerError { + + def translateNotFoundToClientError[U](tweetId: TweetId): PartialFunction[Throwable, Stitch[U]] = { + case NotFound => + Stitch.exception(HandlerError.tweetNotFound(tweetId)) + case TweetDeleted | BounceDeleted => + 
Stitch.exception(HandlerError.tweetNotFound(tweetId, true)) + case SourceTweetNotFound(deleted) => + Stitch.exception(HandlerError.tweetNotFound(tweetId, deleted)) + } + + def tweetNotFound(tweetId: TweetId, deleted: Boolean = false): ClientError = + ClientError( + ClientErrorCause.BadRequest, + s"tweet ${if (deleted) "deleted" else "not found"}: $tweetId" + ) + + def userNotFound(userId: UserId): ClientError = + ClientError(ClientErrorCause.BadRequest, s"user not found: $userId") + + def tweetNotFoundException(tweetId: TweetId): Future[Nothing] = + Future.exception(tweetNotFound(tweetId)) + + def userNotFoundException(userId: UserId): Future[Nothing] = + Future.exception(userNotFound(userId)) + + def getRequired[A, B]( + optionFutureArrow: FutureArrow[A, Option[B]], + notFound: A => Future[B] + ): FutureArrow[A, B] = + FutureArrow(key => + optionFutureArrow(key).flatMap { + case Some(x) => Future.value(x) + case None => notFound(key) + }) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/MediaBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/MediaBuilder.scala new file mode 100644 index 000000000..560c51304 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/MediaBuilder.scala @@ -0,0 +1,176 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.servo.util.FutureArrow +import com.twitter.tco_util.TcoSlug +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.media._ +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.tweettext.Offset + +object CreateMediaTco { + import UpstreamFailure._ + + case class Request( + tweetId: TweetId, + userId: UserId, 
+ userScreenName: String, + isProtected: Boolean, + createdAt: Time, + isVideo: Boolean, + dark: Boolean) + + type Type = FutureArrow[Request, Media.MediaTco] + + def apply(urlShortener: UrlShortener.Type): Type = + FutureArrow[Request, Media.MediaTco] { req => + val expandedUrl = MediaUrl.Permalink(req.userScreenName, req.tweetId, req.isVideo) + val shortenCtx = + UrlShortener.Context( + userId = req.userId, + userProtected = req.isProtected, + tweetId = req.tweetId, + createdAt = req.createdAt, + dark = req.dark + ) + + urlShortener((expandedUrl, shortenCtx)) + .flatMap { metadata => + metadata.shortUrl match { + case TcoSlug(slug) => + Future.value( + Media.MediaTco( + expandedUrl, + metadata.shortUrl, + MediaUrl.Display.fromTcoSlug(slug) + ) + ) + + case _ => + // should never get here, since shortened urls from talon + // always start with "http://t.co/", just in case... + Future.exception(MediaShortenUrlMalformedFailure) + } + } + .rescue { + case UrlShortener.InvalidUrlError => + // should never get here, since media expandedUrl should always be a valid + // input to talon. 
+ Future.exception(MediaExpandedUrlNotValidFailure) + } + } +} + +object MediaBuilder { + private val log = Logger(getClass) + + case class Request( + mediaUploadIds: Seq[MediaId], + text: String, + tweetId: TweetId, + userId: UserId, + userScreenName: String, + isProtected: Boolean, + createdAt: Time, + dark: Boolean = false, + productMetadata: Option[Map[MediaId, UserDefinedProductMetadata]] = None) + + case class Result(updatedText: String, mediaEntities: Seq[MediaEntity], mediaKeys: Seq[MediaKey]) + + type Type = FutureArrow[Request, Result] + + def apply( + processMedia: MediaClient.ProcessMedia, + createMediaTco: CreateMediaTco.Type, + stats: StatsReceiver + ): Type = + FutureArrow[Request, Result] { + case Request( + mediaUploadIds, + text, + tweetId, + userId, + screenName, + isProtected, + createdAt, + dark, + productMetadata + ) => + for { + mediaKeys <- processMedia( + ProcessMediaRequest( + mediaUploadIds, + userId, + tweetId, + isProtected, + productMetadata + ) + ) + mediaTco <- createMediaTco( + CreateMediaTco.Request( + tweetId, + userId, + screenName, + isProtected, + createdAt, + mediaKeys.exists(MediaKeyClassifier.isVideo(_)), + dark + ) + ) + } yield produceResult(text, mediaTco, isProtected, mediaKeys) + }.countExceptions( + ExceptionCounter(stats) + ) + .onFailure[Request] { (req, ex) => log.info(req.toString, ex) } + .translateExceptions { + case e: MediaExceptions.MediaClientException => + TweetCreateFailure.State(TweetCreateState.InvalidMedia, Some(e.getMessage)) + } + + def produceResult( + text: String, + mediaTco: Media.MediaTco, + userIsProtected: Boolean, + mediaKeys: Seq[MediaKey] + ): Result = { + + val newText = + if (text == "") mediaTco.url + else text + " " + mediaTco.url + + val to = Offset.CodePoint.length(newText) + val from = to - Offset.CodePoint.length(mediaTco.url) + + val mediaEntities = + mediaKeys.map { mediaKey => + MediaEntity( + mediaKey = Some(mediaKey), + fromIndex = from.toShort, + toIndex = to.toShort, + url = 
mediaTco.url, + displayUrl = mediaTco.displayUrl, + expandedUrl = mediaTco.expandedUrl, + mediaId = mediaKey.mediaId, + mediaPath = "", // to be hydrated + mediaUrl = null, // to be hydrated + mediaUrlHttps = null, // to be hydrated + nsfw = false, // deprecated + sizes = Set( + MediaSize( + sizeType = MediaSizeType.Orig, + resizeMethod = MediaResizeMethod.Fit, + deprecatedContentType = MediaKeyUtil.contentType(mediaKey), + width = -1, // to be hydrated + height = -1 // to be hydrated + ) + ) + ) + } + + Result(newText, mediaEntities, mediaKeys) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala new file mode 100644 index 000000000..2ee6d1063 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala @@ -0,0 +1,395 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.context.thriftscala.FeatureContext +import com.twitter.tweetypie.backends.LimiterService +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.store.InsertTweet +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.TweetCreationLock.{Key => TweetCreationLockKey} + +object PostTweet { + type Type[R] = FutureArrow[R, PostTweetResult] + + /** + * A type-class to abstract over tweet creation requests. 
+ */ + trait RequestView[R] { + def isDark(req: R): Boolean + def sourceTweetId(req: R): Option[TweetId] + def options(req: R): Option[WritePathHydrationOptions] + def userId(req: R): UserId + def uniquenessId(req: R): Option[Long] + def returnSuccessOnDuplicate(req: R): Boolean + def returnDuplicateTweet(req: R): Boolean = + returnSuccessOnDuplicate(req) || uniquenessId(req).nonEmpty + def lockKey(req: R): TweetCreationLockKey + def geo(req: R): Option[TweetCreateGeo] + def featureContext(req: R): Option[FeatureContext] + def additionalContext(req: R): Option[collection.Map[TweetCreateContextKey, String]] + def transientContext(req: R): Option[TransientCreateContext] + def additionalFields(req: R): Option[Tweet] + def duplicateState: TweetCreateState + def scope: String + def isNullcast(req: R): Boolean + def creativesContainerId(req: R): Option[CreativesContainerId] + def noteTweetMentionedUserIds(req: R): Option[Seq[Long]] + } + + /** + * An implementation of `RequestView` for `PostTweetRequest`. 
+ */ + implicit object PostTweetRequestView extends RequestView[PostTweetRequest] { + def isDark(req: PostTweetRequest): Boolean = req.dark + def sourceTweetId(req: PostTweetRequest): None.type = None + def options(req: PostTweetRequest): Option[WritePathHydrationOptions] = req.hydrationOptions + def userId(req: PostTweetRequest): UserId = req.userId + def uniquenessId(req: PostTweetRequest): Option[Long] = req.uniquenessId + def returnSuccessOnDuplicate(req: PostTweetRequest) = false + def lockKey(req: PostTweetRequest): TweetCreationLockKey = TweetCreationLockKey.byRequest(req) + def geo(req: PostTweetRequest): Option[TweetCreateGeo] = req.geo + def featureContext(req: PostTweetRequest): Option[FeatureContext] = req.featureContext + def additionalContext( + req: PostTweetRequest + ): Option[collection.Map[TweetCreateContextKey, String]] = req.additionalContext + def transientContext(req: PostTweetRequest): Option[TransientCreateContext] = + req.transientContext + def additionalFields(req: PostTweetRequest): Option[Tweet] = req.additionalFields + def duplicateState: TweetCreateState.Duplicate.type = TweetCreateState.Duplicate + def scope = "tweet" + def isNullcast(req: PostTweetRequest): Boolean = req.nullcast + def creativesContainerId(req: PostTweetRequest): Option[CreativesContainerId] = + req.underlyingCreativesContainerId + def noteTweetMentionedUserIds(req: PostTweetRequest): Option[Seq[Long]] = + req.noteTweetOptions match { + case Some(noteTweetOptions) => noteTweetOptions.mentionedUserIds + case _ => None + } + } + + /** + * An implementation of `RequestView` for `RetweetRequest`. 
+ */ + implicit object RetweetRequestView extends RequestView[RetweetRequest] { + def isDark(req: RetweetRequest): Boolean = req.dark + def sourceTweetId(req: RetweetRequest): None.type = None + def options(req: RetweetRequest): Option[WritePathHydrationOptions] = req.hydrationOptions + def userId(req: RetweetRequest): UserId = req.userId + def uniquenessId(req: RetweetRequest): Option[Long] = req.uniquenessId + def returnSuccessOnDuplicate(req: RetweetRequest): Boolean = req.returnSuccessOnDuplicate + def lockKey(req: RetweetRequest): TweetCreationLockKey = + req.uniquenessId match { + case Some(id) => TweetCreationLockKey.byUniquenessId(req.userId, id) + case None => TweetCreationLockKey.bySourceTweetId(req.userId, req.sourceStatusId) + } + def geo(req: RetweetRequest): None.type = None + def featureContext(req: RetweetRequest): Option[FeatureContext] = req.featureContext + def additionalContext(req: RetweetRequest): None.type = None + def transientContext(req: RetweetRequest): None.type = None + def additionalFields(req: RetweetRequest): Option[Tweet] = req.additionalFields + def duplicateState: TweetCreateState.AlreadyRetweeted.type = TweetCreateState.AlreadyRetweeted + def scope = "retweet" + def isNullcast(req: RetweetRequest): Boolean = req.nullcast + def creativesContainerId(req: RetweetRequest): Option[CreativesContainerId] = None + def noteTweetMentionedUserIds(req: RetweetRequest): Option[Seq[Long]] = None + } + + /** + * A `Filter` is used to decorate a `FutureArrow` that has a known return type + * and an input type for which there is a `RequestView` type-class instance. + */ + trait Filter[Res] { self => + type T[Req] = FutureArrow[Req, Res] + + /** + * Wraps a base arrow with additional behavior. + */ + def apply[Req: RequestView](base: T[Req]): T[Req] + + /** + * Composes two filter. The resulting filter itself composes FutureArrows. 
+     */
+    def andThen(next: Filter[Res]): Filter[Res] =
+      new Filter[Res] {
+        // `self` wraps `base` first; `next` then wraps that result, so `next`
+        // is the outermost layer of the composed arrow.
+        def apply[Req: RequestView](base: T[Req]): T[Req] =
+          next(self(base))
+      }
+  }
+
+  /**
+   * This filter attempts to prevent some race-condition related duplicate tweet creations,
+   * via use of a `TweetCreateLock`. When a duplicate is detected, this filter can synthesize
+   * a successful `PostTweetResult` if applicable, or return the appropriate coded response.
+   */
+  object DuplicateHandler {
+    def apply(
+      tweetCreationLock: TweetCreationLock,
+      getTweets: GetTweetsHandler.Type,
+      stats: StatsReceiver
+    ): Filter[PostTweetResult] =
+      new Filter[PostTweetResult] {
+        def apply[R: RequestView](base: T[R]): T[R] = {
+          val view = implicitly[RequestView[R]]
+          // Counters are scoped by the view's `scope` ("tweet" or "retweet").
+          val notFoundCount = stats.counter(view.scope, "not_found")
+          val foundCounter = stats.counter(view.scope, "found")
+
+          // `self` is the arrow being defined, used below to retry the request
+          // after a lock that is assumed stale has been released.
+          FutureArrow.rec[R, PostTweetResult] { self => req =>
+            val duplicateKey = view.lockKey(req)
+
+            // attempts to find the duplicate tweet.
+            //
+            // if `returnDupTweet` is true and we find the tweet, then we return a
+            // successful `PostTweetResult` with that tweet. if we don't find the
+            // tweet, we throw an `InternalServerError`.
+            //
+            // if `returnDupTweet` is false and we find the tweet, then we return
+            // the appropriate duplicate state. if we don't find the tweet, then
+            // we unlock the duplicate key and try again.
+            def duplicate(tweetId: TweetId, returnDupTweet: Boolean) =
+              findDuplicate(tweetId, req).flatMap {
+                case Some(postTweetResult) =>
+                  foundCounter.incr()
+                  if (returnDupTweet) Future.value(postTweetResult)
+                  else Future.value(PostTweetResult(state = view.duplicateState))
+
+                case None =>
+                  notFoundCount.incr()
+                  if (returnDupTweet) {
+                    // If we failed to load the tweet, but we know that it
+                    // should exist, then return an InternalServerError, so that
+                    // the client treats it as a failed tweet creation req.
+                    Future.exception(
+                      InternalServerError("Failed to load duplicate existing tweet: " + tweetId)
+                    )
+                  } else {
+                    // Assume the lock is stale if we can't load the tweet. It's
+                    // possible that the lock is not stale, but the tweet is not
+                    // yet available, which requires that it not be present in
+                    // cache and not yet available from the backend. This means
+                    // that the failure mode is to allow tweeting if we can't
+                    // determine the state, but it should be rare that we can't
+                    // determine it.
+                    tweetCreationLock.unlock(duplicateKey).before(self(req))
+                  }
+              }
+
+            tweetCreationLock(duplicateKey, view.isDark(req), view.isNullcast(req)) {
+              base(req)
+            }.rescue {
+              // NOTE(review): this branch always reports `Duplicate`, not
+              // `view.duplicateState`, so retweets get the same state here.
+              case TweetCreationInProgress =>
+                Future.value(PostTweetResult(state = TweetCreateState.Duplicate))
+
+              // if tweetCreationLock detected a duplicate, look up the duplicate
+              // and return the appropriate result
+              case DuplicateTweetCreation(tweetId) =>
+                duplicate(tweetId, view.returnDuplicateTweet(req))
+
+              // it's possible that tweetCreationLock didn't find a duplicate for a
+              // retweet attempt, but `RetweetBuilder` did.
+              case TweetCreateFailure.AlreadyRetweeted(tweetId) if view.returnDuplicateTweet(req) =>
+                duplicate(tweetId, true)
+            }
+          }
+        }
+
+        // Reads `tweetId` back on behalf of the requesting user and, when the
+        // read reports `StatusState.Found`, converts it into an `Ok` write result.
+        private def findDuplicate[R: RequestView](
+          tweetId: TweetId,
+          req: R
+        ): Future[Option[PostTweetResult]] = {
+          val view = implicitly[RequestView[R]]
+          val readRequest =
+            GetTweetsRequest(
+              tweetIds = Seq(tweetId),
+              // Assume that the defaults are OK for all of the hydration
+              // options except the ones that are explicitly set in the
+              // req.
+              options = Some(
+                GetTweetOptions(
+                  forUserId = Some(view.userId(req)),
+                  includePerspectivals = true,
+                  includeCards = view.options(req).exists(_.includeCards),
+                  cardsPlatformKey = view.options(req).flatMap(_.cardsPlatformKey)
+                )
+              )
+            )
+
+          // NOTE(review): only a single-element response is matched; any other
+          // response size fails this pattern match instead of yielding None.
+          getTweets(readRequest).map {
+            case Seq(result) =>
+              if (result.tweetState == StatusState.Found) {
+                // If the tweet was successfully found, then convert the
+                // read result into a successful write result.
+                Some(
+                  PostTweetResult(
+                    TweetCreateState.Ok,
+                    result.tweet,
+                    // if the retweet is really old, the retweet perspective might no longer
+                    // be available, but we want to maintain the invariant that the `postRetweet`
+                    // endpoint always returns a source tweet with the correct perspective.
+                    result.sourceTweet.map { srcTweet =>
+                      TweetLenses.perspective
+                        .update(_.map(_.copy(retweeted = true, retweetId = Some(tweetId))))
+                        .apply(srcTweet)
+                    },
+                    result.quotedTweet
+                  )
+                )
+              } else {
+                None
+              }
+          }
+        }
+      }
+  }
+
+  /**
+   * A `Filter` that applies rate limiting to failing requests.
+   */
+  object RateLimitFailures {
+    def apply(
+      validateLimit: RateLimitChecker.Validate,
+      incrementSuccess: LimiterService.IncrementByOne,
+      incrementFailure: LimiterService.IncrementByOne
+    ): Filter[TweetBuilderResult] =
+      new Filter[TweetBuilderResult] {
+        def apply[R: RequestView](base: T[R]): T[R] = {
+          val view = implicitly[RequestView[R]]
+
+          FutureArrow[R, TweetBuilderResult] { req =>
+            val userId = view.userId(req)
+            val dark = view.isDark(req)
+            val contributorUserId: Option[UserId] = getContributor(userId).map(_.userId)
+
+            validateLimit((userId, dark))
+              .before {
+                base(req).onFailure { _ =>
+                  // We don't increment the failure rate limit if the failure
+                  // was from the failure rate limit so that the user can't
+                  // get in a loop where tweet creation is never attempted.
We
+                  // don't increment it if the creation is dark because there
+                  // is no way to perform a dark tweet creation through the
+                  // API, so it's most likely some kind of test traffic like
+                  // tap-compare.
+                  if (!dark) incrementFailure(userId, contributorUserId)
+                }
+              }
+              .onSuccess { resp =>
+                // If we return a silent failure, then we want to
+                // increment the rate limit as if the tweet was fully
+                // created, because we want it to appear that way to the
+                // user whose creation silently failed.
+                if (resp.isSilentFail) incrementSuccess(userId, contributorUserId)
+              }
+          }
+        }
+      }
+  }
+
+  /**
+   * A `Filter` for counting non-`TweetCreateFailure` failures.
+   */
+  object CountFailures {
+    def apply[Res](stats: StatsReceiver, scopeSuffix: String = "_builder"): Filter[Res] =
+      new Filter[Res] {
+        def apply[R: RequestView](base: T[R]): T[R] = {
+          val view = implicitly[RequestView[R]]
+          // Stats scope is the view's scope plus the suffix, e.g. "tweet_builder".
+          val exceptionCounter = ExceptionCounter(stats.scope(view.scope + scopeSuffix))
+          base.onFailure {
+            // TweetCreateFailures are deliberately not counted here.
+            case (_, _: TweetCreateFailure) =>
+            case (_, ex) => exceptionCounter(ex)
+          }
+        }
+      }
+  }
+
+  /**
+   * A `Filter` for logging failures.
+   */
+  object LogFailures extends Filter[PostTweetResult] {
+    private[this] val failedTweetCreationsLogger = Logger(
+      "com.twitter.tweetypie.FailedTweetCreations"
+    )
+
+    def apply[R: RequestView](base: T[R]): T[R] =
+      FutureArrow[R, PostTweetResult] { req =>
+        // Logs every failure at info level together with the request that caused it.
+        base(req).onFailure {
+          case failure => failedTweetCreationsLogger.info(s"request: $req\nfailure: $failure")
+        }
+      }
+  }
+
+  /**
+   * A `Filter` for converting a thrown `TweetCreateFailure` into a `PostTweetResult`.
+   */
+  object RescueTweetCreateFailure extends Filter[PostTweetResult] {
+    def apply[R: RequestView](base: T[R]): T[R] =
+      FutureArrow[R, PostTweetResult] { req =>
+        // Failures of any other type are not matched by this rescue.
+        base(req).rescue {
+          case failure: TweetCreateFailure => Future.value(failure.toPostTweetResult)
+        }
+      }
+  }
+
+  /**
+   * Builds a base handler for `PostTweetRequest` and `RetweetRequest`.
The handler
+   * calls an underlying tweet builder, creates an `InsertTweet.Event`, hydrates
+   * that, passes it to `tweetStore`, and then converts it to a `PostTweetResult`.
+   */
+  object Handler {
+    def apply[R: RequestView](
+      tweetBuilder: FutureArrow[R, TweetBuilderResult],
+      hydrateInsertEvent: FutureArrow[InsertTweet.Event, InsertTweet.Event],
+      tweetStore: InsertTweet.Store,
+    ): Type[R] = {
+      FutureArrow { req =>
+        for {
+          bldrRes <- tweetBuilder(req)
+          event <- hydrateInsertEvent(toInsertTweetEvent(req, bldrRes))
+          // Dark events are hydrated but not passed to the store.
+          _ <- Future.when(!event.dark)(tweetStore.insertTweet(event))
+        } yield toPostTweetResult(event)
+      }
+    }
+
+    /**
+     * Converts a request/`TweetBuilderResult` pair into an `InsertTweet.Event`.
+     */
+    def toInsertTweetEvent[R: RequestView](
+      req: R,
+      bldrRes: TweetBuilderResult
+    ): InsertTweet.Event = {
+      val view = implicitly[RequestView[R]]
+      InsertTweet.Event(
+        tweet = bldrRes.tweet,
+        user = bldrRes.user,
+        sourceTweet = bldrRes.sourceTweet,
+        sourceUser = bldrRes.sourceUser,
+        parentUserId = bldrRes.parentUserId,
+        timestamp = bldrRes.createdAt,
+        // A silent failure is treated as dark, in addition to requests that are dark.
+        dark = view.isDark(req) || bldrRes.isSilentFail,
+        hydrateOptions = view.options(req).getOrElse(WritePathHydrationOptions()),
+        featureContext = view.featureContext(req),
+        initialTweetUpdateRequest = bldrRes.initialTweetUpdateRequest,
+        // Present only when the request has geo and that geo has a search request id.
+        geoSearchRequestId = for {
+          geo <- view.geo(req)
+          searchRequestID <- geo.geoSearchRequestId
+        } yield {
+          GeoSearchRequestId(requestID = searchRequestID.id)
+        },
+        additionalContext = view.additionalContext(req),
+        transientContext = view.transientContext(req),
+        noteTweetMentionedUserIds = view.noteTweetMentionedUserIds(req)
+      )
+    }
+
+    /**
+     * Converts an `InsertTweet.Event` into a successful `PostTweetResult`.
+     */
+    def toPostTweetResult(event: InsertTweet.Event): PostTweetResult =
+      PostTweetResult(
+        TweetCreateState.Ok,
+        Some(event.tweet),
+        sourceTweet = event.sourceTweet,
+        quotedTweet = event.quotedTweet
+      )
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/QuotedTweetDeleteEventBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/QuotedTweetDeleteEventBuilder.scala
new file mode 100644
index 000000000..834cda148
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/QuotedTweetDeleteEventBuilder.scala
@@ -0,0 +1,34 @@
+package com.twitter.tweetypie
+package handler
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.repository.TweetQuery
+import com.twitter.tweetypie.repository.TweetRepository
+import com.twitter.tweetypie.store.QuotedTweetDelete
+import com.twitter.tweetypie.thriftscala.QuotedTweetDeleteRequest
+
+/**
+ * Create the appropriate QuotedTweetDelete.Event for a QuotedTweetDelete request.
+ */
+object QuotedTweetDeleteEventBuilder {
+  type Type = QuotedTweetDeleteRequest => Future[Option[QuotedTweetDelete.Event]]
+
+  val queryOptions: TweetQuery.Options =
+    TweetQuery.Options(GetTweetsHandler.BaseInclude)
+
+  // Looks up the quoting tweet and, only when the repository yields one, wraps
+  // the ids from the request in an event stamped with the current time.
+  def apply(tweetRepo: TweetRepository.Optional): Type = { request =>
+    val eventLookup =
+      tweetRepo(request.quotingTweetId, queryOptions).map { maybeQuotingTweet =>
+        for (quotingTweet <- maybeQuotingTweet) yield {
+          QuotedTweetDelete.Event(
+            quotingTweetId = request.quotingTweetId,
+            quotingUserId = getUserId(quotingTweet),
+            quotedTweetId = request.quotedTweetId,
+            quotedUserId = request.quotedUserId,
+            timestamp = Time.now
+          )
+        }
+      }
+
+    Stitch.run(eventLookup)
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/QuotedTweetTakedownEventBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/QuotedTweetTakedownEventBuilder.scala
new file mode 100644
index 000000000..7a44845a8
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/QuotedTweetTakedownEventBuilder.scala
@@ -0,0 +1,36 @@
+package com.twitter.tweetypie
+package handler
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.repository.TweetQuery
+import com.twitter.tweetypie.repository.TweetRepository
+import com.twitter.tweetypie.store.QuotedTweetTakedown
+import com.twitter.tweetypie.thriftscala.QuotedTweetTakedownRequest
+
+/**
+ * Create the appropriate QuotedTweetTakedown.Event for a QuotedTweetTakedown request.
+ */
+object QuotedTweetTakedownEventBuilder {
+  type Type = QuotedTweetTakedownRequest => Future[Option[QuotedTweetTakedown.Event]]
+
+  val queryOptions: TweetQuery.Options =
+    TweetQuery.Options(GetTweetsHandler.BaseInclude)
+
+  // Yields None when the repository does not return the quoting tweet;
+  // otherwise an event built from the request and stamped with the current time.
+  def apply(tweetRepo: TweetRepository.Optional): Type =
+    request =>
+      Stitch.run(
+        tweetRepo(request.quotingTweetId, queryOptions).map {
+          _.map { quotingTweet =>
+            QuotedTweetTakedown.Event(
+              quotingTweetId = request.quotingTweetId,
+              quotingUserId = getUserId(quotingTweet),
+              quotedTweetId = request.quotedTweetId,
+              quotedUserId = request.quotedUserId,
+              takedownCountryCodes = request.takedownCountryCodes,
+              takedownReasons = request.takedownReasons,
+              timestamp = Time.now
+            )
+          }
+        }
+      )
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/RateLimitChecker.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/RateLimitChecker.scala
new file mode 100644
index 000000000..cac90aab6
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/RateLimitChecker.scala
@@ -0,0 +1,49 @@
+package com.twitter.tweetypie
+package handler
+
+import com.twitter.servo.util.FutureArrow
+import com.twitter.tweetypie.backends.LimiterService
+import com.twitter.tweetypie.core.TweetCreateFailure
+import com.twitter.tweetypie.thriftscala.TweetCreateState.RateLimitExceeded
+
+object RateLimitChecker {
+  type Dark = Boolean
+  type GetRemaining = FutureArrow[(UserId, Dark), Int]
+  type Validate = FutureArrow[(UserId, Dark), Unit]
+
+  // Builds an arrow yielding the remaining allowance capped at `maxMediaTags`.
+  // Dark requests skip the limiter, and any limiter failure falls back to
+  // `maxMediaTags`.
+  def getMaxMediaTags(minRemaining: LimiterService.MinRemaining, maxMediaTags: Int): GetRemaining =
+    FutureArrow {
+      case (userId, dark) =>
+        if (dark) Future.value(maxMediaTags)
+        else {
+          val contributorUserId = getContributor(userId).map(_.userId)
+          minRemaining(userId, contributorUserId)
+            .map(_.min(maxMediaTags))
+            .handle { case _ => maxMediaTags }
+        }
+    }
+
+  // Builds an arrow that fails with `TweetCreateFailure.State(RateLimitExceeded)`
+  // when `hasRemaining` reports false. The check is skipped (and "checked" is
+  // not incremented) for dark requests or while `rateLimitEnabled()` is false.
+  def validate(
+    hasRemaining: LimiterService.HasRemaining,
+    featureStats: StatsReceiver,
+    rateLimitEnabled: () => Boolean
+  ): Validate = {
+    val exceededCounter = featureStats.counter("exceeded")
+    val checkedCounter = featureStats.counter("checked")
+    FutureArrow {
+      case (userId, dark) =>
+        if (dark || !rateLimitEnabled()) {
+          Future.Unit
+        } else {
+          checkedCounter.incr()
+          val contributorUserId = getContributor(userId).map(_.userId)
+          hasRemaining(userId, contributorUserId).map {
+            case false =>
+              exceededCounter.incr()
+              throw TweetCreateFailure.State(RateLimitExceeded)
+            case _ => ()
+          }
+        }
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReplyBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReplyBuilder.scala
new file mode 100644
index 000000000..2e1963074
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReplyBuilder.scala
@@ -0,0 +1,633 @@
+package com.twitter.tweetypie
+package handler
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core.TweetCreateFailure
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.serverutil.ExceptionCounter
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.tweetypie.tweettext.Offset
+import com.twitter.twittertext.Extractor
+import scala.annotation.tailrec
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+import scala.util.control.NoStackTrace
+
+object ReplyBuilder {
+  private val extractor = new Extractor
+  // Shared failure instance used whenever the in-reply-to tweet (or its
+  // author) cannot be loaded, and when the in-reply-to user blocks the author.
+  private val InReplyToTweetNotFound =
+    TweetCreateFailure.State(TweetCreateState.InReplyToTweetNotFound)
+
+  // Input to the reply builder; see `ReplyBuilder.apply` for how each field is used.
+  case class Request(
+    authorId: UserId,
+    authorScreenName: String,
+    inReplyToTweetId: Option[TweetId],
+    tweetText: String,
+    prependImplicitMentions: Boolean,
+    enableTweetToNarrowcasting: Boolean,
+    excludeUserIds: Seq[UserId],
+    spamResult: Spam.Result,
+    batchMode: Option[BatchComposeMode])
+
+  /**
+   * This case class contains the fields that are shared between legacy and simplified replies.
+   */
+  case class BaseResult(
+    reply: Reply,
+    conversationId: Option[ConversationId],
+    selfThreadMetadata: Option[SelfThreadMetadata],
+    community: Option[Communities] = None,
+    exclusiveTweetControl: Option[ExclusiveTweetControl] = None,
+    trustedFriendsControl: Option[TrustedFriendsControl] = None,
+    editControl: Option[EditControl] = None) {
+    // Creates a Result by providing the fields that differ between legacy and simplified replies.
+    def toResult(
+      tweetText: String,
+      directedAtMetadata: DirectedAtUserMetadata,
+      visibleStart: Offset.CodePoint = Offset.CodePoint(0),
+    ): Result =
+      Result(
+        reply,
+        tweetText,
+        directedAtMetadata,
+        conversationId,
+        selfThreadMetadata,
+        visibleStart,
+        community,
+        exclusiveTweetControl,
+        trustedFriendsControl,
+        editControl
+      )
+  }
+
+  /**
+   * @param reply              the Reply object to include in the tweet.
+   * @param tweetText          updated tweet text which may include prepended at-mentions, trimmed
+   * @param directedAtMetadata see DirectedAtHydrator for usage.
+   * @param conversationId     conversation id to assign to the tweet.
+   * @param selfThreadMetadata returns the result of `SelfThreadBuilder`
+   * @param visibleStart       offset into `tweetText` separating hideable at-mentions from the
+   *                           visible text.
+   * @param community             passed through unchanged from `BaseResult`.
+   * @param exclusiveTweetControl passed through unchanged from `BaseResult`.
+   * @param trustedFriendsControl passed through unchanged from `BaseResult`.
+   * @param editControl           passed through unchanged from `BaseResult`.
+   */
+  case class Result(
+    reply: Reply,
+    tweetText: String,
+    directedAtMetadata: DirectedAtUserMetadata,
+    conversationId: Option[ConversationId] = None,
+    selfThreadMetadata: Option[SelfThreadMetadata] = None,
+    visibleStart: Offset.CodePoint = Offset.CodePoint(0),
+    community: Option[Communities] = None,
+    exclusiveTweetControl: Option[ExclusiveTweetControl] = None,
+    trustedFriendsControl: Option[TrustedFriendsControl] = None,
+    editControl: Option[EditControl] = None) {
+
+    /**
+     * @param finalText final tweet text after any server-side additions.
+     * @return true iff the final tweet text consists exclusively of a hidden reply mention prefix.
+     *         When this happens there's no content to the reply and thus the tweet creation should
+     *         fail.
+     */
+    def replyTextIsEmpty(finalText: String): Boolean = {
+
+      // Length of the tweet text originally output via ReplyBuilder.Result before server-side
+      // additions (e.g. media, quoted-tweet URLs)
+      val origTextLength = Offset.CodePoint.length(tweetText)
+
+      // Length of the tweet text after server-side additions.
+      val finalTextLength = Offset.CodePoint.length(finalText)
+
+      // Both conditions must hold: the hidden prefix spans the whole original
+      // text, and the final text has the same code-point length as the original.
+      val prefixWasEntireText = origTextLength == visibleStart
+      val textLenUnchanged = origTextLength == finalTextLength
+
+      prefixWasEntireText && textLenUnchanged
+    }
+  }
+
+  type Type = Request => Future[Option[Result]]
+
+  // Thrown by the `toUser` conversions when a media tag or mention lacks the
+  // fields needed to build a `User`.
+  private object InvalidUserException extends NoStackTrace
+
+  /**
+   * A user ID and screen name used for building replies.
+   */
+  private case class User(id: UserId, screenName: String)
+
+  /**
+   * Captures the in-reply-to tweet, its author, and if the user is attempting to reply to a
+   * retweet, then that retweet and its author.
+   */
+  private case class ReplySource(
+    srcTweet: Tweet,
+    srcUser: User,
+    retweet: Option[Tweet] = None,
+    rtUser: Option[User] = None) {
+    // Evaluated eagerly at construction: a media tag lacking a user id or
+    // screen name makes `toUser` throw `InvalidUserException` right here.
+    private val photoTaggedUsers: Seq[User] =
+      srcTweet.mediaTags
+        .map(_.tagMap.values.flatten)
+        .getOrElse(Nil)
+        .map(toUser)
+        .toSeq
+
+    private def toUser(mt: MediaTag): User =
+      mt match {
+        case MediaTag(_, Some(id), Some(screenName), _) => User(id, screenName)
+        case _ => throw InvalidUserException
+      }
+
+    private def toUser(e: MentionEntity): User =
+      e match {
+        case MentionEntity(_, _, screenName, Some(id), _, _) => User(id, screenName)
+        case _ => throw InvalidUserException
+      }
+
+    private def toUser(d: DirectedAtUser) = User(d.userId, d.screenName)
+
+    // Asks `cardUsersFinder` for the user ids associated with the source
+    // tweet's card reference and URLs, from the perspective of `authorUser`.
+    def allCardUsers(authorUser: User, cardUsersFinder: CardUsersFinder.Type): Future[Set[UserId]] =
+      Stitch.run(
+        cardUsersFinder(
+          CardUsersFinder.Request(
+            cardReference = getCardReference(srcTweet),
+            urls = getUrls(srcTweet).map(_.url),
+            perspectiveUserId = authorUser.id
+          )
+        )
+      )
+
+    def srcTweetMentionedUsers: Seq[User] = getMentions(srcTweet).map(toUser)
+
+    // Strategy for choosing the directed-at user and building the implicit
+    // mention prefix; one implementation per kind of reply.
+    private trait ReplyType {
+
+      val allExcludedUserIds: Set[UserId]
+
+      def directedAt: Option[User]
+      def requiredTextMention: Option[User]
+
+      def isExcluded(u: User): Boolean = allExcludedUserIds.contains(u.id)
+
+      def buildPrefix(otherMentions: Seq[User], maxImplicits: Int): String = {
+        // `seen` starts as the exclusion set, so `seen.add` rejects both
+        // excluded users and users already emitted earlier in the prefix.
+        val seen = new mutable.HashSet[UserId]
+        seen ++= allExcludedUserIds
+        // Never exclude the required mention
+        seen --= requiredTextMention.map(_.id)
+
+        (requiredTextMention.toSeq ++ otherMentions)
+          .filter(u => seen.add(u.id))
+          // The required mention is kept even when `maxImplicits` is 0.
+          .take(maxImplicits.max(requiredTextMention.size))
+          .map(u => s"@${u.screenName}")
+          .mkString(" ")
+      }
+    }
+
+    private case class SelfReply(
+      allExcludedUserIds: Set[UserId],
+      enableTweetToNarrowcasting: Boolean)
+        extends ReplyType {
+
+      private def srcTweetDirectedAt: Option[User] = getDirectedAtUser(srcTweet).map(toUser)
+
+      // First non-excluded user among the retweet author and the source
+      // tweet's directed-at user; None when narrowcasting is disabled.
+      override def directedAt: Option[User] =
+        if (!enableTweetToNarrowcasting) None
+        else Seq.concat(rtUser, srcTweetDirectedAt).find(!isExcluded(_))
+
+      override def requiredTextMention: Option[User] =
+        // Make sure the directedAt user is in the text to avoid confusion
+        directedAt
+    }
+
+    private case class BatchSubsequentReply(allExcludedUserIds: Set[UserId]) extends ReplyType {
+
+      override def directedAt: Option[User] = None
+
+      override def requiredTextMention: Option[User] = None
+
+      override def buildPrefix(otherMentions: Seq[User], maxImplicits: Int): String = ""
+    }
+
+    private case class RegularReply(
+      allExcludedUserIds: Set[UserId],
+      enableTweetToNarrowcasting: Boolean)
+        extends ReplyType {
+
+      override def directedAt: Option[User] =
+        Some(srcUser)
+          .filterNot(isExcluded)
+          .filter(_ => enableTweetToNarrowcasting)
+
+      override def requiredTextMention: Option[User] =
+        // Include the source tweet's author as a mention in the reply, even if the reply is not
+        // narrowcasted to that user. All non-self-reply tweets require this mention.
+        Some(srcUser)
+    }
+
+    /**
+     * Computes an implicit mention prefix to add to the tweet text as well as any directed-at user.
+     *
+     * The first implicit mention is the source-tweet's author unless the reply is a self-reply, in
+     * which case it inherits the DirectedAtUser from the source tweet, though the current author is
+     * never added. This mention, if it exists, is the only mention that may be used to direct-at a
+     * user and is the user that ends up in DirectedAtUserMetadata. If the user replied to a
+     * retweet and the reply doesn't explicitly mention the retweet author, then the retweet author
+     * will be next, followed by source tweet mentions and source tweet photo-tagged users.
+     *
+     * Users in `excludedUsers` originate from the PostTweetRequest and are filtered out of any
+     * non-leading mention.
+     *
+     * Note on maxImplicits:
+     * This method returns at most 'maxImplicits' mentions unless 'maxImplicits' is 0 and a
+     * directed-at mention is required, in which case it returns 1. If this happens the reply may
+     * fail downstream validation checks (e.g. TweetBuilder). With 280 visible character limit it's
+     * theoretically possible to explicitly mention 93 users (280 / 3) but this bug shouldn't really
+     * be an issue because:
+     * 1.) Most replies don't have 50 explicit mentions
+     * 2.) TOO-clients have switched to batchMode=Subsequent for self-replies which disable
+     *     source tweet's directed-at user inheritance
+     * 3.) Requests rarely are rejected due to mention_limit_exceeded
+     * If this becomes a problem we could reopen the mention limit discussion, specifically if the
+     * backend should allow 51 while the explicit limit remains at 50.
+     *
+     * Note on batchMode:
+     * Implicit mention prefix will be empty string if batchMode is BatchSubsequent. This is to
+     * support batch composer.
+     */
+    def implicitMentionPrefixAndDAU(
+      maxImplicits: Int,
+      excludedUsers: Seq[User],
+      author: User,
+      enableTweetToNarrowcasting: Boolean,
+      batchMode: Option[BatchComposeMode]
+    ): (String, Option[User]) = {
+      // The author is always part of the exclusion set.
+      def allExcludedUserIds =
+        (excludedUsers ++ Seq(author)).map(_.id).toSet
+
+      val replyType =
+        if (author.id == srcUser.id) {
+          if (batchMode.contains(BatchComposeMode.BatchSubsequent)) {
+            BatchSubsequentReply(allExcludedUserIds)
+          } else {
+            SelfReply(allExcludedUserIds, enableTweetToNarrowcasting)
+          }
+        } else {
+          RegularReply(allExcludedUserIds, enableTweetToNarrowcasting)
+        }
+
+      val prefix =
+        replyType.buildPrefix(
+          otherMentions = List.concat(rtUser, srcTweetMentionedUsers, photoTaggedUsers),
+          maxImplicits = maxImplicits
+        )
+
+      (prefix, replyType.directedAt)
+    }
+
+    /**
+     * Finds the longest possible prefix of whitespace separated @-mentions, restricted to
+     * @-mentions that are derived from the reply chain.
+     */
+    def hideablePrefix(
+      text: String,
+      cardUsers: Seq[User],
+      explicitMentions: Seq[Extractor.Entity]
+    ): Offset.CodePoint = {
+      // Screen names are compared case-insensitively (lower-cased on both sides).
+      val allowedMentions =
+        (srcTweetMentionedUsers.toSet + srcUser ++ rtUser.toSet ++ photoTaggedUsers ++ cardUsers)
+          .map(_.screenName.toLowerCase)
+      val len = Offset.CodeUnit.length(text)
+
+      // To allow NO-BREAK SPACE (U+00A0) in the prefix we need .isSpaceChar
+      def isWhitespace(c: Char) = c.isWhitespace || c.isSpaceChar
+
+      @tailrec
+      def skipWs(offset: Offset.CodeUnit): Offset.CodeUnit =
+        if (offset == len || !isWhitespace(text.charAt(offset.toInt))) offset
+        else skipWs(offset.incr)
+
+      @tailrec
+      def go(offset: Offset.CodeUnit, mentions: Stream[Extractor.Entity]): Offset.CodeUnit =
+        if (offset == len) offset
+        else {
+          mentions match {
+            // if we are at the next mention, and it is allowed, skip past and recurse
+            case next #:: tail if next.getStart == offset.toInt =>
+              if (!allowedMentions.contains(next.getValue.toLowerCase)) offset
+              else go(skipWs(Offset.CodeUnit(next.getEnd)), tail)
+            // we found non-mention text
+            case _ => offset
+          }
+        }
+
+      // The walk works in code units; the result is converted to a code-point offset.
+      go(Offset.CodeUnit(0), explicitMentions.toStream).toCodePoint(text)
+    }
+  }
+
+  private def replyToUser(user: User, inReplyToStatusId: Option[TweetId] = None): Reply =
+    Reply(
+      inReplyToUserId = user.id,
+      inReplyToScreenName = Some(user.screenName),
+      inReplyToStatusId = inReplyToStatusId
+    )
+
+  /**
+   * A builder that generates reply from `inReplyToTweetId` or tweet text
+   *
+   * There are two kinds of "reply":
+   * 1. reply to tweet, which is generated from `inReplyToTweetId`.
+   *
+   * A valid reply-to-tweet satisfies the following conditions:
+   * 1). the tweet that is in-reply-to exists (and is visible to the user creating the tweet)
+   * 2). the author of the in-reply-to tweet is mentioned anywhere in the tweet, or
+   * this is a tweet that is in reply to the author's own tweet
+   *
+   * 2. reply to user, is generated when the tweet text starts with @user_name.
This is only
+   * attempted if PostTweetRequest.enableTweetToNarrowcasting is true (default).
+   */
+  def apply(
+    userIdentityRepo: UserIdentityRepository.Type,
+    tweetRepo: TweetRepository.Optional,
+    replyCardUsersFinder: CardUsersFinder.Type,
+    selfThreadBuilder: SelfThreadBuilder,
+    relationshipRepo: RelationshipRepository.Type,
+    unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type,
+    enableRemoveUnmentionedImplicits: Gate[Unit],
+    stats: StatsReceiver,
+    maxMentions: Int
+  ): Type = {
+    val exceptionCounters = ExceptionCounter(stats)
+    val modeScope = stats.scope("mode")
+    val compatModeCounter = modeScope.counter("compat")
+    val simpleModeCounter = modeScope.counter("simple")
+
+    // Resolves a single user; a not-found lookup becomes None.
+    def getUser(key: UserKey): Future[Option[User]] =
+      Stitch.run(
+        userIdentityRepo(key)
+          .map(ident => User(ident.id, ident.screenName))
+          .liftNotFoundToOption
+      )
+
+    // Resolves many users; ids that are not found are dropped from the result.
+    def getUsers(userIds: Seq[UserId]): Future[Seq[ReplyBuilder.User]] =
+      Stitch.run(
+        Stitch
+          .traverse(userIds)(id => userIdentityRepo(UserKey(id)).liftNotFoundToOption)
+          .map(_.flatten)
+          .map { identities => identities.map { ident => User(ident.id, ident.screenName) } }
+      )
+
+    val tweetQueryIncludes =
+      TweetQuery.Include(
+        tweetFields = Set(
+          Tweet.CoreDataField.id,
+          Tweet.CardReferenceField.id,
+          Tweet.CommunitiesField.id,
+          Tweet.MediaTagsField.id,
+          Tweet.MentionsField.id,
+          Tweet.UrlsField.id,
+          Tweet.EditControlField.id
+        ) ++ selfThreadBuilder.requiredReplySourceFields.map(_.id)
+      )
+
+    def tweetQueryOptions(forUserId: UserId) =
+      TweetQuery.Options(
+        tweetQueryIncludes,
+        forUserId = Some(forUserId),
+        enforceVisibilityFiltering = true
+      )
+
+    def getTweet(tweetId: TweetId, forUserId: UserId): Future[Option[Tweet]] =
+      Stitch.run(tweetRepo(tweetId, tweetQueryOptions(forUserId)))
+
+    // Fails with `InReplyToTweetNotFound` when the in-reply-to user blocks the author.
+    def checkBlockRelationship(authorId: UserId, result: Result): Future[Unit] = {
+      val inReplyToBlocksTweeter =
+        RelationshipKey.blocks(
+          sourceId = result.reply.inReplyToUserId,
+          destinationId = authorId
+        )
+
+      Stitch.run(relationshipRepo(inReplyToBlocksTweeter)).flatMap {
+        case true => Future.exception(InReplyToTweetNotFound)
+        case false => Future.Unit
+      }
+    }
+
+    def checkIPIPolicy(request: Request, reply: Reply): Future[Unit] = {
+      if (request.spamResult == Spam.DisabledByIpiPolicy) {
+        Future.exception(Spam.DisabledByIpiFailure(reply.inReplyToScreenName))
+      } else {
+        Future.Unit
+      }
+    }
+
+    // Best effort: yields an empty list when the gate is off, when the source
+    // tweet has no conversation id or no candidate ids, or when the lookup fails.
+    def getUnmentionedUsers(replySource: ReplySource): Future[Seq[UserId]] = {
+      if (enableRemoveUnmentionedImplicits()) {
+        val srcDirectedAt = replySource.srcTweet.directedAtUserMetadata.flatMap(_.userId)
+        val srcTweetMentions = replySource.srcTweet.mentions.getOrElse(Nil).flatMap(_.userId)
+        val idsToCheck = srcTweetMentions ++ srcDirectedAt
+
+        val conversationId = replySource.srcTweet.coreData.flatMap(_.conversationId)
+        conversationId match {
+          case Some(cid) if idsToCheck.nonEmpty =>
+            stats.counter("unmentioned_implicits_check").incr()
+            Stitch
+              .run(unmentionedEntitiesRepo(cid, idsToCheck)).liftToTry.map {
+                case Return(Some(unmentionedUserIds)) =>
+                  unmentionedUserIds
+                case _ => Seq[UserId]()
+              }
+          case _ => Future.Nil
+
+        }
+      } else {
+        Future.Nil
+      }
+    }
+
+    /**
+     * Constructs a `ReplySource` for the given `tweetId`, which captures the source tweet to be
+     * replied to, its author, and if `tweetId` is for a retweet of the source tweet, then also
+     * that retweet and its author. If the source tweet (or a retweet of it), or a corresponding
+     * author, can't be found or isn't visible to the replier, then `InReplyToTweetNotFound` is
+     * thrown.
+     */
+    def getReplySource(tweetId: TweetId, forUserId: UserId): Future[ReplySource] =
+      for {
+        tweet <- getTweet(tweetId, forUserId).flatMap {
+          case None => Future.exception(InReplyToTweetNotFound)
+          case Some(t) => Future.value(t)
+        }
+
+        user <- getUser(UserKey(getUserId(tweet))).flatMap {
+          case None => Future.exception(InReplyToTweetNotFound)
+          case Some(u) => Future.value(u)
+        }
+
+        res <- getShare(tweet) match {
+          case None => Future.value(ReplySource(tweet, user))
+          case Some(share) =>
+            // if the user is replying to a retweet, find the retweet source tweet,
+            // then update with the retweet and author.
+            getReplySource(share.sourceStatusId, forUserId)
+              .map(_.copy(retweet = Some(tweet), rtUser = Some(user)))
+        }
+      } yield res
+
+    /**
+     * Computes a `Result` for the reply-to-tweet case. If `inReplyToTweetId` is for a retweet,
+     * the reply will be computed against the source tweet. If `prependImplicitMentions` is true
+     * and source tweet can't be found or isn't visible to replier, then this method will return
+     * an `InReplyToTweetNotFound` failure. If `prependImplicitMentions` is false, then the reply
+     * text must either mention the source tweet user, or it must be a reply to self; if both of
+     * those conditions fail, then `None` is returned.
+     */
+    def makeReplyToTweet(
+      inReplyToTweetId: TweetId,
+      text: String,
+      author: User,
+      prependImplicitMentions: Boolean,
+      enableTweetToNarrowcasting: Boolean,
+      excludeUserIds: Seq[UserId],
+      batchMode: Option[BatchComposeMode]
+    ): Future[Option[Result]] = {
+      val explicitMentions: Seq[Extractor.Entity] =
+        extractor.extractMentionedScreennamesWithIndices(text).asScala.toSeq
+      val mentionedScreenNames =
+        explicitMentions.map(_.getValue.toLowerCase).toSet
+
+      /**
+       * If `prependImplicitMentions` is true, or the reply author is the same as the in-reply-to
+       * author, then the reply text doesn't have to mention the in-reply-to author. Otherwise,
+       * check that the text contains a mention of the in-reply-to author.
+       */
+      def isValidReplyTo(inReplyToUser: User): Boolean =
+        prependImplicitMentions ||
+          (inReplyToUser.id == author.id) ||
+          mentionedScreenNames.contains(inReplyToUser.screenName.toLowerCase)
+
+      getReplySource(inReplyToTweetId, author.id)
+        .flatMap { replySrc =>
+          val baseResult = BaseResult(
+            reply = replyToUser(replySrc.srcUser, Some(replySrc.srcTweet.id)),
+            conversationId = getConversationId(replySrc.srcTweet),
+            selfThreadMetadata = selfThreadBuilder.build(author.id, replySrc.srcTweet),
+            community = replySrc.srcTweet.communities,
+            // Reply tweets retain the same exclusive
+            // tweet controls as the tweet being replied to.
+            exclusiveTweetControl = replySrc.srcTweet.exclusiveTweetControl,
+            trustedFriendsControl = replySrc.srcTweet.trustedFriendsControl,
+            editControl = replySrc.srcTweet.editControl
+          )
+
+          if (isValidReplyTo(replySrc.srcUser)) {
+            if (prependImplicitMentions) {
+
+              // Simplified Replies mode - prepend server-side generated prefix to passed in text
+              simpleModeCounter.incr()
+              // remove the in-reply-to tweet author from the excluded users, in-reply-to tweet author will always be a directedAtUser
+              val filteredExcludedIds =
+                excludeUserIds.filterNot(uid => uid == TweetLenses.userId(replySrc.srcTweet))
+              for {
+                unmentionedUserIds <- getUnmentionedUsers(replySrc)
+                excludedUsers <- getUsers(filteredExcludedIds ++ unmentionedUserIds)
+                (prefix, directedAtUser) = replySrc.implicitMentionPrefixAndDAU(
+                  // Explicit mentions already in the text count against the mention budget.
+                  maxImplicits = math.max(0, maxMentions - explicitMentions.size),
+                  excludedUsers = excludedUsers,
+                  author = author,
+                  enableTweetToNarrowcasting = enableTweetToNarrowcasting,
+                  batchMode = batchMode
+                )
+              } yield {
+                // prefix or text (or both) can be empty strings.  Add " " separator and adjust
+                // prefix length only when both prefix and text are non-empty.
+                val textChunks = Seq(prefix, text).map(_.trim).filter(_.nonEmpty)
+                val tweetText = textChunks.mkString(" ")
+                val visibleStart =
+                  if (textChunks.size == 2) {
+                    Offset.CodePoint.length(prefix + " ")
+                  } else {
+                    Offset.CodePoint.length(prefix)
+                  }
+
+                Some(
+                  baseResult.toResult(
+                    tweetText = tweetText,
+                    directedAtMetadata = DirectedAtUserMetadata(directedAtUser.map(_.id)),
+                    visibleStart = visibleStart
+                  )
+                )
+              }
+            } else {
+              // Backwards-compatibility mode - walk from beginning of text until find visibleStart
+              compatModeCounter.incr()
+              for {
+                cardUserIds <- replySrc.allCardUsers(author, replyCardUsersFinder)
+                cardUsers <- getUsers(cardUserIds.toSeq)
+                optUserIdentity <- extractReplyToUser(text)
+                directedAtUserId = optUserIdentity.map(_.id).filter(_ => enableTweetToNarrowcasting)
+              } yield {
+                Some(
+                  baseResult.toResult(
+                    tweetText = text,
+                    directedAtMetadata = DirectedAtUserMetadata(directedAtUserId),
+                    visibleStart = replySrc.hideablePrefix(text, cardUsers, explicitMentions),
+                  )
+                )
+              }
+            }
+          } else {
+            Future.None
+          }
+        }
+        .handle {
+          // if `getReplySource` throws this exception, but we aren't computing implicit
+          // mentions, then we fall back to the reply-to-user case instead of reply-to-tweet
+          case InReplyToTweetNotFound if !prependImplicitMentions => None
+        }
+    }
+
+    // Reply-to-user case: the result has no in-reply-to status id and is
+    // directed at the user named by the leading @-mention, if that user resolves.
+    def makeReplyToUser(text: String): Future[Option[Result]] =
+      extractReplyToUser(text).map(_.map { user =>
+        Result(replyToUser(user), text, DirectedAtUserMetadata(Some(user.id)))
+      })
+
+    // `Option(...)` guards against the extractor returning null.
+    def extractReplyToUser(text: String): Future[Option[User]] =
+      Option(extractor.extractReplyScreenname(text)) match {
+        case None => Future.None
+        case Some(screenName) => getUser(UserKey(screenName))
+      }
+
+    FutureArrow[Request, Option[Result]] { request =>
+      exceptionCounters {
+        // Non-positive in-reply-to ids are treated the same as an absent id.
+        (request.inReplyToTweetId.filter(_ > 0) match {
+          case None =>
+            Future.None
+
+          case Some(tweetId) =>
+            makeReplyToTweet(
+              tweetId,
+              request.tweetText,
+              User(request.authorId, request.authorScreenName),
+              request.prependImplicitMentions,
+              request.enableTweetToNarrowcasting,
+              request.excludeUserIds,
+              request.batchMode
+            )
+        }).flatMap {
+          case Some(r) =>
+            // Ensure that the author of this reply is not blocked by
+            // the user who they are replying to.
+            checkBlockRelationship(request.authorId, r)
+              .before(checkIPIPolicy(request, r.reply))
+              .before(Future.value(Some(r)))
+
+          case None if request.enableTweetToNarrowcasting =>
+            // We don't check the block relationship when the tweet is
+            // not part of a conversation (which is to say, we allow
+            // directed-at tweets from a blocked user.) These tweets
+            // will not cause notifications for the blocking user,
+            // despite the presence of the reply struct.
+            makeReplyToUser(request.tweetText)
+
+          case None =>
+            Future.None
+        }
+      }
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/RetweetBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/RetweetBuilder.scala
new file mode 100644
index 000000000..e14eecc84
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/RetweetBuilder.scala
@@ -0,0 +1,352 @@
+package com.twitter.tweetypie
+package handler
+
+import com.twitter.flockdb.client._
+import com.twitter.snowflake.id.SnowflakeId
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.additionalfields.AdditionalFields.setAdditionalFields
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.tweetypie.thriftscala.entities.EntityExtractor
+import com.twitter.tweetypie.tweettext.Truncator
+import com.twitter.tweetypie.util.CommunityUtil
+import com.twitter.tweetypie.util.EditControlUtil
+
+case class SourceTweetRequest(
+  tweetId: TweetId,
+  user: User,
+  hydrateOptions: WritePathHydrationOptions)
+
+object RetweetBuilder {
+  import TweetBuilder._
+  import UpstreamFailure._
+
+  type Type = FutureArrow[RetweetRequest, TweetBuilderResult]
+
+  val SGSTestRole = "socialgraph"
+
+  val log: Logger = Logger(getClass)
+
+  /**
+   * Composes retweet text using the screen name read from `sourceUser.profile` (via `.get`).
+   */
+  def composeRetweetText(text: String, sourceUser: User): String = {
+    val sourceScreenName = sourceUser.profile.get.screenName
+    composeRetweetText(text, sourceScreenName)
+  }
+
+  /**
+   * Prefixes `text` with "RT @<screenName>: " and hands it to `Truncator.truncateForRetweet`.
+   */
+  def composeRetweetText(text: String, screenName: String): String =
+    Truncator.truncateForRetweet(s"RT @$screenName: $text")
+
+  // Community tweets may not be retweeted: fail when the source tweet has a community.
+  def validateNotCommunityTweet(sourceTweet: Tweet): Future[Unit] =
+    if (!CommunityUtil.hasCommunity(sourceTweet.communities))
+      Future.Unit
+    else
+      Future.exception(TweetCreateFailure.State(TweetCreateState.CommunityRetweetNotAllowed))
+
+  // Trusted Friends tweets may not be retweeted: fail whenever trustedFriendsControl is set.
+  def validateNotTrustedFriendsTweet(sourceTweet: Tweet): Future[Unit] =
+    if (sourceTweet.trustedFriendsControl.isEmpty) Future.Unit
+    else
+      Future.exception(
+        TweetCreateFailure.State(TweetCreateState.TrustedFriendsRetweetNotAllowed)
+      )
+
+  // We do not want to allow retweet of a stale version of a tweet in an edit chain.
+ def validateStaleTweet(sourceTweet: Tweet): Future[Unit] = { + if (!EditControlUtil.isLatestEdit(sourceTweet.editControl, sourceTweet.id).getOrElse(true)) { + Future.exception(TweetCreateFailure.State(TweetCreateState.StaleTweetRetweetNotAllowed)) + } else { + // the source tweet does not have any edit control or the source tweet is the latest tweet + Future.Unit + } + } + + /** + * Builds the RetweetBuilder + */ + def apply( + validateRequest: RetweetRequest => Future[Unit], + tweetIdGenerator: TweetIdGenerator, + tweetRepo: TweetRepository.Type, + userRepo: UserRepository.Type, + tflock: TFlockClient, + deviceSourceRepo: DeviceSourceRepository.Type, + validateUpdateRateLimit: RateLimitChecker.Validate, + spamChecker: Spam.Checker[RetweetSpamRequest] = Spam.DoNotCheckSpam, + updateUserCounts: (User, Tweet) => Future[User], + superFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type, + unretweetEdits: TweetDeletePathHandler.UnretweetEdits, + setEditWindowToSixtyMinutes: Gate[Unit] + ): RetweetBuilder.Type = { + val entityExtactor = EntityExtractor.mutationAll.endo + + val sourceTweetRepo: SourceTweetRequest => Stitch[Tweet] = + req => { + tweetRepo( + req.tweetId, + WritePathQueryOptions.retweetSourceTweet(req.user, req.hydrateOptions) + ).rescue { + case _: FilteredState => Stitch.NotFound + } + .rescue { + convertRepoExceptions(TweetCreateState.SourceTweetNotFound, TweetLookupFailure(_)) + } + } + + val getUser = userLookup(userRepo) + val getSourceUser = sourceUserLookup(userRepo) + val getDeviceSource = deviceSourceLookup(deviceSourceRepo) + + /** + * We exempt SGS test users from the check to get them through Block v2 testing. 
+ */ + def isSGSTestRole(user: User): Boolean = + user.roles.exists { roles => roles.roles.contains(SGSTestRole) } + + def validateCanRetweet( + user: User, + sourceUser: User, + sourceTweet: Tweet, + request: RetweetRequest + ): Future[Unit] = + Future + .join( + validateNotCommunityTweet(sourceTweet), + validateNotTrustedFriendsTweet(sourceTweet), + validateSourceUserRetweetable(user, sourceUser), + validateStaleTweet(sourceTweet), + Future.when(!request.dark) { + if (request.returnSuccessOnDuplicate) + failWithRetweetIdIfAlreadyRetweeted(user, sourceTweet) + else + validateNotAlreadyRetweeted(user, sourceTweet) + } + ) + .unit + + def validateSourceUserRetweetable(user: User, sourceUser: User): Future[Unit] = + if (sourceUser.profile.isEmpty) + Future.exception(UserProfileEmptyException) + else if (sourceUser.safety.isEmpty) + Future.exception(UserSafetyEmptyException) + else if (sourceUser.view.isEmpty) + Future.exception(UserViewEmptyException) + else if (user.id != sourceUser.id && sourceUser.safety.get.isProtected) + Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetProtectedTweet)) + else if (sourceUser.safety.get.deactivated) + Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetDeactivatedUser)) + else if (sourceUser.safety.get.suspended) + Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetSuspendedUser)) + else if (sourceUser.view.get.blockedBy && !isSGSTestRole(user)) + Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetBlockingUser)) + else if (sourceUser.profile.get.screenName.isEmpty) + Future.exception( + TweetCreateFailure.State(TweetCreateState.CannotRetweetUserWithoutScreenName) + ) + else + Future.Unit + + def tflockGraphContains( + graph: StatusGraph, + fromId: Long, + toId: Long, + dir: Direction + ): Future[Boolean] = + tflock.contains(graph, fromId, toId, dir).rescue { + case ex: OverCapacity => Future.exception(ex) + case ex => 
Future.exception(TFlockLookupFailure(ex)) + } + + def getRetweetIdFromTflock(sourceTweetId: TweetId, userId: UserId): Future[Option[Long]] = + tflock + .selectAll( + Select( + sourceId = sourceTweetId, + graph = RetweetsGraph, + direction = Forward + ).intersect( + Select( + sourceId = userId, + graph = UserTimelineGraph, + direction = Forward + ) + ) + ) + .map(_.headOption) + + def validateNotAlreadyRetweeted(user: User, sourceTweet: Tweet): Future[Unit] = + // use the perspective object from TLS if available, otherwise, check with tflock + (sourceTweet.perspective match { + case Some(perspective) => + Future.value(perspective.retweeted) + case None => + // we have to query the RetweetSourceGraph in the Reverse order because + // it is only defined in that direction, instead of bi-directionally + tflockGraphContains(RetweetSourceGraph, user.id, sourceTweet.id, Reverse) + }).flatMap { + case true => + Future.exception(TweetCreateFailure.State(TweetCreateState.AlreadyRetweeted)) + case false => Future.Unit + } + + def failWithRetweetIdIfAlreadyRetweeted(user: User, sourceTweet: Tweet): Future[Unit] = + // use the perspective object from TLS if available, otherwise, check with tflock + (sourceTweet.perspective.flatMap(_.retweetId) match { + case Some(tweetId) => Future.value(Some(tweetId)) + case None => + getRetweetIdFromTflock(sourceTweet.id, user.id) + }).flatMap { + case None => Future.Unit + case Some(tweetId) => + Future.exception(TweetCreateFailure.AlreadyRetweeted(tweetId)) + } + + def validateContributor(contributorIdOpt: Option[UserId]): Future[Unit] = + if (contributorIdOpt.isDefined) + Future.exception(TweetCreateFailure.State(TweetCreateState.ContributorNotSupported)) + else + Future.Unit + + case class RetweetSource(sourceTweet: Tweet, parentUserId: UserId) + + /** + * Recursively follows a retweet chain to the root source tweet. Also returns user id from the + * first walked tweet as the 'parentUserId'. 
+ * In practice, the depth of the chain should never be greater than 2 because + * share.sourceStatusId should always reference the root (unlike share.parentStatusId). + */ + def findRetweetSource( + tweetId: TweetId, + forUser: User, + hydrateOptions: WritePathHydrationOptions + ): Future[RetweetSource] = + Stitch + .run(sourceTweetRepo(SourceTweetRequest(tweetId, forUser, hydrateOptions))) + .flatMap { tweet => + getShare(tweet) match { + case None => Future.value(RetweetSource(tweet, getUserId(tweet))) + case Some(share) => + findRetweetSource(share.sourceStatusId, forUser, hydrateOptions) + .map(_.copy(parentUserId = getUserId(tweet))) + } + } + + FutureArrow { request => + for { + () <- validateRequest(request) + userFuture = Stitch.run(getUser(request.userId)) + tweetIdFuture = tweetIdGenerator() + devsrcFuture = Stitch.run(getDeviceSource(request.createdVia)) + user <- userFuture + tweetId <- tweetIdFuture + devsrc <- devsrcFuture + rtSource <- findRetweetSource( + request.sourceStatusId, + user, + request.hydrationOptions.getOrElse(WritePathHydrationOptions(simpleQuotedTweet = true)) + ) + sourceTweet = rtSource.sourceTweet + sourceUser <- Stitch.run(getSourceUser(getUserId(sourceTweet), request.userId)) + + // We want to confirm that a user is actually allowed to + // retweet an Exclusive Tweet (only available to super followers) + () <- StratoSuperFollowRelationsRepository.Validate( + sourceTweet.exclusiveTweetControl, + user.id, + superFollowRelationsRepo) + + () <- validateUser(user) + () <- validateUpdateRateLimit((user.id, request.dark)) + () <- validateContributor(request.contributorUserId) + () <- validateCanRetweet(user, sourceUser, sourceTweet, request) + () <- unretweetEdits(sourceTweet.editControl, sourceTweet.id, user.id) + + spamRequest = RetweetSpamRequest( + retweetId = tweetId, + sourceUserId = getUserId(sourceTweet), + sourceTweetId = sourceTweet.id, + sourceTweetText = getText(sourceTweet), + sourceUserName = 
sourceUser.profile.map(_.screenName), + safetyMetaData = request.safetyMetaData + ) + + spamResult <- spamChecker(spamRequest) + + safety = user.safety.get + + share = Share( + sourceStatusId = sourceTweet.id, + sourceUserId = sourceUser.id, + parentStatusId = request.sourceStatusId + ) + + retweetText = composeRetweetText(getText(sourceTweet), sourceUser) + createdAt = SnowflakeId(tweetId).time + + coreData = TweetCoreData( + userId = request.userId, + text = retweetText, + createdAtSecs = createdAt.inSeconds, + createdVia = devsrc.internalName, + share = Some(share), + hasTakedown = safety.hasTakedown, + trackingId = request.trackingId, + nsfwUser = safety.nsfwUser, + nsfwAdmin = safety.nsfwAdmin, + narrowcast = request.narrowcast, + nullcast = request.nullcast + ) + + retweet = Tweet( + id = tweetId, + coreData = Some(coreData), + contributor = getContributor(request.userId), + editControl = Some( + EditControl.Initial( + EditControlUtil + .makeEditControlInitial( + tweetId = tweetId, + createdAt = createdAt, + setEditWindowToSixtyMinutes = setEditWindowToSixtyMinutes + ) + .initial + .copy(isEditEligible = Some(false)) + ) + ), + ) + + retweetWithEntities = entityExtactor(retweet) + retweetWithAdditionalFields = setAdditionalFields( + retweetWithEntities, + request.additionalFields + ) + // update the perspective and counts fields of the source tweet to reflect the effects + // of the user performing a retweet, even though those effects haven't happened yet. 
+ updatedSourceTweet = sourceTweet.copy( + perspective = sourceTweet.perspective.map { + _.copy(retweeted = true, retweetId = Some(retweet.id)) + }, + counts = sourceTweet.counts.map { c => c.copy(retweetCount = c.retweetCount.map(_ + 1)) } + ) + + user <- updateUserCounts(user, retweetWithAdditionalFields) + } yield { + TweetBuilderResult( + tweet = retweetWithAdditionalFields, + user = user, + createdAt = createdAt, + sourceTweet = Some(updatedSourceTweet), + sourceUser = Some(sourceUser), + parentUserId = Some(rtSource.parentUserId), + isSilentFail = spamResult == Spam.SilentFail + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReverseGeocoder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReverseGeocoder.scala new file mode 100644 index 000000000..8a675a8ce --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReverseGeocoder.scala @@ -0,0 +1,78 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.geoduck.backend.hydration.thriftscala.HydrationContext +import com.twitter.geoduck.common.thriftscala.Constants +import com.twitter.geoduck.common.thriftscala.PlaceQuery +import com.twitter.geoduck.common.thriftscala.PlaceQueryFields +import com.twitter.geoduck.service.common.clientmodules.GeoduckGeohashLocate +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.geoduck.util.primitives.LatLon +import com.twitter.geoduck.util.primitives.{Geohash => GDGeohash} +import com.twitter.geoduck.util.primitives.{Place => GDPlace} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.repository.GeoduckPlaceConverter +import com.twitter.tweetypie.{thriftscala => TP} + +object ReverseGeocoder { + val log: Logger = Logger(getClass) + + private def validatingRGC(rgc: ReverseGeocoder): ReverseGeocoder = + FutureArrow { + case (coords: TP.GeoCoordinates, language: PlaceLanguage) => + if (LatLon.isValid(coords.latitude, 
coords.longitude)) + rgc((coords, language)) + else + Future.None + } + + /** + * create a Geo backed ReverseGeocoder + */ + def fromGeoduck(geohashLocate: GeoduckGeohashLocate): ReverseGeocoder = + validatingRGC( + FutureArrow { + case (geo: TP.GeoCoordinates, language: PlaceLanguage) => + if (log.isDebugEnabled) { + log.debug("RGC'ing " + geo.toString() + " with geoduck") + } + + val hydrationContext = + HydrationContext( + placeFields = Set[PlaceQueryFields]( + PlaceQueryFields.PlaceNames + ) + ) + + val gh = GDGeohash(LatLon(lat = geo.latitude, lon = geo.longitude)) + val placeQuery = PlaceQuery(placeTypes = Some(Constants.ConsumerPlaceTypes)) + + geohashLocate + .locateGeohashes(Seq(gh.toThrift), placeQuery, hydrationContext) + .onFailure { case ex => log.warn("failed to rgc " + geo.toString(), ex) } + .map { + (resp: Seq[Try[LocationResponse]]) => + resp.headOption.flatMap { + case Throw(ex) => + log.warn("rgc failed for coords: " + geo.toString(), ex) + None + case Return(locationResponse) => + GDPlace.tryLocationResponse(locationResponse) match { + case Throw(ex) => + log + .warn("rgc failed in response handling for coords: " + geo.toString(), ex) + None + case Return(tplaces) => + GDPlace.pickConsumerLocation(tplaces).map { place: GDPlace => + if (log.isDebugEnabled) { + log.debug("successfully rgc'd " + geo + " to " + place.id) + } + GeoduckPlaceConverter(language, place) + } + } + + } + } + } + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowRetweetSpamChecker.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowRetweetSpamChecker.scala new file mode 100644 index 000000000..3c7a78fd9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowRetweetSpamChecker.scala @@ -0,0 +1,64 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.finagle.tracing.Trace +import com.twitter.service.gen.scarecrow.thriftscala.Retweet +import 
com.twitter.service.gen.scarecrow.thriftscala.TieredAction +import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult +import com.twitter.spam.features.thriftscala.SafetyMetaData +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.RetweetSpamCheckRepository +import com.twitter.tweetypie.thriftscala.TweetCreateState + +case class RetweetSpamRequest( + retweetId: TweetId, + sourceUserId: UserId, + sourceTweetId: TweetId, + sourceTweetText: String, + sourceUserName: Option[String], + safetyMetaData: Option[SafetyMetaData]) + +/** + * Use the Scarecrow service as the spam checker for retweets. + */ +object ScarecrowRetweetSpamChecker { + val log: Logger = Logger(getClass) + + def requestToScarecrowRetweet(req: RetweetSpamRequest): Retweet = + Retweet( + id = req.retweetId, + sourceUserId = req.sourceUserId, + text = req.sourceTweetText, + sourceTweetId = req.sourceTweetId, + safetyMetaData = req.safetyMetaData + ) + + def apply( + stats: StatsReceiver, + repo: RetweetSpamCheckRepository.Type + ): Spam.Checker[RetweetSpamRequest] = { + + def handler(request: RetweetSpamRequest): Spam.Checker[TieredAction] = + Spam.handleScarecrowResult(stats) { + case (TieredActionResult.NotSpam, _, _) => Spam.AllowFuture + case (TieredActionResult.SilentFail, _, _) => Spam.SilentFailFuture + case (TieredActionResult.UrlSpam, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.UrlSpam, denyMessage)) + case (TieredActionResult.Deny, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.Spam, denyMessage)) + case (TieredActionResult.DenyByIpiPolicy, _, denyMessage) => + Future.exception(Spam.DisabledByIpiFailure(request.sourceUserName, denyMessage)) + case (TieredActionResult.RateLimit, _, denyMessage) => + Future.exception( + TweetCreateFailure.State(TweetCreateState.SafetyRateLimitExceeded, denyMessage)) + case (TieredActionResult.Bounce, 
Some(b), _) => + Future.exception(TweetCreateFailure.Bounced(b)) + } + + req => { + Trace.record("com.twitter.tweetypie.ScarecrowRetweetSpamChecker.retweetId=" + req.retweetId) + Stitch.run(repo(requestToScarecrowRetweet(req))).flatMap(handler(req)) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowTweetSpamChecker.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowTweetSpamChecker.scala new file mode 100644 index 000000000..5db66c4dc --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowTweetSpamChecker.scala @@ -0,0 +1,106 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.finagle.tracing.Trace +import com.twitter.relevance.feature_store.thriftscala.FeatureData +import com.twitter.relevance.feature_store.thriftscala.FeatureValue +import com.twitter.service.gen.scarecrow.thriftscala.TieredAction +import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult +import com.twitter.service.gen.scarecrow.thriftscala.TweetContext +import com.twitter.service.gen.scarecrow.thriftscala.TweetNew +import com.twitter.spam.features.thriftscala.SafetyMetaData +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.handler.Spam.Checker +import com.twitter.tweetypie.repository.TweetSpamCheckRepository +import com.twitter.tweetypie.thriftscala.TweetCreateState +import com.twitter.tweetypie.thriftscala.TweetMediaTags + +case class TweetSpamRequest( + tweetId: TweetId, + userId: UserId, + text: String, + mediaTags: Option[TweetMediaTags], + safetyMetaData: Option[SafetyMetaData], + inReplyToTweetId: Option[TweetId], + quotedTweetId: Option[TweetId], + quotedTweetUserId: Option[UserId]) + +/** + * Use the Scarecrow service as the spam checker for tweets. 
+ */ +object ScarecrowTweetSpamChecker { + val log: Logger = Logger(getClass) + + private def requestToScarecrowTweet(req: TweetSpamRequest): TweetNew = { + // compile additional input features for the spam check + val mediaTaggedUserIds = { + val mediaTags = req.mediaTags.getOrElse(TweetMediaTags()) + mediaTags.tagMap.values.flatten.flatMap(_.userId).toSet + } + + val additionalInputFeatures = { + val mediaTaggedUserFeatures = if (mediaTaggedUserIds.nonEmpty) { + Seq( + "mediaTaggedUsers" -> FeatureData(Some(FeatureValue.LongSetValue(mediaTaggedUserIds))), + "victimIds" -> FeatureData(Some(FeatureValue.LongSetValue(mediaTaggedUserIds))) + ) + } else { + Seq.empty + } + + val quotedTweetIdFeature = req.quotedTweetId.map { quotedTweetId => + "quotedTweetId" -> FeatureData(Some(FeatureValue.LongValue(quotedTweetId))) + } + + val quotedTweetUserIdFeature = req.quotedTweetUserId.map { quotedTweetUserId => + "quotedTweetUserId" -> FeatureData(Some(FeatureValue.LongValue(quotedTweetUserId))) + } + + val featureMap = + (mediaTaggedUserFeatures ++ quotedTweetIdFeature ++ quotedTweetUserIdFeature).toMap + + if (featureMap.nonEmpty) Some(featureMap) else None + } + + TweetNew( + id = req.tweetId, + userId = req.userId, + text = req.text, + additionalInputFeatures = additionalInputFeatures, + safetyMetaData = req.safetyMetaData, + inReplyToStatusId = req.inReplyToTweetId + ) + } + + private def tieredActionHandler(stats: StatsReceiver): Checker[TieredAction] = + Spam.handleScarecrowResult(stats) { + case (TieredActionResult.NotSpam, _, _) => Spam.AllowFuture + case (TieredActionResult.SilentFail, _, _) => Spam.SilentFailFuture + case (TieredActionResult.DenyByIpiPolicy, _, _) => Spam.DisabledByIpiPolicyFuture + case (TieredActionResult.UrlSpam, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.UrlSpam, denyMessage)) + case (TieredActionResult.Deny, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.Spam, denyMessage)) 
+ case (TieredActionResult.Captcha, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.SpamCaptcha, denyMessage)) + case (TieredActionResult.RateLimit, _, denyMessage) => + Future.exception( + TweetCreateFailure.State(TweetCreateState.SafetyRateLimitExceeded, denyMessage)) + case (TieredActionResult.Bounce, Some(b), _) => + Future.exception(TweetCreateFailure.Bounced(b)) + } + + def fromSpamCheckRepository( + stats: StatsReceiver, + repo: TweetSpamCheckRepository.Type + ): Spam.Checker[TweetSpamRequest] = { + val handler = tieredActionHandler(stats) + req => { + Trace.record("com.twitter.tweetypie.ScarecrowTweetSpamChecker.userId=" + req.userId) + Stitch.run(repo(requestToScarecrowTweet(req), TweetContext.Creation)).flatMap { resp => + handler(resp.tieredAction) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScrubGeoEventBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScrubGeoEventBuilder.scala new file mode 100644 index 000000000..77c3b2bb3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScrubGeoEventBuilder.scala @@ -0,0 +1,72 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.store.ScrubGeo +import com.twitter.tweetypie.store.ScrubGeoUpdateUserTimestamp +import com.twitter.tweetypie.thriftscala.DeleteLocationData +import com.twitter.tweetypie.thriftscala.GeoScrub + +/** + * Create the appropriate ScrubGeo.Event for a GeoScrub request. 
+ */
+object ScrubGeoEventBuilder {
+  // Options passed to every user lookup below: Safety and Roles fields, UserVisibility.All.
+  val userQueryOptions: UserQueryOptions =
+    UserQueryOptions(Set(UserField.Safety, UserField.Roles), UserVisibility.All)
+
+  // Lookup via Stitch.run; bumps "user_not_found" when the query succeeds with no user.
+  private def userLoader(
+    stats: StatsReceiver,
+    userRepo: UserRepository.Optional
+  ): UserId => Future[Option[User]] = {
+    val missingUserCounter = stats.counter("user_not_found")
+    userId =>
+      Stitch.run(
+        userRepo(UserKey(userId), userQueryOptions).onSuccess { found =>
+          if (found.isEmpty) missingUserCounter.incr()
+        }
+      )
+  }
+
+  object UpdateUserTimestamp {
+    type Type = DeleteLocationData => Future[ScrubGeoUpdateUserTimestamp.Event]
+
+    // Builds the event for a DeleteLocationData request and records "now_delta_ms".
+    def apply(stats: StatsReceiver, userRepo: UserRepository.Optional): Type = {
+      val nowDeltaStat = stats.stat("now_delta_ms")
+      val fetchUser = userLoader(stats, userRepo)
+      (request: DeleteLocationData) =>
+        fetchUser(request.userId).map { maybeUser =>
+          // gap between the user's deletion request and the time we publish to TweetEvents
+          val deltaMs = Time.now.inMillis - request.timestampMs
+          nowDeltaStat.add(deltaMs.toFloat)
+          ScrubGeoUpdateUserTimestamp.Event(
+            userId = request.userId,
+            timestamp = Time.fromMilliseconds(request.timestampMs),
+            optUser = maybeUser
+          )
+        }
+    }
+  }
+
+  object ScrubTweets {
+    type Type = GeoScrub => Future[ScrubGeo.Event]
+
+    // Builds the ScrubGeo.Event for a GeoScrub request, timestamped with Time.now.
+    def apply(stats: StatsReceiver, userRepo: UserRepository.Optional): Type = {
+      val fetchUser = userLoader(stats, userRepo)
+      scrubRequest =>
+        fetchUser(scrubRequest.userId).map { maybeUser =>
+          ScrubGeo.Event(
+            tweetIdSet = scrubRequest.statusIds.toSet,
+            userId = scrubRequest.userId,
+            enqueueMax = scrubRequest.hosebirdEnqueue,
+            optUser = maybeUser,
+            timestamp = Time.now
+          )
+        }
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SelfThreadBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SelfThreadBuilder.scala
new file mode 100644
index 000000000..adc2c5739
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SelfThreadBuilder.scala
@@ -0,0
+1,119 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.tweetypie.thriftscala.Reply +import com.twitter.tweetypie.thriftscala.SelfThreadMetadata +import org.apache.thrift.protocol.TField + +trait SelfThreadBuilder { + def requiredReplySourceFields: Set[TField] = + Set( + Tweet.CoreDataField, // for Reply and ConversationId + Tweet.SelfThreadMetadataField // for continuing existing self-threads + ) + + def build(authorUserId: UserId, replySourceTweet: Tweet): Option[SelfThreadMetadata] +} + +/** + * SelfThreadBuilder is used to build metadata for self-threads (tweetstorms). + * + * This builder is invoked from ReplyBuilder on tweets that pass in a inReplyToStatusId and create + * a Reply. The invocation is done inside ReplyBuilder as ReplyBuilder has already loaded the + * "reply source tweet" which has all the information needed to determine the self-thread metadata. + * + * Note that Tweet.SelfThreadMetadata schema supports representing two types of self-threads: + * 1. root self-thread : self-thread that begins alone and does not start with replying to another + * tweet. This self-thread has a self-thread ID equal to the conversation ID. + * 2. reply self-thread : self-thread that begins as a reply to another user's tweet. + * This self-thread has a self-thread ID equal to the first tweet in the + * current self-reply chain which will not equal the conversation ID. + * + * Currently only type #1 "root self-thread" is handled. + */ +object SelfThreadBuilder { + + def apply(stats: StatsReceiver): SelfThreadBuilder = { + // We want to keep open the possibility for differentiation between root + // self-threads (current functionality) and reply self-threads (possible + // future functionality). + val rootThreadStats = stats.scope("root_thread") + + // A tweet becomes a root of a self-thread only after the first self-reply + // is created. 
root_thread/start is incr()d during the write-path of the + // self-reply tweet, when it is known that the first/root tweet has not + // yet been assigned a SelfThreadMetadata. The write-path of the second + // tweet does not add the SelfThreadMetadata to the first tweet - that + // happens asynchronously by the SelfThreadDaemon. + val rootThreadStartCounter = rootThreadStats.counter("start") + + // root_thread/continue provides visibility into the frequency of + // continuation tweets off leaf tweets in a tweet storm. Also incr()d in + // the special case of a reply to the root tweet, which does not yet have a + // SelfThreadMetadata(isLeaf=true). + val rootThreadContinueCounter = rootThreadStats.counter("continue") + + // root_thread/branch provides visibility into how frequently self-threads + // get branched - that is, when the author self-replies to a non-leaf tweet + // in an existing thread. Knowing the frequency of branching will help us + // determine the priority of accounting for branching in various + // tweet-delete use cases. Currently we do not fix up the root tweet's + // SelfThreadMetadata when its reply tweets are deleted. 
+ val rootThreadBranchCounter = rootThreadStats.counter("branch") + + def observeSelfThreadMetrics(replySourceSTM: Option[SelfThreadMetadata]): Unit = { + replySourceSTM match { + case Some(SelfThreadMetadata(_, isLeaf)) => + if (isLeaf) rootThreadContinueCounter.incr() + else rootThreadBranchCounter.incr() + case None => + rootThreadStartCounter.incr() + } + } + + new SelfThreadBuilder { + + override def build( + authorUserId: UserId, + replySourceTweet: Tweet + ): Option[SelfThreadMetadata] = { + // the "reply source tweet"'s author must match the current author + if (getUserId(replySourceTweet) == authorUserId) { + val replySourceSTM = getSelfThreadMetadata(replySourceTweet) + + observeSelfThreadMetrics(replySourceSTM) + + // determine if replySourceTweet stands alone (non-reply) + getReply(replySourceTweet) match { + case None | Some(Reply(None, _, _)) => + // 'replySourceTweet' started a new self-thread that stands alone + // which happens when there's no Reply or the Reply does not have + // inReplyToStatusId (directed-at user) + + // requiredReplySourceFields requires coreData and conversationId + // is required so this would have previously thrown an exception + // in ReplyBuilder if the read was partial + val convoId = replySourceTweet.coreData.get.conversationId.get + Some(SelfThreadMetadata(id = convoId, isLeaf = true)) + + case _ => + // 'replySourceTweet' was also a reply-to-tweet, so continue any + // self-thread by inheriting any SelfThreadMetadata it has + // (though always setting isLeaf to true) + replySourceSTM.map(_.copy(isLeaf = true)) + } + } else { + // Replying to a different user currently never creates a self-thread + // as all self-threads must start at the root (and match conversation + // ID). 
+ // + // In the future replying to a different user *might* be part of a + // self-thread but we wouldn't mark it as such until the *next* tweet + // is created (at which time the self_thread daemon goes back and + // marks the first tweet as in the self-thread). + None + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetAdditionalFieldsBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetAdditionalFieldsBuilder.scala new file mode 100644 index 000000000..423543d8f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetAdditionalFieldsBuilder.scala @@ -0,0 +1,61 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.repository.UserKey +import com.twitter.tweetypie.repository.UserQueryOptions +import com.twitter.tweetypie.repository.UserRepository +import com.twitter.tweetypie.repository.UserVisibility +import com.twitter.tweetypie.store.AsyncSetAdditionalFields +import com.twitter.tweetypie.store.SetAdditionalFields +import com.twitter.tweetypie.store.TweetStoreEventOrRetry +import com.twitter.tweetypie.thriftscala.AsyncSetAdditionalFieldsRequest +import com.twitter.tweetypie.thriftscala.SetAdditionalFieldsRequest + +object SetAdditionalFieldsBuilder { + type Type = SetAdditionalFieldsRequest => Future[SetAdditionalFields.Event] + + val tweetOptions: TweetQuery.Options = TweetQuery.Options(include = GetTweetsHandler.BaseInclude) + + def apply(tweetRepo: TweetRepository.Type): Type = { + def getTweet(tweetId: TweetId) = + Stitch.run( + tweetRepo(tweetId, tweetOptions) + .rescue(HandlerError.translateNotFoundToClientError(tweetId)) + ) + + request => { + getTweet(request.additionalFields.id).map { tweet => + SetAdditionalFields.Event( + additionalFields = 
request.additionalFields, + userId = getUserId(tweet), + timestamp = Time.now + ) + } + } + } +} + +object AsyncSetAdditionalFieldsBuilder { + type Type = AsyncSetAdditionalFieldsRequest => Future[ + TweetStoreEventOrRetry[AsyncSetAdditionalFields.Event] + ] + + val userQueryOpts: UserQueryOptions = UserQueryOptions(Set(UserField.Safety), UserVisibility.All) + + def apply(userRepo: UserRepository.Type): Type = { + def getUser(userId: UserId): Future[User] = + Stitch.run( + userRepo(UserKey.byId(userId), userQueryOpts) + .rescue { case NotFound => Stitch.exception(HandlerError.userNotFound(userId)) } + ) + + request => + getUser(request.userId).map { user => + AsyncSetAdditionalFields.Event.fromAsyncRequest(request, user) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetRetweetVisibilityHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetRetweetVisibilityHandler.scala new file mode 100644 index 000000000..48dc91014 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetRetweetVisibilityHandler.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.tweetypie.store.SetRetweetVisibility +import com.twitter.tweetypie.thriftscala.SetRetweetVisibilityRequest +import com.twitter.tweetypie.thriftscala.Share +import com.twitter.tweetypie.thriftscala.Tweet + +/** + * Create a [[SetRetweetVisibility.Event]] from a [[SetRetweetVisibilityRequest]] and then + * pipe the event to [[store.SetRetweetVisibility]]. The event contains the information + * to determine if a retweet should be included in its source tweet's retweet count. + * + * Showing/hiding a retweet count is done by calling TFlock to modify an edge's state between + * `Positive` <--> `Archived` in the RetweetsGraph(6) and modifying the count in cache directly. 
+ */
+object SetRetweetVisibilityHandler {
+  type Type = SetRetweetVisibilityRequest => Future[Unit]
+
+  def apply(
+    tweetGetter: TweetId => Future[Option[Tweet]],
+    setRetweetVisibilityStore: SetRetweetVisibility.Event => Future[Unit]
+  ): Type =
+    req =>
+      // flatMap, not map: the store's Future must be returned to the caller, not discarded.
+      tweetGetter(req.retweetId).flatMap {
+        case Some(retweet) =>
+          // No source id => we do not know which source tweet's count to modify: no-op.
+          getShare(retweet).fold(Future.Unit) { share: Share =>
+            val event = SetRetweetVisibility.Event(
+              retweetId = req.retweetId,
+              visible = req.visible,
+              srcId = share.sourceStatusId,
+              retweetUserId = getUserId(retweet),
+              srcTweetUserId = share.sourceUserId,
+              timestamp = Time.now
+            )
+            setRetweetVisibilityStore(event)
+          }
+        case None =>
+          // Deleted retweet: no-op, so that we do not accidentally undelete a legitimately
+          // deleted retweet.
+          Future.Unit
+      }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/Spam.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/Spam.scala new file mode 100644 index 000000000..088f9b8a9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/Spam.scala @@ -0,0 +1,99 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.botmaker.thriftscala.BotMakerResponse +import com.twitter.bouncer.thriftscala.Bounce +import com.twitter.finagle.tracing.Trace +import com.twitter.relevance.feature_store.thriftscala.FeatureData +import com.twitter.relevance.feature_store.thriftscala.FeatureValue.StrValue +import com.twitter.service.gen.scarecrow.thriftscala.TieredAction +import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.thriftscala.TweetCreateState + +object Spam { + sealed trait Result + case object Allow extends Result + case object SilentFail extends Result + case object DisabledByIpiPolicy extends Result + + val 
AllowFuture: Future[Allow.type] = Future.value(Allow) + val SilentFailFuture: Future[SilentFail.type] = Future.value(SilentFail) + val DisabledByIpiPolicyFuture: Future[DisabledByIpiPolicy.type] = + Future.value(DisabledByIpiPolicy) + + def DisabledByIpiFailure( + userName: Option[String], + customDenyMessage: Option[String] = None + ): TweetCreateFailure.State = { + val errorMsg = (customDenyMessage, userName) match { + case (Some(denyMessage), _) => denyMessage + case (_, Some(name)) => s"Some actions on this ${name} Tweet have been disabled by Twitter." + case _ => "Some actions on this Tweet have been disabled by Twitter." + } + TweetCreateFailure.State(TweetCreateState.DisabledByIpiPolicy, Some(errorMsg)) + } + + type Checker[T] = T => Future[Result] + + /** + * Dummy spam checker that always allows requests. + */ + val DoNotCheckSpam: Checker[AnyRef] = _ => AllowFuture + + def gated[T](gate: Gate[Unit])(checker: Checker[T]): Checker[T] = + req => if (gate()) checker(req) else AllowFuture + + def selected[T](gate: Gate[Unit])(ifTrue: Checker[T], ifFalse: Checker[T]): Checker[T] = + req => gate.select(ifTrue, ifFalse)()(req) + + def withEffect[T](check: Checker[T], effect: T => Unit): T => Future[Result] = { t: T => + effect(t) + check(t) + } + + /** + * Wrapper that implicitly allows retweet or tweet creation when spam + * checking fails. + */ + def allowOnException[T](checker: Checker[T]): Checker[T] = + req => + checker(req).rescue { + case e: TweetCreateFailure => Future.exception(e) + case _ => AllowFuture + } + + /** + * Handler for scarecrow result to be used by a Checker. 
+ */ + def handleScarecrowResult( + stats: StatsReceiver + )( + handler: PartialFunction[(TieredActionResult, Option[Bounce], Option[String]), Future[Result]] + ): Checker[TieredAction] = + result => { + stats.scope("scarecrow_result").counter(result.resultCode.name).incr() + Trace.record("com.twitter.tweetypie.Spam.scarecrow_result=" + result.resultCode.name) + /* + * A bot can return a custom DenyMessage + * + * If it does, we substitute this for the 'message' in the ValidationError. + */ + val customDenyMessage: Option[String] = for { + botMakeResponse: BotMakerResponse <- result.botMakerResponse + outputFeatures <- botMakeResponse.outputFeatures + denyMessageFeature: FeatureData <- outputFeatures.get("DenyMessage") + denyMessageFeatureValue <- denyMessageFeature.featureValue + denyMessage <- denyMessageFeatureValue match { + case stringValue: StrValue => + Some(stringValue.strValue) + case _ => + None + } + } yield denyMessage + handler.applyOrElse( + (result.resultCode, result.bounce, customDenyMessage), + withEffect(DoNotCheckSpam, (_: AnyRef) => stats.counter("unexpected_result").incr()) + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TakedownHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TakedownHandler.scala new file mode 100644 index 000000000..e729e3cce --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TakedownHandler.scala @@ -0,0 +1,76 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.util.FutureArrow +import com.twitter.takedown.util.TakedownReasons._ +import com.twitter.tweetypie.store.Takedown +import com.twitter.tweetypie.thriftscala.TakedownRequest +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.util.Takedowns + +/** + * This handler processes TakedownRequest objects sent to Tweetypie's takedown endpoint. 
+ * The request object specifies which takedown countries are being added and which are + * being removed. It also includes side effect flags for setting the tweet's has_takedown + * bit, scribing to Guano, and enqueuing to EventBus. For more information about inputs + * to the takedown endpoint, see the TakedownRequest documentation in the thrift definition. + */ +object TakedownHandler { + type Type = FutureArrow[TakedownRequest, Unit] + + def apply( + getTweet: FutureArrow[TweetId, Tweet], + getUser: FutureArrow[UserId, User], + writeTakedown: FutureEffect[Takedown.Event] + ): Type = { + FutureArrow { request => + for { + tweet <- getTweet(request.tweetId) + user <- getUser(getUserId(tweet)) + userHasTakedowns = user.takedowns.map(userTakedownsToReasons).exists(_.nonEmpty) + + existingTweetReasons = Takedowns.fromTweet(tweet).reasons + + reasonsToRemove = (request.countriesToRemove.map(countryCodeToReason) ++ + request.reasonsToRemove.map(normalizeReason)).distinct.sortBy(_.toString) + + reasonsToAdd = (request.countriesToAdd.map(countryCodeToReason) ++ + request.reasonsToAdd.map(normalizeReason)).distinct.sortBy(_.toString) + + updatedTweetTakedowns = + (existingTweetReasons ++ reasonsToAdd) + .filterNot(reasonsToRemove.contains) + .toSeq + .sortBy(_.toString) + + (cs, rs) = Takedowns.partitionReasons(updatedTweetTakedowns) + + updatedTweet = Lens.setAll( + tweet, + // these fields are cached on the Tweet in CachingTweetStore and written in + // ManhattanTweetStore + TweetLenses.hasTakedown -> (updatedTweetTakedowns.nonEmpty || userHasTakedowns), + TweetLenses.tweetypieOnlyTakedownCountryCodes -> Some(cs).filter(_.nonEmpty), + TweetLenses.tweetypieOnlyTakedownReasons -> Some(rs).filter(_.nonEmpty) + ) + + _ <- writeTakedown.when(tweet != updatedTweet) { + Takedown.Event( + tweet = updatedTweet, + timestamp = Time.now, + user = Some(user), + takedownReasons = updatedTweetTakedowns, + reasonsToAdd = reasonsToAdd, + reasonsToRemove = reasonsToRemove, + auditNote = 
request.auditNote, + host = request.host, + byUserId = request.byUserId, + eventbusEnqueue = request.eventbusEnqueue, + scribeForAudit = request.scribeForAudit, + updateCodesAndReasons = true + ) + } + } yield () + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala new file mode 100644 index 000000000..98bb33064 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala @@ -0,0 +1,1180 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.gizmoduck.thriftscala.AccessPolicy +import com.twitter.gizmoduck.thriftscala.LabelValue +import com.twitter.gizmoduck.thriftscala.UserType +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.additionalfields.AdditionalFields._ +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.jiminy.tweetypie.NudgeBuilder +import com.twitter.tweetypie.jiminy.tweetypie.NudgeBuilderRequest +import com.twitter.tweetypie.media.Media +import com.twitter.tweetypie.repository.StratoCommunityAccessRepository.CommunityAccess +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.serverutil.DeviceSourceParser +import com.twitter.tweetypie.serverutil.ExtendedTweetMetadataBuilder +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.thriftscala.entities.EntityExtractor +import com.twitter.tweetypie.tweettext._ +import com.twitter.tweetypie.util.CommunityAnnotation +import com.twitter.tweetypie.util.CommunityUtil +import com.twitter.twittertext.Regex.{VALID_URL => UrlPattern} +import com.twitter.twittertext.TwitterTextParser + +case class TweetBuilderResult( + tweet: 
Tweet, + user: User, + createdAt: Time, + sourceTweet: Option[Tweet] = None, + sourceUser: Option[User] = None, + parentUserId: Option[UserId] = None, + isSilentFail: Boolean = false, + geoSearchRequestId: Option[GeoSearchRequestId] = None, + initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None) + +object TweetBuilder { + import GizmoduckUserCountsUpdatingStore.isUserTweet + import PostTweet._ + import Preprocessor._ + import TweetCreateState.{Spam => CreateStateSpam, _} + import TweetText._ + import UpstreamFailure._ + + type Type = FutureArrow[PostTweetRequest, TweetBuilderResult] + + val log: Logger = Logger(getClass) + + private[this] val _unitMutation = Future.value(Mutation.unit[Any]) + def MutationUnitFuture[T]: Future[Mutation[T]] = _unitMutation.asInstanceOf[Future[Mutation[T]]] + + case class MissingConversationId(inReplyToTweetId: TweetId) extends RuntimeException + + case class TextVisibility( + visibleTextRange: Option[TextRange], + totalTextDisplayLength: Offset.DisplayUnit, + visibleText: String) { + val isExtendedTweet: Boolean = totalTextDisplayLength.toInt > OriginalMaxDisplayLength + + /** + * Going forward we will be moving away from quoted-tweets urls in tweet text, but we + * have a backwards-compat layer in Tweetypie which adds the QT url to text to provide + * support for all clients to read in a backwards-compatible way until they upgrade. + * + * Tweets can become extended as their display length can go beyond 140 + * after adding the QT short url. Therefore, we are adding below function + * to account for legacy formatting during read-time and generate a self-permalink. 
+ */ + def isExtendedWithExtraChars(extraChars: Int): Boolean = + totalTextDisplayLength.toInt > (OriginalMaxDisplayLength - extraChars) + } + + /** Max number of users that can be tagged on a single tweet */ + val MaxMediaTagCount = 10 + + val MobileWebApp = "oauth:49152" + val M2App = "oauth:3033294" + val M5App = "oauth:3033300" + + val TestRateLimitUserRole = "stresstest" + + /** + * The fields to fetch for the user creating the tweet. + */ + val userFields: Set[UserField] = + Set( + UserField.Profile, + UserField.ProfileDesign, + UserField.Account, + UserField.Safety, + UserField.Counts, + UserField.Roles, + UserField.UrlEntities, + UserField.Labels + ) + + /** + * The fields to fetch for the user of the source tweet in a retweet. + */ + val sourceUserFields: Set[UserField] = + userFields + UserField.View + + /** + * Converts repository exceptions into an API-compatible exception type + */ + def convertRepoExceptions[A]( + notFoundState: TweetCreateState, + failureHandler: Throwable => Throwable + ): PartialFunction[Throwable, Stitch[A]] = { + // stitch.NotFound is converted to the supplied TweetCreateState, wrapped in TweetCreateFailure + case NotFound => Stitch.exception(TweetCreateFailure.State(notFoundState)) + // OverCapacity exceptions should not be translated and should bubble up to the top + case ex: OverCapacity => Stitch.exception(ex) + // Other exceptions are wrapped in the supplied failureHandler + case ex => Stitch.exception(failureHandler(ex)) + } + + /** + * Adapts a UserRepository to a Repository for looking up a single user and that + * fails with an appropriate TweetCreateFailure if the user is not found. 
+ */ + def userLookup(userRepo: UserRepository.Type): UserId => Stitch[User] = { + val opts = UserQueryOptions(queryFields = userFields, visibility = UserVisibility.All) + + userId => + userRepo(UserKey(userId), opts) + .rescue(convertRepoExceptions[User](UserNotFound, UserLookupFailure(_))) + } + + /** + * Adapts a UserRepository to a function for looking up the user of a source tweet (fetching + * sourceUserFields with forUserId set) that fails with SourceUserNotFound if not found. + */ + def sourceUserLookup(userRepo: UserRepository.Type): (UserId, UserId) => Stitch[User] = { + val opts = UserQueryOptions(queryFields = sourceUserFields, visibility = UserVisibility.All) + + (userId, forUserId) => + userRepo(UserKey(userId), opts.copy(forUserId = Some(forUserId))) + .rescue(convertRepoExceptions[User](SourceUserNotFound, UserLookupFailure(_))) + } + + /** + * Any fields that are loaded on the user via TweetBuilder/RetweetBuilder, but which should not + * be included on the user in the async-insert actions (such as hosebird) should be removed here. + * + * This will include perspectival fields that were loaded relative to the user creating the tweet. + */ + def scrubUserInAsyncInserts: User => User = + user => user.copy(view = None) + + /** + * Any fields that are loaded on the source user via TweetBuilder/RetweetBuilder, but which + * should not be included on the user in the async-insert actions (such as hosebird) should + * be removed here. + * + * This will include perspectival fields that were loaded relative to the user creating the tweet. + */ + def scrubSourceUserInAsyncInserts: User => User = + // currently the same as scrubUserInAsyncInserts, could be different in the future + scrubUserInAsyncInserts + + /** + * Any fields that are loaded on the source tweet via RetweetBuilder, but which should not be + * included on the source tweet in the async-insert actions (such as hosebird) should + * be removed here. 
+ * + * This will include perspectival fields that were loaded relative to the user creating the tweet. + */ + def scrubSourceTweetInAsyncInserts: Tweet => Tweet = + tweet => tweet.copy(perspective = None, cards = None, card2 = None) + + /** + * Adapts a DeviceSource to a Repository for looking up a single device-source and that + * fails with an appropriate TweetCreateFailure if not found. + */ + def deviceSourceLookup(devSrcRepo: DeviceSourceRepository.Type): DeviceSourceRepository.Type = + appIdStr => { + val result: Stitch[DeviceSource] = + if (DeviceSourceParser.isValid(appIdStr)) { + devSrcRepo(appIdStr) + } else { + Stitch.exception(NotFound) + } + + result.rescue(convertRepoExceptions(DeviceSourceNotFound, DeviceSourceLookupFailure(_))) + } + + /** + * Checks: + * - that we have all the user fields we need + * - that the user is active + * - that they are not a frictionless follower account + */ + def validateUser(user: User): Future[Unit] = + if (user.safety.isEmpty) + Future.exception(UserSafetyEmptyException) + else if (user.profile.isEmpty) + Future.exception(UserProfileEmptyException) + else if (user.safety.get.deactivated) + Future.exception(TweetCreateFailure.State(UserDeactivated)) + else if (user.safety.get.suspended) + Future.exception(TweetCreateFailure.State(UserSuspended)) + else if (user.labels.exists(_.labels.exists(_.labelValue == LabelValue.ReadOnly))) + Future.exception(TweetCreateFailure.State(CreateStateSpam)) + else if (user.userType == UserType.Frictionless) + Future.exception(TweetCreateFailure.State(UserNotFound)) + else if (user.userType == UserType.Soft) + Future.exception(TweetCreateFailure.State(UserNotFound)) + else if (user.safety.get.accessPolicy == AccessPolicy.BounceAll || + user.safety.get.accessPolicy == AccessPolicy.BounceAllPublicWrites) + Future.exception(TweetCreateFailure.State(UserReadonly)) + else + Future.Unit + + def validateCommunityReply( + communities: Option[Communities], + replyResult: 
Option[ReplyBuilder.Result] + ): Future[Unit] = { + + if (replyResult.flatMap(_.reply.inReplyToStatusId).nonEmpty) { + val rootCommunities = replyResult.flatMap(_.community) + val rootCommunityIds = CommunityUtil.communityIds(rootCommunities) + val replyCommunityIds = CommunityUtil.communityIds(communities) + + if (rootCommunityIds == replyCommunityIds) { + Future.Unit + } else { + Future.exception(TweetCreateFailure.State(CommunityReplyTweetNotAllowed)) + } + } else { + Future.Unit + } + } + + // Project requirements do not allow exclusive tweets to be replies. + // All exclusive tweets must be root tweets. + def validateExclusiveTweetNotReplies( + exclusiveTweetControls: Option[ExclusiveTweetControl], + replyResult: Option[ReplyBuilder.Result] + ): Future[Unit] = { + val isInReplyToTweet = replyResult.exists(_.reply.inReplyToStatusId.isDefined) + if (exclusiveTweetControls.isDefined && isInReplyToTweet) { + Future.exception(TweetCreateFailure.State(SuperFollowsInvalidParams)) + } else { + Future.Unit + } + } + + // Invalid parameters for Exclusive Tweets: + // - Community field set # Tweets can not be both at the same time. 
+ def validateExclusiveTweetParams( + exclusiveTweetControls: Option[ExclusiveTweetControl], + communities: Option[Communities] + ): Future[Unit] = { + if (exclusiveTweetControls.isDefined && CommunityUtil.hasCommunity(communities)) { + Future.exception(TweetCreateFailure.State(SuperFollowsInvalidParams)) + } else { + Future.Unit + } + } + + def validateTrustedFriendsNotReplies( + trustedFriendsControl: Option[TrustedFriendsControl], + replyResult: Option[ReplyBuilder.Result] + ): Future[Unit] = { + val isInReplyToTweet = replyResult.exists(_.reply.inReplyToStatusId.isDefined) + if (trustedFriendsControl.isDefined && isInReplyToTweet) { + Future.exception(TweetCreateFailure.State(TrustedFriendsInvalidParams)) + } else { + Future.Unit + } + } + + def validateTrustedFriendsParams( + trustedFriendsControl: Option[TrustedFriendsControl], + conversationControl: Option[TweetCreateConversationControl], + communities: Option[Communities], + exclusiveTweetControl: Option[ExclusiveTweetControl] + ): Future[Unit] = { + if (trustedFriendsControl.isDefined && + (conversationControl.isDefined || CommunityUtil.hasCommunity( + communities) || exclusiveTweetControl.isDefined)) { + Future.exception(TweetCreateFailure.State(TrustedFriendsInvalidParams)) + } else { + Future.Unit + } + } + + /** + * Checks the weighted tweet text length using twitter-text, as used by clients. + * This should ensure that any tweet the client deems valid will also be deemed + * valid by Tweetypie. + */ + def prevalidateTextLength(text: String, stats: StatsReceiver): Future[Unit] = { + val twitterTextConfig = TwitterTextParser.TWITTER_TEXT_DEFAULT_CONFIG + val twitterTextResult = TwitterTextParser.parseTweet(text, twitterTextConfig) + val textTooLong = !twitterTextResult.isValid && text.length > 0 + + Future.when(textTooLong) { + val weightedLength = twitterTextResult.weightedLength + log.debug( + s"Weighted length too long. 
weightedLength: $weightedLength" + + s", Tweet text: '${diffshow.show(text)}'" + ) + stats.counter("check_weighted_length/text_too_long").incr() + Future.exception(TweetCreateFailure.State(TextTooLong)) + } + } + + /** + * Checks that the tweet text is neither blank nor too long. + */ + def validateTextLength( + text: String, + visibleText: String, + replyResult: Option[ReplyBuilder.Result], + stats: StatsReceiver + ): Future[Unit] = { + val utf8Length = Offset.Utf8.length(text) + + def visibleTextTooLong = + Offset.DisplayUnit.length(visibleText) > Offset.DisplayUnit(MaxVisibleWeightedEmojiLength) + + def utf8LengthTooLong = + utf8Length > Offset.Utf8(MaxUtf8Length) + + if (isBlank(text)) { + stats.counter("validate_text_length/text_cannot_be_blank").incr() + Future.exception(TweetCreateFailure.State(TextCannotBeBlank)) + } else if (replyResult.exists(_.replyTextIsEmpty(text))) { + stats.counter("validate_text_length/reply_text_cannot_be_blank").incr() + Future.exception(TweetCreateFailure.State(TextCannotBeBlank)) + } else if (visibleTextTooLong) { + // Final check that visible text does not exceed MaxVisibleWeightedEmojiLength + // characters. + // prevalidateTextLength() does some portion of validation as well, most notably + // weighted length on raw, unescaped text. + stats.counter("validate_text_length/text_too_long.visible_length_explicit").incr() + log.debug( + s"Explicit MaxVisibleWeightedLength visible length check failed. 
" + + s"visibleText: '${diffshow.show(visibleText)}' and " + + s"total text: '${diffshow.show(text)}'" + ) + Future.exception(TweetCreateFailure.State(TextTooLong)) + } else if (utf8LengthTooLong) { + stats.counter("validate_text_length/text_too_long.utf8_length").incr() + Future.exception(TweetCreateFailure.State(TextTooLong)) + } else { + stats.stat("validate_text_length/utf8_length").add(utf8Length.toInt) + Future.Unit + } + } + + def getTextVisibility( + text: String, + replyResult: Option[ReplyBuilder.Result], + urlEntities: Seq[UrlEntity], + mediaEntities: Seq[MediaEntity], + attachmentUrl: Option[String] + ): TextVisibility = { + val totalTextLength = Offset.CodePoint.length(text) + val totalTextDisplayLength = Offset.DisplayUnit.length(text) + + /** + * visibleEnd for multiple scenarios: + * + * normal tweet + media - fromIndex of mediaEntity (hydrated from last media permalink) + * quote tweet + media - fromIndex of mediaEntity + * replies + media - fromIndex of mediaEntity + * normal quote tweet - total text length (visible text range will be None) + * tweets with other attachments (DM deep links) + * fromIndex of the last URL entity + */ + val visibleEnd = mediaEntities.headOption + .map(_.fromIndex) + .orElse(attachmentUrl.flatMap(_ => urlEntities.lastOption).map(_.fromIndex)) + .map(from => (from - 1).max(0)) // for whitespace, unless there is none + .map(Offset.CodePoint(_)) + .getOrElse(totalTextLength) + + val visibleStart = replyResult match { + case Some(rr) => rr.visibleStart.min(visibleEnd) + case None => Offset.CodePoint(0) + } + + if (visibleStart.toInt == 0 && visibleEnd == totalTextLength) { + TextVisibility( + visibleTextRange = None, + totalTextDisplayLength = totalTextDisplayLength, + visibleText = text + ) + } else { + val charFrom = visibleStart.toCodeUnit(text) + val charTo = charFrom.offsetByCodePoints(text, visibleEnd - visibleStart) + val visibleText = text.substring(charFrom.toInt, charTo.toInt) + + TextVisibility( + 
visibleTextRange = Some(TextRange(visibleStart.toInt, visibleEnd.toInt)), + totalTextDisplayLength = totalTextDisplayLength, + visibleText = visibleText + ) + } + } + + def isValidHashtag(entity: HashtagEntity): Boolean = + TweetText.codePointLength(entity.text) <= TweetText.MaxHashtagLength + + /** + * Validates that the number of various entities are within the limits, and the + * length of hashtags are with the limit. + */ + def validateEntities(tweet: Tweet): Future[Unit] = + if (getMentions(tweet).length > TweetText.MaxMentions) + Future.exception(TweetCreateFailure.State(MentionLimitExceeded)) + else if (getUrls(tweet).length > TweetText.MaxUrls) + Future.exception(TweetCreateFailure.State(UrlLimitExceeded)) + else if (getHashtags(tweet).length > TweetText.MaxHashtags) + Future.exception(TweetCreateFailure.State(HashtagLimitExceeded)) + else if (getCashtags(tweet).length > TweetText.MaxCashtags) + Future.exception(TweetCreateFailure.State(CashtagLimitExceeded)) + else if (getHashtags(tweet).exists(e => !isValidHashtag(e))) + Future.exception(TweetCreateFailure.State(HashtagLengthLimitExceeded)) + else + Future.Unit + + /** + * Update the user to what it should look like after the tweet is created + */ + def updateUserCounts(hasMedia: Tweet => Boolean): (User, Tweet) => Future[User] = + (user: User, tweet: Tweet) => { + val countAsUserTweet = isUserTweet(tweet) + val tweetsDelta = if (countAsUserTweet) 1 else 0 + val mediaTweetsDelta = if (countAsUserTweet && hasMedia(tweet)) 1 else 0 + + Future.value( + user.copy( + counts = user.counts.map { counts => + counts.copy( + tweets = counts.tweets + tweetsDelta, + mediaTweets = counts.mediaTweets.map(_ + mediaTweetsDelta) + ) + } + ) + ) + } + + def validateAdditionalFields[R](implicit view: RequestView[R]): FutureEffect[R] = + FutureEffect[R] { req => + view + .additionalFields(req) + .map(tweet => + unsettableAdditionalFieldIds(tweet) ++ rejectedAdditionalFieldIds(tweet)) match { + case Some(unsettableFieldIds) 
if unsettableFieldIds.nonEmpty => + Future.exception( + TweetCreateFailure.State( + InvalidAdditionalField, + Some(unsettableAdditionalFieldIdsErrorMessage(unsettableFieldIds)) + ) + ) + case _ => Future.Unit + } + } + + def validateTweetMediaTags( + stats: StatsReceiver, + getUserMediaTagRateLimit: RateLimitChecker.GetRemaining, + userRepo: UserRepository.Optional + ): (Tweet, Boolean) => Future[Mutation[Tweet]] = { + val userRepoWithStats: UserRepository.Optional = + (userKey, queryOptions) => + userRepo(userKey, queryOptions).liftToTry.map { + case Return(res @ Some(_)) => + stats.counter("found").incr() + res + case Return(None) => + stats.counter("not_found").incr() + None + case Throw(_) => + stats.counter("failed").incr() + None + } + + (tweet: Tweet, dark: Boolean) => { + val mediaTags = getMediaTagMap(tweet) + + if (mediaTags.isEmpty) { + MutationUnitFuture + } else { + getUserMediaTagRateLimit((getUserId(tweet), dark)).flatMap { remainingMediaTagCount => + val maxMediaTagCount = math.min(remainingMediaTagCount, MaxMediaTagCount) + + val taggedUserIds = + mediaTags.values.flatten.toSeq.collect { + case MediaTag(MediaTagType.User, Some(userId), _, _) => userId + }.distinct + + val droppedTagCount = taggedUserIds.size - maxMediaTagCount + if (droppedTagCount > 0) stats.counter("over_limit_tags").incr(droppedTagCount) + + val userQueryOpts = + UserQueryOptions( + queryFields = Set(UserField.MediaView), + visibility = UserVisibility.MediaTaggable, + forUserId = Some(getUserId(tweet)) + ) + + val keys = taggedUserIds.take(maxMediaTagCount).map(UserKey.byId) + val keyOpts = keys.map((_, userQueryOpts)) + + Stitch.run { + Stitch + .traverse(keyOpts)(userRepoWithStats.tupled) + .map(_.flatten) + .map { users => + val userMap = users.map(u => u.id -> u).toMap + val mediaTagsMutation = + Mutation[Seq[MediaTag]] { mediaTags => + val validMediaTags = + mediaTags.filter { + case MediaTag(MediaTagType.User, Some(userId), _, _) => + 
userMap.get(userId).exists(_.mediaView.exists(_.canMediaTag)) + case _ => false + } + val invalidCount = mediaTags.size - validMediaTags.size + + if (invalidCount != 0) { + stats.counter("invalid").incr(invalidCount) + Some(validMediaTags) + } else { + None + } + } + TweetLenses.mediaTagMap.mutation(mediaTagsMutation.liftMapValues) + } + } + } + } + } + } + + def validateCommunityMembership( + communityMembershipRepository: StratoCommunityMembershipRepository.Type, + communityAccessRepository: StratoCommunityAccessRepository.Type, + communities: Option[Communities] + ): Future[Unit] = + communities match { + case Some(Communities(Seq(communityId))) => + Stitch + .run { + communityMembershipRepository(communityId).flatMap { + case true => Stitch.value(None) + case false => + communityAccessRepository(communityId).map { + case Some(CommunityAccess.Public) | Some(CommunityAccess.Closed) => + Some(TweetCreateState.CommunityUserNotAuthorized) + case Some(CommunityAccess.Private) | None => + Some(TweetCreateState.CommunityNotFound) + } + } + }.flatMap { + case None => + Future.Done + case Some(tweetCreateState) => + Future.exception(TweetCreateFailure.State(tweetCreateState)) + } + case Some(Communities(communities)) if communities.length > 1 => + // Not allowed to specify more than one community ID. + Future.exception(TweetCreateFailure.State(TweetCreateState.InvalidAdditionalField)) + case _ => Future.Done + } + + private[this] val CardUriSchemeRegex = "(?i)^(?:card|tombstone):".r + + /** + * Is the given String a URI that is allowed as a card reference + * without a matching URL in the text? 
  /**
   * Is the given String a URI that is allowed as a card reference
   * without a matching URL in the text?
   *
   * True when `uri` starts with the scheme matched by `CardUriSchemeRegex`.
   */
  def hasCardsUriScheme(uri: String): Boolean =
    CardUriSchemeRegex.findPrefixMatchOf(uri).isDefined

  /** Failure used when a URL card reference is given but the tweet has no URL entities. */
  val InvalidAdditionalFieldEmptyUrlEntities: TweetCreateFailure.State =
    TweetCreateFailure.State(
      TweetCreateState.InvalidAdditionalField,
      Some("url entities are empty")
    )

  /** Failure used when the shortened card URL matches none of the tweet's URL entities. */
  val InvalidAdditionalFieldNonMatchingUrlAndShortUrl: TweetCreateFailure.State =
    TweetCreateFailure.State(
      TweetCreateState.InvalidAdditionalField,
      Some("non-matching url and short url")
    )

  /** Failure used when the card reference is neither a card-scheme URI nor matches `UrlPattern`. */
  val InvalidAdditionalFieldInvalidUri: TweetCreateFailure.State =
    TweetCreateFailure.State(
      TweetCreateState.InvalidAdditionalField,
      Some("invalid URI")
    )

  /** Failure used when the card reference validator rejects the card URI. */
  val InvalidAdditionalFieldInvalidCardUri: TweetCreateFailure.State =
    TweetCreateFailure.State(
      TweetCreateState.InvalidAdditionalField,
      Some("invalid card URI")
    )

  /** Computes the mutation that rewrites a tweet's card reference to its validated form. */
  type CardReferenceBuilder =
    (Tweet, UrlShortener.Context) => Future[Mutation[Tweet]]

  /**
   * Builds a [[CardReferenceBuilder]].
   *
   * For a tweet without a card reference the result is `MutationUnitFuture`.
   * Otherwise the card URI is resolved (see the inline comments), passed to
   * `cardReferenceValidator` together with the tweet's user id, and the
   * resulting mutation replaces `cardUri` with the validated value.
   *
   * Failures are reported as the `InvalidAdditionalField*` values defined in
   * this object.
   */
  def cardReferenceBuilder(
    cardReferenceValidator: CardReferenceValidationHandler.Type,
    urlShortener: UrlShortener.Type
  ): CardReferenceBuilder =
    (tweet, urlShortenerCtx) => {
      getCardReference(tweet) match {
        case Some(CardReference(uri)) =>
          for {
            cardUri <-
              if (hasCardsUriScheme(uri)) {
                // This is an explicit card reference that does not
                // need a corresponding URL in the text.
                Future.value(uri)
              } else if (UrlPattern.matcher(uri).matches) {
                // The card reference is being used to specify which URL
                // card to show. We need to verify that the URL is
                // actually in the tweet text, or it can be effectively
                // used to bypass the tweet length limit.
                val urlEntities = getUrls(tweet)

                if (urlEntities.isEmpty) {
                  // Fail fast if there can't possibly be a matching URL entity
                  Future.exception(InvalidAdditionalFieldEmptyUrlEntities)
                } else {
                  // Look for the URL in the expanded URL entities. If
                  // it is present, then map it to the t.co shortened
                  // version of the URL.
                  urlEntities
                    .collectFirst {
                      case urlEntity if urlEntity.expanded.exists(_ == uri) =>
                        Future.value(urlEntity.url)
                    }
                    .getOrElse {
                      // The URL may have been altered when it was
                      // returned from Talon, such as expanding a pasted
                      // t.co link. In this case, we t.co-ize the link and
                      // make sure that the corresponding t.co is present
                      // as a URL entity.
                      urlShortener((uri, urlShortenerCtx)).flatMap { shortened =>
                        if (urlEntities.exists(_.url == shortened.shortUrl)) {
                          Future.value(shortened.shortUrl)
                        } else {
                          Future.exception(InvalidAdditionalFieldNonMatchingUrlAndShortUrl)
                        }
                      }
                    }
                }
              } else {
                Future.exception(InvalidAdditionalFieldInvalidUri)
              }

            // Only the dedicated validation failure is translated; any other
            // failure from the validator propagates unchanged.
            validatedCardUri <- cardReferenceValidator((getUserId(tweet), cardUri)).rescue {
              case CardReferenceValidationFailedException =>
                Future.exception(InvalidAdditionalFieldInvalidCardUri)
            }
          } yield {
            TweetLenses.cardReference.mutation(
              Mutation[CardReference] { cardReference =>
                Some(cardReference.copy(cardUri = validatedCardUri))
              }.checkEq.liftOption
            )
          }

        case None =>
          MutationUnitFuture
      }
    }

  /**
   * Combines media-tag validation and card-reference building into a single
   * step: both mutations are computed via `Future.join` and then applied to
   * the tweet, media tags first (`mediaMutation.also(cardRefMutation)`).
   */
  def filterInvalidData(
    validateTweetMediaTags: (Tweet, Boolean) => Future[Mutation[Tweet]],
    cardReferenceBuilder: CardReferenceBuilder
  ): (Tweet, PostTweetRequest, UrlShortener.Context) => Future[Tweet] =
    (tweet: Tweet, request: PostTweetRequest, urlShortenerCtx: UrlShortener.Context) => {
      Future
        .join(
          validateTweetMediaTags(tweet, request.dark),
          cardReferenceBuilder(tweet, urlShortenerCtx)
        )
        .map {
          case (mediaMutation, cardRefMutation) =>
            mediaMutation.also(cardRefMutation).endo(tweet)
        }
    }

  /**
   * Assembles the tweet-building pipeline from its collaborators.
   *
   * The result is a `FutureArrow` that takes a `PostTweetRequest`, runs the
   * validations and builders passed in here in the order written in the
   * for-comprehension below, and yields a `TweetBuilderResult` carrying the
   * built tweet, the user returned by `updateUserCounts`, the creation time
   * derived from the generated tweet id, the silent-fail flag from the spam
   * check, the geo search request id and, for edits, the update to apply to
   * the initial tweet.
   *
   * Note that `text`, `tweet` and `user` are rebound several times inside the
   * comprehension; each later step sees the most recent binding.
   */
  def apply(
    stats: StatsReceiver,
    validateRequest: PostTweetRequest => Future[Unit],
    validateEdit: EditValidator.Type,
    validateUser: User => Future[Unit] = TweetBuilder.validateUser,
    validateUpdateRateLimit: RateLimitChecker.Validate,
    tweetIdGenerator: TweetIdGenerator,
    userRepo: UserRepository.Type,
    deviceSourceRepo: DeviceSourceRepository.Type,
    communityMembershipRepo: StratoCommunityMembershipRepository.Type,
    communityAccessRepo: StratoCommunityAccessRepository.Type,
    urlShortener: UrlShortener.Type,
    urlEntityBuilder: UrlEntityBuilder.Type,
    geoBuilder: GeoBuilder.Type,
    replyBuilder: ReplyBuilder.Type,
    mediaBuilder: MediaBuilder.Type,
    attachmentBuilder: AttachmentBuilder.Type,
    duplicateTweetFinder: DuplicateTweetFinder.Type,
    spamChecker: Spam.Checker[TweetSpamRequest],
    filterInvalidData: (Tweet, PostTweetRequest, UrlShortener.Context) => Future[Tweet],
    updateUserCounts: (User, Tweet) => Future[User],
    validateConversationControl: ConversationControlBuilder.Validate.Type,
    conversationControlBuilder: ConversationControlBuilder.Type,
    validateTweetWrite: TweetWriteValidator.Type,
    nudgeBuilder: NudgeBuilder.Type,
    communitiesValidator: CommunitiesValidator.Type,
    collabControlBuilder: CollabControlBuilder.Type,
    editControlBuilder: EditControlBuilder.Type,
    featureSwitches: FeatureSwitches
  ): TweetBuilder.Type = {
    val entityExtractor = EntityExtractor.mutationWithoutUrls.endo
    val getUser = userLookup(userRepo)
    val getDeviceSource = deviceSourceLookup(deviceSourceRepo)

    // Create a t.co link for the permalink of the given tweetId.
    val permalinkShortener = (tweetId: TweetId, ctx: UrlShortener.Context) =>
      urlShortener((s"https://twitter.com/i/web/status/$tweetId", ctx)).rescue {
        // propagate OverCapacity
        case e: OverCapacity => Future.exception(e)
        // convert any other failure into UrlShorteningFailure
        case e => Future.exception(UrlShorteningFailure(e))
      }

    // Pulls the geo search request id out of the request's geo data, if both are present.
    def extractGeoSearchRequestId(tweetGeoOpt: Option[TweetCreateGeo]): Option[GeoSearchRequestId] =
      for {
        tweetGeo <- tweetGeoOpt
        geoSearchRequestId <- tweetGeo.geoSearchRequestId
      } yield GeoSearchRequestId(geoSearchRequestId.id)

    // None when there is no TwitterContext viewer or no recipient can be built from it.
    def featureSwitchResults(user: User, stats: StatsReceiver): Option[FeatureSwitchResults] =
      TwitterContext()
        .flatMap { viewer =>
          UserViewerRecipient(user, viewer, stats)
        }.map { recipient =>
          featureSwitches.matchRecipient(recipient)
        }

    FutureArrow { request =>
      for {
        () <- validateRequest(request)

        // The id, the user and the device source are fetched concurrently.
        (tweetId, user, devsrc) <- Future.join(
          tweetIdGenerator().rescue { case t => Future.exception(SnowflakeFailure(t)) },
          Stitch.run(getUser(request.userId)),
          Stitch.run(getDeviceSource(request.createdVia))
        )

        () <- validateUser(user)
        () <- validateUpdateRateLimit((user.id, request.dark))

        // Feature Switch results are calculated once and shared between multiple builders
        matchedResults = featureSwitchResults(user, stats)

        () <- validateConversationControl(
          ConversationControlBuilder.Validate.Request(
            matchedResults = matchedResults,
            conversationControl = request.conversationControl,
            inReplyToTweetId = request.inReplyToTweetId
          )
        )

        // strip illegal chars, normalize newlines, collapse blank lines, etc.
        text = preprocessText(request.text)

        () <- prevalidateTextLength(text, stats)

        attachmentResult <- attachmentBuilder(
          AttachmentBuilderRequest(
            tweetId = tweetId,
            user = user,
            mediaUploadIds = request.mediaUploadIds,
            cardReference = request.additionalFields.flatMap(_.cardReference),
            attachmentUrl = request.attachmentUrl,
            remoteHost = request.remoteHost,
            darkTraffic = request.dark,
            deviceSource = devsrc
          )
        )

        // updated text with appended attachment url, if any.
        text <- Future.value(
          attachmentResult.attachmentUrl match {
            case None => text
            case Some(url) => s"$text $url"
          }
        )

        spamResult <- spamChecker(
          TweetSpamRequest(
            tweetId = tweetId,
            userId = request.userId,
            text = text,
            mediaTags = request.additionalFields.flatMap(_.mediaTags),
            safetyMetaData = request.safetyMetaData,
            inReplyToTweetId = request.inReplyToTweetId,
            quotedTweetId = attachmentResult.quotedTweet.map(_.tweetId),
            quotedTweetUserId = attachmentResult.quotedTweet.map(_.userId)
          )
        )

        // NOTE(review): `.get` assumes the looked-up user always carries
        // safety data - presumably guaranteed by `userLookup`; confirm.
        safety = user.safety.get
        createdAt = SnowflakeId(tweetId).time

        urlShortenerCtx = UrlShortener.Context(
          tweetId = tweetId,
          userId = user.id,
          createdAt = createdAt,
          userProtected = safety.isProtected,
          clientAppId = devsrc.clientAppId,
          remoteHost = request.remoteHost,
          dark = request.dark
        )

        replyRequest = ReplyBuilder.Request(
          authorId = request.userId,
          authorScreenName = user.profile.map(_.screenName).get,
          inReplyToTweetId = request.inReplyToTweetId,
          tweetText = text,
          prependImplicitMentions = request.autoPopulateReplyMetadata,
          enableTweetToNarrowcasting = request.enableTweetToNarrowcasting,
          excludeUserIds = request.excludeReplyUserIds.getOrElse(Nil),
          spamResult = spamResult,
          batchMode = request.transientContext.flatMap(_.batchCompose)
        )

        replyResult <- replyBuilder(replyRequest)
        replyOpt = replyResult.map(_.reply)

        replyConversationId <- replyResult match {
          case Some(r) if r.reply.inReplyToStatusId.nonEmpty =>
            r.conversationId match {
              case None =>
                // Throw this specific exception to make it easier to
                // count how often we hit this corner case.
                Future.exception(MissingConversationId(r.reply.inReplyToStatusId.get))
              case conversationIdOpt => Future.value(conversationIdOpt)
            }
          case _ => Future.value(None)
        }

        // Validate that the current user can reply to this conversation, based on
        // the conversation's ConversationControl.
        // Note: currently we only validate conversation controls access on replies,
        // therefore we use the conversationId from the inReplyToStatus.
        // Validate that the exclusive tweet control option is only used by allowed users.
        () <- validateTweetWrite(
          TweetWriteValidator.Request(
            replyConversationId,
            request.userId,
            request.exclusiveTweetControlOptions,
            replyResult.flatMap(_.exclusiveTweetControl),
            request.trustedFriendsControlOptions,
            replyResult.flatMap(_.trustedFriendsControl),
            attachmentResult.quotedTweet,
            replyResult.flatMap(_.reply.inReplyToStatusId),
            replyResult.flatMap(_.editControl),
            request.editOptions
          )
        )

        convoId = replyConversationId match {
          case Some(replyConvoId) => replyConvoId
          case None =>
            // This is a root tweet, so the tweet id is the conversation id.
            tweetId
        }

        () <- nudgeBuilder(
          NudgeBuilderRequest(
            text = text,
            inReplyToTweetId = replyOpt.flatMap(_.inReplyToStatusId),
            conversationId = if (convoId == tweetId) None else Some(convoId),
            hasQuotedTweet = attachmentResult.quotedTweet.nonEmpty,
            nudgeOptions = request.nudgeOptions,
            tweetId = Some(tweetId),
          )
        )

        // updated text with implicit reply mentions inserted, if any
        text <- Future.value(
          replyResult.map(_.tweetText).getOrElse(text)
        )

        // updated text with urls replaced with t.cos
        ((text, urlEntities), (geoCoords, placeIdOpt)) <- Future.join(
          urlEntityBuilder((text, urlShortenerCtx))
            .map {
              case (text, urlEntities) =>
                UrlEntityBuilder.updateTextAndUrls(text, urlEntities)(partialHtmlEncode)
            },
          if (request.geo.isEmpty)
            Future.value((None, None))
          else
            geoBuilder(
              GeoBuilder.Request(
                request.geo.get,
                user.account.map(_.geoEnabled).getOrElse(false),
                user.account.map(_.language).getOrElse("en")
              )
            ).map(r => (r.geoCoordinates, r.placeId))
        )

        // updated text with trailing media url
        MediaBuilder.Result(text, mediaEntities, mediaKeys) <-
          request.mediaUploadIds.getOrElse(Nil) match {
            case Nil => Future.value(MediaBuilder.Result(text, Nil, Nil))
            case ids =>
              mediaBuilder(
                MediaBuilder.Request(
                  mediaUploadIds = ids,
                  text = text,
                  tweetId = tweetId,
                  userId = user.id,
                  userScreenName = user.profile.get.screenName,
                  isProtected = user.safety.get.isProtected,
                  createdAt = createdAt,
                  dark = request.dark,
                  productMetadata = request.mediaMetadata.map(_.toMap)
                )
              )
          }

        // The duplicate check is skipped for dark requests.
        () <- Future.when(!request.dark) {
          val reqInfo =
            DuplicateTweetFinder.RequestInfo.fromPostTweetRequest(request, text)

          duplicateTweetFinder(reqInfo).flatMap {
            case None => Future.Unit
            case Some(duplicateId) =>
              log.debug(s"timeline_duplicate_check_failed:$duplicateId")
              Future.exception(TweetCreateFailure.State(TweetCreateState.Duplicate))
          }
        }

        textVisibility = getTextVisibility(
          text = text,
          replyResult = replyResult,
          urlEntities = urlEntities,
          mediaEntities = mediaEntities,
          attachmentUrl = attachmentResult.attachmentUrl
        )

        () <- validateTextLength(
          text = text,
          visibleText = textVisibility.visibleText,
          replyResult = replyResult,
          stats = stats
        )

        communities =
          request.additionalFields
            .flatMap(CommunityAnnotation.additionalFieldsToCommunityIDs)
            .map(ids => Communities(communityIds = ids))

        rootExclusiveControls = request.exclusiveTweetControlOptions.map { _ =>
          ExclusiveTweetControl(request.userId)
        }

        () <- validateExclusiveTweetNotReplies(rootExclusiveControls, replyResult)
        () <- validateExclusiveTweetParams(rootExclusiveControls, communities)

        replyExclusiveControls = replyResult.flatMap(_.exclusiveTweetControl)

        // The userId is pulled off of the request rather than being supplied
        // via the ExclusiveTweetControlOptions because additional fields
        // can be set by clients to contain any value they want.
        // This could include userIds that don't match their actual userId.
        // Only one of replyResult or request.exclusiveTweetControlOptions will be defined.
        exclusiveTweetControl = replyExclusiveControls.orElse(rootExclusiveControls)

        rootTrustedFriendsControl = request.trustedFriendsControlOptions.map { options =>
          TrustedFriendsControl(options.trustedFriendsListId)
        }

        () <- validateTrustedFriendsNotReplies(rootTrustedFriendsControl, replyResult)
        () <- validateTrustedFriendsParams(
          rootTrustedFriendsControl,
          request.conversationControl,
          communities,
          exclusiveTweetControl
        )

        replyTrustedFriendsControl = replyResult.flatMap(_.trustedFriendsControl)

        trustedFriendsControl = replyTrustedFriendsControl.orElse(rootTrustedFriendsControl)

        collabControl <- collabControlBuilder(
          CollabControlBuilder.Request(
            collabControlOptions = request.collabControlOptions,
            replyResult = replyResult,
            communities = communities,
            trustedFriendsControl = trustedFriendsControl,
            conversationControl = request.conversationControl,
            exclusiveTweetControl = exclusiveTweetControl,
            userId = request.userId
          ))

        isCollabInvitation = collabControl.isDefined && (collabControl.get match {
          case CollabControl.CollabInvitation(_: CollabInvitation) => true
          case _ => false
        })

        coreData = TweetCoreData(
          userId = request.userId,
          text = text,
          createdAtSecs = createdAt.inSeconds,
          createdVia = devsrc.internalName,
          reply = replyOpt,
          hasTakedown = safety.hasTakedown,
          // We want to nullcast community tweets and CollabInvitations
          // This will disable tweet fanout to followers' home timelines,
          // and filter the tweets from appearing from the tweeter's profile
          // or search results for the tweeter's tweets.
          nullcast =
            request.nullcast || CommunityUtil.hasCommunity(communities) || isCollabInvitation,
          narrowcast = request.narrowcast,
          nsfwUser = request.possiblySensitive.getOrElse(safety.nsfwUser),
          nsfwAdmin = safety.nsfwAdmin,
          trackingId = request.trackingId,
          placeId = placeIdOpt,
          coordinates = geoCoords,
          conversationId = Some(convoId),
          // Set hasMedia to true if we know that there is media,
          // and leave it unknown if not, so that it will be
          // correctly set for pasted media.
          hasMedia = if (mediaEntities.nonEmpty) Some(true) else None
        )

        tweet = Tweet(
          id = tweetId,
          coreData = Some(coreData),
          urls = Some(urlEntities),
          media = Some(mediaEntities),
          mediaKeys = if (mediaKeys.nonEmpty) Some(mediaKeys) else None,
          contributor = getContributor(request.userId),
          visibleTextRange = textVisibility.visibleTextRange,
          selfThreadMetadata = replyResult.flatMap(_.selfThreadMetadata),
          directedAtUserMetadata = replyResult.map(_.directedAtMetadata),
          composerSource = request.composerSource,
          quotedTweet = attachmentResult.quotedTweet,
          exclusiveTweetControl = exclusiveTweetControl,
          trustedFriendsControl = trustedFriendsControl,
          collabControl = collabControl,
          noteTweet = request.noteTweetOptions.map(options =>
            NoteTweet(options.noteTweetId, options.isExpandable))
        )

        editControl <- editControlBuilder(
          EditControlBuilder.Request(
            postTweetRequest = request,
            tweet = tweet,
            matchedResults = matchedResults
          )
        )

        tweet <- Future.value(tweet.copy(editControl = editControl))

        tweet <- Future.value(entityExtractor(tweet))

        () <- validateEntities(tweet)

        tweet <- {
          val cctlRequest =
            ConversationControlBuilder.Request.fromTweet(
              tweet,
              request.conversationControl,
              request.noteTweetOptions.flatMap(_.mentionedUserIds))
          Stitch.run(conversationControlBuilder(cctlRequest)).map { conversationControl =>
            tweet.copy(conversationControl = conversationControl)
          }
        }

        tweet <- Future.value(
          setAdditionalFields(tweet, request.additionalFields)
        )
        () <- validateCommunityMembership(communityMembershipRepo, communityAccessRepo, communities)
        () <- validateCommunityReply(communities, replyResult)
        () <- communitiesValidator(
          CommunitiesValidator.Request(matchedResults, safety.isProtected, communities))

        tweet <- Future.value(tweet.copy(communities = communities))

        tweet <- Future.value(
          tweet.copy(underlyingCreativesContainerId = request.underlyingCreativesContainerId)
        )

        // For certain tweets we want to write a self-permalink which is used to generate modified
        // tweet text for legacy clients that contains a link. NOTE: this permalink is for
        // the tweet being created - we also create permalinks for related tweets further down
        // e.g. if this tweet is an edit, we might create a permalink for the initial tweet as well
        tweet <- {
          val isBeyond140 = textVisibility.isExtendedWithExtraChars(attachmentResult.extraChars)
          val isEditTweet = request.editOptions.isDefined
          val isMixedMedia = Media.isMixedMedia(mediaEntities)
          val isNoteTweet = request.noteTweetOptions.isDefined

          if (isBeyond140 || isEditTweet || isMixedMedia || isNoteTweet)
            permalinkShortener(tweetId, urlShortenerCtx)
              .map { selfPermalink =>
                tweet.copy(
                  selfPermalink = Some(selfPermalink),
                  extendedTweetMetadata = Some(ExtendedTweetMetadataBuilder(tweet, selfPermalink))
                )
              }
          else {
            Future.value(tweet)
          }
        }

        // When an edit tweet is created we have to update some information on the
        // initial tweet, this object stores info about those updates for use
        // in the tweet insert store.
        // We update the editControl for each edit tweet and for the first edit tweet
        // we update the self permalink.
        initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] <- editControl match {
          case Some(EditControl.Edit(edit)) =>
            // Identifies the first edit of an initial tweet
            val isFirstEdit =
              request.editOptions.map(_.previousTweetId).contains(edit.initialTweetId)

            // A potential permalink for the initial tweet of the tweet being created
            val selfPermalinkForInitial: Future[Option[ShortenedUrl]] =
              if (isFirstEdit) {
                // `tweet` is the first edit of an initial tweet, which means
                // we need to write a self permalink. We create it here in
                // TweetBuilder and pass it through to the tweet store to
                // be written to the initial tweet.
                permalinkShortener(edit.initialTweetId, urlShortenerCtx).map(Some(_))
              } else {
                Future.value(None)
              }

            selfPermalinkForInitial.map { link =>
              Some(
                InitialTweetUpdateRequest(
                  initialTweetId = edit.initialTweetId,
                  editTweetId = tweet.id,
                  selfPermalink = link
                ))
            }

          // This is not an edit, it is the initial tweet - so there are no initial
          // tweet updates
          case _ => Future.value(None)
        }

        tweet <- filterInvalidData(tweet, request, urlShortenerCtx)

        () <- validateEdit(tweet, request.editOptions)

        user <- updateUserCounts(user, tweet)

      } yield {
        TweetBuilderResult(
          tweet,
          user,
          createdAt,
          isSilentFail = spamResult == Spam.SilentFail,
          geoSearchRequestId = extractGeoSearchRequestId(request.geo),
          initialTweetUpdateRequest = initialTweetUpdateRequest
        )
      }
    }
  }
/**
 * Signals that the lock cache already holds an in-progress entry for the
 * key. The entry may be a leftover that was never cleaned up; the two
 * situations cannot be told apart, so the entry is given a (relatively)
 * short TTL and the client and/or user is expected to retry.
 */
case object TweetCreationInProgress extends Exception with NoStackTrace

/**
 * Signals that a tweet with the requested uniqueness id already exists.
 *
 * @param tweetId id of the previously created tweet
 */
case class DuplicateTweetCreation(tweetId: TweetId) extends Exception with NoStackTrace

/** Guards tweet creation so that identical concurrent requests do not both insert. */
trait TweetCreationLock {
  def apply(
    key: Key,
    dark: Boolean,
    nullcast: Boolean
  )(
    insert: => Future[PostTweetResult]
  ): Future[PostTweetResult]
  def unlock(key: Key): Future[Unit]
}

object CacheBasedTweetCreationLock {

  /**
   * A conditional write of the lock value was rejected because the cached
   * state for the key changed underneath us (another process, or eviction).
   */
  case object UnexpectedCacheState extends Exception with NoStackTrace

  /**
   * Updating the lock cache failed more often than the configured limit.
   *
   * @param failures the failures that were collected, oldest first
   */
  case class RetriesExhausted(failures: Seq[Exception]) extends Exception with NoStackTrace

  /** Everything except the three terminal lock outcomes is worth retrying. */
  def shouldRetry(e: Exception): Boolean =
    e match {
      case TweetCreationInProgress | _: DuplicateTweetCreation | _: RetriesExhausted => false
      case _ => true
    }

  /** Picks `longTtl` for entries recording a created tweet and `shortTtl` for all others. */
  def ttlChooser(shortTtl: Duration, longTtl: Duration): (Key, State) => Duration = {
    def ttlFor(state: State): Duration =
      state match {
        case _: State.AlreadyCreated => longTtl
        case _ => shortTtl
      }

    (_, state) => ttlFor(state)
  }

  /**
   * Renders one log record as "<base 64 tweet id><TAB><uniqueness id>".
   * The compact form keeps storage small and analysis simple; a line is
   * expected to be 24 bytes including the newline.
   */
  val formatUniquenessLogEntry: ((String, TweetId)) => String = {
    case (uniquenessId, tweetId) =>
      val encodedTweetId = Base64Long.toBase64(tweetId)
      encodedTweetId + "\t" + uniquenessId
  }

  /**
   * Scribes each (uniqueness id, tweet id) pair so that failures of the
   * uniqueness check can be measured by finding several tweets that share
   * one uniqueness id.
   *
   * A test category is used because the data only has to live long enough
   * to find such duplicates.
   */
  val ScribeUniquenessId: FutureEffect[(String, TweetId)] =
    Scribe("test_tweetypie_uniqueness_id").contramap(formatUniquenessLogEntry)

  private[this] val UniquenessIdLog = Logger("com.twitter.tweetypie.handler.UniquenessId")

  /**
   * Writes the same records to an ordinary logger instead; intended for
   * traffic that is not production traffic.
   */
  val LogUniquenessId: FutureEffect[(String, TweetId)] = FutureEffect[(String, TweetId)] { entry =>
    val line = formatUniquenessLogEntry(entry)
    UniquenessIdLog.info(line)
    Future.Unit
  }

  private val log = Logger(getClass)
}
+ * A lock is created in cache using the user id and a hash of the tweet text + * in the case of tweets, or the source_status_id in the case of retweets. + * If another process attempts to lock for the same user and hash, the request + * fails as a duplicate. The lock lasts for 10 seconds if it is not deleted. + * Given the hard timeout of 5 seconds on all requests, it should never take + * us longer than 5 seconds to create a request, but we've observed times of up + * to 10 seconds to create statuses for some of our more popular users. + * + * When a request with a uniqueness id is successful, the id of the + * created tweet will be stored in the cache so that subsequent + * requests can retrieve the originally-created tweet rather than + * duplicating creation or getting an exception. + */ +class CacheBasedTweetCreationLock( + cache: Cache[Key, State], + maxTries: Int, + stats: StatsReceiver, + logUniquenessId: FutureEffect[(String, TweetId)]) + extends TweetCreationLock { + import CacheBasedTweetCreationLock._ + + private[this] val eventCounters = stats.scope("event") + + private[this] def event(k: Key, name: String): Unit = { + log.debug(s"$name:$k") + eventCounters.counter(name).incr() + } + + private[this] def retryLoop[A](action: => Future[A]): Future[A] = { + def go(failures: List[Exception]): Future[A] = + if (failures.length >= maxTries) { + Future.exception(RetriesExhausted(failures.reverse)) + } else { + action.rescue { + case e: Exception if shouldRetry(e) => go(e :: failures) + } + } + + go(Nil) + } + + private[this] val lockerExceptions = ExceptionCounter(stats) + + /** + * Obtain the lock for creating a tweet. If this method completes + * without throwing an exception, then the lock value was + * successfully set in cache, which indicates a high probability + * that this is the only process that is attempting to create this + * tweet. (The uncertainty comes from the possibility of lock + * entries missing from the cache.) 
+ * + * @throws TweetCreationInProgress if there is another process + * trying to create this tweet. + * + * @throws DuplicateTweetCreation if a tweet has already been + * created for a duplicate request. The exception has the id of + * the created tweet. + * + * @throws RetriesExhausted if obtaining the lock failed more than + * the requisite number of times. + */ + private[this] def obtainLock(k: Key, token: Long): Future[Time] = retryLoop { + val lockTime = Time.now + + // Get the current state for this key. + cache + .getWithChecksum(Seq(k)) + .flatMap(initialStateKvr => Future.const(initialStateKvr(k))) + .flatMap { + case None => + // Nothing in cache for this key + cache + .add(k, State.InProgress(token, lockTime)) + .flatMap { + case true => Future.value(lockTime) + case false => Future.exception(UnexpectedCacheState) + } + case Some((Throw(e), _)) => + Future.exception(e) + case Some((Return(st), cs)) => + st match { + case State.Unlocked => + // There is an Unlocked entry for this key, which + // implies that a previous attempt was cleaned up. + cache + .checkAndSet(k, State.InProgress(token, lockTime), cs) + .flatMap { + case true => Future.value(lockTime) + case false => Future.exception(UnexpectedCacheState) + } + case State.InProgress(cachedToken, creationStartedTimestamp) => + if (cachedToken == token) { + // There is an in-progress entry for *this process*. This + // can happen on a retry if the `add` actually succeeds + // but the future fails. The retry can return the result + // of the add that we previously tried. + Future.value(creationStartedTimestamp) + } else { + // There is an in-progress entry for *a different + // process*. This implies that there is another tweet + // creation in progress for *this tweet*. 
+ val tweetCreationAge = Time.now - creationStartedTimestamp + k.uniquenessId.foreach { id => + log.info( + "Found an in-progress tweet creation for uniqueness id %s %s ago" + .format(id, tweetCreationAge) + ) + } + stats.stat("in_progress_age_ms").add(tweetCreationAge.inMilliseconds) + Future.exception(TweetCreationInProgress) + } + case State.AlreadyCreated(tweetId, creationStartedTimestamp) => + // Another process successfully created a tweet for this + // key. + val tweetCreationAge = Time.now - creationStartedTimestamp + stats.stat("already_created_age_ms").add(tweetCreationAge.inMilliseconds) + Future.exception(DuplicateTweetCreation(tweetId)) + } + } + } + + /** + * Attempt to remove this process' lock entry from the cache. This + * is done by writing a short-lived tombstone, so that we can ensure + * that we only overwrite the entry if it is still an entry for this + * process instead of another process' entry. + */ + private[this] def cleanupLoop(k: Key, token: Long): Future[Unit] = + retryLoop { + // Instead of deleting the value, we attempt to write Unlocked, + // because we only want to delete it if it was the value that we + // wrote ourselves, and there is no delete call that is + // conditional on the extant value. + cache + .getWithChecksum(Seq(k)) + .flatMap(kvr => Future.const(kvr(k))) + .flatMap { + case None => + // Nothing in the cache for this tweet creation, so cleanup + // is successful. + Future.Unit + + case Some((tryV, cs)) => + // If we failed trying to deserialize the value, then we + // want to let the error bubble up, because there is no good + // recovery procedure, since we can't tell whether the entry + // is ours. + Future.const(tryV).flatMap { + case State.InProgress(presentToken, _) => + if (presentToken == token) { + // This is *our* in-progress marker, so we want to + // overwrite it with the tombstone. 
If checkAndSet + // returns false, that's OK, because that means + // someone else overwrote the value, and we don't have + // to clean it up anymore. + cache.checkAndSet(k, State.Unlocked, cs).unit + } else { + // Indicates that another request has overwritten our + // state before we cleaned it up. This should only + // happen when our token was cleared from cache and + // another process started a duplicate create. This + // should be very infrequent. We count it just to be + // sure. + event(k, "other_attempt_in_progress") + Future.Unit + } + + case _ => + // Cleanup has succeeded, because we are not responsible + // for the cache entry anymore. + Future.Unit + } + } + }.onSuccess { _ => event(k, "cleanup_attempt_succeeded") } + .handle { + case _ => event(k, "cleanup_attempt_failed") + } + + /** + * Mark that a tweet has been successfully created. Subsequent calls + * to `apply` with this key will receive a DuplicateTweetCreation + * exception with the specified id. + */ + private[this] def creationComplete(k: Key, tweetId: TweetId, lockTime: Time): Future[Unit] = + // Unconditionally set the state because regardless of the + // value present, we know that we want to transition to the + // AlreadyCreated state for this key. + retryLoop(cache.set(k, State.AlreadyCreated(tweetId, lockTime))) + .onSuccess(_ => event(k, "mark_created_succeeded")) + .onFailure { case _ => event(k, "mark_created_failed") } + // If this fails, it's OK for the request to complete + // successfully, because it's more harmful to create the tweet + // and return failure than it is to complete it successfully, + // but fail to honor the uniqueness id next time. 
+ .handle { case NonFatal(_) => } + + private[this] def createWithLock( + k: Key, + create: => Future[PostTweetResult] + ): Future[PostTweetResult] = { + val token = Random.nextLong + event(k, "lock_attempted") + + obtainLock(k, token) + .onSuccess { _ => event(k, "lock_obtained") } + .handle { + // If we run out of retries when trying to get the lock, then + // just go ahead with tweet creation. We should keep an eye on + // how frequently this happens, because this means that the + // only sign that this is happening will be duplicate tweet + // creations. + case RetriesExhausted(failures) => + event(k, "lock_failure_ignored") + // Treat this as the time that we obtained the lock. + Time.now + } + .onFailure { + case e => lockerExceptions(e) + } + .flatMap { lockTime => + create.transform { + case r @ Return(PostTweetResult(_, Some(tweet), _, _, _, _, _)) => + event(k, "create_succeeded") + + k.uniquenessId.foreach { u => logUniquenessId((u, tweet.id)) } + + // Update the lock entry to remember the id of the tweet we + // created and extend the TTL. + creationComplete(k, tweet.id, lockTime).before(Future.const(r)) + case other => + other match { + case Throw(e) => + log.debug(s"Tweet creation failed for key $k", e) + case Return(r) => + log.debug(s"Tweet creation failed for key $k, so unlocking: $r") + } + + event(k, "create_failed") + + // Attempt to clean up the lock after the failed create. + cleanupLoop(k, token).before(Future.const(other)) + } + } + } + + /** + * Make a best-effort attempt at removing the duplicate cache entry + * for this key. If this fails, it is not catastrophic. The worst-case + * behavior should be that the user has to wait for the short TTL to + * elapse before tweeting succeeds. + */ + def unlock(k: Key): Future[Unit] = + retryLoop(cache.delete(k).unit).onSuccess(_ => event(k, "deleted")) + + /** + * Prevent duplicate tweet creation. 
/**
 * Prevent duplicate tweet creation.
 *
 * Dark and nullcast creations bypass the lock entirely: an event is
 * recorded and `create` runs directly. Every other creation goes
 * through `createWithLock`, so that no more than one tweet creation
 * for the same key happens at a time.
 *
 * Failures of the locked path are only counted here, never handled:
 * the returned future still fails with the same exception.
 *
 * @throws DuplicateTweetCreation if a tweet has already been created
 *   by a previous request. The exception has the id of the created
 *   tweet.
 *
 * @throws TweetCreationInProgress if another process is creating this
 *   same tweet.
 */
def apply(
  k: Key,
  isDark: Boolean,
  nullcast: Boolean
)(
  create: => Future[PostTweetResult]
): Future[PostTweetResult] =
  (isDark, nullcast) match {
    case (true, _) =>
      event(k, "dark_create")
      create

    case (_, true) =>
      event(k, "nullcast_create")
      create

    case _ =>
      createWithLock(k, create).onFailure {
        // Another process is creating this same tweet (or has already
        // created it)
        case TweetCreationInProgress =>
          event(k, "tweet_creation_in_progress")
        case _: DuplicateTweetCreation =>
          event(k, "tweet_already_created")
        case _ =>
      }
  }
/**
 * Failure raised by the cascaded-delete path when the tweet to delete
 * cannot be loaded. Carries no stack trace.
 *
 * @param retweetId id of the tweet that could not be loaded
 */
case class CascadedDeleteNotAvailable(retweetId: TweetId) extends Exception with NoStackTrace {
  override def getMessage: String = {
    // The margin markers keep the two-line message free of source indentation.
    val withMargin =
      s"""|Cascaded delete tweet failed because tweet $retweetId
          |is not present in cache or manhattan."""
    withMargin.stripMargin
  }
}
/**
 * Operations that make up the tweet delete path.
 */
trait TweetDeletePathHandler {
  import TweetDeletePathHandler.ValidateDeleteTweets

  /**
   * Delete a tweet whose dependency has already been deleted: a retweet
   * whose source tweet is gone, or an edit whose initial tweet is gone.
   *
   * @param request identifies the tweet to delete and the tweet the
   *   deletion cascades from
   */
  def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit]

  /**
   * Public delete entry point.
   *
   * @param request the tweets to delete and who is deleting them
   * @param isUnretweetEdits true when this call was itself issued by
   *   [[unretweetEdits]]
   * @return one result per requested tweet id
   */
  def deleteTweets(
    request: DeleteTweetsRequest,
    isUnretweetEdits: Boolean = false,
  ): Future[Seq[DeleteTweetResult]]

  /**
   * Core delete implementation, with the authorization policy supplied
   * by the caller.
   *
   * @param request the tweets to delete
   * @param byUserId the user the deletion is performed as, if known
   * @param authenticatedUserId the authenticated (contributor) user, if any
   * @param validate decides, per tweet, whether the deletion is allowed
   *   and why; a failed future cancels the deletion of that tweet
   * @param isUnretweetEdits true when this call was itself issued by
   *   [[unretweetEdits]]
   * @return one result per requested tweet id
   */
  def internalDeleteTweets(
    request: DeleteTweetsRequest,
    byUserId: Option[UserId],
    authenticatedUserId: Option[UserId],
    validate: ValidateDeleteTweets,
    isUnretweetEdits: Boolean = false
  ): Future[Seq[DeleteTweetResult]]

  /**
   * Delete `byUserId`'s retweets of the tweets in an edit chain.
   *
   * @param optEditControl edit control describing the edit chain
   * @param excludedTweetId chain member whose retweet is left alone
   * @param byUserId the user whose retweets are removed
   */
  def unretweetEdits(
    optEditControl: Option[EditControl],
    excludedTweetId: TweetId,
    byUserId: UserId
  ): Future[Unit]
}
LastQuoteOfQuoterRepository.Type, + tweetStore: TotalTweetStore, + getPerspectives: GetPerspectives) + extends TweetDeletePathHandler { + import TweetDeletePathHandler._ + + val tweetRepo: TweetRepository.Type = TweetRepository.fromTweetResult(tweetResultRepo) + + // attempt to delete tweets was made by someone other than the tweet owner or an admin user + object DeleteTweetsPermissionException extends Exception with NoStackTrace + object ExpectedUserIdMismatchException extends Exception with NoStackTrace + + private[this] val log = Logger("com.twitter.tweetypie.store.TweetDeletions") + + private[this] val cascadeEditDelete = stats.scope("cascade_edit_delete") + private[this] val cascadeEditDeletesEnqueued = cascadeEditDelete.counter("enqueued") + private[this] val cascadeEditDeleteTweets = cascadeEditDelete.counter("tweets") + private[this] val cascadeEditDeleteFailures = cascadeEditDelete.counter("failures") + + private[this] val cascadedDeleteTweet = stats.scope("cascaded_delete_tweet") + private[this] val cascadedDeleteTweetFailures = cascadedDeleteTweet.counter("failures") + private[this] val cascadedDeleteTweetSourceMatch = cascadedDeleteTweet.counter("source_match") + private[this] val cascadedDeleteTweetSourceMismatch = + cascadedDeleteTweet.counter("source_mismatch") + private[this] val cascadedDeleteTweetTweetNotFound = + cascadedDeleteTweet.counter("tweet_not_found") + private[this] val cascadedDeleteTweetTweetNotFoundAge = + cascadedDeleteTweet.stat("tweet_not_found_age") + private[this] val cascadedDeleteTweetUserNotFound = cascadedDeleteTweet.counter("user_not_found") + + private[this] val deleteTweets = stats.scope("delete_tweets") + private[this] val deleteTweetsAuth = deleteTweets.scope("per_tweet_auth") + private[this] val deleteTweetsAuthAttempts = deleteTweetsAuth.counter("attempts") + private[this] val deleteTweetsAuthFailures = deleteTweetsAuth.counter("failures") + private[this] val deleteTweetsAuthSuccessAdmin = 
/**
 * Load the users involved in a delete request.
 *
 * Ids that do not resolve to a user are left out of the resulting map;
 * each such miss is counted in `deleteTweetsUserNotFound`.
 *
 * @param userIds ids of the users to load
 * @return the users that were found, keyed by id
 */
private[this] def getUsersForDeleteTweets(userIds: Seq[UserId]): Future[Map[UserId, User]] = {
  val lookups =
    Stitch.traverse(userIds) { id =>
      userRepo(UserKey(id), userQueryOptions).map { found =>
        if (found.isEmpty) deleteTweetsUserNotFound.incr()
        found.map(user => id -> user)
      }
    }

  Stitch.run(lookups.map(entries => entries.flatten.toMap))
}
/**
 * Load the tweet data that should actually be deleted for each
 * requested id.
 *
 * When deleting a tweet that has been edited, the initial version is
 * deleted instead: because the initial tweet is hydrated in every
 * request, deleting it hides later revisions, which are cleaned up
 * asynchronously by TP Daemons. The second lookup is skipped when the
 * loaded tweet already is the initial tweet, or when the whole edit
 * history is being deleted (cascaded edit tweet deletion).
 *
 * @param ids the requested tweet ids
 * @param isCascadedEditTweetDeletion whether this is a cascading delete
 *   of an edit chain
 * @return a map from the *requested* tweet id to the data of the tweet
 *   to delete; ids with no deletable tweet are absent
 */
private[this] def getTweetsForDeleteTweets(
  ids: Seq[TweetId],
  isCascadedEditTweetDeletion: Boolean
): Future[Map[TweetId, TweetData]] = {

  // Resolves one requested id to the tweet that should be deleted for it.
  def loadDeletionTarget(requestedId: TweetId): Stitch[Option[TweetData]] =
    getSingleDeletedTweet(requestedId, isCascadedEditTweetDeletion).flatMap {
      case None =>
        Stitch.value(None)
      case Some(loaded) =>
        val noSecondLookupNeeded =
          EditControlUtil.isInitialTweet(loaded.tweet) || isCascadedEditTweetDeletion
        if (noSecondLookupNeeded) Stitch.value(Some(loaded))
        else getSingleDeletedTweet(EditControlUtil.getInitialTweetId(loaded.tweet))
    }

  // Pair every target with the input tweetId so the caller can report
  // results against the ids it was given.
  val lookups =
    Stitch.traverse(ids) { requestedId =>
      loadDeletionTarget(requestedId).map(target => (requestedId, target))
    }

  Stitch.run(lookups).map { pairs =>
    pairs.collect { case (requestedId, Some(target)) => (requestedId, target) }.toMap
  }
}
/**
 * A suspended/deactivated user can't delete tweets.
 *
 * Deactivation is checked before suspension, so a user in both states
 * is reported as deactivated.
 *
 * @param user the user to check; its `safety` field must be populated
 * @return the same user when allowed; otherwise a `Throw` carrying
 *   `UpstreamFailure.UserSafetyEmptyException` (no safety data) or an
 *   `AccessDenied` naming the blocking state
 */
private[this] def userNotSuspendedOrDeactivated(user: User): Try[User] = {
  def denied(message: String, cause: AccessDeniedCause): Try[User] =
    Throw(AccessDenied(message, errorCause = Some(cause)))

  user.safety match {
    case None =>
      Throw(UpstreamFailure.UserSafetyEmptyException)
    case Some(safety) =>
      if (safety.deactivated)
        denied(s"User deactivated userId: ${user.id}", AccessDeniedCause.UserDeactivated)
      else if (safety.suspended)
        denied(s"User suspended userId: ${user.id}", AccessDeniedCause.UserSuspended)
      else
        Return(user)
  }
}
/**
 * Decide whether `byUser` may delete a tweet authored by `tweetAuthorId`.
 *
 * - The tweet's author is always authorized. When an authenticated
 *   (contributor) user id is present the authorization is attributed to
 *   that contributor, otherwise to the owner.
 * - A non-author is authorized only when no contributor is involved and
 *   the user holds the "delete_user_tweets" right.
 *
 * Every call counts an attempt, plus exactly one of the
 * success-by-user, success-admin or failure counters.
 *
 * @return the reason the deletion is allowed, or a `Throw` of
 *   `DeleteTweetsPermissionException`
 */
private[this] def userAuthorizedToDeleteTweet(
  byUser: User,
  optAuthenticatedUserId: Option[UserId],
  tweetAuthorId: UserId
): Try[DeleteAuthorization] = {

  val ownsTweet = byUser.id == tweetAuthorId

  // Only evaluated for non-owners acting without a contributor.
  def hasAdminPrivilege =
    byUser.roles.exists(_.rights.contains("delete_user_tweets"))

  deleteTweetsAuthAttempts.incr()

  optAuthenticatedUserId match {
    case Some(contributorId) if ownsTweet =>
      deleteTweetsAuthSuccessByUser.incr()
      Return(AuthorizedByTweetContributor(contributorId))

    case None if ownsTweet =>
      deleteTweetsAuthSuccessByUser.incr()
      Return(AuthorizedByTweetOwner(byUser.id))

    // contributor may not assume admin role
    case None if hasAdminPrivilege =>
      deleteTweetsAuthSuccessAdmin.incr()
      Return(AuthorizedByAdmin(byUser.id))

    case _ =>
      deleteTweetsAuthFailures.incr()
      Throw(DeleteTweetsPermissionException)
  }
}
/**
 * Validation for the normal public tweet delete case, the user must be found and must
 * not be suspended or deactivated.
 *
 * The checks run in the order written below and stop at the first failure, so the
 * order determines which error the caller sees. On success the arrow yields the
 * [[DeleteAuthorization]] produced by the final ownership/admin check.
 */
val validateTweetsForPublicDelete: ValidateDeleteTweets = FutureArrow {
  ctx: DeleteTweetsContext =>
    // All steps are synchronous Try computations; the combined Try is lifted into a Future.
    Future.const(
      for {

        // byUserId must be present
        byUserId <- ctx.byUserId.orThrow(
          ClientError(ClientErrorCause.BadRequest, "Missing byUserId")
        )

        // the byUser must be found
        byUserOpt = ctx.users.get(byUserId)
        byUser <- byUserOpt.orThrow(
          ClientError(ClientErrorCause.BadRequest, s"User $byUserId not found")
        )

        _ <- userNotSuspendedOrDeactivated(byUser)

        // a bounced tweet may only be removed by a bounce delete
        _ <- validateBounceConditions(
          ctx.tweetIsBounced,
          ctx.isBounceDelete
        )

        // if there's a contributor, make sure the user is found and not suspended or deactivated
        _ <-
          ctx.authenticatedUserId
            .map { uid =>
              ctx.users.get(uid) match {
                case None =>
                  Throw(ClientError(ClientErrorCause.BadRequest, s"Contributor $uid not found"))
                case Some(authUser) =>
                  userNotSuspendedOrDeactivated(authUser)
              }
            }
            .getOrElse(Return.Unit)

        // if the expected user id is present, make sure it matches the user id on the tweet
        _ <-
          ctx.expectedErasureUserId
            .map { expectedUserId =>
              expectedUserIdMatchesActualUserId(expectedUserId, ctx.tweetAuthorId)
            }
            .getOrElse(Return.Unit)

        // User must own the tweet or be an admin
        deleteAuth <- userAuthorizedToDeleteTweet(
          byUser,
          ctx.authenticatedUserId,
          ctx.tweetAuthorId
        )
      } yield deleteAuth
    )
}
/**
 * Fill in missing values of AuditDeleteTweet with values from TwitterContext.
 *
 * Values already present on `orig` win; the context is only consulted
 * for `host`, `clientApplicationId` and `userAgent` when they are
 * absent. The user agent is truncated to `MaxUserAgentLength`
 * characters whichever source it came from.
 *
 * @param orig audit information supplied with the request
 * @return a copy of `orig` with the three fields resolved
 */
def enrichMissingFromTwitterContext(orig: AuditDeleteTweet): AuditDeleteTweet = {
  val ctx = TwitterContext()

  val resolvedHost = orig.host.orElse(ctx.flatMap(_.auditIp))
  val resolvedAppId = orig.clientApplicationId.orElse(ctx.flatMap(_.clientApplicationId))
  val resolvedUserAgent =
    orig.userAgent
      .orElse(ctx.flatMap(_.userAgent))
      .map(_.take(MaxUserAgentLength))

  orig.copy(
    host = resolvedHost,
    clientApplicationId = resolvedAppId,
    userAgent = resolvedUserAgent
  )
}
/**
 * core delete tweets implementation.
 *
 * The [[deleteTweets]] method wraps this method and provides validation required
 * for a public endpoint.
 *
 * Steps:
 *  1. load the tweet data to delete for every requested id,
 *  2. load the tweet authors plus `byUserId` / `authenticatedUserId`,
 *  3. load the bounce status of each loaded tweet,
 *  4. per requested id: validate, unretweet edits of the source tweet when the
 *     tweet is a retweet, then hand a delete event to the tweet store.
 *
 * @param request the tweets to delete, plus erasure/bounce/cascade flags
 * @param byUserId user the deletion is performed as, if known
 * @param authenticatedUserId authenticated (contributor) user, if any
 * @param validate per-tweet authorization check
 * @param isUnretweetEdits true when the call originates from [[unretweetEdits]];
 *   suppresses the nested unretweet step
 * @return one result per requested id, in request order. Ids with no loadable
 *   tweet are reported as `Ok`. Permission and expected-user-id mismatches become
 *   result states; any other failure fails the whole returned future.
 */
override def internalDeleteTweets(
  request: DeleteTweetsRequest,
  byUserId: Option[UserId],
  authenticatedUserId: Option[UserId],
  validate: ValidateDeleteTweets,
  isUnretweetEdits: Boolean = false
): Future[Seq[DeleteTweetResult]] = {

  val auditDeleteTweet =
    enrichMissingFromTwitterContext(request.auditPassthrough.getOrElse(AuditDeleteTweet()))
  deleteTweetsTweets.incr(request.tweetIds.size)
  for {
    // keyed by the requested tweet id
    tweetDataMap <- getTweetsForDeleteTweets(
      request.tweetIds,
      request.cascadedEditedTweetDeletion.getOrElse(false)
    )

    // authors of the loaded tweets plus the acting users, de-duplicated
    userIds: Seq[UserId] = (tweetDataMap.values.map { td =>
      getUserId(td.tweet)
    } ++ byUserId ++ authenticatedUserId).toSeq.distinct

    users <- getUsersForDeleteTweets(userIds)

    // looked up for exactly the keys of tweetDataMap, so the direct
    // stratoBounceStatuses(tweetId) access below is made for a key requested here
    stratoBounceStatuses <- getStratoBounceStatuses(
      tweetDataMap.keys.toSeq,
      request.isUserErasure,
      request.cascadedEditedTweetDeletion.getOrElse(false))

    results <- Future.collect {
      request.tweetIds.map { tweetId =>
        tweetDataMap.get(tweetId) match {
          // already deleted, so nothing to do
          case None =>
            deleteTweetsTweetNotFound.incr()
            Future.value(DeleteTweetResult(tweetId, TweetDeleteState.Ok))
          case Some(tweetData) =>
            val tweet: Tweet = tweetData.tweet
            val tweetIsBounced = stratoBounceStatuses(tweetId)
            val optSourceTweet: Option[Tweet] = tweetData.sourceTweetResult.map(_.value.tweet)

            val validation: Future[(Boolean, DeleteAuthorization)] = for {
              isLastQuoteOfQuoter <- isFinalQuoteOfQuoter(tweet)
              deleteAuth <- validate(
                DeleteTweetsContext(
                  byUserId = byUserId,
                  authenticatedUserId = authenticatedUserId,
                  tweetAuthorId = getUserId(tweet),
                  users = users,
                  isUserErasure = request.isUserErasure,
                  expectedErasureUserId = request.expectedUserId,
                  tweetIsBounced = tweetIsBounced,
                  isBounceDelete = request.isBounceDelete
                )
              )
              _ <- optSourceTweet match {
                case Some(sourceTweet) if !isUnretweetEdits =>
                  // If this is a retweet and this deletion was not triggered by
                  // unretweetEdits, unretweet edits of the source Tweet
                  // before deleting the retweet.
                  //
                  // deleteAuth will always contain a byUserId except for erasure deletion,
                  // in which case the retweets will be deleted individually.
                  deleteAuth.byUserId match {
                    case Some(userId) =>
                      unretweetEdits(sourceTweet.editControl, sourceTweet.id, userId)
                    case None => Future.Unit
                  }
                case _ => Future.Unit
              }
            } yield {
              (isLastQuoteOfQuoter, deleteAuth)
            }

            validation
              .flatMap {
                case (isLastQuoteOfQuoter: Boolean, deleteAuth: DeleteAuthorization) =>
                  val isAdminDelete = deleteAuth match {
                    case AuthorizedByAdmin(_) => true
                    case _ => false
                  }

                  val event =
                    DeleteTweet.Event(
                      tweet = tweet,
                      timestamp = Time.now,
                      user = users.get(getUserId(tweet)),
                      byUserId = deleteAuth.byUserId,
                      auditPassthrough = Some(auditDeleteTweet),
                      isUserErasure = request.isUserErasure,
                      // only a bounce delete when the tweet really is bounced
                      isBounceDelete = request.isBounceDelete && tweetIsBounced,
                      isLastQuoteOfQuoter = isLastQuoteOfQuoter,
                      isAdminDelete = isAdminDelete
                    )
                  // number of other tweets in the edit chain of an initial tweet
                  val numberOfEdits: Int = tweet.editControl
                    .collect {
                      case EditControl.Initial(initial) =>
                        initial.editTweetIds.count(_ != tweet.id)
                    }
                    .getOrElse(0)
                  cascadeEditDeletesEnqueued.incr(numberOfEdits)
                  tweetStore
                    .deleteTweet(event)
                    // report against the requested id, not the id of the loaded tweet
                    .map(_ => DeleteTweetResult(tweetId, TweetDeleteState.Ok))
              }
              // counts every failure, including the two translated just below
              .onFailure { _ =>
                deleteTweetsFailures.incr()
              }
              // only these two failures become result states; anything else propagates
              .handle {
                case ExpectedUserIdMismatchException =>
                  DeleteTweetResult(tweetId, TweetDeleteState.ExpectedUserIdMismatch)
                case DeleteTweetsPermissionException =>
                  DeleteTweetResult(tweetId, TweetDeleteState.PermissionError)
              }
        }
      }
    }
  } yield results
}
/**
 * Validations for the public deleteTweets endpoint.
 *  - ensures that the byUserId user can be found and is in the correct user state
 *  - ensures that the tweet is being deleted by the tweet's owner, or by an admin
 * If there is a validation error, a Future.exception is returned.
 *
 * Requests that are part of a user erasure are routed to the relaxed erasure
 * validation instead (the User is allowed to be missing).
 */
val deleteTweetsValidator: ValidateDeleteTweets =
  FutureArrow { ctx =>
    val delegate =
      if (ctx.isUserErasure) validateTweetsForUserErasureDaemon
      else validateTweetsForPublicDelete
    delegate(ctx)
  }
// Deletes a tweet that has become detached from a deleted dependency:
// a retweet whose source tweet was deleted, or an edit whose initial
// tweet was deleted.
//
// Outcomes, by what loading `request.tweetId` yields:
//  * already deleted / bounce-deleted: succeeds, nothing to do.
//  * not found: fails with CascadedDeleteNotAvailable when the id is a
//    snowflake id younger than
//    MaxCascadedDeleteTweetTemporaryInconsistencyAge; otherwise logs a
//    warning and succeeds.
//  * any other failure: propagated unchanged.
//  * loaded: the tweet is deleted only if `request.cascadedFromTweetId`
//    is its retweet source or its initial edit tweet; otherwise a
//    mismatch is counted and logged, and the call succeeds without
//    deleting anything.
//
// Every failure of the returned future is counted in
// cascadedDeleteTweetFailures.
override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] = {
  // captured up front, before any asynchronous work
  val contextViewer = TwitterContext()
  getTweet(request.tweetId)
    .transform {
      case Throw(
            FilteredState.Unavailable.TweetDeleted | FilteredState.Unavailable.BounceDeleted) =>
        // The retweet or edit was already deleted via some other mechanism
        Future.Unit

      case Throw(NotFound) =>
        cascadedDeleteTweetTweetNotFound.incr()
        // the age can only be derived from snowflake ids; other ids are
        // never treated as recently created
        val recentlyCreated =
          if (SnowflakeId.isSnowflakeId(request.tweetId)) {
            val age = Time.now - SnowflakeId(request.tweetId).time
            cascadedDeleteTweetTweetNotFoundAge.add(age.inMilliseconds)
            age < MaxCascadedDeleteTweetTemporaryInconsistencyAge
          } else {
            false
          }

        if (recentlyCreated) {
          // Treat the NotFound as a temporary condition, most
          // likely due to replication lag.
          Future.exception(CascadedDeleteNotAvailable(request.tweetId))
        } else {
          // Treat the NotFound as a permanent inconsistency, either
          // spurious edges in tflock or invalid data in Manhattan. This
          // was happening a few times an hour during the time that we
          // were not treating it specially. For now, we will just log that
          // it happened, but in the longer term, it would be good
          // to collect this data and repair the corruption.
          log.warn(
            Seq(
              "cascaded_delete_tweet_old_not_found",
              request.tweetId,
              request.cascadedFromTweetId
            ).mkString("\t")
          )
          Future.Done
        }

      // Any other FilteredStates should not be thrown because of
      // the options that we used to load the tweet, so we will just
      // let them bubble up as an internal server error
      case Throw(other) =>
        Future.exception(other)

      case Return(tweet) =>
        Future
          .join(
            isFinalQuoteOfQuoter(tweet),
            getUser(getUserId(tweet))
          )
          .flatMap {
            case (isLastQuoteOfQuoter, user) =>
              // a missing author is counted but does not stop the delete
              if (user.isEmpty) {
                cascadedDeleteTweetUserNotFound.incr()
              }
              // the two dependencies this tweet may cascade from
              val tweetSourceId = getShare(tweet).map(_.sourceStatusId)
              val initialEditId = tweet.editControl.collect {
                case EditControl.Edit(edit) => edit.initialTweetId
              }
              if (initialEditId.contains(request.cascadedFromTweetId)) {
                cascadeEditDeleteTweets.incr()
              }
              if (tweetSourceId.contains(request.cascadedFromTweetId)
                || initialEditId.contains(request.cascadedFromTweetId)) {
                cascadedDeleteTweetSourceMatch.incr()
                val deleteEvent =
                  DeleteTweet.Event(
                    tweet = tweet,
                    timestamp = Time.now,
                    user = user,
                    byUserId = contextViewer.flatMap(_.userId),
                    cascadedFromTweetId = Some(request.cascadedFromTweetId),
                    auditPassthrough = request.auditPassthrough,
                    isUserErasure = false,
                    // cascaded deletes of retweets or edits have not been through a bouncer flow,
                    // so are not considered to be "bounce deleted".
                    isBounceDelete = false,
                    isLastQuoteOfQuoter = isLastQuoteOfQuoter,
                    isAdminDelete = false
                  )
                tweetStore
                  .deleteTweet(deleteEvent)
                  .onFailure { _ =>
                    // edit-cascade failures get their own counter
                    if (initialEditId.contains(request.cascadedFromTweetId)) {
                      cascadeEditDeleteFailures.incr()
                    }
                  }
              } else {
                // the tweet does not depend on cascadedFromTweetId: do not delete
                cascadedDeleteTweetSourceMismatch.incr()
                log.warn(
                  Seq(
                    "cascaded_from_tweet_id_source_mismatch",
                    request.tweetId,
                    request.cascadedFromTweetId,
                    tweetSourceId.orElse(initialEditId).getOrElse("-")
                  ).mkString("\t")
                )
                Future.Done
              }
          }
    }
    .onFailure(_ => cascadedDeleteTweetFailures.incr())
}
& if A happens to be the source + * tweet for A1 & if A is an edit tweet, then the entire edit chain should be unretweeted & not + * A. i.e. On delete of a retweet - unretweet all the revisions for this user. + * - create A + * - retweet A' + * - create B + * - delete A' => Deletes A' (& also any B' if it existed) + * + * The following function has two failure scenarios - + * i. when it fails to get perspectives of any of the edit tweets. + * ii. the deletion of any of the retweets of these edits fails. + * + * In either of these scenarios, we fail the entire request & the error bubbles up to the top. + * Note: The above unretweet of edits only happens for the current user. + * In normal circumstances, a maximum of one Tweet in the edit chain will have been retweeted, + * but we don't know which one it was. Additionally, there may be circumstances where + * unretweet failed, and we end up with multiple versions retweeted. For these reasons, + * we always unretweet all the revisions (except for `excludedTweetId`). + * This is a no-op if none of these versions have been retweeted.
/**
 * Unretweet, on behalf of `byUserId`, every revision in an edit chain
 * except `excludedTweetId`.
 *
 * The edit tweet ids come from `optEditControl`; the user's retweets of
 * those ids are looked up and deleted through [[deleteTweets]] with
 * `isUnretweetEdits = true`. Nothing is deleted when the user has not
 * retweeted any of them.
 *
 * A failure to resolve the edit tweet ids is returned as a failed
 * Future rather than thrown synchronously, so callers that compose on
 * the returned Future see it like any other asynchronous failure.
 * Failures of the perspective lookup or of the delete call also fail
 * the returned Future. The per-tweet results of the delete call are
 * discarded.
 *
 * @param optEditControl edit control describing the edit chain
 * @param excludedTweetId revision whose retweet must be left alone
 * @param byUserId user whose retweets are removed
 */
override def unretweetEdits(
  optEditControl: Option[EditControl],
  excludedTweetId: TweetId,
  byUserId: UserId
): Future[Unit] =
  Future
    // Lift the Try instead of calling get() so a Throw becomes a failed Future.
    .const(EditControlUtil.getEditTweetIds(optEditControl))
    .flatMap { allEditTweetIds =>
      val editTweetIds: Seq[TweetId] = allEditTweetIds.filter(_ != excludedTweetId)
      editTweetIdRetweetsFromUser(editTweetIds, byUserId)
    }
    .flatMap { tweetIds =>
      if (tweetIds.nonEmpty) {
        deleteTweets(
          DeleteTweetsRequest(tweetIds = tweetIds, byUserId = Some(byUserId)),
          isUnretweetEdits = true
        )
      } else {
        Future.Nil
      }
    }
    .unit
/**
 * Builds the write-path validation effect: it asks the [[TweetWriteEnforcementLibrary]]
 * for a verdict on the tweet being created and turns every non-`Allow` verdict into a
 * failed result carrying the matching [[TweetCreateState]].
 */
object TweetWriteValidator {

  /** Everything the enforcement library is given about the tweet being written. */
  case class Request(
    conversationId: Option[TweetId],
    userId: UserId,
    exclusiveTweetControlOptions: Option[ExclusiveTweetControlOptions],
    replyToExclusiveTweetControl: Option[ExclusiveTweetControl],
    trustedFriendsControlOptions: Option[TrustedFriendsControlOptions],
    inReplyToTrustedFriendsControl: Option[TrustedFriendsControl],
    quotedTweetOpt: Option[QuotedTweet],
    inReplyToTweetId: Option[TweetId],
    inReplyToEditControl: Option[EditControl],
    editOptions: Option[EditOptions])

  type Type = FutureEffect[Request]

  /**
   * @param convoCtlRepo looked up only when the request carries a conversation id
   * @param tweetWriteEnforcementLibrary produces the allow/deny verdict
   * @param enableExclusiveTweetControlValidation gate forwarded to the enforcement library
   * @param enableTrustedFriendsControlValidation gate forwarded to the enforcement library
   * @param enableStaleTweetValidation gate forwarded to the enforcement library
   * @return an effect that succeeds on `Allow` and otherwise fails with a
   *         [[TweetCreateFailure.State]]
   */
  def apply(
    convoCtlRepo: ConversationControlRepository.Type,
    tweetWriteEnforcementLibrary: TweetWriteEnforcementLibrary,
    enableExclusiveTweetControlValidation: Gate[Unit],
    enableTrustedFriendsControlValidation: Gate[Unit],
    enableStaleTweetValidation: Gate[Unit]
  ): FutureEffect[Request] =
    FutureEffect[Request] { request =>
      // An empty TweetQuery.Options is built only to pick up the default CacheControl
      // value, so that it is not hard coded here.
      val defaultCacheControl = TweetQuery.Options(TweetQuery.Include()).cacheControl

      // Every denial becomes a failed Stitch carrying the corresponding create state.
      def rejectWith(state: TweetCreateState) =
        Stitch.exception(TweetCreateFailure.State(state))

      // Without a conversation id there is no root conversation control to fetch.
      val rootConversationControl = request.conversationId match {
        case None => Stitch.value(None)
        case Some(convoId) => convoCtlRepo(convoId, defaultCacheControl)
      }

      Stitch.run {
        for {
          convoCtl <- rootConversationControl

          verdict <- tweetWriteEnforcementLibrary(
            TweetWriteEnforcementRequest(
              rootConversationControl = convoCtl,
              convoId = request.conversationId,
              exclusiveTweetControlOptions = request.exclusiveTweetControlOptions,
              replyToExclusiveTweetControl = request.replyToExclusiveTweetControl,
              trustedFriendsControlOptions = request.trustedFriendsControlOptions,
              inReplyToTrustedFriendsControl = request.inReplyToTrustedFriendsControl,
              quotedTweetOpt = request.quotedTweetOpt,
              actorContext = ActorContext(request.userId),
              inReplyToTweetId = request.inReplyToTweetId,
              inReplyToEditControl = request.inReplyToEditControl,
              editOptions = request.editOptions
            ),
            enableExclusiveTweetControlValidation = enableExclusiveTweetControlValidation,
            enableTrustedFriendsControlValidation = enableTrustedFriendsControlValidation,
            enableStaleTweetValidation = enableStaleTweetValidation
          )

          _ <- verdict match {
            case Allow =>
              Stitch.Done
            case Deny =>
              rejectWith(TweetCreateState.ReplyTweetNotAllowed)
            case DenyExclusiveTweetReply =>
              rejectWith(TweetCreateState.ExclusiveTweetEngagementNotAllowed)
            case DenySuperFollowsCreate =>
              rejectWith(TweetCreateState.SuperFollowsCreateNotAuthorized)
            case DenyTrustedFriendsReply =>
              rejectWith(TweetCreateState.TrustedFriendsEngagementNotAllowed)
            case DenyTrustedFriendsCreate =>
              rejectWith(TweetCreateState.TrustedFriendsCreateNotAllowed)
            case DenyTrustedFriendsQuoteTweet =>
              rejectWith(TweetCreateState.TrustedFriendsQuoteTweetNotAllowed)
            case DenyStaleTweetReply =>
              rejectWith(TweetCreateState.StaleTweetEngagementNotAllowed)
            case DenyStaleTweetQuoteTweet =>
              rejectWith(TweetCreateState.StaleTweetQuoteTweetNotAllowed)
          }
        } yield ()
      }
    }
}

/*
 * As part of GDPR U13 work, we want to block tweets created from when a user
 * was < 13 from being restored.
 */
private[handler] object U13ValidationUtil {

  /**
   * Delegates to `GlobalBirthdateUtil.isUnderSomeAge` with an age of 13 and the deleted
   * tweet's creation time (`createdAtSecs` converted to milliseconds).
   *
   * Throws `NoCreatedAtTimeException` when the deleted tweet has no `createdAtSecs`.
   */
  def wasTweetCreatedBeforeUserTurned13(user: User, deletedTweet: DeletedTweet): Boolean = {
    val createdAtSecs = deletedTweet.createdAtSecs.getOrElse(throw NoCreatedAtTimeException)
    GlobalBirthdateUtil.isUnderSomeAge(13, new DateTime(createdAtSecs * 1000L), user)
  }
}
/** Marker for failures of the undelete flow that are surfaced as exceptions. */
trait UndeleteException extends Exception with NoStackTrace

/**
 * Exceptions we return to the user, things that we don't expect to ever happen unless there is a
 * problem with the underlying data in Manhattan or a bug in [[com.twitter.tweetypie.storage.TweetStorageClient]]
 */
object NoDeletedAtTimeException extends UndeleteException
object NoCreatedAtTimeException extends UndeleteException
object NoStatusWithSuccessException extends UndeleteException
object NoUserIdWithTweetException extends UndeleteException
object NoDeletedTweetException extends UndeleteException
object SoftDeleteUserIdNotFoundException extends UndeleteException

/**
 * Represents a problem that we choose to return to the user as a response state
 * rather than as an exception.
 */
case class ResponseException(state: UndeleteTweetState) extends Exception with NoStackTrace {

  /** The response carrying this exception's state (and no tweet). */
  def toResponse: UndeleteTweetResponse = UndeleteTweetResponse(state = state)
}

private[this] object SoftDeleteExpiredException extends ResponseException(SoftDeleteExpired)
private[this] object BounceDeleteException extends ResponseException(TweetIsBounceDeleted)
private[this] object SourceTweetNotFoundException extends ResponseException(SourceTweetNotFound)
private[this] object SourceUserNotFoundException extends ResponseException(SourceUserNotFound)
private[this] object TweetExistsException extends ResponseException(TweetAlreadyExists)
private[this] object TweetNotFoundException extends ResponseException(TweetNotFound)
private[this] object U13TweetException extends ResponseException(TweetIsU13Tweet)
private[this] object UserNotFoundException extends ResponseException(UserNotFound)

/**
 * Undelete Notes:
 *
 * If request.force is set to true, then the undelete will take place even if the undeleted tweet
 * is already present in Manhattan. This is useful if a tweet was recently restored to the backend,
 * but the async actions portion of the undelete failed and you want to retry them.
 *
 * Before undeleting the tweet we check if it's a retweet, in which case we require that the sourceTweet
 * and sourceUser exist.
 *
 * Tweets can only be undeleted for N days where N is the number of days before tweets marked with
 * the soft_delete_state flag are deleted permanently by the cleanup job
 *
 */
object UndeleteTweetHandler {

  type Type = FutureArrow[UndeleteTweetRequest, UndeleteTweetResponse]

  /**
   * Extract an optional value inside a future, or fail the future if it's missing.
   *
   * @param option the future holding the optional value
   * @param ex evaluated only when the value is absent; becomes the failure of the result
   */
  def required[T](option: Future[Option[T]], ex: => Exception): Future[T] =
    option.flatMap {
      case None => Future.exception(ex)
      case Some(i) => Future.value(i)
    }

  /**
   * Wires the undelete flow: validate the soft-deleted tweet and its user(s), restore the
   * tweet in storage, validate the storage response, then hand an [[UndeleteTweet.Event]]
   * to `save`. [[ResponseException]]s raised along the way are converted into response
   * states; every other failure is propagated.
   *
   * Note: `tweetExists` is part of the signature but is not referenced in this body.
   */
  def apply(
    undelete: TweetStorageClient.Undelete,
    tweetExists: FutureArrow[TweetId, Boolean],
    getUser: FutureArrow[UserId, Option[User]],
    getDeletedTweets: TweetStorageClient.GetDeletedTweets,
    parentUserIdRepo: ParentUserIdRepository.Type,
    save: FutureArrow[UndeleteTweet.Event, Tweet]
  ): Type = {

    /** Looks up the parent user id, treating a missing parent tweet as `None`. */
    def getParentUserId(tweet: Tweet): Future[Option[UserId]] =
      Stitch.run {
        parentUserIdRepo(tweet)
          .handle {
            case ParentUserIdRepository.ParentTweetNotFound(_) => None
          }
      }

    val entityExtractor = EntityExtractor.mutationAll.endo

    // Single-id lookup on top of the batch API. `.head` assumes the storage client returns
    // a response for the requested id; an empty result fails the Stitch.
    val getDeletedTweet: Long => Future[DeletedTweetResponse] =
      id => Stitch.run(getDeletedTweets(Seq(id)).map(_.head))

    /** Fails when the soft-deleted record has no user id or the user cannot be fetched. */
    def getRequiredUser(userId: Option[UserId]): Future[User] =
      userId match {
        case None => Future.exception(SoftDeleteUserIdNotFoundException)
        case Some(id) => required(getUser(id), UserNotFoundException)
      }

    /**
     * Fetches the deleted tweet and rejects every delete state that must not be undeleted.
     *
     * @param allowNotDeleted when true, a tweet that is not deleted is accepted instead of
     *                        failing with `TweetExistsException`
     */
    def getValidatedDeletedTweet(
      tweetId: TweetId,
      allowNotDeleted: Boolean
    ): Future[DeletedTweet] = {
      import DeleteState._
      val deletedTweet = getDeletedTweet(tweetId).map { response =>
        response.deleteState match {
          case SoftDeleted => response.tweet
          // BounceDeleted tweets violated Twitter Rules and may not be undeleted
          case BounceDeleted => throw BounceDeleteException
          case HardDeleted => throw SoftDeleteExpiredException
          case NotDeleted => if (allowNotDeleted) response.tweet else throw TweetExistsException
          case NotFound => throw TweetNotFoundException
        }
      }

      required(deletedTweet, NoDeletedTweetException)
    }

    /**
     * Fetch the source tweet's user for a deleted share
     */
    def getSourceUser(share: Option[DeletedTweetShare]): Future[Option[User]] =
      share match {
        case None => Future.value(None)
        case Some(s) => required(getUser(s.sourceUserId), SourceUserNotFoundException).map(Some(_))
      }

    /**
     * Ensure that the undelete response contains all the required information to continue with
     * the tweetypie undelete.
     */
    def validateUndeleteResponse(response: Undelete.Response, force: Boolean): Future[Tweet] =
      Future {
        (response.code, response.tweet) match {
          case (Undelete.UndeleteResponseCode.NotCreated, _) => throw TweetNotFoundException
          case (Undelete.UndeleteResponseCode.BackupNotFound, _) => throw SoftDeleteExpiredException
          case (Undelete.UndeleteResponseCode.Success, None) => throw NoStatusWithSuccessException
          case (Undelete.UndeleteResponseCode.Success, Some(tweet)) =>
            // archivedAtMillis is required on the response unless force is present
            // or the tweet is a retweet. retweets have no favs or retweets to clean up
            // of their own so the original deleted at time is not needed
            if (response.archivedAtMillis.isEmpty && !force && !isRetweet(tweet))
              throw NoDeletedAtTimeException
            else
              tweet
          case (code, _) => throw new Exception(s"Unknown UndeleteResponseCode $code")
        }
      }

    /**
     * Fails with `U13TweetException` when the tweet was created before the user turned 13.
     *
     * The check runs inside `Future { ... }` so that an exception thrown by the check itself
     * (it throws when the deleted tweet has no creation time) is returned as a failed Future
     * rather than thrown at the caller, matching `validateUndeleteResponse`.
     */
    def enforceU13Compliance(user: User, deletedTweet: DeletedTweet): Future[Unit] =
      Future(U13ValidationUtil.wasTweetCreatedBeforeUserTurned13(user, deletedTweet)).flatMap {
        createdBeforeThirteen =>
          if (createdBeforeThirteen) Future.exception(U13TweetException)
          else Future.Unit
      }

    /**
     * Fetch required data and perform before/after validations for undelete.
     * If everything looks good with the undelete, kick off the tweetypie undelete
     * event.
     */
    FutureArrow { request =>
      val hydrationOptions = request.hydrationOptions.getOrElse(WritePathHydrationOptions())
      val force = request.force.getOrElse(false)
      val tweetId = request.tweetId

      (for {
        // we must be able to query the tweet from the soft delete table
        deletedTweet <- getValidatedDeletedTweet(tweetId, allowNotDeleted = force)

        // we always require the user
        user <- getRequiredUser(deletedTweet.userId)

        // Make sure we're not restoring any u13 tweets.
        () <- enforceU13Compliance(user, deletedTweet)

        // if a retweet, then sourceUser is required; sourceTweet will be hydrated in save()
        sourceUser <- getSourceUser(deletedTweet.share)

        // validations passed, perform the undelete.
        undeleteResponse <- Stitch.run(undelete(tweetId))

        // validate the response
        tweet <- validateUndeleteResponse(undeleteResponse, force)

        // Extract entities from tweet text
        tweetWithEntities = entityExtractor(tweet)

        // If a retweet, get user id of parent retweet
        parentUserId <- getParentUserId(tweet)

        // undeletion was successful, hydrate the tweet and
        // kick off tweetypie async undelete actions
        hydratedTweet <- save(
          UndeleteTweet.Event(
            tweet = tweetWithEntities,
            user = user,
            timestamp = Time.now,
            hydrateOptions = hydrationOptions,
            deletedAt = undeleteResponse.archivedAtMillis.map(Time.fromMilliseconds),
            sourceUser = sourceUser,
            parentUserId = parentUserId
          )
        )
      } yield {
        UndeleteTweetResponse(TweetypieSuccess, Some(hydratedTweet))
      }).handle {
        // Problems we report as a response state instead of a failed Future.
        case TweetHydrationError(_, Some(FilteredState.Unavailable.SourceTweetNotFound(_))) =>
          SourceTweetNotFoundException.toResponse
        case ex: ResponseException =>
          ex.toResponse
      }
    }
  }
}
/**
 * Handles unretweet requests: first unretweets the other edit versions of the source
 * tweet (best effort, only if the source tweet can be loaded), then deletes the user's
 * retweet of the requested source tweet.
 */
object UnretweetHandler {

  type Type = UnretweetRequest => Future[UnretweetResult]

  def apply(
    deleteTweets: TweetDeletePathHandler.DeleteTweets,
    getPerspectives: GetPerspectives,
    unretweetEdits: TweetDeletePathHandler.UnretweetEdits,
    tweetRepo: TweetRepository.Type,
  ): Type = { request: UnretweetRequest =>
    for {
      sourceTweetLookup <- getSourceTweet(request.sourceTweetId, tweetRepo).liftToTry
      _ <- sourceTweetLookup match {
        // A source tweet that cannot be loaded just skips the edit handling.
        case Throw(_) => Future.Done
        // If we're able to fetch the source Tweet, unretweet all its other versions
        case Return(sourceTweet) =>
          unretweetEdits(sourceTweet.editControl, request.sourceTweetId, request.userId)
      }
      result <- unretweetSourceTweet(request, deleteTweets, getPerspectives)
    } yield result
  }

  /**
   * Finds the requesting user's retweet of the source tweet via `getPerspectives` and
   * deletes it. When the perspective holds no retweet id, nothing is deleted and the
   * result is `UnretweetResult(None, TweetDeleteState.Ok)`.
   */
  def unretweetSourceTweet(
    request: UnretweetRequest,
    deleteTweets: TweetDeletePathHandler.DeleteTweets,
    getPerspectives: GetPerspectives,
  ): Future[UnretweetResult] = {
    val perspectiveQuery = tls.PerspectiveQuery(request.userId, Seq(request.sourceTweetId))

    getPerspectives(Seq(perspectiveQuery)).flatMap { results =>
      val retweetId = results.head.perspectives.headOption.flatMap(_.retweetId)

      retweetId match {
        case None =>
          Future.value(UnretweetResult(None, TweetDeleteState.Ok))
        case Some(id) =>
          val deleteRequest =
            DeleteTweetsRequest(tweetIds = Seq(id), byUserId = Some(request.userId))
          deleteTweets(deleteRequest, false).map { deleteResults =>
            val deleted = deleteResults.head
            UnretweetResult(Some(deleted.tweetId), deleted.state)
          }
      }
    }
  }

  /**
   * Loads the source tweet, requesting only its edit-control field. A `FilteredState`
   * from the repository is reported as `Stitch.NotFound`.
   */
  def getSourceTweet(
    sourceTweetId: TweetId,
    tweetRepo: TweetRepository.Type
  ): Future[Tweet] = {
    val editControlOnly: TweetQuery.Options =
      TweetQuery.Options(include = TweetQuery.Include(tweetFields = Set(Tweet.EditControlField.id)))

    val lookup = tweetRepo(sourceTweetId, editControlOnly).rescue {
      case _: FilteredState => Stitch.NotFound
    }

    Stitch.run(lookup)
  }
}
/**
 * Applies the requested `nsfwAdmin` / `nsfwUser` values to a tweet and, only when that
 * actually changes the tweet, sends an [[UpdatePossiblySensitiveTweet.Event]] to the store.
 */
object UpdatePossiblySensitiveTweetHandler {
  type Type = FutureArrow[UpdatePossiblySensitiveTweetRequest, Unit]

  def apply(
    tweetGetter: FutureArrow[TweetId, Tweet],
    userGetter: FutureArrow[UserId, User],
    updatePossiblySensitiveTweetStore: FutureEffect[UpdatePossiblySensitiveTweet.Event]
  ): Type =
    FutureArrow { request =>
      // `checkEq` makes each mutation report a change only when the value differs.
      val adminFlagMutation = Mutation[Boolean](_ => request.nsfwAdmin).checkEq
      val userFlagMutation = Mutation[Boolean](_ => request.nsfwUser).checkEq
      val applyFlags =
        TweetLenses.nsfwAdmin
          .mutation(adminFlagMutation)
          .also(TweetLenses.nsfwUser.mutation(userFlagMutation))

      // The per-flag changes are recomputed against the original tweet for the event.
      def eventFor(
        originalTweet: Tweet,
        mutatedTweet: Tweet,
        user: User
      ): UpdatePossiblySensitiveTweet.Event =
        UpdatePossiblySensitiveTweet.Event(
          tweet = mutatedTweet,
          user = user,
          timestamp = Time.now,
          byUserId = request.byUserId,
          nsfwAdminChange = adminFlagMutation(TweetLenses.nsfwAdmin.get(originalTweet)),
          nsfwUserChange = userFlagMutation(TweetLenses.nsfwUser.get(originalTweet)),
          note = request.note,
          host = request.host
        )

      tweetGetter(request.tweetId).flatMap { originalTweet =>
        applyFlags(originalTweet) match {
          case Some(mutatedTweet) =>
            for {
              user <- userGetter(getUserId(originalTweet))
              _ <- updatePossiblySensitiveTweetStore(eventFor(originalTweet, mutatedTweet, user))
            } yield ()
          // Nothing changed: no user lookup and no store call.
          case None =>
            Future.Unit
        }
      }
    }
}
object UrlEntityBuilder {
  import UpstreamFailure.UrlShorteningFailure
  import UrlShortener.Context

  /**
   * Extracts URLs from the given tweet text, shortens them, and returns an updated tweet
   * text that contains the shortened URLs, along with the generated `UrlEntity`s.
   */
  type Type = FutureArrow[(String, Context), (String, Seq[UrlEntity])]

  /**
   * Builds a [[Type]] on top of the given shortener: every URL extracted from the text is
   * shortened, and the text outside the URLs is passed through
   * `replaceInvisiblesWithWhitespace`.
   */
  def fromShortener(shortener: UrlShortener.Type): Type =
    FutureArrow {
      case (text, ctx) =>
        val shortenings =
          EntityExtractor.extractAllUrls(text).map(entity => shortenEntity(shortener, entity, ctx))

        Future.collect(shortenings).map { shortened =>
          updateTextAndUrls(text, shortened.flatMap(_.toSeq))(replaceInvisiblesWithWhitespace)
        }
    }

  /**
   * Update a url entity with tco-ed url
   *
   * @param shortener the shortener the protocol-normalized long url is sent to
   * @param entity    an url entity with long url in the `url` field
   * @param ctx       additional data needed to build the shortener request
   * @return an updated url entity with tco-ed url in the `url` field,
   *         long url in the `expanded` field and display text in the `display` field
   */
  private def shortenEntity(
    shortener: UrlShortener.Type,
    entity: UrlEntity,
    ctx: Context
  ): Future[Option[UrlEntity]] = {
    // Maps a shortener failure onto the failure reported for the tweet.
    def translateFailure(failure: Throwable): Throwable =
      failure match {
        // fail tweets with invalid urls
        case UrlShortener.InvalidUrlError =>
          TweetCreateFailure.State(TweetCreateState.InvalidUrl)
        // fail tweets with malware urls
        case UrlShortener.MalwareUrlError =>
          TweetCreateFailure.State(TweetCreateState.MalwareUrl)
        // propagate OverCapacity
        case e @ OverCapacity(_) => e
        // convert any other failure into UrlShorteningFailure
        case e => UrlShorteningFailure(e)
      }

    val normalizedUrl = TcoUrl.normalizeProtocol(entity.url)

    shortener((normalizedUrl, ctx))
      .map { urlData =>
        Some(
          entity.copy(
            url = urlData.shortUrl,
            expanded = Some(urlData.longUrl),
            display = Some(urlData.displayText)
          )
        )
      }
      .rescue {
        case failure => Future.exception(translateFailure(failure))
      }
  }

  /**
   * Applies a text-modification function to all parts of the text not found within a UrlEntity,
   * and then updates all the UrlEntity indices as necessary.
   *
   * Entities are consumed in the order given; each one's `url` replaces the span
   * `[fromIndex, toIndex)` of the original text (code-point offsets).
   */
  def updateTextAndUrls(
    text: String,
    urlEntities: Seq[UrlEntity]
  )(
    textMod: String => String
  ): (String, Seq[UrlEntity]) = {
    val converter = new IndexConverter(text)
    val rebuiltText = new StringBuilder
    val shiftedEntities = Seq.newBuilder[UrlEntity]

    // Fold state: (offset consumed in the original text, offset reached in the new text).
    val start = (Offset.CodePoint(0), Offset.CodePoint(0))

    val (consumed, _) = urlEntities.foldLeft(start) {
      case ((sourceOffset, targetOffset), entity) =>
        val gap = textMod(converter.substringByCodePoints(sourceOffset.toInt, entity.fromIndex))
        rebuiltText.append(gap)
        rebuiltText.append(entity.url)

        val urlStart = targetOffset + Offset.CodePoint.length(gap)
        val urlEnd = urlStart + Offset.CodePoint.length(entity.url)
        shiftedEntities += entity.copy(fromIndex = urlStart.toShort, toIndex = urlEnd.toShort)

        (Offset.CodePoint(entity.toIndex.toInt), urlEnd)
    }

    // Whatever follows the last entity is modified text as well.
    rebuiltText.append(textMod(converter.substringByCodePoints(consumed.toInt)))

    (rebuiltText.toString, shiftedEntities.result())
  }
}
com.twitter.service.talon.thriftscala._ +import com.twitter.servo.util.FutureArrow +import com.twitter.tco_util.DisplayUrl +import com.twitter.tco_util.TcoUrl +import com.twitter.tweetypie.backends.Talon +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.store.Guano +import com.twitter.tweetypie.thriftscala.ShortenedUrl +import scala.util.control.NoStackTrace + +object UrlShortener { + type Type = FutureArrow[(String, Context), ShortenedUrl] + + case class Context( + tweetId: TweetId, + userId: UserId, + createdAt: Time, + userProtected: Boolean, + clientAppId: Option[Long] = None, + remoteHost: Option[String] = None, + dark: Boolean = false) + + object MalwareUrlError extends Exception with NoStackTrace + object InvalidUrlError extends Exception with NoStackTrace + + /** + * Returns a new UrlShortener that checks the response from the underlying shortner + * and, if the request is not dark but fails with a MalwareUrlError, scribes request + * info to guano. + */ + def scribeMalware(guano: Guano)(underlying: Type): Type = + FutureArrow { + case (longUrl, ctx) => + underlying((longUrl, ctx)).onFailure { + case MalwareUrlError if !ctx.dark => + guano.scribeMalwareAttempt( + Guano.MalwareAttempt( + longUrl, + ctx.userId, + ctx.clientAppId, + ctx.remoteHost + ) + ) + case _ => + } + } + + def fromTalon(talonShorten: Talon.Shorten): Type = { + val log = Logger(getClass) + + FutureArrow { + case (longUrl, ctx) => + val request = + ShortenRequest( + userId = ctx.userId, + longUrl = longUrl, + auditMsg = "tweetypie", + directMessage = Some(false), + protectedAccount = Some(ctx.userProtected), + maxShortUrlLength = None, + tweetData = Some(TweetData(ctx.tweetId, ctx.createdAt.inMilliseconds)), + trafficType = + if (ctx.dark) ShortenTrafficType.Testing + else ShortenTrafficType.Production + ) + + talonShorten(request).flatMap { res => + res.responseCode match { + case ResponseCode.Ok => + if (res.malwareStatus == MalwareStatus.UrlBlocked) { + 
Future.exception(MalwareUrlError) + } else { + val shortUrl = + res.fullShortUrl.getOrElse { + // fall back to fromSlug if talon response does not have the full short url + // Could be replaced with an exception once the initial integration on production + // is done + TcoUrl.fromSlug(res.shortUrl, TcoUrl.isHttps(res.longUrl)) + } + + Future.value( + ShortenedUrl( + shortUrl = shortUrl, + longUrl = res.longUrl, + displayText = DisplayUrl(shortUrl, Some(res.longUrl), true) + ) + ) + } + + case ResponseCode.BadInput => + log.warn(s"Talon rejected URL that Extractor thought was fine: $longUrl") + Future.exception(InvalidUrlError) + + // we shouldn't see other ResponseCodes, because Talon.Shorten translates them to + // exceptions, but we have this catch-all just in case. + case resCode => + log.warn(s"Unexpected response code $resCode for '$longUrl'") + Future.exception(OverCapacity("talon")) + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UserTakedownHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UserTakedownHandler.scala new file mode 100644 index 000000000..1410525d5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UserTakedownHandler.scala @@ -0,0 +1,79 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.store.Takedown +import com.twitter.tweetypie.thriftscala.DataError +import com.twitter.tweetypie.thriftscala.DataErrorCause +import com.twitter.tweetypie.thriftscala.SetTweetUserTakedownRequest + +trait UserTakedownHandler { + val setTweetUserTakedownRequest: FutureArrow[SetTweetUserTakedownRequest, Unit] +} + +/** + * This handler processes SetTweetUserTakedownRequest objects sent to Tweetypie's + * setTweetUserTakedown endpoint. 
These requests originate from tweetypie daemon and the + * request object specifies the user ID of the user who is being modified, and a boolean value + * to indicate whether takedown is being added or removed. + * + * If takedown is being added, the hasTakedown bit is set on all of the user's tweets. + * If takedown is being removed, we can't automatically unset the hasTakedown bit on all tweets + * since some of the tweets might have tweet-specific takedowns, in which case the hasTakedown bit + * needs to remain set. Instead, we flush the user's tweets from cache, and let the repairer + * unset the bit when hydrating tweets where the bit is set but no user or tweet + * takedown country codes are present. + */ +object UserTakedownHandler { + type Type = FutureArrow[SetTweetUserTakedownRequest, Unit] + + def takedownEvent(userHasTakedown: Boolean): Tweet => Option[Takedown.Event] = + tweet => { + val tweetHasTakedown = + TweetLenses.tweetypieOnlyTakedownCountryCodes(tweet).exists(_.nonEmpty) || + TweetLenses.tweetypieOnlyTakedownReasons(tweet).exists(_.nonEmpty) + val updatedHasTakedown = userHasTakedown || tweetHasTakedown + if (updatedHasTakedown == TweetLenses.hasTakedown(tweet)) + None + else + Some( + Takedown.Event( + tweet = TweetLenses.hasTakedown.set(tweet, updatedHasTakedown), + timestamp = Time.now, + eventbusEnqueue = false, + scribeForAudit = false, + updateCodesAndReasons = false + ) + ) + } + + def setHasTakedown( + tweetTakedown: FutureEffect[Takedown.Event], + userHasTakedown: Boolean + ): FutureEffect[Seq[Tweet]] = + tweetTakedown.contramapOption(takedownEvent(userHasTakedown)).liftSeq + + def verifyTweetUserId(expectedUserId: Option[UserId], tweet: Tweet): Unit = { + val tweetUserId: UserId = getUserId(tweet) + val tweetId: Long = tweet.id + expectedUserId.filter(_ != tweetUserId).foreach { u => + throw DataError( + message = + s"SetTweetUserTakedownRequest userId $u does not match userId $tweetUserId for Tweet: $tweetId", + errorCause = 
Some(DataErrorCause.UserTweetRelationship), + ) + } + } + + def apply( + getTweet: FutureArrow[TweetId, Option[Tweet]], + tweetTakedown: FutureEffect[Takedown.Event], + ): Type = + FutureArrow { request => + for { + tweet <- getTweet(request.tweetId) + _ = tweet.foreach(t => verifyTweetUserId(request.userId, t)) + _ <- setHasTakedown(tweetTakedown, request.hasTakedown)(tweet.toSeq) + } yield () + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/WritePathQueryOptions.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/WritePathQueryOptions.scala new file mode 100644 index 000000000..5ef7573f2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/WritePathQueryOptions.scala @@ -0,0 +1,153 @@ +package com.twitter.tweetypie.handler + +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.tweetypie.repository.CacheControl +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.thriftscala.MediaEntity +import com.twitter.tweetypie.thriftscala.StatusCounts +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.thriftscala.WritePathHydrationOptions + +object WritePathQueryOptions { + + /** + * Base TweetQuery.Include for all hydration options. + */ + val BaseInclude: TweetQuery.Include = + GetTweetsHandler.BaseInclude.also( + tweetFields = Set( + Tweet.CardReferenceField.id, + Tweet.MediaTagsField.id, + Tweet.SelfPermalinkField.id, + Tweet.ExtendedTweetMetadataField.id, + Tweet.VisibleTextRangeField.id, + Tweet.NsfaHighRecallLabelField.id, + Tweet.CommunitiesField.id, + Tweet.ExclusiveTweetControlField.id, + Tweet.TrustedFriendsControlField.id, + Tweet.CollabControlField.id, + Tweet.EditControlField.id, + Tweet.EditPerspectiveField.id, + Tweet.NoteTweetField.id + ) + ) + + /** + * Base TweetQuery.Include for all creation-related hydrations. 
+ */ + val BaseCreateInclude: TweetQuery.Include = + BaseInclude + .also( + tweetFields = Set( + Tweet.PlaceField.id, + Tweet.ProfileGeoEnrichmentField.id, + Tweet.SelfThreadMetadataField.id + ), + mediaFields = Set(MediaEntity.AdditionalMetadataField.id), + quotedTweet = Some(true), + pastedMedia = Some(true) + ) + + /** + * Base TweetQuery.Include for all deletion-related hydrations. + */ + val BaseDeleteInclude: TweetQuery.Include = BaseInclude + .also(tweetFields = + Set(Tweet.BounceLabelField.id, Tweet.ConversationControlField.id, Tweet.EditControlField.id)) + + val AllCounts: Set[Short] = StatusCounts.fieldInfos.map(_.tfield.id).toSet + + def insert( + cause: TweetQuery.Cause, + user: User, + options: WritePathHydrationOptions, + isEditControlEdit: Boolean + ): TweetQuery.Options = + createOptions( + writePathHydrationOptions = options, + includePerspective = false, + // include counts if tweet edit, otherwise false + includeCounts = isEditControlEdit, + cause = cause, + forUser = user, + // Do not perform any filtering when we are hydrating the tweet we are creating + safetyLevel = SafetyLevel.FilterNone + ) + + def retweetSourceTweet(user: User, options: WritePathHydrationOptions): TweetQuery.Options = + createOptions( + writePathHydrationOptions = options, + includePerspective = true, + includeCounts = true, + cause = TweetQuery.Cause.Read, + forUser = user, + // If Scarecrow is down, we may proceed with creating a RT. The safetyLevel is necessary + // to prevent so that the inner tweet's count is not sent in the TweetCreateEvent we send + // to EventBus. If this were emitted, live pipeline would publish counts to the clients. 
+ safetyLevel = SafetyLevel.TweetWritesApi + ) + + def quotedTweet(user: User, options: WritePathHydrationOptions): TweetQuery.Options = + createOptions( + writePathHydrationOptions = options, + includePerspective = true, + includeCounts = true, + cause = TweetQuery.Cause.Read, + forUser = user, + // We pass in the safetyLevel so that the inner tweet's are excluded + // from the TweetCreateEvent we send to EventBus. If this were emitted, + // live pipeline would publish counts to the clients. + safetyLevel = SafetyLevel.TweetWritesApi + ) + + private def condSet[A](cond: Boolean, item: A): Set[A] = + if (cond) Set(item) else Set.empty + + private def createOptions( + writePathHydrationOptions: WritePathHydrationOptions, + includePerspective: Boolean, + includeCounts: Boolean, + cause: TweetQuery.Cause, + forUser: User, + safetyLevel: SafetyLevel, + ): TweetQuery.Options = { + val cardsEnabled: Boolean = writePathHydrationOptions.includeCards + val cardsPlatformKeySpecified: Boolean = writePathHydrationOptions.cardsPlatformKey.nonEmpty + val cardsV1Enabled: Boolean = cardsEnabled && !cardsPlatformKeySpecified + val cardsV2Enabled: Boolean = cardsEnabled && cardsPlatformKeySpecified + + TweetQuery.Options( + include = BaseCreateInclude.also( + tweetFields = + condSet(includePerspective, Tweet.PerspectiveField.id) ++ + condSet(cardsV1Enabled, Tweet.CardsField.id) ++ + condSet(cardsV2Enabled, Tweet.Card2Field.id) ++ + condSet(includeCounts, Tweet.CountsField.id) ++ + // for PreviousCountsField, copy includeCounts state on the write path + condSet(includeCounts, Tweet.PreviousCountsField.id) ++ + // hydrate ConversationControl on Reply Tweet creations so clients can consume + Set(Tweet.ConversationControlField.id), + countsFields = if (includeCounts) AllCounts else Set.empty + ), + cause = cause, + forUserId = Some(forUser.id), + cardsPlatformKey = writePathHydrationOptions.cardsPlatformKey, + languageTag = forUser.account.map(_.language).getOrElse("en"), + 
extensionsArgs = writePathHydrationOptions.extensionsArgs, + safetyLevel = safetyLevel, + simpleQuotedTweet = writePathHydrationOptions.simpleQuotedTweet + ) + } + + def deleteTweets: TweetQuery.Options = + TweetQuery.Options( + include = BaseDeleteInclude, + cacheControl = CacheControl.ReadOnlyCache, + extensionsArgs = None, + requireSourceTweet = false // retweet should be deletable even if source tweet missing + ) + + def deleteTweetsWithoutEditControl: TweetQuery.Options = + deleteTweets.copy(enableEditControlHydration = false) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/package.scala new file mode 100644 index 000000000..e9d5021a0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/package.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie + +import com.twitter.context.thriftscala.Viewer +import com.twitter.tweetypie.thriftscala._ + +import scala.util.matching.Regex +import com.twitter.context.TwitterContext +import com.twitter.finagle.stats.Stat +import com.twitter.snowflake.id.SnowflakeId + +package object handler { + type PlaceLanguage = String + type TweetIdGenerator = () => Future[TweetId] + type NarrowcastValidator = FutureArrow[Narrowcast, Narrowcast] + type ReverseGeocoder = FutureArrow[(GeoCoordinates, PlaceLanguage), Option[Place]] + type CardUri = String + + // A narrowcast location can be a PlaceId or a US metro code. 
+ type NarrowcastLocation = String + + val PlaceIdRegex: Regex = """(?i)\A[0-9a-fA-F]{16}\Z""".r + + // Bring Tweetypie permitted TwitterContext into scope + val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + def getContributor(userId: UserId): Option[Contributor] = { + val viewer = TwitterContext().getOrElse(Viewer()) + viewer.authenticatedUserId.filterNot(_ == userId).map(id => Contributor(id)) + } + + def trackLossyReadsAfterWrite(stat: Stat, windowLength: Duration)(tweetId: TweetId): Unit = { + // If the requested Tweet is NotFound, and the tweet age is less than the defined {{windowLength}} duration, + // then we capture the percentiles of when this request was attempted. + // This is being tracked to understand how lossy the reads are directly after tweet creation. + for { + timestamp <- SnowflakeId.timeFromIdOpt(tweetId) + age = Time.now.since(timestamp) + if age.inMillis <= windowLength.inMillis + } yield stat.add(age.inMillis) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/BUILD new file mode 100644 index 000000000..0fb53615d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/BUILD @@ -0,0 +1,58 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "core-app-services/lib:coreservices", + "featureswitches/featureswitches-core:v2", + "featureswitches/featureswitches-core/src/main/scala", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "mediaservices/commons/src/main/thrift:thrift-scala", + "mediaservices/media-util", + "scrooge/scrooge-core", + "tweetypie/servo/repo", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "snowflake/src/main/scala/com/twitter/snowflake/id", + 
"src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/escherbird:tweet-annotation-scala", + "src/thrift/com/twitter/escherbird/common:common-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "stitch/stitch-timelineservice/src/main/scala", + "strato/src/main/scala/com/twitter/strato/access", + "strato/src/main/scala/com/twitter/strato/callcontext", + "tco-util", + "tweet-util", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities", + "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context", + 
"util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + "visibility/common/src/main/thrift/com/twitter/visibility:action-scala", + "visibility/results/src/main/scala/com/twitter/visibility/results/counts", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/Card2Hydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/Card2Hydrator.scala new file mode 100644 index 000000000..08ad91bc8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/Card2Hydrator.scala @@ -0,0 +1,76 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.expandodo.thriftscala.Card2 +import com.twitter.expandodo.thriftscala.Card2RequestOptions +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.CardReferenceUriExtractor +import com.twitter.tweetypie.core.NonTombstone +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object Card2Hydrator { + type Type = ValueHydrator[Option[Card2], Ctx] + + case class Ctx( + urlEntities: Seq[UrlEntity], + mediaEntities: Seq[MediaEntity], + cardReference: Option[CardReference], + underlyingTweetCtx: TweetCtx, + featureSwitchResults: Option[FeatureSwitchResults]) + extends TweetCtx.Proxy + + val hydratedField: FieldByPath = fieldByPath(Tweet.Card2Field) + val hydrationUrlBlockListKey = "card_hydration_blocklist" + + def apply(repo: Card2Repository.Type): ValueHydrator[Option[Card2], Ctx] = + ValueHydrator[Option[Card2], Ctx] { (_, ctx) => + val repoCtx = requestOptions(ctx) + val filterURLs = ctx.featureSwitchResults + .flatMap(_.getStringArray(hydrationUrlBlockListKey, false)) + .getOrElse(Seq()) + + val requests = + ctx.cardReference match { + case Some(CardReferenceUriExtractor(cardUri)) => + cardUri match { + case NonTombstone(uri) if 
!filterURLs.contains(uri) => + Seq((UrlCard2Key(uri), repoCtx)) + case _ => Nil + } + case _ => + ctx.urlEntities + .filterNot(e => e.expanded.exists(filterURLs.contains)) + .map(e => (UrlCard2Key(e.url), repoCtx)) + } + + Stitch + .traverse(requests) { + case (key, opts) => repo(key, opts).liftNotFoundToOption + }.liftToTry.map { + case Return(results) => + results.flatten.lastOption match { + case None => ValueState.UnmodifiedNone + case res => ValueState.modified(res) + } + case Throw(_) => ValueState.partial(None, hydratedField) + } + }.onlyIf { (curr, ctx) => + curr.isEmpty && + ctx.tweetFieldRequested(Tweet.Card2Field) && + ctx.opts.cardsPlatformKey.nonEmpty && + !ctx.isRetweet && + ctx.mediaEntities.isEmpty && + (ctx.cardReference.nonEmpty || ctx.urlEntities.nonEmpty) + } + + private[this] def requestOptions(ctx: Ctx) = + Card2RequestOptions( + platformKey = ctx.opts.cardsPlatformKey.get, + perspectiveUserId = ctx.opts.forUserId, + allowNonTcoUrls = ctx.cardReference.nonEmpty, + languageTag = Some(ctx.opts.languageTag) + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CardHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CardHydrator.scala new file mode 100644 index 000000000..4a267bfb6 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CardHydrator.scala @@ -0,0 +1,47 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.expandodo.thriftscala.Card +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object CardHydrator { + type Type = ValueHydrator[Option[Seq[Card]], Ctx] + + case class Ctx( + urlEntities: Seq[UrlEntity], + mediaEntities: Seq[MediaEntity], + underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + val hydratedField: FieldByPath = fieldByPath(Tweet.CardsField) + + private[this] val 
partialResult = ValueState.partial(None, hydratedField) + + def apply(repo: CardRepository.Type): Type = { + def getCards(url: String): Stitch[Seq[Card]] = + repo(url).handle { case NotFound => Nil } + + ValueHydrator[Option[Seq[Card]], Ctx] { (_, ctx) => + val urls = ctx.urlEntities.map(_.url) + + Stitch.traverse(urls)(getCards _).liftToTry.map { + case Return(cards) => + // even though we are hydrating a type of Option[Seq[Card]], we only + // ever return at most one card, and always the last one. + val res = cards.flatten.lastOption.toSeq + if (res.isEmpty) ValueState.UnmodifiedNone + else ValueState.modified(Some(res)) + case _ => partialResult + } + }.onlyIf { (curr, ctx) => + curr.isEmpty && + ctx.tweetFieldRequested(Tweet.CardsField) && + !ctx.isRetweet && + ctx.mediaEntities.isEmpty + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorHydrator.scala new file mode 100644 index 000000000..8adee73b3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorHydrator.scala @@ -0,0 +1,36 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object ContributorHydrator { + type Type = ValueHydrator[Option[Contributor], TweetCtx] + + val hydratedField: FieldByPath = fieldByPath(Tweet.ContributorField, Contributor.ScreenNameField) + + def once(h: Type): Type = + TweetHydration.completeOnlyOnce( + hydrationType = HydrationType.Contributor, + hydrator = h + ) + + def apply(repo: UserIdentityRepository.Type): Type = + ValueHydrator[Contributor, TweetCtx] { (curr, _) => + repo(UserKey(curr.userId)).liftToTry.map { + case Return(userIdent) => ValueState.delta(curr, update(curr, userIdent)) + case Throw(NotFound) => ValueState.unmodified(curr) 
+ case Throw(_) => ValueState.partial(curr, hydratedField) + } + }.onlyIf((curr, _) => curr.screenName.isEmpty).liftOption + + /** + * Updates a Contributor using the given user data. + */ + private def update(curr: Contributor, userIdent: UserIdentity): Contributor = + curr.copy( + screenName = Some(userIdent.screenName) + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorVisibilityFilter.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorVisibilityFilter.scala new file mode 100644 index 000000000..079b90f78 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorVisibilityFilter.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.thriftscala._ + +/** + * Remove contributor data from tweet if it should not be available to the + * caller. The contributor field is populated in the cached + * [[ContributorHydrator]]. + * + * Contributor data is always available on the write path. It is available on + * the read path for the tweet author (or user authenticated as the tweet + * author in the case of contributors/teams), or if the caller has disabled + * visibility filtering. + * + * The condition for running this filtering hydrator (onlyIf) has been a + * source of confusion. Keep in mind that the condition expresses when to + * *remove* data, not when to return it. 
+ * + * In short, keep data when: + * !reading || requested by author || !(enforce visibility filtering) + * + * Remove data when none of these conditions apply: + * reading && !(requested by author) && enforce visibility filtering + * + */ +object ContributorVisibilityFilter { + type Type = ValueHydrator[Option[Contributor], TweetCtx] + + def apply(): Type = + ValueHydrator + .map[Option[Contributor], TweetCtx] { + case (Some(_), _) => ValueState.modified(None) + case (None, _) => ValueState.unmodified(None) + } + .onlyIf { (_, ctx) => + ctx.opts.cause.reading(ctx.tweetId) && + !ctx.opts.forUserId.contains(ctx.userId) && + ctx.opts.enforceVisibilityFiltering + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationControlHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationControlHydrator.scala new file mode 100644 index 000000000..55df7e8a7 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationControlHydrator.scala @@ -0,0 +1,108 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.repository.ConversationControlRepository +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala.ConversationControl + +private object ReplyTweetConversationControlHydrator { + type Type = ConversationControlHydrator.Type + type Ctx = ConversationControlHydrator.Ctx + + // The conversation control thrift field was added Feb 17th, 2020. + // No conversation before this will have a conversation control field to hydrate. + // We explicitly short circuit to save resources from querying for tweets we + // know do not have conversation control fields set. 
+ val FirstValidDate: Time = Time.fromMilliseconds(1554076800000L) // 2019-04-01T00:00:00Z (this comment previously said 2020-02-17, which would be 1581897600000L) - TODO confirm intended cutoff + + def apply( + repo: ConversationControlRepository.Type, + stats: StatsReceiver + ): Type = { + val exceptionCounter = ExceptionCounter(stats) + + ValueHydrator[Option[ConversationControl], Ctx] { (curr, ctx) => + repo(ctx.conversationId.get, ctx.opts.cacheControl).liftToTry.map { + case Return(conversationControl) => + ValueState.delta(curr, conversationControl) + case Throw(exception) => { + // In the case where we get an exception, we want to count the + // exception but fail open. + exceptionCounter(exception) + + // Reply Tweet Tweet.ConversationControlField hydration should fail open. + // Ideally we would return ValueState.partial here to notify Tweetypie the caller + // that requested the Tweet.ConversationControlField field was not hydrated. + // We cannot do so because GetTweetFields will return TweetFieldsResultFailed + // for partial results which would fail closed. + ValueState.unmodified(curr) + } + } + }.onlyIf { (_, ctx) => + // This hydrator is specifically for replies so only run when Tweet is a reply + ctx.inReplyToTweetId.isDefined && + // See comment for FirstValidDate + ctx.createdAt > FirstValidDate && + // We need conversation id to get ConversationControl + ctx.conversationId.isDefined && + // Only run if the ConversationControl was requested + ctx.tweetFieldRequested(Tweet.ConversationControlField) + } + } +} + +/** + * ConversationControlHydrator is used to hydrate the conversationControl field. + * For root Tweets, this hydrator just passes through the existing conversationControl. + * For reply Tweets, it loads the conversationControl from the root Tweet of the conversation. + * Only root Tweets in a conversation (i.e. the Tweet pointed to by conversationId) have + * a persisted conversationControl, so we have to hydrate that field for all replies in order + * to know if a Tweet in a conversation can be replied to. 
+ */ +object ConversationControlHydrator { + type Type = ValueHydrator[Option[ConversationControl], Ctx] + + case class Ctx(conversationId: Option[ConversationId], underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + private def scrubInviteViaMention( + ccOpt: Option[ConversationControl] + ): Option[ConversationControl] = { + ccOpt collect { + case ConversationControl.ByInvitation(byInvitation) => + ConversationControl.ByInvitation(byInvitation.copy(inviteViaMention = None)) + case ConversationControl.Community(community) => + ConversationControl.Community(community.copy(inviteViaMention = None)) + case ConversationControl.Followers(followers) => + ConversationControl.Followers(followers.copy(inviteViaMention = None)) + } + } + + def apply( + repo: ConversationControlRepository.Type, + disableInviteViaMention: Gate[Unit], + stats: StatsReceiver + ): Type = { + val replyTweetConversationControlHydrator = ReplyTweetConversationControlHydrator( + repo, + stats + ) + + ValueHydrator[Option[ConversationControl], Ctx] { (curr, ctx) => + val ccUpdated = if (disableInviteViaMention()) { + scrubInviteViaMention(curr) + } else { + curr + } + + if (ctx.inReplyToTweetId.isEmpty) { + // For non-reply tweets, pass through the existing conversation control + Stitch.value(ValueState.delta(curr, ccUpdated)) + } else { + replyTweetConversationControlHydrator(ccUpdated, ctx) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationIdHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationIdHydrator.scala new file mode 100644 index 000000000..172ff1746 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationIdHydrator.scala @@ -0,0 +1,33 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +/** + * 
Hydrates the conversationId field for any tweet that is a reply to another tweet. + * It uses that other tweet's conversationId. + */ +object ConversationIdHydrator { + type Type = ValueHydrator[Option[ConversationId], TweetCtx] + + val hydratedField: FieldByPath = + fieldByPath(Tweet.CoreDataField, TweetCoreData.ConversationIdField) + + def apply(repo: ConversationIdRepository.Type): Type = + ValueHydrator[Option[ConversationId], TweetCtx] { (_, ctx) => + ctx.inReplyToTweetId match { + case None => + // Not a reply to another tweet, use tweet id as conversation root + Stitch.value(ValueState.modified(Some(ctx.tweetId))) + case Some(parentId) => + // Lookup conversation id from in-reply-to tweet + repo(ConversationIdKey(ctx.tweetId, parentId)).liftToTry.map { + case Return(rootId) => ValueState.modified(Some(rootId)) + case Throw(_) => ValueState.partial(None, hydratedField) + } + } + }.onlyIf((curr, _) => curr.isEmpty) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationMutedHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationMutedHydrator.scala new file mode 100644 index 000000000..3f6e6ad7e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationMutedHydrator.scala @@ -0,0 +1,54 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala.FieldByPath + +/** + * Hydrates the `conversationMuted` field of Tweet. `conversationMuted` + * will be true if the conversation that this tweet is part of has been + * muted by the user. This field is perspectival, so the result of this + * hydrator should never be cached. 
+ */ +object ConversationMutedHydrator { + type Type = ValueHydrator[Option[Boolean], Ctx] + + case class Ctx(conversationId: Option[TweetId], underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + val hydratedField: FieldByPath = fieldByPath(Tweet.ConversationMutedField) + + private[this] val partialResult = ValueState.partial(None, hydratedField) + private[this] val modifiedTrue = ValueState.modified(Some(true)) + private[this] val modifiedFalse = ValueState.modified(Some(false)) + + def apply(repo: ConversationMutedRepository.Type): Type = { + + ValueHydrator[Option[Boolean], Ctx] { (_, ctx) => + (ctx.opts.forUserId, ctx.conversationId) match { + case (Some(userId), Some(convoId)) => + repo(userId, convoId).liftToTry + .map { + case Return(true) => modifiedTrue + case Return(false) => modifiedFalse + case Throw(_) => partialResult + } + case _ => + ValueState.StitchUnmodifiedNone + } + }.onlyIf { (curr, ctx) => + // It is unlikely that this field will already be set, but if, for + // some reason, this hydrator is run on a tweet that already has + // this value set, we will skip the work to check again. + curr.isEmpty && + // We only hydrate this field if it is explicitly requested. At + // the time of this writing, this field is only used for + // displaying UI for toggling the muted state of the relevant + // conversation. + ctx.tweetFieldRequested(Tweet.ConversationMutedField) && + // Retweets are not part of a conversation, so should not be muted. 
+ !ctx.isRetweet + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CopyFromSourceTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CopyFromSourceTweet.scala new file mode 100644 index 000000000..0e8a9eada --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CopyFromSourceTweet.scala @@ -0,0 +1,229 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.tweettext.TweetText +import com.twitter.tweetypie.thriftscala._ + +object CopyFromSourceTweet { + + /** + * A `ValueHydrator` that copies and/or merges certain fields from a retweet's source + * tweet into the retweet. + */ + def hydrator: ValueHydrator[TweetData, TweetQuery.Options] = + ValueHydrator.map { (td, _) => + td.sourceTweetResult.map(_.value.tweet) match { + case None => ValueState.unmodified(td) + case Some(src) => ValueState.modified(td.copy(tweet = copy(src, td.tweet))) + } + } + + /** + * Updates `dst` with fields from `src`. This is more complicated than you would think, because: + * + * - the tweet has an extra mention entity due to the "RT @user" prefix; + * - the retweet text may be truncated at the end, and doesn't necessarily contain all of the + * the text from the source tweet. truncation may happen in the middle of entity. + * - the text in the retweet may have a different unicode normalization, which affects + * code point indices. this means entities aren't shifted by a fixed amount equal to + * the RT prefix. + * - url entities, when hydrated, may be converted to media entities; url entities may not + * be hydrated in the retweet, so the source tweet may have a media entity that corresponds + * to an unhydrated url entity in the retweet. + * - there may be multiple media entities that map to a single url entity, because the tweet + * may have multiple photos. 
+ */ + def copy(src: Tweet, dst: Tweet): Tweet = { + val srcCoreData = src.coreData.get + val dstCoreData = dst.coreData.get + + // get the code point index of the end of the text + val max = getText(dst).codePointCount(0, getText(dst).length).toShort + + // get all entities from the source tweet, merged into a single list sorted by fromIndex. + val srcEntities = getWrappedEntities(src) + + // same for the retweet, but drop first @mention, add back later + val dstEntities = getWrappedEntities(dst).drop(1) + + // merge indices from dst into srcEntities. at the end, resort entities back + // to their original ordering. for media entities, order matters to clients. + val mergedEntities = merge(srcEntities, dstEntities, max).sortBy(_.position) + + // extract entities back out by type + val mentions = mergedEntities.collect { case WrappedMentionEntity(e, _) => e } + val hashtags = mergedEntities.collect { case WrappedHashtagEntity(e, _) => e } + val cashtags = mergedEntities.collect { case WrappedCashtagEntity(e, _) => e } + val urls = mergedEntities.collect { case WrappedUrlEntity(e, _) => e } + val media = mergedEntities.collect { case WrappedMediaEntity(e, _) => e } + + // merge the updated entities back into the retweet, adding the RT @mention back in + dst.copy( + coreData = Some( + dstCoreData.copy( + hasMedia = srcCoreData.hasMedia, + hasTakedown = dstCoreData.hasTakedown || srcCoreData.hasTakedown + ) + ), + mentions = Some(getMentions(dst).take(1) ++ mentions), + hashtags = Some(hashtags), + cashtags = Some(cashtags), + urls = Some(urls), + media = Some(media.map(updateSourceStatusId(src.id, getUserId(src)))), + quotedTweet = src.quotedTweet, + card2 = src.card2, + cards = src.cards, + language = src.language, + mediaTags = src.mediaTags, + spamLabel = src.spamLabel, + takedownCountryCodes = + mergeTakedowns(Seq(src, dst).map(TweetLenses.takedownCountryCodes.get): _*), + conversationControl = src.conversationControl, + exclusiveTweetControl = 
src.exclusiveTweetControl + ) + } + + /** + * Merges one or more optional lists of takedowns. If no lists are defined, returns None. + */ + private def mergeTakedowns(takedowns: Option[Seq[CountryCode]]*): Option[Seq[CountryCode]] = + if (takedowns.exists(_.isDefined)) { + Some(takedowns.flatten.flatten.distinct.sorted) + } else { + None + } + + /** + * A retweet should never have media without a source_status_id or source_user_id + */ + private def updateSourceStatusId( + srcTweetId: TweetId, + srcUserId: UserId + ): MediaEntity => MediaEntity = + mediaEntity => + if (mediaEntity.sourceStatusId.nonEmpty) { + // when sourceStatusId is set this indicates the media is "pasted media" so the values + // should already be correct (retweeting won't change sourceStatusId / sourceUserId) + mediaEntity + } else { + mediaEntity.copy( + sourceStatusId = Some(srcTweetId), + sourceUserId = Some(mediaEntity.sourceUserId.getOrElse(srcUserId)) + ) + } + + /** + * Attempts to match up entities from the source tweet with entities from the retweet, + * and to use the source tweet entities but shifted to the retweet entity indices. If an entity + * got truncated at the end of the retweet text, we drop it and any following entities. + */ + private def merge( + srcEntities: List[WrappedEntity], + rtEntities: List[WrappedEntity], + maxIndex: Short + ): List[WrappedEntity] = { + (srcEntities, rtEntities) match { + case (Nil, Nil) => + // successfully matched all entities! + Nil + + case (Nil, _) => + // no more source tweet entities, but we still have remaining retweet entities. + // this can happen if a text truncation turns something invalid like #tag1#tag2 or + // @mention1@mention2 into a valid entity. just drop all the remaining retweet entities. + Nil + + case (_, Nil) => + // no more retweet entities, which means the remaining entities have been truncated. + Nil + + case (srcHead :: _, rtHead :: _) => + // we have more entities from the source tweet and the retweet. 
typically, we can + // match these entities because they have the same normalized text. a retweet entity + // that was truncated at the end of the text will not match exactly; prefix matching + // is not implemented here, so it falls into the mismatch branch below and is dropped + // together with all following entities. + val exactMatch = srcHead.normalizedText == rtHead.normalizedText + + if (exactMatch) { + // there could be multiple media entities for the same t.co url, so we need to find + // contiguous groupings of entities that share the same fromIndex. + val rtTail = rtEntities.dropWhile(_.fromIndex == rtHead.fromIndex) + val srcGroup = + srcEntities + .takeWhile(_.fromIndex == srcHead.fromIndex) + .map(_.shift(rtHead.fromIndex, rtHead.toIndex)) + val srcTail = srcEntities.drop(srcGroup.size) + + srcGroup ++ merge(srcTail, rtTail, maxIndex) + } else { + // if we encounter a mismatch, it is most likely because of truncation, + // so we stop here. + Nil + } + } + } + + /** + * Wraps all the entities with the appropriate WrappedEntity subclasses, merges them into + * a single list, and sorts by fromIndex. + */ + private def getWrappedEntities(tweet: Tweet): List[WrappedEntity] = + (getUrls(tweet).zipWithIndex.map { case (e, p) => WrappedUrlEntity(e, p) } ++ + getMedia(tweet).zipWithIndex.map { case (e, p) => WrappedMediaEntity(e, p) } ++ + getMentions(tweet).zipWithIndex.map { case (e, p) => WrappedMentionEntity(e, p) } ++ + getHashtags(tweet).zipWithIndex.map { case (e, p) => WrappedHashtagEntity(e, p) } ++ + getCashtags(tweet).zipWithIndex.map { case (e, p) => WrappedCashtagEntity(e, p) }) + .sortBy(_.fromIndex) + .toList + + /** + * The thrift-entity classes don't share a common entity parent class, so we wrap + * them with a class that allows us to mix entities together into a single list, and + * to provide a generic interface for shifting indicies. 
+ */ + private sealed abstract class WrappedEntity( + val fromIndex: Short, + val toIndex: Short, + val rawText: String) { + + /** the original position of the entity within the entity group */ + val position: Int + + val normalizedText: String = TweetText.nfcNormalize(rawText).toLowerCase + + def shift(fromIndex: Short, toIndex: Short): WrappedEntity + } + + private case class WrappedUrlEntity(entity: UrlEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.url) { + override def shift(fromIndex: Short, toIndex: Short): WrappedUrlEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } + + private case class WrappedMediaEntity(entity: MediaEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.url) { + override def shift(fromIndex: Short, toIndex: Short): WrappedMediaEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } + + private case class WrappedMentionEntity(entity: MentionEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.screenName) { + override def shift(fromIndex: Short, toIndex: Short): WrappedMentionEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } + + private case class WrappedHashtagEntity(entity: HashtagEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.text) { + override def shift(fromIndex: Short, toIndex: Short): WrappedHashtagEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } + + private case class WrappedCashtagEntity(entity: CashtagEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.text) { + override def shift(fromIndex: Short, toIndex: Short): WrappedCashtagEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CreatedAtRepairer.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CreatedAtRepairer.scala new file mode 100644 index 000000000..88d3fca3e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CreatedAtRepairer.scala @@ -0,0 +1,49 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.conversions.DurationOps._ +import com.twitter.snowflake.id.SnowflakeId + +object CreatedAtRepairer { + // no createdAt value should be less than this (2006-01-01T00:00:00Z, in epoch milliseconds) + val jan_01_2006: Long = 1136073600000L + + // no non-snowflake createdAt value should be greater than this (2011-01-01T00:00:00Z, in epoch milliseconds) + val jan_01_2011: Long = 1293840000000L + + // allow createdAt timestamp to be up to this amount (in milliseconds) off from the + // snowflake id before applying the correction. + val varianceThreshold: Long = 10.minutes.inMilliseconds +} + +/** + * Detects tweets with bad createdAt timestamps and attempts to fix, if possible + * using the snowflake id. pre-snowflake tweets are left unmodified (only scribed when out of range). + */ +class CreatedAtRepairer(scribe: FutureEffect[String]) extends Mutation[Tweet] { + import CreatedAtRepairer._ + + def apply(tweet: Tweet): Option[Tweet] = { + assert(tweet.coreData.nonEmpty, "tweet core data is missing") + val createdAtMillis = getCreatedAt(tweet) * 1000 + + if (SnowflakeId.isSnowflakeId(tweet.id)) { + val snowflakeMillis = SnowflakeId(tweet.id).unixTimeMillis.asLong + val diff = (snowflakeMillis - createdAtMillis).abs + + if (diff >= varianceThreshold) { + scribe(s"${tweet.id}\t$createdAtMillis") + val snowflakeSeconds = snowflakeMillis / 1000 + Some(TweetLenses.createdAt.set(tweet, snowflakeSeconds)) + } else { + None + } + } else { + // not a snowflake id, hard to repair, so just log it + if (createdAtMillis < jan_01_2006 || createdAtMillis > jan_01_2011) { + scribe(s"${tweet.id}\t$createdAtMillis") + } + None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DeviceSourceHydrator.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DeviceSourceHydrator.scala
new file mode 100644
index 000000000..c1a0c5fcd
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DeviceSourceHydrator.scala
@@ -0,0 +1,33 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.NotFound
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.serverutil.DeviceSourceParser
+import com.twitter.tweetypie.thriftscala.DeviceSource
+import com.twitter.tweetypie.thriftscala.FieldByPath
+
+/**
+ * Hydrates the tweet's device source from its created_via value.
+ */
+object DeviceSourceHydrator {
+  type Type = ValueHydrator[Option[DeviceSource], TweetCtx]
+
+  // WebOauthId is the created_via value for Macaw-Swift through Woodstar.
+  // It is looked up as "web" so that it yields the same device_source,
+  // since multiple created_via strings can't be mapped to one device_source.
+  val WebOauthId: String = s"oauth:${DeviceSourceParser.Web}"
+
+  val hydratedField: FieldByPath = fieldByPath(Tweet.DeviceSourceField)
+
+  /** Maps [[WebOauthId]] to "web"; every other created_via value is returned as is. */
+  private def convertForWeb(createdVia: String) =
+    createdVia match {
+      case WebOauthId => "web"
+      case other => other
+    }
+
+  /**
+   * Runs only when no device source is set yet and the field was requested.
+   * A found source marks the value modified, NotFound leaves None unmodified,
+   * any other failure marks [[hydratedField]] as partial.
+   */
+  def apply(repo: DeviceSourceRepository.Type): Type =
+    ValueHydrator[Option[DeviceSource], TweetCtx] { (_, ctx) =>
+      val lookupKey = convertForWeb(ctx.createdVia)
+      val attempt = repo(lookupKey).liftToTry
+      attempt.map {
+        case Return(found) => ValueState.modified(Some(found))
+        case Throw(NotFound) => ValueState.UnmodifiedNone
+        case Throw(_) => ValueState.partial(None, hydratedField)
+      }
+    }.onlyIf { (current, ctx) =>
+      current.isEmpty && ctx.tweetFieldRequested(Tweet.DeviceSourceField)
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DirectedAtHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DirectedAtHydrator.scala
new file mode 100644
index 000000000..a64d91c2e
--- /dev/null
+++
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DirectedAtHydrator.scala
@@ -0,0 +1,92 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.finagle.stats.NullStatsReceiver
+import com.twitter.stitch.NotFound
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Hydrates the "directedAtUser" field on the tweet. This hydrator uses one of two paths,
+ * depending on whether DirectedAtUserMetadata is present:
+ *
+ * 1. If DirectedAtUserMetadata exists, we use metadata.userId.
+ * 2. If DirectedAtUserMetadata does not exist, we use the User screenName from the mention
+ *    starting at index 0 if the tweet also has a reply. Creation of a "reply to user" for
+ *    leading @mentions is controlled by PostTweetRequest.enableTweetToNarrowcasting
+ */
+object DirectedAtHydrator {
+  type Type = ValueHydrator[Option[DirectedAtUser], Ctx]
+
+  case class Ctx(
+    mentions: Seq[MentionEntity],
+    metadata: Option[DirectedAtUserMetadata],
+    underlyingTweetCtx: TweetCtx)
+      extends TweetCtx.Proxy {
+
+    /** Screen name of the first mention, provided that mention starts at index 0. */
+    val directedAtScreenName: Option[String] =
+      mentions.headOption.collect {
+        case leading if leading.fromIndex == 0 => leading.screenName
+      }
+  }
+
+  val hydratedField: FieldByPath =
+    fieldByPath(Tweet.CoreDataField, TweetCoreData.DirectedAtUserField)
+
+  /** Passes `h` to `TweetHydration.completeOnlyOnce` under `HydrationType.DirectedAt`. */
+  def once(h: Type): Type =
+    TweetHydration.completeOnlyOnce(
+      hydrationType = HydrationType.DirectedAt,
+      hydrator = h
+    )
+
+  private val partial = ValueState.partial(None, hydratedField)
+
+  /**
+   * Builds the hydrator; it only runs when the current value is empty.
+   *
+   * @param repo  user lookup, queried by id (path 1) or by screen name (path 2)
+   * @param stats receives the "with_metadata", "no_screen_name" and "without_metadata" counters
+   */
+  def apply(repo: UserIdentityRepository.Type, stats: StatsReceiver = NullStatsReceiver): Type = {
+    val withMetadata = stats.counter("with_metadata")
+    val noScreenName = stats.counter("no_screen_name")
+    val withoutMetadata = stats.counter("without_metadata")
+
+    ValueHydrator[Option[DirectedAtUser], Ctx] { (_, ctx) =>
+      ctx.metadata match {
+        case Some(DirectedAtUserMetadata(Some(uid))) =>
+          // 1a. metadata exists and carries a user id: rely exclusively on it
+          withMetadata.incr()
+
+          repo(UserKey.byId(uid)).liftToTry.map {
+            case Return(user) =>
+              ValueState.modified(Some(DirectedAtUser(user.id, user.screenName)))
+            case Throw(NotFound) =>
+              // user not found: fall back to the leading mention's screen name
+              ctx.directedAtScreenName match {
+                case Some(screenName) =>
+                  ValueState.modified(Some(DirectedAtUser(uid, screenName)))
+                case None =>
+                  // This should never happen, but let's make sure with a counter
+                  noScreenName.incr()
+                  ValueState.UnmodifiedNone
+              }
+            case Throw(_) => partial
+          }
+
+        case Some(DirectedAtUserMetadata(None)) =>
+          // 1b. metadata exists but has no userId: nothing to hydrate
+          withMetadata.incr()
+          ValueState.StitchUnmodifiedNone
+
+        case None =>
+          // 2. no metadata: look for a leading mention, but only when the tweet is a reply
+          withoutMetadata.incr()
+
+          val lookup =
+            for {
+              screenName <- ctx.directedAtScreenName
+              if ctx.isReply
+            } yield repo(UserKey.byScreenName(screenName))
+
+          lookup.getOrElse(Stitch.NotFound).liftToTry.map {
+            case Return(user) =>
+              ValueState.modified(Some(DirectedAtUser(user.id, user.screenName)))
+            case Throw(NotFound) => ValueState.UnmodifiedNone
+            case Throw(_) => partial
+          }
+      }
+    }.onlyIf { (current, _) => current.isEmpty }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditControlHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditControlHydrator.scala
new file mode 100644
index 000000000..8d3c5d8e2
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditControlHydrator.scala
@@ -0,0 +1,132 @@
+package com.twitter.tweetypie.hydrator
+
+import com.twitter.servo.util.Gate
+import com.twitter.spam.rtf.thriftscala.SafetyLevel
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.StatsReceiver
+import com.twitter.tweetypie.Tweet
+import
com.twitter.tweetypie.core.ValueState
+import com.twitter.tweetypie.repository.TweetQuery
+import com.twitter.tweetypie.repository.TweetRepository
+import com.twitter.tweetypie.util.EditControlUtil
+import com.twitter.tweetypie.serverutil.ExceptionCounter
+import com.twitter.tweetypie.thriftscala.EditControl
+import com.twitter.tweetypie.thriftscala.EditControlInitial
+import com.twitter.tweetypie.thriftscala.FieldByPath
+import com.twitter.tweetypie.util.TweetEditFailure.TweetEditGetInitialEditControlException
+import com.twitter.tweetypie.util.TweetEditFailure.TweetEditInvalidEditControlException
+
+/**
+ * EditControlHydrator is used to hydrate the EditControlEdit arm of the editControl field.
+ *
+ * For Tweets without edits and for initial Tweets with subsequent edit(s), this hydrator
+ * passes through the existing editControl (either None or EditControlInitial).
+ *
+ * For edit Tweets, it hydrates the initial Tweet's edit control, set as a field on
+ * the edit control of the edit Tweet and returns the new edit control.
+ */
+object EditControlHydrator {
+  type Type = ValueHydrator[Option[EditControl], TweetCtx]
+
+  val hydratedField: FieldByPath = fieldByPath(Tweet.EditControlField)
+
+  /**
+   * Builds the hydrator; it only runs when `ctx.opts.enableEditControlHydration` is set.
+   *
+   * @param repo used to load the initial tweet of an edit tweet
+   * @param setEditTimeWindowToSixtyMinutes passed through to
+   *        `EditControlUtil.makeEditControlInitial` when no edit control is stored
+   * @param stats receives the counters declared below and the exception counter
+   */
+  def apply(
+    repo: TweetRepository.Type,
+    setEditTimeWindowToSixtyMinutes: Gate[Unit],
+    stats: StatsReceiver
+  ): Type = {
+    val exceptionCounter = ExceptionCounter(stats)
+
+    // Count hydration of edit control for tweets that were written before writing edit control initial.
+    val noEditControlHydration = stats.counter("noEditControlHydration")
+    // Count hydration of edit control edit tweets
+    val editControlEditHydration = stats.counter("editControlEditHydration")
+    // Count edit control edit hydration which successfully found an edit control initial
+    val editControlEditHydrationSuccessful = stats.counter("editControlEditHydration", "success")
+    // Count of initial tweets being hydrated.
+    val editControlInitialHydration = stats.counter("editControlInitialHydration")
+    // Count of edits loaded where the ID of edit is not present in the initial tweet
+    val editTweetIdsMissingAnEdit = stats.counter("editTweetIdsMissingAnEdit")
+    // Count hydrated tweets where edit control is set, but neither initial nor edit
+    val unknownUnionVariant = stats.counter("unknownEditControlUnionVariant")
+
+    ValueHydrator[Option[EditControl], TweetCtx] { (curr, ctx) =>
+      curr match {
+        // Tweet was created before we write edit control - hydrate the value at read.
+        case None =>
+          noEditControlHydration.incr()
+          val editControl = EditControlUtil.makeEditControlInitial(
+            ctx.tweetId,
+            ctx.createdAt,
+            setEditTimeWindowToSixtyMinutes)
+          Stitch.value(ValueState.delta(curr, Some(editControl)))
+        // Tweet is an initial tweet
+        case Some(EditControl.Initial(_)) =>
+          editControlInitialHydration.incr()
+          Stitch.value(ValueState.unmodified(curr))
+
+        // Tweet is an edited version
+        case Some(EditControl.Edit(edit)) =>
+          editControlEditHydration.incr()
+          getInitialTweet(repo, edit.initialTweetId, ctx)
+            .flatMap(getEditControlInitial(ctx))
+            .map { initial: Option[EditControlInitial] =>
+              // reached only when both the fetch and getEditControlInitial succeeded
+              editControlEditHydrationSuccessful.incr()
+
+              initial.foreach { initialTweet =>
+                // We are able to fetch the initial tweet for this edit but this edit tweet is
+                // not present in the initial's editTweetIds list
+                if (!initialTweet.editTweetIds.contains(ctx.tweetId)) {
+                  editTweetIdsMissingAnEdit.incr()
+                }
+              }
+
+              val updated = edit.copy(editControlInitial = initial)
+              ValueState.delta(curr, Some(EditControl.Edit(updated)))
+            }
+            .onFailure(exceptionCounter(_))
+        case Some(_) => // Unknown union variant
+          unknownUnionVariant.incr()
+          Stitch.exception(TweetEditInvalidEditControlException)
+      }
+    }.onlyIf { (_, ctx) => ctx.opts.enableEditControlHydration }
+  }
+
+  /**
+   * Loads the tweet `initialTweetId` through `repo`, including only the edit-control field,
+   * with visibility filtering disabled and `SafetyLevel.FilterNone`. `cacheControl` and
+   * `fetchStoredTweets` are taken from `ctx.opts`.
+   */
+  def getInitialTweet(
+    repo: TweetRepository.Type,
+    initialTweetId: Long,
+    ctx: TweetCtx,
+  ): Stitch[Tweet] = {
+    val options = TweetQuery.Options(
+      include = TweetQuery.Include(Set(Tweet.EditControlField.id)),
+      cacheControl = ctx.opts.cacheControl,
+      enforceVisibilityFiltering = false,
+      safetyLevel = SafetyLevel.FilterNone,
+      fetchStoredTweets = ctx.opts.fetchStoredTweets
+    )
+    repo(initialTweetId, options)
+  }
+
+  /**
+   * Returns a function extracting the `EditControlInitial` from an initial tweet.
+   *
+   * When the initial tweet's edit control is not the `Initial` arm, the function yields
+   * `None` if `ctx.opts.fetchStoredTweets` is set, and otherwise fails with
+   * `TweetEditGetInitialEditControlException`.
+   */
+  def getEditControlInitial(ctx: TweetCtx): Tweet => Stitch[Option[EditControlInitial]] = {
+    initialTweet: Tweet =>
+      initialTweet.editControl match {
+        case Some(EditControl.Initial(initial)) =>
+          Stitch.value(
+            if (ctx.opts.cause.writing(ctx.tweetId)) {
+              // On the write path we hydrate edit control initial
+              // as if the initial tweet is already updated.
+              Some(EditControlUtil.plusEdit(initial, ctx.tweetId))
+            } else {
+              Some(initial)
+            }
+          )
+        case _ if ctx.opts.fetchStoredTweets =>
+          // If the fetchStoredTweets parameter is set to true, it means we're fetching
+          // and hydrating tweets regardless of state. In this case, if the initial tweet
+          // doesn't exist, we return None here to ensure we still hydrate and return the
+          // current edit tweet.
+          Stitch.None
+        case _ => Stitch.exception(TweetEditGetInitialEditControlException)
+      }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditHydrator.scala
new file mode 100644
index 000000000..d14dad52c
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditHydrator.scala
@@ -0,0 +1,63 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core.EditState
+
+/**
+ * An EditHydrator hydrates a value of type `A`, with a hydration context of type `C`,
+ * and produces a function that takes a value and context and returns an EditState[A, C]
+ * (an EditState encapsulates a function that takes a value and returns a new ValueState).
+ *
+ * A series of EditHydrators of the same type may be run in parallel via
+ * `EditHydrator.inParallel`.
+ */
+class EditHydrator[A, C] private (val run: (A, C) => Stitch[EditState[A]]) {
+
+  /**
+   * Apply this hydrator to a value, producing an EditState.
+   */
+  def apply(a: A, ctx: C): Stitch[EditState[A]] = run(a, ctx)
+
+  /**
+   * Convert this EditHydrator to the equivalent ValueHydrator: the produced
+   * EditState is run against the same input value.
+   */
+  def toValueHydrator: ValueHydrator[A, C] =
+    ValueHydrator[A, C] { (a, ctx) => this.run(a, ctx).map(editState => editState.run(a)) }
+
+  /**
+   * Runs two EditHydrators in parallel on the same input and context, combining
+   * the resulting EditStates with `andThen` (this one first, then `next`).
+   */
+  def inParallelWith(next: EditHydrator[A, C]): EditHydrator[A, C] =
+    EditHydrator[A, C] { (x0, ctx) =>
+      Stitch.joinMap(run(x0, ctx), next.run(x0, ctx)) {
+        case (r1, r2) => r1.andThen(r2)
+      }
+    }
+}
+
+object EditHydrator {
+
+  /**
+   * Create an EditHydrator from a function that returns Stitch[EditState[A]].
+   */
+  def apply[A, C](f: (A, C) => Stitch[EditState[A]]): EditHydrator[A, C] =
+    new EditHydrator[A, C](f)
+
+  /**
+   * Creates a "passthrough" Edit:
+   * Leaves A unchanged and produces empty HydrationState.
+   */
+  def unit[A, C]: EditHydrator[A, C] =
+    EditHydrator { (_, _) => Stitch.value(EditState.unit[A]) }
+
+  /**
+   * Runs several EditHydrators in parallel, combined left to right with `inParallelWith`.
+   *
+   * A single hydrator is returned as is. With no hydrators at all the result is the
+   * passthrough [[unit]]; this case used to throw because `reduceLeft` rejects an empty Seq.
+   */
+  def inParallel[A, C](bs: EditHydrator[A, C]*): EditHydrator[A, C] =
+    bs.reduceLeftOption(_.inParallelWith(_)).getOrElse(unit[A, C])
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditPerspectiveHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditPerspectiveHydrator.scala
new file mode 100644
index 000000000..bc6ed36ef
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditPerspectiveHydrator.scala
@@ -0,0 +1,179 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.featureswitches.v2.FeatureSwitchResults
+import com.twitter.spam.rtf.thriftscala.SafetyLevel
+import com.twitter.stitch.Stitch
+import com.twitter.stitch.timelineservice.TimelineService.GetPerspectives.Query
+import com.twitter.tweetypie.core.ValueState
+import com.twitter.tweetypie.repository.PerspectiveRepository
+import com.twitter.tweetypie.thriftscala.EditControl
+import com.twitter.tweetypie.thriftscala.FieldByPath
+import com.twitter.tweetypie.thriftscala.StatusPerspective
+import com.twitter.tweetypie.thriftscala.TweetPerspective
+
+/**
+ * Hydrates the edit perspective of a tweet: the favorited / retweeted / bookmarked flags
+ * aggregated over the current tweet's perspective and the perspectives of the other
+ * tweet ids listed in its edit control.
+ */
+object EditPerspectiveHydrator {
+
+  type Type = ValueHydrator[Option[TweetPerspective], Ctx]
+  val HydratedField: FieldByPath = fieldByPath(Tweet.EditPerspectiveField)
+
+  case class Ctx(
+    currentTweetPerspective: Option[StatusPerspective],
+    editControl: Option[EditControl],
+    featureSwitchResults: Option[FeatureSwitchResults],
+    underlyingTweetCtx: TweetCtx)
+      extends TweetCtx.Proxy
+
+  // Timeline safety levels determine some part of high level traffic
+  // that we might want to turn off with a decider if edits traffic
+  // is too big for perspectives to handle. The decider allows us
+  // to turn down the traffic without the impact on tweet detail.
+  val TimelinesSafetyLevels: Set[SafetyLevel] = Set(
+    SafetyLevel.TimelineFollowingActivity,
+    SafetyLevel.TimelineHome,
+    SafetyLevel.TimelineConversations,
+    SafetyLevel.DeprecatedTimelineConnect,
+    SafetyLevel.TimelineMentions,
+    SafetyLevel.DeprecatedTimelineActivity,
+    SafetyLevel.TimelineFavorites,
+    SafetyLevel.TimelineLists,
+    SafetyLevel.TimelineInjection,
+    SafetyLevel.StickersTimeline,
+    SafetyLevel.LiveVideoTimeline,
+    SafetyLevel.QuoteTweetTimeline,
+    SafetyLevel.TimelineHomeLatest,
+    SafetyLevel.TimelineLikedBy,
+    SafetyLevel.TimelineRetweetedBy,
+    SafetyLevel.TimelineBookmark,
+    SafetyLevel.TimelineMedia,
+    SafetyLevel.TimelineReactiveBlending,
+    SafetyLevel.TimelineProfile,
+    SafetyLevel.TimelineFocalTweet,
+    SafetyLevel.TimelineHomeRecommendations,
+    SafetyLevel.NotificationsTimelineDeviceFollow,
+    SafetyLevel.TimelineConversationsDownranking,
+    SafetyLevel.TimelineHomeTopicFollowRecommendations,
+    SafetyLevel.TimelineHomeHydration,
+    SafetyLevel.FollowedTopicsTimeline,
+    SafetyLevel.ModeratedTweetsTimeline,
+    SafetyLevel.TimelineModeratedTweetsHydration,
+    SafetyLevel.ElevatedQuoteTweetTimeline,
+    SafetyLevel.TimelineConversationsDownrankingMinimal,
+    SafetyLevel.BirdwatchNoteTweetsTimeline,
+    SafetyLevel.TimelineSuperLikedBy,
+    SafetyLevel.UserScopedTimeline,
+    SafetyLevel.TweetScopedTimeline,
+    SafetyLevel.TimelineHomePromotedHydration,
+    SafetyLevel.NearbyTimeline,
+    SafetyLevel.TimelineProfileAll,
+    SafetyLevel.TimelineProfileSuperFollows,
+    SafetyLevel.SpaceTweetAvatarHomeTimeline,
+    SafetyLevel.SpaceHomeTimelineUpranking,
+    SafetyLevel.BlockMuteUsersTimeline,
+    SafetyLevel.RitoActionedTweetTimeline,
+    SafetyLevel.TimelineScorer,
+    SafetyLevel.ArticleTweetTimeline,
+    SafetyLevel.DesQuoteTweetTimeline,
+    SafetyLevel.EditHistoryTimeline,
+    SafetyLevel.DirectMessagesConversationTimeline,
+    SafetyLevel.DesHomeTimeline,
+    SafetyLevel.TimelineContentControls,
+    SafetyLevel.TimelineFavoritesSelfView,
+    SafetyLevel.TimelineProfileSpaces,
+  )
+  val TweetDetailSafetyLevels: Set[SafetyLevel] = Set(
+    SafetyLevel.TweetDetail,
+    SafetyLevel.TweetDetailNonToo,
+    SafetyLevel.TweetDetailWithInjectionsHydration,
+    SafetyLevel.DesTweetDetail,
+  )
+
+  /**
+   * Builds the hydrator. It only runs when the current value is empty, a `forUserId`
+   * is present and the edit perspective field was requested.
+   *
+   * @param repo perspective lookup, queried once per tweet id to aggregate
+   * @param timelinesGate gates lookups for safety levels in [[TimelinesSafetyLevels]]
+   * @param tweetDetailsGate gates lookups for safety levels in [[TweetDetailSafetyLevels]]
+   * @param otherSafetyLevelsGate gates lookups for every other safety level
+   * @param bookmarksGate passed to `PerspectiveHydrator.evaluatePerspectiveTypes`
+   * @param stats receives the per-safety-level and aggregated lookup counters
+   */
+  def apply(
+    repo: PerspectiveRepository.Type,
+    timelinesGate: Gate[Unit],
+    tweetDetailsGate: Gate[Unit],
+    otherSafetyLevelsGate: Gate[Unit],
+    bookmarksGate: Gate[Long],
+    stats: StatsReceiver
+  ): Type = {
+
+    val statsByLevel =
+      SafetyLevel.list.map { level =>
+        (level, stats.counter("perspective_by_safety_label", level.name, "calls"))
+      }.toMap
+    val editsAggregated = stats.counter("edit_perspective", "edits_aggregated")
+
+    ValueHydrator[Option[TweetPerspective], Ctx] { (curr, ctx) =>
+      val safetyLevel = ctx.opts.safetyLevel
+      val lookupsDecider =
+        if (TimelinesSafetyLevels.contains(safetyLevel)) timelinesGate
+        else if (TweetDetailSafetyLevels.contains(safetyLevel)) tweetDetailsGate
+        else otherSafetyLevelsGate
+
+      // when the gate is closed nothing is looked up and only the current perspective is used
+      val tweetIds: Seq[TweetId] = if (lookupsDecider()) tweetIdsToAggregate(ctx).toSeq else Seq()
+      statsByLevel
+        .getOrElse(
+          safetyLevel,
+          stats.counter("perspective_by_safety_label", safetyLevel.name, "calls"))
+        .incr(tweetIds.size)
+      editsAggregated.incr(tweetIds.size)
+
+      Stitch
+        .traverse(tweetIds) { id =>
+          // forUserId.get is safe here: the onlyIf below requires forUserId.isDefined
+          repo(
+            Query(
+              ctx.opts.forUserId.get,
+              id,
+              PerspectiveHydrator.evaluatePerspectiveTypes(
+                ctx.opts.forUserId.get,
+                bookmarksGate,
+                ctx.featureSwitchResults))).liftToTry
+        }.map { seq =>
+          if (seq.isEmpty) {
+            val editPerspective = ctx.currentTweetPerspective.map { c =>
+              TweetPerspective(
+                c.favorited,
+                c.retweeted,
+                c.bookmarked
+              )
+            }
+            ValueState.delta(curr, editPerspective)
+          } else {
+            // a flag is set when any successful lookup or the current perspective has it set
+            val returns = seq.collect { case Return(r) => r }
+            val aggregate = Some(
+              TweetPerspective(
+                favorited =
+                  returns.exists(_.favorited) || ctx.currentTweetPerspective.exists(_.favorited),
+                retweeted =
+                  returns.exists(_.retweeted) || ctx.currentTweetPerspective.exists(_.retweeted),
+                bookmarked = Some(
+                  returns.exists(_.bookmarked.contains(true)) || ctx.currentTweetPerspective.exists(
+                    _.bookmarked.contains(true)))
+              )
+            )
+
+            // any failed lookup makes the aggregate partial
+            if (seq.exists(_.isThrow)) {
+              ValueState.partial(aggregate, HydratedField)
+            } else {
+              ValueState.modified(aggregate)
+            }
+          }
+        }
+    }.onlyIf { (curr, ctx) =>
+      curr.isEmpty &&
+      ctx.opts.forUserId.isDefined &&
+      ctx.tweetFieldRequested(Tweet.EditPerspectiveField)
+    }
+  }
+
+  /**
+   * The `editTweetIds` of the edit control's initial arm (directly, or via an edit's
+   * `editControlInitial`), minus the current tweet id; empty when none is available.
+   */
+  private def tweetIdsToAggregate(ctx: Ctx): Set[TweetId] = {
+    ctx.editControl
+      .flatMap {
+        case EditControl.Initial(initial) => Some(initial)
+        case EditControl.Edit(edit) => edit.editControlInitial
+        case _ => None
+      }
+      .map(_.editTweetIds.toSet)
+      .getOrElse(Set()) - ctx.tweetId
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EscherbirdAnnotationHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EscherbirdAnnotationHydrator.scala
new file mode 100644
index 000000000..578af57e5
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EscherbirdAnnotationHydrator.scala
@@ -0,0 +1,22 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala.EscherbirdEntityAnnotations
+import com.twitter.tweetypie.thriftscala.FieldByPath
+
+/**
+ * Hydrates Escherbird entity annotations by passing the whole tweet to the repository.
+ */
+object EscherbirdAnnotationHydrator {
+  type Type = ValueHydrator[Option[EscherbirdEntityAnnotations], Tweet]
+
+  val hydratedField: FieldByPath = fieldByPath(Tweet.EscherbirdEntityAnnotationsField)
+
+  /**
+   * Found annotations replace the current value; an empty result keeps the current value
+   * unmodified; a repository failure keeps it and marks [[hydratedField]] as partial.
+   */
+  def apply(repo: EscherbirdAnnotationRepository.Type): Type =
+    ValueHydrator[Option[EscherbirdEntityAnnotations], Tweet] { (curr, tweet) =>
+      repo(tweet).liftToTry.map {
+        case Return(Some(anns)) => ValueState.modified(Some(anns))
+        case Return(None) => ValueState.unmodified(curr)
+        case Throw(_) => ValueState.partial(curr, hydratedField)
+      }
+    }
+}
diff --git
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/FeatureSwitchResultsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/FeatureSwitchResultsHydrator.scala new file mode 100644 index 000000000..8931f153c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/FeatureSwitchResultsHydrator.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.context.thriftscala.Viewer +import com.twitter.featureswitches.FSRecipient +import com.twitter.featureswitches.UserAgent +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.strato.callcontext.CallContext +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core.ValueState + +/** + * Hydrate Feature Switch results in TweetData. We can do this once at the + * start of the hydration pipeline so that the rest of the hydrators can + * use the Feature Switch values. 
+ */
+object FeatureSwitchResultsHydrator {
+
+  /**
+   * Builds a hydrator that matches a feature-switch recipient for the current viewer
+   * and stores the results on the tweet data.
+   *
+   * @param featureSwitchesWithoutExperiments feature switches the recipient is matched against
+   * @param clientIdHelper source of the "thrift_client_id" custom field
+   */
+  def apply(
+    featureSwitchesWithoutExperiments: FeatureSwitches,
+    clientIdHelper: ClientIdHelper
+  ): TweetDataValueHydrator = ValueHydrator.map { (td, opts) =>
+    // fall back to an empty Viewer when no TwitterContext is present
+    val viewer = TwitterContext().getOrElse(Viewer())
+
+    val baseRecipient = FSRecipient(
+      userId = viewer.userId,
+      clientApplicationId = viewer.clientApplicationId,
+      userAgent = viewer.userAgent.flatMap(UserAgent(_)),
+    )
+
+    val thriftClientId =
+      clientIdHelper.effectiveClientIdRoot.getOrElse(ClientIdHelper.UnknownClientId)
+    val forwardedServiceId =
+      CallContext.forwardedServiceIdentifier.map(_.toString).getOrElse(EmptyServiceIdentifier)
+
+    val recipient = baseRecipient.withCustomFields(
+      "thrift_client_id" -> thriftClientId,
+      "forwarded_service_id" -> forwardedServiceId,
+      "safety_level" -> opts.safetyLevel.toString,
+      "client_app_id_is_defined" -> viewer.clientApplicationId.isDefined.toString,
+    )
+
+    val matched = featureSwitchesWithoutExperiments.matchRecipient(recipient)
+    ValueState.unit(td.copy(featureSwitchResults = Some(matched)))
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/GeoScrubHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/GeoScrubHydrator.scala
new file mode 100644
index 000000000..b53c24497
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/GeoScrubHydrator.scala
@@ -0,0 +1,31 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * This hydrator, which is really more of a "repairer", scrubs at read-time geo data
+ * that should have been scrubbed but wasn't. For any tweet with geo data, it checks
+ * the last geo-scrub timestamp, if any, for the user, and if the tweet was created before
+ * that timestamp, it removes the geo data.
+ */
+object GeoScrubHydrator {
+  type Data = (Option[GeoCoordinates], Option[PlaceId])
+  type Type = ValueHydrator[Data, TweetCtx]
+
+  private[this] val modifiedNoneNoneResult = ValueState.modified((None, None))
+
+  /**
+   * Runs only when coordinates or a place are present.
+   *
+   * @param repo looks up the geo-scrub timestamp by the tweet's user id
+   * @param scribeTweetId invoked with the tweet id whenever geo data is scrubbed
+   */
+  def apply(repo: GeoScrubTimestampRepository.Type, scribeTweetId: FutureEffect[TweetId]): Type =
+    ValueHydrator[Data, TweetCtx] { (curr, ctx) =>
+      repo(ctx.userId).liftToTry.map { lookup =>
+        // a failed lookup or a missing timestamp never triggers a scrub
+        val scrubbedSinceCreation = lookup match {
+          case Return(geoScrubTime) => ctx.createdAt <= geoScrubTime
+          case _ => false
+        }
+
+        if (scrubbedSinceCreation) {
+          scribeTweetId(ctx.tweetId)
+          modifiedNoneNoneResult
+        } else {
+          ValueState.unmodified(curr)
+        }
+      }
+    }.onlyIf { (geo, _) =>
+      val (coords, place) = geo
+      coords.nonEmpty || place.nonEmpty
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/HasMediaHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/HasMediaHydrator.scala
new file mode 100644
index 000000000..486a6ee23
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/HasMediaHydrator.scala
@@ -0,0 +1,14 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Fills in the has-media flag from a predicate over the tweet.
+ */
+object HasMediaHydrator {
+  type Type = ValueHydrator[Option[Boolean], Tweet]
+
+  /** Applies `hasMedia` to the tweet, but only when the flag is not set yet. */
+  def apply(hasMedia: Tweet => Boolean): Type = {
+    val compute = ValueHydrator.map[Option[Boolean], Tweet] { (_, tweet) =>
+      ValueState.modified(Some(hasMedia(tweet)))
+    }
+    compute.onlyIf { (current, _) => current.isEmpty }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM1837FilterHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM1837FilterHydrator.scala
new file mode 100644
index 000000000..951aa40c9
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM1837FilterHydrator.scala
@@ -0,0 +1,23 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.coreservices.IM1837
+import com.twitter.tweetypie.core._
+import com.twitter.stitch.Stitch
+
+/**
+ * Drops a tweet when the requesting user agent is an affected client according to
+ * `IM1837.isAffectedClient` and `IM1837.textMightCrashIOS` flags the tweet text.
+ */
+object IM1837FilterHydrator {
+  type Type = ValueHydrator[Unit, TweetCtx]
+
+  private val Drop =
+    Stitch.exception(FilteredState.Unavailable.DropUnspecified)
+  private val Success = Stitch.value(ValueState.unmodified(()))
+
+  def apply(): Type =
+    ValueHydrator[Unit, TweetCtx] { (_, ctx) =>
+      val affectedClient =
+        TwitterContext().flatMap(_.userAgent).exists(IM1837.isAffectedClient)
+
+      // && short-circuits: the text is only inspected for affected clients
+      if (affectedClient && IM1837.textMightCrashIOS(ctx.text)) Drop else Success
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM2884FilterHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM2884FilterHydrator.scala
new file mode 100644
index 000000000..16222dec4
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM2884FilterHydrator.scala
@@ -0,0 +1,27 @@
+package com.twitter.tweetypie.hydrator
+
+import com.twitter.coreservices.IM2884
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.tweetypie.core.FilteredState
+import com.twitter.tweetypie.core.ValueState
+import com.twitter.stitch.Stitch
+
+/**
+ * Drops a tweet when the requesting user agent is an affected client and the tweet
+ * text is flagged, both according to an `IM2884` instance built from `stats`.
+ */
+object IM2884FilterHydrator {
+  type Type = ValueHydrator[Unit, TweetCtx]
+
+  private val Drop =
+    Stitch.exception(FilteredState.Unavailable.DropUnspecified)
+  private val Success = Stitch.value(ValueState.unmodified(()))
+
+  def apply(stats: StatsReceiver): Type = {
+    // one IM2884 instance per hydrator, shared by all invocations
+    val im2884 = new IM2884(stats)
+
+    ValueHydrator[Unit, TweetCtx] { (_, ctx) =>
+      val affectedClient =
+        TwitterContext().flatMap(_.userAgent).exists(im2884.isAffectedClient)
+
+      // && short-circuits: the text is only inspected for affected clients
+      if (affectedClient && im2884.textMightCrashIOS(ctx.text)) Drop else Success
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM3433FilterHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM3433FilterHydrator.scala
new file mode 100644
index
000000000..71ee6139d
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM3433FilterHydrator.scala
@@ -0,0 +1,25 @@
+package com.twitter.tweetypie.hydrator
+
+import com.twitter.coreservices.IM3433
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core.FilteredState
+import com.twitter.tweetypie.core.ValueState
+
+/**
+ * Drops a tweet when the requesting user agent is an affected client according to
+ * `IM3433.isAffectedClient` and `IM3433.textMightCrashIOS` flags the tweet text.
+ */
+object IM3433FilterHydrator {
+  type Type = ValueHydrator[Unit, TweetCtx]
+
+  // Failed Stitch returned for tweets that must be dropped
+  private val Drop =
+    Stitch.exception(FilteredState.Unavailable.DropUnspecified)
+  // Result returned for tweets that pass the filter
+  private val Success = Stitch.value(ValueState.unmodified(()))
+
+  // NOTE(review): `stats` is not used in this body
+  def apply(stats: StatsReceiver): Type = {
+
+    ValueHydrator[Unit, TweetCtx] { (_, ctx) =>
+      val userAgent = TwitterContext().flatMap(_.userAgent)
+      val userAgentAffected = userAgent.exists(IM3433.isAffectedClient)
+      // && short-circuits: the text is only inspected for affected clients
+      val mightCrash = userAgentAffected && IM3433.textMightCrashIOS(ctx.text)
+      if (mightCrash) Drop else Success
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/LanguageHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/LanguageHydrator.scala
new file mode 100644
index 000000000..2a86091b9
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/LanguageHydrator.scala
@@ -0,0 +1,24 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Hydrates the tweet language by passing the tweet text to the language repository.
+ */
+object LanguageHydrator {
+  type Type = ValueHydrator[Option[Language], TweetCtx]
+
+  val hydratedField: FieldByPath = fieldByPath(Tweet.LanguageField)
+
+  // Hydrate only when the field was requested, the tweet is not a retweet,
+  // and no language is set yet.
+  private[this] def isApplicable(curr: Option[Language], ctx: TweetCtx) =
+    ctx.tweetFieldRequested(Tweet.LanguageField) && !ctx.isRetweet && curr.isEmpty
+
+  /**
+   * A returned language marks the value modified; an empty result leaves it unmodified;
+   * a repository failure yields a partial None for [[hydratedField]].
+   */
+  def apply(repo: LanguageRepository.Type): Type =
+    ValueHydrator[Option[Language], TweetCtx] { (langOpt, ctx) =>
+      repo(ctx.text).liftToTry.map {
+        case Return(Some(l)) => ValueState.modified(Some(l))
+        case Return(None) => ValueState.unmodified(langOpt)
+        case Throw(_) => ValueState.partial(None, hydratedField)
+      }
+    }.onlyIf((curr, ctx) => isApplicable(curr, ctx))
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaEntityHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaEntityHydrator.scala
new file mode 100644
index 000000000..3f3e63fe2
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaEntityHydrator.scala
@@ -0,0 +1,67 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.mediaservices.commons.thriftscala.MediaKey
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Type aliases and wiring for hydrators over a whole `Seq[MediaEntity]`.
+ */
+object MediaEntitiesHydrator {
+  object Cacheable {
+    type Ctx = MediaEntityHydrator.Cacheable.Ctx
+    type Type = ValueHydrator[Seq[MediaEntity], Ctx]
+
+    /**
+     * Lifts the per-entity hydrator `h` to a sequence hydrator and passes it to
+     * `TweetHydration.completeOnlyOnce` as `HydrationType.CacheableMedia`, depending on
+     * `HydrationType.Urls` and filtered by `MediaEntityHydrator.queryFilter`.
+     */
+    def once(h: MediaEntityHydrator.Cacheable.Type): Type =
+      TweetHydration.completeOnlyOnce(
+        queryFilter = MediaEntityHydrator.queryFilter,
+        hydrationType = HydrationType.CacheableMedia,
+        dependsOn = Set(HydrationType.Urls),
+        hydrator = h.liftSeq
+      )
+  }
+
+  object Uncacheable {
+    type Ctx = MediaEntityHydrator.Uncacheable.Ctx
+    type Type = ValueHydrator[Seq[MediaEntity], Ctx]
+  }
+}
+
+/**
+ * Types, contexts and combinators for hydrators over a single `MediaEntity`.
+ */
+object MediaEntityHydrator {
+  val hydratedField: FieldByPath = fieldByPath(Tweet.MediaField)
+
+  object Cacheable {
+    type Type = ValueHydrator[MediaEntity, Ctx]
+
+    case class Ctx(urlEntities: Seq[UrlEntity], underlyingTweetCtx: TweetCtx) extends TweetCtx.Proxy
+
+    /**
+     * Builds a single media-hydrator out of finer-grained hydrators
+     * only with cacheable information.
+     */
+    def apply(hydrateMediaUrls: Type, hydrateMediaIsProtected: Type): Type =
+      hydrateMediaUrls.andThen(hydrateMediaIsProtected)
+  }
+
+  object Uncacheable {
+    type Type = ValueHydrator[MediaEntity, Ctx]
+
+    case class Ctx(mediaKeys: Option[Seq[MediaKey]], underlyingTweetCtx: TweetCtx)
+        extends TweetCtx.Proxy {
+
+      // true when the tweet's media field was requested
+      def includeMediaEntities: Boolean = tweetFieldRequested(Tweet.MediaField)
+      // true when the additional-metadata media field is among the requested media fields
+      def includeAdditionalMetadata: Boolean =
+        opts.include.mediaFields.contains(MediaEntity.AdditionalMetadataField.id)
+    }
+
+    /**
+     * Builds a single media-hydrator out of finer-grained hydrators
+     * only with uncacheable information.
+     */
+    def apply(hydrateMediaKey: Type, hydrateMediaInfo: Type): Type =
+      (hydrateMediaKey
+        .andThen(hydrateMediaInfo))
+        .onlyIf((_, ctx) => ctx.includeMediaEntities)
+  }
+
+  /** True when the query options include the tweet's media field. */
+  def queryFilter(opts: TweetQuery.Options): Boolean =
+    opts.include.tweetFields.contains(Tweet.MediaField.id)
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaInfoHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaInfoHydrator.scala
new file mode 100644
index 000000000..86e7d8e1a
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaInfoHydrator.scala
@@ -0,0 +1,73 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.media.MediaKeyUtil
+import com.twitter.tweetypie.media.MediaMetadataRequest
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+import java.nio.ByteBuffer
+
+/**
+ * Updates a media entity from the metadata returned by the media metadata repository.
+ */
+object MediaInfoHydrator {
+  type Ctx = MediaEntityHydrator.Uncacheable.Ctx
+  type Type = MediaEntityHydrator.Uncacheable.Type
+
+  private[this] val log = Logger(getClass)
+
+  /**
+   * @param repo media metadata lookup
+   * @param stats receives the "attributable_user" counter, incremented for every
+   *              returned metadata with a non-empty `attributableUserId`
+   */
+  def apply(repo: MediaMetadataRepository.Type, stats: StatsReceiver): Type = {
+    val attributableUserCounter = stats.counter("attributable_user")
+
+    ValueHydrator[MediaEntity, Ctx] { (curr, ctx) =>
+      val request =
+        toMediaMetadataRequest(
+          mediaEntity = curr,
+          tweetId = ctx.tweetId,
+          extensionsArgs = ctx.opts.extensionsArgs
+        )
+
+      request match {
+        // no request can be built (isProtected not set): leave the entity as is
+        case None => Stitch.value(ValueState.unmodified(curr))
+
+        case Some(req) =>
+          repo(req).liftToTry.map {
+            case Return(metadata) =>
+              if (metadata.attributableUserId.nonEmpty) attributableUserCounter.incr()
+
+              ValueState.delta(
+                curr,
+                metadata.updateEntity(
+                  mediaEntity = curr,
+                  tweetUserId = ctx.userId,
+                  includeAdditionalMetadata = ctx.includeAdditionalMetadata
+                )
+              )
+
+            // a failure is ignored when the entity is not partial media
+            case Throw(ex) if !PartialEntityCleaner.isPartialMedia(curr) =>
+              log.info("Ignored media info repo failure, media entity already hydrated", ex)
+              ValueState.unmodified(curr)
+
+            case Throw(ex) =>
+              log.error("Media info hydration failed", ex)
+              ValueState.partial(curr, MediaEntityHydrator.hydratedField)
+          }
+      }
+    }
+  }
+
+  /**
+   * Builds the metadata request for a media entity.
+   *
+   * @return None when `mediaEntity.isProtected` is not set, otherwise a request carrying
+   *         the key from `MediaKeyUtil.get(mediaEntity)`
+   */
+  def toMediaMetadataRequest(
+    mediaEntity: MediaEntity,
+    tweetId: TweetId,
+    extensionsArgs: Option[ByteBuffer]
+  ): Option[MediaMetadataRequest] =
+    mediaEntity.isProtected.map { isProtected =>
+      val mediaKey = MediaKeyUtil.get(mediaEntity)
+
+      MediaMetadataRequest(
+        tweetId = tweetId,
+        mediaKey = mediaKey,
+        isProtected = isProtected,
+        extensionsArgs = extensionsArgs
+      )
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaIsProtectedHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaIsProtectedHydrator.scala
new file mode 100644
index 000000000..9ddfe5851
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaIsProtectedHydrator.scala
@@ -0,0 +1,36 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.NotFound
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.media.Media
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Sets `isProtected` on a media entity from the protection state of the tweet's user.
+ */
+object MediaIsProtectedHydrator {
+  type Ctx = MediaEntityHydrator.Cacheable.Ctx
+  type Type = MediaEntityHydrator.Cacheable.Type
+
+  val hydratedField: FieldByPath = MediaEntityHydrator.hydratedField
+
+  /**
+   * A returned protection flag is stored on the entity; NotFound leaves the entity
+   * unmodified; any other failure marks [[hydratedField]] as partial.
+   */
+  def apply(repo: UserProtectionRepository.Type): Type =
+    ValueHydrator[MediaEntity, Ctx] { (curr, ctx) =>
+      val request = UserKey(ctx.userId)
+
+      repo(request).liftToTry.map {
+        case Return(p) => ValueState.modified(curr.copy(isProtected = Some(p)))
+        case Throw(NotFound) => ValueState.unmodified(curr)
+        case Throw(_) => ValueState.partial(curr, hydratedField)
+      }
+    }.onlyIf { (curr, ctx) =>
+      // We need to update isProtected for media entities that:
+      // 1. Do not already have it set.
+      // 2. Did not come from another tweet.
+      //
+      // If the entity does not have an expandedUrl, we can't be sure
+      // whether the media originated with this tweet.
+      curr.isProtected.isEmpty &&
+      Media.isOwnMedia(ctx.tweetId, curr) &&
+      curr.expandedUrl != null
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaKeyHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaKeyHydrator.scala
new file mode 100644
index 000000000..a6e491d61
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaKeyHydrator.scala
@@ -0,0 +1,54 @@
+package com.twitter.tweetypie.hydrator
+
+import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
+import com.twitter.mediaservices.commons.thriftscala._
+import com.twitter.tweetypie.core.ValueState
+import com.twitter.tweetypie.thriftscala._
+
+object MediaKeyHydrator {
+  type Ctx = MediaEntityHydrator.Uncacheable.Ctx
+  type Type = MediaEntityHydrator.Uncacheable.Type
+
+  def apply(): Type =
+    ValueHydrator
+      .map[MediaEntity, Ctx] { (curr, ctx) =>
+        val mediaKey = infer(ctx.mediaKeys, curr)
+        ValueState.modified(curr.copy(mediaKey = Some(mediaKey)))
+      }
+      .onlyIf((curr, ctx) => curr.mediaKey.isEmpty)
+
+  def infer(mediaKeys: Option[Seq[MediaKey]], mediaEntity: MediaEntity): MediaKey = {
+
+    def inferByMediaId =
+      mediaKeys
.flatMap(_.find(_.mediaId == mediaEntity.mediaId)) + + def contentType = + mediaEntity.sizes.find(_.sizeType == MediaSizeType.Orig).map(_.deprecatedContentType) + + def inferByContentType = + contentType.map { tpe => + val category = + tpe match { + case MediaContentType.VideoMp4 => MediaCategory.TweetGif + case MediaContentType.VideoGeneric => MediaCategory.TweetVideo + case _ => MediaCategory.TweetImage + } + MediaKey(category, mediaEntity.mediaId) + } + + def fail = + throw new IllegalStateException( + s""" + |Can't infer media key. + | mediaKeys:'$mediaKeys' + | mediaEntity:'$mediaEntity' + """.stripMargin + ) + + mediaEntity.mediaKey + .orElse(inferByMediaId) + .orElse(inferByContentType) + .getOrElse(fail) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaRefsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaRefsHydrator.scala new file mode 100644 index 000000000..c2408b634 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaRefsHydrator.scala @@ -0,0 +1,124 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.mediaservices.media_util.GenericMediaKey +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.thriftscala.MediaEntity +import com.twitter.tweetypie.thriftscala.UrlEntity +import com.twitter.tweetypie.media.thriftscala.MediaRef +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.thriftscala.FieldByPath + +/** + * MediaRefsHydrator hydrates the Tweet.mediaRefs field based on stored media keys + * and pasted media. Media keys are available in three ways: + * + * 1. (For old Tweets): in the stored MediaEntity + * 2. (For 2016+ Tweets): in the mediaKeys field + * 3. 
From other Tweets using pasted media + * + * This hydrator combines these three sources into a single field, providing the + * media key and source Tweet information for pasted media. + * + * Long-term we will move this logic to the write path and backfill the field for old Tweets. + */ +object MediaRefsHydrator { + type Type = ValueHydrator[Option[Seq[MediaRef]], Ctx] + + case class Ctx( + media: Seq[MediaEntity], + mediaKeys: Seq[MediaKey], + urlEntities: Seq[UrlEntity], + underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy { + def includePastedMedia: Boolean = opts.include.pastedMedia + } + + val hydratedField: FieldByPath = fieldByPath(Tweet.MediaRefsField) + + def mediaKeyToMediaRef(mediaKey: MediaKey): MediaRef = + MediaRef( + genericMediaKey = GenericMediaKey(mediaKey).toStringKey() + ) + + // Convert a pasted Tweet into a Seq of MediaRef from that Tweet with the correct sourceTweetId and sourceUserId + def pastedTweetToMediaRefs( + tweet: Tweet + ): Seq[MediaRef] = + tweet.mediaRefs.toSeq.flatMap { mediaRefs => + mediaRefs.map( + _.copy( + sourceTweetId = Some(tweet.id), + sourceUserId = Some(getUserId(tweet)) + )) + } + + // Fetch MediaRefs from pasted media Tweet URLs in the Tweet text + def getPastedMediaRefs( + repo: TweetRepository.Optional, + ctx: Ctx, + includePastedMedia: Gate[Unit] + ): Stitch[Seq[MediaRef]] = { + if (includePastedMedia() && ctx.includePastedMedia) { + + // Extract Tweet ids from pasted media permalinks in the Tweet text + val pastedMediaTweetIds: Seq[TweetId] = + PastedMediaHydrator.pastedIdsAndEntities(ctx.tweetId, ctx.urlEntities).map(_._1) + + val opts = TweetQuery.Options( + include = TweetQuery.Include( + tweetFields = Set(Tweet.CoreDataField.id, Tweet.MediaRefsField.id), + pastedMedia = false // don't recursively load pasted media refs + )) + + // Load a Seq of Tweets with pasted media, ignoring any returned with NotFound or a FilteredState + val pastedTweets: Stitch[Seq[Tweet]] = Stitch + .traverse(pastedMediaTweetIds) { 
id => + repo(id, opts) + }.map(_.flatten) + + pastedTweets.map(_.flatMap(pastedTweetToMediaRefs)) + } else { + Stitch.Nil + } + } + + // Make empty Seq None and non-empty Seq Some(Seq(...)) to comply with the thrift field type + def optionalizeSeq(mediaRefs: Seq[MediaRef]): Option[Seq[MediaRef]] = + Some(mediaRefs).filterNot(_.isEmpty) + + def apply( + repo: TweetRepository.Optional, + includePastedMedia: Gate[Unit] + ): Type = { + ValueHydrator[Option[Seq[MediaRef]], Ctx] { (curr, ctx) => + // Fetch mediaRefs from Tweet media + val storedMediaRefs: Seq[MediaRef] = ctx.media.map { mediaEntity => + // Use MediaKeyHydrator.infer to determine the media key from the media entity + val mediaKey = MediaKeyHydrator.infer(Some(ctx.mediaKeys), mediaEntity) + mediaKeyToMediaRef(mediaKey) + } + + // Fetch mediaRefs from pasted media + getPastedMediaRefs(repo, ctx, includePastedMedia).liftToTry.map { + case Return(pastedMediaRefs) => + // Combine the refs from the Tweet's own media and those from pasted media, then limit + // to MaxMediaEntitiesPerTweet. 
+ val limitedRefs = + (storedMediaRefs ++ pastedMediaRefs).take(PastedMediaHydrator.MaxMediaEntitiesPerTweet) + + ValueState.delta(curr, optionalizeSeq(limitedRefs)) + case Throw(_) => + ValueState.partial(optionalizeSeq(storedMediaRefs), hydratedField) + } + + }.onlyIf { (_, ctx) => + ctx.tweetFieldRequested(Tweet.MediaRefsField) || + ctx.opts.safetyLevel != SafetyLevel.FilterNone + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaTagsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaTagsHydrator.scala new file mode 100644 index 000000000..4e3f1bc42 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaTagsHydrator.scala @@ -0,0 +1,103 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object MediaTagsHydrator { + type Type = ValueHydrator[Option[TweetMediaTags], TweetCtx] + + /** + * TweetMediaTags contains a map of MediaId to Seq[MediaTag]. + * The outer traverse maps over each MediaId, while the inner + * traverse maps over each MediaTag. + * + * A MediaTag has four fields: + * + * 1: MediaTagType tag_type + * 2: optional i64 user_id + * 3: optional string screen_name + * 4: optional string name + * + * For each MediaTag, if the tag type is MediaTagType.User and the user id is defined + * (see mediaTagToKey) we look up the tagged user, using the tagging user (the tweet + * author) as the viewer id (this means that visibility rules between the tagged user + * and tagging user are applied). + * + * If we get a taggable user back, we fill in the screen name and name fields. If not, + * we drop the tag. 
+ */ + def apply(repo: UserViewRepository.Type): Type = + ValueHydrator[TweetMediaTags, TweetCtx] { (tags, ctx) => + val mediaTagsByMediaId: Seq[(MediaId, Seq[MediaTag])] = tags.tagMap.toSeq + + Stitch + .traverse(mediaTagsByMediaId) { + case (mediaId, mediaTags) => + Stitch.traverse(mediaTags)(tag => hydrateMediaTag(repo, tag, ctx.userId)).map { + ValueState.sequence(_).map(tags => (mediaId, tags.flatten)) + } + } + .map { + // Reconstruct TweetMediaTags(tagMap: Map[MediaId, SeqMediaTag]) + ValueState.sequence(_).map(s => TweetMediaTags(s.toMap)) + } + }.onlyIf { (_, ctx) => + !ctx.isRetweet && ctx.tweetFieldRequested(Tweet.MediaTagsField) + }.liftOption + + /** + * A function to hydrate a single `MediaTag`. The return type is `Option[MediaTag]` + * because we may return `None` to filter out a `MediaTag` if the tagged user doesn't + * exist or isn't taggable. + */ + private[this] def hydrateMediaTag( + repo: UserViewRepository.Type, + mediaTag: MediaTag, + authorId: UserId + ): Stitch[ValueState[Option[MediaTag]]] = + mediaTagToKey(mediaTag) match { + case None => Stitch.value(ValueState.unmodified(Some(mediaTag))) + case Some(key) => + repo(toRepoQuery(key, authorId)) + .map { + case user if user.mediaView.exists(_.canMediaTag) => + ValueState.modified( + Some( + mediaTag.copy( + userId = Some(user.id), + screenName = user.profile.map(_.screenName), + name = user.profile.map(_.name) + ) + ) + ) + + // if `canMediaTag` is false, drop the tag + case _ => ValueState.modified(None) + } + .handle { + // if user is not found, drop the tag + case NotFound => ValueState.modified(None) + } + } + + private[this] val queryFields: Set[UserField] = Set(UserField.Profile, UserField.MediaView) + + def toRepoQuery(userKey: UserKey, forUserId: UserId): UserViewRepository.Query = + UserViewRepository.Query( + userKey = userKey, + // view is based on tagging user, not tweet viewer + forUserId = Some(forUserId), + visibility = UserVisibility.MediaTaggable, + queryFields = 
queryFields
+    )
+
+  private[this] def mediaTagToKey(mediaTag: MediaTag): Option[UserKey] =
+    mediaTag match {
+      case MediaTag(MediaTagType.User, Some(taggedUserId), _, _) => Some(UserKey(taggedUserId))
+      case _ => None
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaUrlFieldsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaUrlFieldsHydrator.scala
new file mode 100644
index 000000000..0cacf3b74
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaUrlFieldsHydrator.scala
@@ -0,0 +1,25 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.media.Media
+import com.twitter.tweetypie.media.MediaUrl
+import com.twitter.tweetypie.thriftscala._
+
+object MediaUrlFieldsHydrator {
+  type Ctx = MediaEntityHydrator.Cacheable.Ctx
+  type Type = MediaEntityHydrator.Cacheable.Type
+
+  // Last url entity for which MediaUrl.Permalink.hasTweetId matches this tweet's id, if any
+  def mediaPermalink(ctx: Ctx): Option[UrlEntity] =
+    ctx.urlEntities.view.reverse.find(e => MediaUrl.Permalink.hasTweetId(e, ctx.tweetId))
+
+  // Runs only for own media whose url is null; copies from the permalink entity when found
+  def apply(): Type =
+    ValueHydrator
+      .map[MediaEntity, Ctx] { (entity, ctx) =>
+        val copied = mediaPermalink(ctx).map(Media.copyFromUrlEntity(entity, _))
+        copied.map(ValueState.modified(_)).getOrElse(ValueState.unmodified(entity))
+      }
+      .onlyIf((entity, ctx) => entity.url == null && Media.isOwnMedia(ctx.tweetId, entity))
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MentionEntityHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MentionEntityHydrator.scala
new file mode 100644
index 000000000..a1d7c09cd
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MentionEntityHydrator.scala
@@ -0,0 +1,47 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.NotFound
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import
com.twitter.tweetypie.thriftscala._
+
+object MentionEntitiesHydrator {
+  type Type = ValueHydrator[Seq[MentionEntity], TweetCtx]
+
+  // True when the query's included tweet fields contain the mentions field id
+  def queryFilter(opts: TweetQuery.Options): Boolean = {
+    val requested = opts.include.tweetFields
+    requested.contains(Tweet.MentionsField.id)
+  }
+
+  def once(h: MentionEntityHydrator.Type): Type =
+    TweetHydration.completeOnlyOnce(
+      queryFilter = queryFilter, hydrationType = HydrationType.Mentions, hydrator = h.liftSeq)
+}
+
+object MentionEntityHydrator {
+  type Type = ValueHydrator[MentionEntity, TweetCtx]
+
+  val hydratedField: FieldByPath = fieldByPath(Tweet.MentionsField)
+
+  def apply(repo: UserIdentityRepository.Type): Type = {
+    // only hydrate mention if userId or name is empty
+    def isIncomplete(m: MentionEntity): Boolean = m.userId.isEmpty || m.name.isEmpty
+
+    ValueHydrator[MentionEntity, TweetCtx] { (entity, _) =>
+      repo(UserKey(entity.screenName)).liftToTry.map {
+        case Throw(NotFound) => ValueState.unmodified(entity)
+        case Throw(_) => ValueState.partial(entity, hydratedField)
+        case Return(user) => ValueState.delta(entity, update(entity, user))
+      }
+    }.onlyIf((entity, _) => isIncomplete(entity))
+  }
+
+  // Returns a copy of the entity carrying the identity's screen name, id and real name
+  def update(entity: MentionEntity, userIdent: UserIdentity): MentionEntity = {
+    val resolvedId = Some(userIdent.id)
+    val resolvedName = Some(userIdent.realName)
+    entity.copy(screenName = userIdent.screenName, userId = resolvedId, name = resolvedName)
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NegativeVisibleTextRangeRepairer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NegativeVisibleTextRangeRepairer.scala
new file mode 100644
index 000000000..5babf5b88
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NegativeVisibleTextRangeRepairer.scala
@@ -0,0 +1,18 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.thriftscala.TextRange
+
+/**
+ * Some tweets with visibleTextRange may have fromIndex > toIndex, in which case set fromIndex
+ * to toIndex.
+ */
+object NegativeVisibleTextRangeRepairer {
+  // Yields a replacement only for a defined range whose start lies past its end
+  private val mutation =
+    Mutation[Option[TextRange]] { range =>
+      range.collect { case TextRange(from, to) if from > to => Some(TextRange(to, to)) }
+    }
+
+  private[tweetypie] val tweetMutation = TweetLenses.visibleTextRange.mutation(mutation)
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NoteTweetSuffixHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NoteTweetSuffixHydrator.scala
new file mode 100644
index 000000000..c7224a8db
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NoteTweetSuffixHydrator.scala
@@ -0,0 +1,66 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core.TweetData
+import com.twitter.tweetypie.core.ValueState
+import com.twitter.tweetypie.repository.TweetQuery
+import com.twitter.tweetypie.thriftscala.entities.Implicits._
+import com.twitter.tweetypie.thriftscala.TextRange
+import com.twitter.tweetypie.tweettext.Offset
+import com.twitter.tweetypie.tweettext.TextModification
+import com.twitter.tweetypie.tweettext.TweetText
+import com.twitter.tweetypie.util.TweetLenses
+
+object NoteTweetSuffixHydrator {
+
+  val ELLIPSIS: String = "\u2026"
+
+  private def addTextSuffix(tweet: Tweet): Tweet = {
+    val originalText = TweetLenses.text(tweet)
+    val originalTextLength = TweetText.codePointLength(originalText)
+
+    val visibleTextRange: TextRange =
+      TweetLenses
+        .visibleTextRange(tweet)
+        .getOrElse(TextRange(0, originalTextLength))
+
+    val insertAtCodePoint = Offset.CodePoint(visibleTextRange.toIndex)
+
+    val textModification = TextModification.insertAt(
+      originalText,
+      insertAtCodePoint,
+      ELLIPSIS
+    )
+
+    val mediaEntities = TweetLenses.media(tweet)
+    val urlEntities = TweetLenses.urls(tweet)
+
+    val modifiedText = textModification.updated
+    val modifiedMediaEntities = textModification.reindexEntities(mediaEntities)
+    val
modifiedUrlEntities = textModification.reindexEntities(urlEntities) + val modifiedVisibleTextRange = visibleTextRange.copy(toIndex = + visibleTextRange.toIndex + TweetText.codePointLength(ELLIPSIS)) + + val updatedTweet = + Lens.setAll( + tweet, + TweetLenses.text -> modifiedText, + TweetLenses.urls -> modifiedUrlEntities.sortBy(_.fromIndex), + TweetLenses.media -> modifiedMediaEntities.sortBy(_.fromIndex), + TweetLenses.visibleTextRange -> Some(modifiedVisibleTextRange) + ) + + updatedTweet + } + + def apply(): TweetDataValueHydrator = { + ValueHydrator[TweetData, TweetQuery.Options] { (td, _) => + val updatedTweet = addTextSuffix(td.tweet) + Stitch.value(ValueState.delta(td, td.copy(tweet = updatedTweet))) + }.onlyIf { (td, _) => + td.tweet.noteTweet.isDefined && + td.tweet.noteTweet.flatMap(_.isExpandable).getOrElse(true) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PartialEntityCleaner.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PartialEntityCleaner.scala new file mode 100644 index 000000000..a15e64383 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PartialEntityCleaner.scala @@ -0,0 +1,80 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.tweetypie.media._ +import com.twitter.tweetypie.thriftscala._ +import scala.collection.Set + +/** + * Removes partial Url, Media, and Mention entities that were not + * fully hydrated. Rather than returning no value or a value with + * incomplete entities on an entity hydration failure, we gracefully + * degrade to just omitting those entities. This step needs to be + * applied in the post-cache filter, so that we don't cache the value + * with missing entities. + * + * A MediaEntity will first be converted back to a UrlEntity if it is only + * partially hydrated. 
If the resulting UrlEntity is itself then only partially + * hydrated, it will get dropped also. + */ +object PartialEntityCleaner { + def apply(stats: StatsReceiver): Mutation[Tweet] = { + val scopedStats = stats.scope("partial_entity_cleaner") + Mutation + .all( + Seq( + TweetLenses.urls.mutation(urls.countMutations(scopedStats.counter("urls"))), + TweetLenses.media.mutation(media.countMutations(scopedStats.counter("media"))), + TweetLenses.mentions.mutation(mentions.countMutations(scopedStats.counter("mentions"))) + ) + ) + .onlyIf(!isRetweet(_)) + } + + private[this] def clean[E](isPartial: E => Boolean) = + Mutation[Seq[E]] { items => + items.partition(isPartial) match { + case (Nil, nonPartial) => None + case (partial, nonPartial) => Some(nonPartial) + } + } + + private[this] val mentions = + clean[MentionEntity](e => e.userId.isEmpty || e.name.isEmpty) + + private[this] val urls = + clean[UrlEntity](e => + isNullOrEmpty(e.url) || isNullOrEmpty(e.expanded) || isNullOrEmpty(e.display)) + + private[this] val media = + Mutation[Seq[MediaEntity]] { mediaEntities => + mediaEntities.partition(isPartialMedia) match { + case (Nil, nonPartial) => None + case (partial, nonPartial) => Some(nonPartial) + } + } + + def isPartialMedia(e: MediaEntity): Boolean = + e.fromIndex < 0 || + e.toIndex <= 0 || + isNullOrEmpty(e.url) || + isNullOrEmpty(e.displayUrl) || + isNullOrEmpty(e.mediaUrl) || + isNullOrEmpty(e.mediaUrlHttps) || + isNullOrEmpty(e.expandedUrl) || + e.mediaInfo.isEmpty || + e.mediaKey.isEmpty || + (MediaKeyClassifier.isImage(MediaKeyUtil.get(e)) && containsInvalidSizeVariant(e.sizes)) + + private[this] val userMentions = + clean[UserMention](e => e.screenName.isEmpty || e.name.isEmpty) + + def isNullOrEmpty(optString: Option[String]): Boolean = + optString.isEmpty || optString.exists(isNullOrEmpty(_)) + + def isNullOrEmpty(str: String): Boolean = str == null || str.isEmpty + + def containsInvalidSizeVariant(sizes: Set[MediaSize]): Boolean = + sizes.exists(size 
=> size.height == 0 || size.width == 0) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PastedMediaHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PastedMediaHydrator.scala new file mode 100644 index 000000000..769c9bead --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PastedMediaHydrator.scala @@ -0,0 +1,102 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.media.MediaUrl +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object PastedMediaHydrator { + type Type = ValueHydrator[PastedMedia, Ctx] + + /** + * Ensure that the final tweet has at most 4 media entities. + */ + val MaxMediaEntitiesPerTweet = 4 + + /** + * Enforce visibility rules when hydrating media for a write. + */ + val writeSafetyLevel = SafetyLevel.TweetWritesApi + + case class Ctx(urlEntities: Seq[UrlEntity], underlyingTweetCtx: TweetCtx) extends TweetCtx.Proxy { + def includePastedMedia: Boolean = opts.include.pastedMedia + def includeMediaEntities: Boolean = tweetFieldRequested(Tweet.MediaField) + def includeAdditionalMetadata: Boolean = + mediaFieldRequested(MediaEntity.AdditionalMetadataField.id) + def includeMediaTags: Boolean = tweetFieldRequested(Tweet.MediaTagsField) + } + + def getPastedMedia(t: Tweet): PastedMedia = PastedMedia(getMedia(t), Map.empty) + + def apply(repo: PastedMediaRepository.Type): Type = { + def hydrateOneReference( + tweetId: TweetId, + urlEntity: UrlEntity, + repoCtx: PastedMediaRepository.Ctx + ): Stitch[PastedMedia] = + repo(tweetId, repoCtx).liftToTry.map { + case Return(pastedMedia) => pastedMedia.updateEntities(urlEntity) + case _ => PastedMedia.empty + } + + ValueHydrator[PastedMedia, Ctx] { (curr, ctx) => + val repoCtx = asRepoCtx(ctx) + val idsAndEntities = 
pastedIdsAndEntities(ctx.tweetId, ctx.urlEntities) + + val res = Stitch.traverse(idsAndEntities) { + case (tweetId, urlEntity) => + hydrateOneReference(tweetId, urlEntity, repoCtx) + } + + res.liftToTry.map { + case Return(pastedMedias) => + val merged = pastedMedias.foldLeft(curr)(_.merge(_)) + val limited = merged.take(MaxMediaEntitiesPerTweet) + ValueState.delta(curr, limited) + + case Throw(_) => ValueState.unmodified(curr) + } + }.onlyIf { (_, ctx) => + // we only attempt to hydrate pasted media if media is requested + ctx.includePastedMedia && + !ctx.isRetweet && + ctx.includeMediaEntities + } + } + + /** + * Finds url entities for foreign permalinks, and returns a sequence of tuples containing + * the foreign tweet IDs and the associated UrlEntity containing the permalink. If the same + * permalink appears multiple times, only one of the duplicate entities is returned. + */ + def pastedIdsAndEntities( + tweetId: TweetId, + urlEntities: Seq[UrlEntity] + ): Seq[(TweetId, UrlEntity)] = + urlEntities + .foldLeft(Map.empty[TweetId, UrlEntity]) { + case (z, e) => + MediaUrl.Permalink.getTweetId(e).filter(_ != tweetId) match { + case Some(id) if !z.contains(id) => z + (id -> e) + case _ => z + } + } + .toSeq + + def asRepoCtx(ctx: Ctx) = + PastedMediaRepository.Ctx( + ctx.includeMediaEntities, + ctx.includeAdditionalMetadata, + ctx.includeMediaTags, + ctx.opts.extensionsArgs, + if (ctx.opts.cause == TweetQuery.Cause.Insert(ctx.tweetId) || + ctx.opts.cause == TweetQuery.Cause.Undelete(ctx.tweetId)) { + writeSafetyLevel + } else { + ctx.opts.safetyLevel + } + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PerspectiveHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PerspectiveHydrator.scala new file mode 100644 index 000000000..4a055f5ec --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PerspectiveHydrator.scala @@ -0,0 +1,112 @@ +package com.twitter.tweetypie +package 
hydrator + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.stitch.timelineservice.TimelineService.GetPerspectives.Query +import com.twitter.timelineservice.{thriftscala => tls} +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.PerspectiveRepository +import com.twitter.tweetypie.thriftscala.FieldByPath +import com.twitter.tweetypie.thriftscala.StatusPerspective + +object PerspectiveHydrator { + type Type = ValueHydrator[Option[StatusPerspective], Ctx] + val hydratedField: FieldByPath = fieldByPath(Tweet.PerspectiveField) + + case class Ctx(featureSwitchResults: Option[FeatureSwitchResults], underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + val Types: Set[tls.PerspectiveType] = + Set( + tls.PerspectiveType.Reported, + tls.PerspectiveType.Favorited, + tls.PerspectiveType.Retweeted, + tls.PerspectiveType.Bookmarked + ) + + val TypesWithoutBookmarked: Set[tls.PerspectiveType] = + Set( + tls.PerspectiveType.Reported, + tls.PerspectiveType.Favorited, + tls.PerspectiveType.Retweeted + ) + + private[this] val partialResult = ValueState.partial(None, hydratedField) + + val bookmarksPerspectiveHydrationEnabledKey = "bookmarks_perspective_hydration_enabled" + + def evaluatePerspectiveTypes( + userId: Long, + bookmarksPerspectiveDecider: Gate[Long], + featureSwitchResults: Option[FeatureSwitchResults] + ): Set[tls.PerspectiveType] = { + if (bookmarksPerspectiveDecider(userId) || + featureSwitchResults + .flatMap(_.getBoolean(bookmarksPerspectiveHydrationEnabledKey, false)) + .getOrElse(false)) + Types + else + TypesWithoutBookmarked + } + + def apply( + repo: PerspectiveRepository.Type, + shouldHydrateBookmarksPerspective: Gate[Long], + stats: StatsReceiver + ): Type = { + val statsByLevel = + SafetyLevel.list.map(level => (level, stats.counter(level.name, "calls"))).toMap + + ValueHydrator[Option[StatusPerspective], Ctx] { 
(_, ctx) => + val res: Stitch[tls.TimelineEntryPerspective] = if (ctx.isRetweet) { + Stitch.value( + tls.TimelineEntryPerspective( + favorited = false, + retweetId = None, + retweeted = false, + reported = false, + bookmarked = None + ) + ) + } else { + statsByLevel + .getOrElse(ctx.opts.safetyLevel, stats.counter(ctx.opts.safetyLevel.name, "calls")) + .incr() + + repo( + Query( + userId = ctx.opts.forUserId.get, + tweetId = ctx.tweetId, + types = evaluatePerspectiveTypes( + ctx.opts.forUserId.get, + shouldHydrateBookmarksPerspective, + ctx.featureSwitchResults) + )) + } + + res.liftToTry.map { + case Return(perspective) => + ValueState.modified( + Some( + StatusPerspective( + userId = ctx.opts.forUserId.get, + favorited = perspective.favorited, + retweeted = perspective.retweeted, + retweetId = perspective.retweetId, + reported = perspective.reported, + bookmarked = perspective.bookmarked + ) + ) + ) + case _ => partialResult + } + + }.onlyIf { (curr, ctx) => + curr.isEmpty && + ctx.opts.forUserId.nonEmpty && + (ctx.tweetFieldRequested(Tweet.PerspectiveField) || ctx.opts.excludeReported) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PlaceHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PlaceHydrator.scala new file mode 100644 index 000000000..186619df8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PlaceHydrator.scala @@ -0,0 +1,28 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object PlaceHydrator { + type Type = ValueHydrator[Option[Place], TweetCtx] + + val HydratedField: FieldByPath = fieldByPath(Tweet.PlaceField) + + def apply(repo: PlaceRepository.Type): Type = + ValueHydrator[Option[Place], TweetCtx] { (_, ctx) => + val key = PlaceKey(ctx.placeId.get, ctx.opts.languageTag) + 
repo(key).liftToTry.map {
+        case Return(place) => ValueState.modified(Some(place))
+        case Throw(NotFound) => ValueState.UnmodifiedNone
+        case Throw(_) => ValueState.partial(None, HydratedField)
+      }
+    }.onlyIf { (curr, ctx) =>
+      curr.isEmpty &&
+      ctx.tweetFieldRequested(Tweet.PlaceField) &&
+      !ctx.isRetweet &&
+      ctx.placeId.nonEmpty
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PreviousTweetCountsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PreviousTweetCountsHydrator.scala
new file mode 100644
index 000000000..5dff256ac
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PreviousTweetCountsHydrator.scala
@@ -0,0 +1,170 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.featureswitches.v2.FeatureSwitchResults
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.FieldId
+import com.twitter.tweetypie.TweetId
+import com.twitter.tweetypie.core.ValueState
+import com.twitter.tweetypie.repository.TweetCountKey
+import com.twitter.tweetypie.repository.TweetCountsRepository
+import com.twitter.tweetypie.thriftscala.EditControl
+import com.twitter.tweetypie.thriftscala.StatusCounts
+import com.twitter.tweetypie.thriftscala._
+
+/*
+ * A constructor for a ValueHydrator that hydrates `previous_counts`
+ * information. Previous counts are applied to edit tweets; they
+ * are the summation of all the status_counts in an edit chain up to
+ * but not including the tweet being hydrated.
+ *
+ */
+object PreviousTweetCountsHydrator {
+
+  case class Ctx(
+    editControl: Option[EditControl],
+    featureSwitchResults: Option[FeatureSwitchResults],
+    underlyingTweetCtx: TweetCtx)
+      extends TweetCtx.Proxy
+
+  type Type = ValueHydrator[Option[StatusCounts], Ctx]
+
+  val hydratedField: FieldByPath = fieldByPath(Tweet.PreviousCountsField)
+
+  /*
+   * Params:
+   *  tweetId: The tweet being hydrated.
+   *  editTweetIds: The sorted list of all edits in an edit chain.
+   *
+   * Returns: the leading tweetIds of the edit chain that are smaller than `tweetId`, i.e.
+   *  from the initial tweet up to but not including the tweet being hydrated.
+   */
+  def previousTweetIds(tweetId: TweetId, editTweetIds: Seq[TweetId]): Seq[TweetId] = {
+    editTweetIds.takeWhile(_ < tweetId)
+  }
+
+  /* An addition operation for Option[Long]; None acts as the identity element. */
+  def sumOptions(A: Option[Long], B: Option[Long]): Option[Long] =
+    (A, B) match {
+      case (None, None) => None
+      case (Some(a), None) => Some(a)
+      case (None, Some(b)) => Some(b)
+      case (Some(a), Some(b)) => Some(a + b)
+    }
+
+  /* An addition operation for StatusCounts; only the five fields below are carried over. */
+  def sumStatusCounts(A: StatusCounts, B: StatusCounts): StatusCounts =
+    StatusCounts(
+      retweetCount = sumOptions(A.retweetCount, B.retweetCount),
+      replyCount = sumOptions(A.replyCount, B.replyCount),
+      favoriteCount = sumOptions(A.favoriteCount, B.favoriteCount),
+      quoteCount = sumOptions(A.quoteCount, B.quoteCount),
+      bookmarkCount = sumOptions(A.bookmarkCount, B.bookmarkCount)
+    )
+
+  /* The identity element for `sumStatusCounts`: every summed field is None. */
+  private val emptyStatusCounts: StatusCounts =
+    StatusCounts(
+      retweetCount = None,
+      replyCount = None,
+      favoriteCount = None,
+      quoteCount = None,
+      bookmarkCount = None
+    )
+
+  /*
+   * Builds the hydrator. It only runs when at least one counts field is requested
+   * (see `onlyIf` below).
+   */
+  def apply(repo: TweetCountsRepository.Type, shouldHydrateBookmarksCount: Gate[Long]): Type = {
+
+    /*
+     * Get a StatusCount representing the summed engagements of all previous
+     * StatusCounts in an edit chain. Only `countsFields` that are specifically requested
+     * are included in the aggregate StatusCount, otherwise those fields are None.
+     */
+    def getPreviousEngagementCounts(
+      tweetId: TweetId,
+      editTweetIds: Seq[TweetId],
+      countsFields: Set[FieldId]
+    ): Stitch[ValueState[StatusCounts]] = {
+      val editTweetIdList = previousTweetIds(tweetId, editTweetIds)
+
+      // StatusCounts for each edit tweet revision
+      val statusCountsPerEditVersion: Stitch[Seq[ValueState[StatusCounts]]] =
+        Stitch.collect(editTweetIdList.map { tweetId =>
+          // Which tweet count keys to request, as indicated by the tweet options.
+          val keys: Seq[TweetCountKey] =
+            TweetCountsHydrator.toKeys(tweetId, countsFields, None)
+
+          // A separate StatusCounts for each count field, for `tweetId`
+          // e.g. Seq(StatusCounts(retweetCounts=5L), StatusCounts(favCounts=6L))
+          val statusCountsPerCountField: Stitch[Seq[ValueState[StatusCounts]]] =
+            Stitch.collect(keys.map(key => TweetCountsHydrator.statusCountsRepo(key, repo)))
+
+          // Reduce the per-field counts into a single StatusCounts for `tweetId`
+          statusCountsPerCountField.map { vs =>
+            // NOTE: This StatusCounts reduction uses different logic than
+            // `sumStatusCounts`. This reduction takes the latest value for a field
+            // instead of summing the fields.
+            ValueState.sequence(vs).map(TweetCountsHydrator.reduceStatusCounts)
+          }
+        })
+
+      // Sum together the StatusCounts for each edit tweet revision into a single Status Count
+      statusCountsPerEditVersion.map { vs =>
+        ValueState.sequence(vs).map { statusCounts =>
+          // Reduce a list of StatusCounts into a single StatusCount by summing their fields.
+          // An empty list (no edit ids below `tweetId`) yields all-None counts rather than
+          // the UnsupportedOperationException that `reduce` throws on an empty Seq.
+          statusCounts
+            .reduceOption { (a, b) => sumStatusCounts(a, b) }
+            .getOrElse(emptyStatusCounts)
+        }
+      }
+    }
+
+    ValueHydrator[Option[StatusCounts], Ctx] { (inputStatusCounts, ctx) =>
+      val countsFields: Set[FieldId] = TweetCountsHydrator.filterRequestedCounts(
+        ctx.opts.forUserId.getOrElse(ctx.userId),
+        ctx.opts.include.countsFields,
+        shouldHydrateBookmarksCount,
+        ctx.featureSwitchResults
+      )
+
+      ctx.editControl match {
+        case Some(EditControl.Edit(edit)) =>
+          edit.editControlInitial match {
+            case Some(initial) =>
+              val previousStatusCounts: Stitch[ValueState[StatusCounts]] =
+                getPreviousEngagementCounts(ctx.tweetId, initial.editTweetIds, countsFields)
+
+              // Add the new aggregated StatusCount to the TweetData and return it
+              previousStatusCounts.map { valueState =>
+                valueState.map { statusCounts => Some(statusCounts) }
+              }
+            case None =>
+              // EditControlInitial is not hydrated within EditControlEdit
+              // This means we cannot provide aggregated previous counts, we will
+              // fail open and return the input data unchanged.
+              Stitch.value(ValueState.partial(inputStatusCounts, hydratedField))
+          }
+
+        case _ =>
+          // If the tweet has an EditControlInitial - it's the first Tweet in the Edit Chain
+          // or has no EditControl - it could be an old Tweet from when no Edit Controls existed
+          // then the previous counts are set to be equal to None.
+          Stitch.value(ValueState.unit(None))
+      }
+    }.onlyIf { (_, ctx: Ctx) =>
+      // only run if the CountsField was requested; note this is run on both the read and write path
+      TweetCountsHydrator
+        .filterRequestedCounts(
+          ctx.opts.forUserId.getOrElse(ctx.userId),
+          ctx.opts.include.countsFields,
+          shouldHydrateBookmarksCount,
+          ctx.featureSwitchResults
+        ).nonEmpty
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ProfileGeoHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ProfileGeoHydrator.scala
new file mode 100644
index 000000000..ea461bae8
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ProfileGeoHydrator.scala
@@ -0,0 +1,35 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.dataproducts.enrichments.thriftscala.ProfileGeoEnrichment
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository.ProfileGeoKey
+import com.twitter.tweetypie.repository.ProfileGeoRepository
+import com.twitter.tweetypie.thriftscala.FieldByPath
+
+/**
+ * Hydrates the profile geo enrichment of a tweet from the ProfileGeoRepository. A repository
+ * failure marks the field as partial instead of failing the hydration.
+ */
+object ProfileGeoHydrator {
+  type Type = ValueHydrator[Option[ProfileGeoEnrichment], TweetCtx]
+
+  val hydratedField: FieldByPath = fieldByPath(Tweet.ProfileGeoEnrichmentField)
+
+  private[this] val partialResult = ValueState.partial(None, hydratedField)
+
+  def apply(repo: ProfileGeoRepository.Type): Type =
+    ValueHydrator[Option[ProfileGeoEnrichment], TweetCtx] { (curr, ctx) =>
+      val key =
+        ProfileGeoKey(
+          tweetId = ctx.tweetId,
+          userId = Some(ctx.userId),
+          coords = ctx.geoCoordinates
+        )
+      repo(key).liftToTry.map {
+        case Return(enrichment) => ValueState.modified(Some(enrichment))
+        case Throw(_) => partialResult
+      }
+    }.onlyIf((curr, ctx) =>
+      curr.isEmpty && ctx.tweetFieldRequested(Tweet.ProfileGeoEnrichmentField))
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuoteTweetVisibilityHydrator.scala
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuoteTweetVisibilityHydrator.scala
new file mode 100644
index 000000000..f82e9fa0b
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuoteTweetVisibilityHydrator.scala
@@ -0,0 +1,95 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala.QuotedTweet
+
+/**
+ * Enforce that users are not shown quoted tweets where the author of the
+ * inner quoted tweet blocks the author of the outer quote tweet or the author
+ * of the inner quoted tweet is otherwise not visible to the outer author.
+ *
+ * In the example below, QuoteTweetVisibilityHydrator checks if @jack
+ * blocks @trollmaster.
+ *
+ * {{{
+ *   @viewer
+ *   +------------------------------+
+ *   | @trollmaster                 | <-- OUTER QUOTE TWEET
+ *   | lol u can't spell twitter    |
+ *   | +--------------------------+ |
+ *   | | @jack                    | <---- INNER QUOTED TWEET
+ *   | | just setting up my twttr | |
+ *   | +--------------------------+ |
+ *   +------------------------------+
+ * }}}
+ *
+ * In the example below, QuoteTweetVisibilityHydrator checks if @h4x0r can view
+ * user @protectedUser.
+ *
+ * {{{
+ *   @viewer
+ *   +------------------------------+
+ *   | @h4x0r                       | <-- OUTER QUOTE TWEET
+ *   | lol nice password            |
+ *   | +--------------------------+ |
+ *   | | @protectedUser           | <---- INNER QUOTED TWEET
+ *   | | my password is 1234      | |
+ *   | +--------------------------+ |
+ *   +------------------------------+
+ * }}}
+ *
+ *
+ * In the example below, QuoteTweetVisibilityHydrator checks if @viewer blocks @jack:
+ *
+ * {{{
+ *   @viewer
+ *   +------------------------------+
+ *   | @sometweeter                 | <-- OUTER QUOTE TWEET
+ *   | This is a historic tweet     |
+ *   | +--------------------------+ |
+ *   | | @jack                    | <---- INNER QUOTED TWEET
+ *   | | just setting up my twttr | |
+ *   | +--------------------------+ |
+ *   +------------------------------+
+ * }}}
+ *
+ */
+object QuoteTweetVisibilityHydrator {
+  type Type = ValueHydrator[Option[FilteredState.Unavailable], TweetCtx]
+
+  def apply(repo: QuotedTweetVisibilityRepository.Type): QuoteTweetVisibilityHydrator.Type =
+    ValueHydrator[Option[FilteredState.Unavailable], TweetCtx] { (_, ctx) =>
+      // Safe: `onlyIf` below only lets this run when `ctx.quotedTweet` is defined.
+      val quoted: QuotedTweet = ctx.quotedTweet.get
+
+      val verdict = repo(
+        QuotedTweetVisibilityRepository.Request(
+          outerTweetId = ctx.tweetId,
+          outerAuthorId = ctx.userId,
+          innerTweetId = quoted.tweetId,
+          innerAuthorId = quoted.userId,
+          viewerId = ctx.opts.forUserId,
+          safetyLevel = ctx.opts.safetyLevel
+        )
+      )
+
+      verdict.liftToTry.map {
+        case Return(Some(unavailable: FilteredState.Unavailable)) =>
+          ValueState.modified(Some(unavailable))
+
+        // Everything else leaves the value untouched:
+        //  - for tweet::quotedTweet relationships, all other FilteredStates allow the
+        //    quotedTweet to be hydrated and filtered independently
+        //  - on VF failure, gracefully degrade to no filtering
+        case _ =>
+          ValueState.UnmodifiedNone
+      }
+    }.onlyIf { (_, ctx) =>
+      !ctx.isRetweet &&
+      ctx.tweetFieldRequested(Tweet.QuotedTweetField) &&
+      ctx.opts.enforceVisibilityFiltering &&
+      ctx.quotedTweet.isDefined
+    }
+}
diff --git
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetHydrator.scala
new file mode 100644
index 000000000..e112ef395
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetHydrator.scala
@@ -0,0 +1,52 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+
+/**
+ * Loads the tweet referenced by `Tweet.quotedTweet`.
+ */
+object QuotedTweetHydrator {
+  type Type = ValueHydrator[Option[QuotedTweetResult], Ctx]
+
+  case class Ctx(
+    quotedTweetFilteredState: Option[FilteredState.Unavailable],
+    underlyingTweetCtx: TweetCtx)
+      extends TweetCtx.Proxy
+
+  def apply(repo: TweetResultRepository.Type): Type = {
+    ValueHydrator[Option[QuotedTweetResult], Ctx] { (_, ctx) =>
+      (ctx.quotedTweetFilteredState, ctx.quotedTweet) match {
+
+        case (_, None) =>
+          // If there is no quoted tweet ref, leave the value as None,
+          // indicating undefined
+          ValueState.StitchUnmodifiedNone
+
+        case (Some(fs), _) =>
+          // A filtered state is already known: report it without querying the repo
+          Stitch.value(ValueState.modified(Some(QuotedTweetResult.Filtered(fs))))
+
+        case (None, Some(qtRef)) =>
+          val qtQueryOptions =
+            ctx.opts.copy(
+              // we don't want to recursively load quoted tweets
+              include = ctx.opts.include.copy(quotedTweet = false),
+              // be sure to get a clean version of the tweet
+              scrubUnrequestedFields = true,
+              // TweetVisibilityLibrary filters quoted tweets slightly differently from other tweets.
+              // Specifically, most Interstitial verdicts are converted to Drops.
+              isInnerQuotedTweet = true
+            )
+
+          repo(qtRef.tweetId, qtQueryOptions).transform { t =>
+            Stitch.const {
+              QuotedTweetResult.fromTry(t).map(r => ValueState.modified(Some(r)))
+            }
+          }
+      }
+    }.onlyIf((curr, ctx) => curr.isEmpty && ctx.opts.include.quotedTweet)
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetRefHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetRefHydrator.scala
new file mode 100644
index 000000000..e2556f986
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetRefHydrator.scala
@@ -0,0 +1,129 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.NotFound
+import com.twitter.stitch.Stitch
+import com.twitter.tweetutil.TweetPermalink
+import com.twitter.tweetypie.core.FilteredState
+import com.twitter.tweetypie.core.ValueState
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Adds QuotedTweet structs to tweets that contain a tweet permalink url at the end of the
+ * tweet text. After introduction of QT + Media, we stopped storing inner tweet permalinks
+ * in the outer tweet text. So this hydrator runs only for the cases below:
+ *
+ * - historical quote tweets which have inner tweet url in the tweet text and url entities.
+ * - new quote tweets created with pasted tweet permalinks, going forward we want to persist
+ *   quoted_tweet struct in MH for these tweets
+ */
+object QuotedTweetRefHydrator {
+  type Type = ValueHydrator[Option[QuotedTweet], Ctx]
+
+  case class Ctx(urlEntities: Seq[UrlEntity], underlyingTweetCtx: TweetCtx) extends TweetCtx.Proxy
+
+  val hydratedField: FieldByPath = fieldByPath(Tweet.QuotedTweetField)
+
+  private val partial = ValueState.partial(None, hydratedField)
+
+  val queryOptions: TweetQuery.Options =
+    TweetQuery.Options(
+      include = TweetQuery.Include(Set(Tweet.CoreDataField.id)),
+      // Don't enforce visibility filtering when loading the QuotedTweet struct because it is
+      // cacheable. The filtering happens in QuoteTweetVisibilityHydrator.
+      enforceVisibilityFiltering = false,
+      forUserId = None
+    )
+
+  def once(h: Type): Type =
+    TweetHydration.completeOnlyOnce(
+      queryFilter = queryFilter,
+      hydrationType = HydrationType.QuotedTweetRef,
+      dependsOn = Set(HydrationType.Urls),
+      hydrator = h
+    )
+
+  case class UrlHydrationFailed(url: String) extends Exception
+
+  /**
+   * Iterate through UrlEntity objects in reverse to identify a quoted-tweet ID
+   * to hydrate. Quoted tweets are indicated by a TweetPermalink in the tweet text
+   * that references an older tweet ID. If a quoted tweet permalink is found, also
+   * return the corresponding UrlEntity.
+   *
+   * @throws UrlHydrationFailed if we encounter a partial URL entity before
+   *   finding a tweet permalink URL.
+   */
+  def quotedTweetId(ctx: Ctx): Option[(UrlEntity, TweetId)] =
+    ctx.urlEntities.reverseIterator // we want the rightmost tweet permalink
+      .map { e: UrlEntity =>
+        if (UrlEntityHydrator.hydrationFailed(e)) throw UrlHydrationFailed(e.url)
+        else (e, e.expanded)
+      }
+      .collectFirst {
+        case (e, Some(TweetPermalink(_, quotedTweetId))) => (e, quotedTweetId)
+      }
+      // Prevent tweet-quoting cycles
+      .filter { case (_, quotedTweetId) => ctx.tweetId > quotedTweetId }
+
+  def buildShortenedUrl(e: UrlEntity): ShortenedUrl =
+    ShortenedUrl(
+      shortUrl = e.url,
+      // Reading from MH will also default the following to "".
+      // QuotedTweetRefUrlsHydrator will hydrate these cases
+      longUrl = e.expanded.getOrElse(""),
+      displayText = e.display.getOrElse("")
+    )
+
+  /**
+   * We run this hydrator only if:
+   *
+   * - quoted_tweet struct is empty
+   * - quoted_tweet is present but permalink is not
+   * - url entities is present. QT hydration depends on urls - long term goal
+   *   is to entirely rely on persisted quoted_tweet struct in MH
+   * - requested tweet is not a retweet
+   *
+   * Hydration steps:
+   * - We determine the last tweet permalink from url entities
+   * - Extract the inner tweet Id from the permalink
+   * - Query tweet repo with inner tweet Id
+   * - Construct quoted_tweet struct from hydrated tweet object and last permalink
+   */
+  def apply(repo: TweetRepository.Type): Type =
+    ValueHydrator[Option[QuotedTweet], Ctx] { (_, ctx) =>
+      // propagate errors from quotedTweetId in Stitch
+      Stitch(quotedTweetId(ctx)).liftToTry.flatMap {
+        case Return(Some((lastPermalinkEntity, quotedTweetId))) =>
+          repo(quotedTweetId, queryOptions).liftToTry.map {
+            case Return(tweet) =>
+              ValueState.modified(
+                Some(asQuotedTweet(tweet, lastPermalinkEntity))
+              )
+            case Throw(NotFound | _: FilteredState) => ValueState.UnmodifiedNone
+            case Throw(_) => partial
+          }
+        case Return(None) => Stitch(ValueState.UnmodifiedNone)
+        case Throw(_) => Stitch(partial)
+      }
+    }.onlyIf { (curr, ctx) =>
+      (curr.isEmpty || curr.exists(_.permalink.isEmpty)) &&
+      !ctx.isRetweet && ctx.urlEntities.nonEmpty
+    }
+
+  def queryFilter(opts: TweetQuery.Options): Boolean =
+    opts.include.tweetFields(Tweet.QuotedTweetField.id)
+
+  /**
+   * We construct Tweet.quoted_tweet from hydrated inner tweet.
+   * Note: if the inner tweet is a Retweet, we populate the quoted_tweet struct from source tweet.
+   */
+  def asQuotedTweet(tweet: Tweet, entity: UrlEntity): QuotedTweet = {
+    val shortenedUrl = Some(buildShortenedUrl(entity))
+    getShare(tweet) match {
+      case None => QuotedTweet(tweet.id, getUserId(tweet), shortenedUrl)
+      case Some(share) => QuotedTweet(share.sourceStatusId, share.sourceUserId, shortenedUrl)
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetRefUrlsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetRefUrlsHydrator.scala
new file mode 100644
index 000000000..b25acfc2e
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetRefUrlsHydrator.scala
@@ -0,0 +1,61 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tco_util.DisplayUrl
+import com.twitter.tweetutil.TweetPermalink
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * This populates expanded URL and display text in ShortenedUrl struct,
+ * which is part of QuotedTweet metadata. We are using User Identity repo
+ * to retrieve user's current screen-name to construct expanded url, instead
+ * of relying on URL hydration.
+ *
+ * Expanded urls contain a mutable screen name and an immutable tweetId.
+ * When visiting the link, you're always redirected to the link with
+ * correct screen name - therefore, it's okay to have permalinks containing
+ * old screen names that have since been changed by their user in the cache.
+ * Keys will be auto-refreshed based on the 14 days TTL, we can also have
+ * a daemon flush the keys with screen-name change.
+ *
+ */
+object QuotedTweetRefUrlsHydrator {
+  type Type = ValueHydrator[Option[QuotedTweet], TweetCtx]
+
+  /**
+   * Return true if longUrl or displayText is not set, or if a prior hydration set longUrl
+   * to shortUrl due to a partial (to re-attempt hydration).
+   */
+  def needsHydration(s: ShortenedUrl): Boolean =
+    s.longUrl.isEmpty || s.displayText.isEmpty || s.longUrl == s.shortUrl
+
+  def apply(repo: UserIdentityRepository.Type): Type = {
+    ValueHydrator[QuotedTweet, TweetCtx] { (curr, _) =>
+      repo(UserKey(curr.userId)).liftToTry.map { r =>
+        // we verify curr.permalink.exists pre-hydration
+        val shortUrl = curr.permalink.get.shortUrl
+        val expandedUrl = r match {
+          case Return(user) => TweetPermalink(user.screenName, curr.tweetId).httpsUrl
+          case Throw(_) => shortUrl // fall-back to shortUrl as expandedUrl
+        }
+        ValueState.delta(
+          curr,
+          curr.copy(
+            permalink = Some(
+              ShortenedUrl(
+                shortUrl,
+                expandedUrl,
+                DisplayUrl.truncateUrl(expandedUrl, true)
+              )
+            )
+          )
+        )
+      }
+    }
+  }.onlyIf { (curr, ctx) =>
+    curr.permalink.exists(needsHydration) &&
+    ctx.tweetFieldRequested(Tweet.QuotedTweetField) && !ctx.isRetweet
+  }.liftOption
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RepairMutation.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RepairMutation.scala
new file mode 100644
index 000000000..f960740b2
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RepairMutation.scala
@@ -0,0 +1,15 @@
+package com.twitter.tweetypie
+package hydrator
+
+/**
+ * A Mutation that will note all repairs that took place in the
+ * supplied StatsReceiver, under the names in repairers.
+ */
+object RepairMutation {
+  def apply[T](stats: StatsReceiver, repairers: (String, Mutation[T])*): Mutation[T] =
+    Mutation.all(
+      repairers.map {
+        case (name, mutation) => mutation.countMutations(stats.counter(name))
+      }
+    )
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ReplyScreenNameHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ReplyScreenNameHydrator.scala
new file mode 100644
index 000000000..6fa50d572
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ReplyScreenNameHydrator.scala
@@ -0,0 +1,39 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.NotFound
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Fills in the in-reply-to screen name of a Reply by looking up the replied-to user in
+ * the user identity repository. The hydrator only runs when the screen name is unset;
+ * a NotFound user leaves the reply unmodified, and any other failure marks the field
+ * as partial.
+ */
+object ReplyScreenNameHydrator {
+  import TweetLenses.Reply.{inReplyToScreenName => screenNameLens}
+
+  type Type = ValueHydrator[Option[Reply], TweetCtx]
+
+  val hydratedField: FieldByPath =
+    fieldByPath(Tweet.CoreDataField, TweetCoreData.ReplyField, Reply.InReplyToScreenNameField)
+
+  def once(h: ValueHydrator[Option[Reply], TweetCtx]): Type =
+    TweetHydration.completeOnlyOnce(
+      hydrationType = HydrationType.ReplyScreenName,
+      hydrator = h
+    )
+
+  def apply[C](repo: UserIdentityRepository.Type): ValueHydrator[Option[Reply], C] =
+    ValueHydrator[Reply, C] { (reply, ctx) =>
+      val key = UserKey(reply.inReplyToUserId)
+
+      repo(key).liftToTry.map {
+        case Return(user) => ValueState.modified(screenNameLens.set(reply, Some(user.screenName)))
+        case Throw(NotFound) => ValueState.unmodified(reply)
+        case Throw(_) => ValueState.partial(reply, hydratedField)
+      }
+    }.onlyIf((reply, _) => screenNameLens.get(reply).isEmpty).liftOption
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ReportedTweetFilter.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ReportedTweetFilter.scala
new file mode 100644
index
000000000..6f22c0634
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ReportedTweetFilter.scala
@@ -0,0 +1,30 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Fails hydration with `FilteredState.Unavailable.Reported` when the perspective is flagged
+ * as reported, and with `MissingPerspectiveError` when no perspective is available.
+ * Only runs when `excludeReported` is set and a `forUserId` is present.
+ */
+object ReportedTweetFilter {
+  type Type = ValueHydrator[Unit, Ctx]
+
+  object MissingPerspectiveError
+      extends TweetHydrationError("Cannot determine reported state because perspective is missing")
+
+  case class Ctx(perspective: Option[StatusPerspective], underlyingTweetCtx: TweetCtx)
+      extends TweetCtx.Proxy
+
+  def apply(): Type =
+    ValueHydrator[Unit, Ctx] { (_, ctx) =>
+      ctx.perspective match {
+        case None => Stitch.exception(MissingPerspectiveError)
+        case Some(p) if p.reported => Stitch.exception(FilteredState.Unavailable.Reported)
+        case Some(_) => ValueState.StitchUnmodifiedUnit
+      }
+    }.onlyIf { (_, ctx) => ctx.opts.excludeReported && ctx.opts.forUserId.isDefined }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RetweetMediaRepairer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RetweetMediaRepairer.scala
new file mode 100644
index 000000000..c200c0d75
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RetweetMediaRepairer.scala
@@ -0,0 +1,13 @@
+package com.twitter.tweetypie
+package hydrator
+
+/**
+ * Retweets should never have their own media, and should never be cached with a media
+ * entity. This mutation clears the media of any retweet that carries some.
+ */
+object RetweetMediaRepairer extends Mutation[Tweet] {
+  def apply(tweet: Tweet): Option[Tweet] = {
+    val retweetWithOwnMedia = isRetweet(tweet) && getMedia(tweet).nonEmpty
+    if (retweetWithOwnMedia) Some(TweetLenses.media.set(tweet, Nil)) else None
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RetweetParentStatusIdRepairer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RetweetParentStatusIdRepairer.scala
new file mode 100644
index 000000000..5206d39f1
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RetweetParentStatusIdRepairer.scala
@@ -0,0 +1,19 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.thriftscala.Share
+
+/**
+ * When creating a retweet, we set parent_status_id to the tweet id that the user sent (the tweet they're retweeting).
+ * Old tweets have parent_status_id set to zero.
+ * When loading the old tweets, we should set parent_status_id to source_status_id if it's zero.
+ */
+object RetweetParentStatusIdRepairer {
+  private val shareMutation =
+    Mutation.fromPartial[Option[Share]] {
+      case Some(legacyShare) if legacyShare.parentStatusId == 0L =>
+        Some(legacyShare.copy(parentStatusId = legacyShare.sourceStatusId))
+    }
+
+  private[tweetypie] val tweetMutation = TweetLenses.share.mutation(shareMutation)
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ScrubEngagementHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ScrubEngagementHydrator.scala
new file mode 100644
index 000000000..068a283ca
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ScrubEngagementHydrator.scala
@@ -0,0 +1,29 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.spam.rtf.thriftscala.FilteredReason
+import com.twitter.tweetypie.core.FilteredState
+import com.twitter.tweetypie.core.ValueState
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.visibility.results.counts.EngagementCounts
+
+/**
+ * Redact Tweet.counts (StatusCounts) for some visibility results
+ */
+object ScrubEngagementHydrator {
+  type Type = ValueHydrator[Option[StatusCounts], Ctx]
+
+  case class Ctx(filteredState: Option[FilteredState.Suppress])
+
+  def apply(): Type =
+    ValueHydrator.map[Option[StatusCounts], Ctx] { (curr: Option[StatusCounts], ctx: Ctx) =>
+      ctx.filteredState match {
+        // Scrub only when counts are present and the suppression carries a SafetyResult,
+        // whose action is handed to EngagementCounts.scrubEngagementCounts.
+        case Some(FilteredState.Suppress(FilteredReason.SafetyResult(result))) if curr.nonEmpty =>
+          ValueState.delta(curr, EngagementCounts.scrubEngagementCounts(result.action, curr))
+        case _ =>
+          ValueState.unmodified(curr)
+      }
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ScrubUncacheableTweetRepairer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ScrubUncacheableTweetRepairer.scala
new file mode 100644
index 000000000..ef76e9e76
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ScrubUncacheableTweetRepairer.scala
@@ -0,0 +1,42 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.thriftscala._
+
+object ScrubUncacheable {
+
+  // A mutation to use for scrubbing tweets for cache: when any of the fields listed
+  // below is set, it yields a copy of the tweet with all of them cleared.
+  val tweetMutation: Mutation[Tweet] =
+    Mutation { tweet =>
+      val hasUncacheableField =
+        tweet.place.isDefined ||
+          tweet.counts.isDefined ||
+          tweet.deviceSource.isDefined ||
+          tweet.perspective.isDefined ||
+          tweet.cards.isDefined ||
+          tweet.card2.isDefined ||
+          tweet.spamLabels.isDefined ||
+          tweet.conversationMuted.isDefined
+
+      if (hasUncacheableField)
+        Some(
+          tweet.copy(
+            place = None,
+            counts = None,
+            deviceSource = None,
+            perspective = None,
+            cards = None,
+            card2 = None,
+            spamLabels = None,
+            conversationMuted = None
+          )
+        )
+      else
+        None
+    }
+
+  // throws an AssertionError (with `message`) if `tweetMutation` would scrub the tweet
+  def assertNotScrubbed(message: String): Mutation[Tweet] =
+    tweetMutation.withEffect(Effect(update => assert(update.isEmpty, message)))
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/SourceTweetHydrator.scala
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/SourceTweetHydrator.scala
new file mode 100644
index 000000000..7309b016c
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/SourceTweetHydrator.scala
@@ -0,0 +1,70 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.NotFound
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core.FilteredState.Unavailable._
+import com.twitter.tweetypie.core.TweetResult
+import com.twitter.tweetypie.core.ValueState
+import com.twitter.tweetypie.repository.TweetQuery
+import com.twitter.tweetypie.repository.TweetResultRepository
+import com.twitter.tweetypie.thriftscala.DetachedRetweet
+
+/**
+ * Loads the source tweet for a retweet
+ */
+object SourceTweetHydrator {
+  type Type = ValueHydrator[Option[TweetResult], TweetCtx]
+
+  def configureOptions(opts: TweetQuery.Options): TweetQuery.Options = {
+    // set scrubUnrequestedFields to false so that we will have access to
+    // additional fields, which will be copied into the retweet.
+    // set fetchStoredTweets to false because we don't want to fetch and hydrate
+    // the source tweet if it is deleted.
+    opts.copy(scrubUnrequestedFields = false, fetchStoredTweets = false, isSourceTweet = true)
+  }
+
+  // Matches the "source tweet is gone" failures and extracts whether it was deleted:
+  // NotFound => false, TweetDeleted | BounceDeleted => true.
+  private object NotFoundException {
+    def unapply(t: Throwable): Option[Boolean] =
+      t match {
+        case NotFound => Some(false)
+        case TweetDeleted | BounceDeleted => Some(true)
+        case _ => None
+      }
+  }
+
+  def apply(
+    repo: TweetResultRepository.Type,
+    stats: StatsReceiver,
+    scribeDetachedRetweets: FutureEffect[DetachedRetweet] = FutureEffect.unit
+  ): Type = {
+    val notFoundCounter = stats.counter("not_found")
+
+    ValueHydrator[Option[TweetResult], TweetCtx] { (_, ctx) =>
+      ctx.sourceTweetId match {
+        case None =>
+          ValueState.StitchUnmodifiedNone
+        case Some(srcTweetId) =>
+          repo(srcTweetId, configureOptions(ctx.opts)).liftToTry.flatMap {
+            case Throw(NotFoundException(isDeleted)) =>
+              notFoundCounter.incr()
+              // NOTE(review): the effect's return value is discarded here — presumably fire-and-forget
+              scribeDetachedRetweets(detachedRetweet(srcTweetId, ctx))
+              if (ctx.opts.requireSourceTweet) {
+                Stitch.exception(SourceTweetNotFound(isDeleted))
+              } else {
+                ValueState.StitchUnmodifiedNone
+              }
+
+            case Return(r) => Stitch.value(ValueState.modified(Some(r)))
+            case Throw(t) => Stitch.exception(t)
+          }
+      }
+    }.onlyIf((curr, _) => curr.isEmpty)
+  }
+
+  def detachedRetweet(srcTweetId: TweetId, ctx: TweetCtx): DetachedRetweet =
+    DetachedRetweet(ctx.tweetId, ctx.userId, srcTweetId)
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/StripHiddenGeoCoordinates.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/StripHiddenGeoCoordinates.scala
new file mode 100644
index 000000000..3727c8779
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/StripHiddenGeoCoordinates.scala
@@ -0,0 +1,16 @@
+package com.twitter.tweetypie
+package hydrator
+
+/**
+ * Removes the coordinates from a tweet's core data when they are flagged as not for
+ * display (`display == false`). Yields None when there is nothing to strip.
+ */
+object StripHiddenGeoCoordinates extends Mutation[Tweet] {
+  def apply(tweet: Tweet): Option[Tweet] =
+    for {
+      coreData <- tweet.coreData
+      coords <- coreData.coordinates
+      if !coords.display
+      coreData2 = coreData.copy(coordinates = None)
+    } yield tweet.copy(coreData = Some(coreData2))
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/SuperfluousUrlEntityScrubber.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/SuperfluousUrlEntityScrubber.scala
new file mode 100644
index 000000000..d49b2c17a
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/SuperfluousUrlEntityScrubber.scala
@@ -0,0 +1,36 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * Removes superfluous url entities when there is a corresponding MediaEntity for the same
+ * url (same fromIndex, toIndex and url).
+ */
+object SuperfluousUrlEntityScrubber {
+  case class RawEntity(fromIndex: Short, toIndex: Short, url: String)
+
+  object RawEntity {
+    def from(e: UrlEntity): RawEntity = RawEntity(e.fromIndex, e.toIndex, e.url)
+    def fromUrls(es: Seq[UrlEntity]): Set[RawEntity] = es.map(from(_)).toSet
+    def from(e: MediaEntity): RawEntity = RawEntity(e.fromIndex, e.toIndex, e.url)
+    def fromMedia(es: Seq[MediaEntity]): Set[RawEntity] = es.map(from(_)).toSet
+  }
+
+  val mutation: Mutation[Tweet] =
+    Mutation[Tweet] { tweet =>
+      val media = getMedia(tweet)
+      val urls = getUrls(tweet)
+
+      if (media.isEmpty || urls.isEmpty) {
+        None
+      } else {
+        // Index/url triples of every media entity, as a Set for membership tests.
+        val mediaKeys = RawEntity.fromMedia(media)
+        val keptUrls = urls.filterNot(url => mediaKeys(RawEntity.from(url)))
+
+        // Only report a mutation when at least one url entity was dropped.
+        if (keptUrls.size != urls.size) Some(TweetLenses.urls.set(tweet, keptUrls)) else None
+      }
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TakedownHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TakedownHydrator.scala
new file mode 100644
index 000000000..f5a510047
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TakedownHydrator.scala
@@ -0,0 +1,46 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala.FieldByPath
+import com.twitter.tweetypie.util.Takedowns
+
+/**
+ * Hydrates per-country takedowns, which are the union of:
+ * 1. per-tweet takedowns, from tweetypieOnlyTakedown{CountryCode|Reasons} fields
+ * 2. user takedowns, read from gizmoduck.
+ *
+ * Note that this hydrator performs backwards compatibility by converting to and from
+ * [[com.twitter.tseng.withholding.thriftscala.TakedownReason]]. This is possible because a taken
+ * down country code can always be represented as a
+ * [[com.twitter.tseng.withholding.thriftscala.UnspecifiedReason]].
+ */
+object TakedownHydrator {
+  type Type = ValueHydrator[Option[Takedowns], Ctx]
+
+  case class Ctx(tweetTakedowns: Takedowns, underlyingTweetCtx: TweetCtx) extends TweetCtx.Proxy
+
+  val hydratedFields: Set[FieldByPath] =
+    Set(
+      fieldByPath(Tweet.TakedownCountryCodesField),
+      fieldByPath(Tweet.TakedownReasonsField)
+    )
+
+  def apply(repo: UserTakedownRepository.Type): Type =
+    ValueHydrator[Option[Takedowns], Ctx] { (curr, ctx) =>
+      repo(ctx.userId).liftToTry.map {
+        case Return(userReasons) =>
+          // Union of the tweet-level and user-level reasons; toSet removes duplicates
+          val reasons = Seq.concat(ctx.tweetTakedowns.reasons, userReasons).toSet
+          ValueState.delta(curr, Some(Takedowns(reasons)))
+        case Throw(_) =>
+          ValueState.partial(curr, hydratedFields)
+      }
+    }.onlyIf { (_, ctx) =>
+      (
+        ctx.tweetFieldRequested(Tweet.TakedownCountryCodesField) ||
+        ctx.tweetFieldRequested(Tweet.TakedownReasonsField)
+      ) && ctx.hasTakedown
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TextRepairer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TextRepairer.scala
new file mode 100644
index 000000000..5a5e62c3d
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TextRepairer.scala
@@ -0,0 +1,47 @@
+package com.twitter.tweetypie
+package hydrator
+
+import
com.twitter.tweetypie.serverutil.ExtendedTweetMetadataBuilder +import com.twitter.tweetypie.tweettext.Preprocessor._ +import com.twitter.tweetypie.tweettext.TextModification +import com.twitter.tweetypie.thriftscala.entities.Implicits._ + +object TextRepairer { + def apply(replace: String => Option[TextModification]): Mutation[Tweet] = + Mutation { tweet => + replace(getText(tweet)).map { mod => + val repairedTweet = tweet.copy( + coreData = tweet.coreData.map(c => c.copy(text = mod.updated)), + urls = Some(getUrls(tweet).flatMap(mod.reindexEntity(_))), + mentions = Some(getMentions(tweet).flatMap(mod.reindexEntity(_))), + hashtags = Some(getHashtags(tweet).flatMap(mod.reindexEntity(_))), + cashtags = Some(getCashtags(tweet).flatMap(mod.reindexEntity(_))), + media = Some(getMedia(tweet).flatMap(mod.reindexEntity(_))), + visibleTextRange = tweet.visibleTextRange.flatMap(mod.reindexEntity(_)) + ) + + val repairedExtendedTweetMetadata = repairedTweet.selfPermalink.flatMap { permalink => + val extendedTweetMetadata = ExtendedTweetMetadataBuilder(repairedTweet, permalink) + val repairedTextLength = getText(repairedTweet).length + if (extendedTweetMetadata.apiCompatibleTruncationIndex == repairedTextLength) { + None + } else { + Some(extendedTweetMetadata) + } + } + + repairedTweet.copy(extendedTweetMetadata = repairedExtendedTweetMetadata) + } + } + + /** + * Removes whitespace from the tweet, and updates all entity indices. 
+ */ + val BlankLineCollapser: Mutation[Tweet] = TextRepairer(collapseBlankLinesModification _) + + /** + * Replaces a special Unicode string that crashes the iOS app with '\ufffd' + */ + val CoreTextBugPatcher: Mutation[Tweet] = TextRepairer(replaceCoreTextBugModification _) + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetAuthorVisibilityHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetAuthorVisibilityHydrator.scala new file mode 100644 index 000000000..c9c5c71f9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetAuthorVisibilityHydrator.scala @@ -0,0 +1,43 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ + +/** + * Ensures that the tweet's author and source tweet's author (if retweet) are visible to the + * viewing user - ctx.opts.forUserId - when enforceVisibilityFiltering is true. + * If either of these users is not visible then hydration fails with a FilteredState.Unavailable. + * + * Note: blocking relationship is NOT checked here; this means if viewing user `forUserId` is blocked + * by either the tweet's author or source tweet's author, this will not filter out the tweet.
+ */ +object TweetAuthorVisibilityHydrator { + type Type = ValueHydrator[Unit, TweetCtx] + + def apply(repo: UserVisibilityRepository.Type): Type = + ValueHydrator[Unit, TweetCtx] { (_, ctx) => + val ids = Seq(ctx.userId) ++ ctx.sourceUserId + val keys = ids.map(id => toRepoQuery(id, ctx)) + + Stitch + .traverse(keys)(repo.apply).flatMap { responses => + val fs: Option[FilteredState.Unavailable] = responses.flatten.headOption + + fs match { + case Some(fs: FilteredState.Unavailable) => Stitch.exception(fs) + case None => ValueState.StitchUnmodifiedUnit + } + } + }.onlyIf((_, ctx) => ctx.opts.enforceVisibilityFiltering) + + private def toRepoQuery(userId: UserId, ctx: TweetCtx) = + UserVisibilityRepository.Query( + UserKey(userId), + ctx.opts.forUserId, + ctx.tweetId, + ctx.isRetweet, + ctx.opts.isInnerQuotedTweet, + Some(ctx.opts.safetyLevel)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCountsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCountsHydrator.scala new file mode 100644 index 000000000..17462081a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCountsHydrator.scala @@ -0,0 +1,189 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ +import scala.collection.mutable + +object TweetCountsHydrator { + type Type = ValueHydrator[Option[StatusCounts], Ctx] + + case class Ctx(featureSwitchResults: Option[FeatureSwitchResults], underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + val retweetCountField: FieldByPath = + fieldByPath(Tweet.CountsField, StatusCounts.RetweetCountField) + val replyCountField: FieldByPath = fieldByPath(Tweet.CountsField, StatusCounts.ReplyCountField) + val favoriteCountField: FieldByPath = + 
fieldByPath(Tweet.CountsField, StatusCounts.FavoriteCountField) + val quoteCountField: FieldByPath = fieldByPath(Tweet.CountsField, StatusCounts.QuoteCountField) + val bookmarkCountField: FieldByPath = + fieldByPath(Tweet.CountsField, StatusCounts.BookmarkCountField) + + val emptyCounts = StatusCounts() + + val retweetCountPartial = ValueState.partial(emptyCounts, retweetCountField) + val replyCountPartial = ValueState.partial(emptyCounts, replyCountField) + val favoriteCountPartial = ValueState.partial(emptyCounts, favoriteCountField) + val quoteCountPartial = ValueState.partial(emptyCounts, quoteCountField) + val bookmarkCountPartial = ValueState.partial(emptyCounts, bookmarkCountField) + + val bookmarksCountHydrationEnabledKey = "bookmarks_count_hydration_enabled" + + /** + * Take a Seq of StatusCounts and reduce down to a single StatusCounts. + * Note: `reduce` here is safe because we are guaranteed to always have at least + * one value. + */ + def reduceStatusCounts(counts: Seq[StatusCounts]): StatusCounts = + counts.reduce { (a, b) => + StatusCounts( + retweetCount = b.retweetCount.orElse(a.retweetCount), + replyCount = b.replyCount.orElse(a.replyCount), + favoriteCount = b.favoriteCount.orElse(a.favoriteCount), + quoteCount = b.quoteCount.orElse(a.quoteCount), + bookmarkCount = b.bookmarkCount.orElse(a.bookmarkCount) + ) + } + + def toKeys( + tweetId: TweetId, + countsFields: Set[FieldId], + curr: Option[StatusCounts] + ): Seq[TweetCountKey] = { + val keys = new mutable.ArrayBuffer[TweetCountKey](5) + + countsFields.foreach { + case StatusCounts.RetweetCountField.id => + if (curr.flatMap(_.retweetCount).isEmpty) + keys += RetweetsKey(tweetId) + + case StatusCounts.ReplyCountField.id => + if (curr.flatMap(_.replyCount).isEmpty) + keys += RepliesKey(tweetId) + + case StatusCounts.FavoriteCountField.id => + if (curr.flatMap(_.favoriteCount).isEmpty) + keys += FavsKey(tweetId) + + case StatusCounts.QuoteCountField.id => + if (curr.flatMap(_.quoteCount).isEmpty)
+ keys += QuotesKey(tweetId) + + case StatusCounts.BookmarkCountField.id => + if (curr.flatMap(_.bookmarkCount).isEmpty) + keys += BookmarksKey(tweetId) + + case _ => + } + + keys + } + + /** + * Get a StatusCounts object for a specific tweet and specific field (e.g. only fav, or reply etc). + * The result can be combined with other StatusCounts using `reduceStatusCounts`. + */ + def statusCountsRepo( + key: TweetCountKey, + repo: TweetCountsRepository.Type + ): Stitch[ValueState[StatusCounts]] = + repo(key).liftToTry.map { + case Return(count) => + ValueState.modified( + key match { + case _: RetweetsKey => StatusCounts(retweetCount = Some(count)) + case _: RepliesKey => StatusCounts(replyCount = Some(count)) + case _: FavsKey => StatusCounts(favoriteCount = Some(count)) + case _: QuotesKey => StatusCounts(quoteCount = Some(count)) + case _: BookmarksKey => StatusCounts(bookmarkCount = Some(count)) + } + ) + + case Throw(_) => + key match { + case _: RetweetsKey => retweetCountPartial + case _: RepliesKey => replyCountPartial + case _: FavsKey => favoriteCountPartial + case _: QuotesKey => quoteCountPartial + case _: BookmarksKey => bookmarkCountPartial + } + } + + def filterRequestedCounts( + userId: UserId, + requestedCounts: Set[FieldId], + bookmarkCountsDecider: Gate[Long], + featureSwitchResults: Option[FeatureSwitchResults] + ): Set[FieldId] = { + if (requestedCounts.contains(StatusCounts.BookmarkCountField.id)) + if (bookmarkCountsDecider(userId) || + featureSwitchResults + .flatMap(_.getBoolean(bookmarksCountHydrationEnabledKey, false)) + .getOrElse(false)) + requestedCounts + else + requestedCounts.filter(_ != StatusCounts.BookmarkCountField.id) + else + requestedCounts + } + + def apply(repo: TweetCountsRepository.Type, shouldHydrateBookmarksCount: Gate[Long]): Type = { + + val all: Set[FieldId] = StatusCounts.fieldInfos.map(_.tfield.id).toSet + + val modifiedZero: Map[Set[FieldId], ValueState[Some[StatusCounts]]] = { + for (set <-
all.subsets) yield { + @inline + def zeroOrNone(fieldId: FieldId) = + if (set.contains(fieldId)) Some(0L) else None + + val statusCounts = + StatusCounts( + retweetCount = zeroOrNone(StatusCounts.RetweetCountField.id), + replyCount = zeroOrNone(StatusCounts.ReplyCountField.id), + favoriteCount = zeroOrNone(StatusCounts.FavoriteCountField.id), + quoteCount = zeroOrNone(StatusCounts.QuoteCountField.id), + bookmarkCount = zeroOrNone(StatusCounts.BookmarkCountField.id) + ) + + set -> ValueState.modified(Some(statusCounts)) + } + }.toMap + + ValueHydrator[Option[StatusCounts], Ctx] { (curr, ctx) => + val countsFields: Set[FieldId] = filterRequestedCounts( + ctx.opts.forUserId.getOrElse(ctx.userId), + ctx.opts.include.countsFields, + shouldHydrateBookmarksCount, + ctx.featureSwitchResults + ) + if (ctx.isRetweet) { + // To avoid a reflection-induced key error where the countsFields can contain a fieldId + // that is not in the thrift schema loaded at start, we strip unknown field_ids using + // `intersect` + Stitch.value(modifiedZero(countsFields.intersect(all))) + } else { + val keys = toKeys(ctx.tweetId, countsFields, curr) + + Stitch.traverse(keys)(key => statusCountsRepo(key, repo)).map { results => + // always flag modified if starting from None + val vs0 = ValueState.success(curr.getOrElse(emptyCounts), curr.isEmpty) + val vs = vs0 +: results + + ValueState.sequence(vs).map(reduceStatusCounts).map(Some(_)) + } + } + }.onlyIf { (_, ctx) => + filterRequestedCounts( + ctx.opts.forUserId.getOrElse(ctx.userId), + ctx.opts.include.countsFields, + shouldHydrateBookmarksCount, + ctx.featureSwitchResults + ).nonEmpty + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCtx.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCtx.scala new file mode 100644 index 000000000..5540dc8dc --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCtx.scala @@ -0,0 +1,90 @@ +package 
com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie +import com.twitter.tweetypie.core.TweetData +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ +import org.apache.thrift.protocol.TField + +/** + * Encapsulates basic, immutable details about a tweet to be hydrated, along with the + * `TweetQuery.Options`. Only tweet data that are not affected by hydration should be + * exposed here, as a single `TweetCtx` instance should be usable for the entire hydration + * of a tweet. + */ +trait TweetCtx { + def opts: TweetQuery.Options + + def tweetId: TweetId + def userId: UserId + def text: String + def createdAt: Time + def createdVia: String + def isRetweet: Boolean + def isReply: Boolean + def isSelfReply: Boolean + def sourceUserId: Option[UserId] + def sourceTweetId: Option[TweetId] + def inReplyToTweetId: Option[TweetId] + def geoCoordinates: Option[GeoCoordinates] + def placeId: Option[String] + def hasTakedown: Boolean + def quotedTweet: Option[QuotedTweet] + + def completedHydrations: Set[HydrationType] + + def isInitialInsert: Boolean = opts.cause.initialInsert(tweetId) + + def tweetFieldRequested(field: TField): Boolean = tweetFieldRequested(field.id) + def tweetFieldRequested(fieldId: FieldId): Boolean = opts.include.tweetFields.contains(fieldId) + + def mediaFieldRequested(field: TField): Boolean = mediaFieldRequested(field.id) + def mediaFieldRequested(fieldId: FieldId): Boolean = opts.include.mediaFields.contains(fieldId) +} + +object TweetCtx { + def from(td: TweetData, opts: TweetQuery.Options): TweetCtx = FromTweetData(td, opts) + + trait Proxy extends TweetCtx { + protected def underlyingTweetCtx: TweetCtx + + def opts: TweetQuery.Options = underlyingTweetCtx.opts + def tweetId: TweetId = underlyingTweetCtx.tweetId + def userId: UserId = underlyingTweetCtx.userId + def text: String = underlyingTweetCtx.text + def createdAt: Time = underlyingTweetCtx.createdAt + def createdVia: String = 
underlyingTweetCtx.createdVia + def isRetweet: Boolean = underlyingTweetCtx.isRetweet + def isReply: Boolean = underlyingTweetCtx.isReply + def isSelfReply: Boolean = underlyingTweetCtx.isSelfReply + def sourceUserId: Option[UserId] = underlyingTweetCtx.sourceUserId + def sourceTweetId: Option[TweetId] = underlyingTweetCtx.sourceTweetId + def inReplyToTweetId: Option[TweetId] = underlyingTweetCtx.inReplyToTweetId + def geoCoordinates: Option[GeoCoordinates] = underlyingTweetCtx.geoCoordinates + def placeId: Option[String] = underlyingTweetCtx.placeId + def hasTakedown: Boolean = underlyingTweetCtx.hasTakedown + def completedHydrations: Set[HydrationType] = underlyingTweetCtx.completedHydrations + def quotedTweet: Option[QuotedTweet] = underlyingTweetCtx.quotedTweet + } + + private case class FromTweetData(td: TweetData, opts: TweetQuery.Options) extends TweetCtx { + private val tweet = td.tweet + def tweetId: TweetId = tweet.id + def userId: UserId = getUserId(tweet) + def text: String = getText(tweet) + def createdAt: Time = getTimestamp(tweet) + def createdVia: String = TweetLenses.createdVia.get(tweet) + def isRetweet: Boolean = getShare(tweet).isDefined + def isSelfReply: Boolean = tweetypie.isSelfReply(tweet) + def isReply: Boolean = getReply(tweet).isDefined + def sourceUserId: Option[UserId] = getShare(tweet).map(_.sourceUserId) + def sourceTweetId: Option[TweetId] = getShare(tweet).map(_.sourceStatusId) + def inReplyToTweetId: Option[TweetId] = getReply(tweet).flatMap(_.inReplyToStatusId) + def geoCoordinates: Option[GeoCoordinates] = TweetLenses.geoCoordinates.get(tweet) + def placeId: Option[String] = TweetLenses.placeId.get(tweet) + def hasTakedown: Boolean = TweetLenses.hasTakedown(tweet) + def completedHydrations: Set[HydrationType] = td.completedHydrations + def quotedTweet: Option[QuotedTweet] = getQuotedTweet(tweet) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala new file mode 100644 index 000000000..a12295322 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala @@ -0,0 +1,848 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.expandodo.thriftscala.Card +import com.twitter.expandodo.thriftscala.Card2 +import com.twitter.servo.cache.Cached +import com.twitter.servo.cache.CachedValueStatus +import com.twitter.servo.cache.LockingCache +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.media.thriftscala.MediaRef +import com.twitter.tweetypie.repository.PastedMedia +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepoCachePicker +import com.twitter.tweetypie.repository.TweetResultRepository +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.Takedowns +import com.twitter.util.Return +import com.twitter.util.Throw + +object TweetHydration { + + /** + * Wires up a set of hydrators that include those whose results are cached on the tweet, + * and some whose results are not cached but depend upon the results of the former. 
+ */ + def apply( + hydratorStats: StatsReceiver, + hydrateFeatureSwitchResults: TweetDataValueHydrator, + hydrateMentions: MentionEntitiesHydrator.Type, + hydrateLanguage: LanguageHydrator.Type, + hydrateUrls: UrlEntitiesHydrator.Type, + hydrateQuotedTweetRef: QuotedTweetRefHydrator.Type, + hydrateQuotedTweetRefUrls: QuotedTweetRefUrlsHydrator.Type, + hydrateMediaCacheable: MediaEntitiesHydrator.Cacheable.Type, + hydrateReplyScreenName: ReplyScreenNameHydrator.Type, + hydrateConvoId: ConversationIdHydrator.Type, + hydratePerspective: PerspectiveHydrator.Type, + hydrateEditPerspective: EditPerspectiveHydrator.Type, + hydrateConversationMuted: ConversationMutedHydrator.Type, + hydrateContributor: ContributorHydrator.Type, + hydrateTakedowns: TakedownHydrator.Type, + hydrateDirectedAt: DirectedAtHydrator.Type, + hydrateGeoScrub: GeoScrubHydrator.Type, + hydrateCacheableRepairs: TweetDataValueHydrator, + hydrateMediaUncacheable: MediaEntitiesHydrator.Uncacheable.Type, + hydratePostCacheRepairs: TweetDataValueHydrator, + hydrateTweetLegacyFormat: TweetDataValueHydrator, + hydrateQuoteTweetVisibility: QuoteTweetVisibilityHydrator.Type, + hydrateQuotedTweet: QuotedTweetHydrator.Type, + hydratePastedMedia: PastedMediaHydrator.Type, + hydrateMediaRefs: MediaRefsHydrator.Type, + hydrateMediaTags: MediaTagsHydrator.Type, + hydrateClassicCards: CardHydrator.Type, + hydrateCard2: Card2Hydrator.Type, + hydrateContributorVisibility: ContributorVisibilityFilter.Type, + hydrateHasMedia: HasMediaHydrator.Type, + hydrateTweetCounts: TweetCountsHydrator.Type, + hydratePreviousTweetCounts: PreviousTweetCountsHydrator.Type, + hydratePlace: PlaceHydrator.Type, + hydrateDeviceSource: DeviceSourceHydrator.Type, + hydrateProfileGeo: ProfileGeoHydrator.Type, + hydrateSourceTweet: SourceTweetHydrator.Type, + hydrateIM1837State: IM1837FilterHydrator.Type, + hydrateIM2884State: IM2884FilterHydrator.Type, + hydrateIM3433State: IM3433FilterHydrator.Type, + hydrateTweetAuthorVisibility: 
TweetAuthorVisibilityHydrator.Type, + hydrateReportedTweetVisibility: ReportedTweetFilter.Type, + scrubSuperfluousUrlEntities: TweetDataValueHydrator, + copyFromSourceTweet: TweetDataValueHydrator, + hydrateTweetVisibility: TweetVisibilityHydrator.Type, + hydrateEscherbirdAnnotations: EscherbirdAnnotationHydrator.Type, + hydrateScrubEngagements: ScrubEngagementHydrator.Type, + hydrateConversationControl: ConversationControlHydrator.Type, + hydrateEditControl: EditControlHydrator.Type, + hydrateUnmentionData: UnmentionDataHydrator.Type, + hydrateNoteTweetSuffix: TweetDataValueHydrator + ): TweetDataValueHydrator = { + val scrubCachedTweet: TweetDataValueHydrator = + ValueHydrator + .fromMutation[Tweet, TweetQuery.Options]( + ScrubUncacheable.tweetMutation.countMutations(hydratorStats.counter("scrub_cached_tweet")) + ) + .lensed(TweetData.Lenses.tweet) + .onlyIf((td, opts) => opts.cause.reading(td.tweet.id)) + + // We perform independent hydrations of individual bits of + // data and pack the results into tuples instead of updating + // the tweet for each one in order to avoid making lots of + // copies of the tweet. 
+ + val hydratePrimaryCacheableFields: TweetDataValueHydrator = + ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = TweetCtx.from(td, opts) + val tweet = td.tweet + + val urlsMediaQuoteTweet: Stitch[ + ValueState[(Seq[UrlEntity], Seq[MediaEntity], Option[QuotedTweet])] + ] = + for { + urls <- hydrateUrls(getUrls(tweet), ctx) + (media, quotedTweet) <- Stitch.join( + hydrateMediaCacheable( + getMedia(tweet), + MediaEntityHydrator.Cacheable.Ctx(urls.value, ctx) + ), + for { + qtRef <- hydrateQuotedTweetRef( + tweet.quotedTweet, + QuotedTweetRefHydrator.Ctx(urls.value, ctx) + ) + qtRefWithUrls <- hydrateQuotedTweetRefUrls(qtRef.value, ctx) + } yield { + ValueState(qtRefWithUrls.value, qtRef.state ++ qtRefWithUrls.state) + } + ) + } yield { + ValueState.join(urls, media, quotedTweet) + } + + val conversationId: Stitch[ValueState[Option[ConversationId]]] = + hydrateConvoId(getConversationId(tweet), ctx) + + val mentions: Stitch[ValueState[Seq[MentionEntity]]] = + hydrateMentions(getMentions(tweet), ctx) + + val replyScreenName: Stitch[ValueState[Option[Reply]]] = + hydrateReplyScreenName(getReply(tweet), ctx) + + val directedAt: Stitch[ValueState[Option[DirectedAtUser]]] = + hydrateDirectedAt( + getDirectedAtUser(tweet), + DirectedAtHydrator.Ctx( + mentions = getMentions(tweet), + metadata = tweet.directedAtUserMetadata, + underlyingTweetCtx = ctx + ) + ) + + val language: Stitch[ValueState[Option[Language]]] = + hydrateLanguage(tweet.language, ctx) + + val contributor: Stitch[ValueState[Option[Contributor]]] = + hydrateContributor(tweet.contributor, ctx) + + val geoScrub: Stitch[ValueState[(Option[GeoCoordinates], Option[PlaceId])]] = + hydrateGeoScrub( + (TweetLenses.geoCoordinates(tweet), TweetLenses.placeId(tweet)), + ctx + ) + + Stitch + .joinMap( + urlsMediaQuoteTweet, + conversationId, + mentions, + replyScreenName, + directedAt, + language, + contributor, + geoScrub + )(ValueState.join(_, _, _, _, _, _, _, _)) + .map { values => + if 
(values.state.isEmpty) { + ValueState.unmodified(td) + } else { + values.map { + case ( + (urls, media, quotedTweet), + conversationId, + mentions, + reply, + directedAt, + language, + contributor, + coreGeo + ) => + val (coordinates, placeId) = coreGeo + td.copy( + tweet = tweet.copy( + coreData = tweet.coreData.map( + _.copy( + reply = reply, + conversationId = conversationId, + directedAtUser = directedAt, + coordinates = coordinates, + placeId = placeId + ) + ), + urls = Some(urls), + media = Some(media), + mentions = Some(mentions), + language = language, + quotedTweet = quotedTweet, + contributor = contributor + ) + ) + } + } + } + } + + val assertNotScrubbed: TweetDataValueHydrator = + ValueHydrator.fromMutation[TweetData, TweetQuery.Options]( + ScrubUncacheable + .assertNotScrubbed( + "output of the cacheable tweet hydrator should not require scrubbing" + ) + .lensed(TweetData.Lenses.tweet) + ) + + val hydrateDependentUncacheableFields: TweetDataValueHydrator = + ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = TweetCtx.from(td, opts) + val tweet = td.tweet + + val quotedTweetResult: Stitch[ValueState[Option[QuotedTweetResult]]] = + for { + qtFilterState <- hydrateQuoteTweetVisibility(None, ctx) + quotedTweet <- hydrateQuotedTweet( + td.quotedTweetResult, + QuotedTweetHydrator.Ctx(qtFilterState.value, ctx) + ) + } yield { + ValueState.join(qtFilterState, quotedTweet).map(_._2) + } + + val pastedMedia: Stitch[ValueState[PastedMedia]] = + hydratePastedMedia( + PastedMediaHydrator.getPastedMedia(tweet), + PastedMediaHydrator.Ctx(getUrls(tweet), ctx) + ) + + val mediaTags: Stitch[ValueState[Option[TweetMediaTags]]] = + hydrateMediaTags(tweet.mediaTags, ctx) + + val classicCards: Stitch[ValueState[Option[Seq[Card]]]] = + hydrateClassicCards( + tweet.cards, + CardHydrator.Ctx(getUrls(tweet), getMedia(tweet), ctx) + ) + + val card2: Stitch[ValueState[Option[Card2]]] = + hydrateCard2( + tweet.card2, + Card2Hydrator.Ctx( + getUrls(tweet), + 
getMedia(tweet), + getCardReference(tweet), + ctx, + td.featureSwitchResults + ) + ) + + val contributorVisibility: Stitch[ValueState[Option[Contributor]]] = + hydrateContributorVisibility(tweet.contributor, ctx) + + val takedowns: Stitch[ValueState[Option[Takedowns]]] = + hydrateTakedowns( + None, // None because uncacheable hydrator doesn't depend on previous value + TakedownHydrator.Ctx(Takedowns.fromTweet(tweet), ctx) + ) + + val conversationControl: Stitch[ValueState[Option[ConversationControl]]] = + hydrateConversationControl( + tweet.conversationControl, + ConversationControlHydrator.Ctx(getConversationId(tweet), ctx) + ) + + // PreviousTweetCounts and Perspective hydration depends on tweet.editControl.edit_control_initial + // having been hydrated in EditControlHydrator; thus we are chaining them together. + val editControlWithDependencies: Stitch[ + ValueState[ + ( + Option[EditControl], + Option[StatusPerspective], + Option[StatusCounts], + Option[TweetPerspective] + ) + ] + ] = + for { + (edit, perspective) <- Stitch.join( + hydrateEditControl(tweet.editControl, ctx), + hydratePerspective( + tweet.perspective, + PerspectiveHydrator.Ctx(td.featureSwitchResults, ctx)) + ) + (counts, editPerspective) <- Stitch.join( + hydratePreviousTweetCounts( + tweet.previousCounts, + PreviousTweetCountsHydrator.Ctx(edit.value, td.featureSwitchResults, ctx)), + hydrateEditPerspective( + tweet.editPerspective, + EditPerspectiveHydrator + .Ctx(perspective.value, edit.value, td.featureSwitchResults, ctx)) + ) + } yield { + ValueState.join(edit, perspective, counts, editPerspective) + } + + Stitch + .joinMap( + quotedTweetResult, + pastedMedia, + mediaTags, + classicCards, + card2, + contributorVisibility, + takedowns, + conversationControl, + editControlWithDependencies + )(ValueState.join(_, _, _, _, _, _, _, _, _)) + .map { values => + if (values.state.isEmpty) { + ValueState.unmodified(td) + } else { + values.map { + case ( + quotedTweetResult, + pastedMedia, + 
ownedMediaTags, + cards, + card2, + contributor, + takedowns, + conversationControl, + (editControl, perspective, previousCounts, editPerspective) + ) => + td.copy( + tweet = tweet.copy( + media = Some(pastedMedia.mediaEntities), + mediaTags = pastedMedia.mergeTweetMediaTags(ownedMediaTags), + cards = cards, + card2 = card2, + contributor = contributor, + takedownCountryCodes = takedowns.map(_.countryCodes.toSeq), + takedownReasons = takedowns.map(_.reasons.toSeq), + conversationControl = conversationControl, + editControl = editControl, + previousCounts = previousCounts, + perspective = perspective, + editPerspective = editPerspective, + ), + quotedTweetResult = quotedTweetResult + ) + } + } + } + } + + val hydrateIndependentUncacheableFields: TweetDataEditHydrator = + EditHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = TweetCtx.from(td, opts) + val tweet = td.tweet + + // Group together the results of hydrators that don't perform + // filtering, because we don't care about the precedence of + // exceptions from these hydrators, because the exceptions all + // indicate failures, and picking any failure will be + // fine. (All of the other hydrators might throw filtering + // exceptions, so we need to make sure that we give precedence + // to their failures.) + val hydratorsWithoutFiltering = + Stitch.joinMap( + hydrateTweetCounts(tweet.counts, TweetCountsHydrator.Ctx(td.featureSwitchResults, ctx)), + // Note: Place is cached in memcache, it is just not cached on the Tweet. + hydratePlace(tweet.place, ctx), + hydrateDeviceSource(tweet.deviceSource, ctx), + hydrateProfileGeo(tweet.profileGeoEnrichment, ctx) + )(ValueState.join(_, _, _, _)) + + /** + * Multiple hydrators throw visibility filtering exceptions so specify an order to achieve + * a deterministic hydration result while ensuring that any retweet has a source tweet: + * 1. 
hydrateSourceTweet throws SourceTweetNotFound, this is a detached-retweet so treat + * the retweet hydration as if it were not found + * 2. hydrateTweetAuthorVisibility + * 3. hydrateSourceTweet (other than SourceTweetNotFound already handled above) + * 4. hydrateIM1837State + * 5. hydrateIM2884State + * 6. hydrateIM3433State + * 7. hydratorsWithoutFiltering miscellaneous exceptions (any visibility filtering + * exceptions should win over failure of a hydrator) + */ + val sourceTweetAndTweetAuthorResult = + Stitch + .joinMap( + hydrateSourceTweet(td.sourceTweetResult, ctx).liftToTry, + hydrateTweetAuthorVisibility((), ctx).liftToTry, + hydrateIM1837State((), ctx).liftToTry, + hydrateIM2884State((), ctx).liftToTry, + hydrateIM3433State((), ctx).liftToTry + ) { + case (Throw(t @ FilteredState.Unavailable.SourceTweetNotFound(_)), _, _, _, _) => + Throw(t) + case (_, Throw(t), _, _, _) => Throw(t) // TweetAuthorVisibility + case (Throw(t), _, _, _, _) => Throw(t) // SourceTweet + case (_, _, Throw(t), _, _) => Throw(t) // IM1837State + case (_, _, _, Throw(t), _) => Throw(t) // IM2884State + case (_, _, _, _, Throw(t)) => Throw(t) // IM3433State + case ( + Return(sourceTweetResultValue), + Return(authorVisibilityValue), + Return(im1837Value), + Return(im2884Value), + Return(im3433Value) + ) => + Return( + ValueState + .join( + sourceTweetResultValue, + authorVisibilityValue, + im1837Value, + im2884Value, + im3433Value + ) + ) + }.lowerFromTry + + StitchExceptionPrecedence(sourceTweetAndTweetAuthorResult) + .joinWith(hydratorsWithoutFiltering)(ValueState.join(_, _)) + .toStitch + .map { values => + if (values.state.isEmpty) { + EditState.unit[TweetData] + } else { + EditState[TweetData] { tweetData => + val tweet = tweetData.tweet + values.map { + case ( + (sourceTweetResult, _, _, _, _), + (counts, place, deviceSource, profileGeo) + ) => + tweetData.copy( + tweet = tweet.copy( + counts = counts, + place = place, + deviceSource = deviceSource, + profileGeoEnrichment = 
profileGeo + ), + sourceTweetResult = sourceTweetResult + ) + } + } + } + } + } + + val hydrateUnmentionDataToTweetData: TweetDataValueHydrator = + TweetHydration.setOnTweetData( + TweetData.Lenses.tweet.andThen(TweetLenses.unmentionData), + (td: TweetData, opts: TweetQuery.Options) => + UnmentionDataHydrator + .Ctx(getConversationId(td.tweet), getMentions(td.tweet), TweetCtx.from(td, opts)), + hydrateUnmentionData + ) + + val hydrateCacheableFields: TweetDataValueHydrator = + ValueHydrator.inSequence( + scrubCachedTweet, + hydratePrimaryCacheableFields, + // Relies on mentions being hydrated in hydratePrimaryCacheableFields + hydrateUnmentionDataToTweetData, + assertNotScrubbed, + hydrateCacheableRepairs + ) + + // The conversation muted hydrator needs the conversation id, + // which comes from the primary cacheable fields, and the media hydrator + // needs the cacheable media entities. + val hydrateUncacheableMedia: TweetDataValueHydrator = + ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = TweetCtx.from(td, opts) + val tweet = td.tweet + + val mediaCtx = + MediaEntityHydrator.Uncacheable.Ctx(td.tweet.mediaKeys, ctx) + + val media: Stitch[ValueState[Option[Seq[MediaEntity]]]] = + hydrateMediaUncacheable.liftOption.apply(td.tweet.media, mediaCtx) + + val conversationMuted: Stitch[ValueState[Option[Boolean]]] = + hydrateConversationMuted( + tweet.conversationMuted, + ConversationMutedHydrator.Ctx(getConversationId(tweet), ctx) + ) + + // MediaRefs need to be hydrated at this phase because they rely on the media field + // on the Tweet, which can get unset by later hydrators. 
+ val mediaRefs: Stitch[ValueState[Option[Seq[MediaRef]]]] = + hydrateMediaRefs( + tweet.mediaRefs, + MediaRefsHydrator.Ctx(getMedia(tweet), getMediaKeys(tweet), getUrls(tweet), ctx) + ) + + Stitch + .joinMap( + media, + conversationMuted, + mediaRefs + )(ValueState.join(_, _, _)) + .map { values => + if (values.state.isEmpty) { + ValueState.unmodified(td) + } else { + val tweet = td.tweet + values.map { + case (media, conversationMuted, mediaRefs) => + td.copy( + tweet = tweet.copy( + media = media, + conversationMuted = conversationMuted, + mediaRefs = mediaRefs + ) + ) + } + } + } + } + + val hydrateHasMediaToTweetData: TweetDataValueHydrator = + TweetHydration.setOnTweetData( + TweetData.Lenses.tweet.andThen(TweetLenses.hasMedia), + (td: TweetData, opts: TweetQuery.Options) => td.tweet, + hydrateHasMedia + ) + + val hydrateReportedTweetVisibilityToTweetData: TweetDataValueHydrator = { + // Create a TweetDataValueHydrator that calls hydrateReportedTweetVisibility, which + // either throws a FilteredState.Unavailable or returns Unit. 
+ ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = ReportedTweetFilter.Ctx(td.tweet.perspective, TweetCtx.from(td, opts)) + hydrateReportedTweetVisibility((), ctx).map { _ => + ValueState.unmodified(td) + } + } + } + + val hydrateTweetVisibilityToTweetData: TweetDataValueHydrator = + TweetHydration.setOnTweetData( + TweetData.Lenses.suppress, + (td: TweetData, opts: TweetQuery.Options) => + TweetVisibilityHydrator.Ctx(td.tweet, TweetCtx.from(td, opts)), + hydrateTweetVisibility + ) + + val hydrateEscherbirdAnnotationsToTweetAndCachedTweet: TweetDataValueHydrator = + TweetHydration.setOnTweetAndCachedTweet( + TweetLenses.escherbirdEntityAnnotations, + (td: TweetData, _: TweetQuery.Options) => td.tweet, + hydrateEscherbirdAnnotations + ) + + val scrubEngagements: TweetDataValueHydrator = + TweetHydration.setOnTweetData( + TweetData.Lenses.tweetCounts, + (td: TweetData, _: TweetQuery.Options) => ScrubEngagementHydrator.Ctx(td.suppress), + hydrateScrubEngagements + ) + + /** + * This is where we wire up all the separate hydrators into a single [[TweetDataValueHydrator]]. + * + * Each hydrator here is either a [[TweetDataValueHydrator]] or a [[TweetDataEditHydrator]]. + * We use [[EditHydrator]]s for anything that needs to run in parallel ([[ValueHydrator]]s can + * only be run in sequence). 
+ */ + ValueHydrator.inSequence( + // Hydrate FeatureSwitchResults first, so they can be used by other hydrators if needed + hydrateFeatureSwitchResults, + EditHydrator + .inParallel( + ValueHydrator + .inSequence( + // The result of running these hydrators is saved as `cacheableTweetResult` and + // written back to cache via `cacheChangesEffect` in `hydrateRepo` + TweetHydration.captureCacheableTweetResult( + hydrateCacheableFields + ), + // Uncacheable hydrators that depend only on the cacheable fields + hydrateUncacheableMedia, + // clean-up partially hydrated entities before any of the hydrators that look at + // url and media entities run, so that they never see bad entities. + hydratePostCacheRepairs, + // These hydrators are all dependent on each other and/or the previous hydrators + hydrateDependentUncacheableFields, + // Sets `hasMedia`. Comes after PastedMediaHydrator in order to include pasted + // pics as well as other media & urls. + hydrateHasMediaToTweetData + ) + .toEditHydrator, + // These hydrators do not rely on any other hydrators and so can be run in parallel + // with the above hydrators (and with each other) + hydrateIndependentUncacheableFields + ) + .toValueHydrator, + // Depends on reported perspectival having been hydrated in PerspectiveHydrator + hydrateReportedTweetVisibilityToTweetData, + // Remove superfluous urls entities when there is a corresponding MediaEntity for the same url + scrubSuperfluousUrlEntities, + // The copyFromSourceTweet hydrator needs to be located after the hydrators that produce the + // fields to copy. It must be located after PartialEntityCleaner (part of postCacheRepairs), + // which removes failed MediaEntities. It also depends on takedownCountryCodes having been + // hydrated in TakedownHydrator. 
+ copyFromSourceTweet, + // depends on AdditionalFieldsHydrator and CopyFromSourceTweet to copy safety labels + hydrateTweetVisibilityToTweetData, + // for IPI'd tweets, we want to disable tweet engagement counts from being returned + // StatusCounts for replyCount, retweetCount. + // scrubEngagements hydrator must come after tweet visibility hydrator. + // tweet visibility hydrator emits the suppressed FilteredState needed for scrubbing. + scrubEngagements, + // this hydrator runs when writing the current tweet + // Escherbird comes last in order to consume a tweet that's as close as possible + // to the tweet written to tweet_events + hydrateEscherbirdAnnotationsToTweetAndCachedTweet + .onlyIf((td, opts) => opts.cause.writing(td.tweet.id)), + // Add an ellipsis to the end of the text for a Tweet that has a NoteTweet associated. + // This is so that the Tweet is displayed on the home timeline with an ellipsis, letting + // the User know that there's more to see. + hydrateNoteTweetSuffix, + /** + * Post-cache repair of QT text and entities to support rendering on all clients + * Moving this to end of the pipeline to avoid/minimize chance of following hydrators + * depending on modified tweet text or entities. + * When we start persisting shortUrl in MH - permalink won't be empty. therefore, + * we won't run QuotedTweetRefHydrator and just hydrate expanded and display + * using QuotedTweetRefUrlsHydrator. We will use hydrated permalink to repair + * QT text and entities for non-upgraded clients in this step. + * */ + hydrateTweetLegacyFormat + ) + } + + /** + * Returns a new hydrator that takes the produced result, and captures the result value + * in the `cacheableTweetResult` field of the enclosed `TweetData`. 
/**
 * Wraps `h` so that, once it has produced a result, a copy of that result is stored in the
 * `cacheableTweetResult` field of the `TweetData` it carries.
 */
def captureCacheableTweetResult(h: TweetDataValueHydrator): TweetDataValueHydrator =
  ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) =>
    h(td, opts).map { hydrated =>
      // The stored copy has the completed hydrations from the hydration state folded into
      // the TweetData itself; this is used when converting to a CachedTweet.
      val snapshot = hydrated.map(_.addHydrated(hydrated.state.completedHydrations))
      hydrated.map(_.copy(cacheableTweetResult = Some(snapshot)))
    }
  }

/**
 * Builds a `TweetDataValueHydrator` from a field-level hydrator and a lens into `Tweet`:
 *
 * 1. runs `h` on the value the lens selects from the main tweet,
 * 2. writes the hydrated value back to the main tweet through the lens,
 * 3. writes the same value to the tweet held in `cacheableTweetResult`.
 *
 * When `h` reports an empty state the input `TweetData` is returned unmodified.
 */
def setOnTweetAndCachedTweet[A, C](
  l: Lens[Tweet, A],
  mkCtx: (TweetData, TweetQuery.Options) => C,
  h: ValueHydrator[A, C]
): TweetDataValueHydrator = {
  // TweetData -> tweet -> l
  val onTweet = TweetData.Lenses.tweet.andThen(l)

  // TweetData -> cacheableTweetResult -> tweet -> l
  val cachedTweet =
    TweetLenses
      .requireSome(TweetData.Lenses.cacheableTweetResult)
      .andThen(TweetResult.Lenses.tweet)
  val onCachedTweet = cachedTweet.andThen(l)

  ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) =>
    val current = onTweet.get(td)
    val ctx = mkCtx(td, opts)

    h.run(current, ctx).map {
      case untouched if untouched.state.isEmpty =>
        ValueState.unmodified(td)
      case hydrated =>
        hydrated.map(v => Lens.setAll(td, onTweet -> v, onCachedTweet -> v))
    }
  }
}
/**
 * Lifts a field-level hydrator to a `TweetDataValueHydrator`: the lensed value is hydrated
 * and the result overwrites the existing value. An empty result state leaves the
 * `TweetData` unmodified.
 */
def setOnTweetData[A, C](
  lens: Lens[TweetData, A],
  mkCtx: (TweetData, TweetQuery.Options) => C,
  h: ValueHydrator[A, C]
): TweetDataValueHydrator =
  ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) =>
    val current = lens.get(td)
    val ctx = mkCtx(td, opts)

    h.run(current, ctx).map {
      case untouched if untouched.state.isEmpty => ValueState.unmodified(td)
      case hydrated => hydrated.map(v => lens.set(td, v))
    }
  }

/**
 * Builds the [[Effect]] that writes updated values back to cache after a
 * [[TweetDataValueHydrator]] has run.
 *
 * Exactly one counter is incremented per result: `updated`, `unchanged`, `cache_error`
 * or `missing_cacheable_result`.
 */
def cacheChanges(
  cache: LockingCache[TweetId, Cached[TweetData]],
  stats: StatsReceiver
): Effect[ValueState[TweetData]] = {
  val updatedCounter = stats.counter("updated")
  val unchangedCounter = stats.counter("unchanged")
  val picker = new TweetRepoCachePicker[TweetData](_.cachedAt)
  val cacheErrorCounter = stats.counter("cache_error")
  val missingCacheableResultCounter = stats.counter("missing_cacheable_result")

  Effect[TweetResult] { result =>
    // cacheErrorEncountered is never set on `cacheableTweetResult`, so it has to be read
    // from the outer tweet state.
    val cacheErrorEncountered = result.state.cacheErrorEncountered

    result.value.cacheableTweetResult match {
      case None =>
        missingCacheableResultCounter.incr()

      case Some(_) if cacheErrorEncountered =>
        cacheErrorCounter.incr()

      case Some(ValueState(td, state)) if state.modified =>
        val tweetData = td.addHydrated(state.completedHydrations)
        val now = Time.now
        val cached = Cached(Some(tweetData), CachedValueStatus.Found, now, Some(now))
        val handler = LockingCache.PickingHandler(cached, picker)

        updatedCounter.incr()
        cache.lockAndSet(tweetData.tweet.id, handler)

      case Some(_) =>
        unchangedCounter.incr()
    }
  }
}

/**
 * Guards `hydrator` so that it runs only when `queryFilter` accepts `ctx.opts` and
 * `hydrationType` is not yet listed in `ctx.completedHydrations`.
 *
 * After a run, the resulting state is extended with `hydrationType` (marked as modified)
 * only if the result has no failed fields, every entry of `dependsOn` is already completed,
 * and `hydrationType` itself is not already completed. Otherwise the result is passed
 * through untouched.
 */
def completeOnlyOnce[A, C <: TweetCtx](
  queryFilter: TweetQuery.Options => Boolean = _ => true,
  hydrationType: HydrationType,
  dependsOn: Set[HydrationType] = Set.empty,
  hydrator: ValueHydrator[A, C]
): ValueHydrator[A, C] = {
  val completedState = HydrationState.modified(hydrationType)

  val marking =
    ValueHydrator[A, C] { (a, ctx) =>
      hydrator(a, ctx).map { res =>
        // A failed hydration, an unmet dependency, or a forced rehydration of an already
        // completed type must not add hydrationType nor touch the modified flag.
        val shouldMarkComplete =
          res.state.failedFields.isEmpty &&
            dependsOn.forall(ctx.completedHydrations.contains) &&
            !ctx.completedHydrations.contains(hydrationType)

        if (shouldMarkComplete) res.copy(state = res.state ++ completedState) else res
      }
    }

  marking.onlyIf { (_, ctx) =>
    queryFilter(ctx.opts) && !ctx.completedHydrations.contains(hydrationType)
  }
}

/**
 * Decorates a tweet-result repository with `hydrator`.
 *
 * The caller's options are expanded with `optionsExpander`; the expanded options are handed
 * to both `repo` and `hydrator`. The combined result is passed to `cacheChangesEffect` when
 * the original options allow writing to cache, and is finally scrubbed according to the
 * original (unexpanded) options.
 */
def hydrateRepo(
  hydrator: TweetDataValueHydrator,
  cacheChangesEffect: Effect[TweetResult],
  optionsExpander: TweetQueryOptionsExpander.Type
)(
  repo: TweetResultRepository.Type
): TweetResultRepository.Type =
  (tweetId: TweetId, originalOpts: TweetQuery.Options) => {
    val expandedOpts = optionsExpander(originalOpts)

    repo(tweetId, expandedOpts).flatMap { repoResult =>
      hydrator(repoResult.value, expandedOpts).map { hydratorResult =>
        val combined =
          TweetResult(hydratorResult.value, repoResult.state ++ hydratorResult.state)

        if (originalOpts.cacheControl.writeToCache) {
          cacheChangesEffect(combined)
        }

        UnrequestedFieldScrubber(originalOpts).scrub(combined)
      }
    }
  }
/**
 * Thin value-class wrapper around a `Stitch[_]` whose `joinWith` fixes which side's
 * exception wins when two computations are joined (important for visibility filtering).
 */
// exposed for testing
case class StitchExceptionPrecedence[A](toStitch: Stitch[A]) extends AnyVal {

  /**
   * Joins this Stitch with `rhs` and combines the two values with `f`.
   *
   * Only the right-hand side is lifted to `Try` before joining, so an exception on the
   * left is not deferred behind the right-hand computation, while an exception on the
   * right is only surfaced (via `lowerFromTry`) once the left-hand value is available.
   * The left-hand exception therefore takes precedence.
   */
  def joinWith[B, C](rhs: Stitch[B])(f: (A, B) => C): StitchExceptionPrecedence[C] = {
    val rhsAsTry = rhs.liftToTry
    val joined = Stitch.joinMap(toStitch, rhsAsTry) { (a, tryB) => tryB.map(f(a, _)) }
    StitchExceptionPrecedence(joined.lowerFromTry)
  }
}
/**
 * This hydrator is the backwards-compatibility layer to support QT, Edit Tweets & Mixed Media
 * Tweets rendering on legacy non-updated clients. Legacy rendering provides a way for every client
 * to consume these Tweets until the client is upgraded. For Edit and Mixed Media Tweets, the
 * Tweet's self-permalink is appended to the visible text. For Quoting Tweets, the Quoted Tweet's
 * permalink is appended to the text. For Tweets that meet multiple criteria for legacy rendering
 * (e.g. QT containing Mixed Media), only one permalink is appended and the self-permalink takes
 * precedence.
 */
object TweetLegacyFormatter {

  private[this] val log = Logger(getClass)

  import TweetText._

  /** True when `shortUrl` already occurs in the tweet text or as the url of a url entity. */
  private[this] def alreadyLinked(tweet: Tweet, shortUrl: String): Boolean =
    TweetLenses.text(tweet).contains(shortUrl) ||
      TweetLenses.urls(tweet).exists(_.url == shortUrl)

  /**
   * The quoted tweet's permalink, if it has to be appended for legacy rendering.
   *
   * Returns None when `opts.simpleQuotedTweet` is set, when the tweet is a retweet, when
   * there is no quoted-tweet permalink, or when its short url is already present in the
   * text or url entities.
   */
  def legacyQtPermalink(
    td: TweetData,
    opts: TweetQuery.Options
  ): Option[ShortenedUrl] = {
    val tweet = td.tweet

    if (opts.simpleQuotedTweet || TweetCtx.from(td, opts).isRetweet) None
    else
      tweet.quotedTweet
        .flatMap(_.permalink)
        .filterNot(qtLink => alreadyLinked(tweet, qtLink.shortUrl))
  }

  /**
   * The tweet's own permalink, if it has to be appended for legacy rendering.
   *
   * Returns None when there is no self-permalink, when its short url is already present in
   * the text or url entities, or when [[needsLegacyFormatting]] is false.
   */
  def legacySelfPermalink(
    td: TweetData
  ): Option[ShortenedUrl] = {
    val tweet = td.tweet

    // needsLegacyFormatting is evaluated last so the cheap presence checks can rule the
    // tweet out first.
    tweet.selfPermalink.filter { selfLink =>
      !alreadyLinked(tweet, selfLink.shortUrl) && needsLegacyFormatting(td)
    }
  }

  def isMixedMediaTweet(tweet: Tweet): Boolean =
    tweet.media.exists(Media.isMixedMedia)

  /** Builds the url entity covering `[from, to)` for the appended permalink. */
  def buildUrlEntity(from: Short, to: Short, permalink: ShortenedUrl): UrlEntity =
    UrlEntity(
      fromIndex = from,
      toIndex = to,
      url = permalink.shortUrl,
      expanded = Some(permalink.longUrl),
      display = Some(permalink.displayText)
    )

  /**
   * A visible text range is valid when 0 <= fromIndex <= toIndex <= textLength.
   * Invalid ranges are logged with the tweet id and rejected.
   */
  private[this] def isValidVisibleRange(
    tweetIdForLogging: TweetId,
    textRange: TextRange,
    textLength: Int
  ): Boolean = {
    // The lower bound matters: without it a negative range such as (-5, -2) would be
    // accepted and its toIndex used as the insertion offset.
    val isValid =
      textRange.fromIndex >= 0 &&
        textRange.fromIndex <= textRange.toIndex &&
        textRange.toIndex <= textLength
    if (!isValid) {
      log.warn(s"Tweet $tweetIdForLogging has invalid visibleTextRange: $textRange")
    }
    isValid
  }

  /**
   * Checks whether legacy formatting is required for Edit, Mixed Media and Note Tweets:
   * true when the tweet matches one of those criteria and the corresponding consumption
   * feature switch is not enabled (missing feature switch results count as disabled).
   *
   * Each feature switch is read only if the tweet actually matches its criterion.
   */
  def needsLegacyFormatting(
    td: TweetData
  ): Boolean = {
    // These feature switches are disabled unless greater than certain android, ios versions
    // & all versions of RWEB.
    val TweetEditConsumptionEnabledKey = "tweet_edit_consumption_enabled"
    val MixedMediaEnabledKey = "mixed_media_enabled"
    val NoteTweetConsumptionEnabledKey = "note_tweet_consumption_enabled"

    def fsEnabled(fsKey: String): Boolean =
      td.featureSwitchResults
        .flatMap(_.getBoolean(fsKey, shouldLogImpression = false))
        .getOrElse(false)

    val tweet = td.tweet

    (EditControlUtil.isEditTweet(tweet) && !fsEnabled(TweetEditConsumptionEnabledKey)) ||
    (isMixedMediaTweet(tweet) && !fsEnabled(MixedMediaEnabledKey)) ||
    (tweet.noteTweet.isDefined && !fsEnabled(NoteTweetConsumptionEnabledKey))
  }

  /**
   * Given a permalink, inserts its short url at the end of the visible text and re-aligns
   * url entities, media entities and the visible text range accordingly.
   *
   * If the resulting display length exceeds `OriginalMaxDisplayLength`, extended tweet
   * metadata is computed from the tweet's self-permalink; when no self-permalink exists
   * the ORIGINAL tweet is returned unchanged, an error is logged and
   * `self_permalink_not_found` is incremented.
   */
  def updateTextAndURLsAndMedia(
    permalink: ShortenedUrl,
    tweet: Tweet,
    statsReceiver: StatsReceiver
  ): Tweet = {

    val originalText = TweetLenses.text(tweet)
    val originalTextLength = codePointLength(originalText)

    // Default the visible range to the whole tweet if the existing visible range is invalid.
    val visibleRange: TextRange =
      TweetLenses
        .visibleTextRange(tweet)
        .filter((r: TextRange) => isValidVisibleRange(tweet.id, r, originalTextLength))
        .getOrElse(TextRange(0, originalTextLength))

    val permalinkShortUrl = permalink.shortUrl
    val insertAtCodePoint = Offset.CodePoint(visibleRange.toIndex)

    /*
     * Insertion at position 0 implies that the original tweet text has no
     * visible text, so the resulting text should be only the url without
     * leading padding.
     */
    val padLeft = if (insertAtCodePoint.toInt > 0) " " else ""

    /*
     * Empty visible text at position 0 implies that the original tweet text
     * only contains a URL in the hidden suffix area, which would not already
     * be padded.
     */
    val padRight = if (visibleRange == TextRange(0, 0)) " " else ""
    val paddedShortUrl = s"$padLeft$permalinkShortUrl$padRight"

    val tweetTextModification = TextModification.insertAt(
      originalText,
      insertAtCodePoint,
      paddedShortUrl
    )

    /*
     * As we modified tweet text and appended tweet permalink above
     * we have to correct the url and media entities accordingly as they are
     * expected to be present in the hidden suffix of text.
     *
     * - we compute the new (from, to) indices for the url entity
     * - build new url entity for quoted tweet permalink or self permalink for Edit/ MM Tweets
     * - shift url entities which are after visible range end
     * - shift media entities associated with above url entities
     */
    val padLeftLength = codePointLength(padLeft)
    val shortUrlLength = codePointLength(permalinkShortUrl)
    val fromIndex = insertAtCodePoint.toInt + padLeftLength
    val toIndex = fromIndex + shortUrlLength

    val tweetUrlEntity = buildUrlEntity(
      from = fromIndex.toShort,
      to = toIndex.toShort,
      permalink = permalink
    )

    // Only the first media entity is kept for mixed media tweets.
    val tweetMedia = if (isMixedMediaTweet(tweet)) {
      TweetLenses.media(tweet).take(1)
    } else {
      TweetLenses.media(tweet)
    }

    val modifiedMedia = tweetTextModification.reindexEntities(tweetMedia)
    val modifiedUrls =
      tweetTextModification.reindexEntities(TweetLenses.urls(tweet)) :+ tweetUrlEntity
    val modifiedText = tweetTextModification.updated

    /*
     * Visible Text Range computation differs by scenario
     * == Any Tweet with Media ==
     * Tweet text has a media url *after* the visible text range
     * original  text: [visible text] https://t.co/mediaUrl
     * original range:  ^START  END^
     *
     * Append the permalink URL to the *visible text* so non-upgraded clients can see it
     * modified  text: [visible text https://t.co/permalink] https://t.co/mediaUrl
     * modified range:  ^START                         END^
     * visible range expanded, permalink is visible
     *
     * == Non-QT Tweet w/o Media ==
     * original  text: [visible text]
     * original range: None (default: whole text is visible)
     *
     * modified  text: [visible text https://t.co/selfPermalink]
     * modified range: None (default: whole text is visible)
     * trailing self permalink will be visible
     *
     * == QT w/o Media ==
     * original  text: [visible text]
     * original range: None (default: whole text is visible)
     *
     * modified  text: [visible text] https://t.co/qtPermalink
     * modified range:  ^START  END^
     * trailing QT permalink is *hidden* because legacy clients that process the visible
     * text range know how to display QTs
     *
     * == Non-QT Replies w/o media ==
     * original  text: @user [visible text]
     * original range:        ^START  END^
     *
     * modified  text: @user [visible text https://t.co/selfPermalink]
     * modified range:        ^START                             END^
     * visible range expanded, self permalink is visible
     *
     * == QT Replies w/o media ==
     * original  text: @user [visible text]
     * original range:        ^START  END^
     *
     * modified  text: @user [visible text] https://t.co/qtPermalink
     * modified range:        ^START  END^
     * visible range remains the same, trailing QT permalink is hidden
     */
    val permalinkIsVisible =
      modifiedMedia.nonEmpty ||
        EditControlUtil.isEditTweet(tweet) ||
        tweet.noteTweet.isDefined

    val modifiedVisibleTextRange =
      if (permalinkIsVisible) {
        Some(
          visibleRange.copy(
            toIndex = visibleRange.toIndex + padLeftLength + shortUrlLength
          )
        )
      } else {
        Some(visibleRange)
      }

    val updatedTweet =
      Lens.setAll(
        tweet,
        TweetLenses.text -> modifiedText,
        TweetLenses.urls -> modifiedUrls.sortBy(_.fromIndex),
        TweetLenses.media -> modifiedMedia.sortBy(_.fromIndex),
        TweetLenses.visibleTextRange -> modifiedVisibleTextRange
      )

    /*
     * compute extended tweet metadata when text length > 140
     * and apply the final lens to return a modified tweet
     */
    val totalDisplayLength = displayLength(modifiedText)
    if (totalDisplayLength > OriginalMaxDisplayLength) {
      updatedTweet.selfPermalink match {
        case Some(selfLink) =>
          val extendedTweetMetadata = ExtendedTweetMetadataBuilder(updatedTweet, selfLink)
          updatedTweet.copy(
            extendedTweetMetadata = Some(extendedTweetMetadata)
          )
        case None =>
          /*
           * This case shouldn't happen as TweetBuilder currently populates
           * selfPermalink for extended tweets. In QT + Media, we will
           * use AttachmentBuilder to store selfPermalink during writes,
           * if text display length is going to exceed 140 after QT url append.
           */
          log.error(
            s"Failed to compute extended metadata for tweet: ${tweet.id} with " +
              s"display length: ${totalDisplayLength}, as self-permalink is empty."
          )
          statsReceiver.counter("self_permalink_not_found").incr()
          // NOTE(review): the unmodified input tweet is returned here, not updatedTweet;
          // presumably deliberate so over-length text is never emitted without extended
          // metadata - confirm.
          tweet
      }
    } else {
      updatedTweet
    }
  }

  /**
   * Builds the hydrator: when a legacy permalink applies, the tweet is rewritten with
   * [[updateTextAndURLsAndMedia]]; otherwise the `TweetData` is returned unmodified.
   */
  def apply(
    statsReceiver: StatsReceiver
  ): TweetDataValueHydrator = {
    ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) =>
      // Prefer any required self permalink rendering over QT permalink rendering because a
      // client that doesn't understand the attributes of the Tweet (i.e. Edit, Mixed
      // Media) won't be able to render the Tweet properly at all, regardless of whether
      // it's a QT. By preferring a visible self-permalink, the viewer is linked to an
      // RWeb view of the Tweet which can fully display all of its features.
      val legacyPermalink: Option[ShortenedUrl] =
        legacySelfPermalink(td)
          .orElse(legacyQtPermalink(td, opts))

      legacyPermalink match {
        case Some(link) =>
          val updatedTweet = updateTextAndURLsAndMedia(link, td.tweet, statsReceiver)
          Stitch(ValueState.delta(td, td.copy(tweet = updatedTweet)))
        case None =>
          Stitch(ValueState.unmodified(td))
      }
    }
  }
}
/**
 * A `TweetQueryOptionsExpander.Type` takes the `TweetQuery.Options` supplied by a caller and
 * widens the set of included fields so that dependencies between fields and options are
 * satisfied.
 */
object TweetQueryOptionsExpander {
  import TweetQuery._

  /**
   * Used by AdditionalFieldsHydrator, this function type can filter out or inject fieldIds to
   * request from Manhattan per tweet.
   */
  type Type = Options => Options

  /**
   * The identity expander: options pass through unchanged.
   */
  val unit: TweetQueryOptionsExpander.Type = identity

  /** A predicate over `Include`; two selectors can be OR-ed with `||`. */
  case class Selector(f: Include => Boolean) {
    def apply(i: Include): Boolean = f(i)

    def ||(other: Selector): Selector = Selector(i => this(i) || other(i))
  }

  private def selectTweetField(fieldId: FieldId): Selector =
    Selector(_.tweetFields.contains(fieldId))

  /** An `Include` that requests exactly the given tweet fields. */
  private def tweetFieldsInclude(fieldIds: FieldId*): Include =
    Include(tweetFields = fieldIds.toSet)

  // Direct dependencies only: when the selector on the left matches, the include on the
  // right must be added.
  private val firstOrderDependencies: Seq[(Selector, Include)] =
    Seq(
      selectTweetField(Tweet.MediaField.id) ->
        tweetFieldsInclude(Tweet.UrlsField.id, Tweet.MediaKeysField.id),
      selectTweetField(Tweet.QuotedTweetField.id) ->
        tweetFieldsInclude(Tweet.UrlsField.id),
      selectTweetField(Tweet.MediaRefsField.id) ->
        tweetFieldsInclude(Tweet.UrlsField.id, Tweet.MediaKeysField.id),
      selectTweetField(Tweet.CardsField.id) ->
        tweetFieldsInclude(Tweet.UrlsField.id),
      selectTweetField(Tweet.Card2Field.id) ->
        tweetFieldsInclude(Tweet.UrlsField.id, Tweet.CardReferenceField.id),
      selectTweetField(Tweet.CoreDataField.id) ->
        tweetFieldsInclude(Tweet.DirectedAtUserMetadataField.id),
      selectTweetField(Tweet.SelfThreadInfoField.id) ->
        tweetFieldsInclude(Tweet.CoreDataField.id),
      (selectTweetField(Tweet.TakedownCountryCodesField.id) ||
        selectTweetField(Tweet.TakedownReasonsField.id)) ->
        tweetFieldsInclude(
          Tweet.TweetypieOnlyTakedownCountryCodesField.id,
          Tweet.TweetypieOnlyTakedownReasonsField.id
        ),
      selectTweetField(Tweet.EditPerspectiveField.id) ->
        tweetFieldsInclude(Tweet.PerspectiveField.id),
      Selector(_.quotedTweet) ->
        tweetFieldsInclude(Tweet.QuotedTweetField.id),
      // asking for any count implies getting the Tweet.counts field
      Selector(_.countsFields.nonEmpty) ->
        tweetFieldsInclude(Tweet.CountsField.id),
      // asking for any media field implies getting the Tweet.media field
      Selector(_.mediaFields.nonEmpty) ->
        tweetFieldsInclude(Tweet.MediaField.id),
      selectTweetField(Tweet.UnmentionDataField.id) ->
        tweetFieldsInclude(Tweet.MentionsField.id)
    )

  // The same table with every right-hand side closed under the first-order dependencies.
  private val allDependencies =
    firstOrderDependencies.map {
      case (selector, include) => selector -> transitiveExpand(include)
    }

  private def transitiveExpand(inc: Include): Include =
    firstOrderDependencies.foldLeft(inc) { (acc, dependency) =>
      val (selector, include) = dependency
      if (selector(acc)) acc ++ include ++ transitiveExpand(include) else acc
    }

  /**
   * Composes several expanders into one that applies them left to right.
   */
  def sequentially(updaters: TweetQueryOptionsExpander.Type*): TweetQueryOptionsExpander.Type =
    initial => updaters.foldLeft(initial)((current, updater) => updater(current))

  /**
   * For requested fields that depend on other fields being present for correct hydration,
   * returns an updated `TweetQuery.Options` with those dependee fields included.
   * Selectors are evaluated against the include as originally requested.
   */
  def expandDependencies: TweetQueryOptionsExpander.Type =
    options => {
      val requested = options.include
      val triggered = allDependencies.collect {
        case (selector, include) if selector(requested) => include
      }
      options.copy(include = triggered.foldLeft(requested)(_ ++ _))
    }

  /**
   * If the gate is true, add 'fields' to the list of tweetFields to load.
   */
  def gatedTweetFieldUpdater(
    gate: Gate[Unit],
    fields: Seq[FieldId]
  ): TweetQueryOptionsExpander.Type =
    options =>
      if (!gate()) options
      else options.copy(include = options.include.also(tweetFields = fields))

  /**
   * Remembers, per thread, the last input and its expansion, and returns the remembered
   * expansion when called again with an equal input. This avoids recomputing the expansion
   * when several tweets are queried together with the same options.
   */
  def threadLocalMemoize(expander: Type): Type = {
    val memo: ThreadLocal[Option[(Options, Options)]] =
      new ThreadLocal[Option[(Options, Options)]] {
        override def initialValue(): None.type = None
      }

    options =>
      memo
        .get()
        .collect { case (`options`, remembered) => remembered }
        .getOrElse {
          val expanded = expander(options)
          memo.set(Some((options, expanded)))
          expanded
        }
  }
}
ctx.tweetFieldRequested(Tweet.ConversationControlField), + isSourceTweet = ctx.opts.isSourceTweet + ) + + repo(request).liftToTry.flatMap { + // If FilteredState.Unavailable is returned from repo then throw it + case Return(Some(fs: FilteredState.Unavailable)) => + unavailable.incr() + Stitch.exception(fs) + // If FilteredState.Suppress is returned from repo then return it + case Return(Some(fs: FilteredState.Suppress)) => + suppress.incr() + Stitch.value(ValueState.modified(Some(fs))) + // If None is returned from repo then return unmodified + case Return(None) => + allow.incr() + ValueState.StitchUnmodifiedNone + // Propagate thrown exceptions if fail closed + case Throw(e) if failClosedInVF() => + failClosed.incr() + Stitch.exception(e) + // Community tweets are special cased to fail closed to avoid + // leaking tweets expected to be private to a community. + case Throw(e) if CommunityUtil.hasCommunity(request.tweet.communities) => + communityFailClosed.incr() + Stitch.exception(e) + case Throw(_) => + failOpen.incr() + Stitch.value(ValueState.unmodified(curr)) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnmentionDataHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnmentionDataHydrator.scala new file mode 100644 index 000000000..dd6b1ee91 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnmentionDataHydrator.scala @@ -0,0 +1,28 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.thriftscala.MentionEntity +import com.twitter.tweetypie.unmentions.thriftscala.UnmentionData + +object UnmentionDataHydrator { + type Type = ValueHydrator[Option[UnmentionData], Ctx] + + case class Ctx( + conversationId: Option[TweetId], + mentions: Seq[MentionEntity], + underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + def apply(): Type = { + ValueHydrator.map[Option[UnmentionData], Ctx] { 
(_, ctx) => + val mentionedUserIds: Seq[UserId] = ctx.mentions.flatMap(_.userId) + + ValueState.modified( + Some(UnmentionData(ctx.conversationId, Option(mentionedUserIds).filter(_.nonEmpty))) + ) + } + }.onlyIf { (_, ctx) => + ctx.tweetFieldRequested(Tweet.UnmentionDataField) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnrequestedFieldScrubber.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnrequestedFieldScrubber.scala new file mode 100644 index 000000000..1f69b7ecd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnrequestedFieldScrubber.scala @@ -0,0 +1,211 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.thriftscala._ + +/** + * A hydrator that scrubs tweet fields that weren't requested. Those fields might be + * present because they were previously requested and were cached with the tweet. 
/**
 * Scrubs a single tweet. The fields known to the compiled `Tweet` struct are filtered by
 * `scrubKnownFields`; afterwards the requested field ids that are not part of
 * `AdditionalFields.CompiledFieldIds` are unset on the result.
 *
 * NOTE(review): the ids handed to `unsetFields` are the *requested* ids missing from the
 * compiled schema, not the unrequested ones. Confirm this is the intended direction.
 */
def scrub(tweet: Tweet): Tweet = {
  val knownFieldsScrubbed = scrubKnownFields(tweet)
  val requestedNonCompiled = i.tweetFields -- AdditionalFields.CompiledFieldIds

  if (requestedNonCompiled.nonEmpty) knownFieldsScrubbed.unsetFields(requestedNonCompiled)
  else knownFieldsScrubbed
}
filter(Tweet.TakedownCountryCodesField.id, tweet.takedownCountryCodes), + counts = filter(Tweet.CountsField.id, tweet.counts.map(scrub)), + deviceSource = filter(Tweet.DeviceSourceField.id, tweet.deviceSource), + perspective = filter(Tweet.PerspectiveField.id, tweet.perspective), + cards = filter(Tweet.CardsField.id, tweet.cards), + card2 = filter(Tweet.Card2Field.id, tweet.card2), + language = filter(Tweet.LanguageField.id, tweet.language), + spamLabels = None, // unused + contributor = filter(Tweet.ContributorField.id, tweet.contributor), + profileGeoEnrichment = + filter(Tweet.ProfileGeoEnrichmentField.id, tweet.profileGeoEnrichment), + conversationMuted = filter(Tweet.ConversationMutedField.id, tweet.conversationMuted), + takedownReasons = filter(Tweet.TakedownReasonsField.id, tweet.takedownReasons), + selfThreadInfo = filter(Tweet.SelfThreadInfoField.id, tweet.selfThreadInfo), + // additional fields + mediaTags = filter(Tweet.MediaTagsField.id, tweet.mediaTags), + schedulingInfo = filter(Tweet.SchedulingInfoField.id, tweet.schedulingInfo), + bindingValues = filter(Tweet.BindingValuesField.id, tweet.bindingValues), + replyAddresses = None, // unused + obsoleteTwitterSuggestInfo = None, // unused + escherbirdEntityAnnotations = + filter(Tweet.EscherbirdEntityAnnotationsField.id, tweet.escherbirdEntityAnnotations), + spamLabel = filter(Tweet.SpamLabelField.id, tweet.spamLabel), + abusiveLabel = filter(Tweet.AbusiveLabelField.id, tweet.abusiveLabel), + lowQualityLabel = filter(Tweet.LowQualityLabelField.id, tweet.lowQualityLabel), + nsfwHighPrecisionLabel = + filter(Tweet.NsfwHighPrecisionLabelField.id, tweet.nsfwHighPrecisionLabel), + nsfwHighRecallLabel = filter(Tweet.NsfwHighRecallLabelField.id, tweet.nsfwHighRecallLabel), + abusiveHighRecallLabel = + filter(Tweet.AbusiveHighRecallLabelField.id, tweet.abusiveHighRecallLabel), + lowQualityHighRecallLabel = + filter(Tweet.LowQualityHighRecallLabelField.id, tweet.lowQualityHighRecallLabel), + personaNonGrataLabel 
= + filter(Tweet.PersonaNonGrataLabelField.id, tweet.personaNonGrataLabel), + recommendationsLowQualityLabel = filter( + Tweet.RecommendationsLowQualityLabelField.id, + tweet.recommendationsLowQualityLabel + ), + experimentationLabel = + filter(Tweet.ExperimentationLabelField.id, tweet.experimentationLabel), + tweetLocationInfo = filter(Tweet.TweetLocationInfoField.id, tweet.tweetLocationInfo), + cardReference = filter(Tweet.CardReferenceField.id, tweet.cardReference), + supplementalLanguage = + filter(Tweet.SupplementalLanguageField.id, tweet.supplementalLanguage), + selfPermalink = filter(Tweet.SelfPermalinkField.id, tweet.selfPermalink), + extendedTweetMetadata = + filter(Tweet.ExtendedTweetMetadataField.id, tweet.extendedTweetMetadata), + communities = filter(Tweet.CommunitiesField.id, tweet.communities), + visibleTextRange = filter(Tweet.VisibleTextRangeField.id, tweet.visibleTextRange), + spamHighRecallLabel = filter(Tweet.SpamHighRecallLabelField.id, tweet.spamHighRecallLabel), + duplicateContentLabel = + filter(Tweet.DuplicateContentLabelField.id, tweet.duplicateContentLabel), + liveLowQualityLabel = filter(Tweet.LiveLowQualityLabelField.id, tweet.liveLowQualityLabel), + nsfaHighRecallLabel = filter(Tweet.NsfaHighRecallLabelField.id, tweet.nsfaHighRecallLabel), + pdnaLabel = filter(Tweet.PdnaLabelField.id, tweet.pdnaLabel), + searchBlacklistLabel = + filter(Tweet.SearchBlacklistLabelField.id, tweet.searchBlacklistLabel), + lowQualityMentionLabel = + filter(Tweet.LowQualityMentionLabelField.id, tweet.lowQualityMentionLabel), + bystanderAbusiveLabel = + filter(Tweet.BystanderAbusiveLabelField.id, tweet.bystanderAbusiveLabel), + automationHighRecallLabel = + filter(Tweet.AutomationHighRecallLabelField.id, tweet.automationHighRecallLabel), + goreAndViolenceLabel = + filter(Tweet.GoreAndViolenceLabelField.id, tweet.goreAndViolenceLabel), + untrustedUrlLabel = filter(Tweet.UntrustedUrlLabelField.id, tweet.untrustedUrlLabel), + goreAndViolenceHighRecallLabel = 
filter( + Tweet.GoreAndViolenceHighRecallLabelField.id, + tweet.goreAndViolenceHighRecallLabel + ), + nsfwVideoLabel = filter(Tweet.NsfwVideoLabelField.id, tweet.nsfwVideoLabel), + nsfwNearPerfectLabel = + filter(Tweet.NsfwNearPerfectLabelField.id, tweet.nsfwNearPerfectLabel), + automationLabel = filter(Tweet.AutomationLabelField.id, tweet.automationLabel), + nsfwCardImageLabel = filter(Tweet.NsfwCardImageLabelField.id, tweet.nsfwCardImageLabel), + duplicateMentionLabel = + filter(Tweet.DuplicateMentionLabelField.id, tweet.duplicateMentionLabel), + bounceLabel = filter(Tweet.BounceLabelField.id, tweet.bounceLabel), + selfThreadMetadata = filter(Tweet.SelfThreadMetadataField.id, tweet.selfThreadMetadata), + composerSource = filter(Tweet.ComposerSourceField.id, tweet.composerSource), + editControl = filter(Tweet.EditControlField.id, tweet.editControl), + developerBuiltCardId = filter( + Tweet.DeveloperBuiltCardIdField.id, + tweet.developerBuiltCardId + ), + creativeEntityEnrichmentsForTweet = filter( + Tweet.CreativeEntityEnrichmentsForTweetField.id, + tweet.creativeEntityEnrichmentsForTweet + ), + previousCounts = filter(Tweet.PreviousCountsField.id, tweet.previousCounts), + mediaRefs = filter(Tweet.MediaRefsField.id, tweet.mediaRefs), + isCreativesContainerBackendTweet = filter( + Tweet.IsCreativesContainerBackendTweetField.id, + tweet.isCreativesContainerBackendTweet), + editPerspective = filter(Tweet.EditPerspectiveField.id, tweet.editPerspective), + noteTweet = filter(Tweet.NoteTweetField.id, tweet.noteTweet), + + // tweetypie-internal metadata + directedAtUserMetadata = + filter(Tweet.DirectedAtUserMetadataField.id, tweet.directedAtUserMetadata), + tweetypieOnlyTakedownReasons = + filter(Tweet.TweetypieOnlyTakedownReasonsField.id, tweet.tweetypieOnlyTakedownReasons), + mediaKeys = filter(Tweet.MediaKeysField.id, tweet.mediaKeys), + tweetypieOnlyTakedownCountryCodes = filter( + Tweet.TweetypieOnlyTakedownCountryCodesField.id, + 
/**
 * Scrubs the individual counters of a `StatusCounts`. A counter is kept only when its
 * field id is present in the include's `countsFields`; otherwise it becomes `None`.
 * The result is a newly constructed `StatusCounts` built from these five counters.
 */
def scrub(counts: StatusCounts): StatusCounts = {
  val requested = i.countsFields

  // Keeps `value` only when the corresponding counts field was requested.
  def keepIfRequested[A](fieldId: FieldId, value: Option[A]): Option[A] =
    value.filter(_ => requested.contains(fieldId))

  StatusCounts(
    replyCount = keepIfRequested(StatusCounts.ReplyCountField.id, counts.replyCount),
    favoriteCount = keepIfRequested(StatusCounts.FavoriteCountField.id, counts.favoriteCount),
    retweetCount = keepIfRequested(StatusCounts.RetweetCountField.id, counts.retweetCount),
    quoteCount = keepIfRequested(StatusCounts.QuoteCountField.id, counts.quoteCount),
    bookmarkCount = keepIfRequested(StatusCounts.BookmarkCountField.id, counts.bookmarkCount)
  )
}
/**
 * Hydrates the whole sequence of a tweet's UrlEntities by lifting a single-entity
 * hydrator over the sequence.
 */
object UrlEntitiesHydrator {
  type Type = ValueHydrator[Seq[UrlEntity], TweetCtx]

  /**
   * True when the query asks for the tweet's `urls` field.
   */
  def queryFilter(opts: TweetQuery.Options): Boolean = {
    val requestedFields = opts.include.tweetFields
    requestedFields.contains(Tweet.UrlsField.id)
  }

  /**
   * Lifts `h` to operate on every entity of the sequence and wraps it with
   * `TweetHydration.completeOnlyOnce` under `HydrationType.Urls`, gated by `queryFilter`.
   */
  def once(h: ValueHydrator[UrlEntity, TweetCtx]): Type = {
    val perSequence = h.liftSeq

    TweetHydration.completeOnlyOnce(
      queryFilter = queryFilter,
      hydrationType = HydrationType.Urls,
      hydrator = perSequence
    )
  }
}
/**
 * Builds a Truncator that turns a (short url, expanded url) pair into a display url via
 * `DisplayUrl`, reporting under the "truncations" stats scope. Every attempt increments
 * "count". A non-fatal failure increments "exceptions" plus a counter named after the
 * exception class, logs a warning, and yields `None`.
 */
def truncator(stats: StatsReceiver): Truncator = {
  val scoped = stats.scope("truncations")
  val attempts = scoped.counter("count")
  val failures = scoped.counter("exceptions")

  // Failure bookkeeping: counters first, then the warning. InvalidUrlException is logged
  // without the exception attached; anything else is logged with it.
  def recordFailure(shortUrl: String, expandedUrl: String, ex: Throwable): Unit = {
    failures.incr()
    scoped.counter(ex.getClass.getName).incr()

    def message = s"failed to truncate: `$shortUrl` / `$expandedUrl`"
    ex match {
      case InvalidUrlException(_) => log.warn(message)
      case _ => log.warn(message, ex)
    }
  }

  (shortUrl, expandedUrl) =>
    try {
      attempts.incr()
      Some(DisplayUrl(shortUrl, Some(expandedUrl), true))
    } catch {
      case NonFatal(ex) =>
        recordFailure(shortUrl, expandedUrl, ex)
        None
    }
}
/**
 * Runs this hydrator and then `next`, feeding the value produced here into `next` with
 * the same context. The result carries `next`'s value and the two hydration states
 * combined with `++` (this hydrator's state on the left).
 */
def andThen(next: ValueHydrator[A, C]): ValueHydrator[A, C] =
  ValueHydrator[A, C] { (input, ctx) =>
    run(input, ctx).flatMap { first =>
      next.run(first.value, ctx).map { second =>
        ValueState(second.value, first.state ++ second.state)
      }
    }
  }
/**
 * Shared implementation of the public `liftOption` overloads. The incoming option falls
 * back to `default` when empty; if a value is available it is hydrated with this
 * hydrator and re-wrapped in `Some`, otherwise a `None` value with an empty hydration
 * state is returned.
 */
private def liftOption(default: Option[A]): ValueHydrator[Option[A], C] = {
  // Built once and reused for every call that has nothing to hydrate.
  val nothingToHydrate = Stitch.value(ValueState.unit(None))

  ValueHydrator[Option[A], C] { (maybeValue, ctx) =>
    val resolved = maybeValue.orElse(default)

    resolved match {
      case None => nothingToHydrate
      case Some(value) => this.run(value, ctx).map(hydrated => hydrated.map(Some.apply))
    }
  }
}
/**
 * Runs several ValueHydrators in sequence: each hydrator receives the value produced by
 * the previous one, and the hydration states are accumulated via `andThen`.
 *
 * When called with no hydrators this returns the identity hydrator (`unit`), which
 * passes the value through with an empty hydration state. Previously the empty case
 * fell through to `reduceLeft` and threw `UnsupportedOperationException`.
 *
 * @param bs the hydrators to chain, in execution order
 * @return a single hydrator equivalent to `b1.andThen(b2).andThen(...)`
 */
def inSequence[A, C](bs: ValueHydrator[A, C]*): ValueHydrator[A, C] =
  // reduceLeftOption yields the lone element for one hydrator and `b1.andThen(b2)` for
  // two, so the former special cases are covered by the general fold.
  bs.reduceLeftOption(_.andThen(_)).getOrElse(unit[A, C])
TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/BUILD new file mode 100644 index 000000000..dc5edd30e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/BUILD @@ -0,0 +1,21 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "image-fetcher-service/thrift/src/main/thrift:thrift-scala", + "mediaservices/commons/src/main/thrift:thrift-scala", + "mediaservices/mediainfo-server/thrift/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + "user-image-service/thrift/src/main/thrift:thrift-scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaClient.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaClient.scala new file mode 100644 index 000000000..c33ed5e66 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaClient.scala @@ -0,0 +1,288 @@ +package com.twitter.tweetypie +package media + +import com.twitter.mediainfo.server.{thriftscala => mis} +import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata +import com.twitter.mediaservices.commons.photurkey.thriftscala.PrivacyType +import 
/**
 * The operations Tweetypie performs against the media services backends, each exposed
 * as a FutureArrow whose type is defined in the `MediaClient` companion object.
 */
trait MediaClient {
  import MediaClient._

  /**
   * Write path: when a tweet being created carries media upload ids, this operation
   * processes that media and returns metadata (the media keys) for it.
   */
  def processMedia: ProcessMedia

  /**
   * Read path: fetches metadata about existing media while a MediaEntity is hydrated.
   */
  def getMediaMetadata: GetMediaMetadata

  /**
   * Issues a media update with the `Delete` action for the given media key and tweet id.
   */
  def deleteMedia: DeleteMedia

  /**
   * Issues a media update with the `Undelete` action for the given media key and tweet id.
   */
  def undeleteMedia: UndeleteMedia
}
/**
 * Request type for the MediaClient.processMedia operation.
 *
 * Carries the uploaded media ids together with the owning user and tweet, and optional
 * per-media product metadata keyed by media id. `isProtected` is part of the request
 * but is not read by either of the conversions defined here.
 */
case class ProcessMediaRequest(
  mediaIds: Seq[MediaId],
  userId: UserId,
  tweetId: TweetId,
  isProtected: Boolean,
  productMetadata: Option[Map[MediaId, UserDefinedProductMetadata]]) {

  /**
   * The UserImageService request that processes this tweet's media.
   */
  private[media] def toProcessTweetMediaRequest =
    uis.ProcessTweetMediaRequest(mediaIds, userId, tweetId)

  /**
   * One product-metadata update per media key that has an entry in `productMetadata`,
   * in the order of `mediaKeys`. Empty when no product metadata was supplied.
   */
  private[media] def toUpdateProductMetadataRequests(mediaKeys: Seq[MediaKey]) =
    productMetadata.fold(Seq.empty[uis.UpdateProductMetadataRequest]) { metadataById =>
      for {
        mediaKey <- mediaKeys
        metadata <- metadataById.get(mediaKey.mediaId)
      } yield uis.UpdateProductMetadataRequest(ProductKey(tweetId.toString, mediaKey), metadata)
    }
}
/**
 * Builds an UpdateMedia FutureArrow on top of the UserImageService `updateTweetMedia`
 * endpoint. The backend response is discarded (the arrow yields Unit) and failures are
 * passed through `handleMediaExceptions`.
 */
private[media] object UpdateMedia {
  def apply(updateTweetMedia: UserImageService.UpdateTweetMedia): UpdateMedia = {
    val update = FutureArrow[UpdateMediaRequest, Unit] {
      case UpdateMediaRequest(mediaKey, action, tweetId) =>
        val backendRequest = uis.UpdateTweetMediaRequest(mediaKey, action, Some(tweetId))
        updateTweetMedia(backendRequest).unit
    }

    update.translateExceptions(handleMediaExceptions)
  }
}
/**
 * Builds a GetMediaMetadata FutureArrow using MediaInfoService endpoints.
 *
 * The request is converted with `toGetTweetMediaInfoRequest`, and the backend response
 * is repackaged as a `MediaMetadata`. Failures are translated by `handleMediaExceptions`.
 */
object GetMediaMetadata {

  // The previously declared private logger was never referenced and has been removed.

  /**
   * @param getTweetMediaInfo the MediaInfoService endpoint used to look up media info
   * @return an arrow from a MediaMetadataRequest to the resulting MediaMetadata
   */
  def apply(getTweetMediaInfo: MediaInfoService.GetTweetMediaInfo): GetMediaMetadata =
    FutureArrow[MediaMetadataRequest, MediaMetadata] { req =>
      getTweetMediaInfo(req.toGetTweetMediaInfoRequest).map { res =>
        MediaMetadata(
          res.mediaKey,
          res.assetUrlHttps,
          res.sizes.toSet,
          res.mediaInfo,
          // Product metadata is pulled out of the optional additional metadata; the
          // additional metadata itself is also passed along as the last argument.
          res.additionalMetadata.flatMap(_.productMetadata),
          res.stratoExtensionsReply,
          res.additionalMetadata
        )
      }
    }.translateExceptions(handleMediaExceptions)
}
/**
 * Creates a MediaClient backed by the given services: media metadata is read through
 * `mediaInfoService`, while processing, deleting and undeleting media go through
 * `userImageService`.
 */
def fromBackends(
  userImageService: UserImageService,
  mediaInfoService: MediaInfoService
): MediaClient =
  new MediaClient {

    val getMediaMetadata =
      GetMediaMetadata(getTweetMediaInfo = mediaInfoService.getTweetMediaInfo)

    val processMedia =
      ProcessMedia(
        updateProductMetadata = userImageService.updateProductMetadata,
        processTweetMedia = userImageService.processTweetMedia
      )

    // Shared by deleteMedia and undeleteMedia below; each converts its own request
    // type into an UpdateMediaRequest before calling this arrow.
    private val updateMedia =
      UpdateMedia(updateTweetMedia = userImageService.updateTweetMedia)

    val deleteMedia: FutureArrow[DeleteMediaRequest, Unit] =
      FutureArrow[DeleteMediaRequest, Unit] { deleteRequest =>
        updateMedia(deleteRequest.toUpdateMediaRequest)
      }

    val undeleteMedia: FutureArrow[UndeleteMediaRequest, Unit] =
      FutureArrow[UndeleteMediaRequest, Unit] { undeleteRequest =>
        updateMedia(undeleteRequest.toUpdateMediaRequest)
      }
  }
/**
 * Translates errors from the media backends into MediaExceptions.
 *
 * A `BadRequest` from `uis`, `ifs` or `mis` becomes the MediaClientException matching
 * its reason, falling back to `BadRequest` when the reason is absent or not specifically
 * handled. A `CommonServerError` becomes a `MediaServiceServerError`. The function is
 * not defined for any other input.
 */
val handleMediaExceptions: PartialFunction[Any, Exception] = {
  // uis.BadRequest: the specific reasons come first, the catch-all last.
  case uis.BadRequest(msg, Some(uis.BadRequestReason.MediaNotFound)) => new MediaNotFound(msg)
  case uis.BadRequest(msg, Some(uis.BadRequestReason.BadMedia)) => new BadMedia(msg)
  case uis.BadRequest(msg, Some(uis.BadRequestReason.InvalidMedia)) => new InvalidMedia(msg)
  case uis.BadRequest(msg, _) => new BadRequest(msg)

  // ifs.BadRequest
  case ifs.BadRequest(msg, Some(ifs.BadRequestReason.NotFound)) => new MediaNotFound(msg)
  case ifs.BadRequest(msg, _) => new BadRequest(msg)

  // mis.BadRequest
  case mis.BadRequest(msg, Some(mis.BadRequestReason.MediaNotFound)) => new MediaNotFound(msg)
  case mis.BadRequest(msg, _) => new BadRequest(msg)

  case serverError: CommonServerError => MediaServiceServerError(serverError)
}
object MediaKeyUtil {

  /**
   * Returns the media key of `mediaEntity`.
   *
   * @throws IllegalStateException when the entity has no media key
   */
  def get(mediaEntity: MediaEntity): MediaKey =
    mediaEntity.mediaKey match {
      case Some(key) => key
      case None =>
        throw new IllegalStateException("""Media key undefined. This state is unexpected, the media
                                          |key should be set by the tweet creation for new tweets
                                          |and by `MediaKeyHydrator` for legacy tweets.""".stripMargin)
    }

  /**
   * Maps the media category of `mediaKey` to a content type.
   *
   * @throws NotImplementedError for any category other than the four handled here
   */
  def contentType(mediaKey: MediaKey): MediaContentType =
    mediaKey.mediaCategory match {
      case MediaCategory.TweetImage => MediaContentType.ImageJpeg
      case MediaCategory.TweetGif => MediaContentType.VideoMp4
      case MediaCategory.TweetVideo | MediaCategory.AmplifyVideo => MediaContentType.VideoGeneric
      case unsupported => throw new NotImplementedError(unsupported.toString)
    }
}
/**
 * Metadata about a piece of tweet media. `updateEntity` copies this metadata onto an
 * existing MediaEntity.
 */
case class MediaMetadata(
  mediaKey: MediaKey,
  assetUrlHttps: String,
  sizes: Set[MediaSize],
  mediaInfo: MediaInfo,
  productMetadata: Option[mic.UserDefinedProductMetadata] = None,
  extensionsReply: Option[ByteBuffer] = None,
  additionalMetadata: Option[mic.AdditionalMetadata] = None) {

  /** The asset URL as rewritten by `MediaUrl.httpsToHttp`. */
  def assetUrlHttp: String = MediaUrl.httpsToHttp(assetUrlHttps)

  /** The attributable user id from the ownership info of `additionalMetadata`, if any. */
  def attributableUserId: Option[UserId] =
    for {
      metadata <- additionalMetadata
      ownership <- metadata.ownershipInfo
      userId <- ownership.attributableUserId
    } yield userId

  /**
   * Returns a copy of `mediaEntity` with its media fields replaced by this metadata.
   *
   * @param mediaEntity the entity to update; its media id must equal `mediaKey.mediaId`
   * @param tweetUserId user id of the tweet; `sourceUserId` is only set when the
   *                    attributable user differs from it
   * @param includeAdditionalMetadata whether `additionalMetadata` is copied onto the entity
   * @throws IllegalArgumentException when the media ids do not match
   */
  def updateEntity(
    mediaEntity: MediaEntity,
    tweetUserId: UserId,
    includeAdditionalMetadata: Boolean
  ): MediaEntity = {
    // Mismatched ids mean we are about to replace one media with another, which
    // indicates a logic error (inside or outside TweetyPie), so fail fast.
    require(
      mediaEntity.mediaId == mediaKey.mediaId,
      "Tried to update media with mediaId=%s with mediaInfo.mediaId=%s"
        .format(mediaEntity.mediaId, mediaKey.mediaId)
    )

    val exposedAdditionalMetadata =
      if (includeAdditionalMetadata) additionalMetadata else None

    // MIS allows media to be shared among authorized users, so only record a source
    // user when it is not the tweet's own user.
    val sharedFromUserId = attributableUserId.filterNot(_ == tweetUserId)

    mediaEntity.copy(
      mediaUrl = assetUrlHttp,
      mediaUrlHttps = assetUrlHttps,
      sizes = sizes,
      mediaInfo = Some(mediaInfo),
      extensionsReply = extensionsReply,
      // the following two fields are deprecated and will be removed soon
      nsfw = false,
      mediaPath = MediaUrl.mediaPathFromUrl(assetUrlHttps),
      metadata = productMetadata,
      additionalMetadata = exposedAdditionalMetadata,
      sourceUserId = sharedFromUserId
    )
  }
}
/** True when the tweet has a place id or geo coordinates. */
def hasGeo(tweet: Tweet): Boolean =
  // De Morgan form of "place is non-empty OR coordinates are non-empty"; the
  // coordinates lens is still only consulted when the place id is empty.
  !(TweetLenses.placeId.get(tweet).isEmpty && TweetLenses.geoCoordinates.get(tweet).isEmpty)
/**
 * Returns the creation time of `tweet`.
 *
 * When the tweet id is a snowflake id the time is decoded from the id. Otherwise it is
 * derived from the tweet's `createdAt` value, interpreted as seconds.
 */
def getTimestamp(tweet: Tweet): Time =
  if (SnowflakeId.isSnowflakeId(tweet.id)) {
    SnowflakeId(tweet.id).time
  } else {
    // Convert to milliseconds as a Long instead of narrowing the seconds to Int:
    // `.toInt` silently wraps for values above Int.MaxValue and would produce a
    // wrong timestamp.
    Time.fromMilliseconds(getCreatedAt(tweet) * 1000L)
  }
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/BUILD new file mode 100644 index 000000000..a57db5f55 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/BUILD @@ -0,0 +1,82 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala", + "3rdparty/jvm/com/ibm/icu:icu4j", + "3rdparty/jvm/com/twitter/bijection:core", + "3rdparty/jvm/com/twitter/bijection:scrooge", + "3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/org/apache/thrift:libthrift", + "audience-rewards/thrift/src/main/thrift:thrift-scala", + "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", + "featureswitches/featureswitches-core/src/main/scala:recipient", + "featureswitches/featureswitches-core/src/main/scala:useragent", + "finagle/finagle-core/src/main", + "flock-client/src/main/scala", + "flock-client/src/main/thrift:thrift-scala", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/primitives", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/service", + "passbird/thrift-only/src/main/thrift:thrift-scala", + "scrooge/scrooge-core", + "tweetypie/servo/json", + "tweetypie/servo/repo", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "snowflake/src/main/scala/com/twitter/snowflake/id", + "src/java/com/twitter/common/text/language:language-identifier", + "src/java/com/twitter/common/text/pipeline", + "src/scala/com/twitter/search/blender/services/strato", + "src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/consumer_privacy/mention_controls:thrift-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", + 
"src/thrift/com/twitter/dataproducts:service-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/escherbird/common:common-scala", + "src/thrift/com/twitter/escherbird/metadata:metadata-service-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/geoduck:geoduck-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions-scala", + "src/thrift/com/twitter/service/talon/gen:thrift-scala", + "src/thrift/com/twitter/socialgraph:thrift-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "src/thrift/com/twitter/spam/rtf:tweet-rtf-event-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-compat/src/main/scala/com/twitter/stitch/compat", + "stitch/stitch-core", + "stitch/stitch-timelineservice", + "strato/src/main/scala/com/twitter/strato/catalog", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/data", + "strato/src/main/scala/com/twitter/strato/thrift", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + 
"tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context/src/main/scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + "vibes/src/main/thrift/com/twitter/vibes:vibes-scala", + "visibility/common/src/main/scala/com/twitter/visibility/common/tflock", + "visibility/common/src/main/scala/com/twitter/visibility/common/user_result", + "visibility/common/src/main/thrift/com/twitter/visibility:action-scala", + "visibility/lib:tweets", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CacheStitch.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CacheStitch.scala new file mode 100644 index 000000000..fd1ad5fd3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CacheStitch.scala @@ -0,0 +1,87 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.servo.repository._ +import com.twitter.stitch.Stitch +import com.twitter.util.Try + +object CacheStitch { + + /** + * Cacheable defines a function that takes a cache query and a Try value, + * and returns what should be written to cache, as a Option[StitchLockingCache.Val]. + * + * None signifies that this value should not be written to cache. + * + * Val can be one of Found[V], NotFound, and Deleted. The function will determine what kinds + * of values and exceptions (captured in the Try) correspond to which kind of cached values. 
/**
 * A `Q => Stitch[V]` that consults `cache` before calling `repo`.
 *
 * @param repo       the underlying repository, called on a miss or when the handler asks
 *                   to bypass the cache
 * @param cache      the locking cache that is read from and written back to
 * @param queryToKey derives the cache key from a query
 * @param handler    maps each cache read result to the CachedResultAction to perform
 * @param cacheable  decides what, if anything, is written to cache for a repo result
 */
case class CacheStitch[Q, K, V](
  repo: Q => Stitch[V],
  cache: StitchLockingCache[K, V],
  queryToKey: Q => K,
  handler: CachedResult.Handler[K, V],
  cacheable: CacheStitch.Cacheable[Q, V])
    extends (Q => Stitch[V]) {
  import com.twitter.servo.repository.CachedResultAction._

  // Reads `key` from cache; any exception from the read is converted into a
  // CachedResult.Failed value rather than failing the Stitch.
  private[this] def getFromCache(key: K): Stitch[CachedResult[K, V]] = {
    cache
      .get(key)
      .handle {
        case t => CachedResult.Failed(key, t)
      }
  }

  // Calls the repo and, depending on `cacheable`, writes the outcome back to cache.
  // The repo's own result (success or failure) is what the caller receives.
  // Exposed for testing
  private[repository] def readThrough(query: Q): Stitch[V] =
    repo(query).liftToTry.applyEffect { value: Try[V] =>
      cacheable(query, value) match {
        case Some(v) =>
          // cacheable returned Some of a StitchLockingCache.Val to cache
          //
          // This is async to ensure that we don't wait for the cache
          // update to complete before returning. This also ignores
          // any exceptions from setting the value.
          Stitch.async(cache.lockAndSet(queryToKey(query), v))
        case None =>
          // cacheable returned None so don't cache
          Stitch.Unit
      }
    }.lowerFromTry

  // Carries out the action chosen by `handler` for a cache read result.
  private[this] def handle(query: Q, action: CachedResultAction[V]): Stitch[V] =
    action match {
      case HandleAsFound(value) => Stitch(value)
      case HandleAsMiss => readThrough(query)
      // bypass the cache entirely: call the repo without writing the result back
      case HandleAsDoNotCache => repo(query)
      case HandleAsFailed(t) => Stitch.exception(t)
      case HandleAsNotFound => Stitch.NotFound
      // perform the wrapped action, then apply the transformation to its result
      case t: TransformSubAction[V] => handle(query, t.action).map(t.f)
      case SoftExpiration(subAction) =>
        // start a read-through via Stitch.async, then handle the sub-action
        Stitch
          .async(readThrough(query))
          .flatMap { _ => handle(query, subAction) }
    }

  // Reads the cache for the query's key and dispatches on the handler's decision.
  override def apply(query: Q): Stitch[V] =
    getFromCache(queryToKey(query))
      .flatMap { result: CachedResult[K, V] => handle(query, handler(result)) }
}
// Helpers for representing and recognising bounce-deleted tweets in cache. They are
// kept together so that "bounce deleted" is defined in exactly one place.
object CachedBounceDeleted {

  /** CachedTweet marking `tweetId` as bounce-deleted; for use in CachingTweetStore. */
  def toBounceDeletedCachedTweet(tweetId: TweetId): CachedTweet = {
    val placeholderTweet = Tweet(id = tweetId)
    CachedTweet(tweet = placeholderTweet, isBounceDeleted = Some(true))
  }

  /** True when the entry is `Found` and its value carries `isBounceDeleted = Some(true)`. */
  def isBounceDeleted(cached: Cached[CachedTweet]): Boolean =
    cached.status match {
      case CachedValueStatus.Found => cached.value.exists(_.isBounceDeleted.contains(true))
      case _ => false
    }

  /** TweetResult marking `tweetId` as bounce-deleted; for use in CachingTweetRepository. */
  def toBounceDeletedTweetResult(tweetId: TweetId): TweetResult = {
    val tombstoneData = TweetData(tweet = Tweet(id = tweetId), isBounceDeleted = true)
    TweetResult(tombstoneData)
  }

  /** True when the result's TweetData is flagged as bounce-deleted. */
  def isBounceDeleted(tweetResult: TweetResult): Boolean =
    tweetResult.value.isBounceDeleted
}
object TombstoneTtl {
  import CachedResult._

  /** A ttl calculator that returns `ttl` for every tombstone. */
  def fixed(ttl: Duration): CachedNotFound[TweetId] => Duration =
    (_: CachedNotFound[TweetId]) => ttl

  /**
   * A ttl calculator based on the age of the tweet id at the time the tombstone was
   * cached: `min` up to an age of `from`, `max` from an age of `to` onwards, and a
   * linear interpolation between the two in between. Ids that are not snowflake ids
   * always get `max`.
   */
  def linear(
    min: Duration,
    max: Duration,
    from: Duration,
    to: Duration
  ): CachedNotFound[TweetId] => Duration = {
    // ttl gained per unit of age inside the [from, to] window
    val rate = (max - min).inMilliseconds / (to - from).inMilliseconds.toDouble

    def ttlFor(tombstone: CachedNotFound[TweetId]): Duration =
      if (!SnowflakeId.isSnowflakeId(tombstone.key)) {
        // The age cannot be derived from a non-snowflake id, so cache for the maximum time.
        max
      } else {
        val age = tombstone.cachedAt - SnowflakeId(tombstone.key).time
        if (age <= from) min
        else if (age >= to) max
        else min + (age - from) * rate
      }

    ttlFor
  }

  /**
   * True when more time has passed since `cached.cachedAt` than the ttl that
   * `tombstoneTtl` assigns to `cached`.
   */
  def isExpired(
    tombstoneTtl: CachedNotFound[TweetId] => Duration,
    cached: CachedNotFound[TweetId]
  ): Boolean = {
    val timeInCache = Time.now - cached.cachedAt
    timeInCache > tombstoneTtl(cached)
  }
}
object LogLens {
  private[this] val mapper = new ObjectMapper().registerModule(DefaultScalaModule)

  /**
   * Logs `data`, merged with the default request fields, as one JSON object at info
   * level. Default fields override caller-supplied entries with the same key.
   */
  def logMessage(logger: Logger, clientIdHelper: ClientIdHelper, data: (String, Any)*): Unit = {
    val fields: Map[String, Any] = (data ++ defaultData(clientIdHelper)).toMap
    // Serialize before calling the logger so the JSON is always built, as before.
    val json = mapper.writeValueAsString(fields)
    logger.info(json)
  }

  // Fields added to every log line: the effective client id, the trace id, and
  // viewer details read from the TwitterContext.
  private def defaultData(clientIdHelper: ClientIdHelper): Seq[(String, Any)] = {
    val viewer = TwitterContext()
    Seq(
      ("client_id", clientIdHelper.effectiveClientId),
      ("trace_id", Trace.id.traceId.toString),
      ("audit_ip", viewer.flatMap(_.auditIp)),
      ("application_id", viewer.flatMap(_.clientApplicationId)),
      ("user_agent", viewer.flatMap(_.userAgent)),
      ("authenticated_user_id", viewer.flatMap(_.authenticatedUserId))
    )
  }
}
/**
 * A LockingCache.Picker for use with CachingTweetRepository. It decides whether
 * `newValue` may replace `oldValue` (the entry currently in cache), so that an entry
 * newer than the one previously read from cache is never overwritten.
 *
 * @param cachedAt extracts, from a new value, the cachedAt time of the cache entry
 *                 that value was derived from (if any)
 */
class TweetRepoCachePicker[T](cachedAt: T => Option[Time]) extends LockingCache.Picker[Cached[T]] {
  private val newestPicker = new PreferNewestCached[T]

  override def apply(newValue: Cached[T], oldValue: Cached[T]): Option[Cached[T]] =
    oldValue.status match {
      // An expired or older NotFound tombstone is safe to replace with a fresher
      // tombstone or value, so defer to the prefer-newest picker.
      case CachedValueStatus.NotFound =>
        newestPicker(newValue, oldValue)

      // A Found entry may only be replaced by an update derived from that very entry:
      // the cachedAt carried by the new value must equal the cachedAt of the entry in
      // cache. Otherwise the entry changed after we read it, so leave it alone.
      case CachedValueStatus.Found =>
        val derivedFromCachedAt = newValue.value.flatMap(cachedAt)
        if (derivedFromCachedAt.contains(oldValue.cachedAt)) Some(newValue) else None

      // Deleted tombstones are never overwritten via read-through. Any other status is
      // unexpected, so play it safe and don't overwrite either.
      case _ =>
        None
    }
}
immediateValues = Some(values), + statusId = Some(tweetId) + ) +} + +object Card2Repository { + type Type = (Card2Key, Card2RequestOptions) => Stitch[Card2] + + def apply(getCards2: Expandodo.GetCards2, maxRequestSize: Int): Type = { + case class RequestGroup(opts: Card2RequestOptions) extends SeqGroup[Card2Key, Option[Card2]] { + override def run(keys: Seq[Card2Key]): Future[Seq[Try[Option[Card2]]]] = + LegacySeqGroup.liftToSeqTry( + getCards2((keys.map(_.toCard2Request), opts)).map { res => + res.responsesCode match { + case Card2ResponsesCode.Ok => + res.responses.map(_.card) + + case _ => + // treat all other failure cases as card-not-found + Seq.fill(keys.size)(None) + } + } + ) + + override def maxSize: Int = maxRequestSize + } + + (card2Key, opts) => + Stitch + .call(card2Key, RequestGroup(opts)) + .lowerFromOption() + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardRepository.scala new file mode 100644 index 000000000..b420b5814 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardRepository.scala @@ -0,0 +1,28 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.expandodo.thriftscala._ +import com.twitter.stitch.MapGroup +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.backends.Expandodo + +object CardRepository { + type Type = String => Stitch[Seq[Card]] + + def apply(getCards: Expandodo.GetCards, maxRequestSize: Int): Type = { + object RequestGroup extends MapGroup[String, Seq[Card]] { + override def run(urls: Seq[String]): Future[String => Try[Seq[Card]]] = + getCards(urls.toSet).map { responseMap => url => + responseMap.get(url) match { + case None => Throw(NotFound) + case Some(r) => Return(r.cards.getOrElse(Nil)) + } + } + + override def maxSize: Int = maxRequestSize + } + + url => Stitch.call(url, RequestGroup) + } 
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardUsersRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardUsersRepository.scala
new file mode 100644
index 000000000..3cf546bb7
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardUsersRepository.scala
@@ -0,0 +1,52 @@
+package com.twitter.tweetypie
+package repository
+
+import com.twitter.expandodo.thriftscala._
+import com.twitter.stitch.SeqGroup
+import com.twitter.stitch.Stitch
+import com.twitter.stitch.compat.LegacySeqGroup
+import com.twitter.tweetypie.backends.Expandodo
+
+object CardUsersRepository {
+  type CardUri = String
+  type Type = (CardUri, Context) => Stitch[Option[Set[UserId]]]
+
+  case class Context(perspectiveUserId: UserId) extends AnyVal
+
+  case class GetUsersGroup(perspectiveId: UserId, getCardUsers: Expandodo.GetCardUsers)
+      extends SeqGroup[CardUri, GetCardUsersResponse] {
+    protected override def run(keys: Seq[CardUri]): Future[Seq[Try[GetCardUsersResponse]]] =
+      LegacySeqGroup.liftToSeqTry(
+        getCardUsers(
+          GetCardUsersRequests(
+            requests = keys.map(k => GetCardUsersRequest(k)),
+            perspectiveUserId = Some(perspectiveId)
+          )
+        ).map(_.responses)
+      )
+  }
+
+  /**
+   * Builds a repository that asks Expandodo for the users attached to a card URI.
+   * Requests go through a [[GetUsersGroup]] keyed by the perspective user id.
+   *
+   * @param getCardUsers Expandodo endpoint used to resolve card users
+   * @return function yielding the union of the response's author and site user ids;
+   *         when only one side is present it is returned as-is, and the result is
+   *         None when neither is present
+   */
+  def apply(getCardUsers: Expandodo.GetCardUsers): Type =
+    (cardUri, ctx) =>
+      Stitch.call(cardUri, GetUsersGroup(ctx.perspectiveUserId, getCardUsers)).map { resp =>
+        val authorUserIds = resp.authorUserIds.map(_.toSet)
+        val siteUserIds = resp.siteUserIds.map(_.toSet)
+
+        // Combine the optional id sets by pattern matching rather than Option.get:
+        // both present -> union, otherwise whichever side is defined (possibly None).
+        (authorUserIds, siteUserIds) match {
+          case (Some(authors), Some(sites)) => Some(authors ++ sites)
+          case (None, sites) => sites
+          case (authors, None) => authors
+        }
+      }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationControlRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationControlRepository.scala
new file mode 100644
index 000000000..64052b116
--- /dev/null
+++
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationControlRepository.scala
@@ -0,0 +1,62 @@
+package com.twitter.tweetypie
+package repository
+
+import com.twitter.spam.rtf.thriftscala.SafetyLevel
+import com.twitter.stitch.NotFound
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core.FilteredState.Unavailable.TweetDeleted
+import com.twitter.tweetypie.thriftscala.ConversationControl
+
+/**
+ * This repository loads up the conversation control values for a tweet which controls who can reply
+ * to a tweet. Because the conversation control values are stored on the root tweet of a conversation,
+ * we need to make sure that the code is able to load the data from the root tweet. To ensure this,
+ * no visibility filtering options are set on the query to load the root tweet fields.
+ *
+ * If visibility filtering was enabled, and the root tweet was filtered for the requesting user,
+ * then the conversation control data would not be returned and enforcement would effectively be
+ * side-stepped.
+ */
+object ConversationControlRepository {
+  private[this] val log = Logger(getClass)
+  type Type = (TweetId, CacheControl) => Stitch[Option[ConversationControl]]
+
+  /**
+   * @param repo  tweet repository used to load the root tweet of the conversation
+   * @param stats receiver for the `tweet_not_found` and `tweet_deleted` counters
+   * @return function that loads the conversation control stored on the tweet with the
+   *         given conversation id, yielding None when that tweet is NotFound or TweetDeleted
+   */
+  def apply(repo: TweetRepository.Type, stats: StatsReceiver): Type = {
+    // Look the counters up once; they are incremented in the handlers below.
+    val tweetNotFoundCounter = stats.counter("tweet_not_found")
+    val tweetDeletedCounter = stats.counter("tweet_deleted")
+
+    (conversationId: TweetId, cacheControl: CacheControl) => {
+      val options = TweetQuery.Options(
+        include = TweetQuery.Include(Set(Tweet.ConversationControlField.id)),
+        // We want the root tweet of a conversation that we're looking up to be
+        // cached with the same policy as the tweet we're looking up.
+        cacheControl = cacheControl,
+        enforceVisibilityFiltering = false,
+        safetyLevel = SafetyLevel.FilterNone
+      )
+
+      repo(conversationId, options)
+        .map(rootTweet => rootTweet.conversationControl)
+        .handle {
+          // We don't know of any case where tweets would return NotFound, but for
+          // pragmatic reasons, we're opening the conversation for replies
+          // in case a bug causing tweets to be NotFound exists.
+          case NotFound =>
+            tweetNotFoundCounter.incr()
+            None
+          // If no root tweet is found, the reply has no conversation controls
+          // this is by design, deleting the root tweet "opens" the conversation
+          case TweetDeleted =>
+            tweetDeletedCounter.incr()
+            None
+        }
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationIdRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationIdRepository.scala
new file mode 100644
index 000000000..b9a9b26ad
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationIdRepository.scala
@@ -0,0 +1,95 @@
+package com.twitter.tweetypie
+package repository
+
+import com.twitter.flockdb.client._
+import com.twitter.stitch.SeqGroup
+import com.twitter.stitch.Stitch
+import com.twitter.stitch.compat.LegacySeqGroup
+
+case class ConversationIdKey(tweetId: TweetId, parentId: TweetId)
+
+object ConversationIdRepository {
+  type Type = ConversationIdKey => Stitch[TweetId]
+
+  def apply(multiSelectOne: Iterable[Select[StatusGraph]] => Future[Seq[Option[Long]]]): Type =
+    key => Stitch.call(key, Group(multiSelectOne))
+
+  private case class Group(
+    multiSelectOne: Iterable[Select[StatusGraph]] => Future[Seq[Option[Long]]])
+      extends SeqGroup[ConversationIdKey, TweetId] {
+
+    private[this] def getConversationIds(
+      keys: Seq[ConversationIdKey],
+      getLookupId: ConversationIdKey => TweetId
+    ): Future[Map[ConversationIdKey, TweetId]] = {
+      val distinctIds = keys.map(getLookupId).distinct
+      val tflockQueries =
distinctIds.map(ConversationGraph.to) + if (tflockQueries.isEmpty) { + Future.value(Map[ConversationIdKey, TweetId]()) + } else { + multiSelectOne(tflockQueries).map { results => + // first, we need to match up the distinct ids requested with the corresponding result + val resultMap = + distinctIds + .zip(results) + .collect { + case (id, Some(conversationId)) => id -> conversationId + } + .toMap + + // then we need to map keys into the above map + keys.flatMap { key => resultMap.get(getLookupId(key)).map(key -> _) }.toMap + } + } + } + + /** + * Returns a key-value result that maps keys to the tweet's conversation IDs. + * + * Example: + * Tweet B is a reply to tweet A with conversation ID c. + * We want to get B's conversation ID. Then, for the request + * + * ConversationIdRequest(B.id, A.id) + * + * our key-value result's "found" map will contain a pair (B.id -> c). + */ + protected override def run(keys: Seq[ConversationIdKey]): Future[Seq[Try[TweetId]]] = + LegacySeqGroup.liftToSeqTry( + for { + // Try to get the conversation IDs for the parent tweets + convIdsFromParent <- getConversationIds(keys, _.parentId) + + // Collect the tweet IDs whose parents' conversation IDs couldn't be found. + // We assume that happened in one of two cases: + // * for a tweet whose parent has been deleted + // * for a tweet whose parent is the root of a conversation + // Note: In either case, we will try to look up the conversation ID of the tweet whose parent + // couldn't be found. If that can't be found either, we will eventually return the parent ID. + tweetsWhoseParentsDontHaveConvoIds = keys.toSet -- convIdsFromParent.keys + + // Collect the conversation IDs for the tweets whose parents have not been found, now using the + // tweets' own IDs. + convIdsFromTweet <- + getConversationIds(tweetsWhoseParentsDontHaveConvoIds.toSeq, _.tweetId) + + // Combine the by-parent-ID and by-tweet-ID results. 
+ convIdMap = convIdsFromParent ++ convIdsFromTweet + + // Assign conversation IDs to all not-found tweet IDs. + // A tweet might not have received a conversation ID if + // * the parent of the tweet is the root of the conversation, and we are in the write path + // for creating the tweet. In that case, the conversation ID should be the tweet's parent + // ID. + // * it had been created before TFlock started handling conversation IDs. In that case, the + // conversation ID will just point to the parent tweet so that we can have a conversation of + // at least two tweets. + // So in both cases, we want to return the tweet's parent ID. + } yield { + keys.map { + case k @ ConversationIdKey(t, p) => convIdMap.getOrElse(k, p) + } + } + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationMutedRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationMutedRepository.scala new file mode 100644 index 000000000..16e08f46c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationMutedRepository.scala @@ -0,0 +1,13 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.Stitch + +object ConversationMutedRepository { + + /** + * Same type as com.twitter.stitch.timelineservice.TimelineService.GetConversationMuted but + * without using Arrow. 
+ */ + type Type = (UserId, TweetId) => Stitch[Boolean] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CreativesContainerMaterializationRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CreativesContainerMaterializationRepository.scala new file mode 100644 index 000000000..d74c1c185 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CreativesContainerMaterializationRepository.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie.repository + +import com.twitter.container.thriftscala.MaterializeAsTweetFieldsRequest +import com.twitter.container.thriftscala.MaterializeAsTweetRequest +import com.twitter.container.{thriftscala => ccs} +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.Return +import com.twitter.tweetypie.{thriftscala => tp} +import com.twitter.tweetypie.backends +import com.twitter.tweetypie.thriftscala.GetTweetFieldsResult +import com.twitter.tweetypie.thriftscala.GetTweetResult +import com.twitter.util.Future +import com.twitter.util.Try + +/** + * A special kind of tweet is that, when [[tp.Tweet.underlyingCreativesContainerId]] is presented. + * tweetypie will delegate hydration of this tweet to creatives container service. 
+ */ +object CreativesContainerMaterializationRepository { + + type GetTweetType = + (ccs.MaterializeAsTweetRequest, Option[tp.GetTweetOptions]) => Stitch[tp.GetTweetResult] + + type GetTweetFieldsType = + ( + ccs.MaterializeAsTweetFieldsRequest, + tp.GetTweetFieldsOptions + ) => Stitch[tp.GetTweetFieldsResult] + + def apply( + materializeAsTweet: backends.CreativesContainerService.MaterializeAsTweet + ): GetTweetType = { + case class RequestGroup(opts: Option[tp.GetTweetOptions]) + extends SeqGroup[ccs.MaterializeAsTweetRequest, tp.GetTweetResult] { + override protected def run( + keys: Seq[MaterializeAsTweetRequest] + ): Future[Seq[Try[GetTweetResult]]] = + materializeAsTweet(ccs.MaterializeAsTweetRequests(keys, opts)).map { + res: Seq[GetTweetResult] => res.map(Return(_)) + } + } + + (request, options) => Stitch.call(request, RequestGroup(options)) + } + + def materializeAsTweetFields( + materializeAsTweetFields: backends.CreativesContainerService.MaterializeAsTweetFields + ): GetTweetFieldsType = { + case class RequestGroup(opts: tp.GetTweetFieldsOptions) + extends SeqGroup[ccs.MaterializeAsTweetFieldsRequest, tp.GetTweetFieldsResult] { + override protected def run( + keys: Seq[MaterializeAsTweetFieldsRequest] + ): Future[Seq[Try[GetTweetFieldsResult]]] = + materializeAsTweetFields(ccs.MaterializeAsTweetFieldsRequests(keys, opts)).map { + res: Seq[GetTweetFieldsResult] => res.map(Return(_)) + } + } + + (request, options) => Stitch.call(request, RequestGroup(options)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeletedTweetVisibilityRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeletedTweetVisibilityRepository.scala new file mode 100644 index 000000000..711e603c1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeletedTweetVisibilityRepository.scala @@ -0,0 +1,84 @@ +package com.twitter.tweetypie.repository + +import 
com.twitter.spam.rtf.thriftscala.FilteredReason
+import com.twitter.spam.rtf.thriftscala.{SafetyLevel => ThriftSafetyLevel}
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.TweetId
+import com.twitter.tweetypie.core.FilteredState.HasFilteredReason
+import com.twitter.tweetypie.core.FilteredState.Unavailable.BounceDeleted
+import com.twitter.tweetypie.core.FilteredState.Unavailable.SourceTweetNotFound
+import com.twitter.tweetypie.core.FilteredState.Unavailable.TweetDeleted
+import com.twitter.tweetypie.repository.VisibilityResultToFilteredState.toFilteredStateUnavailable
+import com.twitter.visibility.interfaces.tweets.DeletedTweetVisibilityLibrary
+import com.twitter.visibility.models.SafetyLevel
+import com.twitter.visibility.models.TweetDeleteReason
+import com.twitter.visibility.models.TweetDeleteReason.TweetDeleteReason
+import com.twitter.visibility.models.ViewerContext
+
+/**
+ * Generate FilteredReason for tweet entities in the following delete states:
+ * com.twitter.tweetypie.core.FilteredState.Unavailable
+ *   - SourceTweetNotFound(true)
+ *   - TweetDeleted
+ *   - BounceDeleted
+ *
+ * Callers of this repository should be ready to handle an empty response (Stitch.None)
+ * from the underlying VF library when:
+ * 1. the tweet should NOT be filtered for the given safety level
+ * 2. the tweet is not relevant content for the library to handle
+ */
+object DeletedTweetVisibilityRepository {
+  type Type = VisibilityRequest => Stitch[Option[FilteredReason]]
+
+  case class VisibilityRequest(
+    filteredState: Throwable,
+    tweetId: TweetId,
+    safetyLevel: Option[ThriftSafetyLevel],
+    viewerId: Option[Long],
+    isInnerQuotedTweet: Boolean)
+
+  def apply(
+    visibilityLibrary: DeletedTweetVisibilityLibrary.Type
+  ): Type = { request =>
+    toVisibilityTweetDeleteState(request.filteredState, request.isInnerQuotedTweet)
+      .map { deleteReason =>
+        val safetyLevel = SafetyLevel.fromThrift(
+          request.safetyLevel.getOrElse(ThriftSafetyLevel.FilterDefault)
+        )
+        val isRetweet = request.filteredState == SourceTweetNotFound(true)
+        visibilityLibrary(
+          DeletedTweetVisibilityLibrary.Request(
+            request.tweetId,
+            safetyLevel,
+            ViewerContext.fromContextWithViewerIdFallback(request.viewerId),
+            deleteReason,
+            isRetweet,
+            request.isInnerQuotedTweet
+          )
+        ).map(toFilteredStateUnavailable)
+          .map {
+            // Accept only states that carry a FilteredReason; anything else becomes None
+            case Some(fs: HasFilteredReason) =>
+              Some(fs.filteredReason)
+            case _ => None
+          }
+      }
+      .getOrElse(Stitch.None)
+  }
+
+  /**
+   * @return the VF model TweetDeleteReason matching a tweet hydration error, or
+   *         None when the error is not related to delete state tweets.
+   */
+  private def toVisibilityTweetDeleteState(
+    tweetDeleteState: Throwable,
+    isInnerQuotedTweet: Boolean
+  ): Option[TweetDeleteReason] = {
+    tweetDeleteState match {
+      case TweetDeleted => Some(TweetDeleteReason.Deleted)
+      case BounceDeleted => Some(TweetDeleteReason.BounceDeleted)
+      case SourceTweetNotFound(true) if !isInnerQuotedTweet => Some(TweetDeleteReason.Deleted)
+      case _ => None
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeviceSourceRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeviceSourceRepository.scala
new file mode 100644
index 000000000..f88513458
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeviceSourceRepository.scala
@@ -0,0 +1,75 @@
+package com.twitter.tweetypie
+package repository
+
+import com.twitter.passbird.clientapplication.thriftscala.ClientApplication
+import com.twitter.passbird.clientapplication.thriftscala.GetClientApplicationsResponse
+import com.twitter.servo.cache.ScopedCacheKey
+import com.twitter.stitch.MapGroup
+import com.twitter.stitch.NotFound
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.thriftscala.DeviceSource
+
+// converts the device source parameter value to lower-case, to make the cached
+// key
case-insensitive
+case class DeviceSourceKey(param: String) extends ScopedCacheKey("t", "ds", 1, param.toLowerCase)
+
+object DeviceSourceRepository {
+  type Type = String => Stitch[DeviceSource]
+
+  type GetClientApplications = FutureArrow[Seq[Long], GetClientApplicationsResponse]
+
+  val DefaultUrl = "https://help.twitter.com/en/using-twitter/how-to-tweet#source-labels"
+
+  def formatUrl(name: String, url: String): String = s"""<a href="$url">$name</a>"""
+
+  /**
+   * Construct an html a tag (via formatUrl) from the client application
+   * name and url for the display field because some
+   * clients depend on this. DefaultUrl is used when the application has no url.
+   */
+  def deviceSourceDisplay(
+    name: String,
+    urlOpt: Option[String]
+  ): String =
+    urlOpt match {
+      case Some(url) => formatUrl(name = name, url = url) // data sanitized by passbird
+      case None =>
+        formatUrl(name = name, url = DefaultUrl) // data sanitized by passbird
+    }
+
+  def toDeviceSource(app: ClientApplication): DeviceSource =
+    DeviceSource(
+      // The id field used to represent the id of a row
+      // in the now deprecated device_sources mysql table.
+ id = 0L, + parameter = "oauth:" + app.id, + internalName = "oauth:" + app.id, + name = app.name, + url = app.url.getOrElse(""), + display = deviceSourceDisplay(app.name, app.url), + clientAppId = Some(app.id) + ) + + def apply( + parseAppId: String => Option[Long], + getClientApplications: GetClientApplications + ): DeviceSourceRepository.Type = { + val getClientApplicationsGroup = new MapGroup[Long, DeviceSource] { + def run(ids: Seq[Long]): Future[Long => Try[DeviceSource]] = + getClientApplications(ids).map { response => id => + response.found.get(id) match { + case Some(app) => Return(toDeviceSource(app)) + case None => Throw(NotFound) + } + } + } + + appIdStr => + parseAppId(appIdStr) match { + case Some(appId) => + Stitch.call(appId, getClientApplicationsGroup) + case None => + Stitch.exception(NotFound) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/EscherbirdAnnotationRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/EscherbirdAnnotationRepository.scala new file mode 100644 index 000000000..57857c386 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/EscherbirdAnnotationRepository.scala @@ -0,0 +1,23 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.Escherbird +import com.twitter.tweetypie.thriftscala.EscherbirdEntityAnnotations + +object EscherbirdAnnotationRepository { + type Type = Tweet => Stitch[Option[EscherbirdEntityAnnotations]] + + def apply(annotate: Escherbird.Annotate): Type = + // use a `SeqGroup` to group the future-calls together, even though they can be + // executed independently, in order to help keep hydration between different tweets + // in sync, to improve batching in hydrators which occur later in the pipeline. 
+ tweet => + Stitch + .call(tweet, LegacySeqGroup(annotate.liftSeq)) + .map { annotations => + if (annotations.isEmpty) None + else Some(EscherbirdEntityAnnotations(annotations)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoScrubTimestampRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoScrubTimestampRepository.scala new file mode 100644 index 000000000..476790b60 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoScrubTimestampRepository.scala @@ -0,0 +1,16 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.servo.cache.ScopedCacheKey +import com.twitter.stitch.Stitch +import com.twitter.util.Base64Long + +case class GeoScrubTimestampKey(userId: UserId) + extends ScopedCacheKey("t", "gs", 1, Base64Long.toBase64(userId)) + +object GeoScrubTimestampRepository { + type Type = UserId => Stitch[Time] + + def apply(getLastGeoScrubTime: UserId => Stitch[Option[Time]]): Type = + userId => getLastGeoScrubTime(userId).lowerFromOption() +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoduckPlaceRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoduckPlaceRepository.scala new file mode 100644 index 000000000..483f3f73f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoduckPlaceRepository.scala @@ -0,0 +1,132 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.geoduck.common.{thriftscala => Geoduck} +import com.twitter.geoduck.service.thriftscala.GeoContext +import com.twitter.geoduck.service.thriftscala.Key +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.geoduck.util.service.GeoduckLocate +import com.twitter.geoduck.util.service.LocationResponseExtractors +import com.twitter.geoduck.util.{primitives => GDPrimitive} +import com.twitter.stitch.NotFound +import 
com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.{thriftscala => TP} + +object GeoduckPlaceConverter { + + def LocationResponseToTPPlace(lang: String, lr: LocationResponse): Option[TP.Place] = + GDPrimitive.Place + .fromLocationResponse(lr) + .headOption + .map(apply(lang, _)) + + def convertPlaceType(pt: Geoduck.PlaceType): TP.PlaceType = pt match { + case Geoduck.PlaceType.Unknown => TP.PlaceType.Unknown + case Geoduck.PlaceType.Country => TP.PlaceType.Country + case Geoduck.PlaceType.Admin => TP.PlaceType.Admin + case Geoduck.PlaceType.City => TP.PlaceType.City + case Geoduck.PlaceType.Neighborhood => TP.PlaceType.Neighborhood + case Geoduck.PlaceType.Poi => TP.PlaceType.Poi + case Geoduck.PlaceType.ZipCode => TP.PlaceType.Admin + case Geoduck.PlaceType.Metro => TP.PlaceType.Admin + case Geoduck.PlaceType.Admin0 => TP.PlaceType.Admin + case Geoduck.PlaceType.Admin1 => TP.PlaceType.Admin + case _ => + throw new IllegalStateException(s"Invalid place type: $pt") + } + + def convertPlaceName(gd: Geoduck.PlaceName): TP.PlaceName = + TP.PlaceName( + name = gd.name, + language = gd.language.getOrElse("en"), + `type` = convertPlaceNameType(gd.nameType), + preferred = gd.preferred + ) + + def convertPlaceNameType(pt: Geoduck.PlaceNameType): TP.PlaceNameType = pt match { + case Geoduck.PlaceNameType.Normal => TP.PlaceNameType.Normal + case Geoduck.PlaceNameType.Abbreviation => TP.PlaceNameType.Abbreviation + case Geoduck.PlaceNameType.Synonym => TP.PlaceNameType.Synonym + case _ => + throw new IllegalStateException(s"Invalid place name type: $pt") + } + + def convertAttributes(attrs: collection.Set[Geoduck.PlaceAttribute]): Map[String, String] = + attrs.map(attr => attr.key -> attr.value.getOrElse("")).toMap + + def convertBoundingBox(geom: GDPrimitive.Geometry): Seq[TP.GeoCoordinates] = + geom.coordinates.map { coord => + TP.GeoCoordinates( + latitude = coord.lat, + longitude = coord.lon + ) + } + + def 
apply(queryLang: String, geoplace: GDPrimitive.Place): TP.Place = { + val bestname = geoplace.bestName(queryLang).getOrElse(geoplace.hexId) + TP.Place( + id = geoplace.hexId, + `type` = convertPlaceType(geoplace.placeType), + name = bestname, + fullName = geoplace.fullName(queryLang).getOrElse(bestname), + attributes = convertAttributes(geoplace.attributes), + boundingBox = geoplace.boundingBox.map(convertBoundingBox), + countryCode = geoplace.countryCode, + containers = Some(geoplace.cone.map(_.hexId).toSet + geoplace.hexId), + countryName = geoplace.countryName(queryLang) + ) + } + + def convertGDKey(key: Key, lang: String): PlaceKey = { + val Key.PlaceId(pid) = key + PlaceKey("%016x".format(pid), lang) + } +} + +object GeoduckPlaceRepository { + val context: GeoContext = + GeoContext( + placeFields = Set( + Geoduck.PlaceQueryFields.Attributes, + Geoduck.PlaceQueryFields.BoundingBox, + Geoduck.PlaceQueryFields.PlaceNames, + Geoduck.PlaceQueryFields.Cone + ), + placeTypes = Set( + Geoduck.PlaceType.Country, + Geoduck.PlaceType.Admin0, + Geoduck.PlaceType.Admin1, + Geoduck.PlaceType.City, + Geoduck.PlaceType.Neighborhood + ), + includeCountryCode = true, + hydrateCone = true + ) + + def apply(geoduck: GeoduckLocate): PlaceRepository.Type = { + val geoduckGroup = LegacySeqGroup((ids: Seq[Key.PlaceId]) => geoduck(context, ids)) + + placeKey => + val placeId = + try { + Stitch.value( + Key.PlaceId(java.lang.Long.parseUnsignedLong(placeKey.placeId, 16)) + ) + } catch { + case _: NumberFormatException => Stitch.exception(NotFound) + } + + placeId + .flatMap(id => Stitch.call(id, geoduckGroup)) + .rescue { case LocationResponseExtractors.Failure(ex) => Stitch.exception(ex) } + .map { resp => + GDPrimitive.Place + .fromLocationResponse(resp) + .headOption + .map(GeoduckPlaceConverter(placeKey.language, _)) + } + .lowerFromOption() + } + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/LastQuoteOfQuoterRepository.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/LastQuoteOfQuoterRepository.scala new file mode 100644 index 000000000..9c853b85c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/LastQuoteOfQuoterRepository.scala @@ -0,0 +1,24 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.flockdb.client.QuoteTweetsIndexGraph +import com.twitter.flockdb.client.TFlockClient +import com.twitter.flockdb.client.UserTimelineGraph +import com.twitter.stitch.Stitch + +object LastQuoteOfQuoterRepository { + type Type = (TweetId, UserId) => Stitch[Boolean] + + def apply( + tflockReadClient: TFlockClient + ): Type = + (tweetId, userId) => { + // Select the tweets authored by userId quoting tweetId. + // By intersecting the tweet quotes with this user's tweets. + val quotesFromQuotingUser = QuoteTweetsIndexGraph + .from(tweetId) + .intersect(UserTimelineGraph.from(userId)) + + Stitch.callFuture(tflockReadClient.selectAll(quotesFromQuotingUser).map(_.size <= 1)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala new file mode 100644 index 000000000..dd87a2e99 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala @@ -0,0 +1,147 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.storage._ +import scala.util.control.NoStackTrace + +case class StorageGetTweetFailure(tweetId: TweetId, 
underlying: Throwable) + extends Exception(s"tweetId=$tweetId", underlying) + with NoStackTrace + +object ManhattanTweetRepository { + private[this] val logger = Logger(getClass) + + def apply( + getTweet: TweetStorageClient.GetTweet, + getStoredTweet: TweetStorageClient.GetStoredTweet, + shortCircuitLikelyPartialTweetReads: Gate[Duration], + statsReceiver: StatsReceiver, + clientIdHelper: ClientIdHelper, + ): TweetResultRepository.Type = { + def likelyAvailable(tweetId: TweetId): Boolean = + if (SnowflakeId.isSnowflakeId(tweetId)) { + val tweetAge: Duration = Time.now.since(SnowflakeId(tweetId).time) + !shortCircuitLikelyPartialTweetReads(tweetAge) + } else { + true // Not a snowflake id, so should definitely be available + } + + val likelyPartialTweetReadsCounter = statsReceiver.counter("likely_partial_tweet_reads") + + (tweetId, options) => + if (!likelyAvailable(tweetId)) { + likelyPartialTweetReadsCounter.incr() + val currentClient = + clientIdHelper.effectiveClientId.getOrElse(ClientIdHelper.UnknownClientId) + logger.debug(s"likely_partial_tweet_read $tweetId $currentClient") + Stitch.exception(NotFound) + } else if (options.fetchStoredTweets) { + getStoredTweet(tweetId).liftToTry.flatMap(handleGetStoredTweetResponse(tweetId, _)) + } else { + getTweet(tweetId).liftToTry.flatMap(handleGetTweetResponse(tweetId, _)) + } + } + + private def handleGetTweetResponse( + tweetId: tweetypie.TweetId, + response: Try[GetTweet.Response] + ): Stitch[TweetResult] = { + response match { + case Return(GetTweet.Response.Found(tweet)) => + Stitch.value(TweetResult(TweetData(tweet = tweet), HydrationState.modified)) + case Return(GetTweet.Response.NotFound) => + Stitch.exception(NotFound) + case Return(GetTweet.Response.Deleted) => + Stitch.exception(FilteredState.Unavailable.TweetDeleted) + case Return(_: GetTweet.Response.BounceDeleted) => + Stitch.exception(FilteredState.Unavailable.BounceDeleted) + case Throw(_: storage.RateLimited) => + 
Stitch.exception(OverCapacity(s"Storage overcapacity, tweetId=$tweetId")) + case Throw(e) => + Stitch.exception(StorageGetTweetFailure(tweetId, e)) + } + } + + private def handleGetStoredTweetResponse( + tweetId: tweetypie.TweetId, + response: Try[GetStoredTweet.Response] + ): Stitch[TweetResult] = { + def translateErrors( + getStoredTweetErrs: Seq[GetStoredTweet.Error] + ): Seq[StoredTweetResult.Error] = { + getStoredTweetErrs.map { + case GetStoredTweet.Error.TweetIsCorrupt => StoredTweetResult.Error.Corrupt + case GetStoredTweet.Error.ScrubbedFieldsPresent => + StoredTweetResult.Error.ScrubbedFieldsPresent + case GetStoredTweet.Error.TweetFieldsMissingOrInvalid => + StoredTweetResult.Error.FieldsMissingOrInvalid + case GetStoredTweet.Error.TweetShouldBeHardDeleted => + StoredTweetResult.Error.ShouldBeHardDeleted + } + } + + def toTweetResult( + tweet: Tweet, + state: Option[TweetStateRecord], + errors: Seq[GetStoredTweet.Error] + ): TweetResult = { + val translatedErrors = translateErrors(errors) + val canHydrate: Boolean = + !translatedErrors.contains(StoredTweetResult.Error.Corrupt) && + !translatedErrors.contains(StoredTweetResult.Error.FieldsMissingOrInvalid) + + val storedTweetResult = state match { + case None => StoredTweetResult.Present(translatedErrors, canHydrate) + case Some(TweetStateRecord.HardDeleted(_, softDeletedAtMsec, hardDeletedAtMsec)) => + StoredTweetResult.HardDeleted(softDeletedAtMsec, hardDeletedAtMsec) + case Some(TweetStateRecord.SoftDeleted(_, softDeletedAtMsec)) => + StoredTweetResult.SoftDeleted(softDeletedAtMsec, translatedErrors, canHydrate) + case Some(TweetStateRecord.BounceDeleted(_, deletedAtMsec)) => + StoredTweetResult.BounceDeleted(deletedAtMsec, translatedErrors, canHydrate) + case Some(TweetStateRecord.Undeleted(_, undeletedAtMsec)) => + StoredTweetResult.Undeleted(undeletedAtMsec, translatedErrors, canHydrate) + case Some(TweetStateRecord.ForceAdded(_, addedAtMsec)) => + StoredTweetResult.ForceAdded(addedAtMsec, 
translatedErrors, canHydrate) + } + + TweetResult( + TweetData(tweet = tweet, storedTweetResult = Some(storedTweetResult)), + HydrationState.modified) + } + + val tweetResult = response match { + case Return(GetStoredTweet.Response.FoundAny(tweet, state, _, _, errors)) => + toTweetResult(tweet, state, errors) + case Return(GetStoredTweet.Response.Failed(tweetId, _, _, _, errors)) => + val tweetData = TweetData( + tweet = Tweet(tweetId), + storedTweetResult = Some(StoredTweetResult.Failed(translateErrors(errors)))) + TweetResult(tweetData, HydrationState.modified) + case Return(GetStoredTweet.Response.HardDeleted(tweetId, state, _, _)) => + toTweetResult(Tweet(tweetId), state, Seq()) + case Return(GetStoredTweet.Response.NotFound(tweetId)) => { + val tweetData = TweetData( + tweet = Tweet(tweetId), + storedTweetResult = Some(StoredTweetResult.NotFound) + ) + TweetResult(tweetData, HydrationState.modified) + } + case _ => { + val tweetData = TweetData( + tweet = Tweet(tweetId), + storedTweetResult = Some(StoredTweetResult.Failed(Seq(StoredTweetResult.Error.Corrupt)))) + TweetResult(tweetData, HydrationState.modified) + } + } + + Stitch.value(tweetResult) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/MediaMetadataRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/MediaMetadataRepository.scala new file mode 100644 index 000000000..f9aa5a832 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/MediaMetadataRepository.scala @@ -0,0 +1,22 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.media.MediaMetadata +import com.twitter.tweetypie.media.MediaMetadataRequest + +object MediaMetadataRepository { + type Type = MediaMetadataRequest => Stitch[MediaMetadata] + + def apply(getMediaMetadata: FutureArrow[MediaMetadataRequest, MediaMetadata]): Type = { + // use an 
`SeqGroup` to group the future-calls together, even though they can be + // executed independently, in order to help keep hydration between different tweets + // in sync, to improve batching in hydrators which occur later in the pipeline. + val requestGroup = SeqGroup[MediaMetadataRequest, MediaMetadata] { + requests: Seq[MediaMetadataRequest] => + Future.collect(requests.map(r => getMediaMetadata(r).liftToTry)) + } + mediaReq => Stitch.call(mediaReq, requestGroup) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ParentUserIdRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ParentUserIdRepository.scala new file mode 100644 index 000000000..8c7092a53 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ParentUserIdRepository.scala @@ -0,0 +1,33 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.FilteredState.Unavailable.BounceDeleted +import com.twitter.tweetypie.core.FilteredState.Unavailable.SourceTweetNotFound +import com.twitter.tweetypie.core.FilteredState.Unavailable.TweetDeleted + +object ParentUserIdRepository { + type Type = Tweet => Stitch[Option[UserId]] + + case class ParentTweetNotFound(tweetId: TweetId) extends Exception + + def apply(tweetRepo: TweetRepository.Type): Type = { + val options = TweetQuery.Options(TweetQuery.Include(Set(Tweet.CoreDataField.id))) + + tweet => + getShare(tweet) match { + case Some(share) if share.sourceStatusId == share.parentStatusId => + Stitch.value(Some(share.sourceUserId)) + case Some(share) => + tweetRepo(share.parentStatusId, options) + .map(tweet => Some(getUserId(tweet))) + .rescue { + case NotFound | TweetDeleted | BounceDeleted | SourceTweetNotFound(_) => + Stitch.exception(ParentTweetNotFound(share.parentStatusId)) + } + case None => + Stitch.None + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PastedMediaRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PastedMediaRepository.scala new file mode 100644 index 000000000..dd21e4ec1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PastedMediaRepository.scala @@ -0,0 +1,129 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.media.Media +import com.twitter.tweetypie.media.MediaUrl +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.MediaId +import java.nio.ByteBuffer + +case class PastedMedia(mediaEntities: Seq[MediaEntity], mediaTags: Map[MediaId, Seq[MediaTag]]) { + + /** + * Updates the copied media entities to have the same indices as the given UrlEntity. + */ + def updateEntities(urlEntity: UrlEntity): PastedMedia = + if (mediaEntities.isEmpty) this + else copy(mediaEntities = mediaEntities.map(Media.copyFromUrlEntity(_, urlEntity))) + + def merge(that: PastedMedia): PastedMedia = + PastedMedia( + mediaEntities = this.mediaEntities ++ that.mediaEntities, + mediaTags = this.mediaTags ++ that.mediaTags + ) + + /** + * Return a new PastedMedia that contains only the first maxMediaEntities media entities + */ + def take(maxMediaEntities: Int): PastedMedia = { + val entities = this.mediaEntities.take(maxMediaEntities) + val mediaIds = entities.map(_.mediaId) + val pastedTags = mediaTags.filterKeys { id => mediaIds.contains(id) } + + PastedMedia( + mediaEntities = entities, + mediaTags = pastedTags + ) + } + + def mergeTweetMediaTags(ownedTags: Option[TweetMediaTags]): Option[TweetMediaTags] = { + val merged = ownedTags.map(_.tagMap).getOrElse(Map.empty) ++ mediaTags + if (merged.nonEmpty) { + Some(TweetMediaTags(merged)) + } else { + None + } + } +} + +object PastedMedia { + import 
MediaUrl.Permalink.hasTweetId + + val empty: PastedMedia = PastedMedia(Nil, Map.empty) + + /** + * @param tweet: the tweet whose media URL was pasted. + * + * @return the media that should be copied to a tweet that has a + * link to the media in this tweet, along with its protection + * status. The returned media entities will have sourceStatusId + * and sourceUserId set appropriately for inclusion in a different + * tweet. + */ + def getMediaEntities(tweet: Tweet): Seq[MediaEntity] = + getMedia(tweet).collect { + case mediaEntity if hasTweetId(mediaEntity, tweet.id) => + setSource(mediaEntity, tweet.id, getUserId(tweet)) + } + + def setSource(mediaEntity: MediaEntity, tweetId: TweetId, userId: TweetId): MediaEntity = + mediaEntity.copy( + sourceStatusId = Some(tweetId), + sourceUserId = Some(mediaEntity.sourceUserId.getOrElse(userId)) + ) +} + +object PastedMediaRepository { + type Type = (TweetId, Ctx) => Stitch[PastedMedia] + + case class Ctx( + includeMediaEntities: Boolean, + includeAdditionalMetadata: Boolean, + includeMediaTags: Boolean, + extensionsArgs: Option[ByteBuffer], + safetyLevel: SafetyLevel) { + def asTweetQueryOptions: TweetQuery.Options = + TweetQuery.Options( + enforceVisibilityFiltering = true, + extensionsArgs = extensionsArgs, + safetyLevel = safetyLevel, + include = TweetQuery.Include( + tweetFields = + Set(Tweet.CoreDataField.id) ++ + (if (includeMediaEntities) Set(Tweet.MediaField.id) else Set.empty) ++ + (if (includeMediaTags) Set(Tweet.MediaTagsField.id) else Set.empty), + mediaFields = if (includeMediaEntities && includeAdditionalMetadata) { + Set(MediaEntity.AdditionalMetadataField.id) + } else { + Set.empty + }, + // don't recursively load pasted media + pastedMedia = false + ) + ) + } + + /** + * A Repository of PastedMedia fetched from other tweets. We query the tweet with + * default global visibility filtering enabled, so we won't see entities for users that + * are protected, deactivated, suspended, etc. 
+ */ + def apply(tweetRepo: TweetRepository.Type): Type = + (tweetId, ctx) => + tweetRepo(tweetId, ctx.asTweetQueryOptions) + .flatMap { t => + val entities = PastedMedia.getMediaEntities(t) + if (entities.nonEmpty) { + Stitch.value(PastedMedia(entities, getMediaTagMap(t))) + } else { + Stitch.NotFound + } + } + .rescue { + // drop filtered tweets + case _: FilteredState => Stitch.NotFound + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PenguinLanguageRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PenguinLanguageRepository.scala new file mode 100644 index 000000000..26525ab6c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PenguinLanguageRepository.scala @@ -0,0 +1,53 @@ +package com.twitter.tweetypie +package repository + +import com.ibm.icu.util.ULocale +import com.twitter.common.text.pipeline.TwitterLanguageIdentifier +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.repository.LanguageRepository.Text +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.FuturePool +import com.twitter.util.logging.Logger + +object LanguageRepository { + type Type = Text => Stitch[Option[Language]] + type Text = String +} + +object PenguinLanguageRepository { + private val identifier = new TwitterLanguageIdentifier.Builder().buildForTweet() + private val log = Logger(getClass) + + def isRightToLeft(lang: String): Boolean = + new ULocale(lang).getCharacterOrientation == "right-to-left" + + def apply(futurePool: FuturePool): LanguageRepository.Type = { + val identifyOne = + FutureArrow[Text, Option[Language]] { text => + futurePool { + try { + Some(identifier.identify(text)) + } catch { + case e: IllegalArgumentException => + val userId = TwitterContext().map(_.userId) + val encodedText = com.twitter.util.Base64StringEncoder.encode(text.getBytes) + log.info(s"${e.getMessage} : USER ID - $userId : 
TEXT - $encodedText") + None + } + }.map { + case Some(langWithScore) => + val lang = langWithScore.getLocale.getLanguage + Some( + Language( + language = lang, + rightToLeft = isRightToLeft(lang), + confidence = langWithScore.getScore + )) + case None => None + } + } + + text => Stitch.call(text, LegacySeqGroup(identifyOne.liftSeq)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PerspectiveRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PerspectiveRepository.scala new file mode 100644 index 000000000..ac609097a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PerspectiveRepository.scala @@ -0,0 +1,15 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.Stitch +import com.twitter.stitch.timelineservice.TimelineService.GetPerspectives +import com.twitter.timelineservice.thriftscala.TimelineEntryPerspective + +object PerspectiveRepository { + + /** + * Same type as com.twitter.stitch.timelineservice.TimelineService.GetPerspectives but without + * using Arrow. 
+ */ + type Type = GetPerspectives.Query => Stitch[TimelineEntryPerspective] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PlaceRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PlaceRepository.scala new file mode 100644 index 000000000..8219eb350 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PlaceRepository.scala @@ -0,0 +1,13 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.servo.cache.ScopedCacheKey +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.thriftscala.Place + +case class PlaceKey(placeId: PlaceId, language: String) + extends ScopedCacheKey("t", "geo", 1, placeId, language) + +object PlaceRepository { + type Type = PlaceKey => Stitch[Place] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ProfileGeoRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ProfileGeoRepository.scala new file mode 100644 index 000000000..6968c71c1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ProfileGeoRepository.scala @@ -0,0 +1,66 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.dataproducts.enrichments.thriftscala._ +import com.twitter.gizmoduck.thriftscala.UserResponseState._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.GnipEnricherator +import com.twitter.tweetypie.thriftscala.GeoCoordinates + +case class ProfileGeoKey(tweetId: TweetId, userId: Option[UserId], coords: Option[GeoCoordinates]) { + def key: TweetData = + TweetData( + tweetId = tweetId, + userId = userId, + coordinates = coords.map(ProfileGeoRepository.convertGeo) + ) +} + +object ProfileGeoRepository { + type Type = ProfileGeoKey => Stitch[ProfileGeoEnrichment] + + case class UnexpectedState(state: EnrichmentHydrationState) 
extends Exception(state.name) + + def convertGeo(coords: GeoCoordinates): TweetyPieGeoCoordinates = + TweetyPieGeoCoordinates( + latitude = coords.latitude, + longitude = coords.longitude, + geoPrecision = coords.geoPrecision, + display = coords.display + ) + + def apply(hydrateProfileGeo: GnipEnricherator.HydrateProfileGeo): Type = { + import EnrichmentHydrationState._ + + val emptyEnrichmentStitch = Stitch.value(ProfileGeoEnrichment()) + + val profileGeoGroup = SeqGroup[TweetData, ProfileGeoResponse] { keys: Seq[TweetData] => + // Gnip ignores writePath and treats all requests as reads + LegacySeqGroup.liftToSeqTry( + hydrateProfileGeo(ProfileGeoRequest(requests = keys, writePath = false)) + ) + } + + (geoKey: ProfileGeoKey) => + Stitch + .call(geoKey.key, profileGeoGroup) + .flatMap { + case ProfileGeoResponse(_, Success, Some(enrichment), _) => + Stitch.value(enrichment) + case ProfileGeoResponse(_, Success, None, _) => + // when state is Success enrichment should always be Some, but default to be safe + emptyEnrichmentStitch + case ProfileGeoResponse( + _, + UserLookupError, + _, + Some(DeactivatedUser | SuspendedUser | NotFound) + ) => + emptyEnrichmentStitch + case r => + Stitch.exception(UnexpectedState(r.state)) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuotedTweetVisibilityRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuotedTweetVisibilityRepository.scala new file mode 100644 index 000000000..ed8116476 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuotedTweetVisibilityRepository.scala @@ -0,0 +1,48 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.spam.rtf.thriftscala.{SafetyLevel => ThriftSafetyLevel} +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.VisibilityResultToFilteredState.toFilteredState +import 
com.twitter.visibility.configapi.configs.VisibilityDeciderGates +import com.twitter.visibility.interfaces.tweets.QuotedTweetVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.QuotedTweetVisibilityRequest +import com.twitter.visibility.interfaces.tweets.TweetAndAuthor +import com.twitter.visibility.models.SafetyLevel +import com.twitter.visibility.models.ViewerContext + +/** + * This repository handles visibility filtering of inner quoted tweets + * based on relationships between the inner and outer tweets. This is + * additive to independent visibility filtering of the inner tweet. + */ +object QuotedTweetVisibilityRepository { + type Type = Request => Stitch[Option[FilteredState]] + + case class Request( + outerTweetId: TweetId, + outerAuthorId: UserId, + innerTweetId: TweetId, + innerAuthorId: UserId, + viewerId: Option[UserId], + safetyLevel: ThriftSafetyLevel) + + def apply( + quotedTweetVisibilityLibrary: QuotedTweetVisibilityLibrary.Type, + visibilityDeciderGates: VisibilityDeciderGates, + ): QuotedTweetVisibilityRepository.Type = { request: Request => + quotedTweetVisibilityLibrary( + QuotedTweetVisibilityRequest( + quotedTweet = TweetAndAuthor(request.innerTweetId, request.innerAuthorId), + outerTweet = TweetAndAuthor(request.outerTweetId, request.outerAuthorId), + ViewerContext.fromContextWithViewerIdFallback(request.viewerId), + safetyLevel = SafetyLevel.fromThrift(request.safetyLevel) + ) + ).map(visibilityResult => + toFilteredState( + visibilityResult = visibilityResult, + disableLegacyInterstitialFilteredReason = + visibilityDeciderGates.disableLegacyInterstitialFilteredReason())) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuoterHasAlreadyQuotedRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuoterHasAlreadyQuotedRepository.scala new file mode 100644 index 000000000..7de373848 --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuoterHasAlreadyQuotedRepository.scala @@ -0,0 +1,15 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.flockdb.client.QuotersGraph +import com.twitter.flockdb.client.TFlockClient +import com.twitter.stitch.Stitch + +object QuoterHasAlreadyQuotedRepository { + type Type = (TweetId, UserId) => Stitch[Boolean] + + def apply( + tflockReadClient: TFlockClient + ): Type = + (tweetId, userId) => Stitch.callFuture(tflockReadClient.contains(QuotersGraph, tweetId, userId)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RelationshipRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RelationshipRepository.scala new file mode 100644 index 000000000..9b6304b4a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RelationshipRepository.scala @@ -0,0 +1,53 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.servo.util.FutureArrow +import com.twitter.socialgraph.thriftscala._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup + +object RelationshipKey { + def blocks(sourceId: UserId, destinationId: UserId): RelationshipKey = + RelationshipKey(sourceId, destinationId, RelationshipType.Blocking) + + def follows(sourceId: UserId, destinationId: UserId): RelationshipKey = + RelationshipKey(sourceId, destinationId, RelationshipType.Following) + + def mutes(sourceId: UserId, destinationId: UserId): RelationshipKey = + RelationshipKey(sourceId, destinationId, RelationshipType.Muting) + + def reported(sourceId: UserId, destinationId: UserId): RelationshipKey = + RelationshipKey(sourceId, destinationId, RelationshipType.ReportedAsSpam) +} + +case class RelationshipKey( + sourceId: UserId, + destinationId: UserId, + relationship: RelationshipType) { + def asExistsRequest: ExistsRequest = + ExistsRequest( + source 
= sourceId, + target = destinationId, + relationships = Seq(Relationship(relationship)) + ) +} + +object RelationshipRepository { + type Type = RelationshipKey => Stitch[Boolean] + + def apply( + exists: FutureArrow[(Seq[ExistsRequest], Option[RequestContext]), Seq[ExistsResult]], + maxRequestSize: Int + ): Type = { + val relationshipGroup: SeqGroup[RelationshipKey, Boolean] = + new SeqGroup[RelationshipKey, Boolean] { + override def run(keys: Seq[RelationshipKey]): Future[Seq[Try[Boolean]]] = + LegacySeqGroup.liftToSeqTry( + exists((keys.map(_.asExistsRequest), None)).map(_.map(_.exists))) + override val maxSize: Int = maxRequestSize + } + + relationshipKey => Stitch.call(relationshipKey, relationshipGroup) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RetweetSpamCheckRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RetweetSpamCheckRepository.scala new file mode 100644 index 000000000..610f3f3c4 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RetweetSpamCheckRepository.scala @@ -0,0 +1,13 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.service.gen.scarecrow.{thriftscala => scarecrow} +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.backends.Scarecrow + +object RetweetSpamCheckRepository { + type Type = scarecrow.Retweet => Stitch[scarecrow.TieredAction] + + def apply(checkRetweet: Scarecrow.CheckRetweet): Type = + retweet => Stitch.callFuture(checkRetweet(retweet)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StitchLockingCache.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StitchLockingCache.scala new file mode 100644 index 000000000..7808d465f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StitchLockingCache.scala @@ -0,0 +1,161 @@ +package com.twitter.tweetypie.repository + +import 
com.twitter.servo.cache.{CachedValueStatus => Status, LockingCache => KVLockingCache, _} +import com.twitter.servo.repository.{CachedResult => Result} +import com.twitter.stitch.MapGroup +import com.twitter.stitch.Group +import com.twitter.stitch.Stitch +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Time +import com.twitter.util.Try + +/** + * Adapts a key-value locking cache to Stitch and + * normalizes the results to `CachedResult`. + */ +trait StitchLockingCache[K, V] { + val get: K => Stitch[Result[K, V]] + val lockAndSet: (K, StitchLockingCache.Val[V]) => Stitch[Unit] + val delete: K => Stitch[Boolean] +} + +object StitchLockingCache { + + /** + * Value intended to be written back to cache using lockAndSet. + * + * Note that only a subset of CachedValueStatus values are eligible for writing: + * Found, NotFound, and Deleted + */ + sealed trait Val[+V] + object Val { + case class Found[V](value: V) extends Val[V] + case object NotFound extends Val[Nothing] + case object Deleted extends Val[Nothing] + } + + /** + * A Group for batching get requests to a [[KVLockingCache]].
+ */ + private case class GetGroup[K, V](cache: KVLockingCache[K, Cached[V]], override val maxSize: Int) + extends MapGroup[K, Result[K, V]] { + + private[this] def cachedToResult(key: K, cached: Cached[V]): Try[Result[K, V]] = + cached.status match { + case Status.NotFound => Return(Result.CachedNotFound(key, cached.cachedAt)) + case Status.Deleted => Return(Result.CachedDeleted(key, cached.cachedAt)) + case Status.SerializationFailed => Return(Result.SerializationFailed(key)) + case Status.DeserializationFailed => Return(Result.DeserializationFailed(key)) + case Status.Evicted => Return(Result.NotFound(key)) + case Status.DoNotCache => Return(Result.DoNotCache(key, cached.doNotCacheUntil)) + case Status.Found => + cached.value match { + case None => Return(Result.NotFound(key)) + case Some(value) => Return(Result.CachedFound(key, value, cached.cachedAt)) + } + case _ => Throw(new UnsupportedOperationException) + } + + override protected def run(keys: Seq[K]): Future[K => Try[Result[K, V]]] = + cache.get(keys).map { (result: KeyValueResult[K, Cached[V]]) => key => + result.found.get(key) match { + case Some(cached) => cachedToResult(key, cached) + case None => + result.failed.get(key) match { + case Some(t) => Return(Result.Failed(key, t)) + case None => Return(Result.NotFound(key)) + } + } + } + } + + /** + * Used in the implementation of LockAndSetGroup. This is just a + * glorified tuple with special equality semantics where calls with + * the same key will compare equal. MapGroup will use this as a key + * in a Map, which will prevent duplicate lockAndSet calls with the + * same key. We don't care which one we use + */ + private class LockAndSetCall[K, V](val key: K, val value: V) { + override def equals(other: Any): Boolean = + other match { + case call: LockAndSetCall[_, _] => call.key == key + case _ => false + } + + override def hashCode(): Int = key.hashCode + } + + /** + * A Group for `lockAndSet` calls to a [[KVLockingCache]]. 
This is + * necessary to avoid writing back a key multiple times if it + * appears more than once in a batch. LockAndSetCall considers two + * calls equal even if the values differ because multiple lockAndSet + * calls for the same key will eventually result in only one being + * chosen by the cache anyway, and this avoids conflicting + * lockAndSet calls. + * + * For example, consider a tweet that mentions @jack twice + * when @jack is not in cache. That will result in two queries to + * load @jack, which will be deduped by the Group when the repo is + * called. Despite the fact that it is loaded only once, each of the + * two loads is oblivious to the other, so each of them attempts to + * write the value back to cache, resulting in two `lockAndSet` + * calls for @jack, so we have to dedupe them again. + */ + private case class LockAndSetGroup[K, V]( + cache: KVLockingCache[K, V], + picker: KVLockingCache.Picker[V]) + extends MapGroup[LockAndSetCall[K, V], Option[V]] { + + override def run( + calls: Seq[LockAndSetCall[K, V]] + ): Future[LockAndSetCall[K, V] => Try[Option[V]]] = + Future + .collect { + calls.map { call => + // This is masked to prevent interrupts to the overall + // request from interrupting writes back to cache.
+ cache + .lockAndSet(call.key, KVLockingCache.PickingHandler(call.value, picker)) + .masked + .liftToTry + } + } + .map(responses => calls.zip(responses).toMap) + } + + def apply[K, V]( + underlying: KVLockingCache[K, Cached[V]], + picker: KVLockingCache.Picker[Cached[V]], + maxRequestSize: Int = Int.MaxValue + ): StitchLockingCache[K, V] = + new StitchLockingCache[K, V] { + override val get: K => Stitch[Result[K, V]] = { + val group: Group[K, Result[K, V]] = GetGroup(underlying, maxRequestSize) + + (key: K) => Stitch.call(key, group) + } + + override val lockAndSet: (K, Val[V]) => Stitch[Unit] = { + val group = LockAndSetGroup(underlying, picker) + + (key: K, value: Val[V]) => { + val now = Time.now + val cached: Cached[V] = + value match { + case Val.Found(v) => Cached[V](Some(v), Status.Found, now, Some(now)) + case Val.NotFound => Cached[V](None, Status.NotFound, now, Some(now)) + case Val.Deleted => Cached[V](None, Status.Deleted, now, Some(now)) + } + + Stitch.call(new LockAndSetCall(key, cached), group).unit + } + } + + override val delete: K => Stitch[Boolean] = + (key: K) => Stitch.callFuture(underlying.delete(key)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityAccessRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityAccessRepository.scala new file mode 100644 index 000000000..2658446a3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityAccessRepository.scala @@ -0,0 +1,26 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.CommunityId +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} + +object StratoCommunityAccessRepository { + type Type = CommunityId => Stitch[Option[CommunityAccess]] + + sealed trait CommunityAccess + object CommunityAccess { + case object Public extends CommunityAccess + case 
object Closed extends CommunityAccess + case object Private extends CommunityAccess + } + + val column = "communities/access.Community" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[CommunityId, Unit, CommunityAccess] = + client.fetcher[CommunityId, CommunityAccess](column) + + communityId => fetcher.fetch(communityId).map(_.v) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityMembershipRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityMembershipRepository.scala new file mode 100644 index 000000000..cfeb070c1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityMembershipRepository.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.CommunityId +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} + +object StratoCommunityMembershipRepository { + type Type = CommunityId => Stitch[Boolean] + + val column = "communities/isMember.Community" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[CommunityId, Unit, Boolean] = + client.fetcher[CommunityId, Boolean](column) + + communityId => fetcher.fetch(communityId).map(_.v.getOrElse(false)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoPromotedTweetRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoPromotedTweetRepository.scala new file mode 100644 index 000000000..8c510e533 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoPromotedTweetRepository.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.tweetypie.TweetId +import com.twitter.strato.client.{Client => StratoClient} + +object 
StratoPromotedTweetRepository { + type Type = TweetId => Stitch[Boolean] + + val column = "tweetypie/isPromoted.Tweet" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[TweetId, Unit, Boolean] = + client.fetcher[TweetId, Boolean](column) + + tweetId => fetcher.fetch(tweetId).map(f => f.v.getOrElse(false)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSafetyLabelsRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSafetyLabelsRepository.scala new file mode 100644 index 000000000..68f537fce --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSafetyLabelsRepository.scala @@ -0,0 +1,49 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.search.blender.services.strato.UserSearchSafetySettings +import com.twitter.spam.rtf.thriftscala.SafetyLabel +import com.twitter.spam.rtf.thriftscala.SafetyLabelMap +import com.twitter.spam.rtf.thriftscala.SafetyLabelType +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.visibility.common.UserSearchSafetySource + +object StratoSafetyLabelsRepository { + type Type = (TweetId, SafetyLabelType) => Stitch[Option[SafetyLabel]] + + def apply(client: StratoClient): Type = { + val safetyLabelMapRepo = StratoSafetyLabelMapRepository(client) + + (tweetId, safetyLabelType) => + safetyLabelMapRepo(tweetId).map( + _.flatMap(_.labels).flatMap(_.get(safetyLabelType)) + ) + } +} + +object StratoSafetyLabelMapRepository { + type Type = TweetId => Stitch[Option[SafetyLabelMap]] + + val column = "visibility/baseTweetSafetyLabelMap" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[TweetId, Unit, SafetyLabelMap] = + client.fetcher[TweetId, SafetyLabelMap](column) + + tweetId => fetcher.fetch(tweetId).map(_.v) + } +} 
+ +object StratoUserSearchSafetySourceRepository { + type Type = UserId => Stitch[UserSearchSafetySettings] + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[UserId, Unit, UserSearchSafetySettings] = + client.fetcher[UserId, UserSearchSafetySettings](UserSearchSafetySource.Column) + + userId => fetcher.fetch(userId).map(_.v.getOrElse(UserSearchSafetySource.DefaultSetting)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSubscriptionVerificationRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSubscriptionVerificationRepository.scala new file mode 100644 index 000000000..1fb825c6b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSubscriptionVerificationRepository.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.tweetypie.UserId +import com.twitter.strato.client.{Client => StratoClient} + +object StratoSubscriptionVerificationRepository { + type Type = (UserId, String) => Stitch[Boolean] + + val column = "subscription-services/subscription-verification/cacheProtectedHasAccess.User" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[UserId, Seq[String], Seq[String]] = + client.fetcher[UserId, Seq[String], Seq[String]](column) + + (userId, resource) => fetcher.fetch(userId, Seq(resource)).map(f => f.v.nonEmpty) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowEligibleRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowEligibleRepository.scala new file mode 100644 index 000000000..e86352c37 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowEligibleRepository.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.repository + +import 
com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.UserId + +object StratoSuperFollowEligibleRepository { + type Type = UserId => Stitch[Boolean] + + val column = "audiencerewards/audienceRewardsService/getSuperFollowEligibility.User" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[UserId, Unit, Boolean] = + client.fetcher[UserId, Boolean](column) + + userId => fetcher.fetch(userId).map(_.v.getOrElse(false)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowRelationsRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowRelationsRepository.scala new file mode 100644 index 000000000..e6fa65268 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowRelationsRepository.scala @@ -0,0 +1,60 @@ +package com.twitter.tweetypie.repository + +import com.twitter.audience_rewards.thriftscala.HasSuperFollowingRelationshipRequest +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.UserId +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.thriftscala.ExclusiveTweetControl +import com.twitter.tweetypie.thriftscala.TweetCreateState + +object StratoSuperFollowRelationsRepository { + type Type = (UserId, UserId) => Stitch[Boolean] + + def apply(client: StratoClient): Type = { + + val column = "audiencerewards/superFollows/hasSuperFollowingRelationshipV2" + + val fetcher: Fetcher[HasSuperFollowingRelationshipRequest, Unit, Boolean] = + client.fetcher[HasSuperFollowingRelationshipRequest, Boolean](column) + + (authorId, viewerId) => { + // Owner of an exclusive tweet chain can respond to their own + // tweets / replies, despite not super 
following themselves + if (authorId == viewerId) { + Stitch.True + } else { + val key = HasSuperFollowingRelationshipRequest(authorId, viewerId) + // The default relation for this column is "missing", aka None. + // This needs to be mapped to false since Super Follows are a sparse relation. + fetcher.fetch(key).map(_.v.getOrElse(false)) + } + } + } + + object Validate { + def apply( + exclusiveTweetControl: Option[ExclusiveTweetControl], + userId: UserId, + superFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type + ): Future[Unit] = { + Stitch + .run { + exclusiveTweetControl.map(_.conversationAuthorId) match { + // Don't do exclusive tweet validation on non exclusive tweets. + case None => + Stitch.value(true) + + case Some(convoAuthorId) => + superFollowRelationsRepo(userId, convoAuthorId) // NOTE(review): the repository lambda names its parameters (authorId, viewerId), but userId is passed first and the conversation author second -- confirm this argument order is intended + } + }.map { + case true => Future.Unit + case false => + Future.exception(TweetCreateFailure.State(TweetCreateState.SourceTweetNotFound)) + }.flatten + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetCountsRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetCountsRepository.scala new file mode 100644 index 000000000..82bbd2930 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetCountsRepository.scala @@ -0,0 +1,59 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.flockdb.client._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup + +sealed trait TweetCountKey { + // The flockdb Select used to calculate the count from TFlock + def toSelect: Select[StatusGraph] + + // The Tweet id for this count + def tweetId: TweetId + + // com.twitter.servo.cache.MemcacheCache calls toString to turn this key into a cache key + def toString: String +} + +case class RetweetsKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = 
RetweetsGraph.from(tweetId) + override lazy val toString: String = "cnts:rt:" + tweetId +} + +case class RepliesKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = RepliesToTweetsGraph.from(tweetId) + override lazy val toString: String = "cnts:re:" + tweetId +} + +case class FavsKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = FavoritesGraph.to(tweetId) + override lazy val toString: String = "cnts:fv:" + tweetId +} + +case class QuotesKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = QuotersGraph.from(tweetId) + override lazy val toString: String = "cnts:qt:" + tweetId +} + +case class BookmarksKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = BookmarksGraph.to(tweetId) + override lazy val toString: String = "cnts:bm:" + tweetId +} + +object TweetCountsRepository { + type Type = TweetCountKey => Stitch[Count] + + def apply(tflock: TFlockClient, maxRequestSize: Int): Type = { + object RequestGroup extends SeqGroup[TweetCountKey, Count] { + override def run(keys: Seq[TweetCountKey]): Future[Seq[Try[Count]]] = { // results are counts (not media ids); all keys of a batch go through one tflock.multiCount call + val selects = MultiSelect[StatusGraph]() ++= keys.map(_.toSelect) + LegacySeqGroup.liftToSeqTry(tflock.multiCount(selects).map(counts => counts.map(_.toLong))) + } + override val maxSize: Int = maxRequestSize + } + + key => Stitch.call(key, RequestGroup) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetQuery.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetQuery.scala new file mode 100644 index 000000000..efbd5b61f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetQuery.scala @@ -0,0 +1,147 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import java.nio.ByteBuffer + +object TweetQuery { + + /** + * Parent trait that indicates what 
triggered the tweet query. + */ + sealed trait Cause { + import Cause._ + + /** + * Is the tweet query hydrating the specified tweet for the purposes of a write? + */ + def writing(tweetId: TweetId): Boolean = + this match { + case w: Write if w.tweetId == tweetId => true + case _ => false + } + + /** + * Is the tweet query performing a regular read for any tweet? If the cause is + * a write on a different tweet, then any other tweet that is read in support of the write + * is considered a normal read, and is subject to read-path hydration. + */ + def reading(tweetId: TweetId): Boolean = + !writing(tweetId) + + /** + * Are we performing an insert after create on the specified tweet? An undelete operation + * performs an insert, but is not considered an initial insert. + */ + def initialInsert(tweetId: TweetId): Boolean = + this match { + case Insert(`tweetId`) => true + case _ => false + } + } + + object Cause { + case object Read extends Cause + trait Write extends Cause { + val tweetId: TweetId + } + case class Insert(tweetId: TweetId) extends Write + case class Undelete(tweetId: TweetId) extends Write + } + + /** + * Options for TweetQuery. + * + * @param include indicates which optionally hydrated fields on each tweet should be + * hydrated and included. + * @param enforceVisibilityFiltering whether Tweetypie visibility hydrators should be run to + * filter protected tweets, blocked quote tweets, contributor data, etc. This does not affect + * Visibility Library (http://go/vf) based filtering. + * @param cause indicates what triggered the read: a normal read, or a write operation. + * @param forExternalConsumption when true, the tweet is being read for rendering to an external + * client such as the iPhone Twitter app and is subject to being Dropped to prevent serving + * "bad" text to clients that might crash their OS. When false, the tweet is being read for internal + * non-client purposes and should never be Dropped. 
+ * @param isInnerQuotedTweet Set by [[com.twitter.tweetypie.hydrator.QuotedTweetHydrator]], + * to be used by [[com.twitter.visibility.interfaces.tweets.TweetVisibilityLibrary]] + * so VisibilityFiltering library can execute Interstitial logic on inner quoted tweets. + * @param fetchStoredTweets Set by GetStoredTweetsHandler. If set to true, the Manhattan storage + * layer will fetch and construct Tweets regardless of what state they're in. + */ + case class Options( + include: TweetQuery.Include, + cacheControl: CacheControl = CacheControl.ReadWriteCache, + cardsPlatformKey: Option[String] = None, + excludeReported: Boolean = false, + enforceVisibilityFiltering: Boolean = false, + safetyLevel: SafetyLevel = SafetyLevel.FilterNone, + forUserId: Option[UserId] = None, + languageTag: String = "en", + extensionsArgs: Option[ByteBuffer] = None, + cause: Cause = Cause.Read, + scrubUnrequestedFields: Boolean = true, + requireSourceTweet: Boolean = true, + forExternalConsumption: Boolean = false, + simpleQuotedTweet: Boolean = false, + isInnerQuotedTweet: Boolean = false, + fetchStoredTweets: Boolean = false, + isSourceTweet: Boolean = false, + enableEditControlHydration: Boolean = true) + + case class Include( + tweetFields: Set[FieldId] = Set.empty, + countsFields: Set[FieldId] = Set.empty, + mediaFields: Set[FieldId] = Set.empty, + quotedTweet: Boolean = false, + pastedMedia: Boolean = false) { + + /** + * Accumulates additional (rather than replaces) field ids. 
+ */ + def also( + tweetFields: Traversable[FieldId] = Nil, + countsFields: Traversable[FieldId] = Nil, + mediaFields: Traversable[FieldId] = Nil, + quotedTweet: Option[Boolean] = None, + pastedMedia: Option[Boolean] = None + ): Include = + copy( + tweetFields = this.tweetFields ++ tweetFields, + countsFields = this.countsFields ++ countsFields, + mediaFields = this.mediaFields ++ mediaFields, + quotedTweet = quotedTweet.getOrElse(this.quotedTweet), + pastedMedia = pastedMedia.getOrElse(this.pastedMedia) + ) + + /** + * Removes field ids. + */ + def exclude( + tweetFields: Traversable[FieldId] = Nil, + countsFields: Traversable[FieldId] = Nil, + mediaFields: Traversable[FieldId] = Nil + ): Include = + copy( + tweetFields = this.tweetFields -- tweetFields, + countsFields = this.countsFields -- countsFields, + mediaFields = this.mediaFields -- mediaFields + ) + + def ++(that: Include): Include = + copy( + tweetFields = this.tweetFields ++ that.tweetFields, + countsFields = this.countsFields ++ that.countsFields, + mediaFields = this.mediaFields ++ that.mediaFields, + quotedTweet = this.quotedTweet || that.quotedTweet, + pastedMedia = this.pastedMedia || that.pastedMedia + ) + } +} + +sealed case class CacheControl(writeToCache: Boolean, readFromCache: Boolean) + +object CacheControl { + val NoCache: CacheControl = CacheControl(false, false) + val ReadOnlyCache: CacheControl = CacheControl(false, true) + val ReadWriteCache: CacheControl = CacheControl(true, true) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetRepository.scala new file mode 100644 index 000000000..f0f24fafa --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetRepository.scala @@ -0,0 +1,31 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import 
com.twitter.tweetypie.core._ + +object TweetRepository { + type Type = (TweetId, TweetQuery.Options) => Stitch[Tweet] + type Optional = (TweetId, TweetQuery.Options) => Stitch[Option[Tweet]] + + def tweetGetter(repo: Optional, opts: TweetQuery.Options): FutureArrow[TweetId, Option[Tweet]] = + FutureArrow(tweetId => Stitch.run(repo(tweetId, opts))) + + def tweetGetter(repo: Optional): FutureArrow[(TweetId, TweetQuery.Options), Option[Tweet]] = + FutureArrow { case (tweetId, opts) => Stitch.run(repo(tweetId, opts)) } + + /** + * Converts a `TweetResultRepository.Type`-typed repo to a `TweetRepository.Type`-typed repo. + */ + def fromTweetResult(repo: TweetResultRepository.Type): Type = + (tweetId, options) => repo(tweetId, options).map(_.value.tweet) + + /** + * Converts a `Type`-typed repo to an `Optional`-typed + * repo, where NotFound or filtered tweets are returned as `None`. + */ + def optional(repo: Type): Optional = + (tweetId, options) => + repo(tweetId, options).liftToOption { case NotFound | (_: FilteredState) => true } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetResultRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetResultRepository.scala new file mode 100644 index 000000000..2e8f50ffd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetResultRepository.scala @@ -0,0 +1,17 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.core.TweetResult + +object TweetResultRepository { + type Type = (TweetId, TweetQuery.Options) => Stitch[TweetResult] + + /** + * Short-circuits the request of invalid tweet ids (`<= 0`) by immediately throwing `NotFound`. 
+ */ + def shortCircuitInvalidIds(repo: Type): Type = { + case (tweetId, _) if tweetId <= 0 => Stitch.NotFound + case (tweetId, options) => repo(tweetId, options) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetSpamCheckRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetSpamCheckRepository.scala new file mode 100644 index 000000000..98b3f5e47 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetSpamCheckRepository.scala @@ -0,0 +1,14 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.service.gen.scarecrow.{thriftscala => scarecrow} +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.backends.Scarecrow + +object TweetSpamCheckRepository { + + type Type = (scarecrow.TweetNew, scarecrow.TweetContext) => Stitch[scarecrow.CheckTweetResponse] + + def apply(checkTweet: Scarecrow.CheckTweet2): Type = + (tweetNew, tweetContext) => Stitch.callFuture(checkTweet((tweetNew, tweetContext))) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetVisibilityRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetVisibilityRepository.scala new file mode 100644 index 000000000..f0017b2fd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetVisibilityRepository.scala @@ -0,0 +1,123 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.logging.Logger +import com.twitter.spam.rtf.thriftscala.{SafetyLevel => ThriftSafetyLevel} +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.VisibilityResultToFilteredState.toFilteredState +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.visibility.configapi.configs.VisibilityDeciderGates +import com.twitter.visibility.interfaces.tweets.TweetVisibilityLibrary +import 
com.twitter.visibility.interfaces.tweets.TweetVisibilityRequest +import com.twitter.visibility.models.SafetyLevel.DeprecatedSafetyLevel +import com.twitter.visibility.models.SafetyLevel +import com.twitter.visibility.models.ViewerContext + +/** + * This repository handles visibility filtering of tweets + * + * i.e. deciding whether to drop/suppress tweets based on viewer + * and safety level for instance. Rules in VF library can be thought as: + * + * (SafetyLevel)(Viewer, Content, Features) => Action + * + * SafetyLevel represents the product context in which the Viewer is + * requesting to view the Content. Example: TimelineHome, TweetDetail, + * Recommendations, Notifications + * + * Content here is mainly tweets (can be users, notifications, cards etc) + * + * Features might include safety labels and other metadata of a Tweet, + * flags set on a User (including the Viewer), relationships between Users + * (e.g. block, follow), relationships between Users and Content + * (e.g. reported for spam) + * + * We initialize VisibilityLibrary using UserSource and UserRelationshipSource: + * Stitch interfaces that provide methods to retrieve user and relationship + * information in Gizmoduck and SocialGraph repositories, respectively. + * This user and relationship info along with Tweet labels, provide necessary + * features to take a filtering decision. + * + * Actions supported in Tweetypie right now are Drop and Suppress. + * In the future, we might want to surface other granular actions such as + * Tombstone and Downrank which are supported in VF lib. + * + * The TweetVisibilityRepository has the following format: + * + * Request(Tweet, Option[SafetyLevel], Option[UserId]) => Stitch[Option[FilteredState]] + * + * SafetyLevel is plumbed from the tweet query options. + * + * In addition to the latency stats and rpc counts from VF library, we also capture + * unsupported and deprecated safety level stats here to inform the relevant clients. 
+ * + * go/visibilityfiltering, go/visibilityfilteringdocs + * + */ +object TweetVisibilityRepository { + type Type = Request => Stitch[Option[FilteredState]] + + case class Request( + tweet: Tweet, + viewerId: Option[UserId], + safetyLevel: ThriftSafetyLevel, + isInnerQuotedTweet: Boolean, + isRetweet: Boolean, + hydrateConversationControl: Boolean, + isSourceTweet: Boolean) + + def apply( + visibilityLibrary: TweetVisibilityLibrary.Type, + visibilityDeciderGates: VisibilityDeciderGates, + log: Logger, + statsReceiver: StatsReceiver + ): TweetVisibilityRepository.Type = { + + val noTweetRulesCounter = statsReceiver.counter("no_tweet_rules_requests") + val deprecatedScope = statsReceiver.scope("deprecated_safety_level") + + request: Request => + SafetyLevel.fromThrift(request.safetyLevel) match { + case DeprecatedSafetyLevel => + deprecatedScope.counter(request.safetyLevel.name.toLowerCase()).incr() + log.warning("Deprecated SafetyLevel (%s) requested".format(request.safetyLevel.name)) + Stitch.None + case safetyLevel: SafetyLevel => + if (!TweetVisibilityLibrary.hasTweetRules(safetyLevel)) { + noTweetRulesCounter.incr() + Stitch.None + } else { + visibilityLibrary( + TweetVisibilityRequest( + tweet = request.tweet, + safetyLevel = safetyLevel, + viewerContext = ViewerContext.fromContextWithViewerIdFallback(request.viewerId), + isInnerQuotedTweet = request.isInnerQuotedTweet, + isRetweet = request.isRetweet, + hydrateConversationControl = request.hydrateConversationControl, + isSourceTweet = request.isSourceTweet + ) + ).map(visibilityResult => + toFilteredState( + visibilityResult = visibilityResult, + disableLegacyInterstitialFilteredReason = + visibilityDeciderGates.disableLegacyInterstitialFilteredReason())) + } + } + } + + /** + * We can skip visibility filtering when any of the following is true: + * + * - SafetyLevel is deprecated + * - SafetyLevel has no tweet rules + */ + def canSkipVisibilityFiltering(thriftSafetyLevel: ThriftSafetyLevel): Boolean = + 
SafetyLevel.fromThrift(thriftSafetyLevel) match { + case DeprecatedSafetyLevel => + true + case safetyLevel: SafetyLevel => + !TweetVisibilityLibrary.hasTweetRules(safetyLevel) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionInfoRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionInfoRepository.scala new file mode 100644 index 000000000..c7165f95b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionInfoRepository.scala @@ -0,0 +1,39 @@ +package com.twitter.tweetypie.repository + +import com.twitter.consumer_privacy.mention_controls.thriftscala.UnmentionInfo +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.strato.thrift.ScroogeConvImplicits._ + +object UnmentionInfoRepository { + type Type = Tweet => Stitch[Option[UnmentionInfo]] + + val column = "consumer-privacy/mentions-management/unmentionInfoFromTweet" + case class UnmentionInfoView(asViewer: Option[Long]) + + /** + * Creates a function that extracts user fields from a tweet and checks + * if the extracted users have been unmentioned from the tweet's associated conversation. 
+ * This function enables the prefetch caching of UnmentionInfo used by graphql during createTweet + * events and mirrors the logic found in the unmentionInfo Strato column found + * here: http://go/unmentionInfo.strato + * @param client Strato client + * @return a repository function; tweets without a conversation id resolve to None without calling Strato + */ + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[Tweet, UnmentionInfoView, UnmentionInfo] = + client.fetcher[Tweet, UnmentionInfoView, UnmentionInfo](column) + + tweet => + tweet.coreData.flatMap(_.conversationId) match { + case Some(_) => // only the presence of a conversation id matters; the whole tweet is the fetch key + val viewerUserId = TwitterContext().flatMap(_.userId) + fetcher + .fetch(tweet, UnmentionInfoView(viewerUserId)) + .map(_.v) + case _ => Stitch.None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionedEntitiesRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionedEntitiesRepository.scala new file mode 100644 index 000000000..ea02cbdd3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionedEntitiesRepository.scala @@ -0,0 +1,28 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} + +/** + * Repository for fetching UserIds that have unmentioned themselves from a conversation. 
+ */ +object UnmentionedEntitiesRepository { + type Type = (ConversationId, Seq[UserId]) => Stitch[Option[Seq[UserId]]] + + val column = "consumer-privacy/mentions-management/getUnmentionedUsersFromConversation" + case class GetUnmentionView(userIds: Option[Seq[Long]]) + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[Long, GetUnmentionView, Seq[Long]] = + client.fetcher[Long, GetUnmentionView, Seq[Long]](column) + + (conversationId, userIds) => + if (userIds.nonEmpty) { + fetcher.fetch(conversationId, GetUnmentionView(Some(userIds))).map(_.v) + } else { + Stitch.None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala new file mode 100644 index 000000000..b2bf53bac --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala @@ -0,0 +1,69 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.service.talon.thriftscala._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.Talon +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core.OverCapacity + +case class UrlSlug(text: String) extends AnyVal +case class ExpandedUrl(text: String) extends AnyVal + +object UrlRepository { + type Type = UrlSlug => Stitch[ExpandedUrl] + + /** + * Builds a UrlRepository from a Talon.Expand arrow. 
+ */ + def apply( + talonExpand: Talon.Expand, + tweetypieClientId: String, + statsReceiver: StatsReceiver, + clientIdHelper: ClientIdHelper, + ): Type = { + val observedTalonExpand: Talon.Expand = + talonExpand + .trackOutcome(statsReceiver, _ => clientIdHelper.effectiveClientId.getOrElse("unknown")) + + val expandGroup = SeqGroup[ExpandRequest, Try[ExpandResponse]] { requests => + LegacySeqGroup.liftToSeqTry( + Future.collect(requests.map(r => observedTalonExpand(r).liftToTry))) + } + + slug => + val request = toExpandRequest(slug, auditMessage(tweetypieClientId, clientIdHelper)) + + Stitch + .call(request, expandGroup) + .lowerFromTry + .flatMap(toExpandedUrl(slug, _)) + } + + def auditMessage(tweetypieClientId: String, clientIdHelper: ClientIdHelper): String = { + tweetypieClientId + clientIdHelper.effectiveClientId.mkString(":", "", "") + } + + def toExpandRequest(slug: UrlSlug, auditMessage: String): ExpandRequest = + ExpandRequest(userId = 0, shortUrl = slug.text, fromUser = false, auditMsg = Some(auditMessage)) + + def toExpandedUrl(slug: UrlSlug, res: ExpandResponse): Stitch[ExpandedUrl] = + res.responseCode match { + case ResponseCode.Ok => + // use Option(res.longUrl) because res.longUrl can be null + Option(res.longUrl) match { + case None => Stitch.NotFound + case Some(longUrl) => Stitch.value(ExpandedUrl(longUrl)) + } + + case ResponseCode.BadInput => + Stitch.NotFound + + // we shouldn't see other ResponseCodes, because Talon.Expand translates them to + // exceptions, but we have this catch-all just in case. 
+ case _ => + Stitch.exception(OverCapacity("talon")) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserInfoRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserInfoRepository.scala new file mode 100644 index 000000000..204b86cef --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserInfoRepository.scala @@ -0,0 +1,138 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.gizmoduck.thriftscala.UserResponseState +import com.twitter.spam.rtf.thriftscala.{SafetyLevel => ThriftSafetyLevel} +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.thriftscala.UserIdentity +import com.twitter.visibility.interfaces.tweets.UserUnavailableStateVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.UserUnavailableStateVisibilityRequest +import com.twitter.visibility.models.SafetyLevel +import com.twitter.visibility.models.UserUnavailableStateEnum +import com.twitter.visibility.models.ViewerContext +import com.twitter.visibility.thriftscala.UserVisibilityResult + +/** + * Some types of user (e.g. frictionless users) may not + * have profiles, so a missing UserIdentity may mean that the user + * does not exist, or that the user does not have a profile. 
+ */ +object UserIdentityRepository { + type Type = UserKey => Stitch[UserIdentity] + + def apply(repo: UserRepository.Type): Type = { key => + val opts = UserQueryOptions(Set(UserField.Profile), UserVisibility.Mentionable) + repo(key, opts) + .map { user => + user.profile.map { profile => + UserIdentity( + id = user.id, + screenName = profile.screenName, + realName = profile.name + ) + } + } + .lowerFromOption() + } +} + +object UserProtectionRepository { + type Type = UserKey => Stitch[Boolean] + + def apply(repo: UserRepository.Type): Type = { + val opts = UserQueryOptions(Set(UserField.Safety), UserVisibility.All) + + userKey => + repo(userKey, opts) + .map(user => user.safety.map(_.isProtected)) + .lowerFromOption() + } +} + +/** + * Query Gizmoduck to check if a user `forUserId` can see user `userKey`. + * If forUserId is Some(), this will also check protected relationship, + * if it's None, it will check others as per UserVisibility.Visible policy in + * UserRepository.scala. If forUserId is None, this doesn't verify any + * relationships, visibility is determined based solely on user's + * properties (eg. 
deactivated, suspended, etc) + */ +object UserVisibilityRepository { + type Type = Query => Stitch[Option[FilteredState.Unavailable]] + + case class Query( + userKey: UserKey, + forUserId: Option[UserId], + tweetId: TweetId, + isRetweet: Boolean, + isInnerQuotedTweet: Boolean, + safetyLevel: Option[ThriftSafetyLevel]) + + def apply( + repo: UserRepository.Type, + userUnavailableAuthorStateVisibilityLibrary: UserUnavailableStateVisibilityLibrary.Type + ): Type = + query => { + repo( + query.userKey, + UserQueryOptions( + Set(), + UserVisibility.Visible, + forUserId = query.forUserId, + filteredAsFailure = true, + safetyLevel = query.safetyLevel + ) + ) + // We don't actually care about the response here (User's data), only whether + // it was filtered or not + .map { case _ => None } + .rescue { + case fs: FilteredState.Unavailable => Stitch.value(Some(fs)) + case UserFilteredFailure(state, reason) => + userUnavailableAuthorStateVisibilityLibrary + .apply( + UserUnavailableStateVisibilityRequest( + query.safetyLevel + .map(SafetyLevel.fromThrift).getOrElse(SafetyLevel.FilterDefault), + query.tweetId, + ViewerContext.fromContextWithViewerIdFallback(query.forUserId), + toUserUnavailableState(state, reason), + query.isRetweet, + query.isInnerQuotedTweet + ) + ).map(VisibilityResultToFilteredState.toFilteredStateUnavailable) + case NotFound => Stitch.value(Some(FilteredState.Unavailable.Author.NotFound)) + } + } + + def toUserUnavailableState( + userResponseState: UserResponseState, + userVisibilityResult: Option[UserVisibilityResult] + ): UserUnavailableStateEnum = { + (userResponseState, userVisibilityResult) match { + case (UserResponseState.DeactivatedUser, _) => UserUnavailableStateEnum.Deactivated + case (UserResponseState.OffboardedUser, _) => UserUnavailableStateEnum.Offboarded + case (UserResponseState.ErasedUser, _) => UserUnavailableStateEnum.Erased + case (UserResponseState.SuspendedUser, _) => UserUnavailableStateEnum.Suspended + case 
(UserResponseState.ProtectedUser, _) => UserUnavailableStateEnum.Protected + case (_, Some(result)) => UserUnavailableStateEnum.Filtered(result) + case _ => UserUnavailableStateEnum.Unavailable + } + } +} + +object UserViewRepository { + type Type = Query => Stitch[User] + + case class Query( + userKey: UserKey, + forUserId: Option[UserId], + visibility: UserVisibility, + queryFields: Set[UserField] = Set(UserField.View)) + + def apply(repo: UserRepository.Type): UserViewRepository.Type = + query => + repo(query.userKey, UserQueryOptions(query.queryFields, query.visibility, query.forUserId)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserRepository.scala new file mode 100644 index 000000000..ca80d9503 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserRepository.scala @@ -0,0 +1,285 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.gizmoduck.thriftscala.LookupContext +import com.twitter.gizmoduck.thriftscala.UserResponseState +import com.twitter.gizmoduck.thriftscala.UserResult +import com.twitter.servo.cache.ScopedCacheKey +import com.twitter.servo.json.syntax._ +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.NotFound +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.Gizmoduck +import com.twitter.tweetypie.core._ +import com.twitter.util.Base64Long.toBase64 +import com.twitter.util.logging.Logger +import com.twitter.visibility.thriftscala.UserVisibilityResult +import scala.util.control.NoStackTrace + +sealed trait UserKey + +object UserKey { + def byId(userId: UserId): UserKey = UserIdKey(userId) + def byScreenName(screenName: String): UserKey = ScreenNameKey.toLowerCase(screenName) + def apply(userId: UserId): UserKey = UserIdKey(userId) + 
/**
 * A set of flags, used in UserQuery, which control whether to include or filter out
 * users in various non-standard states.
 *
 * Each `filterX` flag is negated into the matching `includeX` field of the
 * gizmoduck `LookupContext` by `UserQueryOptions.toLookupContext`;
 * `filterOffboardedAndErased` drives both `includeOffboarded` and `includeErased`.
 */
case class UserVisibility(
  filterProtected: Boolean,
  filterSuspended: Boolean,
  filterDeactivated: Boolean,
  filterOffboardedAndErased: Boolean,
  filterNoScreenName: Boolean,
  filterPeriscope: Boolean,
  filterSoft: Boolean)

object UserVisibility {

  /**
   * No filtering, can see every user that gizmoduck can return.
   */
  val All: UserVisibility = UserVisibility(
    filterProtected = false,
    filterSuspended = false,
    filterDeactivated = false,
    filterOffboardedAndErased = false,
    filterNoScreenName = false,
    filterPeriscope = false,
    filterSoft = false
  )

  /**
   * Only includes users that would be visible to a non-logged in user,
   * or a logged in user where the following graph is checked for
   * protected users.
   *
   * no-screen-name, soft, and periscope users are visible, but not
   * mentionable.
   */
  val Visible: UserVisibility = UserVisibility(
    filterProtected = true,
    filterSuspended = true,
    filterDeactivated = true,
    filterOffboardedAndErased = true,
    filterNoScreenName = false,
    filterPeriscope = false,
    filterSoft = false
  )

  /**
   * Filters every non-standard state except protected: protected users are
   * included, while suspended, deactivated, offboarded/erased, no-screen-name,
   * periscope and soft users are filtered out.
   */
  val MediaTaggable: UserVisibility = UserVisibility(
    filterProtected = false,
    filterSuspended = true,
    filterDeactivated = true,
    filterOffboardedAndErased = true,
    filterNoScreenName = true,
    filterPeriscope = true,
    filterSoft = true
  )

  /**
   * Includes all mentionable users (filter deactivated/offboarded/erased/no-screen-name users)
   *
   * NOTE(review): the flags below do not filter deactivated users
   * (`filterDeactivated = false`), and they additionally filter periscope and
   * soft users, which the summary above does not mention. Confirm whether the
   * comment or the flags reflect the intended behavior.
   */
  val Mentionable: UserVisibility = UserVisibility(
    filterProtected = false,
    filterSuspended = false,
    filterDeactivated = false,
    filterOffboardedAndErased = true,
    filterNoScreenName = true,
    filterPeriscope = true,
    filterSoft = true
  )
}
/**
 * Options that accompany a user lookup.
 *
 * `visibility` decides which non-standard user states are returned and which are
 * filtered out. Filtered users are normally reported as not found; when
 * `filteredAsFailure` is true they are reported through a [[UserFilteredFailure]]
 * instead.
 */
case class UserQueryOptions(
  queryFields: Set[UserField] = Set.empty,
  visibility: UserVisibility,
  forUserId: Option[UserId] = None,
  filteredAsFailure: Boolean = false,
  safetyLevel: Option[SafetyLevel] = None) {

  /**
   * Builds the gizmoduck [[LookupContext]] for these options. Every `include*`
   * field is the negation of the corresponding `filter*` visibility flag, and
   * failed results are always requested (`includeFailed = true`).
   */
  def toLookupContext: LookupContext = {
    import visibility._

    // Offboarded and erased users are controlled by one shared visibility flag.
    val includeOffboardedAndErased = !filterOffboardedAndErased

    LookupContext(
      includeFailed = true,
      forUserId = forUserId,
      includeProtected = !filterProtected,
      includeSuspended = !filterSuspended,
      includeDeactivated = !filterDeactivated,
      includeErased = includeOffboardedAndErased,
      includeNoScreenNameUsers = !filterNoScreenName,
      includePeriscopeUsers = !filterPeriscope,
      includeSoftUsers = !filterSoft,
      includeOffboarded = includeOffboardedAndErased,
      safetyLevel = safetyLevel
    )
  }
}
/**
 * Builds a [[UserRepository.Type]] on top of the two Gizmoduck lookup functions
 * (by user id and by screen name) and translates each `UserResult` into either a
 * `User` or a failure, based on the result's response state.
 */
object GizmoduckUserRepository {
  private[this] val log = Logger(getClass)

  /**
   * @param getById         Gizmoduck lookup keyed by user id
   * @param getByScreenName Gizmoduck lookup keyed by screen name
   * @param maxRequestSize  reported as `maxSize` of the Stitch group used for lookups
   * @return a repository that resolves a [[UserKey]] with the given [[UserQueryOptions]]
   */
  def apply(
    getById: Gizmoduck.GetById,
    getByScreenName: Gizmoduck.GetByScreenName,
    maxRequestSize: Int = Int.MaxValue
  ): UserRepository.Type = {
    // A Stitch group identified by the query options and the lookup function.
    // NOTE(review): presumably Stitch batches calls whose group instances are
    // equal (case-class equality on `opts` and `get`) -- confirm against Stitch docs.
    case class GetBy[K](
      opts: UserQueryOptions,
      get: ((LookupContext, Seq[K], Set[UserField])) => Future[Seq[UserResult]])
        extends SeqGroup[K, UserResult] {
      // Issues one Gizmoduck call for all keys, using the context derived from `opts`.
      override def run(keys: Seq[K]): Future[Seq[Try[UserResult]]] =
        LegacySeqGroup.liftToSeqTry(get((opts.toLookupContext, keys, opts.queryFields)))
      override def maxSize: Int = maxRequestSize
    }

    (key, opts) => {
      // Pick the lookup function that matches the kind of key.
      val result =
        key match {
          case UserIdKey(id) => Stitch.call(id, GetBy(opts, getById))
          case ScreenNameKey(sn) => Stitch.call(sn, GetBy(opts, getByScreenName))
        }

      // Turn the raw UserResult into a User or the appropriate failure.
      result.flatMap(r => Stitch.const(toTryUser(r, opts.filteredAsFailure)))
    }
  }

  /**
   * Maps a Gizmoduck `UserResult` to a `Try[User]`.
   *
   * The cases are evaluated in order:
   *  - success states (and a missing response state) return the user, or
   *    `NotFound` if the user is unexpectedly absent;
   *  - [[NotFoundStates]] become `NotFound`;
   *  - [[FilteredStates]] become [[UserFilteredFailure]] when `filteredAsFailure`
   *    is set, otherwise `NotFound`;
   *  - `Failed` is translated from `failureReason`;
   *  - any other state becomes a [[UserLookupFailure]].
   */
  private def toTryUser(
    userResult: UserResult,
    filteredAsFailure: Boolean
  ): Try[User] =
    userResult.responseState match {
      // `forall` is true for None, so a result without a response state is
      // handled on this branch as well.
      case s if s.forall(SuccessStates.contains(_)) =>
        userResult.user match {
          case Some(u) =>
            Return(u)

          case None =>
            log.warn(
              s"User expected to be present, but not found in:\n${userResult.prettyPrint}"
            )
            // This should never happen, but if it does, treat it as the
            // user being returned as NotFound.
            Throw(NotFound)
        }

      case Some(s) if NotFoundStates.contains(s) =>
        Throw(NotFound)

      case Some(s) if FilteredStates.contains(s) =>
        Throw(if (filteredAsFailure) UserFilteredFailure(s, userResult.unsafeReason) else NotFound)

      case Some(UserResponseState.Failed) =>
        def lookupFailure(msg: String) =
          UserLookupFailure(msg, UserResponseState.Failed)

        // Preference order for the thrown exception: internalServerError, then
        // overCapacity, then unexpectedException; otherwise a generic lookup failure.
        Throw {
          userResult.failureReason
            .map { reason =>
              reason.internalServerError
                .orElse {
                  reason.overCapacity.map { e =>
                    // Convert Gizmoduck OverCapacity to Tweetypie
                    // OverCapacity exception, explaining that it was
                    // propagated from Gizmoduck.
                    OverCapacity(s"gizmoduck over capacity: ${e.message}")
                  }
                }
                .orElse(reason.unexpectedException.map(lookupFailure))
                .getOrElse(lookupFailure("failureReason empty"))
            }
            .getOrElse(lookupFailure("failureReason missing"))
        }

      case Some(unexpected) =>
        Throw(UserLookupFailure("Unexpected response state", unexpected))
    }

  /**
   * States that we expect to correspond to a user being returned.
   */
  val SuccessStates: Set[UserResponseState] =
    Set[UserResponseState](
      UserResponseState.Found,
      UserResponseState.Partial
    )

  /**
   * States that always correspond to a NotFound response.
   */
  val NotFoundStates: Set[UserResponseState] =
    Set[UserResponseState](
      UserResponseState.NotFound,
      // These are really filtered out, but we treat them as not found
      // since we don't have analogous filtering states for tweets.
      UserResponseState.PeriscopeUser,
      UserResponseState.SoftUser,
      UserResponseState.NoScreenNameUser
    )

  /**
   * Response states that correspond to a FilteredState
   */
  val FilteredStates: Set[UserResponseState] =
    Set(
      UserResponseState.DeactivatedUser,
      UserResponseState.OffboardedUser,
      UserResponseState.ErasedUser,
      UserResponseState.SuspendedUser,
      UserResponseState.ProtectedUser,
      UserResponseState.UnsafeUser
    )
}
/**
 * Loads the takedown reasons that apply to a user.
 *
 * The user is fetched with only the takedowns field and with no visibility
 * filtering; the raw takedowns are converted to a set of TakedownReason by
 * `TakedownReasons.userTakedownsToReasons`.
 */
object UserTakedownRepository {
  type Type = UserId => Stitch[Set[TakedownReason]]

  /** Query options shared by every lookup: takedowns only, all users visible. */
  val userQueryOptions: UserQueryOptions =
    UserQueryOptions(Set(UserField.Takedowns), UserVisibility.All)

  /** Returns the reasons for `userId`, or an empty set when the user has no takedowns field. */
  def apply(userRepo: UserRepository.Type): UserTakedownRepository.Type = { userId =>
    val lookup = userRepo(UserKey(userId = userId), userQueryOptions)
    lookup.map { user =>
      user.takedowns match {
        case Some(takedowns) => TakedownReasons.userTakedownsToReasons(takedowns)
        case None => Set.empty[TakedownReason]
      }
    }
  }
}
/**
 * FeatureSwitch [[Recipient]] assembled from a Gizmoduck `User` and a
 * TwitterContext `Viewer`.
 *
 * Construction throws `UserIdMismatchException` unless the viewer's user id is
 * present and equal to `user.id`.
 */
class UserViewerRecipient(
  user: User,
  viewer: Viewer)
    extends Recipient {

  // Refuse to build a recipient that mixes two different users.
  viewer.userId match {
    case Some(viewerId) if viewerId == user.id => ()
    case _ => throw UserIdMismatchException
  }

  // ---- values taken from the viewer -------------------------------------

  override def userId: Option[UserId] = viewer.userId

  override def deviceId: Option[String] = viewer.deviceId

  override def guestId: Option[Long] = viewer.guestId

  override def languageCode: Option[String] = viewer.requestLanguageCode

  override def countryCode: Option[String] = viewer.requestCountryCode

  override def userAgent: Option[UserAgent] =
    viewer.userAgent.flatMap(raw => UserAgent(raw))

  override def clientApplicationId: Option[Long] = viewer.clientApplicationId

  // ---- values taken from the user ---------------------------------------

  override def userRoles: Option[Set[String]] =
    user.roles.map(userRoles => userRoles.roles.toSet)

  override def signupCountryCode: Option[String] =
    for {
      safety <- user.safety
      code <- safety.signupCountryCode
    } yield code

  override def isVerified: Option[Boolean] =
    user.safety.map(safety => safety.verified)

  // ---- fixed values -----------------------------------------------------

  override def isManifest: Boolean = false

  @Deprecated
  override def isTwoffice: Option[Boolean] = None

  @Deprecated
  override def tooClient: Option[TOOClient] = None

  @Deprecated
  override def highWaterMark: Option[Long] = None
}
object VibeRepository {
  type Type = Tweet => Stitch[Option[VibeV2]]

  val column = "vibes/vibe.Tweet"
  case class VibeView(viewerId: Option[Long])

  /**
   * Builds a repository that fetches the vibe of a Tweet from the
   * vibes/vibe.Tweet strato column (source: go/vibe.strato).
   *
   * The fetcher is created once per repository. Every fetch is keyed by the
   * tweet id and uses a view without a viewer id.
   *
   * @param client Strato client used to create the column fetcher
   * @return a function from a Tweet to its optional vibe
   */
  def apply(client: StratoClient): Type = {
    val vibeFetcher: Fetcher[Long, VibeView, VibeV2] =
      client.fetcher[Long, VibeView, VibeV2](column)
    // The view is the same for every request, so build it once.
    val noViewerView = VibeView(None)

    tweet => vibeFetcher.fetch(tweet.id, noViewerView).map(fetched => fetched.v)
  }
}
/**
 * Maps a visibility verdict to the [[FilteredState]] (if any) applied to the tweet.
 *
 * Cases are tried from top to bottom, so their order matters:
 *  - the action types listed first are suppressed with the safety result;
 *  - an `Interstitial` is suppressed with the safety result either when its
 *    reason is in the first (longer) list and
 *    `disableLegacyInterstitialFilteredReason` is true, or when its reason is in
 *    the second (shorter) list regardless of the flag;
 *  - the explicitly listed `Drop` reasons and `LocalizedTombstone` are dropped
 *    with the safety result;
 *  - any other `Drop` goes through `unavailableFromDropAction`;
 *  - everything else (including interstitials not matched above) goes through
 *    `suppressFromVisibilityAction`, with legacy reasons enabled only when
 *    `disableLegacyInterstitialFilteredReason` is false.
 *
 * @param visibilityResult result whose `verdict` is inspected
 * @param disableLegacyInterstitialFilteredReason when true, prefer the safety
 *        result over legacy filtered reasons for the listed interstitial reasons
 * @return the filtered state, or None when the verdict maps to no filtered state
 */
def toFilteredState(
  visibilityResult: VisibilityResult,
  disableLegacyInterstitialFilteredReason: Boolean
): Option[FilteredState] = {
  // Both candidate results wrap the same safety result; they are built up front
  // and shared by the branches below.
  val suppressSafetyResult = Some(
    Suppress(FilteredReason.SafetyResult(visibilityResult.getSafetyResult))
  )
  val dropSafetyResult = Some(
    Unavailable.Drop(FilteredReason.SafetyResult(visibilityResult.getSafetyResult))
  )

  visibilityResult.verdict match {
    case _: Appealable => suppressSafetyResult

    case _: Preview => suppressSafetyResult

    case _: InterstitialLimitedEngagements => suppressSafetyResult

    case _: EmergencyDynamicInterstitial => suppressSafetyResult

    case _: SoftIntervention => suppressSafetyResult

    case _: LimitedEngagements => suppressSafetyResult

    case _: TweetInterstitial => suppressSafetyResult

    case _: TweetVisibilityNudge => suppressSafetyResult

    // Only taken when legacy interstitial reasons are disabled.
    case Interstitial(
          ViewerBlocksAuthor | ViewerReportedAuthor | ViewerReportedTweet | ViewerMutesAuthor |
          ViewerHardMutedAuthor | MutedKeyword | InterstitialDevelopmentOnly | HatefulConduct |
          AbusiveBehavior,
          _,
          _) if disableLegacyInterstitialFilteredReason =>
      suppressSafetyResult

    // Taken regardless of the flag (reached only if the guarded case above did not match).
    case Interstitial(
          ViewerBlocksAuthor | ViewerReportedAuthor | ViewerReportedTweet |
          InterstitialDevelopmentOnly,
          _,
          _) =>
      suppressSafetyResult

    case _: ComplianceTweetNotice => suppressSafetyResult

    case Drop(ExclusiveTweet, _) =>
      dropSafetyResult

    case Drop(NsfwViewerIsUnderage | NsfwViewerHasNoStatedAge | NsfwLoggedOut, _) =>
      dropSafetyResult

    case Drop(TrustedFriendsTweet, _) =>
      dropSafetyResult

    case Drop(StaleTweet, _) => dropSafetyResult

    case _: LocalizedTombstone => dropSafetyResult

    case _: Avoid => suppressSafetyResult

    // legacy drop actions
    case dropAction: Drop => unavailableFromDropAction(dropAction)

    // legacy suppress actions
    case action => suppressFromVisibilityAction(action, !disableLegacyInterstitialFilteredReason)
  }
}
/**
 * Translates a legacy `Drop` action into the unavailable state reported for the tweet.
 *
 * Author-related drop reasons reuse the `Unavailable.Author` states so that they
 * are reported the same way as author filtering elsewhere. Every other reason
 * becomes an `Unavailable.Drop` carrying the matching legacy `FilteredReason`;
 * reasons without a dedicated mapping use `UnspecifiedReason`.
 *
 * @return always a defined Option
 */
private def unavailableFromDropAction(dropAction: Drop): Option[FilteredState.Unavailable] = {
  // Wraps a legacy filtered reason in the Drop state shared by the non-author branches.
  def dropped(reason: FilteredReason): Option[FilteredState.Unavailable] =
    Some(Unavailable.Drop(reason))

  dropAction match {
    case Drop(AuthorBlocksViewer, _) =>
      dropped(FilteredReason.AuthorBlockViewer(true))
    case Drop(MutedKeyword, _) =>
      dropped(FilteredReason.TweetMatchesViewerMutedKeyword(KeywordMatch("")))
    case Drop(ViewerMutesAuthor, _) =>
      dropped(FilteredReason.ViewerMutesAuthor(true))
    case Drop(Nsfw | NsfwMedia, _) =>
      dropped(FilteredReason.ContainNsfwMedia(true))
    case Drop(PossiblyUndesirable, _) =>
      dropped(FilteredReason.PossiblyUndesirable(true))
    case Drop(Bounce, _) =>
      dropped(FilteredReason.TweetIsBounced(true))

    // Author visibility states.
    case Drop(ProtectedAuthor, _) =>
      Some(Unavailable.Author.Protected)
    case Drop(SuspendedAuthor, _) =>
      Some(Unavailable.Author.Suspended)
    case Drop(OffboardedAuthor, _) =>
      Some(Unavailable.Author.Offboarded)
    // Erased authors are reported with the Deactivated state.
    case Drop(DeactivatedAuthor | ErasedAuthor, _) =>
      Some(Unavailable.Author.Deactivated)

    // Unspecified and every remaining reason.
    case _: Drop =>
      dropped(FilteredReason.UnspecifiedReason(true))
  }
}
package object repository {
  // Bring Tweetypie permitted TwitterContext into scope
  //
  // The value is built by applying com.twitter.context.TwitterContext to
  // Tweetypie's TwitterContextPermit. Because it is named `TwitterContext`, code
  // in this package that refers to `TwitterContext` resolves to this instance.
  val TwitterContext: TwitterContext =
    com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit)
}
/**
 * Turns an `Activity` of `Service` into a plain `Service`.
 *
 * The activity is made strict once, when the wrapper is created; each request
 * then takes the next value of the resulting service event and forwards the
 * request to it.
 */
object ActivityService {

  def apply[Req, Rep](activity: Activity[Service[Req, Rep]]): Service[Req, Rep] = {
    val strictActivity = ActivityUtil.strict(activity)
    // `get` rethrows if the activity state carries a failure.
    val currentService = strictActivity.values.map(state => state.get)

    new Service[Req, Rep] {
      def apply(req: Req): Future[Rep] =
        currentService.toFuture.flatMap(service => service(req))
    }
  }
}
/**
 * Returns an activity that mirrors `activity` through an eagerly registered
 * observer: the source's states are pushed into a local `Var` (initially
 * `Pending`) from the moment this method is called. The registration is closed
 * when the `Var` is garbage collected. Compositions built on the returned
 * activity keep the default lazy behavior.
 */
def strict[T](activity: Activity[T]): Activity[T] = {
  val mirrored = Var[Activity.State[T]](Activity.Pending)
  val registration = activity.states.register(Witness(mirrored))

  // Tie the lifetime of the registration to the lifetime of the mirror.
  Closable.closeOnCollect(registration, mirrored)

  new Activity(mirrored)
}
object BoringStackTrace {

  /**
   * Tells whether an exception is "boring", i.e. expected to happen
   * occasionally (or even regularly) during normal operation, so that
   * interesting exceptions are easier to spot.
   *
   * Extending NoStackTrace is the preferred way to mark an exception as boring.
   * Timeouts, cancellations, channel/socket problems and thrift exceptions are
   * boring as well. A TApplicationException is boring only when its message
   * mentions DeadlineExceededException.
   */
  def isBoring(t: Throwable): Boolean =
    t match {
      case _: NoStackTrace | _: TimeoutException | _: CancellationException |
          _: JTimeoutException | _: ChannelException | _: SocketException |
          _: ClosedChannelException | _: CancelledKeyException | _: ThriftException =>
        true

      // A deadline exceeded on the server side reaches us wrapped as, e.g.:
      //   org.apache.thrift.TApplicationException: Internal error processing issue3:
      //   'com.twitter.finagle.service.DeadlineFilter$DeadlineExceededException:
      //   exceeded request deadline of 100.milliseconds by 4.milliseconds. ...'
      case e: TApplicationException =>
        Option(e.getMessage).exists(_.contains("DeadlineExceededException"))

      case _ =>
        false
    }
}
/**
 * Bridges a Java `CompletableFuture` to a Twitter `Future`.
 *
 * A future that has already completed normally is converted directly.
 * Otherwise a promise is returned that is fulfilled from the completion
 * callback; interrupting the promise cancels the `CompletableFuture`.
 */
private def toTwitterFuture[T](cf: CompletableFuture[T]): Future[T] = {
  val completedNormally = cf.isDone && !(cf.isCompletedExceptionally || cf.isCancelled)

  if (completedNormally) {
    Future.const(Return(cf.get()))
  } else {
    val promise = new TwitterPromise[T] with TwitterPromise.InterruptHandler {
      override protected def onInterrupt(t: Throwable): Unit = cf.cancel(true)
    }
    val fulfil = new BiConsumer[T, Throwable] {
      override def accept(result: T, exception: Throwable): Unit =
        // updateIfEmpty: the promise may already hold a value, so never overwrite it.
        promise.updateIfEmpty(if (exception != null) Throw(exception) else Return(result))
    }
    cf.whenComplete(fulfil)
    promise
  }
}
/**
 * Loads a single key by delegating to `asyncLoadAll` with a one-element batch
 * and then picking that key's entry out of the resulting map.
 */
override def asyncLoad(key: String, executor: Executor): CompletableFuture[GetResult] = {
  val selectKey = new util.function.Function[util.Map[String, GetResult], GetResult] {
    override def apply(loaded: util.Map[String, GetResult]): GetResult = loaded.get(key)
  }
  val singleKeyBatch = asyncLoadAll(Seq(key).asJava, executor)
  singleKeyBatch.thenApply(selectKey)
}
+ r.failures.foreach { + case (key, value) => + map.put( + key, + r.copy(hits = EmptyHits, misses = EmptyMisses, failures = Map(key -> value)) + ) + } + result.complete(map) + case Throw(ex) => + logger.warn("Error loading keys from memcached", ex) + result.completeExceptionally(ex) + } + result + } + } + + private[this] val cache: AsyncLoadingCache[String, GetResult] = + Caffeine + .newBuilder() + .maximumSize(maximumSize) + .refreshAfterWrite(ttl.inMilliseconds * 3 / 4, TimeUnit.MILLISECONDS) + .expireAfterWrite(ttl.inMilliseconds, TimeUnit.MILLISECONDS) + .recordStats(new Supplier[StatsCounter] { + override def get(): StatsCounter = Stats + }) + .buildAsync(MemcachedAsyncCacheLoader) + + /** + * Looks up all keys through the local Caffeine cache and merges the per-key + * results back into a single GetResult. + */ + override def getResult(keys: Iterable[String]): Future[GetResult] = { + // With no keys there are no per-key results to merge, and reduce throws on an + // empty collection, so hand the empty request straight to the underlying client. + if (keys.isEmpty) { + proxyClient.getResult(keys) + } else { + toTwitterFuture(cache.getAll(keys.asJava)) + .map { result => + val values = result.values().asScala + values.reduce(_ ++ _) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/DeviceSourceParser.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/DeviceSourceParser.scala new file mode 100644 index 000000000..1600269e3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/DeviceSourceParser.scala @@ -0,0 +1,100 @@ +package com.twitter.tweetypie.serverutil + +/** + * Parse a device source into an OAuth app id. This mapping is + * necessary when you need to request information about a client from + * a service that only knows about clients in terms of oauthIds. + * + * This happens either by parsing out an explicit "oauth:" app id or + * using a mapping from old non oauth clientIds like "web" and "sms" + * to oauthIds that have retroactively been assigned to those clients. + * If the legacy id cannot be found in the map and it's a non-numeric + * string, it's converted to the oauthId for twitter.com.
+ * + * Tweets with non oauth clientIds are still being created because + * that's how the monorail creates them. We also need to be able to + * process any app id string that is in old tweet data. + * + */ +object DeviceSourceParser { + + /** + * The oauth id for twitter.com. Also used as a default oauth id for + * other clients without their own. + */ + val Web = 268278L + + /** + * The OAuth app ids for known legacy device sources. + */ + val legacyMapping: Map[String, Long] = Map[String, Long]( + "web" -> Web, + "tweetbutton" -> 6219130L, + "keitai_web" -> 38366L, + "sms" -> 241256L + ) + + /** + * Attempt to convert a client application id String into an OAuth + * id. + * + * The string must consist of the characters "oauth:" followed by a + * non-negative, decimal long. The text is case-insensitive, and + * whitespace at the beginning or end is ignored. + * + * We want to accept input as liberally as possible, because if we + * fail to do that here, it will get counted as a "legacy app id". + */ + val parseOAuthAppId: String => Option[Long] = { + // Case-insensitive, whitespace insensitive. The javaWhitespace + // character class is consistent with Character.isWhitespace, but is + // sadly different from \s. It will likely not matter in the long + // run, but this accepts more inputs and is easier to test (because + // we can use isWhitespace) + val OAuthAppIdRe = """(?i)\p{javaWhitespace}*oauth:(\d+)\p{javaWhitespace}*""".r + + _ match { + case OAuthAppIdRe(digits) => + // We should only get NumberFormatException when the number is + // larger than a Long, because the regex will rule out all of + // the other invalid cases. + try Some(digits.toLong) + catch { case _: NumberFormatException => None } + case _ => + None + } + } + + /** + * Attempt to convert a client application id String into an OAuth id or legacy identifier without + * any fallback behavior.
+ */ + val parseStrict: String => Option[Long] = + appIdStr => + parseOAuthAppId(appIdStr) + .orElse(legacyMapping.get(appIdStr)) + + /** + * Return true if a string can be used as a valid client application id or legacy identifier + */ + val isValid: String => Boolean = appIdStr => parseStrict(appIdStr).isDefined + + /** + * Build a parser that converts device sources to OAuth app ids, + * including performing the legacy mapping. + */ + val parseAppId: String => Option[Long] = { + val IsNumericRe = """-?[0-9]+""".r + + appIdStr => + parseStrict(appIdStr) + .orElse { + appIdStr match { + // We just fail the lookup if the app id looks like it's + // numeric. + case IsNumericRe() => None + case _ => Some(Web) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ExceptionCounter.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ExceptionCounter.scala new file mode 100644 index 000000000..0a7c6e43b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ExceptionCounter.scala @@ -0,0 +1,38 @@ +package com.twitter.tweetypie.serverutil + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo +import com.twitter.servo.util.ExceptionCategorizer + +object ExceptionCounter { + // These throwables are alertable because they indicate conditions we never expect in production. 
+ def isAlertable(throwable: Throwable): Boolean = + throwable match { + case e: RuntimeException => true + case e: Error => true + case _ => false + } + + // count how many exceptions are alertable and how many are boring + val tweetypieCategorizers: ExceptionCategorizer = + ExceptionCategorizer.const("alertableException").onlyIf(isAlertable) ++ + ExceptionCategorizer.const("boringException").onlyIf(BoringStackTrace.isBoring) + + val defaultCategorizer: ExceptionCategorizer = + ExceptionCategorizer.default() ++ tweetypieCategorizers + + def defaultCategorizer(name: String): ExceptionCategorizer = + ExceptionCategorizer.default(Seq(name)) ++ tweetypieCategorizers + + def apply(statsReceiver: StatsReceiver): servo.util.ExceptionCounter = + new servo.util.ExceptionCounter(statsReceiver, defaultCategorizer) + + def apply(statsReceiver: StatsReceiver, name: String): servo.util.ExceptionCounter = + new servo.util.ExceptionCounter(statsReceiver, defaultCategorizer(name)) + + def apply( + statsReceiver: StatsReceiver, + categorizer: ExceptionCategorizer + ): servo.util.ExceptionCounter = + new servo.util.ExceptionCounter(statsReceiver, categorizer) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ExtendedTweetMetadataBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ExtendedTweetMetadataBuilder.scala new file mode 100644 index 000000000..53a3bc18d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ExtendedTweetMetadataBuilder.scala @@ -0,0 +1,52 @@ +package com.twitter.tweetypie.serverutil + +import com.twitter.tweetypie.getCashtags +import com.twitter.tweetypie.getHashtags +import com.twitter.tweetypie.getMedia +import com.twitter.tweetypie.getMentions +import com.twitter.tweetypie.getText +import com.twitter.tweetypie.getUrls +import com.twitter.tweetypie.thriftscala.ExtendedTweetMetadata +import com.twitter.tweetypie.thriftscala.ShortenedUrl +import 
com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.tweettext.Offset +import com.twitter.tweetypie.tweettext.TextEntity +import com.twitter.tweetypie.tweettext.Truncator +import com.twitter.tweetypie.tweettext.TweetText +import com.twitter.tweetypie.thriftscala.entities.Implicits._ + +/** + * Computes the appropriate truncation index to support rendering on legacy clients. + */ +object ExtendedTweetMetadataBuilder { + import TweetText._ + + def apply(tweet: Tweet, selfPermalink: ShortenedUrl): ExtendedTweetMetadata = { + + def entityRanges[T: TextEntity](entities: Seq[T]): Seq[(Int, Int)] = + entities.map(e => (TextEntity.fromIndex(e).toInt, TextEntity.toIndex(e).toInt)) + + val allEntityRanges = + Offset.Ranges.fromCodePointPairs( + entityRanges(getUrls(tweet)) ++ + entityRanges(getMentions(tweet)) ++ + entityRanges(getMedia(tweet)) ++ + entityRanges(getHashtags(tweet)) ++ + entityRanges(getCashtags(tweet)) + ) + + val text = getText(tweet) + + val apiCompatibleTruncationIndex = + // need to leave enough space for ellipsis, space, and self-permalink + Truncator.truncationPoint( + text = text, + maxDisplayLength = OriginalMaxDisplayLength - selfPermalink.shortUrl.length - 2, + atomicUnits = allEntityRanges + ) + + ExtendedTweetMetadata( + apiCompatibleTruncationIndex = apiCompatibleTruncationIndex.codePointOffset.toInt + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/NullMemcacheClient.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/NullMemcacheClient.scala new file mode 100644 index 000000000..0cbecec88 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/NullMemcacheClient.scala @@ -0,0 +1,46 @@ +package com.twitter.tweetypie.serverutil + +import com.twitter.finagle.memcached +import com.twitter.finagle.memcached.CasResult +import com.twitter.io.Buf +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.Time +import java.lang + +/** + * 
This will be used during CI test runs, in the no-cache scenarios for both DCs. + * We are treating this as cache of instantaneous expiry. MockClient uses an in-memory map as + * an underlying data-store, we extend it and prevent any writes to the map - thus making sure + * it's always empty. + */ +class NullMemcacheClient extends memcached.MockClient { + override def set(key: String, flags: Int, expiry: Time, value: Buf): Future[Unit] = Future.Done + + override def add(key: String, flags: Int, expiry: Time, value: Buf): Future[lang.Boolean] = + Future.value(true) + + override def append(key: String, flags: Int, expiry: Time, value: Buf): Future[lang.Boolean] = + Future.value(false) + + override def prepend(key: String, flags: Int, expiry: Time, value: Buf): Future[lang.Boolean] = + Future.value(false) + + override def replace(key: String, flags: Int, expiry: Time, value: Buf): Future[lang.Boolean] = + Future.value(false) + + override def checkAndSet( + key: String, + flags: Int, + expiry: Time, + value: Buf, + casUnique: Buf + ): Future[CasResult] = Future.value(CasResult.NotFound) + + override def delete(key: String): Future[lang.Boolean] = Future.value(false) + + override def incr(key: String, delta: Long): Future[Option[lang.Long]] = + Future.value(None) + + override def decr(key: String, delta: Long): Future[Option[lang.Long]] = + Future.value(None) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/PartnerMedia.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/PartnerMedia.scala new file mode 100644 index 000000000..f2c32d7b4 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/PartnerMedia.scala @@ -0,0 +1,15 @@ +package com.twitter.tweetypie.serverutil + +import com.twitter.config.yaml.YamlMap +import scala.util.matching.Regex + +object PartnerMedia { + def load(yamlMap: YamlMap): Seq[Regex] = + (httpOrHttps(yamlMap) ++ httpOnly(yamlMap)).map(_.r) + + private def 
httpOrHttps(yamlMap: YamlMap): Seq[String] = + yamlMap.stringSeq("http_or_https").map("""^(?:https?\:\/\/)?""" + _) + + private def httpOnly(yamlMap: YamlMap): Seq[String] = + yamlMap.stringSeq("http_only").map("""^(?:http\:\/\/)?""" + _) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/StoredCard.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/StoredCard.scala new file mode 100644 index 000000000..566d43c24 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/StoredCard.scala @@ -0,0 +1,36 @@ +package com.twitter.tweetypie.serverutil + +import com.twitter.tweetypie.thriftscala.CardReference +import com.twitter.util.Try +import java.net.URI +import scala.util.control.NonFatal + +/** + * Utility to extract the stored card id out of a CardReference + */ +object StoredCard { + + private val cardScheme = "card" + private val cardPrefix = s"$cardScheme://" + + /** + * Looks at the CardReference to determine whether the cardUri points to a "stored" + * card id. Stored Card URIs are expected to be in the format "card://{cardId}" + * (case sensitive), where {cardId} is numeric. In future these URIs can potentially be: + * "card://{cardId}[/path[?queryString]]". Note that this utility cares just about the + * "Stored Card" types, so it just skips the other card types. + */ + def unapply(cr: CardReference): Option[Long] = { + try { + for { + uriStr <- Option(cr.cardUri) if uriStr.startsWith(cardPrefix) + uri <- Try(new URI(uriStr)).toOption + if uri.getScheme == cardScheme && uri.getHost != null + } yield uri.getHost.toLong // throws NumberFormatException on a non-numeric host (cardId) + } catch { + // The validations are done upstream by the TweetBuilder, so exceptions + // due to bad URIs will be swallowed.
+ case NonFatal(e) => None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/BUILD new file mode 100644 index 000000000..768daa991 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/BUILD @@ -0,0 +1,15 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "tweetypie/servo/repo", + "tweetypie/servo/util", + "snowflake:id", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/thrift:compiled-scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/TweetCacheWrite.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/TweetCacheWrite.scala new file mode 100644 index 000000000..6f1f49cd0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/TweetCacheWrite.scala @@ -0,0 +1,99 @@ +package com.twitter.tweetypie.serverutil.logcachewrites + +import com.twitter.servo.cache.Cached +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.core.Serializer +import com.twitter.tweetypie.thriftscala.CachedTweet +import com.twitter.util.Time +import java.util.Base64 + +/** + * A record of a tweet cache write. This is used for debugging. These log + * messages are scribed to test_tweetypie_tweet_cache_writes. + */ +case class TweetCacheWrite( + tweetId: TweetId, + timestamp: Time, + action: String, + value: Option[Cached[CachedTweet]]) { + + /** + * Convert to a tab-separated string suitable for writing to a log message. 
+ * + * Fields are: + * - Tweet id + * - Timestamp: + * If the tweet id is a snowflake id, this is an offset in milliseconds since tweet creation. + * If it is not a snowflake id, then this is a Unix epoch time in + * milliseconds. (The idea is that for most tweets, this encoding will make + * it easier to see the interval between events and whether it occurred soon + * after tweet creation.) + * - Cache action ("set", "add", "replace", "cas", "delete") + * - Base64-encoded Cached[CachedTweet] struct + */ + def toLogMessage: String = { + val builder = new java.lang.StringBuilder + val timestampOffset = + if (SnowflakeId.isSnowflakeId(tweetId)) { + SnowflakeId(tweetId).unixTimeMillis.asLong + } else { + 0 + } + builder + .append(tweetId) + .append('\t') + .append(timestamp.inMilliseconds - timestampOffset) + .append('\t') + .append(action) + .append('\t') + value.foreach { ct => + // When logging, we end up serializing the value twice, once for the + // cache write and once for the logging. This is suboptimal, but the + // assumption is that we only do this for a small fraction of cache + // writes, so it should be ok. This is necessary + // because we want to do the filtering on the deserialized value, so + // the serialized value is not available at the level that we are + // doing the filtering.
+ val thriftBytes = Serializer.CachedTweet.CachedCompact.to(ct).get + builder.append(Base64.getEncoder.encodeToString(thriftBytes)) + } + builder.toString + } +} + +object TweetCacheWrite { + case class ParseException(msg: String, cause: Exception) extends RuntimeException(cause) { + override def getMessage: String = s"Failed to parse as TweetCacheWrite: $msg" + } + + /** + * Parse a TweetCacheWrite object from the result of TweetCacheWrite.toLogMessage + */ + def fromLogMessage(msg: String): TweetCacheWrite = + try { + val (tweetIdStr, timestampStr, action, cachedTweetStr) = + msg.split('\t') match { + case Array(f1, f2, f3) => (f1, f2, f3, None) + case Array(f1, f2, f3, f4) => (f1, f2, f3, Some(f4)) + } + val tweetId = tweetIdStr.toLong + val timestamp = { + val offset = + if (SnowflakeId.isSnowflakeId(tweetId)) { + SnowflakeId(tweetId).unixTimeMillis.asLong + } else { + 0 + } + Time.fromMilliseconds(timestampStr.toLong + offset) + } + val value = cachedTweetStr.map { str => + val thriftBytes = Base64.getDecoder.decode(str) + Serializer.CachedTweet.CachedCompact.from(thriftBytes).get + } + + TweetCacheWrite(tweetIdStr.toLong, timestamp, action, value) + } catch { + case e: Exception => throw ParseException(msg, e) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/WriteLoggingCache.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/WriteLoggingCache.scala new file mode 100644 index 000000000..a332c8e59 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/WriteLoggingCache.scala @@ -0,0 +1,66 @@ +package com.twitter.tweetypie.serverutil.logcachewrites + +import com.twitter.servo.cache.Checksum +import com.twitter.servo.cache.CacheWrapper +import com.twitter.util.Future +import com.twitter.util.logging.Logger +import scala.util.control.NonFatal + +trait WriteLoggingCache[K, V] extends CacheWrapper[K, V] { + // Use getClass so we can 
see which implementation is actually failing. + private[this] lazy val logFailureLogger = Logger(getClass) + + def selectKey(k: K): Boolean + def select(k: K, v: V): Boolean + def log(action: String, k: K, v: Option[V]): Unit + + def safeLog(action: String, k: K, v: Option[V]): Unit = + try { + log(action, k, v) + } catch { + case NonFatal(e) => + // The exception occurred in logging, and we don't want to fail the + // request with the logging failure if this happens, so log it and carry + // on. + logFailureLogger.error("Logging cache write", e) + } + + override def add(k: K, v: V): Future[Boolean] = + // Call the selection function before doing the work. Since it's highly + // likely that the Future will succeed, it's cheaper to call the function + // before we make the call so that we can avoid creating the callback and + // attaching it to the Future if we would not log. + if (select(k, v)) { + underlyingCache.add(k, v).onSuccess(r => if (r) safeLog("add", k, Some(v))) + } else { + underlyingCache.add(k, v) + } + + override def checkAndSet(k: K, v: V, checksum: Checksum): Future[Boolean] = + if (select(k, v)) { + underlyingCache.checkAndSet(k, v, checksum).onSuccess(r => if (r) safeLog("cas", k, Some(v))) + } else { + underlyingCache.checkAndSet(k, v, checksum) + } + + override def set(k: K, v: V): Future[Unit] = + if (select(k, v)) { + underlyingCache.set(k, v).onSuccess(_ => safeLog("set", k, Some(v))) + } else { + underlyingCache.set(k, v) + } + + override def replace(k: K, v: V): Future[Boolean] = + if (select(k, v)) { + underlyingCache.replace(k, v).onSuccess(r => if (r) safeLog("replace", k, Some(v))) + } else { + underlyingCache.replace(k, v) + } + + override def delete(k: K): Future[Boolean] = + if (selectKey(k)) { + underlyingCache.delete(k).onSuccess(r => if (r) safeLog("delete", k, None)) + } else { + underlyingCache.delete(k) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/BUILD 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/BUILD new file mode 100644 index 000000000..1fb3cf249 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/BUILD @@ -0,0 +1,38 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/twitter/bijection:scrooge", + "3rdparty/jvm/org/apache/thrift:libthrift", + "core-app-services/failed_task:writer", + "core-app-services/lib:coreservices", + "finagle/finagle-core/src/main", + "finagle/finagle-mux/src/main/scala", + "finagle/finagle-stats", + "quill/capture", + "quill/core/src/main/thrift:thrift-scala", + "scrooge/scrooge-core/src/main/scala", + "tweetypie/servo/request/src/main/scala", + "tweetypie/servo/util", + "src/thrift/com/twitter/servo:servo-exception-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/handler", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/store", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/context", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala", + "twitter-server-internal", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + ], +) diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ClientHandlingTweetService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ClientHandlingTweetService.scala new file mode 100644 index 000000000..f19245b60 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ClientHandlingTweetService.scala @@ -0,0 +1,524 @@ +/** Copyright 2012 Twitter, Inc. */ +package com.twitter.tweetypie.service + +import com.twitter.coreservices.StratoPublicApiRequestAttributionCounter +import com.twitter.finagle.CancelledRequestException +import com.twitter.finagle.context.Contexts +import com.twitter.finagle.context.Deadline +import com.twitter.finagle.mux.ClientDiscardedRequestException +import com.twitter.finagle.stats.DefaultStatsReceiver +import com.twitter.finagle.stats.Stat +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.util.ExceptionCategorizer +import com.twitter.servo.util.MemoizedExceptionCounterFactory +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.Gate +import com.twitter.tweetypie.Logger +import com.twitter.tweetypie.StatsReceiver +import com.twitter.tweetypie.ThriftTweetService +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.context.TweetypieContext +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Promise + +/** + * A TweetService that takes care of the handling of requests from + * external services. In particular, this wrapper doesn't have any + * logic for handling requests itself. It just serves as a gateway for + * requests and responses, making sure that the underlying tweet + * service only sees requests it should handle and that the external + * clients get clean responses. 
+ * + * - Ensures that exceptions are propagated cleanly + * - Sheds traffic if necessary + * - Authenticates clients + * - Records stats about clients + * + * For each endpoint, we record both client-specific and total metrics for number of requests, + * successes, exceptions, and latency. The stats names follow the patterns + * (where {method} is the endpoint name and {client} is the root of the client id): + * - ./{method}/requests + * - ./{method}/success + * - ./{method}/client_errors + * - ./{method}/server_errors + * - ./{method}/exceptions + * - ./{method}/exceptions/{exceptionClass} + * - ./{method}/{client}/requests + * - ./{method}/{client}/success + * - ./{method}/{client}/exceptions + * - ./{method}/{client}/exceptions/{exceptionClass} + */ +class ClientHandlingTweetService( + underlying: ThriftTweetService, + stats: StatsReceiver, + loadShedEligible: Gate[String], + shedReadTrafficVoluntarily: Gate[Unit], + requestAuthorizer: ClientRequestAuthorizer, + getTweetsAuthorizer: MethodAuthorizer[GetTweetsRequest], + getTweetFieldsAuthorizer: MethodAuthorizer[GetTweetFieldsRequest], + requestSizeAuthorizer: MethodAuthorizer[Int], + clientIdHelper: ClientIdHelper) + extends ThriftTweetService { + import RescueExceptions._ + + private val log = Logger("com.twitter.tweetypie.service.TweetService") + + private[this] val Requests = "requests" + private[this] val Success = "success" + private[this] val Latency = "latency_ms" + + private[this] val StratoStatsCounter = new StratoPublicApiRequestAttributionCounter( + DefaultStatsReceiver + ) + private[this] val clientServerCategorizer = + ExceptionCategorizer.simple { + _ match { + case _: ClientError | _: AccessDenied => "client_errors" + case _ => "server_errors" + } + } + + private[this] val preServoExceptionCountersWithClientId = + new MemoizedExceptionCounterFactory(stats) + private[this] val preServoExceptionCounters = + new MemoizedExceptionCounterFactory(stats, categorizer = ExceptionCounter.defaultCategorizer) + private[this] val postServoExceptionCounters = + new MemoizedExceptionCounterFactory(stats, categorizer = clientServerCategorizer) + + private def clientId: String = +
clientIdHelper.effectiveClientId.getOrElse(ClientIdHelper.UnknownClientId) + private def clientIdRoot: String = + clientIdHelper.effectiveClientIdRoot.getOrElse(ClientIdHelper.UnknownClientId) + + private[this] val futureOverCapacityException = + Future.exception(OverCapacity("Request rejected due to load shedding.")) + + private[this] def ifNotOverCapacityRead[T]( + methodStats: StatsReceiver, + requestSize: Long + )( + f: => Future[T] + ): Future[T] = { + val couldShed = loadShedEligible(clientId) + val doShed = couldShed && shedReadTrafficVoluntarily() + + methodStats.stat("loadshed_incoming_requests").add(requestSize) + if (couldShed) { + methodStats.stat("loadshed_eligible_requests").add(requestSize) + } else { + methodStats.stat("loadshed_ineligible_requests").add(requestSize) + } + + if (doShed) { + methodStats.stat("loadshed_rejected_requests").add(requestSize) + futureOverCapacityException + } else { + f + } + } + + private def maybeTimeFuture[A](maybeStat: Option[Stat])(f: => Future[A]) = + maybeStat match { + case Some(stat) => Stat.timeFuture(stat)(f) + case None => f + } + + /** + * Perform the action, increment the appropriate counters, and clean up the exceptions to servo exceptions + * + * This method also masks all interrupts to prevent request cancellation on hangup. + */ + private[this] def trackS[T]( + name: String, + requestInfo: Any, + extraStatPrefix: Option[String] = None, + requestSize: Option[Long] = None + )( + action: StatsReceiver => Future[T] + ): Future[T] = { + val methodStats = stats.scope(name) + val clientStats = methodStats.scope(clientIdRoot) + val cancelledCounter = methodStats.counter("cancelled") + + /** + * Returns an identical future except that it ignores interrupts and increments a counter + * when a request is cancelled. This is [[Future]].masked but with a counter. 
+ */ + def maskedWithStats[A](f: Future[A]): Future[A] = { + val p = Promise[A]() + p.setInterruptHandler { + case _: ClientDiscardedRequestException | _: CancelledRequestException => + cancelledCounter.incr() + } + f.proxyTo(p) + p + } + + maskedWithStats( + requestAuthorizer(name, clientIdHelper.effectiveClientId) + .flatMap { _ => + methodStats.counter(Requests).incr() + extraStatPrefix.foreach(p => methodStats.counter(p, Requests).incr()) + clientStats.counter(Requests).incr() + StratoStatsCounter.recordStats(name, "tweets", requestSize.getOrElse(1L)) + + Stat.timeFuture(methodStats.stat(Latency)) { + Stat.timeFuture(clientStats.stat(Latency)) { + maybeTimeFuture(extraStatPrefix.map(p => methodStats.stat(p, Latency))) { + TweetypieContext.Local.trackStats(stats, methodStats, clientStats) + + // Remove the deadline for backend requests when we mask client cancellations so + // that side-effects are applied to all backend services even after client timeouts. + // Wrap and then flatten an extra layer of Future to capture any thrown exceptions. 
+ Future(Contexts.broadcast.letClear(Deadline)(action(methodStats))).flatten + } + } + } + } + ).onSuccess { _ => + methodStats.counter(Success).incr() + extraStatPrefix.foreach(p => methodStats.counter(p, Success).incr()) + clientStats.counter(Success).incr() + } + .onFailure { e => + preServoExceptionCounters(name)(e) + preServoExceptionCountersWithClientId(name, clientIdRoot)(e) + } + .rescue(rescueToServoFailure(name, clientId)) + .onFailure { e => + postServoExceptionCounters(name)(e) + logFailure(e, requestInfo) + } + } + + def track[T]( + name: String, + requestInfo: Any, + extraStatPrefix: Option[String] = None, + requestSize: Option[Long] = None + )( + action: => Future[T] + ): Future[T] = { + trackS(name, requestInfo, extraStatPrefix, requestSize) { _: StatsReceiver => action } + } + + private def logFailure(ex: Throwable, requestInfo: Any): Unit = + log.warn(s"Returning failure response: $ex\n failed request info: $requestInfo") + + object RequestWidthPrefix { + private def prefix(width: Int) = { + val bucketMin = + width match { + case c if c < 10 => "0_9" + case c if c < 100 => "10_99" + case _ => "100_plus" + } + s"width_$bucketMin" + } + + def forGetTweetsRequest(r: GetTweetsRequest): String = prefix(r.tweetIds.size) + def forGetTweetFieldsRequest(r: GetTweetFieldsRequest): String = prefix(r.tweetIds.size) + } + + object WithMediaPrefix { + def forPostTweetRequest(r: PostTweetRequest): String = + if (r.mediaUploadIds.exists(_.nonEmpty)) + "with_media" + else + "without_media" + } + + override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] = + trackS( + "get_tweets", + request, + Some(RequestWidthPrefix.forGetTweetsRequest(request)), + Some(request.tweetIds.size) + ) { stats => + getTweetsAuthorizer(request, clientId).flatMap { _ => + ifNotOverCapacityRead(stats, request.tweetIds.length) { + underlying.getTweets(request) + } + } + } + + override def getTweetFields(request: GetTweetFieldsRequest): 
Future[Seq[GetTweetFieldsResult]] = + trackS( + "get_tweet_fields", + request, + Some(RequestWidthPrefix.forGetTweetFieldsRequest(request)), + Some(request.tweetIds.size) + ) { stats => + getTweetFieldsAuthorizer(request, clientId).flatMap { _ => + ifNotOverCapacityRead(stats, request.tweetIds.length) { + underlying.getTweetFields(request) + } + } + } + + override def replicatedGetTweets(request: GetTweetsRequest): Future[Unit] = + track("replicated_get_tweets", request, requestSize = Some(request.tweetIds.size)) { + underlying.replicatedGetTweets(request).rescue { + case e: Throwable => Future.Unit // do not need deferredrpc to retry on exceptions + } + } + + override def replicatedGetTweetFields(request: GetTweetFieldsRequest): Future[Unit] = + track("replicated_get_tweet_fields", request, requestSize = Some(request.tweetIds.size)) { + underlying.replicatedGetTweetFields(request).rescue { + case e: Throwable => Future.Unit // do not need deferredrpc to retry on exceptions + } + } + + override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = + trackS("get_tweet_counts", request, requestSize = Some(request.tweetIds.size)) { stats => + ifNotOverCapacityRead(stats, request.tweetIds.length) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.getTweetCounts(request) + } + } + } + + override def replicatedGetTweetCounts(request: GetTweetCountsRequest): Future[Unit] = + track("replicated_get_tweet_counts", request, requestSize = Some(request.tweetIds.size)) { + underlying.replicatedGetTweetCounts(request).rescue { + case e: Throwable => Future.Unit // do not need deferredrpc to retry on exceptions + } + } + + override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = + track("post_tweet", request, Some(WithMediaPrefix.forPostTweetRequest(request))) { + underlying.postTweet(request) + } + + override def postRetweet(request: RetweetRequest): Future[PostTweetResult] = + 
track("post_retweet", request) { + underlying.postRetweet(request) + } + + override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = + track("set_additional_fields", request) { + underlying.setAdditionalFields(request) + } + + override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] = + track("delete_additional_fields", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.deleteAdditionalFields(request) + } + } + + override def asyncSetAdditionalFields(request: AsyncSetAdditionalFieldsRequest): Future[Unit] = + track("async_set_additional_fields", request) { + underlying.asyncSetAdditionalFields(request) + } + + override def asyncDeleteAdditionalFields( + request: AsyncDeleteAdditionalFieldsRequest + ): Future[Unit] = + track("async_delete_additional_fields", request) { + underlying.asyncDeleteAdditionalFields(request) + } + + override def replicatedUndeleteTweet2(request: ReplicatedUndeleteTweet2Request): Future[Unit] = + track("replicated_undelete_tweet2", request) { underlying.replicatedUndeleteTweet2(request) } + + override def replicatedInsertTweet2(request: ReplicatedInsertTweet2Request): Future[Unit] = + track("replicated_insert_tweet2", request) { underlying.replicatedInsertTweet2(request) } + + override def asyncInsert(request: AsyncInsertRequest): Future[Unit] = + track("async_insert", request) { underlying.asyncInsert(request) } + + override def updatePossiblySensitiveTweet( + request: UpdatePossiblySensitiveTweetRequest + ): Future[Unit] = + track("update_possibly_sensitive_tweet", request) { + underlying.updatePossiblySensitiveTweet(request) + } + + override def asyncUpdatePossiblySensitiveTweet( + request: AsyncUpdatePossiblySensitiveTweetRequest + ): Future[Unit] = + track("async_update_possibly_sensitive_tweet", request) { + underlying.asyncUpdatePossiblySensitiveTweet(request) + } + + override def 
replicatedUpdatePossiblySensitiveTweet(tweet: Tweet): Future[Unit] = + track("replicated_update_possibly_sensitive_tweet", tweet) { + underlying.replicatedUpdatePossiblySensitiveTweet(tweet) + } + + override def undeleteTweet(request: UndeleteTweetRequest): Future[UndeleteTweetResponse] = + track("undelete_tweet", request) { + underlying.undeleteTweet(request) + } + + override def asyncUndeleteTweet(request: AsyncUndeleteTweetRequest): Future[Unit] = + track("async_undelete_tweet", request) { + underlying.asyncUndeleteTweet(request) + } + + override def unretweet(request: UnretweetRequest): Future[UnretweetResult] = + track("unretweet", request) { + underlying.unretweet(request) + } + + override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] = + track("erase_user_tweets", request) { + underlying.eraseUserTweets(request) + } + + override def asyncEraseUserTweets(request: AsyncEraseUserTweetsRequest): Future[Unit] = + track("async_erase_user_tweets", request) { + underlying.asyncEraseUserTweets(request) + } + + override def asyncDelete(request: AsyncDeleteRequest): Future[Unit] = + track("async_delete", request) { underlying.asyncDelete(request) } + + override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] = + track("delete_tweets", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.deleteTweets(request) + } + } + + override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] = + track("cascaded_delete_tweet", request) { underlying.cascadedDeleteTweet(request) } + + override def replicatedDeleteTweet2(request: ReplicatedDeleteTweet2Request): Future[Unit] = + track("replicated_delete_tweet2", request) { underlying.replicatedDeleteTweet2(request) } + + override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] = + track("incr_tweet_fav_count", request) { 
underlying.incrTweetFavCount(request) } + + override def asyncIncrFavCount(request: AsyncIncrFavCountRequest): Future[Unit] = + track("async_incr_fav_count", request) { underlying.asyncIncrFavCount(request) } + + override def replicatedIncrFavCount(tweetId: TweetId, delta: Int): Future[Unit] = + track("replicated_incr_fav_count", tweetId) { + underlying.replicatedIncrFavCount(tweetId, delta) + } + + override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] = + track("incr_tweet_bookmark_count", request) { underlying.incrTweetBookmarkCount(request) } + + override def asyncIncrBookmarkCount(request: AsyncIncrBookmarkCountRequest): Future[Unit] = + track("async_incr_bookmark_count", request) { underlying.asyncIncrBookmarkCount(request) } + + override def replicatedIncrBookmarkCount(tweetId: TweetId, delta: Int): Future[Unit] = + track("replicated_incr_bookmark_count", tweetId) { + underlying.replicatedIncrBookmarkCount(tweetId, delta) + } + + override def replicatedSetAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = + track("replicated_set_additional_fields", request) { + underlying.replicatedSetAdditionalFields(request) + } + + def setRetweetVisibility(request: SetRetweetVisibilityRequest): Future[Unit] = { + track("set_retweet_visibility", request) { + underlying.setRetweetVisibility(request) + } + } + + def asyncSetRetweetVisibility(request: AsyncSetRetweetVisibilityRequest): Future[Unit] = { + track("async_set_retweet_visibility", request) { + underlying.asyncSetRetweetVisibility(request) + } + } + + override def replicatedSetRetweetVisibility( + request: ReplicatedSetRetweetVisibilityRequest + ): Future[Unit] = + track("replicated_set_retweet_visibility", request) { + underlying.replicatedSetRetweetVisibility(request) + } + + override def replicatedDeleteAdditionalFields( + request: ReplicatedDeleteAdditionalFieldsRequest + ): Future[Unit] = + track("replicated_delete_additional_fields", request) { + 
underlying.replicatedDeleteAdditionalFields(request) + } + + override def replicatedTakedown(tweet: Tweet): Future[Unit] = + track("replicated_takedown", tweet) { underlying.replicatedTakedown(tweet) } + + override def scrubGeoUpdateUserTimestamp(request: DeleteLocationData): Future[Unit] = + track("scrub_geo_update_user_timestamp", request) { + underlying.scrubGeoUpdateUserTimestamp(request) + } + + override def scrubGeo(request: GeoScrub): Future[Unit] = + track("scrub_geo", request, requestSize = Some(request.statusIds.size)) { + requestSizeAuthorizer(request.statusIds.size, clientId).flatMap { _ => + underlying.scrubGeo(request) + } + } + + override def replicatedScrubGeo(tweetIds: Seq[TweetId]): Future[Unit] = + track("replicated_scrub_geo", tweetIds) { underlying.replicatedScrubGeo(tweetIds) } + + override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] = + track("delete_location_data", request) { + underlying.deleteLocationData(request) + } + + override def flush(request: FlushRequest): Future[Unit] = + track("flush", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.flush(request) + } + } + + override def takedown(request: TakedownRequest): Future[Unit] = + track("takedown", request) { underlying.takedown(request) } + + override def asyncTakedown(request: AsyncTakedownRequest): Future[Unit] = + track("async_takedown", request) { + underlying.asyncTakedown(request) + } + + override def setTweetUserTakedown(request: SetTweetUserTakedownRequest): Future[Unit] = + track("set_tweet_user_takedown", request) { underlying.setTweetUserTakedown(request) } + + override def quotedTweetDelete(request: QuotedTweetDeleteRequest): Future[Unit] = + track("quoted_tweet_delete", request) { + underlying.quotedTweetDelete(request) + } + + override def quotedTweetTakedown(request: QuotedTweetTakedownRequest): Future[Unit] = + track("quoted_tweet_takedown", request) { 
+ underlying.quotedTweetTakedown(request) + } + + override def getDeletedTweets( + request: GetDeletedTweetsRequest + ): Future[Seq[GetDeletedTweetResult]] = + track("get_deleted_tweets", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.getDeletedTweets(request) + } + } + + override def getStoredTweets( + request: GetStoredTweetsRequest + ): Future[Seq[GetStoredTweetsResult]] = { + track("get_stored_tweets", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.getStoredTweets(request) + } + } + } + + override def getStoredTweetsByUser( + request: GetStoredTweetsByUserRequest + ): Future[GetStoredTweetsByUserResult] = { + track("get_stored_tweets_by_user", request) { + underlying.getStoredTweetsByUser(request) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/DispatchingTweetService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/DispatchingTweetService.scala new file mode 100644 index 000000000..f148fb25a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/DispatchingTweetService.scala @@ -0,0 +1,376 @@ +/** Copyright 2010 Twitter, Inc. */ +package com.twitter.tweetypie +package service + +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.exception.thriftscala.ClientErrorCause +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.handler._ +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Future + +/** + * Implementation of the TweetService which dispatches requests to underlying + * handlers and stores. 
/**
 * Implementation of the TweetService which dispatches requests to underlying
 * handlers and stores.
 *
 * Endpoints either delegate to a dedicated handler, or build a store event (through a
 * builder or an `Event` constructor) and pass it to `tweetStore`.
 */
class DispatchingTweetService(
  asyncDeleteAdditionalFieldsBuilder: AsyncDeleteAdditionalFieldsBuilder.Type,
  asyncSetAdditionalFieldsBuilder: AsyncSetAdditionalFieldsBuilder.Type,
  deleteAdditionalFieldsBuilder: DeleteAdditionalFieldsBuilder.Type,
  deleteLocationDataHandler: DeleteLocationDataHandler.Type,
  deletePathHandler: TweetDeletePathHandler,
  eraseUserTweetsHandler: EraseUserTweetsHandler,
  getDeletedTweetsHandler: GetDeletedTweetsHandler.Type,
  getStoredTweetsHandler: GetStoredTweetsHandler.Type,
  getStoredTweetsByUserHandler: GetStoredTweetsByUserHandler.Type,
  getTweetCountsHandler: GetTweetCountsHandler.Type,
  getTweetsHandler: GetTweetsHandler.Type,
  getTweetFieldsHandler: GetTweetFieldsHandler.Type,
  postTweetHandler: PostTweet.Type[PostTweetRequest],
  postRetweetHandler: PostTweet.Type[RetweetRequest],
  quotedTweetDeleteBuilder: QuotedTweetDeleteEventBuilder.Type,
  quotedTweetTakedownBuilder: QuotedTweetTakedownEventBuilder.Type,
  scrubGeoScrubTweetsBuilder: ScrubGeoEventBuilder.ScrubTweets.Type,
  scrubGeoUpdateUserTimestampBuilder: ScrubGeoEventBuilder.UpdateUserTimestamp.Type,
  setAdditionalFieldsBuilder: SetAdditionalFieldsBuilder.Type,
  setRetweetVisibilityHandler: SetRetweetVisibilityHandler.Type,
  statsReceiver: StatsReceiver,
  takedownHandler: TakedownHandler.Type,
  tweetStore: TotalTweetStore,
  undeleteTweetHandler: UndeleteTweetHandler.Type,
  unretweetHandler: UnretweetHandler.Type,
  updatePossiblySensitiveTweetHandler: UpdatePossiblySensitiveTweetHandler.Type,
  userTakedownHandler: UserTakedownHandler.Type,
  clientIdHelper: ClientIdHelper)
    extends ThriftTweetService {
  import AdditionalFields._

  // Incoming reads

  override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] =
    getTweetsHandler(request)

  override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] =
    getTweetFieldsHandler(request)

  override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] =
    getTweetCountsHandler(request)

  // Incoming deletes

  override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] =
    deletePathHandler.cascadedDeleteTweet(request)

  override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] =
    deletePathHandler.deleteTweets(request)

  // Incoming writes

  override def postTweet(request: PostTweetRequest): Future[PostTweetResult] =
    postTweetHandler(request)

  override def postRetweet(request: RetweetRequest): Future[PostTweetResult] =
    postRetweetHandler(request)

  /**
   * Sets additional fields on a tweet.
   *
   * Fails with a `ClientError(BadRequest)` when the request carries no non-empty additional
   * field, or when it carries any field reported by `unsettableAdditionalFieldIds`.
   * Otherwise the event is built and handed to the store.
   */
  override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = {
    val setFields = AdditionalFields.nonEmptyAdditionalFieldIds(request.additionalFields)
    if (setFields.isEmpty) {
      Future.exception(
        ClientError(
          ClientErrorCause.BadRequest,
          s"${SetAdditionalFieldsRequest.AdditionalFieldsField.name} is empty, there must be at least one field to set"
        )
      )
    } else {

      unsettableAdditionalFieldIds(request.additionalFields) match {
        case Nil =>
          setAdditionalFieldsBuilder(request).flatMap(tweetStore.setAdditionalFields)
        case unsettableFieldIds =>
          Future.exception(
            ClientError(
              ClientErrorCause.BadRequest,
              unsettableAdditionalFieldIdsErrorMessage(unsettableFieldIds)
            )
          )
      }
    }
  }

  /**
   * Deletes additional fields from tweets.
   *
   * Fails with a `ClientError(BadRequest)` when either id list is empty or when any field id
   * is outside the additional-field range. Otherwise one store call is issued per built
   * event and the results are joined.
   */
  override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] =
    if (request.tweetIds.isEmpty || request.fieldIds.isEmpty) {
      Future.exception(
        ClientError(ClientErrorCause.BadRequest, "request contains empty tweet ids or field ids")
      )
    } else if (request.fieldIds.exists(!isAdditionalFieldId(_))) {
      Future.exception(
        ClientError(ClientErrorCause.BadRequest, "cannot delete non-additional fields")
      )
    } else {
      deleteAdditionalFieldsBuilder(request).flatMap { events =>
        Future.join(events.map(tweetStore.deleteAdditionalFields))
      }
    }

  override def asyncInsert(request: AsyncInsertRequest): Future[Unit] =
    AsyncInsertTweet.Event.fromAsyncRequest(request) match {
      case TweetStoreEventOrRetry.First(e) => tweetStore.asyncInsertTweet(e)
      case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncInsertTweet(e)
    }

  /**
   * Builds the async set-additional-fields event and applies it to the store, using the
   * store's retry variant when the builder yields a `Retry`.
   *
   * `flatMap` (rather than `map`) is required here: the store call itself returns a Future,
   * and with `map` that Future would be discarded against the `Future[Unit]` result type,
   * so the endpoint would complete early and never surface store failures.
   */
  override def asyncSetAdditionalFields(request: AsyncSetAdditionalFieldsRequest): Future[Unit] =
    asyncSetAdditionalFieldsBuilder(request).flatMap {
      case TweetStoreEventOrRetry.First(e) => tweetStore.asyncSetAdditionalFields(e)
      case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncSetAdditionalFields(e)
    }

  /**
   * Set if a retweet should be included in its source tweet's retweet count.
   *
   * This is called by our RetweetVisibility daemon when a user enters/exits
   * suspended or read-only state and the visibility of all their retweets needs to
   * be modified.
   *
   * @see [[SetRetweetVisibilityHandler]] for more implementation details
   */
  override def setRetweetVisibility(request: SetRetweetVisibilityRequest): Future[Unit] =
    setRetweetVisibilityHandler(request)

  override def asyncSetRetweetVisibility(request: AsyncSetRetweetVisibilityRequest): Future[Unit] =
    AsyncSetRetweetVisibility.Event.fromAsyncRequest(request) match {
      case TweetStoreEventOrRetry.First(e) => tweetStore.asyncSetRetweetVisibility(e)
      case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncSetRetweetVisibility(e)
    }

  /**
   * When a tweet has been successfully undeleted from storage in Manhattan this endpoint will
   * enqueue requests to the following related endpoints via deferredRPC:
   *
   *   1. asyncUndeleteTweet: Asynchronously handle aspects of the undelete not required for the response.
   *   2. replicatedUndeleteTweet2: Send the undeleted tweet to other clusters for caching.
   *
   * @see [[UndeleteTweetHandler]] for the core undelete implementation
   */
  override def undeleteTweet(request: UndeleteTweetRequest): Future[UndeleteTweetResponse] =
    undeleteTweetHandler(request)

  /**
   * The async method that undeleteTweet calls to handle notifying other services of the undelete.
   * See [[TweetStores.asyncUndeleteTweetStore]] for all the stores that handle this event.
   */
  override def asyncUndeleteTweet(request: AsyncUndeleteTweetRequest): Future[Unit] =
    AsyncUndeleteTweet.Event.fromAsyncRequest(request) match {
      case TweetStoreEventOrRetry.First(e) => tweetStore.asyncUndeleteTweet(e)
      case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncUndeleteTweet(e)
    }

  override def getDeletedTweets(
    request: GetDeletedTweetsRequest
  ): Future[Seq[GetDeletedTweetResult]] =
    getDeletedTweetsHandler(request)

  /**
   * Triggers the deletion of all of a user's tweets. Used by Gizmoduck when erasing a user
   * after they have been deactivated for some number of days.
   */
  override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] =
    eraseUserTweetsHandler.eraseUserTweetsRequest(request)

  override def asyncEraseUserTweets(request: AsyncEraseUserTweetsRequest): Future[Unit] =
    eraseUserTweetsHandler.asyncEraseUserTweetsRequest(request)

  override def asyncDelete(request: AsyncDeleteRequest): Future[Unit] =
    AsyncDeleteTweet.Event.fromAsyncRequest(request) match {
      case TweetStoreEventOrRetry.First(e) => tweetStore.asyncDeleteTweet(e)
      case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncDeleteTweet(e)
    }

  /*
   * unretweet a tweet.
   *
   * There are two ways to unretweet:
   *  - call deleteTweets() with the retweetId
   *  - call unretweet() with the retweeter userId and sourceTweetId
   *
   * This is useful if you want to be able to undo a retweet without having to
   * keep track of a retweetId
   *
   * Returns DeleteTweetResult for any deleted retweets.
   */
  override def unretweet(request: UnretweetRequest): Future[UnretweetResult] =
    unretweetHandler(request)

  /**
   * Builds the async delete-additional-fields event and applies it to the store, using the
   * store's retry variant when the builder yields a `Retry`.
   *
   * Uses `flatMap` so the returned Future completes with the store's Future; `map` would
   * discard that Future and hide store failures from the caller.
   */
  override def asyncDeleteAdditionalFields(
    request: AsyncDeleteAdditionalFieldsRequest
  ): Future[Unit] =
    asyncDeleteAdditionalFieldsBuilder(request).flatMap {
      case TweetStoreEventOrRetry.First(e) => tweetStore.asyncDeleteAdditionalFields(e)
      case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncDeleteAdditionalFields(e)
    }

  override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] =
    tweetStore.incrFavCount(IncrFavCount.Event(request.tweetId, request.delta, Time.now))

  override def asyncIncrFavCount(request: AsyncIncrFavCountRequest): Future[Unit] =
    tweetStore.asyncIncrFavCount(AsyncIncrFavCount.Event(request.tweetId, request.delta, Time.now))

  override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] =
    tweetStore.incrBookmarkCount(IncrBookmarkCount.Event(request.tweetId, request.delta, Time.now))

  override def asyncIncrBookmarkCount(request: AsyncIncrBookmarkCountRequest): Future[Unit] =
    tweetStore.asyncIncrBookmarkCount(
      AsyncIncrBookmarkCount.Event(request.tweetId, request.delta, Time.now))

  override def scrubGeoUpdateUserTimestamp(request: DeleteLocationData): Future[Unit] =
    scrubGeoUpdateUserTimestampBuilder(request).flatMap(tweetStore.scrubGeoUpdateUserTimestamp)

  override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] =
    deleteLocationDataHandler(request)

  override def scrubGeo(request: GeoScrub): Future[Unit] =
    scrubGeoScrubTweetsBuilder(request).flatMap(tweetStore.scrubGeo)

  override def takedown(request: TakedownRequest): Future[Unit] =
    takedownHandler(request)

  // The builder may yield no event, in which case there is nothing to send to the store.
  override def quotedTweetDelete(request: QuotedTweetDeleteRequest): Future[Unit] =
    quotedTweetDeleteBuilder(request).flatMap {
      case Some(event) => tweetStore.quotedTweetDelete(event)
      case None => Future.Unit
    }

  // The builder may yield no event, in which case there is nothing to send to the store.
  override def quotedTweetTakedown(request: QuotedTweetTakedownRequest): Future[Unit] =
    quotedTweetTakedownBuilder(request).flatMap {
      case Some(event) => tweetStore.quotedTweetTakedown(event)
      case None => Future.Unit
    }

  override def asyncTakedown(request: AsyncTakedownRequest): Future[Unit] =
    AsyncTakedown.Event.fromAsyncRequest(request) match {
      case TweetStoreEventOrRetry.First(e) => tweetStore.asyncTakedown(e)
      case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncTakedown(e)
    }

  override def setTweetUserTakedown(request: SetTweetUserTakedownRequest): Future[Unit] =
    userTakedownHandler(request)

  override def asyncUpdatePossiblySensitiveTweet(
    request: AsyncUpdatePossiblySensitiveTweetRequest
  ): Future[Unit] = {
    AsyncUpdatePossiblySensitiveTweet.Event.fromAsyncRequest(request) match {
      case TweetStoreEventOrRetry.First(event) =>
        tweetStore.asyncUpdatePossiblySensitiveTweet(event)
      case TweetStoreEventOrRetry.Retry(event) =>
        tweetStore.retryAsyncUpdatePossiblySensitiveTweet(event)
    }
  }

  override def flush(request: FlushRequest): Future[Unit] = {
    // The logged "previous Tweet" value is intended to be used when interactively debugging an
    // issue and an engineer flushes the tweet manually, e.g. from tweetypie.cmdline console.
    // Don't log automated flushes originating from tweetypie-daemons to cut down noise.
    val logExisting = !clientIdHelper.effectiveClientIdRoot.exists(_ == "tweetypie-daemons")
    tweetStore.flush(
      Flush.Event(request.tweetIds, request.flushTweets, request.flushCounts, logExisting)
    )
  }

  // Incoming replication events

  // Replicated reads run the regular read path and drop the result.
  override def replicatedGetTweetCounts(request: GetTweetCountsRequest): Future[Unit] =
    getTweetCounts(request).unit

  override def replicatedGetTweetFields(request: GetTweetFieldsRequest): Future[Unit] =
    getTweetFields(request).unit

  override def replicatedGetTweets(request: GetTweetsRequest): Future[Unit] =
    getTweets(request).unit

  override def replicatedInsertTweet2(request: ReplicatedInsertTweet2Request): Future[Unit] =
    tweetStore.replicatedInsertTweet(
      ReplicatedInsertTweet
        .Event(
          request.cachedTweet.tweet,
          request.cachedTweet,
          request.quoterHasAlreadyQuotedTweet.getOrElse(false),
          request.initialTweetUpdateRequest
        )
    )

  override def replicatedDeleteTweet2(request: ReplicatedDeleteTweet2Request): Future[Unit] =
    tweetStore.replicatedDeleteTweet(
      ReplicatedDeleteTweet.Event(
        tweet = request.tweet,
        isErasure = request.isErasure,
        isBounceDelete = request.isBounceDelete,
        isLastQuoteOfQuoter = request.isLastQuoteOfQuoter.getOrElse(false)
      )
    )

  override def replicatedIncrFavCount(tweetId: TweetId, delta: Int): Future[Unit] =
    tweetStore.replicatedIncrFavCount(ReplicatedIncrFavCount.Event(tweetId, delta))

  override def replicatedIncrBookmarkCount(tweetId: TweetId, delta: Int): Future[Unit] =
    tweetStore.replicatedIncrBookmarkCount(ReplicatedIncrBookmarkCount.Event(tweetId, delta))

  override def replicatedScrubGeo(tweetIds: Seq[TweetId]): Future[Unit] =
    tweetStore.replicatedScrubGeo(ReplicatedScrubGeo.Event(tweetIds))

  override def replicatedSetAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] =
    tweetStore.replicatedSetAdditionalFields(
      ReplicatedSetAdditionalFields.Event(request.additionalFields)
    )

  override def replicatedSetRetweetVisibility(
    request: ReplicatedSetRetweetVisibilityRequest
  ): Future[Unit] =
    tweetStore.replicatedSetRetweetVisibility(
      ReplicatedSetRetweetVisibility.Event(request.srcId, request.visible)
    )

  // Issues one store call per (tweetId, fieldIds) entry and joins the results.
  override def replicatedDeleteAdditionalFields(
    request: ReplicatedDeleteAdditionalFieldsRequest
  ): Future[Unit] =
    Future.join(
      request.fieldsMap.map {
        case (tweetId, fieldIds) =>
          tweetStore.replicatedDeleteAdditionalFields(
            ReplicatedDeleteAdditionalFields.Event(tweetId, fieldIds)
          )
      }.toSeq
    )

  override def replicatedUndeleteTweet2(request: ReplicatedUndeleteTweet2Request): Future[Unit] =
    tweetStore.replicatedUndeleteTweet(
      ReplicatedUndeleteTweet
        .Event(
          request.cachedTweet.tweet,
          request.cachedTweet,
          request.quoterHasAlreadyQuotedTweet.getOrElse(false)
        ))

  override def replicatedTakedown(tweet: Tweet): Future[Unit] =
    tweetStore.replicatedTakedown(ReplicatedTakedown.Event(tweet))

  override def updatePossiblySensitiveTweet(
    request: UpdatePossiblySensitiveTweetRequest
  ): Future[Unit] =
    updatePossiblySensitiveTweetHandler(request)

  override def replicatedUpdatePossiblySensitiveTweet(tweet: Tweet): Future[Unit] =
    tweetStore.replicatedUpdatePossiblySensitiveTweet(
      ReplicatedUpdatePossiblySensitiveTweet.Event(tweet)
    )

  override def getStoredTweets(
    request: GetStoredTweetsRequest
  ): Future[Seq[GetStoredTweetsResult]] =
    getStoredTweetsHandler(request)

  override def getStoredTweetsByUser(
    request: GetStoredTweetsByUserRequest
  ): Future[GetStoredTweetsByUserResult] =
    getStoredTweetsByUserHandler(request)
}
package com.twitter.tweetypie
package service

import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.coreservices.failed_task.writer.FailedTaskWriter
import com.twitter.scrooge.ThriftException
import com.twitter.scrooge.ThriftStruct
import com.twitter.scrooge.ThriftStructCodec
import com.twitter.tweetypie.serverutil.BoringStackTrace
import com.twitter.tweetypie.thriftscala._
import scala.util.control.NoStackTrace

object FailureLoggingTweetService {

  /**
   * Decides whether a failure should be scribed.
   *
   * Thrift exceptions and [[PostTweetFailure]]s are always written; any other
   * throwable is written unless `BoringStackTrace` classifies it as boring.
   */
  private def shouldWrite(t: Throwable): Boolean =
    t match {
      case _: ThriftException | _: PostTweetFailure => true
      case other => !BoringStackTrace.isBoring(other)
    }

  /**
   * Carries the state and optional reason of a non-OK PostTweetResult.
   *
   * FailedTaskWriter records an exception alongside the failed request, so a
   * failing result has to be wrapped in an exception before it can be logged.
   */
  private class PostTweetFailure(state: TweetCreateState, reason: Option[String])
      extends Exception
      with NoStackTrace {
    override def toString: String = s"PostTweetFailure($state, $reason)"
  }
}

/**
 * Decorates a tweet service so that failed requests are scribed, making
 * failures available for later analysis and diagnosis.
 */
class FailureLoggingTweetService(
  failedTaskWriter: FailedTaskWriter[Array[Byte]],
  protected val underlying: ThriftTweetService)
    extends TweetServiceProxy {
  import FailureLoggingTweetService._

  private[this] object writers {

    /**
     * Creates a function that scribes a failed request of type `T` under `name`,
     * but only for failures accepted by `shouldWrite`.
     */
    private[this] def writer[T <: ThriftStruct](
      name: String,
      codec: ThriftStructCodec[T]
    ): (T, Throwable) => Future[Unit] = {
      val failureWriter = failedTaskWriter(name, BinaryScalaCodec(codec).apply)

      (failedRequest: T, cause: Throwable) =>
        Future.when(shouldWrite(cause))(failureWriter.writeFailure(failedRequest, cause))
    }

    val postTweet: (PostTweetRequest, Throwable) => Future[Unit] =
      writer("post_tweet", PostTweetRequest)
  }

  /**
   * Forwards to the underlying service and, as a side effect of the response,
   * records the request when it threw or completed in a state other than OK.
   * The Future returned by the writer is intentionally not awaited.
   */
  override def postTweet(request: PostTweetRequest): Future[PostTweetResult] =
    underlying.postTweet(request).respond {
      case Throw(exc) =>
        writers.postTweet(request, exc)
      // Non-OK states are logged too, to help debug tweet creation failures
      case Return(res) if res.state != TweetCreateState.Ok =>
        writers.postTweet(request, new PostTweetFailure(res.state, res.failureReason))
      case Return(_) =>
    }
}
/**
 * An authorizer that decides whether a request to a method should be rejected.
 *
 * This class is in the spirit of servo.request.ClientRequestAuthorizer.
 * The difference is that ClientRequestAuthorizer only operates on two pieces of
 * information: the clientId and a method name.
 *
 * This class can be used to build a more complex authorizer that operates on the
 * specifics of a request, e.g. an authorizer that disallows certain clients from
 * passing certain optional flags.
 *
 * Note: With some work, ClientRequestAuthorizer could be generalized to support
 * cases like this. If we end up making more method authorizers it might be worth
 * going that route.
 */
abstract class MethodAuthorizer[T]() {

  /** Authorizes `request` on behalf of `clientId`. */
  def apply(request: T, clientId: String): Future[Unit]

  /**
   * Creates a decidered MethodAuthorizer: `decider` picks between this
   * authorizer and MethodAuthorizer.unit, which always succeeds.
   */
  def enabledBy(decider: Gate[Unit]): MethodAuthorizer[T] =
    MethodAuthorizer.select[T](decider, this, MethodAuthorizer.unit[T])

  /**
   * Turns this MethodAuthorizer[T] into a MethodAuthorizer[A], given a
   * function from A => T that is applied to each request first.
   */
  def contramap[A](f: A => T): MethodAuthorizer[A] = {
    val self = this
    MethodAuthorizer[A]((req: A, id: String) => self(f(req), id))
  }
}

object MethodAuthorizer {

  /**
   * @param f an authorization function that returns Future.Unit if the
   *          request is authorized, and Future.exception() if it is not.
   *
   * @return a MethodAuthorizer whose apply method delegates to f
   */
  def apply[T](f: (T, String) => Future[Unit]): MethodAuthorizer[T] =
    new MethodAuthorizer[T]() {
      override def apply(request: T, clientId: String): Future[Unit] = f(request, clientId)
    }

  /**
   * @param authorizers a seq of MethodAuthorizers to compose into one.
   * @return a MethodAuthorizer that chains all of the authorizers in order
   *         with `before`, starting from Future.Unit
   */
  def all[T](authorizers: Seq[MethodAuthorizer[T]]): MethodAuthorizer[T] =
    MethodAuthorizer[T] { (req, id) =>
      authorizers.foldLeft(Future.Unit) { (previous, next) =>
        previous.before(next(req, id))
      }
    }

  /**
   * @return a MethodAuthorizer that always returns Future.Unit.
   *         Useful when a MethodAuthorizer needs to be decidered off and
   *         replaced by one that always passes.
   */
  def unit[T]: MethodAuthorizer[T] = MethodAuthorizer[T]((_, _) => Future.Unit)

  /**
   * @return a MethodAuthorizer that switches between the two provided
   *         MethodAuthorizers depending on a decider.
   */
  def select[T](
    decider: Gate[Unit],
    ifTrue: MethodAuthorizer[T],
    ifFalse: MethodAuthorizer[T]
  ): MethodAuthorizer[T] =
    MethodAuthorizer[T] { (req, id) =>
      decider.pick(ifTrue(req, id), ifFalse(req, id))
    }
}
/**
 * A [[TweetServiceProxy]] that records stats about the requests it forwards to `underlying`
 * and about the results that come back.
 *
 * Many observers are built with `withAndWithoutClientId`, which pairs an observer on the given
 * scope with per-client observers on a sub-scope named after the effective client id root.
 */
class ObservedTweetService(
  protected val underlying: ThriftTweetService,
  stats: StatsReceiver,
  clientIdHelper: ClientIdHelper)
    extends TweetServiceProxy {

  // ---- stats scopes, one per observed endpoint (or endpoint family) ----

  private[this] val asyncEventOrRetryScope = stats.scope("async_event_or_retry")
  private[this] val deleteFieldsScope = stats.scope("delete_additional_fields")
  private[this] val deleteTweetsScope = stats.scope("delete_tweets")
  private[this] val getDeletedTweetsScope = stats.scope("get_deleted_tweets")
  private[this] val getTweetCountsScope = stats.scope("get_tweet_counts")
  private[this] val getTweetsScope = stats.scope("get_tweets")
  private[this] val getTweetFieldsScope = stats.scope("get_tweet_fields")
  private[this] val postTweetScope = stats.scope("post_tweet")
  private[this] val replicatedInsertTweet2Scope = stats.scope("replicated_insert_tweet2")
  private[this] val retweetScope = stats.scope("post_retweet")
  private[this] val scrubGeoScope = stats.scope("scrub_geo")
  private[this] val setFieldsScope = stats.scope("set_additional_fields")
  private[this] val setRetweetVisibilityScope = stats.scope("set_retweet_visibility")
  private[this] val getStoredTweetsScope = stats.scope("get_stored_tweets")
  private[this] val getStoredTweetsByUserScope = stats.scope("get_stored_tweets_by_user")

  // Options substituted (for observation only) when a getTweets request carries none.
  private[this] val defaultGetTweetsRequestOptions = GetTweetOptions()

  // ---- write outcome observers ----

  /**
   * Classifies the outcome of a write: `write_successes` for a Return, `write_client_errors`
   * for a ClientError or AccessDenied, and `write_failures` for any other Throw.
   */
  private[this] val observeWriteResult: Effect[Try[_]] =
    withAndWithoutClientId(stats) { (scoped, _) =>
      val successes = scoped.counter("write_successes")
      val failures = scoped.counter("write_failures")
      val clientErrors = scoped.counter("write_client_errors")
      Effect[Try[_]] {
        case Return(_) => successes.incr()
        case Throw(ClientError(_, _)) | Throw(AccessDenied(_, _)) => clientErrors.incr()
        case Throw(_) => failures.incr()
      }
    }

  /** Increments the `tweet_writes` counter; applied on success of postTweet and postRetweet. */
  private[this] val observeTweetWriteSuccess: Effect[Any] =
    withAndWithoutClientId(stats) { (scoped, _) =>
      val tweetWrites = scoped.counter("tweet_writes")
      Effect[Any] { _ => tweetWrites.incr() }
    }

  // ---- request observers (overall + per client) ----

  private[this] val observeGetTweetsRequest =
    withAndWithoutClientId(getTweetsScope) {
      GetTweetsObserver.observeRequest
    }

  private[this] val observeGetTweetFieldsRequest =
    withAndWithoutClientId(getTweetFieldsScope) {
      GetTweetFieldsObserver.observeRequest
    }

  private[this] val observeGetTweetCountsRequest =
    withAndWithoutClientId(getTweetCountsScope) { (scoped, _) =>
      GetTweetCountsObserver.observeRequest(scoped)
    }

  private[this] val observeRetweetRequest: Effect[RetweetRequest] =
    withAndWithoutClientId(retweetScope) { (scoped, _) =>
      Observer.observeRetweetRequest(scoped)
    }

  private[this] val observeDeleteTweetsRequest =
    withAndWithoutClientId(deleteTweetsScope) { (scoped, _) =>
      Observer.observeDeleteTweetsRequest(scoped)
    }

  private[this] val observeSetFieldsRequest: Effect[SetAdditionalFieldsRequest] =
    withAndWithoutClientId(setFieldsScope) { (scoped, _) =>
      Observer.observeSetFieldsRequest(scoped)
    }

  private[this] val observeSetRetweetVisibilityRequest: Effect[SetRetweetVisibilityRequest] =
    withAndWithoutClientId(setRetweetVisibilityScope) { (scoped, _) =>
      Observer.observeSetRetweetVisibilityRequest(scoped)
    }

  private[this] val observeDeleteFieldsRequest: Effect[DeleteAdditionalFieldsRequest] =
    withAndWithoutClientId(deleteFieldsScope) { (scoped, _) =>
      Observer.observeDeleteFieldsRequest(scoped)
    }

  private[this] val observePostTweetAdditionals: Effect[Tweet] =
    withAndWithoutClientId(postTweetScope) { (scoped, _) =>
      Observer.observeAdditionalFields(scoped)
    }

  private[this] val observePostTweetRequest: Effect[PostTweetRequest] =
    withAndWithoutClientId(postTweetScope) { (scoped, _) =>
      PostTweetObserver.observerRequest(scoped)
    }

  // ---- result observers ----

  private[this] val observeGetTweetResults =
    withAndWithoutClientId(getTweetsScope) {
      GetTweetsObserver.observeResults
    }

  private[this] val observeGetTweetFieldsResults: Effect[Seq[GetTweetFieldsResult]] =
    GetTweetFieldsObserver.observeResults(getTweetFieldsScope)

  private[this] val observeTweetCountsResults =
    GetTweetCountsObserver.observeResults(getTweetCountsScope)

  private[this] val observeScrubGeoRequest =
    Observer.observeScrubGeo(scrubGeoScope)

  private[this] val observeRetweetResponse =
    PostTweetObserver.observeResults(retweetScope, byClient = false)

  private[this] val observePostTweetResponse =
    PostTweetObserver.observeResults(postTweetScope, byClient = false)

  // ---- async / replicated request observers (all share the async_event_or_retry scope,
  //      except replicated insert which has its own) ----

  private[this] val observeAsyncInsertRequest =
    Observer.observeAsyncInsertRequest(asyncEventOrRetryScope)

  private[this] val observeAsyncSetAdditionalFieldsRequest =
    Observer.observeAsyncSetAdditionalFieldsRequest(asyncEventOrRetryScope)

  private[this] val observeAsyncSetRetweetVisibilityRequest =
    Observer.observeAsyncSetRetweetVisibilityRequest(asyncEventOrRetryScope)

  private[this] val observeAsyncUndeleteTweetRequest =
    Observer.observeAsyncUndeleteTweetRequest(asyncEventOrRetryScope)

  private[this] val observeAsyncDeleteTweetRequest =
    Observer.observeAsyncDeleteTweetRequest(asyncEventOrRetryScope)

  private[this] val observeAsyncDeleteAdditionalFieldsRequest =
    Observer.observeAsyncDeleteAdditionalFieldsRequest(asyncEventOrRetryScope)

  private[this] val observeAsyncTakedownRequest =
    Observer.observeAsyncTakedownRequest(asyncEventOrRetryScope)

  private[this] val observeAsyncUpdatePossiblySensitiveTweetRequest =
    Observer.observeAsyncUpdatePossiblySensitiveTweetRequest(asyncEventOrRetryScope)

  private[this] val observedReplicatedInsertTweet2Request =
    Observer.observeReplicatedInsertTweetRequest(replicatedInsertTweet2Scope)

  // ---- (request, response) exchange observers ----

  private[this] val observeGetTweetFieldsResultState: Effect[GetTweetFieldsObserver.Type] =
    withAndWithoutClientId(getTweetFieldsScope) { (scoped, _) =>
      GetTweetFieldsObserver.observeExchange(scoped)
    }

  private[this] val observeGetTweetsResultState: Effect[GetTweetsObserver.Type] =
    withAndWithoutClientId(getTweetsScope) { (scoped, _) =>
      GetTweetsObserver.observeExchange(scoped)
    }

  private[this] val observeGetTweetCountsResultState: Effect[GetTweetCountsObserver.Type] =
    withAndWithoutClientId(getTweetCountsScope) { (scoped, _) =>
      GetTweetCountsObserver.observeExchange(scoped)
    }

  private[this] val observeGetDeletedTweetsResultState: Effect[GetDeletedTweetsObserver.Type] =
    withAndWithoutClientId(getDeletedTweetsScope) { (scoped, _) =>
      GetDeletedTweetsObserver.observeExchange(scoped)
    }

  // ---- stored-tweets observers (overall scope only) ----

  private[this] val observeGetStoredTweetsRequest: Effect[GetStoredTweetsRequest] =
    GetStoredTweetsObserver.observeRequest(getStoredTweetsScope)

  private[this] val observeGetStoredTweetsResult: Effect[Seq[GetStoredTweetsResult]] =
    GetStoredTweetsObserver.observeResult(getStoredTweetsScope)

  private[this] val observeGetStoredTweetsResultState: Effect[GetStoredTweetsObserver.Type] =
    GetStoredTweetsObserver.observeExchange(getStoredTweetsScope)

  private[this] val observeGetStoredTweetsByUserRequest: Effect[GetStoredTweetsByUserRequest] =
    GetStoredTweetsByUserObserver.observeRequest(getStoredTweetsByUserScope)

  private[this] val observeGetStoredTweetsByUserResult: Effect[GetStoredTweetsByUserResult] =
    GetStoredTweetsByUserObserver.observeResult(getStoredTweetsByUserScope)

  private[this] val observeGetStoredTweetsByUserResultState: Effect[
    GetStoredTweetsByUserObserver.Type
  ] =
    GetStoredTweetsByUserObserver.observeExchange(getStoredTweetsByUserScope)

  // ---- reads ----

  override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] = {
    // The request observer sees default options when the caller supplied none; the request
    // forwarded to the underlying service (and paired with the response) is the original one.
    val observedRequest = request.options match {
      case Some(_) => request
      case None => request.copy(options = Some(defaultGetTweetsRequestOptions))
    }
    observeGetTweetsRequest(observedRequest)
    Trace.recordBinary("query_width", request.tweetIds.length)
    super
      .getTweets(request)
      .onSuccess(observeGetTweetResults)
      .respond(outcome => observeGetTweetsResultState((request, outcome)))
  }

  override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] = {
    observeGetTweetFieldsRequest(request)
    Trace.recordBinary("query_width", request.tweetIds.length)
    super
      .getTweetFields(request)
      .onSuccess(observeGetTweetFieldsResults)
      .respond(outcome => observeGetTweetFieldsResultState((request, outcome)))
  }

  override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = {
    observeGetTweetCountsRequest(request)
    Trace.recordBinary("query_width", request.tweetIds.length)
    super
      .getTweetCounts(request)
      .onSuccess(observeTweetCountsResults)
      .respond(outcome => observeGetTweetCountsResultState((request, outcome)))
  }

  override def getDeletedTweets(
    request: GetDeletedTweetsRequest
  ): Future[Seq[GetDeletedTweetResult]] = {
    Trace.recordBinary("query_width", request.tweetIds.length)
    super
      .getDeletedTweets(request)
      .respond(outcome => observeGetDeletedTweetsResultState((request, outcome)))
  }

  // ---- tweet creation ----

  override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = {
    observePostTweetRequest(request)
    request.additionalFields.foreach(observePostTweetAdditionals)
    super
      .postTweet(request)
      .onSuccess(observePostTweetResponse)
      .onSuccess(observeTweetWriteSuccess)
      .respond(observeWriteResult)
  }

  override def postRetweet(request: RetweetRequest): Future[PostTweetResult] = {
    observeRetweetRequest(request)
    super
      .postRetweet(request)
      .onSuccess(observeRetweetResponse)
      .onSuccess(observeTweetWriteSuccess)
      .respond(observeWriteResult)
  }

  // ---- other writes: observe the request where an observer exists, then the write outcome ----

  override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = {
    observeSetFieldsRequest(request)
    super.setAdditionalFields(request).respond(observeWriteResult)
  }

  override def setRetweetVisibility(request: SetRetweetVisibilityRequest): Future[Unit] = {
    observeSetRetweetVisibilityRequest(request)
    super.setRetweetVisibility(request).respond(observeWriteResult)
  }

  override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] = {
    observeDeleteFieldsRequest(request)
    super.deleteAdditionalFields(request).respond(observeWriteResult)
  }

  override def updatePossiblySensitiveTweet(
    request: UpdatePossiblySensitiveTweetRequest
  ): Future[Unit] =
    super.updatePossiblySensitiveTweet(request).respond(observeWriteResult)

  override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] =
    super.deleteLocationData(request).respond(observeWriteResult)

  override def scrubGeo(geoScrub: GeoScrub): Future[Unit] = {
    observeScrubGeoRequest(geoScrub)
    super.scrubGeo(geoScrub).respond(observeWriteResult)
  }

  override def scrubGeoUpdateUserTimestamp(request: DeleteLocationData): Future[Unit] =
    super.scrubGeoUpdateUserTimestamp(request).respond(observeWriteResult)

  override def takedown(request: TakedownRequest): Future[Unit] =
    super.takedown(request).respond(observeWriteResult)

  override def setTweetUserTakedown(request: SetTweetUserTakedownRequest): Future[Unit] =
    super.setTweetUserTakedown(request).respond(observeWriteResult)

  override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] =
    super.incrTweetFavCount(request).respond(observeWriteResult)

  override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] =
    super.incrTweetBookmarkCount(request).respond(observeWriteResult)

  override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] = {
    observeDeleteTweetsRequest(request)
    super.deleteTweets(request).respond(observeWriteResult)
  }

  override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] =
    super.cascadedDeleteTweet(request).respond(observeWriteResult)

  // ---- async endpoints ----

  override def asyncInsert(request: AsyncInsertRequest): Future[Unit] = {
    observeAsyncInsertRequest(request)
    super.asyncInsert(request).respond(observeWriteResult)
  }

  override def asyncSetAdditionalFields(request: AsyncSetAdditionalFieldsRequest): Future[Unit] = {
    observeAsyncSetAdditionalFieldsRequest(request)
    super.asyncSetAdditionalFields(request).respond(observeWriteResult)
  }

  override def asyncSetRetweetVisibility(
    request: AsyncSetRetweetVisibilityRequest
  ): Future[Unit] = {
    observeAsyncSetRetweetVisibilityRequest(request)
    super.asyncSetRetweetVisibility(request).respond(observeWriteResult)
  }

  override def asyncUndeleteTweet(request: AsyncUndeleteTweetRequest): Future[Unit] = {
    observeAsyncUndeleteTweetRequest(request)
    super.asyncUndeleteTweet(request).respond(observeWriteResult)
  }

  override def asyncDelete(request: AsyncDeleteRequest): Future[Unit] = {
    observeAsyncDeleteTweetRequest(request)
    super.asyncDelete(request).respond(observeWriteResult)
  }

  override def asyncDeleteAdditionalFields(
    request: AsyncDeleteAdditionalFieldsRequest
  ): Future[Unit] = {
    observeAsyncDeleteAdditionalFieldsRequest(request)
    super.asyncDeleteAdditionalFields(request).respond(observeWriteResult)
  }

  override def asyncTakedown(request: AsyncTakedownRequest): Future[Unit] = {
    observeAsyncTakedownRequest(request)
    super.asyncTakedown(request).respond(observeWriteResult)
  }

  override def asyncUpdatePossiblySensitiveTweet(
    request: AsyncUpdatePossiblySensitiveTweetRequest
  ): Future[Unit] = {
    observeAsyncUpdatePossiblySensitiveTweetRequest(request)
    super.asyncUpdatePossiblySensitiveTweet(request).respond(observeWriteResult)
  }

  // Only the inserted tweet is observed here; the write outcome is not recorded.
  override def replicatedInsertTweet2(request: ReplicatedInsertTweet2Request): Future[Unit] = {
    observedReplicatedInsertTweet2Request(request.cachedTweet.tweet)
    super.replicatedInsertTweet2(request)
  }

  // ---- stored tweets ----

  override def getStoredTweets(
    request: GetStoredTweetsRequest
  ): Future[Seq[GetStoredTweetsResult]] = {
    observeGetStoredTweetsRequest(request)
    super
      .getStoredTweets(request)
      .onSuccess(observeGetStoredTweetsResult)
      .respond(outcome => observeGetStoredTweetsResultState((request, outcome)))
  }

  override def getStoredTweetsByUser(
    request: GetStoredTweetsByUserRequest
  ): Future[GetStoredTweetsByUserResult] = {
    observeGetStoredTweetsByUserRequest(request)
    super
      .getStoredTweetsByUser(request)
      .onSuccess(observeGetStoredTweetsByUserResult)
      .respond(outcome => observeGetStoredTweetsByUserResultState((request, outcome)))
  }

  // ---- helpers ----

  /**
   * Builds the observer once on `stats` (flag = false) and chains it with a per-client
   * observer produced by `withClientId` (flag = true).
   */
  private def withAndWithoutClientId[A](
    stats: StatsReceiver
  )(
    f: (StatsReceiver, Boolean) => Effect[A]
  ) =
    f(stats, false).also(withClientId(stats)(f))

  /**
   * Returns an effect that looks up the effective client id root at call time and, when one
   * is present, applies an observer scoped to `stats.scope(clientId)`. Observers are created
   * on first use per client id and kept in a synchronized map.
   */
  private def withClientId[A](stats: StatsReceiver)(f: (StatsReceiver, Boolean) => Effect[A]) = {
    val observersByClient = new SynchronizedHashMap[String, Effect[A]]

    Effect[A] { value =>
      for (clientId <- clientIdHelper.effectiveClientIdRoot) {
        val observer =
          observersByClient.getOrElseUpdate(clientId, f(stats.scope(clientId), true))
        observer(value)
      }
    }
  }
}
object QuillTweetService {

  /**
   * Serializes a thrift CALL message using the binary protocol.
   *
   * @param method_name name written into the message header
   * @param write_args  writes the argument struct to the supplied protocol
   * @return the bytes of the message (header followed by whatever `write_args` wrote)
   */
  def createThriftBinaryRequest(method_name: String, write_args: TProtocol => Unit): Array[Byte] = {
    val buffer = new TMemoryBuffer(512)
    val protocol = Protocols.binaryFactory().getProtocol(buffer)

    protocol.writeMessageBegin(new TMessage(method_name, TMessageType.CALL, 0))
    write_args(protocol)
    protocol.writeMessageEnd()

    // The backing array may be larger than what was written; copy only the used prefix.
    java.util.Arrays.copyOfRange(buffer.getArray, 0, buffer.length)
  }
}

/**
 * Wraps an underlying TweetService, logging some requests.
 *
 * For postTweet, deleteTweets, postRetweet, unretweet and cascadedDeleteTweet the request is
 * re-serialized as a binary thrift message and handed to `quillCapture` before the call is
 * forwarded to `underlying`.
 */
class QuillTweetService(quillCapture: QuillCapture, protected val underlying: ThriftTweetService)
    extends TweetServiceProxy {

  /**
   * Serializes the request, stores it via `quillCapture`, then evaluates `call`.
   *
   * `call` is by-name so that the underlying service is only invoked after the capture step,
   * matching the order of the previously inlined code.
   */
  private[this] def captured[A](
    methodName: String,
    writeArgs: TProtocol => Unit
  )(
    call: => Future[A]
  ): Future[A] = {
    val requestBytes = QuillTweetService.createThriftBinaryRequest(methodName, writeArgs)
    quillCapture.storeServerRecv(Payloads.fromThriftMessageBytes(requestBytes))
    call
  }

  override def postTweet(request: PostTweetRequest): Future[PostTweetResult] =
    captured(TweetService.PostTweet.name, TweetService.PostTweet.Args(request).write) {
      underlying.postTweet(request)
    }

  override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] =
    captured(TweetService.DeleteTweets.name, TweetService.DeleteTweets.Args(request).write) {
      underlying.deleteTweets(request)
    }

  override def postRetweet(request: RetweetRequest): Future[PostTweetResult] =
    captured(TweetService.PostRetweet.name, TweetService.PostRetweet.Args(request).write) {
      underlying.postRetweet(request)
    }

  override def unretweet(request: UnretweetRequest): Future[UnretweetResult] =
    captured(TweetService.Unretweet.name, TweetService.Unretweet.Args(request).write) {
      underlying.unretweet(request)
    }

  override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] =
    captured(
      TweetServiceInternal.CascadedDeleteTweet.name,
      TweetServiceInternal.CascadedDeleteTweet.Args(request).write
    ) {
      underlying.cascadedDeleteTweet(request)
    }
}
object ReplicatingTweetService {

  /**
   * Pairs a replication client with the gate that decides, per call, whether a replication
   * request should be issued to it.
   */
  case class GatedReplicationClient(client: ThriftTweetService, gate: Gate[Unit]) {

    /** Hands `action(client)` to `executor` when the gate is open; otherwise does nothing. */
    def execute(executor: Forked.Executor, action: ThriftTweetService => Unit): Unit =
      if (gate()) {
        executor(() => action(client))
      }
  }
}

/**
 * Wraps an underlying ThriftTweetService, transforming external read requests into replicated
 * requests that are sent to each replication target in addition to being served by `underlying`.
 */
class ReplicatingTweetService(
  protected val underlying: ThriftTweetService,
  replicationTargets: Seq[GatedReplicationClient],
  executor: Forked.Executor)
    extends TweetServiceProxy {

  /** Offers `action` to every replication target; each target's gate decides whether it runs. */
  private[this] def replicateRead(action: ThriftTweetService => Unit): Unit =
    for (target <- replicationTargets) target.execute(executor, action)

  // Counts requests are always replicated.
  override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = {
    replicateRead(target => target.replicatedGetTweetCounts(request))
    underlying.getTweetCounts(request)
  }

  // Requests flagged doNotCache are not replicated.
  override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] = {
    val skipReplication = request.options.doNotCache
    if (!skipReplication) {
      replicateRead(target => target.replicatedGetTweetFields(request))
    }
    underlying.getTweetFields(request)
  }

  // Replicated unless options are present and flagged doNotCache.
  override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] = {
    val shouldReplicate = request.options.forall(options => !options.doNotCache)
    if (shouldReplicate) {
      replicateRead(target => target.replicatedGetTweets(request))
    }
    underlying.getTweets(request)
  }
}
/**
 * Translates exceptions raised while serving a request into the servo thrift exceptions
 * (`ClientError`, `ServerError`, or a pass-through `AccessDenied`), prefixing each message with
 * the calling client id and method name.
 */
object RescueExceptions {
  private val log = Logger("com.twitter.tweetypie.service.TweetService")

  /**
   * Builds a handler that maps any throwable to a failed Future carrying the translated
   * servo exception.
   *
   * @param name     method name included in the error message
   * @param clientId client id included in the error message
   */
  def rescueToServoFailure(
    name: String,
    clientId: String
  ): PartialFunction[Throwable, Future[Nothing]] = {
    val decorate: String => String = msg => formatError(name, clientId, msg)
    translateToServoFailure(decorate).andThen(Future.exception)
  }

  /**
   * Case order matters: the guarded AccessDenied case must precede the general one, and the
   * catch-all must stay last.
   */
  private def translateToServoFailure(
    toMsg: String => String
  ): PartialFunction[Throwable, Throwable] = {
    // AccessDenied for suspended/deactivated users is passed through (message rewritten only).
    case denied: AccessDenied if suspendedOrDeactivated(denied) =>
      denied.copy(message = toMsg(denied.message))

    // Client-side failures.
    case clientError: ClientError =>
      clientError.copy(message = toMsg(clientError.message))
    case unauthorized: UnauthorizedException =>
      ClientError(ClientErrorCause.Unauthorized, toMsg(unauthorized.msg))
    case denied: AccessDenied =>
      ClientError(ClientErrorCause.Unauthorized, toMsg(denied.message))
    case limited: RateLimited =>
      ClientError(ClientErrorCause.RateLimited, toMsg(limited.message))

    // Server-side failures.
    case serverError: ServerError =>
      serverError.copy(message = toMsg(serverError.message))
    case timeout @ (_: TimeoutException | _: IndividualRequestTimeoutException) =>
      ServerError(ServerErrorCause.RequestTimeout, toMsg(timeout.toString))
    case upstream: UpstreamFailure =>
      ServerError(ServerErrorCause.DependencyError, toMsg(upstream.toString))
    case overCapacity: OverCapacity =>
      ServerError(ServerErrorCause.ServiceUnavailable, toMsg(overCapacity.message))
    case hydration: TweetHydrationError =>
      ServerError(ServerErrorCause.DependencyError, toMsg(hydration.toString))

    // Anything else is unexpected: log it and report an internal server error.
    case unexpected =>
      log.warn("caught unexpected exception", unexpected)
      ServerError(ServerErrorCause.InternalServerError, toMsg(unexpected.toString))
  }

  /** True when the error cause is present and is UserDeactivated or UserSuspended. */
  private def suspendedOrDeactivated(e: AccessDenied): Boolean =
    e.errorCause.exists { cause =>
      cause == AccessDeniedCause.UserDeactivated || cause == AccessDeniedCause.UserSuspended
    }

  private def formatError(name: String, clientId: String, msg: String): String =
    s"($clientId, $name) $msg"
}
/**
 * Base trait for TweetService implementations that decorate another TweetService and only
 * need to override some of its methods.
 *
 * It builds on [[com.twitter.tweetypie.thriftscala.TweetServiceProxy]] and additionally
 * mixes in `ThriftTweetService`, adding forwarding for the methods overridden below
 * (async*, replicated*, and the others listed). Every method here delegates to `underlying`
 * through the inherited `wrap` hook.
 */
trait TweetServiceProxy extends BaseTweetServiceProxy with ThriftTweetService {
  protected override def underlying: ThriftTweetService

  // ---- replicated* endpoints ----

  override def replicatedGetTweetCounts(request: GetTweetCountsRequest): Future[Unit] =
    wrap(underlying.replicatedGetTweetCounts(request))

  override def replicatedGetTweetFields(request: GetTweetFieldsRequest): Future[Unit] =
    wrap(underlying.replicatedGetTweetFields(request))

  override def replicatedGetTweets(request: GetTweetsRequest): Future[Unit] =
    wrap(underlying.replicatedGetTweets(request))

  override def replicatedUpdatePossiblySensitiveTweet(tweet: Tweet): Future[Unit] =
    wrap(underlying.replicatedUpdatePossiblySensitiveTweet(tweet))

  override def replicatedUndeleteTweet2(request: ReplicatedUndeleteTweet2Request): Future[Unit] =
    wrap(underlying.replicatedUndeleteTweet2(request))

  override def replicatedInsertTweet2(request: ReplicatedInsertTweet2Request): Future[Unit] =
    wrap(underlying.replicatedInsertTweet2(request))

  override def replicatedDeleteTweet2(request: ReplicatedDeleteTweet2Request): Future[Unit] =
    wrap(underlying.replicatedDeleteTweet2(request))

  override def replicatedIncrFavCount(tweetId: TweetId, delta: Int): Future[Unit] =
    wrap(underlying.replicatedIncrFavCount(tweetId, delta))

  override def replicatedIncrBookmarkCount(tweetId: TweetId, delta: Int): Future[Unit] =
    wrap(underlying.replicatedIncrBookmarkCount(tweetId, delta))

  override def replicatedSetRetweetVisibility(
    request: ReplicatedSetRetweetVisibilityRequest
  ): Future[Unit] =
    wrap(underlying.replicatedSetRetweetVisibility(request))

  override def replicatedScrubGeo(tweetIds: Seq[TweetId]): Future[Unit] =
    wrap(underlying.replicatedScrubGeo(tweetIds))

  override def replicatedSetAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] =
    wrap(underlying.replicatedSetAdditionalFields(request))

  override def replicatedDeleteAdditionalFields(
    request: ReplicatedDeleteAdditionalFieldsRequest
  ): Future[Unit] =
    wrap(underlying.replicatedDeleteAdditionalFields(request))

  override def replicatedTakedown(tweet: Tweet): Future[Unit] =
    wrap(underlying.replicatedTakedown(tweet))

  // ---- async* endpoints ----

  override def asyncInsert(request: AsyncInsertRequest): Future[Unit] =
    wrap(underlying.asyncInsert(request))

  override def asyncDelete(request: AsyncDeleteRequest): Future[Unit] =
    wrap(underlying.asyncDelete(request))

  override def asyncUndeleteTweet(request: AsyncUndeleteTweetRequest): Future[Unit] =
    wrap(underlying.asyncUndeleteTweet(request))

  override def asyncSetAdditionalFields(request: AsyncSetAdditionalFieldsRequest): Future[Unit] =
    wrap(underlying.asyncSetAdditionalFields(request))

  override def asyncDeleteAdditionalFields(
    request: AsyncDeleteAdditionalFieldsRequest
  ): Future[Unit] =
    wrap(underlying.asyncDeleteAdditionalFields(request))

  override def asyncUpdatePossiblySensitiveTweet(
    request: AsyncUpdatePossiblySensitiveTweetRequest
  ): Future[Unit] =
    wrap(underlying.asyncUpdatePossiblySensitiveTweet(request))

  override def asyncEraseUserTweets(request: AsyncEraseUserTweetsRequest): Future[Unit] =
    wrap(underlying.asyncEraseUserTweets(request))

  override def asyncIncrFavCount(request: AsyncIncrFavCountRequest): Future[Unit] =
    wrap(underlying.asyncIncrFavCount(request))

  override def asyncIncrBookmarkCount(request: AsyncIncrBookmarkCountRequest): Future[Unit] =
    wrap(underlying.asyncIncrBookmarkCount(request))

  override def asyncSetRetweetVisibility(request: AsyncSetRetweetVisibilityRequest): Future[Unit] =
    wrap(underlying.asyncSetRetweetVisibility(request))

  override def asyncTakedown(request: AsyncTakedownRequest): Future[Unit] =
    wrap(underlying.asyncTakedown(request))

  // ---- other write endpoints ----

  override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] =
    wrap(underlying.cascadedDeleteTweet(request))

  override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] =
    wrap(underlying.eraseUserTweets(request))

  override def scrubGeoUpdateUserTimestamp(request: DeleteLocationData): Future[Unit] =
    wrap(underlying.scrubGeoUpdateUserTimestamp(request))

  override def setRetweetVisibility(request: SetRetweetVisibilityRequest): Future[Unit] =
    wrap(underlying.setRetweetVisibility(request))

  override def setTweetUserTakedown(request: SetTweetUserTakedownRequest): Future[Unit] =
    wrap(underlying.setTweetUserTakedown(request))

  override def quotedTweetDelete(request: QuotedTweetDeleteRequest): Future[Unit] =
    wrap(underlying.quotedTweetDelete(request))

  override def quotedTweetTakedown(request: QuotedTweetTakedownRequest): Future[Unit] =
    wrap(underlying.quotedTweetTakedown(request))

  // ---- stored-tweet reads ----

  override def getStoredTweets(
    request: GetStoredTweetsRequest
  ): Future[Seq[GetStoredTweetsResult]] =
    wrap(underlying.getStoredTweets(request))

  override def getStoredTweetsByUser(
    request: GetStoredTweetsByUserRequest
  ): Future[GetStoredTweetsByUserResult] =
    wrap(underlying.getStoredTweetsByUser(request))
}

/**
 * A TweetServiceProxy whose `underlying` service is a `var` and can therefore be reassigned
 * after construction.
 */
class MutableTweetServiceProxy(var underlying: ThriftTweetService) extends TweetServiceProxy

/**
 * A TweetServiceProxy that runs every wrapped call with `clientId` installed as the current
 * ClientId.
 */
class ClientIdSettingTweetServiceProxy(clientId: ClientId, val underlying: ThriftTweetService)
    extends TweetServiceProxy {
  override def wrap[A](f: => Future[A]): Future[A] =
    clientId.asCurrent(f)
}

/**
 * Settings for the artificial tweet fetching requests that are sent to warm up the
 * server before authentic requests are processed.
 */
case class WarmupQueriesSettings(
  realTweetRequestCycles: Int = 100,
  requestTimeout: Duration = 3.seconds,
  clientId: ClientId = ClientId("tweetypie.warmup"),
  requestTimeRange: Duration = 10.minutes,
  maxConcurrency: Int = 20)

object TweetServiceWarmer {

  /**
   * Load info from perspective of TLS test account with short favorites timeline.
   */
  val ForUserId = 3511687034L // @mikestltestact1
}
/**
 * Issues getTweets requests against a service in order to warm up the code paths used when
 * fetching tweets.
 *
 * The same request (a fixed list of tweet ids with `requestOptions`) is sent
 * `realTweetRequestCycles` times, in batches of at most `maxConcurrency`; each batch is awaited
 * for up to `requestTimeout` before the next one starts.
 */
class TweetServiceWarmer(
  warmupSettings: WarmupQueriesSettings,
  requestOptions: GetTweetOptions = GetTweetOptions(includePlaces = true,
    includeRetweetCount = true, includeReplyCount = true, includeFavoriteCount = true,
    includeCards = true, cardsPlatformKey = Some("iPhone-13"), includePerspectivals = true,
    includeQuotedTweet = true, forUserId = Some(TweetServiceWarmer.ForUserId)))
    extends (ThriftTweetService => Unit) {
  import warmupSettings._

  // Tweet ids fetched by every warmup request.
  private val realTweetIds =
    Seq(
      20L, // just setting up my twttr
      456190426412617728L, // protected user tweet
      455477977715707904L, // suspended user tweet
      440322224407314432L, // ellen oscar selfie
      372173241290612736L, // gaga mentions 1d
      456965485179838464L, // media tagged tweet
      525421442918121473L, // tweet with card
      527214829807759360L, // tweet with annotation
      472788687571677184L // tweet with quote tweet
    )

  private val log = Logger(getClass)

  /**
   * Runs the warmup queries against `service`, blocking until every batch has either
   * completed or hit the per-batch `requestTimeout`, and logs how long the whole run took.
   */
  def apply(service: ThriftTweetService): Unit = {
    val startedAt = Time.now
    log.info("warming up...")
    warmup(service)
    val elapsed = Time.now.since(startedAt)
    log.info("warmup took " + elapsed)
  }

  /**
   * Sends the warmup requests batch by batch with the warmup client id installed as current.
   */
  private[this] def warmup(service: ThriftTweetService): Unit =
    clientId.asCurrent {
      val request = GetTweetsRequest(realTweetIds, options = Some(requestOptions))
      val batches = Seq.fill(realTweetRequestCycles)(request).grouped(maxConcurrency)

      batches.foreach { batch =>
        val pending = batch.map(service.getTweets(_))
        try {
          Await.ready(Future.join(pending), requestTimeout)
        } catch {
          // Best effort: a timeout while awaiting a batch must not abort the warmup.
          // NOTE(review): NonFatal does not match InterruptedException, so an interrupt
          // would still propagate from here — confirm that is the intended behavior.
          case NonFatal(_) =>
        }
      }
    }
}
object GetDeletedTweetsObserver { + type Type = ObserveExchange[GetDeletedTweetsRequest, Seq[GetDeletedTweetResult]] + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(_) | Throw(ClientError(_)) => + resultStateStats.success(request.tweetIds.size) + case Throw(_) => + resultStateStats.failed(request.tweetIds.size) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsByUserObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsByUserObserver.scala new file mode 100644 index 000000000..5c16c68b2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsByUserObserver.scala @@ -0,0 +1,67 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserRequest +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserResult + +private[service] object GetStoredTweetsByUserObserver extends StoredTweetsObserver { + + type Type = ObserveExchange[GetStoredTweetsByUserRequest, GetStoredTweetsByUserResult] + val firstTweetTimestamp: Long = 1142974200L + + def observeRequest(stats: StatsReceiver): Effect[GetStoredTweetsByUserRequest] = { + val optionsScope = stats.scope("options") + val bypassVisibilityFilteringCounter = optionsScope.counter("bypass_visibility_filtering") + val forUserIdCounter = optionsScope.counter("set_for_user_id") + val timeRangeStat = optionsScope.stat("time_range_seconds") + val cursorCounter = optionsScope.counter("cursor") + val startFromOldestCounter = optionsScope.counter("start_from_oldest") + val additionalFieldsScope = optionsScope.scope("additional_fields") + + Effect { request => + if (request.options.isDefined) { + val options = request.options.get + + if 
(options.bypassVisibilityFiltering) bypassVisibilityFilteringCounter.incr() + if (options.setForUserId) forUserIdCounter.incr() + if (options.cursor.isDefined) { + cursorCounter.incr() + } else { + // We only add a time range stat once, when there's no cursor in the request (i.e. this + // isn't a repeat request for a subsequent batch of results) + val startTimeSeconds: Long = + options.startTimeMsec.map(_ / 1000).getOrElse(firstTweetTimestamp) + val endTimeSeconds: Long = options.endTimeMsec.map(_ / 1000).getOrElse(Time.now.inSeconds) + timeRangeStat.add(endTimeSeconds - startTimeSeconds) + + // We use the startFromOldest parameter when the cursor isn't defined + if (options.startFromOldest) startFromOldestCounter.incr() + } + options.additionalFieldIds.foreach { id => + additionalFieldsScope.counter(id.toString).incr() + } + } + } + } + + def observeResult(stats: StatsReceiver): Effect[GetStoredTweetsByUserResult] = { + val resultScope = stats.scope("result") + + Effect { result => + observeStoredTweets(result.storedTweets, resultScope) + } + } + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(_) => resultStateStats.success() + case Throw(_) => resultStateStats.failed() + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsObserver.scala new file mode 100644 index 000000000..f6021d06c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsObserver.scala @@ -0,0 +1,52 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.tweetypie.thriftscala.GetStoredTweetsRequest +import com.twitter.tweetypie.thriftscala.GetStoredTweetsResult + +private[service] object GetStoredTweetsObserver 
extends StoredTweetsObserver { + type Type = ObserveExchange[GetStoredTweetsRequest, Seq[GetStoredTweetsResult]] + + def observeRequest(stats: StatsReceiver): Effect[GetStoredTweetsRequest] = { + val requestSizeStat = stats.stat("request_size") + + val optionsScope = stats.scope("options") + val bypassVisibilityFilteringCounter = optionsScope.counter("bypass_visibility_filtering") + val forUserIdCounter = optionsScope.counter("for_user_id") + val additionalFieldsScope = optionsScope.scope("additional_fields") + + Effect { request => + requestSizeStat.add(request.tweetIds.size) + + if (request.options.isDefined) { + val options = request.options.get + if (options.bypassVisibilityFiltering) bypassVisibilityFilteringCounter.incr() + if (options.forUserId.isDefined) forUserIdCounter.incr() + options.additionalFieldIds.foreach { id => + additionalFieldsScope.counter(id.toString).incr() + } + } + } + } + + def observeResult(stats: StatsReceiver): Effect[Seq[GetStoredTweetsResult]] = { + val resultScope = stats.scope("result") + + Effect { result => + observeStoredTweets(result.map(_.storedTweet), resultScope) + } + } + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(_) => resultStateStats.success(request.tweetIds.size) + case Throw(_) => resultStateStats.failed(request.tweetIds.size) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetCountsObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetCountsObserver.scala new file mode 100644 index 000000000..c97fdc2e7 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetCountsObserver.scala @@ -0,0 +1,67 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.servo.exception.thriftscala.ClientError +import 
com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.thriftscala.GetTweetCountsRequest +import com.twitter.tweetypie.thriftscala.GetTweetCountsResult + +private[service] object GetTweetCountsObserver { + type Type = ObserveExchange[GetTweetCountsRequest, Seq[GetTweetCountsResult]] + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(_) | Throw(ClientError(_)) => + resultStateStats.success(request.tweetIds.size) + case Throw(_) => + resultStateStats.failed(request.tweetIds.size) + } + } + } + + def observeResults(stats: StatsReceiver): Effect[Seq[GetTweetCountsResult]] = { + val retweetCounter = stats.counter("retweets") + val replyCounter = stats.counter("replies") + val favoriteCounter = stats.counter("favorites") + + Effect { counts => + counts.foreach { c => + if (c.retweetCount.isDefined) retweetCounter.incr() + if (c.replyCount.isDefined) replyCounter.incr() + if (c.favoriteCount.isDefined) favoriteCounter.incr() + } + } + } + + def observeRequest(stats: StatsReceiver): Effect[GetTweetCountsRequest] = { + val requestSizesStat = stats.stat("request_size") + val optionsScope = stats.scope("options") + val includeRetweetCounter = optionsScope.counter("retweet_counts") + val includeReplyCounter = optionsScope.counter("reply_counts") + val includeFavoriteCounter = optionsScope.counter("favorite_counts") + val tweetAgeStat = stats.stat("tweet_age_seconds") + + Effect { request => + val size = request.tweetIds.size + requestSizesStat.add(size) + + // Measure Tweet.get_tweet_counts tweet age of requested Tweets. + // Tweet counts are stored in cache, falling back to TFlock on cache misses. + // Track client TweetId age to understand how that affects clients response latencies. 
+ for { + id <- request.tweetIds + timestamp <- SnowflakeId.timeFromIdOpt(id) + age = Time.now.since(timestamp) + } tweetAgeStat.add(age.inSeconds) + + if (request.includeRetweetCount) includeRetweetCounter.incr(size) + if (request.includeReplyCount) includeReplyCounter.incr(size) + if (request.includeFavoriteCount) includeFavoriteCounter.incr(size) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetFieldsObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetFieldsObserver.scala new file mode 100644 index 000000000..af6666b03 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetFieldsObserver.scala @@ -0,0 +1,160 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.tweetypie.thriftscala._ + +private[service] object GetTweetFieldsObserver { + type Type = ObserveExchange[GetTweetFieldsRequest, Seq[GetTweetFieldsResult]] + + def observeExchange(statsReceiver: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(statsReceiver) + + val stats = statsReceiver.scope("results") + val tweetResultFailed = stats.counter("tweet_result_failed") + val quoteResultFailed = stats.counter("quote_result_failed") + val overCapacity = stats.counter("over_capacity") + + def observeFailedResult(r: GetTweetFieldsResult): Unit = { + r.tweetResult match { + case TweetFieldsResultState.Failed(failed) => + tweetResultFailed.incr() + + if (failed.overCapacity) overCapacity.incr() + case _ => + } + + if (r.quotedTweetResult.exists(_.isInstanceOf[TweetFieldsResultState.Failed])) + quoteResultFailed.incr() + } + + Effect { + case (request, response) => + response match { + case Return(xs) => + xs foreach { + case x if isFailedResult(x) => + observeFailedResult(x) + resultStateStats.failed() + case _ => + resultStateStats.success() + } + case 
Throw(ClientError(_)) => + resultStateStats.success(request.tweetIds.size) + case Throw(_) => + resultStateStats.failed(request.tweetIds.size) + } + } + } + + def observeRequest(stats: StatsReceiver, byClient: Boolean): Effect[GetTweetFieldsRequest] = { + val requestSizeStat = stats.stat("request_size") + val optionsScope = stats.scope("options") + val tweetFieldsScope = optionsScope.scope("tweet_field") + val countsFieldsScope = optionsScope.scope("counts_field") + val mediaFieldsScope = optionsScope.scope("media_field") + val includeRetweetedTweetCounter = optionsScope.counter("include_retweeted_tweet") + val includeQuotedTweetCounter = optionsScope.counter("include_quoted_tweet") + val forUserIdCounter = optionsScope.counter("for_user_id") + val cardsPlatformKeyCounter = optionsScope.counter("cards_platform_key") + val cardsPlatformKeyScope = optionsScope.scope("cards_platform_key") + val extensionsArgsCounter = optionsScope.counter("extensions_args") + val doNotCacheCounter = optionsScope.counter("do_not_cache") + val simpleQuotedTweetCounter = optionsScope.counter("simple_quoted_tweet") + val visibilityPolicyScope = optionsScope.scope("visibility_policy") + val userVisibleCounter = visibilityPolicyScope.counter("user_visible") + val noFilteringCounter = visibilityPolicyScope.counter("no_filtering") + val noSafetyLevelCounter = optionsScope.counter("no_safety_level") + val safetyLevelCounter = optionsScope.counter("safety_level") + val safetyLevelScope = optionsScope.scope("safety_level") + + Effect { + case GetTweetFieldsRequest(tweetIds, options) => + requestSizeStat.add(tweetIds.size) + options.tweetIncludes.foreach { + case TweetInclude.TweetFieldId(id) => tweetFieldsScope.counter(id.toString).incr() + case TweetInclude.CountsFieldId(id) => countsFieldsScope.counter(id.toString).incr() + case TweetInclude.MediaEntityFieldId(id) => mediaFieldsScope.counter(id.toString).incr() + case _ => + } + if (options.includeRetweetedTweet) 
includeRetweetedTweetCounter.incr() + if (options.includeQuotedTweet) includeQuotedTweetCounter.incr() + if (options.forUserId.nonEmpty) forUserIdCounter.incr() + if (options.cardsPlatformKey.nonEmpty) cardsPlatformKeyCounter.incr() + if (!byClient) { + options.cardsPlatformKey.foreach { cardsPlatformKey => + cardsPlatformKeyScope.counter(cardsPlatformKey).incr() + } + } + if (options.extensionsArgs.nonEmpty) extensionsArgsCounter.incr() + if (options.safetyLevel.nonEmpty) { + safetyLevelCounter.incr() + } else { + noSafetyLevelCounter.incr() + } + options.visibilityPolicy match { + case TweetVisibilityPolicy.UserVisible => userVisibleCounter.incr() + case TweetVisibilityPolicy.NoFiltering => noFilteringCounter.incr() + case _ => + } + options.safetyLevel.foreach { level => safetyLevelScope.counter(level.toString).incr() } + if (options.doNotCache) doNotCacheCounter.incr() + if (options.simpleQuotedTweet) simpleQuotedTweetCounter.incr() + } + } + + def observeResults(stats: StatsReceiver): Effect[Seq[GetTweetFieldsResult]] = { + val resultsCounter = stats.counter("results") + val resultsScope = stats.scope("results") + val observeState = GetTweetFieldsObserver.observeResultState(resultsScope) + + Effect { results => + resultsCounter.incr(results.size) + results.foreach { r => + observeState(r.tweetResult) + r.quotedTweetResult.foreach { qtResult => + resultsCounter.incr() + observeState(qtResult) + } + } + } + } + + /** + * Given a GetTweetFieldsResult result, do we observe the result as a failure or not. 
+ */ + private def isFailedResult(result: GetTweetFieldsResult): Boolean = { + result.tweetResult.isInstanceOf[TweetFieldsResultState.Failed] || + result.quotedTweetResult.exists(_.isInstanceOf[TweetFieldsResultState.Failed]) + } + + private def observeResultState(stats: StatsReceiver): Effect[TweetFieldsResultState] = { + val foundCounter = stats.counter("found") + val notFoundCounter = stats.counter("not_found") + val failedCounter = stats.counter("failed") + val filteredCounter = stats.counter("filtered") + val filteredReasonScope = stats.scope("filtered_reason") + val otherCounter = stats.counter("other") + val observeTweet = Observer + .countTweetAttributes(stats.scope("found"), byClient = false) + + Effect { + case TweetFieldsResultState.Found(found) => + foundCounter.incr() + observeTweet(found.tweet) + found.retweetedTweet.foreach(observeTweet) + + case TweetFieldsResultState.NotFound(_) => notFoundCounter.incr() + case TweetFieldsResultState.Failed(_) => failedCounter.incr() + case TweetFieldsResultState.Filtered(f) => + filteredCounter.incr() + // Since reasons have parameters, eg. 
AuthorBlockViewer(true) and we don't + // need the "(true)" part, we do .getClass.getSimpleName to get rid of that + filteredReasonScope.counter(f.reason.getClass.getSimpleName).incr() + + case _ => otherCounter.incr() + } + } + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetsObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetsObserver.scala new file mode 100644 index 000000000..77f1829a5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetsObserver.scala @@ -0,0 +1,120 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.tweetypie.thriftscala.GetTweetOptions +import com.twitter.tweetypie.thriftscala.GetTweetResult +import com.twitter.tweetypie.thriftscala.GetTweetsRequest + +private[service] object GetTweetsObserver { + type Type = ObserveExchange[GetTweetsRequest, Seq[GetTweetResult]] + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(xs) => + xs.foreach { + case result if Observer.successStatusStates(result.tweetState) => + resultStateStats.success() + case _ => + resultStateStats.failed() + } + case Throw(ClientError(_)) => + resultStateStats.success(request.tweetIds.size) + case Throw(_) => + resultStateStats.failed(request.tweetIds.size) + } + } + } + + def observeResults(stats: StatsReceiver, byClient: Boolean): Effect[Seq[GetTweetResult]] = + countStates(stats).also(countTweetReadAttributes(stats, byClient)) + + def observeRequest(stats: StatsReceiver, byClient: Boolean): Effect[GetTweetsRequest] = { + val requestSizeStat = stats.stat("request_size") + val optionsScope = stats.scope("options") + val languageScope = optionsScope.scope("language") + val includeSourceTweetCounter = 
optionsScope.counter("source_tweet") + val includeQuotedTweetCounter = optionsScope.counter("quoted_tweet") + val includePerspectiveCounter = optionsScope.counter("perspective") + val includeConversationMutedCounter = optionsScope.counter("conversation_muted") + val includePlacesCounter = optionsScope.counter("places") + val includeCardsCounter = optionsScope.counter("cards") + val includeRetweetCountsCounter = optionsScope.counter("retweet_counts") + val includeReplyCountsCounter = optionsScope.counter("reply_counts") + val includeFavoriteCountsCounter = optionsScope.counter("favorite_counts") + val includeQuoteCountsCounter = optionsScope.counter("quote_counts") + val bypassVisibilityFilteringCounter = optionsScope.counter("bypass_visibility_filtering") + val excludeReportedCounter = optionsScope.counter("exclude_reported") + val cardsPlatformKeyScope = optionsScope.scope("cards_platform_key") + val extensionsArgsCounter = optionsScope.counter("extensions_args") + val doNotCacheCounter = optionsScope.counter("do_not_cache") + val additionalFieldsScope = optionsScope.scope("additional_fields") + val safetyLevelScope = optionsScope.scope("safety_level") + val includeProfileGeoEnrichment = optionsScope.counter("profile_geo_enrichment") + val includeMediaAdditionalMetadata = optionsScope.counter("media_additional_metadata") + val simpleQuotedTweet = optionsScope.counter("simple_quoted_tweet") + val forUserIdCounter = optionsScope.counter("for_user_id") + + def includesPerspectivals(options: GetTweetOptions) = + options.includePerspectivals && options.forUserId.nonEmpty + + Effect { + case GetTweetsRequest(tweetIds, _, Some(options), _) => + requestSizeStat.add(tweetIds.size) + if (!byClient) languageScope.counter(options.languageTag).incr() + if (options.includeSourceTweet) includeSourceTweetCounter.incr() + if (options.includeQuotedTweet) includeQuotedTweetCounter.incr() + if (includesPerspectivals(options)) includePerspectiveCounter.incr() + if 
(options.includeConversationMuted) includeConversationMutedCounter.incr() + if (options.includePlaces) includePlacesCounter.incr() + if (options.includeCards) includeCardsCounter.incr() + if (options.includeRetweetCount) includeRetweetCountsCounter.incr() + if (options.includeReplyCount) includeReplyCountsCounter.incr() + if (options.includeFavoriteCount) includeFavoriteCountsCounter.incr() + if (options.includeQuoteCount) includeQuoteCountsCounter.incr() + if (options.bypassVisibilityFiltering) bypassVisibilityFilteringCounter.incr() + if (options.excludeReported) excludeReportedCounter.incr() + if (options.extensionsArgs.nonEmpty) extensionsArgsCounter.incr() + if (options.doNotCache) doNotCacheCounter.incr() + if (options.includeProfileGeoEnrichment) includeProfileGeoEnrichment.incr() + if (options.includeMediaAdditionalMetadata) includeMediaAdditionalMetadata.incr() + if (options.simpleQuotedTweet) simpleQuotedTweet.incr() + if (options.forUserId.nonEmpty) forUserIdCounter.incr() + if (!byClient) { + options.cardsPlatformKey.foreach { cardsPlatformKey => + cardsPlatformKeyScope.counter(cardsPlatformKey).incr() + } + } + options.additionalFieldIds.foreach { id => + additionalFieldsScope.counter(id.toString).incr() + } + options.safetyLevel.foreach { level => safetyLevelScope.counter(level.toString).incr() } + } + } + + /** + * We count the number of times each tweet state is returned as a + * general measure of the health of TweetyPie. partial and not_found + * tweet states should be close to zero. 
+ */ + private def countStates(stats: StatsReceiver): Effect[Seq[GetTweetResult]] = { + val state = Observer.observeStatusStates(stats) + Effect { results => results.foreach { tweetResult => state(tweetResult.tweetState) } } + } + + private def countTweetReadAttributes( + stats: StatsReceiver, + byClient: Boolean + ): Effect[Seq[GetTweetResult]] = { + val tweetObserver = Observer.countTweetAttributes(stats, byClient) + Effect { results => + results.foreach { tweetResult => tweetResult.tweet.foreach(tweetObserver) } + } + } + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/Observer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/Observer.scala new file mode 100644 index 000000000..c5a9782cb --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/Observer.scala @@ -0,0 +1,365 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.media.MediaKeyClassifier +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.tweettext.TweetText.codePointLength +import com.twitter.conversions.DurationOps._ + +/** + * Observer can be used for storing + * - one-off handler specific metrics with minor logic + * - reusable Tweetypie service metrics for multiple handlers + */ +private[service] object Observer { + + val successStatusStates: Set[StatusState] = Set( + StatusState.Found, + StatusState.NotFound, + StatusState.DeactivatedUser, + StatusState.SuspendedUser, + StatusState.ProtectedUser, + StatusState.ReportedTweet, + StatusState.UnsupportedClient, + StatusState.Drop, + StatusState.Suppress, + StatusState.Deleted, + StatusState.BounceDeleted + ) + + def observeStatusStates(statsReceiver: StatsReceiver): Effect[StatusState] = { + val stats = statsReceiver.scope("status_state") + val total = 
statsReceiver.counter("status_results") + + val foundCounter = stats.counter("found") + val notFoundCounter = stats.counter("not_found") + val partialCounter = stats.counter("partial") + val timedOutCounter = stats.counter("timed_out") + val failedCounter = stats.counter("failed") + val deactivatedCounter = stats.counter("deactivated") + val suspendedCounter = stats.counter("suspended") + val protectedCounter = stats.counter("protected") + val reportedCounter = stats.counter("reported") + val overCapacityCounter = stats.counter("over_capacity") + val unsupportedClientCounter = stats.counter("unsupported_client") + val dropCounter = stats.counter("drop") + val suppressCounter = stats.counter("suppress") + val deletedCounter = stats.counter("deleted") + val bounceDeletedCounter = stats.counter("bounce_deleted") + + Effect { st => + total.incr() + st match { + case StatusState.Found => foundCounter.incr() + case StatusState.NotFound => notFoundCounter.incr() + case StatusState.Partial => partialCounter.incr() + case StatusState.TimedOut => timedOutCounter.incr() + case StatusState.Failed => failedCounter.incr() + case StatusState.DeactivatedUser => deactivatedCounter.incr() + case StatusState.SuspendedUser => suspendedCounter.incr() + case StatusState.ProtectedUser => protectedCounter.incr() + case StatusState.ReportedTweet => reportedCounter.incr() + case StatusState.OverCapacity => overCapacityCounter.incr() + case StatusState.UnsupportedClient => unsupportedClientCounter.incr() + case StatusState.Drop => dropCounter.incr() + case StatusState.Suppress => suppressCounter.incr() + case StatusState.Deleted => deletedCounter.incr() + case StatusState.BounceDeleted => bounceDeletedCounter.incr() + case _ => + } + } + } + + def observeSetFieldsRequest(stats: StatsReceiver): Effect[SetAdditionalFieldsRequest] = + Effect { request => + val tweet = request.additionalFields + AdditionalFields.nonEmptyAdditionalFieldIds(tweet).foreach { id => + val fieldScope = 
"field_%d".format(id) + val fieldCounter = stats.counter(fieldScope) + val sizeStats = stats.stat(fieldScope) + + tweet.getFieldBlob(id).foreach { blob => + fieldCounter.incr() + sizeStats.add(blob.content.length) + } + } + } + + def observeSetRetweetVisibilityRequest( + stats: StatsReceiver + ): Effect[SetRetweetVisibilityRequest] = { + val setInvisibleCounter = stats.counter("set_invisible") + val setVisibleCounter = stats.counter("set_visible") + + Effect { request => + if (request.visible) setVisibleCounter.incr() else setInvisibleCounter.incr() + } + } + + def observeDeleteFieldsRequest(stats: StatsReceiver): Effect[DeleteAdditionalFieldsRequest] = { + val requestSizeStat = stats.stat("request_size") + + Effect { request => + requestSizeStat.add(request.tweetIds.size) + + request.fieldIds.foreach { id => + val fieldScope = "field_%d".format(id) + val fieldCounter = stats.counter(fieldScope) + fieldCounter.incr() + } + } + } + + def observeDeleteTweetsRequest(stats: StatsReceiver): Effect[DeleteTweetsRequest] = { + val requestSizeStat = stats.stat("request_size") + val userErasureTweetsStat = stats.counter("user_erasure_tweets") + val isBounceDeleteStat = stats.counter("is_bounce_delete_tweets") + + Effect { + case DeleteTweetsRequest(tweetIds, _, _, _, isUserErasure, _, isBounceDelete, _, _) => + requestSizeStat.add(tweetIds.size) + if (isUserErasure) { + userErasureTweetsStat.incr(tweetIds.size) + } + if (isBounceDelete) { + isBounceDeleteStat.incr(tweetIds.size) + } + } + } + + def observeRetweetRequest(stats: StatsReceiver): Effect[RetweetRequest] = { + val optionsScope = stats.scope("options") + val narrowcastCounter = optionsScope.counter("narrowcast") + val nullcastCounter = optionsScope.counter("nullcast") + val darkCounter = optionsScope.counter("dark") + val successOnDupCounter = optionsScope.counter("success_on_dup") + + Effect { request => + if (request.narrowcast.nonEmpty) narrowcastCounter.incr() + if (request.nullcast) nullcastCounter.incr() + if 
(request.dark) darkCounter.incr() + if (request.returnSuccessOnDuplicate) successOnDupCounter.incr() + } + } + + def observeScrubGeo(stats: StatsReceiver): Effect[GeoScrub] = { + val optionsScope = stats.scope("options") + val hosebirdEnqueueCounter = optionsScope.counter("hosebird_enqueue") + val requestSizeStat = stats.stat("request_size") + + Effect { request => + requestSizeStat.add(request.statusIds.size) + if (request.hosebirdEnqueue) hosebirdEnqueueCounter.incr() + } + } + + def observeEventOrRetry(stats: StatsReceiver, isRetry: Boolean): Unit = { + val statName = if (isRetry) "retry" else "event" + stats.counter(statName).incr() + } + + def observeAsyncInsertRequest(stats: StatsReceiver): Effect[AsyncInsertRequest] = { + val insertScope = stats.scope("insert") + val ageStat = insertScope.stat("age") + Effect { request => + observeEventOrRetry(insertScope, request.retryAction.isDefined) + ageStat.add(SnowflakeId.timeFromId(request.tweet.id).untilNow.inMillis) + } + } + + def observeAsyncSetAdditionalFieldsRequest( + stats: StatsReceiver + ): Effect[AsyncSetAdditionalFieldsRequest] = { + val setAdditionalFieldsScope = stats.scope("set_additional_fields") + Effect { request => + observeEventOrRetry(setAdditionalFieldsScope, request.retryAction.isDefined) + } + } + + def observeAsyncSetRetweetVisibilityRequest( + stats: StatsReceiver + ): Effect[AsyncSetRetweetVisibilityRequest] = { + val setRetweetVisibilityScope = stats.scope("set_retweet_visibility") + + Effect { request => + observeEventOrRetry(setRetweetVisibilityScope, request.retryAction.isDefined) + } + } + + def observeAsyncUndeleteTweetRequest(stats: StatsReceiver): Effect[AsyncUndeleteTweetRequest] = { + val undeleteTweetScope = stats.scope("undelete_tweet") + Effect { request => observeEventOrRetry(undeleteTweetScope, request.retryAction.isDefined) } + } + + def observeAsyncDeleteTweetRequest(stats: StatsReceiver): Effect[AsyncDeleteRequest] = { + val deleteTweetScope = stats.scope("delete_tweet") + 
Effect { request => observeEventOrRetry(deleteTweetScope, request.retryAction.isDefined) } + } + + def observeAsyncDeleteAdditionalFieldsRequest( + stats: StatsReceiver + ): Effect[AsyncDeleteAdditionalFieldsRequest] = { + val deleteAdditionalFieldsScope = stats.scope("delete_additional_fields") + Effect { request => + observeEventOrRetry( + deleteAdditionalFieldsScope, + request.retryAction.isDefined + ) + } + } + + def observeAsyncTakedownRequest(stats: StatsReceiver): Effect[AsyncTakedownRequest] = { + val takedownScope = stats.scope("takedown") + Effect { request => observeEventOrRetry(takedownScope, request.retryAction.isDefined) } + } + + def observeAsyncUpdatePossiblySensitiveTweetRequest( + stats: StatsReceiver + ): Effect[AsyncUpdatePossiblySensitiveTweetRequest] = { + val updatePossiblySensitiveTweetScope = stats.scope("update_possibly_sensitive_tweet") + Effect { request => + observeEventOrRetry(updatePossiblySensitiveTweetScope, request.action.isDefined) + } + } + + def observeReplicatedInsertTweetRequest(stats: StatsReceiver): Effect[Tweet] = { + val ageStat = stats.stat("age") // in milliseconds + Effect { request => ageStat.add(SnowflakeId.timeFromId(request.id).untilNow.inMillis) } + } + + def camelToUnderscore(str: String): String = { + val bldr = new StringBuilder + str.foldLeft(false) { (prevWasLowercase, c) => + if (prevWasLowercase && c.isUpper) { + bldr += '_' + } + bldr += c.toLower + c.isLower + } + bldr.result + } + + def observeAdditionalFields(stats: StatsReceiver): Effect[Tweet] = { + val additionalScope = stats.scope("additional_fields") + + Effect { tweet => + for (fieldId <- AdditionalFields.nonEmptyAdditionalFieldIds(tweet)) + additionalScope.counter(fieldId.toString).incr() + } + } + + /** + * We count how many tweets have each of these attributes so that we + * can observe general trends, as well as for tracking down the + * cause of behavior changes, like increased calls to certain + * services. 
+   */
+  def countTweetAttributes(stats: StatsReceiver, byClient: Boolean): Effect[Tweet] = {
+    // All metrics except the per-language counters are resolved once here, so the
+    // returned Effect only increments pre-built counters and stats.
+    val ageStat = stats.stat("age")
+    val tweetCounter = stats.counter("tweets")
+    val retweetCounter = stats.counter("retweets")
+    val repliesCounter = stats.counter("replies")
+    val inReplyToTweetCounter = stats.counter("in_reply_to_tweet")
+    val selfRepliesCounter = stats.counter("self_replies")
+    val directedAtCounter = stats.counter("directed_at")
+    val mentionsCounter = stats.counter("mentions")
+    val mentionsStat = stats.stat("mentions")
+    val urlsCounter = stats.counter("urls")
+    val urlsStat = stats.stat("urls")
+    val hashtagsCounter = stats.counter("hashtags")
+    val hashtagsStat = stats.stat("hashtags")
+    val mediaCounter = stats.counter("media")
+    val mediaStat = stats.stat("media")
+    val photosCounter = stats.counter("media", "photos")
+    val gifsCounter = stats.counter("media", "animated_gifs")
+    val videosCounter = stats.counter("media", "videos")
+    val cardsCounter = stats.counter("cards")
+    val card2Counter = stats.counter("card2")
+    val geoCoordsCounter = stats.counter("geo_coordinates")
+    val placeCounter = stats.counter("place")
+    val quotedTweetCounter = stats.counter("quoted_tweet")
+    val selfRetweetCounter = stats.counter("self_retweet")
+    val languageScope = stats.scope("language")
+    val textLengthStat = stats.stat("text_length")
+    val selfThreadCounter = stats.counter("self_thread")
+    val communitiesTweetCounter = stats.counter("communities")
+
+    observeAdditionalFields(stats).also {
+      Effect[Tweet] { tweet =>
+        // Accessors over the optional core data of the tweet being observed.
+        def coreDataField[T](f: TweetCoreData => T): Option[T] =
+          tweet.coreData.map(f)
+
+        def coreDataOptionField[T](f: TweetCoreData => Option[T]): Option[T] =
+          tweet.coreData.flatMap(f)
+
+        // Creation time comes from the id when it is a snowflake id, otherwise
+        // from the core data timestamp (absent when the tweet has no core data).
+        val createdAtOpt =
+          if (SnowflakeId.isSnowflakeId(tweet.id)) Some(SnowflakeId.timeFromId(tweet.id))
+          else coreDataField(_.createdAtSecs.seconds.afterEpoch)
+        createdAtOpt.foreach(createdAt => ageStat.add(createdAt.untilNow.inSeconds))
+
+        // Everything below is recorded only when byClient is false.
+        if (!byClient) {
+          val mentions = getMentions(tweet)
+          val urls = getUrls(tweet)
+          val hashtags = getHashtags(tweet)
+          val media = getMedia(tweet)
+          val mediaKeys = media.flatMap(_.mediaKey)
+          val share = coreDataOptionField(_.share)
+          val selfThreadMetadata = getSelfThreadMetadata(tweet)
+          val communities = getCommunities(tweet)
+
+          tweetCounter.incr()
+          if (share.isDefined) retweetCounter.incr()
+          if (coreDataOptionField(_.directedAtUser).isDefined) directedAtCounter.incr()
+
+          coreDataOptionField(_.reply).foreach { reply =>
+            repliesCounter.incr()
+            // Every tweet carrying a Reply struct is counted above, which covers
+            // both directed-at tweets and conversational replies. Only the latter
+            // have inReplyToStatusId, so this second counter separates the two.
+            if (reply.inReplyToStatusId.nonEmpty) inReplyToTweetCounter.incr()
+
+            // isSelfReply() needs CoreData, which not every Tweet has; being inside
+            // the `coreDataOptionField(_.reply)` callback guarantees it is present.
+            if (isSelfReply(tweet)) selfRepliesCounter.incr()
+          }
+
+          // Presence counters: one increment per tweet that has the attribute.
+          if (mentions.nonEmpty) mentionsCounter.incr()
+          if (urls.nonEmpty) urlsCounter.incr()
+          if (hashtags.nonEmpty) hashtagsCounter.incr()
+          if (media.nonEmpty) mediaCounter.incr()
+          if (selfThreadMetadata.nonEmpty) selfThreadCounter.incr()
+          if (communities.nonEmpty) communitiesTweetCounter.incr()
+
+          // Distribution stats: how many of each entity the tweet carries.
+          mentionsStat.add(mentions.size)
+          urlsStat.add(urls.size)
+          hashtagsStat.add(hashtags.size)
+          mediaStat.add(media.size)
+
+          if (mediaKeys.exists(key => MediaKeyClassifier.isImage(key))) photosCounter.incr()
+          if (mediaKeys.exists(key => MediaKeyClassifier.isGif(key))) gifsCounter.incr()
+          if (mediaKeys.exists(key => MediaKeyClassifier.isVideo(key))) videosCounter.incr()
+
+          if (tweet.cards.exists(_.nonEmpty)) cardsCounter.incr()
+          if (tweet.card2.nonEmpty) card2Counter.incr()
+          if (coreDataOptionField(_.coordinates).nonEmpty) geoCoordsCounter.incr()
+          if (TweetLenses.place.get(tweet).nonEmpty) placeCounter.incr()
+          if (TweetLenses.quotedTweet.get(tweet).nonEmpty) quotedTweetCounter.incr()
+          if (share.exists(s => s.sourceUserId == getUserId(tweet))) selfRetweetCounter.incr()
+
+          tweet.language.foreach(l => languageScope.counter(l.language).incr())
+          tweet.coreData.foreach(cd => textLengthStat.add(codePointLength(cd.text)))
+        }
+      }
+    }
+  }
+
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/PostTweetObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/PostTweetObserver.scala
new file mode 100644
index 000000000..6d20169d0
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/PostTweetObserver.scala
@@ -0,0 +1,82 @@
+package com.twitter.tweetypie
+package service
+package observer
+
+import com.twitter.escherbird.thriftscala.TweetEntityAnnotation
+import com.twitter.tweetypie.thriftscala.BatchComposeMode
+import com.twitter.tweetypie.thriftscala.PostTweetRequest
+import
com.twitter.tweetypie.thriftscala.PostTweetResult
+import com.twitter.tweetypie.thriftscala.TweetCreateState
+import com.twitter.util.Memoize
+
+/**
+ * Stats effects for post-tweet requests and their results.
+ */
+private[service] object PostTweetObserver {
+
+  /**
+   * Builds an effect that counts each result under `state/<state name in snake case>` and,
+   * for results whose state is `Ok`, feeds the created tweet (when present) to
+   * [[Observer.countTweetAttributes]].
+   *
+   * @param stats    receiver under which all counters are created
+   * @param byClient passed through unchanged to [[Observer.countTweetAttributes]]
+   */
+  def observeResults(stats: StatsReceiver, byClient: Boolean): Effect[PostTweetResult] = {
+    val stateScope = stats.scope("state")
+    val tweetObserver = Observer.countTweetAttributes(stats, byClient)
+
+    // Memoized so each state's counter is looked up only once.
+    val stateCounters =
+      Memoize { st: TweetCreateState => stateScope.counter(Observer.camelToUnderscore(st.name)) }
+
+    Effect { result =>
+      stateCounters(result.state).incr()
+      if (result.state == TweetCreateState.Ok) result.tweet.foreach(tweetObserver)
+    }
+  }
+
+  /**
+   * True when the request's additional fields carry an escherbird entity annotation whose
+   * first two components equal the community group id (8) and domain id (31) below.
+   */
+  private def isCommunity(req: PostTweetRequest): Boolean = {
+    // Capitalised so they are matched as stable identifiers in the pattern below.
+    val CommunityGroupId = 8L
+    val CommunityDomainId = 31L
+    req.additionalFields
+      .flatMap(_.escherbirdEntityAnnotations)
+      .exists { annotations =>
+        annotations.entityAnnotations.exists {
+          case TweetEntityAnnotation(CommunityGroupId, CommunityDomainId, _) => true
+          case _ => false
+        }
+      }
+  }
+
+  /**
+   * Builds an effect that increments one counter under the `options` scope for every
+   * request option that is set, and records how many reply user ids were excluded.
+   *
+   * @param stats receiver under which the `options` scope is created
+   */
+  def observerRequest(stats: StatsReceiver): Effect[PostTweetRequest] = {
+    val optionsScope = stats.scope("options")
+    val narrowcastCounter = optionsScope.counter("narrowcast")
+    val nullcastCounter = optionsScope.counter("nullcast")
+    val inReplyToStatusIdCounter = optionsScope.counter("in_reply_to_status_id")
+    val placeIdCounter = optionsScope.counter("place_id")
+    val geoCoordinatesCounter = optionsScope.counter("geo_coordinates")
+    val placeMetadataCounter = optionsScope.counter("place_metadata")
+    val mediaUploadIdCounter = optionsScope.counter("media_upload_id")
+    val darkCounter = optionsScope.counter("dark")
+    val tweetToNarrowcastingCounter = optionsScope.counter("tweet_to_narrowcasting")
+    val autoPopulateReplyMetadataCounter = optionsScope.counter("auto_populate_reply_metadata")
+    val attachmentUrlCounter = optionsScope.counter("attachment_url")
+    val excludeReplyUserIdsCounter = optionsScope.counter("exclude_reply_user_ids")
+    val excludeReplyUserIdsStat = optionsScope.stat("exclude_reply_user_ids")
+    val uniquenessIdCounter = optionsScope.counter("uniqueness_id")
+    val batchModeScope = optionsScope.scope("batch_mode")
+    val batchModeFirstCounter = batchModeScope.counter("first")
+    val batchModeSubsequentCounter = batchModeScope.counter("subsequent")
+    val communitiesCounter = optionsScope.counter("communities")
+
+    Effect { request =>
+      if (request.narrowcast.nonEmpty) narrowcastCounter.incr()
+      if (request.nullcast) nullcastCounter.incr()
+      if (request.inReplyToTweetId.nonEmpty) inReplyToStatusIdCounter.incr()
+      if (request.geo.flatMap(_.placeId).nonEmpty) placeIdCounter.incr()
+      if (request.geo.flatMap(_.coordinates).nonEmpty) geoCoordinatesCounter.incr()
+      if (request.geo.flatMap(_.placeMetadata).nonEmpty) placeMetadataCounter.incr()
+      if (request.mediaUploadIds.nonEmpty) mediaUploadIdCounter.incr()
+      if (request.dark) darkCounter.incr()
+      if (request.enableTweetToNarrowcasting) tweetToNarrowcastingCounter.incr()
+      if (request.autoPopulateReplyMetadata) autoPopulateReplyMetadataCounter.incr()
+      if (request.attachmentUrl.nonEmpty) attachmentUrlCounter.incr()
+      if (request.excludeReplyUserIds.exists(_.nonEmpty)) excludeReplyUserIdsCounter.incr()
+      if (isCommunity(request)) communitiesCounter.incr()
+      if (request.uniquenessId.nonEmpty) uniquenessIdCounter.incr()
+      request.transientContext.flatMap(_.batchCompose).foreach {
+        case BatchComposeMode.BatchFirst => batchModeFirstCounter.incr()
+        case BatchComposeMode.BatchSubsequent => batchModeSubsequentCounter.incr()
+        case _ =>
+      }
+
+      // excludeReplyUserIds is optional; calling `.size` on the Option itself yields only
+      // 0 or 1, so measure the wrapped collection and record 0 when the field is unset.
+      excludeReplyUserIdsStat.add(request.excludeReplyUserIds.fold(0)(_.size))
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/ResultStateStats.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/ResultStateStats.scala
new file mode 100644
index 000000000..b9cedf68e
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/ResultStateStats.scala
@@
-0,0 +1,19 @@
+package com.twitter.tweetypie
+package service
+package observer
+
+import com.twitter.finagle.stats.StatsReceiver
+
+/**
+ * "Result State": for every single tweet read, the tweet result is categorized
+ * as a success or a failure.
+ * These stats enable us to track true TPS success rates.
+ */
+private[service] case class ResultStateStats(private val underlying: StatsReceiver) {
+  private val stats = underlying.scope("result_state")
+  private val successCounter = stats.counter("success")
+  private val failedCounter = stats.counter("failed")
+
+  /** Adds `delta` (default 1) to the `result_state/success` counter. */
+  def success(delta: Long = 1): Unit = successCounter.incr(delta)
+
+  /** Adds `delta` (default 1) to the `result_state/failed` counter. */
+  def failed(delta: Long = 1): Unit = failedCounter.incr(delta)
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/StoredTweetsObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/StoredTweetsObserver.scala
new file mode 100644
index 000000000..8a525c158
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/StoredTweetsObserver.scala
@@ -0,0 +1,56 @@
+package com.twitter.tweetypie
+package service
+package observer
+
+import com.twitter.tweetypie.thriftscala.StoredTweetError
+import com.twitter.tweetypie.thriftscala.StoredTweetInfo
+import com.twitter.tweetypie.thriftscala.StoredTweetState.BounceDeleted
+import com.twitter.tweetypie.thriftscala.StoredTweetState.ForceAdded
+import com.twitter.tweetypie.thriftscala.StoredTweetState.HardDeleted
+import com.twitter.tweetypie.thriftscala.StoredTweetState.NotFound
+import com.twitter.tweetypie.thriftscala.StoredTweetState.SoftDeleted
+import com.twitter.tweetypie.thriftscala.StoredTweetState.Undeleted
+import com.twitter.tweetypie.thriftscala.StoredTweetState.UnknownUnionField
+
+private[service] trait StoredTweetsObserver {
+
+  /**
+   * Records stats for one batch of stored-tweet results:
+   *  - `count`: incremented by the number of results,
+   *  - `state/<name>`: incremented by the number of results in each stored state
+   *    (`found` when no state is set),
+   *  - `error/<name>`: incremented by the number of occurrences of each error.
+   *
+   * @param storedTweets the results to observe
+   * @param stats        receiver under which the counters are created
+   */
+  protected def observeStoredTweets(
+    storedTweets: Seq[StoredTweetInfo],
+    stats: StatsReceiver
+  ): Unit = {
+    val stateScope = stats.scope("state")
+    val errorScope = stats.scope("error")
+
+    val sizeCounter = stats.counter("count")
+    sizeCounter.incr(storedTweets.size)
+
+    val returnedStatesCount = storedTweets
+      .groupBy(_.storedTweetState match {
+        case None => "found"
+        case Some(_: HardDeleted) => "hard_deleted"
+        case Some(_: SoftDeleted) => "soft_deleted"
+        case Some(_: BounceDeleted) => "bounce_deleted"
+        case Some(_: Undeleted) => "undeleted"
+        case Some(_: ForceAdded) => "force_added"
+        case Some(_: NotFound) => "not_found"
+        case Some(_: UnknownUnionField) => "unknown"
+      })
+      .mapValues(_.size)
+
+    returnedStatesCount.foreach {
+      case (state, count) => stateScope.counter(state).incr(count)
+    }
+
+    // flatMap gathers every error in a single pass; folding with `++` re-copied the
+    // accumulated sequence for each result.
+    val returnedErrorsCount = storedTweets
+      .flatMap(_.errors)
+      .groupBy(_.name)
+      .mapValues(_.size)
+
+    returnedErrorsCount.foreach {
+      case (error, count) => errorScope.counter(error).incr(count)
+    }
+  }
+
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/package.scala
new file mode 100644
index 000000000..4cfaea9f4
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/package.scala
@@ -0,0 +1,13 @@
+package com.twitter.tweetypie
+package service
+
+import com.twitter.util.Try
+
+package object observer {
+
+  /**
+   * Generic Request/Result observer container for making observations on both requests/results.
+   */
+  type ObserveExchange[Req, Res] = (Req, Try[Res])
+
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/package.scala
new file mode 100644
index 000000000..c6e0e861b
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/package.scala
@@ -0,0 +1,12 @@
+package com.twitter.tweetypie
+
+import com.twitter.servo.request
+import com.twitter.servo.request.ClientRequestAuthorizer
+
+// Re-exports servo's ClientRequestAuthorizer and its UnauthorizedException (both the type
+// and the companion value) under the `service` package.
+package object service {
+  type ClientRequestAuthorizer = request.ClientRequestAuthorizer
+
+  type UnauthorizedException = request.ClientRequestAuthorizer.UnauthorizedException
+  val UnauthorizedException: ClientRequestAuthorizer.UnauthorizedException.type =
+    request.ClientRequestAuthorizer.UnauthorizedException
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/AsyncEnqueueStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/AsyncEnqueueStore.scala
new file mode 100644
index 000000000..3ad816e40
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/AsyncEnqueueStore.scala
@@ -0,0 +1,95 @@
+package com.twitter.tweetypie
+package store
+
+import com.twitter.tweetypie.thriftscala._
+
+/**
+ * AsyncEnqueueStore converts certain TweetStoreEvent types into their async-counterpart
+ * events, and enqueues those to a deferredrpc-backed ThriftTweetService instance.
+ */
+trait AsyncEnqueueStore
+    extends TweetStoreBase[AsyncEnqueueStore]
+    with InsertTweet.Store
+    with DeleteTweet.Store
+    with UndeleteTweet.Store
+    with IncrFavCount.Store
+    with IncrBookmarkCount.Store
+    with SetAdditionalFields.Store
+    with SetRetweetVisibility.Store
+    with Takedown.Store
+    with DeleteAdditionalFields.Store
+    with UpdatePossiblySensitiveTweet.Store {
+  // Returns a copy of this store wrapped with `w`; one StoreWrapper is mixed in for each
+  // Store trait mixed in above.
+  def wrap(w: TweetStore.Wrap): AsyncEnqueueStore =
+    new TweetStoreWrapper[AsyncEnqueueStore](w, this)
+      with AsyncEnqueueStore
+      with InsertTweet.StoreWrapper
+      with DeleteTweet.StoreWrapper
+      with UndeleteTweet.StoreWrapper
+      with IncrFavCount.StoreWrapper
+      with IncrBookmarkCount.StoreWrapper
+      with SetAdditionalFields.StoreWrapper
+      with SetRetweetVisibility.StoreWrapper
+      with Takedown.StoreWrapper
+      with DeleteAdditionalFields.StoreWrapper
+      with UpdatePossiblySensitiveTweet.StoreWrapper
+}
+
+object AsyncEnqueueStore {
+
+  /**
+   * Builds a store in which every effect converts its event to the matching async request
+   * and passes it to the corresponding `async*` method of `tweetService`.
+   *
+   * @param tweetService                   service that receives the async requests
+   * @param scrubUserInAsyncInserts        passed to `InsertTweet.Event.toAsyncRequest`
+   * @param scrubSourceTweetInAsyncInserts passed to `InsertTweet.Event.toAsyncRequest`
+   * @param scrubSourceUserInAsyncInserts  passed to `InsertTweet.Event.toAsyncRequest`
+   */
+  def apply(
+    tweetService: ThriftTweetService,
+    scrubUserInAsyncInserts: User => User,
+    scrubSourceTweetInAsyncInserts: Tweet => Tweet,
+    scrubSourceUserInAsyncInserts: User => User
+  ): AsyncEnqueueStore =
+    new AsyncEnqueueStore {
+      // Inserts are the only events whose conversion takes the scrubbing functions.
+      override val insertTweet: FutureEffect[InsertTweet.Event] =
+        FutureEffect[InsertTweet.Event] { e =>
+          tweetService.asyncInsert(
+            e.toAsyncRequest(
+              scrubUserInAsyncInserts,
+              scrubSourceTweetInAsyncInserts,
+              scrubSourceUserInAsyncInserts
+            )
+          )
+        }
+
+      override val deleteTweet: FutureEffect[DeleteTweet.Event] =
+        FutureEffect[DeleteTweet.Event] { e => tweetService.asyncDelete(e.toAsyncRequest) }
+
+      override val undeleteTweet: FutureEffect[UndeleteTweet.Event] =
+        FutureEffect[UndeleteTweet.Event] { e =>
+          tweetService.asyncUndeleteTweet(e.toAsyncUndeleteTweetRequest)
+        }
+
+      override val incrFavCount: FutureEffect[IncrFavCount.Event] =
+        FutureEffect[IncrFavCount.Event] { e => tweetService.asyncIncrFavCount(e.toAsyncRequest) }
+
+      override val incrBookmarkCount: FutureEffect[IncrBookmarkCount.Event] =
+        FutureEffect[IncrBookmarkCount.Event] { e =>
+          tweetService.asyncIncrBookmarkCount(e.toAsyncRequest)
+        }
+
+      override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] =
+        FutureEffect[SetAdditionalFields.Event] { e =>
+          tweetService.asyncSetAdditionalFields(e.toAsyncRequest)
+        }
+
+      override val setRetweetVisibility: FutureEffect[SetRetweetVisibility.Event] =
+        FutureEffect[SetRetweetVisibility.Event] { e =>
+          tweetService.asyncSetRetweetVisibility(e.toAsyncRequest)
+        }
+
+      override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] =
+        FutureEffect[DeleteAdditionalFields.Event] { e =>
+          tweetService.asyncDeleteAdditionalFields(e.toAsyncRequest)
+        }
+
+      override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] =
+        FutureEffect[UpdatePossiblySensitiveTweet.Event] { e =>
+          tweetService.asyncUpdatePossiblySensitiveTweet(e.toAsyncRequest)
+        }
+
+      override val takedown: FutureEffect[Takedown.Event] =
+        FutureEffect[Takedown.Event] { e => tweetService.asyncTakedown(e.toAsyncRequest) }
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD
new file mode 100644
index 000000000..542f5ee81
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD
@@ -0,0 +1,60 @@
+scala_library(
+    sources = ["*.scala"],
+    compiler_option_sets = ["fatal_warnings"],
+    strict_deps = True,
+    tags = [
+        "bazel-compatible",
+        "bazel-incompatible-scaladoc",  # see http://go/bazel-incompatible-scaladoc
+    ],
+    dependencies = [
+        "3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind",
+        "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala",
+        "3rdparty/jvm/com/twitter/bijection:core",
+        "3rdparty/jvm/com/twitter/bijection:thrift",
+        "3rdparty/jvm/org/apache/thrift:libthrift",
+        "diffshow",
+        "fanoutservice/thrift/src/main/thrift:thrift-scala",
+
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle/finagle-core/src/main", + "flock-client/src/main/scala", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-core", + "tweetypie/servo/repo", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "snowflake/src/main/scala/com/twitter/snowflake/id", + "src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/context:feature-context-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/geoduck:geoduck-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/guano:guano-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:audit-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:events-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + 
"tweetypie/common/src/scala/com/twitter/tweetypie/media", + "tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/tflock", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala new file mode 100644 index 000000000..2f4dd6387 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala @@ -0,0 +1,420 @@ +package com.twitter.tweetypie +package store + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.twitter.scrooge.TFieldBlob +import com.twitter.servo.cache.LockingCache._ +import com.twitter.servo.cache._ +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.repository.CachedBounceDeleted.isBounceDeleted +import com.twitter.tweetypie.repository.CachedBounceDeleted.toBounceDeletedCachedTweet +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.store.TweetUpdate._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Time +import diffshow.DiffShow + +trait CachingTweetStore + extends TweetStoreBase[CachingTweetStore] + with InsertTweet.Store + with ReplicatedInsertTweet.Store + with DeleteTweet.Store + with AsyncDeleteTweet.Store + with ReplicatedDeleteTweet.Store + with UndeleteTweet.Store + with AsyncUndeleteTweet.Store + with ReplicatedUndeleteTweet.Store + with SetAdditionalFields.Store + with ReplicatedSetAdditionalFields.Store + with DeleteAdditionalFields.Store + with AsyncDeleteAdditionalFields.Store + with ReplicatedDeleteAdditionalFields.Store + with ScrubGeo.Store + with ReplicatedScrubGeo.Store + 
with Takedown.Store
+    with ReplicatedTakedown.Store
+    with Flush.Store
+    with UpdatePossiblySensitiveTweet.Store
+    with AsyncUpdatePossiblySensitiveTweet.Store
+    with ReplicatedUpdatePossiblySensitiveTweet.Store {
+  // Returns a copy of this store wrapped with `w`, mixing in a StoreWrapper for each of
+  // the listed Store traits.
+  def wrap(w: TweetStore.Wrap): CachingTweetStore =
+    new TweetStoreWrapper(w, this)
+      with CachingTweetStore
+      with InsertTweet.StoreWrapper
+      with ReplicatedInsertTweet.StoreWrapper
+      with DeleteTweet.StoreWrapper
+      with AsyncDeleteTweet.StoreWrapper
+      with ReplicatedDeleteTweet.StoreWrapper
+      with UndeleteTweet.StoreWrapper
+      with AsyncUndeleteTweet.StoreWrapper
+      with ReplicatedUndeleteTweet.StoreWrapper
+      with SetAdditionalFields.StoreWrapper
+      with ReplicatedSetAdditionalFields.StoreWrapper
+      with DeleteAdditionalFields.StoreWrapper
+      with AsyncDeleteAdditionalFields.StoreWrapper
+      with ReplicatedDeleteAdditionalFields.StoreWrapper
+      with ScrubGeo.StoreWrapper
+      with ReplicatedScrubGeo.StoreWrapper
+      with Takedown.StoreWrapper
+      with ReplicatedTakedown.StoreWrapper
+      with Flush.StoreWrapper
+      with UpdatePossiblySensitiveTweet.StoreWrapper
+      with AsyncUpdatePossiblySensitiveTweet.StoreWrapper
+      with ReplicatedUpdatePossiblySensitiveTweet.StoreWrapper
+}
+
+object CachingTweetStore {
+  // The async write action used for every retry effect defined below.
+  val Action: AsyncWriteAction.CacheUpdate.type = AsyncWriteAction.CacheUpdate
+
+  /**
+   * Builds a CachingTweetStore whose effects all delegate to a single
+   * CachingTweetStoreOps created from the given cache, key factory and stats receiver.
+   * Each `retryAsync*` effect is the matching `async*` effect wrapped by
+   * `TweetStore.retry(Action, ...)`.
+   */
+  def apply(
+    tweetCache: LockingCache[TweetKey, Cached[CachedTweet]],
+    tweetKeyFactory: TweetKeyFactory,
+    stats: StatsReceiver
+  ): CachingTweetStore = {
+    val ops =
+      new CachingTweetStoreOps(
+        tweetCache,
+        tweetKeyFactory,
+        stats
+      )
+
+    new CachingTweetStore {
+      override val insertTweet: FutureEffect[InsertTweet.Event] = {
+        FutureEffect[InsertTweet.Event](e =>
+          ops.insertTweet(e.internalTweet, e.initialTweetUpdateRequest))
+      }
+
+      override val replicatedInsertTweet: FutureEffect[ReplicatedInsertTweet.Event] =
+        FutureEffect[ReplicatedInsertTweet.Event](e =>
+          ops.insertTweet(e.cachedTweet, e.initialTweetUpdateRequest))
+
+      // The sync and async delete paths always pass updateOnly = true; only the
+      // replicated path derives updateOnly from the event (its isErasure flag).
+      override val deleteTweet: FutureEffect[DeleteTweet.Event] =
+        FutureEffect[DeleteTweet.Event](e =>
+          ops.deleteTweet(e.tweet.id, updateOnly = true, isBounceDelete = e.isBounceDelete))
+
+      override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] =
+        FutureEffect[AsyncDeleteTweet.Event](e =>
+          ops.deleteTweet(e.tweet.id, updateOnly = true, isBounceDelete = e.isBounceDelete))
+
+      override val retryAsyncDeleteTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncDeleteTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncDeleteTweet)
+
+      override val replicatedDeleteTweet: FutureEffect[ReplicatedDeleteTweet.Event] =
+        FutureEffect[ReplicatedDeleteTweet.Event](e =>
+          ops.deleteTweet(
+            tweetId = e.tweet.id,
+            updateOnly = e.isErasure,
+            isBounceDelete = e.isBounceDelete
+          ))
+
+      override val undeleteTweet: FutureEffect[UndeleteTweet.Event] =
+        FutureEffect[UndeleteTweet.Event](e => ops.undeleteTweet(e.internalTweet))
+
+      override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] =
+        FutureEffect[AsyncUndeleteTweet.Event](e => ops.undeleteTweet(e.cachedTweet))
+
+      override val retryAsyncUndeleteTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncUndeleteTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncUndeleteTweet)
+
+      override val replicatedUndeleteTweet: FutureEffect[ReplicatedUndeleteTweet.Event] =
+        FutureEffect[ReplicatedUndeleteTweet.Event](e => ops.undeleteTweet(e.cachedTweet))
+
+      override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] =
+        FutureEffect[SetAdditionalFields.Event](e => ops.setAdditionalFields(e.additionalFields))
+
+      override val replicatedSetAdditionalFields: FutureEffect[
+        ReplicatedSetAdditionalFields.Event
+      ] =
+        FutureEffect[ReplicatedSetAdditionalFields.Event](e =>
+          ops.setAdditionalFields(e.additionalFields))
+
+      override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] =
+        FutureEffect[DeleteAdditionalFields.Event](e =>
+          ops.deleteAdditionalFields(e.tweetId, e.fieldIds))
+
+      override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] =
+        FutureEffect[AsyncDeleteAdditionalFields.Event](e =>
+          ops.deleteAdditionalFields(e.tweetId, e.fieldIds))
+
+      override val retryAsyncDeleteAdditionalFields: FutureEffect[
+        TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event]
+      ] =
+        TweetStore.retry(Action, asyncDeleteAdditionalFields)
+
+      override val replicatedDeleteAdditionalFields: FutureEffect[
+        ReplicatedDeleteAdditionalFields.Event
+      ] =
+        FutureEffect[ReplicatedDeleteAdditionalFields.Event](e =>
+          ops.deleteAdditionalFields(e.tweetId, e.fieldIds))
+
+      override val scrubGeo: FutureEffect[ScrubGeo.Event] =
+        FutureEffect[ScrubGeo.Event](e => ops.scrubGeo(e.tweetIds))
+
+      override val replicatedScrubGeo: FutureEffect[ReplicatedScrubGeo.Event] =
+        FutureEffect[ReplicatedScrubGeo.Event](e => ops.scrubGeo(e.tweetIds))
+
+      override val takedown: FutureEffect[Takedown.Event] =
+        FutureEffect[Takedown.Event](e => ops.takedown(e.tweet))
+
+      override val replicatedTakedown: FutureEffect[ReplicatedTakedown.Event] =
+        FutureEffect[ReplicatedTakedown.Event](e => ops.takedown(e.tweet))
+
+      // Runs only for flush events whose flushTweets flag is set.
+      override val flush: FutureEffect[Flush.Event] =
+        FutureEffect[Flush.Event](e => ops.flushTweets(e.tweetIds, logExisting = e.logExisting))
+          .onlyIf(_.flushTweets)
+
+      override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] =
+        FutureEffect[UpdatePossiblySensitiveTweet.Event](e => ops.updatePossiblySensitive(e.tweet))
+
+      override val replicatedUpdatePossiblySensitiveTweet: FutureEffect[
+        ReplicatedUpdatePossiblySensitiveTweet.Event
+      ] =
+        FutureEffect[ReplicatedUpdatePossiblySensitiveTweet.Event](e =>
+          ops.updatePossiblySensitive(e.tweet))
+
+      override val asyncUpdatePossiblySensitiveTweet: FutureEffect[
+        AsyncUpdatePossiblySensitiveTweet.Event
+      ] =
+        FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event](e =>
+          ops.updatePossiblySensitive(e.tweet))
+
+      override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[
+
TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet)
+    }
+  }
+}
+
+/**
+ * Cache operations backing [[CachingTweetStore]].
+ *
+ * @param tweetCache      locking cache holding the cached tweets
+ * @param tweetKeyFactory turns tweet ids into cache keys
+ * @param stats           receiver for the `eviction_failures` counter
+ * @param evictionRetries maximum number of delete attempts per key when evicting
+ */
+private class CachingTweetStoreOps(
+  tweetCache: LockingCache[TweetKey, Cached[CachedTweet]],
+  tweetKeyFactory: TweetKeyFactory,
+  stats: StatsReceiver,
+  evictionRetries: Int = 3) {
+  type CachedTweetHandler = Handler[Cached[CachedTweet]]
+
+  private val preferNewestPicker = new PreferNewestCached[CachedTweet]
+
+  private val evictionFailedCounter = stats.counter("eviction_failures")
+
+  private val cacheFlushesLog = Logger("com.twitter.tweetypie.store.CacheFlushesLog")
+
+  private[this] val mapper = new ObjectMapper().registerModule(DefaultScalaModule)
+
+  /**
+   * Inserts a tweet into cache, recording all compiled additional fields and all
+   * included passthrough fields. Additionally, if the insertion event contains
+   * an `InitialTweetUpdateRequest`, we will update the cache entry for this tweet's
+   * initialTweet once the insert has completed.
+   */
+  def insertTweet(
+    ct: CachedTweet,
+    initialTweetUpdateRequest: Option[InitialTweetUpdateRequest]
+  ): Future[Unit] =
+    lockAndSet(
+      ct.tweet.id,
+      insertTweetHandler(ct)
+    ).flatMap { _ =>
+      initialTweetUpdateRequest match {
+        case Some(request) =>
+          lockAndSet(
+            request.initialTweetId,
+            updateTweetHandler(tweet => InitialTweetUpdate.updateTweet(tweet, request))
+          )
+        case None =>
+          Future.Unit
+      }
+    }
+
+  /**
+   * Writes a `deleted` tombstone to cache. If `updateOnly` is true, then we only
+   * write the tombstone if the tweet is already in cache. If `isBounceDelete` we
+   * write a special bounce-deleted CachedTweet record to cache.
+   */
+  def deleteTweet(tweetId: TweetId, updateOnly: Boolean, isBounceDelete: Boolean): Future[Unit] = {
+    // We only need to store a CachedTweet value if the tweet is bounce-deleted, to support
+    // rendering timeline tombstones for tweets that violated the Twitter Rules.
+    // see go/bounced-tweet
+    val cachedValue = if (isBounceDelete) {
+      found(toBounceDeletedCachedTweet(tweetId))
+    } else {
+      writeThroughCached[CachedTweet](None, CachedValueStatus.Deleted)
+    }
+
+    val pickerHandler =
+      if (updateOnly) {
+        deleteTweetUpdateOnlyHandler(cachedValue)
+      } else {
+        deleteTweetHandler(cachedValue)
+      }
+
+    lockAndSet(tweetId, pickerHandler)
+  }
+
+  /** Re-inserts an undeleted tweet into cache, unconditionally replacing what is there. */
+  def undeleteTweet(ct: CachedTweet): Future[Unit] =
+    lockAndSet(
+      ct.tweet.id,
+      insertTweetHandler(ct)
+    )
+
+  /** Applies the additional fields carried by `tweet` to the cached copy, if one is cached. */
+  def setAdditionalFields(tweet: Tweet): Future[Unit] =
+    lockAndSet(tweet.id, setFieldsHandler(AdditionalFields.additionalFields(tweet)))
+
+  /** Unsets the given field ids on the cached copy of the tweet, if one is cached. */
+  def deleteAdditionalFields(tweetId: TweetId, fieldIds: Seq[FieldId]): Future[Unit] =
+    lockAndSet(tweetId, deleteFieldsHandler(fieldIds))
+
+  /** Removes geo information from the cached copies of the given tweets. */
+  def scrubGeo(tweetIds: Seq[TweetId]): Future[Unit] =
+    Future.join {
+      tweetIds.map { id =>
+        // First, attempt to modify any tweets that are in cache to
+        // avoid having to reload the cached tweet from storage.
+        lockAndSet(id, scrubGeoHandler).unit.rescue {
+          case _: OptimisticLockingCache.LockAndSetFailure =>
+            // If the modification fails, then remove whatever is in
+            // cache. This is much more likely to succeed because it
+            // does not require multiple successful requests to cache.
+            // This will force the tweet to be loaded from storage the
+            // next time it is requested, and the stored tweet will have
+            // the geo information removed.
+            //
+            // This eviction path was added due to frequent failures of
+            // the in-place modification code path, causing geoscrub
+            // daemon tasks to fail.
+            //
+            // NOTE(review): evictOne reports an exhausted retry budget as the value 1
+            // rather than as a failure, and that value is dropped here, so a failed
+            // eviction is neither surfaced nor counted -- confirm this is intended.
+            evictOne(tweetKeyFactory.fromId(id), evictionRetries)
+        }
+      }
+    }
+
+  /** Copies the takedown fields of `tweet` onto the cached copy, if one is cached. */
+  def takedown(tweet: Tweet): Future[Unit] =
+    lockAndSet(tweet.id, updateCachedTweetHandler(copyTakedownFieldsForUpdate(tweet)))
+
+  /** Copies the nsfw fields of `tweet` onto the cached copy, if one is cached. */
+  def updatePossiblySensitive(tweet: Tweet): Future[Unit] =
+    lockAndSet(tweet.id, updateTweetHandler(copyNsfwFieldsForUpdate(tweet)))
+
+  /**
+   * Evicts the given tweets from cache, first logging their current cached values when
+   * `logExisting` is set.
+   *
+   * Eviction runs whether or not logging succeeded, and the returned Future completes
+   * only after eviction has finished. A logging failure is still reported through the
+   * returned Future.
+   */
+  def flushTweets(tweetIds: Seq[TweetId], logExisting: Boolean = false): Future[Unit] = {
+    val tweetKeys = tweetIds.map(tweetKeyFactory.fromId)
+
+    // `ensure` takes a Unit block, so passing it evictAll(...) discarded the eviction
+    // Future and the flush completed before the evictions had finished. Sequence
+    // explicitly instead, then restore the logging outcome.
+    Future.when(logExisting) { logExistingValues(tweetKeys) }.transform { logged =>
+      evictAll(tweetKeys).before(Future.const(logged))
+    }
+  }
+
+  /**
+   * A LockingCache.Handler that inserts a tweet into cache.
+   */
+  private def insertTweetHandler(newValue: CachedTweet): Handler[Cached[CachedTweet]] =
+    AlwaysSetHandler(Some(writeThroughCached(Some(newValue), CachedValueStatus.Found)))
+
+  // Only entries that are Found and are not bounce-deleted placeholders may be updated in place.
+  private def foundAndNotBounced(c: Cached[CachedTweet]) =
+    c.status == CachedValueStatus.Found && !isBounceDeleted(c)
+
+  /**
+   * A LockingCache.Handler that applies `update` to the Tweet inside an existing
+   * CachedTweet, keeping the rest of the CachedTweet unchanged.
+   */
+  private def updateTweetHandler(update: Tweet => Tweet): CachedTweetHandler =
+    inCache =>
+      for {
+        cached <- inCache.filter(foundAndNotBounced)
+        cachedTweet <- cached.value
+        updatedTweet = update(cachedTweet.tweet)
+      } yield found(cachedTweet.copy(tweet = updatedTweet))
+
+  /**
+   * A LockingCache.Handler that applies `update` to an existing CachedTweet as a whole.
+   */
+  private def updateCachedTweetHandler(update: CachedTweet => CachedTweet): CachedTweetHandler =
+    inCache =>
+      for {
+        cached <- inCache.filter(foundAndNotBounced)
+        cachedTweet <- cached.value
+        updatedCachedTweet = update(cachedTweet)
+      } yield found(updatedCachedTweet)
+
+  private def deleteTweetHandler(value: Cached[CachedTweet]): CachedTweetHandler =
+    PickingHandler(value, preferNewestPicker)
+
+  private def deleteTweetUpdateOnlyHandler(value: Cached[CachedTweet]): CachedTweetHandler =
+    UpdateOnlyPickingHandler(value, preferNewestPicker)
+
+  // NOTE(review): unlike deleteFieldsHandler, this builds a fresh CachedTweet from the
+  // updated tweet instead of copying the existing one, so any other CachedTweet fields
+  // revert to their defaults -- confirm that is intended.
+  private def setFieldsHandler(additional: Seq[TFieldBlob]): CachedTweetHandler =
+    inCache =>
+      for {
+        cached <- inCache.filter(foundAndNotBounced)
+        cachedTweet <- cached.value
+        updatedTweet = AdditionalFields.setAdditionalFields(cachedTweet.tweet, additional)
+        updatedCachedTweet = CachedTweet(updatedTweet)
+      } yield found(updatedCachedTweet)
+
+  private def deleteFieldsHandler(fieldIds: Seq[FieldId]): CachedTweetHandler =
+    inCache =>
+      for {
+        cached <- inCache.filter(foundAndNotBounced)
+        cachedTweet <- cached.value
+        updatedTweet = AdditionalFields.unsetFields(cachedTweet.tweet, fieldIds)
+        scrubbedCachedTweet = cachedTweet.copy(tweet = updatedTweet)
+      } yield found(scrubbedCachedTweet)
+
+  // Yields no replacement value when the cached tweet has no core data or no geo information.
+  private val scrubGeoHandler: CachedTweetHandler =
+    inCache =>
+      for {
+        cached <- inCache.filter(foundAndNotBounced)
+        cachedTweet <- cached.value
+        tweet = cachedTweet.tweet
+        coreData <- tweet.coreData if hasGeo(tweet)
+        scrubbedCoreData = coreData.copy(coordinates = None, placeId = None)
+        scrubbedTweet = tweet.copy(coreData = Some(scrubbedCoreData), place = None)
+        scrubbedCachedTweet = cachedTweet.copy(tweet = scrubbedTweet)
+      } yield found(scrubbedCachedTweet)
+
+  /**
+   * Deletes `key` from cache, making up to `tries` attempts. The returned Future never
+   * fails: it yields 0 once a delete succeeds and 1 when every attempt failed.
+   */
+  private def evictOne(key: TweetKey, tries: Int): Future[Int] =
+    tweetCache.delete(key).transform {
+      case Throw(_) if tries > 1 => evictOne(key, tries - 1)
+      case Throw(_) => Future.value(1)
+      case Return(_) => Future.value(0)
+    }
+
+  /** Evicts every key and adds the number of keys that could not be evicted to the failure counter. */
+  private def evictAll(keys: Seq[TweetKey]): Future[Unit] =
+    Future
+      .collect {
+        keys.map(evictOne(_, evictionRetries))
+      }
+      .onSuccess { (failures: Seq[Int]) => evictionFailedCounter.incr(failures.sum) }
+      .unit
+
+  /** Logs, as one JSON line each, the cached tweets currently found under `keys`. */
+  private def logExistingValues(keys: Seq[TweetKey]): Future[Unit] =
+    tweetCache
+      .get(keys)
+      .map { existing =>
+        // Logging is a pure side effect, so iterate with foreach instead of building
+        // (and discarding) a collection of Units with for/yield.
+        existing.found.foreach {
+          case (key, cached) =>
+            cached.value.foreach { cachedTweet =>
+              val tweet = cachedTweet.tweet
+              cacheFlushesLog.info(
+                mapper.writeValueAsString(
+                  Map(
+                    "key" -> key,
+                    "tweet_id" -> tweet.id,
+                    "tweet" -> DiffShow.show(tweet)
+                  )
+                )
+              )
+            }
+        }
+      }
+      .unit
+
+  private def found(value: CachedTweet): Cached[CachedTweet] =
+    writeThroughCached(Some(value), CachedValueStatus.Found)
+
+  // Stamps the cache entry with the current time in both timestamp positions of Cached.
+  private def writeThroughCached[V](value: Option[V], status: CachedValueStatus): Cached[V] = {
+    val now = Time.now
+    Cached(value, status, now, None, Some(now))
+  }
+
+  private def lockAndSet(tweetId: TweetId, handler: LockingCache.Handler[Cached[CachedTweet]]) =
+    tweetCache.lockAndSet(tweetKeyFactory.fromId(tweetId), handler).unit
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.scala
new file mode 100644
index 000000000..726745b7e
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.scala
@@ -0,0 +1,172 @@
+package com.twitter.tweetypie
+package store
+
+import com.twitter.tweetypie.thriftscala._
+
+object DeleteAdditionalFields extends TweetStore.SyncModule {
+
+  case class Event(tweetId: TweetId, fieldIds: Seq[FieldId], userId: UserId, timestamp: Time)
+      extends SyncTweetStoreEvent("delete_additional_fields") {
+
+    def toAsyncRequest: AsyncDeleteAdditionalFieldsRequest =
+      AsyncDeleteAdditionalFieldsRequest(
+        tweetId = tweetId,
+        fieldIds = fieldIds,
+        userId = userId,
+        timestamp = timestamp.inMillis
+      )
+  }
+
+  trait Store {
+    val deleteAdditionalFields:
FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val deleteAdditionalFields: FutureEffect[Event] = wrap( + underlying.deleteAdditionalFields) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + asyncEnqueueStore: AsyncEnqueueStore, + logLensStore: LogLensStore + ): Store = + new Store { + override val deleteAdditionalFields: FutureEffect[Event] = + FutureEffect.inParallel( + // ignore failures deleting from cache, will be retried in async-path + cachingTweetStore.ignoreFailures.deleteAdditionalFields, + asyncEnqueueStore.deleteAdditionalFields, + logLensStore.deleteAdditionalFields + ) + } + } +} + +object AsyncDeleteAdditionalFields extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest( + request: AsyncDeleteAdditionalFieldsRequest, + user: User + ): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + Event( + tweetId = request.tweetId, + fieldIds = request.fieldIds, + userId = request.userId, + optUser = Some(user), + timestamp = Time.fromMilliseconds(request.timestamp) + ), + request.retryAction, + RetryEvent + ) + } + + case class Event( + tweetId: TweetId, + fieldIds: Seq[FieldId], + userId: UserId, + optUser: Option[User], + timestamp: Time) + extends AsyncTweetStoreEvent("async_delete_additional_fields") + with TweetStoreTweetEvent { + + def toAsyncRequest( + action: Option[AsyncWriteAction] = None + ): AsyncDeleteAdditionalFieldsRequest = + AsyncDeleteAdditionalFieldsRequest( + tweetId = tweetId, + fieldIds = fieldIds, + userId = userId, + timestamp = timestamp.inMillis, + retryAction = action + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.AdditionalFieldDeleteEvent( + AdditionalFieldDeleteEvent( + deletedFields = Map(tweetId -> fieldIds), + userId = optUser.map(_.id) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + 
service.asyncDeleteAdditionalFields(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.DeleteAdditionalFields.type = + AsyncWriteEventType.DeleteAdditionalFields + override val scribedTweetOnFailure: None.type = None + } + + trait Store { + val asyncDeleteAdditionalFields: FutureEffect[Event] + val retryAsyncDeleteAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncDeleteAdditionalFields: FutureEffect[Event] = wrap( + underlying.asyncDeleteAdditionalFields) + override val retryAsyncDeleteAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncDeleteAdditionalFields + ) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + replicatingStore: ReplicatingTweetStore, + eventBusEnqueueStore: TweetEventBusStore + ): Store = { + val stores: Seq[Store] = + Seq( + manhattanStore, + cachingTweetStore, + replicatingStore, + eventBusEnqueueStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncDeleteAdditionalFields: FutureEffect[Event] = build( + _.asyncDeleteAdditionalFields) + override val retryAsyncDeleteAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = + build(_.retryAsyncDeleteAdditionalFields) + } + } + } +} + +object ReplicatedDeleteAdditionalFields extends TweetStore.ReplicatedModule { + + case class Event(tweetId: TweetId, fieldIds: Seq[FieldId]) + extends ReplicatedTweetStoreEvent("replicated_delete_additional_fields") + + trait Store { + val replicatedDeleteAdditionalFields: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val 
replicatedDeleteAdditionalFields: FutureEffect[Event] = + wrap(underlying.replicatedDeleteAdditionalFields) + } + + object Store { + def apply(cachingTweetStore: CachingTweetStore): Store = { + new Store { + override val replicatedDeleteAdditionalFields: FutureEffect[Event] = + cachingTweetStore.replicatedDeleteAdditionalFields + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala new file mode 100644 index 000000000..c2b315d27 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala @@ -0,0 +1,221 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.store.TweetEventDataScrubber.scrub +import com.twitter.tweetypie.thriftscala._ + +object DeleteTweet extends TweetStore.SyncModule { + case class Event( + tweet: Tweet, + timestamp: Time, + user: Option[User] = None, + byUserId: Option[UserId] = None, + auditPassthrough: Option[AuditDeleteTweet] = None, + cascadedFromTweetId: Option[TweetId] = None, + isUserErasure: Boolean = false, + isBounceDelete: Boolean = false, + isLastQuoteOfQuoter: Boolean = false, + isAdminDelete: Boolean) + extends SyncTweetStoreEvent("delete_tweet") { + + def toAsyncRequest: AsyncDeleteRequest = + AsyncDeleteRequest( + tweet = tweet, + user = user, + byUserId = byUserId, + timestamp = timestamp.inMillis, + auditPassthrough = auditPassthrough, + cascadedFromTweetId = cascadedFromTweetId, + isUserErasure = isUserErasure, + isBounceDelete = isBounceDelete, + isLastQuoteOfQuoter = Some(isLastQuoteOfQuoter), + isAdminDelete = Some(isAdminDelete) + ) + } + + trait Store { + val deleteTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val deleteTweet: FutureEffect[Event] = wrap(underlying.deleteTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + 
asyncEnqueueStore: AsyncEnqueueStore, + userCountsUpdatingStore: GizmoduckUserCountsUpdatingStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, + logLensStore: LogLensStore + ): Store = + new Store { + override val deleteTweet: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.ignoreFailures.deleteTweet, + asyncEnqueueStore.deleteTweet, + userCountsUpdatingStore.deleteTweet, + tweetCountsUpdatingStore.deleteTweet, + logLensStore.deleteTweet + ) + } + } +} + +object AsyncDeleteTweet extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest(request: AsyncDeleteRequest): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + AsyncDeleteTweet.Event( + tweet = request.tweet, + timestamp = Time.fromMilliseconds(request.timestamp), + optUser = request.user, + byUserId = request.byUserId, + auditPassthrough = request.auditPassthrough, + cascadedFromTweetId = request.cascadedFromTweetId, + isUserErasure = request.isUserErasure, + isBounceDelete = request.isBounceDelete, + isLastQuoteOfQuoter = request.isLastQuoteOfQuoter.getOrElse(false), + isAdminDelete = request.isAdminDelete.getOrElse(false) + ), + request.retryAction, + RetryEvent + ) + } + + case class Event( + tweet: Tweet, + timestamp: Time, + optUser: Option[User] = None, + byUserId: Option[UserId] = None, + auditPassthrough: Option[AuditDeleteTweet] = None, + cascadedFromTweetId: Option[TweetId] = None, + isUserErasure: Boolean = false, + isBounceDelete: Boolean, + isLastQuoteOfQuoter: Boolean = false, + isAdminDelete: Boolean) + extends AsyncTweetStoreEvent("async_delete_tweet") + with TweetStoreTweetEvent { + val tweetEventTweetId: TweetId = tweet.id + + def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncDeleteRequest = + AsyncDeleteRequest( + tweet = tweet, + user = optUser, + byUserId = byUserId, + timestamp = timestamp.inMillis, + auditPassthrough = auditPassthrough, + cascadedFromTweetId = cascadedFromTweetId, + retryAction = action, + 
isUserErasure = isUserErasure, + isBounceDelete = isBounceDelete, + isLastQuoteOfQuoter = Some(isLastQuoteOfQuoter), + isAdminDelete = Some(isAdminDelete) + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.TweetDeleteEvent( + TweetDeleteEvent( + tweet = scrub(tweet), + user = optUser, + isUserErasure = Some(isUserErasure), + audit = auditPassthrough, + byUserId = byUserId, + isAdminDelete = Some(isAdminDelete) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncDelete(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.Delete.type = AsyncWriteEventType.Delete + override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) + } + + trait Store { + val asyncDeleteTweet: FutureEffect[Event] + val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncDeleteTweet: FutureEffect[Event] = wrap(underlying.asyncDeleteTweet) + override val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncDeleteTweet) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + replicatingStore: ReplicatingTweetStore, + indexingStore: TweetIndexingStore, + eventBusEnqueueStore: TweetEventBusStore, + timelineUpdatingStore: TlsTimelineUpdatingStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, + guanoServiceStore: GuanoServiceStore, + mediaServiceStore: MediaServiceStore + ): Store = { + val stores: Seq[Store] = + Seq( + manhattanStore, + cachingTweetStore, + replicatingStore, + indexingStore, + eventBusEnqueueStore, + timelineUpdatingStore, + tweetCountsUpdatingStore, + guanoServiceStore, + mediaServiceStore + ) + + def 
build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncDeleteTweet: FutureEffect[Event] = build(_.asyncDeleteTweet) + override val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = build( + _.retryAsyncDeleteTweet) + } + } + } +} + +object ReplicatedDeleteTweet extends TweetStore.ReplicatedModule { + + case class Event( + tweet: Tweet, + isErasure: Boolean, + isBounceDelete: Boolean, + isLastQuoteOfQuoter: Boolean = false) + extends ReplicatedTweetStoreEvent("replicated_delete_tweet") + + trait Store { + val replicatedDeleteTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedDeleteTweet: FutureEffect[Event] = wrap(underlying.replicatedDeleteTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = { + new Store { + override val replicatedDeleteTweet: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.replicatedDeleteTweet, + tweetCountsUpdatingStore.replicatedDeleteTweet.ignoreFailures + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala new file mode 100644 index 000000000..ad0104acd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala @@ -0,0 +1,38 @@ +package com.twitter.tweetypie +package store + +import com.twitter.timelineservice.fanout.thriftscala.FanoutService +import com.twitter.tweetypie.thriftscala._ + +trait FanoutServiceStore extends TweetStoreBase[FanoutServiceStore] with AsyncInsertTweet.Store { + def wrap(w: TweetStore.Wrap): FanoutServiceStore = + new TweetStoreWrapper(w, this) with FanoutServiceStore with 
AsyncInsertTweet.StoreWrapper +} + +object FanoutServiceStore { + val Action: AsyncWriteAction.FanoutDelivery.type = AsyncWriteAction.FanoutDelivery + + def apply( + fanoutClient: FanoutService.MethodPerEndpoint, + stats: StatsReceiver + ): FanoutServiceStore = + new FanoutServiceStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + FutureEffect[AsyncInsertTweet.Event] { event => + fanoutClient.tweetCreateEvent2( + TweetCreateEvent( + tweet = event.tweet, + user = event.user, + sourceTweet = event.sourceTweet, + sourceUser = event.sourceUser, + additionalContext = event.additionalContext, + transientContext = event.transientContext + ) + ) + } + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = TweetStore.retry(Action, asyncInsertTweet) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala new file mode 100644 index 000000000..83fbc12af --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie +package store + +object Flush extends TweetStore.SyncModule { + + case class Event( + tweetIds: Seq[TweetId], + flushTweets: Boolean = true, + flushCounts: Boolean = true, + logExisting: Boolean = true) + extends SyncTweetStoreEvent("flush") + + trait Store { + val flush: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val flush: FutureEffect[Event] = wrap(underlying.flush) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = + new Store { + override val flush: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.flush, + tweetCountsUpdatingStore.flush + ) + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala new file mode 100644 index 000000000..be29aba1e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala @@ -0,0 +1,72 @@ +package com.twitter.tweetypie +package store + +import com.twitter.geoduck.backend.relevance.thriftscala.ReportFailure +import com.twitter.geoduck.backend.relevance.thriftscala.ReportResult +import com.twitter.geoduck.backend.relevance.thriftscala.ConversionReport +import com.twitter.geoduck.backend.searchrequestid.thriftscala.SearchRequestID +import com.twitter.geoduck.backend.tweetid.thriftscala.TweetID +import com.twitter.geoduck.common.thriftscala.GeoduckException +import com.twitter.geoduck.service.identifier.thriftscala.PlaceIdentifier +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.thriftscala._ + +trait GeoSearchRequestIDStore + extends TweetStoreBase[GeoSearchRequestIDStore] + with AsyncInsertTweet.Store { + def wrap(w: TweetStore.Wrap): GeoSearchRequestIDStore = + new TweetStoreWrapper[GeoSearchRequestIDStore](w, this) + with GeoSearchRequestIDStore + with AsyncInsertTweet.StoreWrapper +} + +object GeoSearchRequestIDStore { + type ConversionReporter = FutureArrow[ConversionReport, ReportResult] + + val Action: AsyncWriteAction.GeoSearchRequestId.type = AsyncWriteAction.GeoSearchRequestId + private val log = Logger(getClass) + + object FailureHandler { + def translateException(failure: ReportResult.Failure): GeoduckException = { + failure.failure match { + case ReportFailure.Failure(exception) => exception + case _ => GeoduckException("Unknown failure: " + failure.toString) + } + } + } + + def apply(conversionReporter: ConversionReporter): GeoSearchRequestIDStore = + new GeoSearchRequestIDStore { + + val conversionEffect: FutureEffect[ConversionReport] = + FutureEffect + 
.fromPartial[ReportResult] { + case unionFailure: ReportResult.Failure => + Future.exception(FailureHandler.translateException(unionFailure)) + } + .contramapFuture(conversionReporter) + + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + conversionEffect.contramapOption[AsyncInsertTweet.Event] { event => + for { + isUserProtected <- event.user.safety.map(_.isProtected) + geoSearchRequestID <- event.geoSearchRequestId + placeType <- event.tweet.place.map(_.`type`) + placeId <- event.tweet.coreData.flatMap(_.placeId) + placeIdLong <- Try(java.lang.Long.parseUnsignedLong(placeId, 16)).toOption + if placeType == PlaceType.Poi && isUserProtected == false + } yield { + ConversionReport( + requestID = SearchRequestID(requestID = geoSearchRequestID), + tweetID = TweetID(event.tweet.id), + placeID = PlaceIdentifier(placeIdLong) + ) + } + } + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + TweetStore.retry(Action, asyncInsertTweet) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala new file mode 100644 index 000000000..4ddc40dc2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala @@ -0,0 +1,48 @@ +package com.twitter.tweetypie +package store + +import com.twitter.gizmoduck.thriftscala.{CountsUpdateField => Field} +import com.twitter.tweetypie.backends.Gizmoduck + +trait GizmoduckUserCountsUpdatingStore + extends TweetStoreBase[GizmoduckUserCountsUpdatingStore] + with InsertTweet.Store + with DeleteTweet.Store { + def wrap(w: TweetStore.Wrap): GizmoduckUserCountsUpdatingStore = + new TweetStoreWrapper(w, this) + with GizmoduckUserCountsUpdatingStore + with InsertTweet.StoreWrapper + with DeleteTweet.StoreWrapper +} + +/** + * A TweetStore implementation that sends 
user-specific count updates to Gizmoduck. + */ +object GizmoduckUserCountsUpdatingStore { + def isUserTweet(tweet: Tweet): Boolean = + !TweetLenses.nullcast.get(tweet) && TweetLenses.narrowcast.get(tweet).isEmpty + + def apply( + incr: Gizmoduck.IncrCount, + hasMedia: Tweet => Boolean + ): GizmoduckUserCountsUpdatingStore = { + def incrField(field: Field, amt: Int): FutureEffect[Tweet] = + FutureEffect[Tweet](tweet => incr((getUserId(tweet), field, amt))) + + def incrAll(amt: Int): FutureEffect[Tweet] = + FutureEffect.inParallel( + incrField(Field.Tweets, amt).onlyIf(isUserTweet), + incrField(Field.MediaTweets, amt).onlyIf(t => isUserTweet(t) && hasMedia(t)) + ) + + new GizmoduckUserCountsUpdatingStore { + override val insertTweet: FutureEffect[InsertTweet.Event] = + incrAll(1).contramap[InsertTweet.Event](_.tweet) + + override val deleteTweet: FutureEffect[DeleteTweet.Event] = + incrAll(-1) + .contramap[DeleteTweet.Event](_.tweet) + .onlyIf(!_.isUserErasure) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala new file mode 100644 index 000000000..fb6c50c4c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala @@ -0,0 +1,68 @@ +package com.twitter.tweetypie +package store + +import com.twitter.gizmoduck.thriftscala.LookupContext +import com.twitter.gizmoduck.thriftscala.ModifiedAccount +import com.twitter.gizmoduck.thriftscala.ModifiedUser +import com.twitter.tweetypie.backends.Gizmoduck +import com.twitter.tweetypie.thriftscala._ + +trait GizmoduckUserGeotagUpdateStore + extends TweetStoreBase[GizmoduckUserGeotagUpdateStore] + with AsyncInsertTweet.Store + with ScrubGeoUpdateUserTimestamp.Store { + def wrap(w: TweetStore.Wrap): GizmoduckUserGeotagUpdateStore = + new TweetStoreWrapper(w, this) + with GizmoduckUserGeotagUpdateStore + with 
AsyncInsertTweet.StoreWrapper + with ScrubGeoUpdateUserTimestamp.StoreWrapper +} + +/** + * A TweetStore implementation that updates a Gizmoduck user's user_has_geotagged_status flag. + * If a tweet is geotagged and the user's flag is not set, call out to Gizmoduck to update it. + */ +object GizmoduckUserGeotagUpdateStore { + val Action: AsyncWriteAction.UserGeotagUpdate.type = AsyncWriteAction.UserGeotagUpdate + + def apply( + modifyAndGet: Gizmoduck.ModifyAndGet, + stats: StatsReceiver + ): GizmoduckUserGeotagUpdateStore = { + // Counts the number of times that the scrubGeo actually cleared the + // hasGeotaggedStatuses bit for a user. + val clearedCounter = stats.counter("has_geotag_cleared") + + // Counts the number of times that asyncInsertTweet actually set the + // hasGeotaggedStatuses bit for a user. + val setCounter = stats.counter("has_geotag_set") + + def setHasGeotaggedStatuses(value: Boolean): FutureEffect[UserId] = { + val modifiedAccount = ModifiedAccount(hasGeotaggedStatuses = Some(value)) + val modifiedUser = ModifiedUser(account = Some(modifiedAccount)) + FutureEffect(userId => modifyAndGet((LookupContext(), userId, modifiedUser)).unit) + } + + new GizmoduckUserGeotagUpdateStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + setHasGeotaggedStatuses(true) + .contramap[AsyncInsertTweet.Event](_.user.id) + .onSuccess(_ => setCounter.incr()) + .onlyIf { e => + // only with geo info and an account that doesn't yet have geotagged statuses flag set + hasGeo(e.tweet) && (e.user.account.exists(!_.hasGeotaggedStatuses)) + } + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + TweetStore.retry(Action, asyncInsertTweet) + + override val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] = + setHasGeotaggedStatuses(false) + .contramap[ScrubGeoUpdateUserTimestamp.Event](_.userId) + .onlyIf(_.mightHaveGeotaggedStatuses) + .onSuccess(_ => 
clearedCounter.incr()) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala new file mode 100644 index 000000000..d40e6f657 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala @@ -0,0 +1,144 @@ +package com.twitter.tweetypie +package store + +import com.twitter.guano.{thriftscala => guano} +import com.twitter.servo.util.Scribe +import com.twitter.takedown.util.TakedownReasons +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tweetypie.thriftscala.AuditDeleteTweet + +object Guano { + case class MalwareAttempt( + url: String, + userId: UserId, + clientAppId: Option[Long], + remoteHost: Option[String]) { + def toScribeMessage: guano.ScribeMessage = + guano.ScribeMessage( + `type` = guano.ScribeType.MalwareAttempt, + malwareAttempt = Some( + guano.MalwareAttempt( + timestamp = Time.now.inSeconds, + host = remoteHost, + userId = userId, + url = url, + `type` = guano.MalwareAttemptType.Status, + clientAppId = clientAppId.map(_.toInt) // yikes! 
+ ) + ) + ) + } + + case class DestroyTweet( + tweet: Tweet, + userId: UserId, + byUserId: UserId, + passthrough: Option[AuditDeleteTweet]) { + def toScribeMessage: guano.ScribeMessage = + guano.ScribeMessage( + `type` = guano.ScribeType.DestroyStatus, + destroyStatus = Some( + guano.DestroyStatus( + `type` = Some(guano.DestroyStatusType.Status), + timestamp = Time.now.inSeconds, + userId = userId, + byUserId = byUserId, + statusId = tweet.id, + text = "", + reason = passthrough + .flatMap(_.reason) + .flatMap { r => guano.UserActionReason.valueOf(r.name) } + .orElse(Some(guano.UserActionReason.Other)), + done = passthrough.flatMap(_.done).orElse(Some(true)), + host = passthrough.flatMap(_.host), + bulkId = passthrough.flatMap(_.bulkId), + note = passthrough.flatMap(_.note), + runId = passthrough.flatMap(_.runId), + clientApplicationId = passthrough.flatMap(_.clientApplicationId), + userAgent = passthrough.flatMap(_.userAgent) + ) + ) + ) + } + + case class Takedown( + tweetId: TweetId, + userId: UserId, + reason: TakedownReason, + takendown: Boolean, + note: Option[String], + host: Option[String], + byUserId: Option[UserId]) { + def toScribeMessage: guano.ScribeMessage = + guano.ScribeMessage( + `type` = guano.ScribeType.PctdAction, + pctdAction = Some( + guano.PctdAction( + `type` = guano.PctdActionType.Status, + timestamp = Time.now.inSeconds, + tweetId = Some(tweetId), + userId = userId, + countryCode = + TakedownReasons.reasonToCountryCode.applyOrElse(reason, (_: TakedownReason) => ""), + takendown = takendown, + note = note, + host = host, + byUserId = byUserId.getOrElse(-1L), + reason = Some(reason) + ) + ) + ) + } + + case class UpdatePossiblySensitiveTweet( + tweetId: TweetId, + userId: UserId, + byUserId: UserId, + action: guano.NsfwTweetActionAction, + enabled: Boolean, + host: Option[String], + note: Option[String]) { + def toScribeMessage: guano.ScribeMessage = + guano.ScribeMessage( + `type` = guano.ScribeType.NsfwTweetAction, + nsfwTweetAction = 
Some( + guano.NsfwTweetAction( + timestamp = Time.now.inSeconds, + host = host, + userId = userId, + byUserId = byUserId, + action = action, + enabled = enabled, + note = note, + tweetId = tweetId + ) + ) + ) + } + + def apply( + scribe: FutureEffect[guano.ScribeMessage] = Scribe(guano.ScribeMessage, + Scribe("trust_eng_audit")) + ): Guano = { + new Guano { + override val scribeMalwareAttempt: FutureEffect[MalwareAttempt] = + scribe.contramap[MalwareAttempt](_.toScribeMessage) + + override val scribeDestroyTweet: FutureEffect[DestroyTweet] = + scribe.contramap[DestroyTweet](_.toScribeMessage) + + override val scribeTakedown: FutureEffect[Takedown] = + scribe.contramap[Takedown](_.toScribeMessage) + + override val scribeUpdatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet] = + scribe.contramap[UpdatePossiblySensitiveTweet](_.toScribeMessage) + } + } +} + +trait Guano { + val scribeMalwareAttempt: FutureEffect[Guano.MalwareAttempt] + val scribeDestroyTweet: FutureEffect[Guano.DestroyTweet] + val scribeTakedown: FutureEffect[Guano.Takedown] + val scribeUpdatePossiblySensitiveTweet: FutureEffect[Guano.UpdatePossiblySensitiveTweet] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala new file mode 100644 index 000000000..a2a284b8f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala @@ -0,0 +1,120 @@ +package com.twitter.tweetypie +package store + +import com.twitter.guano.thriftscala.NsfwTweetActionAction +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tweetypie.thriftscala._ + +trait GuanoServiceStore + extends TweetStoreBase[GuanoServiceStore] + with AsyncDeleteTweet.Store + with AsyncTakedown.Store + with AsyncUpdatePossiblySensitiveTweet.Store { + def wrap(w: TweetStore.Wrap): GuanoServiceStore = + new TweetStoreWrapper(w, this) 
+ with GuanoServiceStore + with AsyncDeleteTweet.StoreWrapper + with AsyncTakedown.StoreWrapper + with AsyncUpdatePossiblySensitiveTweet.StoreWrapper +} + +object GuanoServiceStore { + val Action: AsyncWriteAction.GuanoScribe.type = AsyncWriteAction.GuanoScribe + + val toGuanoTakedown: (AsyncTakedown.Event, TakedownReason, Boolean) => Guano.Takedown = + (event: AsyncTakedown.Event, reason: TakedownReason, takendown: Boolean) => + Guano.Takedown( + tweetId = event.tweet.id, + userId = getUserId(event.tweet), + reason = reason, + takendown = takendown, + note = event.auditNote, + host = event.host, + byUserId = event.byUserId + ) + + val toGuanoUpdatePossiblySensitiveTweet: ( + AsyncUpdatePossiblySensitiveTweet.Event, + Boolean, + NsfwTweetActionAction + ) => Guano.UpdatePossiblySensitiveTweet = + ( + event: AsyncUpdatePossiblySensitiveTweet.Event, + updatedValue: Boolean, + action: NsfwTweetActionAction + ) => + Guano.UpdatePossiblySensitiveTweet( + tweetId = event.tweet.id, + host = event.host.orElse(Some("unknown")), + userId = event.user.id, + byUserId = event.byUserId, + action = action, + enabled = updatedValue, + note = event.note + ) + + def apply(guano: Guano, stats: StatsReceiver): GuanoServiceStore = { + val deleteByUserIdCounter = stats.counter("deletes_with_by_user_id") + val deleteScribeCounter = stats.counter("deletes_resulting_in_scribe") + + new GuanoServiceStore { + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event] { event => + val tweet = event.tweet + + event.byUserId.foreach(_ => deleteByUserIdCounter.incr()) + + // Guano the tweet deletion action not initiated from the RetweetsDeletionStore + event.byUserId match { + case Some(byUserId) => + deleteScribeCounter.incr() + guano.scribeDestroyTweet( + Guano.DestroyTweet( + tweet = tweet, + userId = getUserId(tweet), + byUserId = byUserId, + passthrough = event.auditPassthrough + ) + ) + case _ => + Future.Unit + } + 
}.onlyIf(_.cascadedFromTweetId.isEmpty) + + override val retryAsyncDeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val asyncTakedown: FutureEffect[AsyncTakedown.Event] = + FutureEffect[AsyncTakedown.Event] { event => + val messages = + event.reasonsToAdd.map(toGuanoTakedown(event, _, true)) ++ + event.reasonsToRemove.map(toGuanoTakedown(event, _, false)) + Future.join(messages.map(guano.scribeTakedown)) + }.onlyIf(_.scribeForAudit) + + override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = + TweetStore.retry(Action, asyncTakedown) + + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ + AsyncUpdatePossiblySensitiveTweet.Event + ] = + FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event] { event => + val messages = + event.nsfwAdminChange.map( + toGuanoUpdatePossiblySensitiveTweet(event, _, NsfwTweetActionAction.NsfwAdmin) + ) ++ + event.nsfwUserChange.map( + toGuanoUpdatePossiblySensitiveTweet(event, _, NsfwTweetActionAction.NsfwUser) + ) + Future.join(messages.toSeq.map(guano.scribeUpdatePossiblySensitiveTweet)) + } + + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] + ] = + TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala new file mode 100644 index 000000000..5f1f2920a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala @@ -0,0 +1,92 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.store.TweetStoreEvent.NoRetry +import com.twitter.tweetypie.store.TweetStoreEvent.RetryStrategy +import com.twitter.tweetypie.thriftscala.AsyncIncrBookmarkCountRequest +import 
com.twitter.tweetypie.thriftscala.AsyncWriteAction + +object IncrBookmarkCount extends TweetStore.SyncModule { + case class Event(tweetId: TweetId, delta: Int, timestamp: Time) + extends SyncTweetStoreEvent("incr_bookmark_count") { + val toAsyncRequest: AsyncIncrBookmarkCountRequest = + AsyncIncrBookmarkCountRequest(tweetId = tweetId, delta = delta) + } + + trait Store { + val incrBookmarkCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val incrBookmarkCount: FutureEffect[Event] = wrap(underlying.incrBookmarkCount) + } + + object Store { + def apply( + asyncEnqueueStore: AsyncEnqueueStore, + replicatingStore: ReplicatingTweetStore + ): Store = { + new Store { + override val incrBookmarkCount: FutureEffect[Event] = + FutureEffect.inParallel( + asyncEnqueueStore.incrBookmarkCount, + replicatingStore.incrBookmarkCount + ) + } + } + } +} + +object AsyncIncrBookmarkCount extends TweetStore.AsyncModule { + case class Event(tweetId: TweetId, delta: Int, timestamp: Time) + extends AsyncTweetStoreEvent("async_incr_bookmark_event") { + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + Future.Unit + + override def retryStrategy: RetryStrategy = NoRetry + } + + trait Store { + def asyncIncrBookmarkCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncIncrBookmarkCount: FutureEffect[Event] = wrap( + underlying.asyncIncrBookmarkCount) + } + + object Store { + def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { + new Store { + override def asyncIncrBookmarkCount: FutureEffect[AsyncIncrBookmarkCount.Event] = + tweetCountsUpdatingStore.asyncIncrBookmarkCount + } + } + } +} + +object ReplicatedIncrBookmarkCount extends TweetStore.ReplicatedModule { + case class Event(tweetId: TweetId, delta: Int) + extends ReplicatedTweetStoreEvent("replicated_incr_bookmark_count") { + 
override def retryStrategy: RetryStrategy = NoRetry + } + + trait Store { + val replicatedIncrBookmarkCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedIncrBookmarkCount: FutureEffect[Event] = wrap( + underlying.replicatedIncrBookmarkCount) + } + + object Store { + def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { + new Store { + override val replicatedIncrBookmarkCount: FutureEffect[Event] = { + tweetCountsUpdatingStore.replicatedIncrBookmarkCount + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala new file mode 100644 index 000000000..b6e1aabcb --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala @@ -0,0 +1,90 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.store.TweetStoreEvent.NoRetry +import com.twitter.tweetypie.thriftscala._ + +object IncrFavCount extends TweetStore.SyncModule { + + case class Event(tweetId: TweetId, delta: Int, timestamp: Time) + extends SyncTweetStoreEvent("incr_fav_count") { + val toAsyncRequest: AsyncIncrFavCountRequest = AsyncIncrFavCountRequest(tweetId, delta) + } + + trait Store { + val incrFavCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val incrFavCount: FutureEffect[Event] = wrap(underlying.incrFavCount) + } + + object Store { + def apply( + asyncEnqueueStore: AsyncEnqueueStore, + replicatingStore: ReplicatingTweetStore + ): Store = + new Store { + override val incrFavCount: FutureEffect[Event] = + FutureEffect.inParallel( + asyncEnqueueStore.incrFavCount, + replicatingStore.incrFavCount + ) + } + } +} + +object AsyncIncrFavCount extends TweetStore.AsyncModule { + + case class Event(tweetId: TweetId, delta: Int, timestamp: Time) + extends 
AsyncTweetStoreEvent("async_incr_fav_count") { + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + Future.Unit // We need to define this method for TweetStoreEvent.Async but we don't use it + + override def retryStrategy: TweetStoreEvent.RetryStrategy = NoRetry + } + + trait Store { + val asyncIncrFavCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncIncrFavCount: FutureEffect[Event] = wrap(underlying.asyncIncrFavCount) + } + + object Store { + def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { + new Store { + override val asyncIncrFavCount: FutureEffect[Event] = + tweetCountsUpdatingStore.asyncIncrFavCount + } + } + } +} + +object ReplicatedIncrFavCount extends TweetStore.ReplicatedModule { + + case class Event(tweetId: TweetId, delta: Int) + extends ReplicatedTweetStoreEvent("replicated_incr_fav_count") { + override def retryStrategy: TweetStoreEvent.NoRetry.type = NoRetry + } + + trait Store { + val replicatedIncrFavCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedIncrFavCount: FutureEffect[Event] = wrap( + underlying.replicatedIncrFavCount) + } + + object Store { + def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { + new Store { + override val replicatedIncrFavCount: FutureEffect[Event] = + tweetCountsUpdatingStore.replicatedIncrFavCount.ignoreFailures + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala new file mode 100644 index 000000000..3e796d3d8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala @@ -0,0 +1,31 @@ +package com.twitter.tweetypie.store + +import com.twitter.tweetypie.Tweet +import 
com.twitter.tweetypie.serverutil.ExtendedTweetMetadataBuilder +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.InitialTweetUpdateRequest +import com.twitter.tweetypie.util.EditControlUtil + +/* Logic to update the initial tweet with new information when that tweet is edited */ +object InitialTweetUpdate { + + /* Given the initial tweet and update request, copy updated edit + * related fields onto it. + */ + def updateTweet(initialTweet: Tweet, request: InitialTweetUpdateRequest): Tweet = { + + // compute a new edit control initial with updated list of edit tweet ids + val editControl: EditControl.Initial = + EditControlUtil.editControlForInitialTweet(initialTweet, request.editTweetId).get() + + // compute the correct extended metadata for a permalink + val extendedTweetMetadata = + request.selfPermalink.map(link => ExtendedTweetMetadataBuilder(initialTweet, link)) + + initialTweet.copy( + selfPermalink = initialTweet.selfPermalink.orElse(request.selfPermalink), + editControl = Some(editControl), + extendedTweetMetadata = initialTweet.extendedTweetMetadata.orElse(extendedTweetMetadata) + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala new file mode 100644 index 000000000..969cc2b5a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala @@ -0,0 +1,284 @@ +package com.twitter.tweetypie +package store + +import com.twitter.context.thriftscala.FeatureContext +import com.twitter.tweetypie.core.GeoSearchRequestId +import com.twitter.tweetypie.store.TweetEventDataScrubber.scrub +import com.twitter.tweetypie.thriftscala._ + +object InsertTweet extends TweetStore.SyncModule { + + case class Event( + tweet: Tweet, + user: User, + timestamp: Time, + _internalTweet: Option[CachedTweet] = None, + sourceTweet: Option[Tweet] = None, + sourceUser: Option[User] = 
None, + quotedTweet: Option[Tweet] = None, + quotedUser: Option[User] = None, + parentUserId: Option[UserId] = None, + initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None, + dark: Boolean = false, + hydrateOptions: WritePathHydrationOptions = WritePathHydrationOptions(), + featureContext: Option[FeatureContext] = None, + geoSearchRequestId: Option[GeoSearchRequestId] = None, + additionalContext: Option[collection.Map[TweetCreateContextKey, String]] = None, + transientContext: Option[TransientCreateContext] = None, + quoterHasAlreadyQuotedTweet: Boolean = false, + noteTweetMentionedUserIds: Option[Seq[Long]] = None) + extends SyncTweetStoreEvent("insert_tweet") + with QuotedTweetOps { + def internalTweet: CachedTweet = + _internalTweet.getOrElse( + throw new IllegalStateException( + s"internalTweet should have been set in WritePathHydration, ${this}" + ) + ) + + def toAsyncRequest( + scrubUser: User => User, + scrubSourceTweet: Tweet => Tweet, + scrubSourceUser: User => User + ): AsyncInsertRequest = + AsyncInsertRequest( + tweet = tweet, + cachedTweet = internalTweet, + user = scrubUser(user), + sourceTweet = sourceTweet.map(scrubSourceTweet), + sourceUser = sourceUser.map(scrubSourceUser), + quotedTweet = quotedTweet.map(scrubSourceTweet), + quotedUser = quotedUser.map(scrubSourceUser), + parentUserId = parentUserId, + featureContext = featureContext, + timestamp = timestamp.inMillis, + geoSearchRequestId = geoSearchRequestId.map(_.requestID), + additionalContext = additionalContext, + transientContext = transientContext, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet), + initialTweetUpdateRequest = initialTweetUpdateRequest, + noteTweetMentionedUserIds = noteTweetMentionedUserIds + ) + } + + trait Store { + val insertTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val insertTweet: FutureEffect[Event] = wrap(underlying.insertTweet) + } + + object Store { + def 
apply( + logLensStore: LogLensStore, + manhattanStore: ManhattanTweetStore, + tweetStatsStore: TweetStatsStore, + cachingTweetStore: CachingTweetStore, + limiterStore: LimiterStore, + asyncEnqueueStore: AsyncEnqueueStore, + userCountsUpdatingStore: GizmoduckUserCountsUpdatingStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = + new Store { + override val insertTweet: FutureEffect[Event] = + FutureEffect.sequentially( + logLensStore.insertTweet, + manhattanStore.insertTweet, + tweetStatsStore.insertTweet, + FutureEffect.inParallel( + // allow write-through caching to fail without failing entire insert + cachingTweetStore.ignoreFailures.insertTweet, + limiterStore.ignoreFailures.insertTweet, + asyncEnqueueStore.insertTweet, + userCountsUpdatingStore.insertTweet, + tweetCountsUpdatingStore.insertTweet + ) + ) + } + } +} + +object AsyncInsertTweet extends TweetStore.AsyncModule { + + private val log = Logger(getClass) + + object Event { + def fromAsyncRequest(request: AsyncInsertRequest): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + Event( + tweet = request.tweet, + cachedTweet = request.cachedTweet, + user = request.user, + optUser = Some(request.user), + timestamp = Time.fromMilliseconds(request.timestamp), + sourceTweet = request.sourceTweet, + sourceUser = request.sourceUser, + parentUserId = request.parentUserId, + featureContext = request.featureContext, + quotedTweet = request.quotedTweet, + quotedUser = request.quotedUser, + geoSearchRequestId = request.geoSearchRequestId, + additionalContext = request.additionalContext, + transientContext = request.transientContext, + quoterHasAlreadyQuotedTweet = request.quoterHasAlreadyQuotedTweet.getOrElse(false), + initialTweetUpdateRequest = request.initialTweetUpdateRequest, + noteTweetMentionedUserIds = request.noteTweetMentionedUserIds + ), + request.retryAction, + RetryEvent + ) + } + + case class Event( + tweet: Tweet, + cachedTweet: CachedTweet, + user: User, + optUser: 
Option[User], + timestamp: Time, + sourceTweet: Option[Tweet] = None, + sourceUser: Option[User] = None, + parentUserId: Option[UserId] = None, + featureContext: Option[FeatureContext] = None, + quotedTweet: Option[Tweet] = None, + quotedUser: Option[User] = None, + geoSearchRequestId: Option[String] = None, + additionalContext: Option[collection.Map[TweetCreateContextKey, String]] = None, + transientContext: Option[TransientCreateContext] = None, + quoterHasAlreadyQuotedTweet: Boolean = false, + initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None, + noteTweetMentionedUserIds: Option[Seq[Long]] = None) + extends AsyncTweetStoreEvent("async_insert_tweet") + with QuotedTweetOps + with TweetStoreTweetEvent { + + def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncInsertRequest = + AsyncInsertRequest( + tweet = tweet, + cachedTweet = cachedTweet, + user = user, + sourceTweet = sourceTweet, + sourceUser = sourceUser, + parentUserId = parentUserId, + retryAction = action, + featureContext = featureContext, + timestamp = timestamp.inMillis, + quotedTweet = quotedTweet, + quotedUser = quotedUser, + geoSearchRequestId = geoSearchRequestId, + additionalContext = additionalContext, + transientContext = transientContext, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet), + initialTweetUpdateRequest = initialTweetUpdateRequest, + noteTweetMentionedUserIds = noteTweetMentionedUserIds + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.TweetCreateEvent( + TweetCreateEvent( + tweet = scrub(tweet), + user = user, + sourceUser = sourceUser, + sourceTweet = sourceTweet.map(scrub), + retweetParentUserId = parentUserId, + quotedTweet = publicQuotedTweet.map(scrub), + quotedUser = publicQuotedUser, + additionalContext = additionalContext, + transientContext = transientContext, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet) + ) + ) + ) + + override def enqueueRetry(service: 
ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncInsert(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.Insert.type = AsyncWriteEventType.Insert + override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) + } + + trait Store { + val asyncInsertTweet: FutureEffect[Event] + val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncInsertTweet: FutureEffect[Event] = wrap(underlying.asyncInsertTweet) + override val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncInsertTweet) + } + + object Store { + def apply( + replicatingStore: ReplicatingTweetStore, + indexingStore: TweetIndexingStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, + timelineUpdatingStore: TlsTimelineUpdatingStore, + eventBusEnqueueStore: TweetEventBusStore, + fanoutServiceStore: FanoutServiceStore, + scribeMediaTagStore: ScribeMediaTagStore, + userGeotagUpdateStore: GizmoduckUserGeotagUpdateStore, + geoSearchRequestIDStore: GeoSearchRequestIDStore + ): Store = { + val stores: Seq[Store] = + Seq( + replicatingStore, + indexingStore, + timelineUpdatingStore, + eventBusEnqueueStore, + fanoutServiceStore, + userGeotagUpdateStore, + tweetCountsUpdatingStore, + scribeMediaTagStore, + geoSearchRequestIDStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncInsertTweet: FutureEffect[Event] = build(_.asyncInsertTweet) + override val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[Event]] = build( + _.retryAsyncInsertTweet) + } + } + } +} + +object ReplicatedInsertTweet extends TweetStore.ReplicatedModule { + + case class 
Event( + tweet: Tweet, + cachedTweet: CachedTweet, + quoterHasAlreadyQuotedTweet: Boolean = false, + initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None) + extends ReplicatedTweetStoreEvent("replicated_insert_tweet") + + trait Store { + val replicatedInsertTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedInsertTweet: FutureEffect[Event] = wrap(underlying.replicatedInsertTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = { + new Store { + override val replicatedInsertTweet: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.replicatedInsertTweet, + tweetCountsUpdatingStore.replicatedInsertTweet.ignoreFailures + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala new file mode 100644 index 000000000..fa71a7967 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala @@ -0,0 +1,41 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.backends.LimiterService +import com.twitter.tweetypie.thriftscala._ + +trait LimiterStore extends TweetStoreBase[LimiterStore] with InsertTweet.Store { + def wrap(w: TweetStore.Wrap): LimiterStore = + new TweetStoreWrapper(w, this) with LimiterStore with InsertTweet.StoreWrapper +} + +object LimiterStore { + def apply( + incrementCreateSuccess: LimiterService.IncrementByOne, + incrementMediaTags: LimiterService.Increment + ): LimiterStore = + new LimiterStore { + override val insertTweet: FutureEffect[InsertTweet.Event] = + FutureEffect[InsertTweet.Event] { event => + Future.when(!event.dark) { + val userId = event.user.id + val contributorUserId: Option[UserId] = event.tweet.contributor.map(_.userId) + + val mediaTags = 
getMediaTagMap(event.tweet) + val mediaTagCount = countDistinctUserMediaTags(mediaTags) + Future + .join( + incrementCreateSuccess(userId, contributorUserId), + incrementMediaTags(userId, contributorUserId, mediaTagCount) + ) + .unit + } + } + } + + def countDistinctUserMediaTags(mediaTags: Map[MediaId, Seq[MediaTag]]): Int = + mediaTags.values.flatten.toSeq + .collect { case MediaTag(MediaTagType.User, Some(userId), _, _) => userId } + .distinct + .size +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala new file mode 100644 index 000000000..67b69691e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala @@ -0,0 +1,169 @@ +package com.twitter.tweetypie +package store + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.twitter.finagle.tracing.Trace +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.media.Media.ownMedia + +trait LogLensStore + extends TweetStoreBase[LogLensStore] + with InsertTweet.Store + with DeleteTweet.Store + with UndeleteTweet.Store + with SetAdditionalFields.Store + with DeleteAdditionalFields.Store + with ScrubGeo.Store + with Takedown.Store + with UpdatePossiblySensitiveTweet.Store { + def wrap(w: TweetStore.Wrap): LogLensStore = + new TweetStoreWrapper(w, this) + with LogLensStore + with InsertTweet.StoreWrapper + with DeleteTweet.StoreWrapper + with UndeleteTweet.StoreWrapper + with SetAdditionalFields.StoreWrapper + with DeleteAdditionalFields.StoreWrapper + with ScrubGeo.StoreWrapper + with Takedown.StoreWrapper + with UpdatePossiblySensitiveTweet.StoreWrapper +} + +object LogLensStore { + def apply( + tweetCreationsLogger: Logger, + tweetDeletionsLogger: Logger, + tweetUndeletionsLogger: Logger, + 
tweetUpdatesLogger: Logger, + clientIdHelper: ClientIdHelper, + ): LogLensStore = + new LogLensStore { + private[this] val mapper = new ObjectMapper().registerModule(DefaultScalaModule) + + private def logMessage(logger: Logger, data: (String, Any)*): Future[Unit] = + Future { + val allData = data ++ defaultData + val msg = mapper.writeValueAsString(Map(allData: _*)) + logger.info(msg) + } + + // Note: Longs are logged as strings to avoid JSON 53-bit numeric truncation + private def defaultData: Seq[(String, Any)] = { + val viewer = TwitterContext() + Seq( + "client_id" -> getOpt(clientIdHelper.effectiveClientId), + "service_id" -> getOpt(clientIdHelper.effectiveServiceIdentifier), + "trace_id" -> Trace.id.traceId.toString, + "audit_ip" -> getOpt(viewer.flatMap(_.auditIp)), + "application_id" -> getOpt(viewer.flatMap(_.clientApplicationId).map(_.toString)), + "user_agent" -> getOpt(viewer.flatMap(_.userAgent)), + "authenticated_user_id" -> getOpt(viewer.flatMap(_.authenticatedUserId).map(_.toString)) + ) + } + + private def getOpt[A](opt: Option[A]): Any = + opt.getOrElse(null) + + override val insertTweet: FutureEffect[InsertTweet.Event] = + FutureEffect[InsertTweet.Event] { event => + logMessage( + tweetCreationsLogger, + "type" -> "create_tweet", + "tweet_id" -> event.tweet.id.toString, + "user_id" -> event.user.id.toString, + "source_tweet_id" -> getOpt(event.sourceTweet.map(_.id.toString)), + "source_user_id" -> getOpt(event.sourceUser.map(_.id.toString)), + "directed_at_user_id" -> getOpt(getDirectedAtUser(event.tweet).map(_.userId.toString)), + "reply_to_tweet_id" -> getOpt( + getReply(event.tweet).flatMap(_.inReplyToStatusId).map(_.toString)), + "reply_to_user_id" -> getOpt(getReply(event.tweet).map(_.inReplyToUserId.toString)), + "media_ids" -> ownMedia(event.tweet).map(_.mediaId.toString) + ) + } + + override val deleteTweet: FutureEffect[DeleteTweet.Event] = + FutureEffect[DeleteTweet.Event] { event => + logMessage( + tweetDeletionsLogger, + "type" -> 
"delete_tweet", + "tweet_id" -> event.tweet.id.toString, + "user_id" -> getOpt(event.user.map(_.id.toString)), + "source_tweet_id" -> getOpt(getShare(event.tweet).map(_.sourceStatusId.toString)), + "by_user_id" -> getOpt(event.byUserId.map(_.toString)), + "passthrough_audit_ip" -> getOpt(event.auditPassthrough.flatMap(_.host)), + "media_ids" -> ownMedia(event.tweet).map(_.mediaId.toString), + "cascaded_from_tweet_id" -> getOpt(event.cascadedFromTweetId.map(_.toString)) + ) + } + + override val undeleteTweet: FutureEffect[UndeleteTweet.Event] = + FutureEffect[UndeleteTweet.Event] { event => + logMessage( + tweetUndeletionsLogger, + "type" -> "undelete_tweet", + "tweet_id" -> event.tweet.id.toString, + "user_id" -> event.user.id.toString, + "source_tweet_id" -> getOpt(getShare(event.tweet).map(_.sourceStatusId.toString)), + "media_ids" -> ownMedia(event.tweet).map(_.mediaId.toString) + ) + } + + override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = + FutureEffect[SetAdditionalFields.Event] { event => + logMessage( + tweetUpdatesLogger, + "type" -> "set_additional_fields", + "tweet_id" -> event.additionalFields.id.toString, + "field_ids" -> AdditionalFields.nonEmptyAdditionalFieldIds(event.additionalFields) + ) + } + + override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = + FutureEffect[DeleteAdditionalFields.Event] { event => + logMessage( + tweetUpdatesLogger, + "type" -> "delete_additional_fields", + "tweet_id" -> event.tweetId.toString, + "field_ids" -> event.fieldIds + ) + } + + override val scrubGeo: FutureEffect[ScrubGeo.Event] = + FutureEffect[ScrubGeo.Event] { event => + Future.join( + event.tweetIds.map { tweetId => + logMessage( + tweetUpdatesLogger, + "type" -> "scrub_geo", + "tweet_id" -> tweetId.toString, + "user_id" -> event.userId.toString + ) + } + ) + } + + override val takedown: FutureEffect[Takedown.Event] = + FutureEffect[Takedown.Event] { event => + logMessage( + tweetUpdatesLogger, + "type" -> 
"takedown", + "tweet_id" -> event.tweet.id.toString, + "user_id" -> getUserId(event.tweet).toString, + "reasons" -> event.takedownReasons + ) + } + + override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = + FutureEffect[UpdatePossiblySensitiveTweet.Event] { event => + logMessage( + tweetUpdatesLogger, + "type" -> "update_possibly_sensitive_tweet", + "tweet_id" -> event.tweet.id.toString, + "nsfw_admin" -> TweetLenses.nsfwAdmin(event.tweet), + "nsfw_user" -> TweetLenses.nsfwUser(event.tweet) + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala new file mode 100644 index 000000000..6eaa65eee --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala @@ -0,0 +1,231 @@ +/** Copyright 2010 Twitter, Inc. */ +package com.twitter.tweetypie +package store + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.storage.Field +import com.twitter.tweetypie.storage.Response.TweetResponse +import com.twitter.tweetypie.storage.Response.TweetResponseCode +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.storage.TweetStorageException +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Future + +case class UpdateTweetNotFoundException(tweetId: TweetId) extends Exception + +trait ManhattanTweetStore + extends TweetStoreBase[ManhattanTweetStore] + with InsertTweet.Store + with AsyncDeleteTweet.Store + with ScrubGeo.Store + with SetAdditionalFields.Store + with DeleteAdditionalFields.Store + with AsyncDeleteAdditionalFields.Store + with Takedown.Store + with UpdatePossiblySensitiveTweet.Store + with AsyncUpdatePossiblySensitiveTweet.Store { + def wrap(w: 
TweetStore.Wrap): ManhattanTweetStore = + new TweetStoreWrapper(w, this) + with ManhattanTweetStore + with InsertTweet.StoreWrapper + with AsyncDeleteTweet.StoreWrapper + with ScrubGeo.StoreWrapper + with SetAdditionalFields.StoreWrapper + with DeleteAdditionalFields.StoreWrapper + with AsyncDeleteAdditionalFields.StoreWrapper + with Takedown.StoreWrapper + with UpdatePossiblySensitiveTweet.StoreWrapper + with AsyncUpdatePossiblySensitiveTweet.StoreWrapper +} + +/** + * A TweetStore implementation that writes to Manhattan. + */ +object ManhattanTweetStore { + val Action: AsyncWriteAction.TbirdUpdate.type = AsyncWriteAction.TbirdUpdate + + private val log = Logger(getClass) + private val successResponses = Set(TweetResponseCode.Success, TweetResponseCode.Deleted) + + case class AnnotationFailure(message: String) extends Exception(message) + + def apply(tweetStorageClient: TweetStorageClient): ManhattanTweetStore = { + + def handleStorageResponses( + responsesStitch: Stitch[Seq[TweetResponse]], + action: String + ): Future[Unit] = + Stitch + .run(responsesStitch) + .onFailure { + case ex: TweetStorageException => log.warn("failed on: " + action, ex) + case _ => + } + .flatMap { responses => + Future.when(responses.exists(resp => !successResponses(resp.overallResponse))) { + Future.exception(AnnotationFailure(s"$action gets failure response $responses")) + } + } + + def updateTweetMediaIds(mutation: Mutation[MediaEntity]): Tweet => Tweet = + tweet => tweet.copy(media = tweet.media.map(entities => entities.map(mutation.endo))) + + /** + * Does a get and set, and only sets fields that are allowed to be + * changed. This also prevents incoming tweets containing incomplete + * fields from being saved to Manhattan. 
+ */ + def updateOneTweetByIdAction(tweetId: TweetId, copyFields: Tweet => Tweet): Future[Unit] = { + Stitch.run { + tweetStorageClient.getTweet(tweetId).flatMap { + case GetTweet.Response.Found(tweet) => + val updatedTweet = copyFields(tweet) + + if (updatedTweet != tweet) { + tweetStorageClient.addTweet(updatedTweet) + } else { + Stitch.Unit + } + case _ => Stitch.exception(UpdateTweetNotFoundException(tweetId)) + } + } + } + + // This should NOT be used in parallel with other write operations. + // A race condition can occur after changes to the storage library to + // return all additional fields. The resulting behavior can cause + // fields that were modified by other writes to revert to their old value. + def updateOneTweetAction(update: Tweet, copyFields: Tweet => Tweet => Tweet): Future[Unit] = + updateOneTweetByIdAction(update.id, copyFields(update)) + + def tweetStoreUpdateTweet(tweet: Tweet): Future[Unit] = { + val setFields = AdditionalFields.nonEmptyAdditionalFieldIds(tweet).map(Field.additionalField) + handleStorageResponses( + tweetStorageClient.updateTweet(tweet, setFields).map(Seq(_)), + s"updateTweet($tweet, $setFields)" + ) + } + + // This is an edit so update the initial Tweet's control + def updateInitialTweet(event: InsertTweet.Event): Future[Unit] = { + event.initialTweetUpdateRequest match { + case Some(request) => + updateOneTweetByIdAction( + request.initialTweetId, + tweet => InitialTweetUpdate.updateTweet(tweet, request) + ) + case None => Future.Unit + } + } + + new ManhattanTweetStore { + override val insertTweet: FutureEffect[InsertTweet.Event] = + FutureEffect[InsertTweet.Event] { event => + Stitch + .run( + tweetStorageClient.addTweet(event.internalTweet.tweet) + ).flatMap(_ => updateInitialTweet(event)) + } + + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event] { event => + if (event.isBounceDelete) { + Stitch.run(tweetStorageClient.bounceDelete(event.tweet.id)) + } else { + 
Stitch.run(tweetStorageClient.softDelete(event.tweet.id)) + } + } + + override val retryAsyncDeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val scrubGeo: FutureEffect[ScrubGeo.Event] = + FutureEffect[ScrubGeo.Event] { event => + Stitch.run(tweetStorageClient.scrub(event.tweetIds, Seq(Field.Geo))) + } + + override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = + FutureEffect[SetAdditionalFields.Event] { event => + tweetStoreUpdateTweet(event.additionalFields) + } + + override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = + FutureEffect[DeleteAdditionalFields.Event] { event => + handleStorageResponses( + tweetStorageClient.deleteAdditionalFields( + Seq(event.tweetId), + event.fieldIds.map(Field.additionalField) + ), + s"deleteAdditionalFields(${event.tweetId}, ${event.fieldIds}})" + ) + } + + override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = + FutureEffect[AsyncDeleteAdditionalFields.Event] { event => + handleStorageResponses( + tweetStorageClient.deleteAdditionalFields( + Seq(event.tweetId), + event.fieldIds.map(Field.additionalField) + ), + s"deleteAdditionalFields(Seq(${event.tweetId}), ${event.fieldIds}})" + ) + } + + override val retryAsyncDeleteAdditionalFields: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] + ] = + TweetStore.retry(Action, asyncDeleteAdditionalFields) + + override val takedown: FutureEffect[Takedown.Event] = + FutureEffect[Takedown.Event] { event => + val (fieldsToUpdate, fieldsToDelete) = + Seq( + Field.TweetypieOnlyTakedownCountryCodes, + Field.TweetypieOnlyTakedownReasons + ).filter(_ => event.updateCodesAndReasons) + .partition(f => event.tweet.getFieldBlob(f.id).isDefined) + + val allFieldsToUpdate = Seq(Field.HasTakedown) ++ fieldsToUpdate + + Future + .join( + handleStorageResponses( + tweetStorageClient + .updateTweet(event.tweet, 
allFieldsToUpdate) + .map(Seq(_)), + s"updateTweet(${event.tweet}, $allFieldsToUpdate)" + ), + Future.when(fieldsToDelete.nonEmpty) { + handleStorageResponses( + tweetStorageClient + .deleteAdditionalFields(Seq(event.tweet.id), fieldsToDelete), + s"deleteAdditionalFields(Seq(${event.tweet.id}), $fieldsToDelete)" + ) + } + ).unit + } + + override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = + FutureEffect[UpdatePossiblySensitiveTweet.Event] { event => + updateOneTweetAction(event.tweet, TweetUpdate.copyNsfwFieldsForUpdate) + } + + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ + AsyncUpdatePossiblySensitiveTweet.Event + ] = + FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event] { event => + updateOneTweetAction(event.tweet, TweetUpdate.copyNsfwFieldsForUpdate) + } + + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] + ] = + TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) + + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala new file mode 100644 index 000000000..4efe22706 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ +import scala.util.matching.Regex + +object MediaIndexHelper { + + /** + * Which tweets should we treat as "media" tweets? + * + * Any tweet that is not a retweet and any of: + * - Is explicitly marked as a media tweet. + * - Has a media entity. + * - Includes a partner media URL. 
+ */ + def apply(partnerMediaRegexes: Seq[Regex]): Tweet => Boolean = { + val isPartnerUrl = partnerUrlMatcher(partnerMediaRegexes) + + tweet => + getShare(tweet).isEmpty && + (hasMediaFlagSet(tweet) || + getMedia(tweet).nonEmpty || + getUrls(tweet).exists(isPartnerUrl)) + } + + def partnerUrlMatcher(partnerMediaRegexes: Seq[Regex]): UrlEntity => Boolean = + _.expanded.exists { expandedUrl => + partnerMediaRegexes.exists(_.findFirstIn(expandedUrl).isDefined) + } + + def hasMediaFlagSet(tweet: Tweet): Boolean = + tweet.coreData.flatMap(_.hasMedia).getOrElse(false) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala new file mode 100644 index 000000000..f2f427c3c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie +package store + +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.media._ +import com.twitter.tweetypie.thriftscala._ + +trait MediaServiceStore + extends TweetStoreBase[MediaServiceStore] + with AsyncDeleteTweet.Store + with AsyncUndeleteTweet.Store { + def wrap(w: TweetStore.Wrap): MediaServiceStore = + new TweetStoreWrapper(w, this) + with MediaServiceStore + with AsyncDeleteTweet.StoreWrapper + with AsyncUndeleteTweet.StoreWrapper +} + +object MediaServiceStore { + val Action: AsyncWriteAction.MediaDeletion.type = AsyncWriteAction.MediaDeletion + + private def ownMedia(t: Tweet): Seq[(MediaKey, TweetId)] = + getMedia(t) + .collect { + case m if Media.isOwnMedia(t.id, m) => (MediaKeyUtil.get(m), t.id) + } + + def apply( + deleteMedia: FutureArrow[DeleteMediaRequest, Unit], + undeleteMedia: FutureArrow[UndeleteMediaRequest, Unit] + ): MediaServiceStore = + new MediaServiceStore { + override val asyncDeleteTweet: 
FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event] { e => + Future.when(!isRetweet(e.tweet)) { + val ownMediaKeys: Seq[(MediaKey, TweetId)] = ownMedia(e.tweet) + val deleteMediaRequests = ownMediaKeys.map(DeleteMediaRequest.tupled) + Future.collect(deleteMediaRequests.map(deleteMedia)) + } + } + + override val retryAsyncDeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + FutureEffect[AsyncUndeleteTweet.Event] { e => + Future.when(!isRetweet(e.tweet)) { + val ownMediaKeys: Seq[(MediaKey, TweetId)] = ownMedia(e.tweet) + val unDeleteMediaRequests = ownMediaKeys.map(UndeleteMediaRequest.tupled) + Future.collect(unDeleteMediaRequests.map(undeleteMedia)) + } + } + + override val retryAsyncUndeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUndeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncUndeleteTweet) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala new file mode 100644 index 000000000..68a6283d7 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object QuotedTweetDelete extends TweetStore.SyncModule { + + case class Event( + quotingTweetId: TweetId, + quotingUserId: UserId, + quotedTweetId: TweetId, + quotedUserId: UserId, + timestamp: Time, + optUser: Option[User] = None) + extends SyncTweetStoreEvent("quoted_tweet_delete") + with TweetStoreTweetEvent { + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.QuotedTweetDeleteEvent( + QuotedTweetDeleteEvent( + quotingTweetId = quotingTweetId, + quotingUserId = quotingUserId, + quotedTweetId = 
quotedTweetId, + quotedUserId = quotedUserId + ) + ) + ) + } + + trait Store { + val quotedTweetDelete: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val quotedTweetDelete: FutureEffect[Event] = wrap(underlying.quotedTweetDelete) + } + + object Store { + def apply(eventBusEnqueueStore: TweetEventBusStore): Store = + new Store { + override val quotedTweetDelete: FutureEffect[Event] = eventBusEnqueueStore.quotedTweetDelete + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala new file mode 100644 index 000000000..34fa71aa6 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala @@ -0,0 +1,33 @@ +package com.twitter.tweetypie +package store + +/** + * Mixin that implements public quoted tweet and public quoted user + * filtering for tweet events that have quoted tweets and users. + */ +trait QuotedTweetOps { + def quotedTweet: Option[Tweet] + def quotedUser: Option[User] + + /** + * Do we have evidence that the quoted user is unprotected? + */ + def quotedUserIsPublic: Boolean = + // The quoted user should include the `safety` struct, but if it + // doesn't for any reason then the quoted tweet and quoted user + // should not be included in the events. This is a safety measure to + // avoid leaking private information. + quotedUser.exists(_.safety.exists(!_.isProtected)) + + /** + * The quoted tweet, filtered as it should appear through public APIs. + */ + def publicQuotedTweet: Option[Tweet] = + if (quotedUserIsPublic) quotedTweet else None + + /** + * The quoted user, filtered as it should appear through public APIs. 
+ */ + def publicQuotedUser: Option[User] = + if (quotedUserIsPublic) quotedUser else None +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala new file mode 100644 index 000000000..4b73437cb --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala @@ -0,0 +1,51 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tweetypie.thriftscala._ + +object QuotedTweetTakedown extends TweetStore.SyncModule { + + case class Event( + quotingTweetId: TweetId, + quotingUserId: UserId, + quotedTweetId: TweetId, + quotedUserId: UserId, + takedownCountryCodes: Seq[String], + takedownReasons: Seq[TakedownReason], + timestamp: Time, + optUser: Option[User] = None) + extends SyncTweetStoreEvent("quoted_tweet_takedown") + with TweetStoreTweetEvent { + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.QuotedTweetTakedownEvent( + QuotedTweetTakedownEvent( + quotingTweetId = quotingTweetId, + quotingUserId = quotingUserId, + quotedTweetId = quotedTweetId, + quotedUserId = quotedUserId, + takedownCountryCodes = takedownCountryCodes, + takedownReasons = takedownReasons + ) + ) + ) + } + + trait Store { + val quotedTweetTakedown: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val quotedTweetTakedown: FutureEffect[Event] = wrap(underlying.quotedTweetTakedown) + } + + object Store { + def apply(eventBusEnqueueStore: TweetEventBusStore): Store = + new Store { + override val quotedTweetTakedown: FutureEffect[Event] = + eventBusEnqueueStore.quotedTweetTakedown + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala new file mode 100644 index 000000000..333103447 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala @@ -0,0 +1,180 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +/** + * A TweetStore that sends write events to the replication endpoints + * of a ThriftTweetService. + * + * The events that are sent are sufficient to keep the other + * instance's caches up to date. The calls contain sufficient data so + * that the remote caches can be updated without requiring the remote + * Tweetypie to access any other services. + * + * Replication serves two purposes: + * + * 1. Maintain consistency between caches in different data centers. + * + * 2. Keep the caches in all data centers warm, protecting backend + * services. + * + * Correctness bugs are worse than bugs that make data less available. + * All of these events affect data consistency. + * + * IncrFavCount.Event and InsertEvents are the least important + * from a data consistency standpoint, because the only data + * consistency issues are counts, which are cached for a shorter time, + * and are not as noticeable to end users if they fail to occur. + * (Failure to apply them is both less severe and self-correcting.) + * + * Delete and GeoScrub events are critical, because the cached data + * has a long expiration and failure to apply them can result in + * violations of user privacy. + * + * Update events are also important from a legal perspective, since + * the update may be updating the per-country take-down status. + * + * @param svc: The ThriftTweetService implementation that will receive the + * replication events. In practice, this will usually be a + * deferredrpc service.
+ */ +trait ReplicatingTweetStore + extends TweetStoreBase[ReplicatingTweetStore] + with AsyncInsertTweet.Store + with AsyncDeleteTweet.Store + with AsyncUndeleteTweet.Store + with AsyncSetRetweetVisibility.Store + with AsyncSetAdditionalFields.Store + with AsyncDeleteAdditionalFields.Store + with ScrubGeo.Store + with IncrFavCount.Store + with IncrBookmarkCount.Store + with AsyncTakedown.Store + with AsyncUpdatePossiblySensitiveTweet.Store { + def wrap(w: TweetStore.Wrap): ReplicatingTweetStore = + new TweetStoreWrapper(w, this) + with ReplicatingTweetStore + with AsyncInsertTweet.StoreWrapper + with AsyncDeleteTweet.StoreWrapper + with AsyncUndeleteTweet.StoreWrapper + with AsyncSetRetweetVisibility.StoreWrapper + with AsyncSetAdditionalFields.StoreWrapper + with AsyncDeleteAdditionalFields.StoreWrapper + with ScrubGeo.StoreWrapper + with IncrFavCount.StoreWrapper + with IncrBookmarkCount.StoreWrapper + with AsyncTakedown.StoreWrapper + with AsyncUpdatePossiblySensitiveTweet.StoreWrapper +} + +object ReplicatingTweetStore { + + val Action: AsyncWriteAction.Replication.type = AsyncWriteAction.Replication + + def apply( + svc: ThriftTweetService + ): ReplicatingTweetStore = + new ReplicatingTweetStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + FutureEffect[AsyncInsertTweet.Event] { e => + svc.replicatedInsertTweet2( + ReplicatedInsertTweet2Request( + e.cachedTweet, + initialTweetUpdateRequest = e.initialTweetUpdateRequest + )) + } + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + TweetStore.retry(Action, asyncInsertTweet) + + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event] { e => + svc.replicatedDeleteTweet2( + ReplicatedDeleteTweet2Request( + tweet = e.tweet, + isErasure = e.isUserErasure, + isBounceDelete = e.isBounceDelete + ) + ) + } + + override val retryAsyncDeleteTweet: FutureEffect[ + 
TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + FutureEffect[AsyncUndeleteTweet.Event] { e => + svc.replicatedUndeleteTweet2(ReplicatedUndeleteTweet2Request(e.cachedTweet)) + } + + override val retryAsyncUndeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUndeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncUndeleteTweet) + + override val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] = + FutureEffect[AsyncSetAdditionalFields.Event] { e => + svc.replicatedSetAdditionalFields(SetAdditionalFieldsRequest(e.additionalFields)) + } + + override val retryAsyncSetAdditionalFields: FutureEffect[ + TweetStoreRetryEvent[AsyncSetAdditionalFields.Event] + ] = + TweetStore.retry(Action, asyncSetAdditionalFields) + + override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = + FutureEffect[AsyncSetRetweetVisibility.Event] { e => + svc.replicatedSetRetweetVisibility( + ReplicatedSetRetweetVisibilityRequest(e.srcId, e.visible) + ) + } + + override val retryAsyncSetRetweetVisibility: FutureEffect[ + TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] + ] = + TweetStore.retry(Action, asyncSetRetweetVisibility) + + override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = + FutureEffect[AsyncDeleteAdditionalFields.Event] { e => + svc.replicatedDeleteAdditionalFields( + ReplicatedDeleteAdditionalFieldsRequest(Map(e.tweetId -> e.fieldIds)) + ) + } + + override val retryAsyncDeleteAdditionalFields: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] + ] = + TweetStore.retry(Action, asyncDeleteAdditionalFields) + + override val scrubGeo: FutureEffect[ScrubGeo.Event] = + FutureEffect[ScrubGeo.Event](e => svc.replicatedScrubGeo(e.tweetIds)) + + override val incrFavCount: FutureEffect[IncrFavCount.Event] = + FutureEffect[IncrFavCount.Event](e => 
svc.replicatedIncrFavCount(e.tweetId, e.delta)) + + override val incrBookmarkCount: FutureEffect[IncrBookmarkCount.Event] = + FutureEffect[IncrBookmarkCount.Event](e => + svc.replicatedIncrBookmarkCount(e.tweetId, e.delta)) + + override val asyncTakedown: FutureEffect[AsyncTakedown.Event] = + FutureEffect[AsyncTakedown.Event](e => svc.replicatedTakedown(e.tweet)) + + override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = + TweetStore.retry(Action, asyncTakedown) + + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ + AsyncUpdatePossiblySensitiveTweet.Event + ] = + FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event](e => + svc.replicatedUpdatePossiblySensitiveTweet(e.tweet)) + + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] + ] = + TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala new file mode 100644 index 000000000..4720e0317 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala @@ -0,0 +1,38 @@ +package com.twitter.tweetypie.store +import com.twitter.tweetypie.FutureEffect +import com.twitter.tweetypie.thriftscala.AsyncWriteAction +import com.twitter.tweetypie.thriftscala.RetweetArchivalEvent + +trait RetweetArchivalEnqueueStore + extends TweetStoreBase[RetweetArchivalEnqueueStore] + with AsyncSetRetweetVisibility.Store { + def wrap(w: TweetStore.Wrap): RetweetArchivalEnqueueStore = + new TweetStoreWrapper(w, this) + with RetweetArchivalEnqueueStore + with AsyncSetRetweetVisibility.StoreWrapper +} + +object RetweetArchivalEnqueueStore { + + def apply(enqueue: FutureEffect[RetweetArchivalEvent]): RetweetArchivalEnqueueStore = + new 
RetweetArchivalEnqueueStore { + override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = + FutureEffect[AsyncSetRetweetVisibility.Event] { e => + enqueue( + RetweetArchivalEvent( + retweetId = e.retweetId, + srcTweetId = e.srcId, + retweetUserId = e.retweetUserId, + srcTweetUserId = e.srcTweetUserId, + timestampMs = e.timestamp.inMillis, + isArchivingAction = Some(!e.visible) + ) + ) + } + + override val retryAsyncSetRetweetVisibility: FutureEffect[ + TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] + ] = + TweetStore.retry(AsyncWriteAction.RetweetArchivalEnqueue, asyncSetRetweetVisibility) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala new file mode 100644 index 000000000..f610fb5ce --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie +package store + +import com.twitter.servo.util.Scribe +import com.twitter.tweetypie.thriftscala.TweetMediaTagEvent + +/** + * Scribes thrift-encoded TweetMediaTagEvents (from tweet_events.thrift). 
+ */ +trait ScribeMediaTagStore extends TweetStoreBase[ScribeMediaTagStore] with AsyncInsertTweet.Store { + def wrap(w: TweetStore.Wrap): ScribeMediaTagStore = + new TweetStoreWrapper(w, this) with ScribeMediaTagStore with AsyncInsertTweet.StoreWrapper +} + +object ScribeMediaTagStore { + + private def toMediaTagEvent(event: AsyncInsertTweet.Event): Option[TweetMediaTagEvent] = { + val tweet = event.tweet + val taggedUserIds = getMediaTagMap(tweet).values.flatten.flatMap(_.userId).toSet + val timestamp = Time.now.inMilliseconds + if (taggedUserIds.nonEmpty) { + Some(TweetMediaTagEvent(tweet.id, getUserId(tweet), taggedUserIds, Some(timestamp))) + } else { + None + } + } + + def apply( + scribe: FutureEffect[String] = Scribe("tweetypie_media_tag_events") + ): ScribeMediaTagStore = + new ScribeMediaTagStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + Scribe(TweetMediaTagEvent, scribe) + .contramapOption[AsyncInsertTweet.Event](toMediaTagEvent) + + // we don't retry this action + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + FutureEffect.unit[TweetStoreRetryEvent[AsyncInsertTweet.Event]] + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala new file mode 100644 index 000000000..262def919 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala @@ -0,0 +1,164 @@ +package com.twitter.tweetypie +package store + +import com.twitter.conversions.DurationOps._ +import com.twitter.servo.cache.Cached +import com.twitter.servo.cache.CachedValueStatus +import com.twitter.servo.cache.LockingCache +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.backends.GeoScrubEventStore +import com.twitter.tweetypie.thriftscala._ + +/** + * Scrub geo information from Tweets. 
+ */ +object ScrubGeo extends TweetStore.SyncModule { + + case class Event( + tweetIdSet: Set[TweetId], + userId: UserId, + optUser: Option[User], + timestamp: Time, + enqueueMax: Boolean) + extends SyncTweetStoreEvent("scrub_geo") + with TweetStoreTweetEvent { + + val tweetIds: Seq[TweetId] = tweetIdSet.toSeq + + override def toTweetEventData: Seq[TweetEventData] = + tweetIds.map { tweetId => + TweetEventData.TweetScrubGeoEvent( + TweetScrubGeoEvent( + tweetId = tweetId, + userId = userId + ) + ) + } + } + + trait Store { + val scrubGeo: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val scrubGeo: FutureEffect[Event] = wrap(underlying.scrubGeo) + } + + object Store { + def apply( + logLensStore: LogLensStore, + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + eventBusEnqueueStore: TweetEventBusStore, + replicatingStore: ReplicatingTweetStore + ): Store = + new Store { + override val scrubGeo: FutureEffect[Event] = + FutureEffect.inParallel( + logLensStore.scrubGeo, + manhattanStore.scrubGeo, + cachingTweetStore.scrubGeo, + eventBusEnqueueStore.scrubGeo, + replicatingStore.scrubGeo + ) + } + } +} + +object ReplicatedScrubGeo extends TweetStore.ReplicatedModule { + + case class Event(tweetIds: Seq[TweetId]) extends ReplicatedTweetStoreEvent("replicated_scrub_geo") + + trait Store { + val replicatedScrubGeo: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedScrubGeo: FutureEffect[Event] = wrap(underlying.replicatedScrubGeo) + } + + object Store { + def apply(cachingTweetStore: CachingTweetStore): Store = { + new Store { + override val replicatedScrubGeo: FutureEffect[Event] = + cachingTweetStore.replicatedScrubGeo + } + } + } +} + +/** + * Update the timestamp of the user's most recent request to delete all + * location data attached to her tweets. 
We use the timestamp to ensure + * that even if we fail to scrub a particular tweet in storage, we will + * not return geo information with that tweet. + * + * See http://go/geoscrub for more details. + */ +object ScrubGeoUpdateUserTimestamp extends TweetStore.SyncModule { + + case class Event(userId: UserId, timestamp: Time, optUser: Option[User]) + extends SyncTweetStoreEvent("scrub_geo_update_user_timestamp") + with TweetStoreTweetEvent { + + def mightHaveGeotaggedStatuses: Boolean = + optUser.forall(_.account.forall(_.hasGeotaggedStatuses == true)) + + def maxTweetId: TweetId = SnowflakeId.firstIdFor(timestamp + 1.millisecond) - 1 + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.UserScrubGeoEvent( + UserScrubGeoEvent( + userId = userId, + maxTweetId = maxTweetId + ) + ) + ) + + /** + * How to update a geo scrub timestamp cache entry. Always prefers + * the highest timestamp value that is available, regardless of when + * it was added to cache. + */ + def cacheHandler: LockingCache.Handler[Cached[Time]] = { + case Some(c) if c.value.exists(_ >= timestamp) => None + case _ => Some(Cached(Some(timestamp), CachedValueStatus.Found, Time.now)) + } + } + + trait Store { + val scrubGeoUpdateUserTimestamp: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val scrubGeoUpdateUserTimestamp: FutureEffect[Event] = wrap( + underlying.scrubGeoUpdateUserTimestamp) + } + + object Store { + def apply( + geotagUpdateStore: GizmoduckUserGeotagUpdateStore, + tweetEventBusStore: TweetEventBusStore, + setInManhattan: GeoScrubEventStore.SetGeoScrubTimestamp, + cache: LockingCache[UserId, Cached[Time]] + ): Store = { + val manhattanEffect = + setInManhattan.asFutureEffect + .contramap[Event](e => (e.userId, e.timestamp)) + + val cacheEffect = + FutureEffect[Event](e => cache.lockAndSet(e.userId, e.cacheHandler).unit) + + new Store { + override val scrubGeoUpdateUserTimestamp: FutureEffect[Event] = 
+ FutureEffect.inParallel( + manhattanEffect, + cacheEffect, + geotagUpdateStore.scrubGeoUpdateUserTimestamp, + tweetEventBusStore.scrubGeoUpdateUserTimestamp + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala new file mode 100644 index 000000000..a1dfef0df --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala @@ -0,0 +1,155 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object SetAdditionalFields extends TweetStore.SyncModule { + + case class Event(additionalFields: Tweet, userId: UserId, timestamp: Time) + extends SyncTweetStoreEvent("set_additional_fields") { + + def toAsyncRequest: AsyncSetAdditionalFieldsRequest = + AsyncSetAdditionalFieldsRequest( + additionalFields = additionalFields, + userId = userId, + timestamp = timestamp.inMillis + ) + } + + trait Store { + val setAdditionalFields: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val setAdditionalFields: FutureEffect[Event] = wrap(underlying.setAdditionalFields) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + asyncEnqueueStore: AsyncEnqueueStore, + logLensStore: LogLensStore + ): Store = + new Store { + override val setAdditionalFields: FutureEffect[Event] = + FutureEffect.sequentially( + logLensStore.setAdditionalFields, + manhattanStore.setAdditionalFields, + // Ignore failures but wait for completion to ensure we attempted to update cache before + // running async tasks, in particular publishing an event to EventBus. 
+ cachingTweetStore.ignoreFailuresUponCompletion.setAdditionalFields, + asyncEnqueueStore.setAdditionalFields + ) + } + } +} + +object AsyncSetAdditionalFields extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest( + request: AsyncSetAdditionalFieldsRequest, + user: User + ): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + Event( + additionalFields = request.additionalFields, + userId = request.userId, + optUser = Some(user), + timestamp = Time.fromMilliseconds(request.timestamp) + ), + request.retryAction, + RetryEvent + ) + } + + case class Event(additionalFields: Tweet, userId: UserId, optUser: Option[User], timestamp: Time) + extends AsyncTweetStoreEvent("async_set_additional_fields") + with TweetStoreTweetEvent { + + def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncSetAdditionalFieldsRequest = + AsyncSetAdditionalFieldsRequest( + additionalFields = additionalFields, + retryAction = action, + userId = userId, + timestamp = timestamp.inMillis + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.AdditionalFieldUpdateEvent( + AdditionalFieldUpdateEvent( + updatedFields = additionalFields, + userId = optUser.map(_.id) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncSetAdditionalFields(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.SetAdditionalFields.type = + AsyncWriteEventType.SetAdditionalFields + override val scribedTweetOnFailure: None.type = None + } + + trait Store { + val asyncSetAdditionalFields: FutureEffect[Event] + val retryAsyncSetAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncSetAdditionalFields: FutureEffect[Event] = wrap( + 
underlying.asyncSetAdditionalFields) + override val retryAsyncSetAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncSetAdditionalFields) + } + + object Store { + def apply( + replicatingStore: ReplicatingTweetStore, + eventBusEnqueueStore: TweetEventBusStore + ): Store = { + val stores: Seq[Store] = Seq(replicatingStore, eventBusEnqueueStore) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncSetAdditionalFields: FutureEffect[Event] = build( + _.asyncSetAdditionalFields) + override val retryAsyncSetAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = + build(_.retryAsyncSetAdditionalFields) + } + } + } +} + +object ReplicatedSetAdditionalFields extends TweetStore.ReplicatedModule { + + case class Event(additionalFields: Tweet) + extends ReplicatedTweetStoreEvent("replicated_set_additional_fields") + + trait Store { + val replicatedSetAdditionalFields: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedSetAdditionalFields: FutureEffect[Event] = wrap( + underlying.replicatedSetAdditionalFields) + } + + object Store { + def apply(cachingTweetStore: CachingTweetStore): Store = { + new Store { + override val replicatedSetAdditionalFields: FutureEffect[Event] = + cachingTweetStore.replicatedSetAdditionalFields + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala new file mode 100644 index 000000000..7f4736f15 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala @@ -0,0 +1,172 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object SetRetweetVisibility extends 
TweetStore.SyncModule { + + case class Event( + retweetId: TweetId, + visible: Boolean, + srcId: TweetId, + retweetUserId: UserId, + srcTweetUserId: UserId, + timestamp: Time) + extends SyncTweetStoreEvent("set_retweet_visibility") { + def toAsyncRequest: AsyncSetRetweetVisibilityRequest = + AsyncSetRetweetVisibilityRequest( + retweetId = retweetId, + visible = visible, + srcId = srcId, + retweetUserId = retweetUserId, + sourceTweetUserId = srcTweetUserId, + timestamp = timestamp.inMillis + ) + } + + trait Store { + val setRetweetVisibility: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + val setRetweetVisibility: FutureEffect[Event] = wrap(underlying.setRetweetVisibility) + } + + object Store { + + /** + * [[AsyncEnqueueStore]] - use this store to call the asyncSetRetweetVisibility endpoint. + * + * @see [[AsyncSetRetweetVisibility.Store.apply]] + */ + def apply(asyncEnqueueStore: AsyncEnqueueStore): Store = + new Store { + override val setRetweetVisibility: FutureEffect[Event] = + asyncEnqueueStore.setRetweetVisibility + } + } +} + +object AsyncSetRetweetVisibility extends TweetStore.AsyncModule { + + case class Event( + retweetId: TweetId, + visible: Boolean, + srcId: TweetId, + retweetUserId: UserId, + srcTweetUserId: UserId, + timestamp: Time) + extends AsyncTweetStoreEvent("async_set_retweet_visibility") { + def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncSetRetweetVisibilityRequest = + AsyncSetRetweetVisibilityRequest( + retweetId = retweetId, + visible = visible, + srcId = srcId, + retweetUserId = retweetUserId, + sourceTweetUserId = srcTweetUserId, + retryAction = action, + timestamp = timestamp.inMillis + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncSetRetweetVisibility(toAsyncRequest(Some(action))) + } + + object Event { + def fromAsyncRequest(req: AsyncSetRetweetVisibilityRequest): TweetStoreEventOrRetry[Event] = 
+ TweetStoreEventOrRetry( + AsyncSetRetweetVisibility.Event( + retweetId = req.retweetId, + visible = req.visible, + srcId = req.srcId, + retweetUserId = req.retweetUserId, + srcTweetUserId = req.sourceTweetUserId, + timestamp = Time.fromMilliseconds(req.timestamp) + ), + req.retryAction, + RetryEvent + ) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.SetRetweetVisibility.type = + AsyncWriteEventType.SetRetweetVisibility + override val scribedTweetOnFailure: None.type = None + } + + trait Store { + val asyncSetRetweetVisibility: FutureEffect[Event] + val retryAsyncSetRetweetVisibility: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + val asyncSetRetweetVisibility: FutureEffect[Event] = wrap(underlying.asyncSetRetweetVisibility) + val retryAsyncSetRetweetVisibility: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncSetRetweetVisibility) + } + + object Store { + + /** + * [[TweetIndexingStore]] - archive or unarchive a retweet edge in TFlock RetweetGraph + * [[TweetCountsCacheUpdatingStore]] - modify the retweet count directly in cache. + * [[ReplicatingTweetStore]] - replicate this [[Event]] in the other DC. + * [[RetweetArchivalEnqueueStore]] - publish RetweetArchivalEvent to "retweet_archival_events" event stream. 
+ * + * @see [[ReplicatedSetRetweetVisibility.Store.apply]] + */ + def apply( + tweetIndexingStore: TweetIndexingStore, + tweetCountsCacheUpdatingStore: TweetCountsCacheUpdatingStore, + replicatingTweetStore: ReplicatingTweetStore, + retweetArchivalEnqueueStore: RetweetArchivalEnqueueStore + ): Store = { + val stores: Seq[Store] = + Seq( + tweetIndexingStore, + tweetCountsCacheUpdatingStore, + replicatingTweetStore, + retweetArchivalEnqueueStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncSetRetweetVisibility: FutureEffect[Event] = build( + _.asyncSetRetweetVisibility) + override val retryAsyncSetRetweetVisibility: FutureEffect[TweetStoreRetryEvent[Event]] = + build(_.retryAsyncSetRetweetVisibility) + } + } + } +} + +object ReplicatedSetRetweetVisibility extends TweetStore.ReplicatedModule { + + case class Event(srcId: TweetId, visible: Boolean) + extends ReplicatedTweetStoreEvent("replicated_set_retweet_visibility") + + trait Store { + val replicatedSetRetweetVisibility: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedSetRetweetVisibility: FutureEffect[Event] = + wrap(underlying.replicatedSetRetweetVisibility) + } + + object Store { + + /** + * [[TweetCountsCacheUpdatingStore]] - replicate modifying the retweet count directly in cache.
/**
 * Store module for the "takedown" sync event: the event type, the store
 * trait, its wrapper, and the composition of the concrete stores.
 */
object Takedown extends TweetStore.SyncModule {

  /**
   * Input to the takedown stores. Trailing comments on individual fields name
   * the downstream consumer the field exists for.
   */
  case class Event(
    tweet: Tweet, // for CachingTweetStore / ManhattanTweetStore / ReplicatedTakedown
    timestamp: Time,
    user: Option[User] = None,
    takedownReasons: Seq[TakedownReason] = Seq.empty, // for EventBus
    reasonsToAdd: Seq[TakedownReason] = Seq.empty, // for Guano
    reasonsToRemove: Seq[TakedownReason] = Seq.empty, // for Guano
    auditNote: Option[String] = None,
    host: Option[String] = None,
    byUserId: Option[UserId] = None,
    eventbusEnqueue: Boolean = true,
    scribeForAudit: Boolean = true,
    // If ManhattanTweetStore should update countryCodes and reasons
    updateCodesAndReasons: Boolean = false)
      extends SyncTweetStoreEvent("takedown") {

    /**
     * Copies this event into an [[AsyncTakedownRequest]]. The timestamp is
     * sent in milliseconds; `updateCodesAndReasons` is not part of the request.
     */
    def toAsyncRequest(): AsyncTakedownRequest = {
      val timestampMs = timestamp.inMillis
      AsyncTakedownRequest(
        tweet = tweet,
        user = user,
        takedownReasons = takedownReasons,
        reasonsToAdd = reasonsToAdd,
        reasonsToRemove = reasonsToRemove,
        scribeForAudit = scribeForAudit,
        eventbusEnqueue = eventbusEnqueue,
        auditNote = auditNote,
        byUserId = byUserId,
        host = host,
        timestamp = timestampMs
      )
    }
  }

  /** A store that can handle a takedown [[Event]]. */
  trait Store {
    val takedown: FutureEffect[Event]
  }

  /** Applies the enclosing wrapper's `wrap` to the underlying store's effect. */
  trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] =>
    override val takedown: FutureEffect[Event] = wrap(underlying.takedown)
  }

  object Store {

    /**
     * Composes the concrete stores into a single takedown effect:
     * LogLens is combined `inParallel` with a `sequentially` chain of
     * Manhattan followed by (cache `inParallel` with async enqueue).
     */
    def apply(
      logLensStore: LogLensStore,
      manhattanStore: ManhattanTweetStore,
      cachingTweetStore: CachingTweetStore,
      asyncEnqueueStore: AsyncEnqueueStore
    ): Store = {
      val cacheAndEnqueue: FutureEffect[Event] =
        FutureEffect.inParallel(cachingTweetStore.takedown, asyncEnqueueStore.takedown)

      val manhattanThenRest: FutureEffect[Event] =
        FutureEffect.sequentially(manhattanStore.takedown, cacheAndEnqueue)

      new Store {
        override val takedown: FutureEffect[Event] =
          FutureEffect.inParallel(logLensStore.takedown, manhattanThenRest)
      }
    }
  }
}

/**
 * Store module for the "async_takedown" event, including its retry event and
 * the conversions to and from [[AsyncTakedownRequest]].
 */
object AsyncTakedown extends TweetStore.AsyncModule {

  object Event {

    /**
     * Rebuilds an [[Event]] from a thrift request and pairs it with the
     * request's `retryAction`, using [[RetryEvent]] as the retry constructor.
     */
    def fromAsyncRequest(request: AsyncTakedownRequest): TweetStoreEventOrRetry[Event] = {
      val decoded =
        Event(
          tweet = request.tweet,
          optUser = request.user,
          takedownReasons = request.takedownReasons,
          reasonsToAdd = request.reasonsToAdd,
          reasonsToRemove = request.reasonsToRemove,
          auditNote = request.auditNote,
          host = request.host,
          byUserId = request.byUserId,
          eventbusEnqueue = request.eventbusEnqueue,
          scribeForAudit = request.scribeForAudit,
          timestamp = Time.fromMilliseconds(request.timestamp)
        )

      TweetStoreEventOrRetry(decoded, request.retryAction, RetryEvent)
    }
  }

  /**
   * Input to the async takedown stores. Trailing comments on individual
   * fields name the downstream consumer the field exists for.
   */
  case class Event(
    tweet: Tweet,
    timestamp: Time,
    optUser: Option[User],
    takedownReasons: Seq[TakedownReason], // for EventBus
    reasonsToAdd: Seq[TakedownReason], // for Guano
    reasonsToRemove: Seq[TakedownReason], // for Guano
    auditNote: Option[String], // for Guano
    host: Option[String], // for Guano
    byUserId: Option[UserId], // for Guano
    eventbusEnqueue: Boolean,
    scribeForAudit: Boolean)
      extends AsyncTweetStoreEvent("async_takedown")
      with TweetStoreTweetEvent {

    /**
     * Copies this event into an [[AsyncTakedownRequest]].
     *
     * @param action retry action to attach to the request; `None` by default
     */
    def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncTakedownRequest = {
      val timestampMs = timestamp.inMillis
      AsyncTakedownRequest(
        tweet = tweet,
        user = optUser,
        takedownReasons = takedownReasons,
        reasonsToAdd = reasonsToAdd,
        reasonsToRemove = reasonsToRemove,
        scribeForAudit = scribeForAudit,
        eventbusEnqueue = eventbusEnqueue,
        auditNote = auditNote,
        byUserId = byUserId,
        host = host,
        timestamp = timestampMs,
        retryAction = action
      )
    }

    /**
     * Yields one `TweetTakedownEvent` when `optUser` is defined and nothing
     * otherwise. Country codes are collected from the takedown reasons via
     * `TakedownReasons.reasonToCountryCode` and sorted.
     */
    override def toTweetEventData: Seq[TweetEventData] =
      optUser.toSeq.map { author =>
        TweetEventData.TweetTakedownEvent(
          TweetTakedownEvent(
            tweetId = tweet.id,
            userId = author.id,
            takedownCountryCodes =
              takedownReasons.collect(TakedownReasons.reasonToCountryCode).sorted,
            takedownReasons = takedownReasons
          )
        )
      }

    /** Re-submits this event to `service.asyncTakedown` with `action` attached. */
    override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = {
      val retryRequest = toAsyncRequest(Some(action))
      service.asyncTakedown(retryRequest)
    }
  }

  /** Retry envelope for an [[Event]]; the event's tweet is what gets scribed on failure. */
  case class RetryEvent(action: AsyncWriteAction, event: Event)
      extends TweetStoreRetryEvent[Event] {

    override val eventType: AsyncWriteEventType.Takedown.type = AsyncWriteEventType.Takedown
    override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet)
  }

  /** A store that can handle the async takedown event and its retry. */
  trait Store {
    val asyncTakedown: FutureEffect[Event]
    val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[Event]]
  }

  /** Applies the enclosing wrapper's `wrap` to both underlying effects. */
  trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] =>
    override val asyncTakedown: FutureEffect[Event] =
      wrap(underlying.asyncTakedown)
    override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[Event]] =
      wrap(underlying.retryAsyncTakedown)
  }

  object Store {

    /**
     * Combines the replicating, Guano and event-bus stores; each effect of the
     * result is the `inParallel` combination of the same effect on all three.
     */
    def apply(
      replicatingStore: ReplicatingTweetStore,
      guanoStore: GuanoServiceStore,
      eventBusEnqueueStore: TweetEventBusStore
    ): Store = {
      val delegates: Seq[Store] = Seq(replicatingStore, guanoStore, eventBusEnqueueStore)

      // Picks one effect off every delegate and combines them with inParallel.
      def fanOut[E <: TweetStoreEvent](pick: Store => FutureEffect[E]): FutureEffect[E] = {
        val effects = delegates.map(pick)
        FutureEffect.inParallel[E](effects: _*)
      }

      new Store {
        override val asyncTakedown: FutureEffect[Event] =
          fanOut(_.asyncTakedown)
        override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[Event]] =
          fanOut(_.retryAsyncTakedown)
      }
    }
  }
}
/**
 * Store interface for the Timeline Service updater: handles async insert,
 * delete and undelete tweet events.
 */
trait TlsTimelineUpdatingStore
    extends TweetStoreBase[TlsTimelineUpdatingStore]
    with AsyncInsertTweet.Store
    with AsyncDeleteTweet.Store
    with AsyncUndeleteTweet.Store {

  /** Returns a copy of this store with `w` applied to each of its effects. */
  def wrap(w: TweetStore.Wrap): TlsTimelineUpdatingStore =
    new TweetStoreWrapper(w, this)
      with TlsTimelineUpdatingStore
      with AsyncInsertTweet.StoreWrapper
      with AsyncDeleteTweet.StoreWrapper
      with AsyncUndeleteTweet.StoreWrapper
}

/**
 * An implementation of TweetStore that sends update events to
 * the Timeline Service.
 */
object TlsTimelineUpdatingStore {
  val Action: AsyncWriteAction.TimelineUpdate.type = AsyncWriteAction.TimelineUpdate

  /**
   * Converts a TweetyPie Tweet to a `tls.FullTweet`.
   *
   * @param hasMedia predicate used to fill the `hasMedia` field
   * @param explicitCreatedAt when Some, overrides the default getTimestamp defined in package
   *   object com.twitter.tweetypie
   * @param noteTweetMentionedUserIds when Some, used as the mentioned user ids instead of
   *   the ids taken from the tweet's own mentions
   */
  def tweetToTLSFullTweet(
    hasMedia: Tweet => Boolean
  )(
    tweet: Tweet,
    explicitCreatedAt: Option[Time],
    noteTweetMentionedUserIds: Option[Seq[Long]]
  ): tls.FullTweet = {
    // Locals are computed in the same order as the fields they populate.
    val authorId = getUserId(tweet)
    val mentionedIds =
      noteTweetMentionedUserIds.getOrElse(getMentions(tweet).flatMap(_.userId)).toSet
    val nullcasted = TweetLenses.nullcast.get(tweet)
    // A tweet without a conversation id is treated as the root of its own conversation.
    val conversation = TweetLenses.conversationId.get(tweet).getOrElse(tweet.id)
    val createdAtMs = explicitCreatedAt.getOrElse(getTimestamp(tweet)).inMillis
    val mediaPresent = hasMedia(tweet)
    val directedAt = TweetLenses.directedAtUser.get(tweet).map(_.userId)

    val retweetInfo = getShare(tweet).map { s =>
      tls.Retweet(
        sourceUserId = s.sourceUserId,
        sourceTweetId = s.sourceStatusId,
        parentTweetId = Some(s.parentStatusId)
      )
    }
    val replyInfo = getReply(tweet).map { r =>
      tls.Reply(
        inReplyToUserId = r.inReplyToUserId,
        inReplyToTweetId = r.inReplyToStatusId
      )
    }
    val quoteInfo = tweet.quotedTweet.map { q =>
      tls.Quote(
        quotedUserId = q.userId,
        quotedTweetId = q.tweetId
      )
    }

    tls.FullTweet(
      userId = authorId,
      tweetId = tweet.id,
      mentionedUserIds = mentionedIds,
      isNullcasted = nullcasted,
      conversationId = conversation,
      narrowcastGeos = Set.empty,
      createdAtMs = createdAtMs,
      hasMedia = mediaPresent,
      directedAtUserId = directedAt,
      retweet = retweetInfo,
      reply = replyInfo,
      quote = quoteInfo,
      mediaTags = tweet.mediaTags,
      text = Some(getText(tweet))
    )
  }

  val logger: Logger = Logger(getClass)

  /**
   * Returns a callback that, for a `ValidationFailed` result, logs the errors and
   * increments the `processEvent2_validation_failed` counter; other results are ignored.
   */
  def logValidationFailed(stats: StatsReceiver): tls.ProcessEventResult => Unit = { result =>
    result match {
      case tls.ProcessEventResult(tls.ProcessEventResultType.ValidationFailed, errors) =>
        logger.error(s"Validation Failed in processEvent2: $errors")
        stats.counter("processEvent2_validation_failed").incr()
      case _ =>
        ()
    }
  }

  /**
   * Builds the store. Every event is converted to a `tls.Event`, passed to
   * `processEvent2`, and the result is fed to [[logValidationFailed]].
   * Retry effects are derived with `TweetStore.retry` under [[Action]].
   */
  def apply(
    processEvent2: TimelineService.ProcessEvent2,
    hasMedia: Tweet => Boolean,
    stats: StatsReceiver
  ): TlsTimelineUpdatingStore = {
    val toTlsTweet = tweetToTLSFullTweet(hasMedia) _

    val processAndLog =
      processEvent2.andThen(FutureArrow.fromFunction(logValidationFailed(stats)))

    // Inserts pass the event timestamp and note-tweet mentions through explicitly.
    def createEvent(e: AsyncInsertTweet.Event) =
      tls.Event.FullTweetCreate(
        tls.FullTweetCreateEvent(
          toTlsTweet(e.tweet, Some(e.timestamp), e.noteTweetMentionedUserIds),
          e.timestamp.inMillis,
          featureContext = e.featureContext
        )
      )

    def restoreEvent(e: AsyncUndeleteTweet.Event) =
      tls.Event.FullTweetRestore(
        tls.FullTweetRestoreEvent(
          toTlsTweet(e.tweet, None, None),
          e.deletedAt.map(_.inMillis)
        )
      )

    def deleteEvent(e: AsyncDeleteTweet.Event) =
      tls.Event.FullTweetDelete(
        tls.FullTweetDeleteEvent(
          toTlsTweet(e.tweet, None, None),
          e.timestamp.inMillis,
          isUserErasure = Some(e.isUserErasure),
          isBounceDelete = Some(e.isBounceDelete)
        )
      )

    new TlsTimelineUpdatingStore {
      override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] =
        processAndLog
          .contramap[AsyncInsertTweet.Event](createEvent)
          .asFutureEffect[AsyncInsertTweet.Event]

      override val retryAsyncInsertTweet: FutureEffect[
        TweetStoreRetryEvent[AsyncInsertTweet.Event]
      ] =
        TweetStore.retry(Action, asyncInsertTweet)

      override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] =
        processAndLog
          .contramap[AsyncUndeleteTweet.Event](restoreEvent)
          .asFutureEffect[AsyncUndeleteTweet.Event]

      override val retryAsyncUndeleteTweet: FutureEffect[
        TweetStoreRetryEvent[AsyncUndeleteTweet.Event]
      ] =
        TweetStore.retry(Action, asyncUndeleteTweet)

      override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] =
        processAndLog
          .contramap[AsyncDeleteTweet.Event](deleteEvent)
          .asFutureEffect[AsyncDeleteTweet.Event]

      override val retryAsyncDeleteTweet: FutureEffect[
        TweetStoreRetryEvent[AsyncDeleteTweet.Event]
      ] =
        TweetStore.retry(Action, asyncDeleteTweet)
    }
  }
}
/**
 * Store interface for the counts-cache updater; lists every event this
 * store takes part in.
 */
trait TweetCountsCacheUpdatingStore
    extends TweetStoreBase[TweetCountsCacheUpdatingStore]
    with InsertTweet.Store
    with AsyncInsertTweet.Store
    with ReplicatedInsertTweet.Store
    with DeleteTweet.Store
    with AsyncDeleteTweet.Store
    with ReplicatedDeleteTweet.Store
    with UndeleteTweet.Store
    with ReplicatedUndeleteTweet.Store
    with AsyncIncrFavCount.Store
    with ReplicatedIncrFavCount.Store
    with AsyncIncrBookmarkCount.Store
    with ReplicatedIncrBookmarkCount.Store
    with AsyncSetRetweetVisibility.Store
    with ReplicatedSetRetweetVisibility.Store
    with Flush.Store {

  /** Returns a copy of this store with `w` applied to each of its effects. */
  def wrap(w: TweetStore.Wrap): TweetCountsCacheUpdatingStore =
    new TweetStoreWrapper(w, this)
      with TweetCountsCacheUpdatingStore
      with InsertTweet.StoreWrapper
      with AsyncInsertTweet.StoreWrapper
      with ReplicatedInsertTweet.StoreWrapper
      with DeleteTweet.StoreWrapper
      with AsyncDeleteTweet.StoreWrapper
      with ReplicatedDeleteTweet.StoreWrapper
      with UndeleteTweet.StoreWrapper
      with ReplicatedUndeleteTweet.StoreWrapper
      with AsyncIncrFavCount.StoreWrapper
      with ReplicatedIncrFavCount.StoreWrapper
      with AsyncIncrBookmarkCount.StoreWrapper
      with ReplicatedIncrBookmarkCount.StoreWrapper
      with AsyncSetRetweetVisibility.StoreWrapper
      with ReplicatedSetRetweetVisibility.StoreWrapper
      with Flush.StoreWrapper
}

/**
 * An implementation of TweetStore that updates tweet-specific counts in
 * the CountsCache.
 */
object TweetCountsCacheUpdatingStore {
  private type Action = TweetCountKey => Future[Unit]

  /** The five count keys (retweets, replies, favs, quotes, bookmarks) of a tweet. */
  def keys(tweetId: TweetId): Seq[TweetCountKey] = {
    val constructors: Seq[TweetId => TweetCountKey] =
      Seq(RetweetsKey(_), RepliesKey(_), FavsKey(_), QuotesKey(_), BookmarksKey(_))
    constructors.map(make => make(tweetId))
  }

  /**
   * Count keys on OTHER tweets affected by this tweet: the replied-to tweet's
   * replies key, the quoted tweet's quotes key and the retweet source's
   * retweets key, each only when the tweet has that relationship.
   */
  def relatedKeys(tweet: Tweet): Seq[TweetCountKey] = {
    val replyKey: Option[TweetCountKey] =
      getReply(tweet).flatMap(_.inReplyToStatusId).map(RepliesKey(_))
    val quoteKey: Option[TweetCountKey] =
      getQuotedTweet(tweet).map(quoted => QuotesKey(quoted.tweetId))
    val retweetKey: Option[TweetCountKey] =
      getShare(tweet).map(share => RetweetsKey(share.sourceStatusId))

    Seq(replyKey, quoteKey, retweetKey).flatten
  }

  /** Same as [[relatedKeys]] with any quotes key dropped. */
  def relatedKeysWithoutQuotesKey(tweet: Tweet): Seq[TweetCountKey] =
    relatedKeys(tweet).filter {
      case _: QuotesKey => false
      case _ => true
    }

  /** Builds the store on top of `countsStore`. */
  def apply(countsStore: CachedCountsStore): TweetCountsCacheUpdatingStore = {
    val incr: Action = k => countsStore.incr(k, 1)
    val decr: Action = k => countsStore.incr(k, -1)
    val init: Action = k => countsStore.add(k, 0)
    val delete: Action = k => countsStore.delete(k)

    def initCounts(tweetId: TweetId) = {
      val ownKeys = keys(tweetId)
      Future.join(ownKeys.map(init))
    }

    // Increment the related counters, optionally leaving the quote counter alone.
    def incrRelatedCounts(tweet: Tweet, excludeQuotesKey: Boolean = false) = {
      val targets =
        if (excludeQuotesKey) relatedKeysWithoutQuotesKey(tweet)
        else relatedKeys(tweet)
      Future.join(targets.map(incr))
    }

    def deleteCounts(tweetId: TweetId) = {
      val ownKeys = keys(tweetId)
      Future.join(ownKeys.map(delete))
    }

    // Decrement every related counter only when this is the quoter's last quote;
    // otherwise the quote counter is left untouched.
    def decrRelatedCounts(tweet: Tweet, isLastQuoteOfQuoter: Boolean = false) = {
      val targets =
        if (isLastQuoteOfQuoter) relatedKeys(tweet)
        else relatedKeysWithoutQuotesKey(tweet)
      Future.join(targets.map(decr))
    }

    def updateFavCount(tweetId: TweetId, delta: Int) = {
      val favs = FavsKey(tweetId)
      countsStore.incr(favs, delta).unit
    }

    def updateBookmarkCount(tweetId: TweetId, delta: Int) = {
      val bookmarks = BookmarksKey(tweetId)
      countsStore.incr(bookmarks, delta).unit
    }

    // These two are used specifically for setRetweetVisibility.
    def incrRetweetCount(tweetId: TweetId) = incr(RetweetsKey(tweetId))
    def decrRetweetCount(tweetId: TweetId) = decr(RetweetsKey(tweetId))

    // Shared by the async and replicated visibility handlers.
    def adjustRetweetCount(srcId: TweetId, visible: Boolean) =
      if (visible) incrRetweetCount(srcId) else decrRetweetCount(srcId)

    new TweetCountsCacheUpdatingStore {
      override val insertTweet: FutureEffect[InsertTweet.Event] =
        FutureEffect[InsertTweet.Event](ev => initCounts(ev.tweet.id))

      override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] =
        FutureEffect[AsyncInsertTweet.Event] { ev =>
          incrRelatedCounts(ev.cachedTweet.tweet, ev.quoterHasAlreadyQuotedTweet)
        }

      // Retries are a no-op for this store.
      override val retryAsyncInsertTweet: FutureEffect[
        TweetStoreRetryEvent[AsyncInsertTweet.Event]
      ] =
        FutureEffect.unit[TweetStoreRetryEvent[AsyncInsertTweet.Event]]

      override val replicatedInsertTweet: FutureEffect[ReplicatedInsertTweet.Event] =
        FutureEffect[ReplicatedInsertTweet.Event] { ev =>
          Future
            .join(
              initCounts(ev.tweet.id),
              incrRelatedCounts(ev.tweet, ev.quoterHasAlreadyQuotedTweet)
            )
            .unit
        }

      override val deleteTweet: FutureEffect[DeleteTweet.Event] =
        FutureEffect[DeleteTweet.Event](ev => deleteCounts(ev.tweet.id))

      override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] =
        FutureEffect[AsyncDeleteTweet.Event] { ev =>
          decrRelatedCounts(ev.tweet, ev.isLastQuoteOfQuoter)
        }

      // Retries are a no-op for this store.
      override val retryAsyncDeleteTweet: FutureEffect[
        TweetStoreRetryEvent[AsyncDeleteTweet.Event]
      ] =
        FutureEffect.unit[TweetStoreRetryEvent[AsyncDeleteTweet.Event]]

      override val replicatedDeleteTweet: FutureEffect[ReplicatedDeleteTweet.Event] =
        FutureEffect[ReplicatedDeleteTweet.Event] { ev =>
          Future
            .join(
              deleteCounts(ev.tweet.id),
              decrRelatedCounts(ev.tweet, ev.isLastQuoteOfQuoter)
            )
            .unit
        }

      override val undeleteTweet: FutureEffect[UndeleteTweet.Event] =
        FutureEffect[UndeleteTweet.Event] { ev =>
          incrRelatedCounts(ev.tweet, ev.quoterHasAlreadyQuotedTweet)
        }

      override val replicatedUndeleteTweet: FutureEffect[ReplicatedUndeleteTweet.Event] =
        FutureEffect[ReplicatedUndeleteTweet.Event] { ev =>
          incrRelatedCounts(ev.tweet, ev.quoterHasAlreadyQuotedTweet)
        }

      override val asyncIncrFavCount: FutureEffect[AsyncIncrFavCount.Event] =
        FutureEffect[AsyncIncrFavCount.Event] { ev =>
          updateFavCount(ev.tweetId, ev.delta)
        }

      override val replicatedIncrFavCount: FutureEffect[ReplicatedIncrFavCount.Event] =
        FutureEffect[ReplicatedIncrFavCount.Event] { ev =>
          updateFavCount(ev.tweetId, ev.delta)
        }

      override val asyncIncrBookmarkCount: FutureEffect[AsyncIncrBookmarkCount.Event] =
        FutureEffect[AsyncIncrBookmarkCount.Event] { ev =>
          updateBookmarkCount(ev.tweetId, ev.delta)
        }

      override val replicatedIncrBookmarkCount: FutureEffect[ReplicatedIncrBookmarkCount.Event] =
        FutureEffect[ReplicatedIncrBookmarkCount.Event] { ev =>
          updateBookmarkCount(ev.tweetId, ev.delta)
        }

      override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] =
        FutureEffect[AsyncSetRetweetVisibility.Event] { ev =>
          adjustRetweetCount(ev.srcId, ev.visible)
        }

      // Retries are a no-op for this store.
      override val retryAsyncSetRetweetVisibility: FutureEffect[
        TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event]
      ] =
        FutureEffect.unit[TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event]]

      override val replicatedSetRetweetVisibility: FutureEffect[
        ReplicatedSetRetweetVisibility.Event
      ] =
        FutureEffect[ReplicatedSetRetweetVisibility.Event] { ev =>
          adjustRetweetCount(ev.srcId, ev.visible)
        }

      // Only runs when the flush event asks for counts to be flushed.
      override val flush: FutureEffect[Flush.Event] =
        FutureEffect[Flush.Event] { ev =>
          Future.collect(ev.tweetIds.map(deleteCounts)).unit
        }.onlyIf(_.flushCounts)
    }
  }
}
/**
 * A simple trait around the cache operations needed by TweetCountsCacheUpdatingStore.
 */
trait CachedCountsStore {
  def add(key: TweetCountKey, count: Count): Future[Unit]
  def delete(key: TweetCountKey): Future[Unit]
  def incr(key: TweetCountKey, delta: Count): Future[Unit]
}

object CachedCountsStore {

  /** Adapts a `LockingCache` of cached counts to the [[CachedCountsStore]] interface. */
  def fromLockingCache(cache: LockingCache[TweetCountKey, Cached[Count]]): CachedCountsStore =
    new CachedCountsStore {
      def add(key: TweetCountKey, count: Count): Future[Unit] = {
        val entry = toCached(count)
        cache.add(key, entry).unit
      }

      def delete(key: TweetCountKey): Future[Unit] =
        cache.delete(key).unit

      def incr(key: TweetCountKey, delta: Count): Future[Unit] = {
        val handler = IncrDecrHandler(delta)
        cache.lockAndSet(key, handler).unit
      }
    }

  /** Wraps `count` as a `Found` cache entry, passing the current time for both time fields. */
  def toCached(count: Count): Cached[Count] = {
    val timestamp = Time.now
    Cached(Some(count), CachedValueStatus.Found, timestamp, Some(timestamp))
  }

  /**
   * Locking-cache handler that adds `delta` to an existing cached count.
   * A missing entry, or an entry without a value, yields `None`.
   * The resulting count never goes below zero.
   */
  case class IncrDecrHandler(delta: Long) extends Handler[Cached[Count]] {
    override def apply(inCache: Option[Cached[Count]]): Option[Cached[Count]] =
      for {
        existing <- inCache
        oldCount <- existing.value
      } yield existing.copy(value = Some(clampedSum(oldCount)))

    // Sum of the old count and delta, floored at zero.
    private[this] def clampedSum(oldCount: Long): Long = {
      val sum = oldCount + delta
      if (sum < 0) 0L else sum
    }

    override lazy val toString: String = "IncrDecrHandler(%s)".format(delta)
  }

  /** Thrown by the aggregating store when no more keys can be queued. */
  object QueueIsFullException extends Exception
}
/**
 * An implementation of CachedCountsStore that can queue and aggregate multiple incr
 * updates to the same key together. Currently, updates for a key only start to aggregate
 * after there is a failure to incr on the underlying store, which often indicates contention
 * due to a high level of updates. After a failure, a key is promoted into a "tracked" state,
 * and subsequent updates are aggregated together. Periodically, the aggregated updates will
 * be flushed. If the flush for a key succeeds and no more updates have come in during the flush,
 * then the key is demoted out of the tracked state. Otherwise, updates continue to aggregate
 * until the next flush attempt.
 *
 * @param underlying    store that actually applies the updates
 * @param timer         timer on which the periodic flush is scheduled
 * @param flushInterval period passed to `timer.schedule` for the flush
 * @param maxSize       maximum number of keys that may be tracked at once
 * @param stats         receiver for the counters and the "tracking" gauge
 */
class AggregatingCachedCountsStore(
  underlying: CachedCountsStore,
  timer: Timer,
  flushInterval: Duration,
  maxSize: Int,
  stats: StatsReceiver)
    extends CachedCountsStore
    with Serialized {
  // Pending delta per tracked key. Every access below happens inside a `serialized` block.
  private[this] val pendingUpdates: mutable.Map[TweetCountKey, Count] =
    new mutable.HashMap[TweetCountKey, Count]

  // Written inside `serialized` blocks but read by the "tracking" gauge closure outside
  // of them, so it is volatile to make the latest value visible to the reader.
  @volatile private[this] var trackingCount: Int = 0

  private[this] val promotionCounter = stats.counter("promotions")
  private[this] val demotionCounter = stats.counter("demotions")
  private[this] val updateCounter = stats.counter("aggregated_updates")
  private[this] val overflowCounter = stats.counter("overflows")
  private[this] val flushFailureCounter = stats.counter("flush_failures")
  // NOTE(review): presumably held in a field to keep the gauge registered; confirm.
  private[this] val trackingCountGauge = stats.addGauge("tracking")(trackingCount.toFloat)

  timer.schedule(flushInterval) { flush() }

  /** Delegates directly to the underlying store; adds are never aggregated. */
  def add(key: TweetCountKey, count: Count): Future[Unit] =
    underlying.add(key, count)

  /** Delegates directly to the underlying store; deletes are never aggregated. */
  def delete(key: TweetCountKey): Future[Unit] =
    underlying.delete(key)

  /**
   * Applies `delta` to `key`. If the key is already tracked the delta is only queued.
   * Otherwise the incr is attempted on the underlying store, and on failure the
   * delta is queued (promoting the key) instead of failing the call.
   */
  def incr(key: TweetCountKey, delta: Count): Future[Unit] =
    aggregateIfTracked(key, delta).flatMap {
      case true => Future.Unit
      case false =>
        underlying
          .incr(key, delta)
          .rescue { case _ => aggregate(key, delta) }
    }

  /**
   * Queues an update to be aggregated and applied to a key at a later time, but only if we are
   * already aggregating updates for the key.
   *
   * @return true if the delta was aggregated, false if the key is not being tracked
   *         and the incr should be attempted directly.
   */
  private[this] def aggregateIfTracked(key: TweetCountKey, delta: Count): Future[Boolean] =
    serialized {
      pendingUpdates.get(key) match {
        case None => false
        case Some(current) =>
          updateCounter.incr()
          pendingUpdates(key) = current + delta
          true
      }
    }

  /**
   * Queues an update to be aggregated and applied to a key at a later time.
   *
   * An untracked key is promoted unless `maxSize` keys are already tracked, in which case
   * the returned future fails with [[CachedCountsStore.QueueIsFullException]]. A key whose
   * aggregated delta reaches zero is removed (demoted).
   */
  private[this] def aggregate(key: TweetCountKey, delta: Count): Future[Unit] =
    serialized {
      val alreadyTracked = pendingUpdates.contains(key)

      if (!alreadyTracked) {
        if (pendingUpdates.size < maxSize)
          promotionCounter.incr()
        else {
          overflowCounter.incr()
          throw CachedCountsStore.QueueIsFullException
        }
      }

      (pendingUpdates.getOrElse(key, 0L) + delta) match {
        case 0L =>
          pendingUpdates.remove(key)
          demotionCounter.incr()

        case aggregatedDelta =>
          pendingUpdates(key) = aggregatedDelta
      }

      trackingCount = pendingUpdates.size
    }

  /** Flushes a snapshot of every pending update to the underlying store. */
  private[this] def flush(): Future[Unit] = {
    for {
      // make a copy of the updates to flush, so that updates can continue to be queued
      // while the flush is in progress. if an individual flush succeeds, then we
      // go back and update pendingUpdates.
      updates <- serialized { pendingUpdates.toSeq.toList }
      () <- Future.join(for ((key, delta) <- updates) yield flush(key, delta))
    } yield ()
  }

  /**
   * Applies one snapshotted delta to the underlying store and, on success, subtracts it
   * from the pending total. Any failure is counted in "flush_failures" and otherwise
   * dropped; the pending delta is then still queued for the next flush.
   */
  private[this] def flush(key: TweetCountKey, delta: Count): Future[Unit] =
    underlying
      .incr(key, delta)
      .flatMap(_ => aggregate(key, -delta))
      .handle { case _ => flushFailureCounter.incr() }
}
QuotedTweetTakedown.Store + with ScrubGeoUpdateUserTimestamp.Store + with ScrubGeo.Store { self => + def wrap(w: TweetStore.Wrap): TweetEventBusStore = + new TweetStoreWrapper(w, this) + with TweetEventBusStore + with AsyncDeleteAdditionalFields.StoreWrapper + with AsyncDeleteTweet.StoreWrapper + with AsyncInsertTweet.StoreWrapper + with AsyncSetAdditionalFields.StoreWrapper + with AsyncTakedown.StoreWrapper + with AsyncUndeleteTweet.StoreWrapper + with AsyncUpdatePossiblySensitiveTweet.StoreWrapper + with QuotedTweetDelete.StoreWrapper + with QuotedTweetTakedown.StoreWrapper + with ScrubGeo.StoreWrapper + with ScrubGeoUpdateUserTimestamp.StoreWrapper + + def inParallel(that: TweetEventBusStore): TweetEventBusStore = + new TweetEventBusStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + self.asyncInsertTweet.inParallel(that.asyncInsertTweet) + override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = + self.asyncDeleteAdditionalFields.inParallel(that.asyncDeleteAdditionalFields) + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + self.asyncDeleteTweet.inParallel(that.asyncDeleteTweet) + override val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] = + self.asyncSetAdditionalFields.inParallel(that.asyncSetAdditionalFields) + override val asyncTakedown: FutureEffect[AsyncTakedown.Event] = + self.asyncTakedown.inParallel(that.asyncTakedown) + override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + self.asyncUndeleteTweet.inParallel(that.asyncUndeleteTweet) + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ + AsyncUpdatePossiblySensitiveTweet.Event + ] = + self.asyncUpdatePossiblySensitiveTweet.inParallel(that.asyncUpdatePossiblySensitiveTweet) + override val quotedTweetDelete: FutureEffect[QuotedTweetDelete.Event] = + self.quotedTweetDelete.inParallel(that.quotedTweetDelete) + override val quotedTweetTakedown: 
/**
 * Factory for the [[TweetEventBusStore]] that converts store events into
 * `TweetEvent`s and hands them to an event-store effect.
 */
object TweetEventBusStore {
  val Action: AsyncWriteAction = AsyncWriteAction.EventBusEnqueue

  /** Safety type of a user, or `None` when the user carries no safety info. */
  def safetyTypeForUser(user: User): Option[SafetyType] =
    for (safety <- user.safety) yield userSafetyToSafetyType(safety)

  /** Protected takes precedence over suspended; anything else is public. */
  def userSafetyToSafetyType(safety: Safety): SafetyType =
    safety match {
      case s if s.isProtected => SafetyType.Private
      case s if s.suspended => SafetyType.Restricted
      case _ => SafetyType.Public
    }

  /**
   * Builds the store on top of `eventStore`. Retry effects are derived with
   * `TweetStore.retry` under [[Action]].
   */
  def apply(
    eventStore: FutureEffect[TweetEvent]
  ): TweetEventBusStore = {

    // One TweetEvent per piece of event data, each stamped with the event's
    // timestamp and the safety type of the event's user (when present).
    def toTweetEvents(source: TweetStoreTweetEvent): Seq[TweetEvent] =
      source.toTweetEventData.map { payload =>
        val flags =
          TweetEventFlags(
            timestampMs = source.timestamp.inMillis,
            safetyType = source.optUser.flatMap(safetyTypeForUser)
          )
        TweetEvent(payload, flags)
      }

    def enqueueEvents[E <: TweetStoreTweetEvent]: FutureEffect[E] =
      eventStore.liftSeq.contramap[E](toTweetEvents)

    new TweetEventBusStore {
      override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] =
        enqueueEvents[AsyncInsertTweet.Event]

      override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] =
        enqueueEvents[AsyncDeleteAdditionalFields.Event]

      override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] =
        enqueueEvents[AsyncDeleteTweet.Event]

      override val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] =
        enqueueEvents[AsyncSetAdditionalFields.Event]

      // Takedowns are only enqueued when the event asks for it.
      override val asyncTakedown: FutureEffect[AsyncTakedown.Event] =
        enqueueEvents[AsyncTakedown.Event].onlyIf(_.eventbusEnqueue)

      override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] =
        enqueueEvents[AsyncUndeleteTweet.Event]

      override val asyncUpdatePossiblySensitiveTweet: FutureEffect[
        AsyncUpdatePossiblySensitiveTweet.Event
      ] =
        enqueueEvents[AsyncUpdatePossiblySensitiveTweet.Event]

      override val quotedTweetDelete: FutureEffect[QuotedTweetDelete.Event] =
        enqueueEvents[QuotedTweetDelete.Event]

      override val quotedTweetTakedown: FutureEffect[QuotedTweetTakedown.Event] =
        enqueueEvents[QuotedTweetTakedown.Event]

      override val retryAsyncInsertTweet: FutureEffect[
        TweetStoreRetryEvent[AsyncInsertTweet.Event]
      ] =
        TweetStore.retry(Action, asyncInsertTweet)

      override val retryAsyncDeleteAdditionalFields: FutureEffect[
        TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event]
      ] =
        TweetStore.retry(Action, asyncDeleteAdditionalFields)

      override val retryAsyncDeleteTweet: FutureEffect[
        TweetStoreRetryEvent[AsyncDeleteTweet.Event]
      ] =
        TweetStore.retry(Action, asyncDeleteTweet)

      override val retryAsyncUndeleteTweet: FutureEffect[
        TweetStoreRetryEvent[AsyncUndeleteTweet.Event]
      ] =
        TweetStore.retry(Action, asyncUndeleteTweet)

      override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[
        TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event]
      ] =
        TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet)

      override val retryAsyncSetAdditionalFields: FutureEffect[
        TweetStoreRetryEvent[AsyncSetAdditionalFields.Event]
      ] =
        TweetStore.retry(Action, asyncSetAdditionalFields)

      override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] =
        TweetStore.retry(Action, asyncTakedown)

      override val scrubGeo: FutureEffect[ScrubGeo.Event] =
        enqueueEvents[ScrubGeo.Event]

      override val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] =
        enqueueEvents[ScrubGeoUpdateUserTimestamp.Event]
    }
  }
}
/**
 * Scrubs inappropriate fields from tweet events before publishing.
 */
object TweetEventDataScrubber {

  /**
   * Returns a copy of `tweet` with `cards`, `card2`, `previousCounts` and
   * `editPerspective` cleared, and `extensionsReply` cleared on every media entity.
   */
  def scrub(tweet: Tweet): Tweet = {
    val scrubbedMedia =
      tweet.media.map { entities =>
        entities.map(entity => entity.copy(extensionsReply = None))
      }

    tweet.copy(
      cards = None,
      card2 = None,
      media = scrubbedMedia,
      previousCounts = None,
      editPerspective = None
    )
  }
}
+ */ +object TweetIndexingStore { + val Action: AsyncWriteAction.TweetIndex.type = AsyncWriteAction.TweetIndex + + def apply(indexer: TweetIndexer): TweetIndexingStore = + new TweetIndexingStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + FutureEffect[AsyncInsertTweet.Event](event => indexer.createIndex(event.tweet)) + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + TweetStore.retry(Action, asyncInsertTweet) + + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event](event => + indexer.deleteIndex(event.tweet, event.isBounceDelete)) + + override val retryAsyncDeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + FutureEffect[AsyncUndeleteTweet.Event](event => indexer.undeleteIndex(event.tweet)) + + override val retryAsyncUndeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUndeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncUndeleteTweet) + + override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = + FutureEffect[AsyncSetRetweetVisibility.Event] { event => + indexer.setRetweetVisibility(event.retweetId, event.visible) + } + + override val retryAsyncSetRetweetVisibility: FutureEffect[ + TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] + ] = + TweetStore.retry(Action, asyncSetRetweetVisibility) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala new file mode 100644 index 000000000..23f6f5124 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala @@ -0,0 +1,64 @@ +package com.twitter.tweetypie +package store + +import 
com.twitter.finagle.stats.RollupStatsReceiver +import com.twitter.servo.util.MemoizingStatsReceiver + +/** + * Records some stats about inserted tweets. Tweets are currently classified by three criteria: + * + * - tweet type: "tweet" or "retweet" + * - user type: "stresstest", "protected", "restricted", or "public" + * - fanout type: "nullcast", "narrowcast", or "usertimeline" + * + * A counter is incremented for a tweet using those three criteria in order. Counters are + * created with a RollupStatsReceiver, so counts are aggregated at each level. Some + * example counters are: + * + * ./insert + * ./insert/tweet + * ./insert/tweet/public + * ./insert/tweet/protected/usertimeline + * ./insert/retweet/stresstest + * ./insert/retweet/public/nullcast + */ +trait TweetStatsStore extends TweetStoreBase[TweetStatsStore] with InsertTweet.Store { + def wrap(w: TweetStore.Wrap): TweetStatsStore = + new TweetStoreWrapper(w, this) with TweetStatsStore with InsertTweet.StoreWrapper +} + +object TweetStatsStore { + def apply(stats: StatsReceiver): TweetStatsStore = { + val rollup = new MemoizingStatsReceiver(new RollupStatsReceiver(stats)) + val inserts = rollup.scope("insert") + + def tweetType(tweet: Tweet) = + if (getShare(tweet).isDefined) "retweet" else "tweet" + + def userType(user: User) = + if (user.roles.exists(_.roles.contains("stresstest"))) "stresstest" + else if (user.safety.exists(_.isProtected)) "protected" + else if (user.safety.exists(_.suspended)) "restricted" + else "public" + + def fanoutType(tweet: Tweet) = + if (TweetLenses.nullcast(tweet)) "nullcast" + else if (TweetLenses.narrowcast(tweet).isDefined) "narrowcast" + else "usertimeline" + + new TweetStatsStore { + override val insertTweet: FutureEffect[InsertTweet.Event] = + FutureEffect[InsertTweet.Event] { event => + inserts + .counter( + tweetType(event.tweet), + userType(event.user), + fanoutType(event.tweet) + ) + .incr() + + Future.Unit + } + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala new file mode 100644 index 000000000..62a668681 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala @@ -0,0 +1,292 @@ +package com.twitter.tweetypie +package store + +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.stats.Stat +import com.twitter.servo.util.RetryHandler +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Timer + +object TweetStore { + // Using the old-school c.t.logging.Logger here as this log is only used by + // servo.FutureEffect's trackOutcome method, which needs that kind of logger. + val log: com.twitter.logging.Logger = com.twitter.logging.Logger(getClass) + + /** + * Adapts a tweet store on a specific TweetStoreEvent type to one that handles + * TweetStoreRetryEvents of that type that match the given AsyncWriteAction. + */ + def retry[T <: AsyncTweetStoreEvent]( + action: AsyncWriteAction, + store: FutureEffect[T] + ): FutureEffect[TweetStoreRetryEvent[T]] = + store.contramap[TweetStoreRetryEvent[T]](_.event).onlyIf(_.action == action) + + /** + * Defines an abstract polymorphic operation to be applied to FutureEffects over any + * TweetStoreEvent type. The Wrap operation is defined over all possible + * FutureEffect[E <: TweetStoreEvent] types. + */ + trait Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] + } + + /** + * A Wrap operation that applies standardized metrics collection to the FutureEffect. 
+ */ + case class Tracked(stats: StatsReceiver) extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + FutureEffect[E] { event => + Stat.timeFuture(stats.scope(event.name).stat("latency_ms")) { + handler(event) + } + }.trackOutcome(stats, _.name, log) + } + + /** + * A Wrap operation that makes the FutureEffect enabled according to the given gate. + */ + case class Gated(gate: Gate[Unit]) extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + handler.enabledBy(gate) + } + + /** + * A Wrap operation that updates the FutureEffect to ignore failures. + */ + object IgnoreFailures extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + handler.ignoreFailures + } + + /** + * A Wrap operation that updates the FutureEffect to ignore failures upon completion. + */ + object IgnoreFailuresUponCompletion extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + handler.ignoreFailuresUponCompletion + } + + /** + * A Wrap operation that applies a RetryHandler to FutureEffects. + */ + case class Retry(retryHandler: RetryHandler[Unit]) extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + handler.retry(retryHandler) + } + + /** + * A Wrap operation that applies a RetryHandler to FutureEffects, but only for events whose retry strategy is ReplicatedEventLocalRetry; all other events are passed to the handler without the RetryHandler. + */ + case class ReplicatedEventRetry(retryHandler: RetryHandler[Unit]) extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + FutureEffect[E] { event => + event.retryStrategy match { + case TweetStoreEvent.ReplicatedEventLocalRetry => handler.retry(retryHandler)(event) + case _ => handler(event) + } + } + } + + /** + * A Wrap operation that configures async-retry behavior for async-write events. 
+ */ + class AsyncRetry( + localRetryPolicy: RetryPolicy[Try[Nothing]], + enqueueRetryPolicy: RetryPolicy[Try[Nothing]], + timer: Timer, + tweetService: ThriftTweetService, + scribe: FutureEffect[FailedAsyncWrite] + )( + stats: StatsReceiver, + action: AsyncWriteAction) + extends Wrap { + + override def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + FutureEffect[E] { event => + event.retryStrategy match { + case TweetStoreEvent.EnqueueAsyncRetry(enqueueRetry) => + enqueueAsyncRetry(handler, enqueueRetry)(event) + + case TweetStoreEvent.LocalRetryThenScribeFailure(toFailedAsyncWrite) => + localRetryThenScribeFailure(handler, toFailedAsyncWrite)(event) + + case _ => + handler(event) + } + } + + private def enqueueAsyncRetry[E <: TweetStoreEvent]( + handler: FutureEffect[E], + enqueueRetry: (ThriftTweetService, AsyncWriteAction) => Future[Unit] + ): FutureEffect[E] = { + val retryInitCounter = stats.counter("retries_initiated") + + // enqueues failed TweetStoreEvents to the deferredrpc-backed tweetService + // to be retried. this store uses the enqueueRetryPolicy to retry the enqueue + // attempts in the case of deferredrpc application failures. + val enqueueRetryHandler = + FutureEffect[E](_ => enqueueRetry(tweetService, action)) + .retry(RetryHandler.failuresOnly(enqueueRetryPolicy, timer, stats.scope("enqueue_retry"))) + + handler.rescue { + case ex => + TweetStore.log.warning(ex, s"will retry $action") + retryInitCounter.incr() + enqueueRetryHandler + } + } + + private def localRetryThenScribeFailure[E <: TweetStoreEvent]( + handler: FutureEffect[E], + toFailedAsyncWrite: AsyncWriteAction => FailedAsyncWrite + ): FutureEffect[E] = { + val exhaustedCounter = stats.counter("retries_exhausted") + + // scribe events that failed after exhausting all retries + val scribeEventHandler = + FutureEffect[E](_ => scribe(toFailedAsyncWrite(action))) + + // wraps `handle` with a retry policy to retry failures with a backoff. 
if we exhaust + // all retries, then we pass the event to `scribeEventStore` to scribe the failure. + handler + .retry(RetryHandler.failuresOnly(localRetryPolicy, timer, stats)) + .rescue { + case ex => + TweetStore.log.warning(ex, s"exhausted retries on $action") + exhaustedCounter.incr() + scribeEventHandler + } + } + } + + /** + * Parent trait for defining a "module" that defines a TweetStoreEvent type and corresponding + * TweetStore and TweetStoreWrapper types. + */ + sealed trait Module { + type Store + type StoreWrapper <: Store + } + + /** + * Parent trait for defining a "module" that defines a sync TweetStoreEvent. + */ + trait SyncModule extends Module { + type Event <: SyncTweetStoreEvent + } + + /** + * Parent trait for defining a "module" that defines an async TweetStoreEvent and a + * TweetStoreRetryEvent. + */ + trait AsyncModule extends Module { + type Event <: AsyncTweetStoreEvent + type RetryEvent <: TweetStoreRetryEvent[Event] + } + + /** + * Parent trait for defining a "module" that defines a replicated TweetStoreEvent. + */ + trait ReplicatedModule extends Module { + type Event <: ReplicatedTweetStoreEvent + } +} + +/** + * Trait for TweetStore implementations that support handler wrapping. + */ +trait TweetStoreBase[Self] { + import TweetStore._ + + /** + * Returns a new store of type Self with Wrap applied to each event handler in this instance. + */ + def wrap(w: Wrap): Self + + /** + * Applies the Tracked Wrap operation to the store. + */ + def tracked(stats: StatsReceiver): Self = wrap(Tracked(stats)) + + /** + * Applies the Gated Wrap operation to the store. + */ + def enabledBy(gate: Gate[Unit]): Self = wrap(Gated(gate)) + + /** + * Applies the IgnoreFailures Wrap operation to the store. + */ + def ignoreFailures: Self = wrap(IgnoreFailures) + + /** + * Applies the IgnoreFailuresUponCompletion Wrap operation to the store. 
+ */ + def ignoreFailuresUponCompletion: Self = wrap(IgnoreFailuresUponCompletion) + + /** + * Applies a RetryHandler to each event handler. + */ + def retry(retryHandler: RetryHandler[Unit]): Self = wrap(Retry(retryHandler)) + + /** + * Applies a RetryHandler to replicated event handlers. + */ + def replicatedRetry(retryHandler: RetryHandler[Unit]): Self = + wrap(ReplicatedEventRetry(retryHandler)) + + /** + * Applies the given AsyncRetry Wrap operation to the store. + */ + def asyncRetry(cfg: AsyncRetry): Self = wrap(cfg) +} + +/** + * An abstract base class for tweet store instances that wrap another tweet store instance. + * You can mix event-specific store wrapper traits into this class to automatically + * have the event-specific handlers wrapped. + */ +abstract class TweetStoreWrapper[+T]( + protected val wrap: TweetStore.Wrap, + protected val underlying: T) + +/** + * A TweetStore that has a handler for all possible TweetStoreEvents. + */ +trait TotalTweetStore + extends AsyncDeleteAdditionalFields.Store + with AsyncDeleteTweet.Store + with AsyncIncrBookmarkCount.Store + with AsyncIncrFavCount.Store + with AsyncInsertTweet.Store + with AsyncSetAdditionalFields.Store + with AsyncSetRetweetVisibility.Store + with AsyncTakedown.Store + with AsyncUndeleteTweet.Store + with AsyncUpdatePossiblySensitiveTweet.Store + with DeleteAdditionalFields.Store + with DeleteTweet.Store + with Flush.Store + with IncrBookmarkCount.Store + with IncrFavCount.Store + with InsertTweet.Store + with QuotedTweetDelete.Store + with QuotedTweetTakedown.Store + with ReplicatedDeleteAdditionalFields.Store + with ReplicatedDeleteTweet.Store + with ReplicatedIncrBookmarkCount.Store + with ReplicatedIncrFavCount.Store + with ReplicatedInsertTweet.Store + with ReplicatedScrubGeo.Store + with ReplicatedSetAdditionalFields.Store + with ReplicatedSetRetweetVisibility.Store + with ReplicatedTakedown.Store + with ReplicatedUndeleteTweet.Store + with ReplicatedUpdatePossiblySensitiveTweet.Store + 
with ScrubGeo.Store + with ScrubGeoUpdateUserTimestamp.Store + with SetAdditionalFields.Store + with SetRetweetVisibility.Store + with Takedown.Store + with UndeleteTweet.Store + with UpdatePossiblySensitiveTweet.Store diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala new file mode 100644 index 000000000..987668d6f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala @@ -0,0 +1,144 @@ +package com.twitter.tweetypie +package store + +import com.twitter.finagle.tracing.Trace +import com.twitter.tweetypie.store.TweetStoreEvent.RetryStrategy +import com.twitter.tweetypie.thriftscala._ + +object TweetStoreEvent { + + /** + * Parent trait for indicating what type of retry strategy to apply to event handlers + * for the corresponding event type. Different classes of events use different strategies. + */ + sealed trait RetryStrategy + + /** + * Indicates that the event type doesn't support retries. + */ + case object NoRetry extends RetryStrategy + + /** + * Indicates that if an event handler encounters a failure, it should enqueue a + * retry to be performed asynchronously. + */ + case class EnqueueAsyncRetry(enqueueRetry: (ThriftTweetService, AsyncWriteAction) => Future[Unit]) + extends RetryStrategy + + /** + * Indicates that if an event handler encounters a failure, it should retry + * the event locally some number of times, before eventually giving up and scribing + * the failure. + */ + case class LocalRetryThenScribeFailure(toFailedAsyncWrite: AsyncWriteAction => FailedAsyncWrite) + extends RetryStrategy + + /** + * Indicates that if an event handler encounters a failure, it should retry + * the event locally some number of times. + */ + case object ReplicatedEventLocalRetry extends RetryStrategy +} + +/** + * The abstract parent class for all TweetStoreEvent types. 
+ */ +sealed trait TweetStoreEvent { + val name: String + + val traceId: Long = Trace.id.traceId.toLong + + /** + * Indicates a particular retry behavior that should be applied to event handlers for + * the corresponding event type. The specifics of the strategy might depend upon the + * specific TweetStore implementation. + */ + def retryStrategy: RetryStrategy +} + +abstract class SyncTweetStoreEvent(val name: String) extends TweetStoreEvent { + override def retryStrategy: RetryStrategy = TweetStoreEvent.NoRetry +} + +abstract class AsyncTweetStoreEvent(val name: String) extends TweetStoreEvent { + def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] + + override def retryStrategy: RetryStrategy = TweetStoreEvent.EnqueueAsyncRetry(enqueueRetry) +} + +abstract class ReplicatedTweetStoreEvent(val name: String) extends TweetStoreEvent { + override def retryStrategy: RetryStrategy = TweetStoreEvent.ReplicatedEventLocalRetry +} + +/** + * A trait for all TweetStoreEvents that become TweetEvents. + */ +trait TweetStoreTweetEvent { + val timestamp: Time + + val optUser: Option[User] + + /** + * Most TweetStoreTweetEvents map to a single TweetEvent, but some + * optionally map to an event and others map to multiple events, so + * this method needs to return a Seq of TweetEventData. + */ + def toTweetEventData: Seq[TweetEventData] +} + +/** + * The abstract parent class for an event that indicates a particular action + * for a particular event that needs to be retried via the async-write-retrying mechanism. 
+ */ +abstract class TweetStoreRetryEvent[E <: AsyncTweetStoreEvent] extends TweetStoreEvent { + override val name = "async_write_retry" + + def action: AsyncWriteAction + def event: E + + def eventType: AsyncWriteEventType + + def scribedTweetOnFailure: Option[Tweet] + + override def retryStrategy: RetryStrategy = + TweetStoreEvent.LocalRetryThenScribeFailure(action => + FailedAsyncWrite(eventType, action, scribedTweetOnFailure)) +} + +/** + * Functions as a disjunction between an event type E and its corresponding + * retry event type TweetStoreRetryEvent[E]. + */ +case class TweetStoreEventOrRetry[E <: AsyncTweetStoreEvent]( + event: E, + toRetry: Option[TweetStoreRetryEvent[E]]) { + def toInitial: Option[E] = if (retryAction.isDefined) None else Some(event) + def retryAction: Option[RetryStrategy] = toRetry.map(_.retryStrategy) + def hydrate(f: E => Future[E]): Future[TweetStoreEventOrRetry[E]] = + f(event).map(e => copy(event = e)) +} + +object TweetStoreEventOrRetry { + def apply[E <: AsyncTweetStoreEvent, R <: TweetStoreRetryEvent[E]]( + event: E, + retryAction: Option[AsyncWriteAction], + toRetryEvent: (AsyncWriteAction, E) => R + ): TweetStoreEventOrRetry[E] = + TweetStoreEventOrRetry(event, retryAction.map(action => toRetryEvent(action, event))) + + object First { + + /** matches against TweetStoreEventOrRetry instances for an initial event */ + def unapply[E <: AsyncTweetStoreEvent](it: TweetStoreEventOrRetry[E]): Option[E] = + it.toInitial + } + + object Retry { + + /** matches against TweetStoreEventOrRetry instances for a retry event */ + def unapply[E <: AsyncTweetStoreEvent]( + it: TweetStoreEventOrRetry[E] + ): Option[TweetStoreRetryEvent[E]] = + it.toRetry + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala new file mode 100644 index 000000000..8e031fc46 --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala @@ -0,0 +1,41 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object TweetUpdate { + + /** + * Copies takedown information from the source [[Tweet]] into [[CachedTweet]]. + * + * Note that this method requires the source [[Tweet]] to have been loaded with the following + * additional fields (which happens for all paths that create [[ReplicatedTakedown.Event]], in + * both [[TakedownHandler]] and [[UserTakedownHandler]]): + * - TweetypieOnlyTakedownReasonsField + * - TweetypieOnlyTakedownCountryCodesField + * This is done to ensure the remote datacenter of a takedown does not incorrectly try to load + * from MH as the data is already cached. NOTE(review): calls source.coreData.get, so presumably fails if the source has no coreData; confirm callers always load it. + */ + def copyTakedownFieldsForUpdate(source: Tweet): CachedTweet => CachedTweet = + ct => { + val newCoreData = source.coreData.get + val updatedCoreData = ct.tweet.coreData.map(_.copy(hasTakedown = newCoreData.hasTakedown)) + ct.copy( + tweet = ct.tweet.copy( + coreData = updatedCoreData, + tweetypieOnlyTakedownCountryCodes = source.tweetypieOnlyTakedownCountryCodes, + tweetypieOnlyTakedownReasons = source.tweetypieOnlyTakedownReasons + ) + ) + } + + def copyNsfwFieldsForUpdate(source: Tweet): Tweet => Tweet = + tweet => { + val newCoreData = source.coreData.get + val updatedCoreData = + tweet.coreData.map { core => + core.copy(nsfwUser = newCoreData.nsfwUser, nsfwAdmin = newCoreData.nsfwAdmin) + } + tweet.copy(coreData = updatedCoreData) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala new file mode 100644 index 000000000..72edb8cc1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala @@ -0,0 +1,237 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.store.TweetEventDataScrubber.scrub +import 
com.twitter.tweetypie.thriftscala._ + +object UndeleteTweet extends TweetStore.SyncModule { + + /** + * A TweetStoreEvent for Undeletion. + */ + case class Event( + tweet: Tweet, + user: User, + timestamp: Time, + hydrateOptions: WritePathHydrationOptions, + _internalTweet: Option[CachedTweet] = None, + deletedAt: Option[Time], + sourceTweet: Option[Tweet] = None, + sourceUser: Option[User] = None, + quotedTweet: Option[Tweet] = None, + quotedUser: Option[User] = None, + parentUserId: Option[UserId] = None, + quoterHasAlreadyQuotedTweet: Boolean = false) + extends SyncTweetStoreEvent("undelete_tweet") + with QuotedTweetOps { + def internalTweet: CachedTweet = + _internalTweet.getOrElse( + throw new IllegalStateException( + s"internalTweet should have been set in WritePathHydration, ${this}" + ) + ) + + def toAsyncUndeleteTweetRequest: AsyncUndeleteTweetRequest = + AsyncUndeleteTweetRequest( + tweet = tweet, + cachedTweet = internalTweet, + user = user, + timestamp = timestamp.inMillis, + deletedAt = deletedAt.map(_.inMillis), + sourceTweet = sourceTweet, + sourceUser = sourceUser, + quotedTweet = quotedTweet, + quotedUser = quotedUser, + parentUserId = parentUserId, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet) + ) + } + + trait Store { + val undeleteTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val undeleteTweet: FutureEffect[Event] = wrap(underlying.undeleteTweet) + } + + object Store { + def apply( + logLensStore: LogLensStore, + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, + asyncEnqueueStore: AsyncEnqueueStore + ): Store = + new Store { + override val undeleteTweet: FutureEffect[Event] = + FutureEffect.inParallel( + logLensStore.undeleteTweet, + // ignore failures writing to cache, will be retried in async-path + cachingTweetStore.ignoreFailures.undeleteTweet, + tweetCountsUpdatingStore.undeleteTweet, + 
asyncEnqueueStore.undeleteTweet + ) + } + } +} + +object AsyncUndeleteTweet extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest(request: AsyncUndeleteTweetRequest): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + AsyncUndeleteTweet.Event( + tweet = request.tweet, + cachedTweet = request.cachedTweet, + user = request.user, + optUser = Some(request.user), + timestamp = Time.fromMilliseconds(request.timestamp), + deletedAt = request.deletedAt.map(Time.fromMilliseconds), + sourceTweet = request.sourceTweet, + sourceUser = request.sourceUser, + quotedTweet = request.quotedTweet, + quotedUser = request.quotedUser, + parentUserId = request.parentUserId, + quoterHasAlreadyQuotedTweet = request.quoterHasAlreadyQuotedTweet.getOrElse(false) + ), + request.retryAction, + RetryEvent + ) + } + + case class Event( + tweet: Tweet, + cachedTweet: CachedTweet, + user: User, + optUser: Option[User], + timestamp: Time, + deletedAt: Option[Time], + sourceTweet: Option[Tweet], + sourceUser: Option[User], + quotedTweet: Option[Tweet], + quotedUser: Option[User], + parentUserId: Option[UserId] = None, + quoterHasAlreadyQuotedTweet: Boolean = false) + extends AsyncTweetStoreEvent("async_undelete_tweet") + with QuotedTweetOps + with TweetStoreTweetEvent { + + /** + * Convert this event into an AsyncUndeleteTweetRequest thrift request object + */ + def toAsyncRequest(retryAction: Option[AsyncWriteAction] = None): AsyncUndeleteTweetRequest = + AsyncUndeleteTweetRequest( + tweet = tweet, + cachedTweet = cachedTweet, + user = user, + timestamp = timestamp.inMillis, + retryAction = retryAction, + deletedAt = deletedAt.map(_.inMillis), + sourceTweet = sourceTweet, + sourceUser = sourceUser, + quotedTweet = quotedTweet, + quotedUser = quotedUser, + parentUserId = parentUserId, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet) + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.TweetUndeleteEvent( + 
TweetUndeleteEvent( + tweet = scrub(tweet), + user = Some(user), + sourceTweet = sourceTweet.map(scrub), + sourceUser = sourceUser, + retweetParentUserId = parentUserId, + quotedTweet = publicQuotedTweet.map(scrub), + quotedUser = publicQuotedUser, + deletedAtMsec = deletedAt.map(_.inMilliseconds) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncUndeleteTweet(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.Undelete.type = AsyncWriteEventType.Undelete + override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) + } + + trait Store { + val asyncUndeleteTweet: FutureEffect[Event] + val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncUndeleteTweet: FutureEffect[Event] = wrap(underlying.asyncUndeleteTweet) + override val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncUndeleteTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + eventBusEnqueueStore: TweetEventBusStore, + indexingStore: TweetIndexingStore, + replicatingStore: ReplicatingTweetStore, + mediaServiceStore: MediaServiceStore, + timelineUpdatingStore: TlsTimelineUpdatingStore + ): Store = { + val stores: Seq[Store] = + Seq( + cachingTweetStore, + eventBusEnqueueStore, + indexingStore, + replicatingStore, + mediaServiceStore, + timelineUpdatingStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncUndeleteTweet: FutureEffect[Event] = build(_.asyncUndeleteTweet) + override val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = build( + 
_.retryAsyncUndeleteTweet) + } + } + } +} + +object ReplicatedUndeleteTweet extends TweetStore.ReplicatedModule { + + case class Event( + tweet: Tweet, + cachedTweet: CachedTweet, + quoterHasAlreadyQuotedTweet: Boolean = false) + extends ReplicatedTweetStoreEvent("replicated_undelete_tweet") + + trait Store { + val replicatedUndeleteTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedUndeleteTweet: FutureEffect[Event] = wrap( + underlying.replicatedUndeleteTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = + new Store { + override val replicatedUndeleteTweet: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.replicatedUndeleteTweet.ignoreFailures, + tweetCountsUpdatingStore.replicatedUndeleteTweet.ignoreFailures + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala new file mode 100644 index 000000000..c8d1d0b30 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala @@ -0,0 +1,206 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object UpdatePossiblySensitiveTweet extends TweetStore.SyncModule { + + case class Event( + tweet: Tweet, + user: User, + timestamp: Time, + byUserId: UserId, + nsfwAdminChange: Option[Boolean], + nsfwUserChange: Option[Boolean], + note: Option[String], + host: Option[String]) + extends SyncTweetStoreEvent("update_possibly_sensitive_tweet") { + def toAsyncRequest: AsyncUpdatePossiblySensitiveTweetRequest = + AsyncUpdatePossiblySensitiveTweetRequest( + tweet = tweet, + user = user, + byUserId = byUserId, + timestamp = timestamp.inMillis, + nsfwAdminChange = nsfwAdminChange, + 
nsfwUserChange = nsfwUserChange, + note = note, + host = host + ) + } + + trait Store { + val updatePossiblySensitiveTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val updatePossiblySensitiveTweet: FutureEffect[Event] = wrap( + underlying.updatePossiblySensitiveTweet + ) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + logLensStore: LogLensStore, + asyncEnqueueStore: AsyncEnqueueStore + ): Store = + new Store { + override val updatePossiblySensitiveTweet: FutureEffect[Event] = + FutureEffect.inParallel( + manhattanStore.ignoreFailures.updatePossiblySensitiveTweet, + cachingTweetStore.ignoreFailures.updatePossiblySensitiveTweet, + logLensStore.updatePossiblySensitiveTweet, + asyncEnqueueStore.updatePossiblySensitiveTweet + ) + } + } +} + +object AsyncUpdatePossiblySensitiveTweet extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest( + request: AsyncUpdatePossiblySensitiveTweetRequest + ): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + AsyncUpdatePossiblySensitiveTweet.Event( + tweet = request.tweet, + user = request.user, + optUser = Some(request.user), + timestamp = Time.fromMilliseconds(request.timestamp), + byUserId = request.byUserId, + nsfwAdminChange = request.nsfwAdminChange, + nsfwUserChange = request.nsfwUserChange, + note = request.note, + host = request.host + ), + request.action, + RetryEvent + ) + } + + case class Event( + tweet: Tweet, + user: User, + optUser: Option[User], + timestamp: Time, + byUserId: UserId, + nsfwAdminChange: Option[Boolean], + nsfwUserChange: Option[Boolean], + note: Option[String], + host: Option[String]) + extends AsyncTweetStoreEvent("async_update_possibly_sensitive_tweet") + with TweetStoreTweetEvent { + + def toAsyncRequest( + action: Option[AsyncWriteAction] = None + ): AsyncUpdatePossiblySensitiveTweetRequest = + AsyncUpdatePossiblySensitiveTweetRequest( + 
tweet = tweet, + user = user, + byUserId = byUserId, + timestamp = timestamp.inMillis, + nsfwAdminChange = nsfwAdminChange, + nsfwUserChange = nsfwUserChange, + note = note, + host = host, + action = action + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.TweetPossiblySensitiveUpdateEvent( + TweetPossiblySensitiveUpdateEvent( + tweetId = tweet.id, + userId = user.id, + nsfwAdmin = TweetLenses.nsfwAdmin.get(tweet), + nsfwUser = TweetLenses.nsfwUser.get(tweet) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncUpdatePossiblySensitiveTweet(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.UpdatePossiblySensitiveTweet.type = + AsyncWriteEventType.UpdatePossiblySensitiveTweet + override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) + } + + trait Store { + val asyncUpdatePossiblySensitiveTweet: FutureEffect[Event] + val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[Event] = wrap( + underlying.asyncUpdatePossiblySensitiveTweet + ) + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[TweetStoreRetryEvent[Event]] = + wrap( + underlying.retryAsyncUpdatePossiblySensitiveTweet + ) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + replicatingStore: ReplicatingTweetStore, + guanoStore: GuanoServiceStore, + eventBusStore: TweetEventBusStore + ): Store = { + val stores: Seq[Store] = + Seq( + manhattanStore, + cachingTweetStore, + replicatingStore, + guanoStore, + eventBusStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] 
= + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[Event] = build( + _.asyncUpdatePossiblySensitiveTweet) + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[Event] + ] = build( + _.retryAsyncUpdatePossiblySensitiveTweet + ) + } + } + } +} + +object ReplicatedUpdatePossiblySensitiveTweet extends TweetStore.ReplicatedModule { + + case class Event(tweet: Tweet) + extends ReplicatedTweetStoreEvent("replicated_update_possibly_sensitive_tweet") + + trait Store { + val replicatedUpdatePossiblySensitiveTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedUpdatePossiblySensitiveTweet: FutureEffect[Event] = wrap( + underlying.replicatedUpdatePossiblySensitiveTweet + ) + } + + object Store { + def apply(cachingTweetStore: CachingTweetStore): Store = { + new Store { + override val replicatedUpdatePossiblySensitiveTweet: FutureEffect[Event] = + cachingTweetStore.replicatedUpdatePossiblySensitiveTweet + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala new file mode 100644 index 000000000..aa399d9bf --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala @@ -0,0 +1,16 @@ +package com.twitter.tweetypie + +import com.fasterxml.jackson.core.JsonGenerator +import com.twitter.tweetypie.thriftscala.CachedTweet +import com.twitter.context.TwitterContext + +package object store { + type JsonGen = JsonGenerator => Unit + + // Bring Tweetypie permitted TwitterContext into scope + val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + def cachedTweetFromUnhydratedTweet(tweet: Tweet): CachedTweet = + CachedTweet(tweet = tweet) +} diff --git 
a/tweetypie/server/src/main/thrift/BUILD b/tweetypie/server/src/main/thrift/BUILD new file mode 100644 index 000000000..f90f1b823 --- /dev/null +++ b/tweetypie/server/src/main/thrift/BUILD @@ -0,0 +1,29 @@ +create_thrift_libraries( + base_name = "compiled", + sources = ["**/*.thrift"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependency_roots = [ + "mediaservices/commons/src/main/thrift", + "tweetypie/servo/repo/src/main/thrift", + "src/thrift/com/twitter/context:feature-context", + "src/thrift/com/twitter/escherbird:media-annotation-structs", + "src/thrift/com/twitter/expandodo:capi", + "src/thrift/com/twitter/expandodo:only", + "src/thrift/com/twitter/geoduck", + "src/thrift/com/twitter/gizmoduck:thrift", + "src/thrift/com/twitter/gizmoduck:user-thrift", + "src/thrift/com/twitter/servo:servo-exception", + "tweetypie/common/src/thrift/com/twitter/tweetypie:audit", + "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service", + "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet", + ], + generate_languages = [ + "java", + "scala", + ], +) diff --git a/tweetypie/server/src/main/thrift/tweetypie_internal.thrift b/tweetypie/server/src/main/thrift/tweetypie_internal.thrift new file mode 100644 index 000000000..3cc16381e --- /dev/null +++ b/tweetypie/server/src/main/thrift/tweetypie_internal.thrift @@ -0,0 +1,705 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala + +include "com/twitter/context/feature_context.thrift" +include "com/twitter/expandodo/cards.thrift" +include "com/twitter/gizmoduck/user.thrift" +include "com/twitter/mediaservices/commons/MediaCommon.thrift" +include "com/twitter/mediaservices/commons/MediaInformation.thrift" +include 
"com/twitter/mediaservices/commons/TweetMedia.thrift" +include "com/twitter/servo/exceptions.thrift" +include "com/twitter/servo/cache/servo_repo.thrift" +include "com/twitter/tseng/withholding/withholding.thrift" +include "com/twitter/tweetypie/delete_location_data.thrift" +include "com/twitter/tweetypie/transient_context.thrift" +include "com/twitter/tweetypie/media_entity.thrift" +include "com/twitter/tweetypie/tweet.thrift" +include "com/twitter/tweetypie/tweet_audit.thrift" +include "com/twitter/tweetypie/stored_tweet_info.thrift" +include "com/twitter/tweetypie/tweet_service.thrift" + +typedef i16 FieldId + +struct UserIdentity { + 1: required i64 id + 2: required string screen_name + 3: required string real_name +# obsolete 4: bool deactivated = 0 +# obsolete 5: bool suspended = 0 +} + +enum HydrationType { + MENTIONS = 1, + URLS = 2, + CACHEABLE_MEDIA = 3, + QUOTED_TWEET_REF = 4, + REPLY_SCREEN_NAME = 5, + DIRECTED_AT = 6, + CONTRIBUTOR = 7, + SELF_THREAD_INFO = 8 +} + +struct CachedTweet { + 1: required tweet.Tweet tweet + // @obsolete 2: optional set included_additional_fields + 3: set completed_hydrations = [] + + // Indicates that a tweet was deleted after being bounced for violating + // the Twitter Rules. + // When set to true, all other fields in CachedTweet are ignored. + 4: optional bool is_bounce_deleted + + // Indicates whether this tweet has safety labels stored in Strato. + // See com.twitter.tweetypie.core.TweetData.hasSafetyLabels for more details. 
+ // @obsolete 5: optional bool has_safety_labels +} (persisted='true', hasPersonalData='true') + +struct MediaFaces { + 1: required map> faces +} + +enum AsyncWriteEventType { + INSERT = 1, + DELETE = 2, + UNDELETE = 3, + SET_ADDITIONAL_FIELDS = 4, + DELETE_ADDITIONAL_FIELDS = 5, + UPDATE_POSSIBLY_SENSITIVE_TWEET = 6, + UPDATE_TWEET_MEDIA = 7, + TAKEDOWN = 8, + SET_RETWEET_VISIBILITY = 9 +} + +// an enum of actions that could happen in an async-write (insert or delete) +enum AsyncWriteAction { + HOSEBIRD_ENQUEUE = 1 + SEARCH_ENQUEUE = 2 + // obsolete MAIL_ENQUEUE = 3 + FANOUT_DELIVERY = 4 + // obsolete FACEBOOK_ENQUEUE = 5 + TWEET_INDEX = 6 + TIMELINE_UPDATE = 7 + CACHE_UPDATE = 8 + REPLICATION = 9 + // obsolete MONORAIL_EXPIRY_ENQUEUE = 10 + USER_GEOTAG_UPDATE = 11 + // obsolete IBIS_ENQUEUE = 12 + EVENT_BUS_ENQUEUE = 13 + // obsolete HOSEBIRD_BINARY_ENQUEUE = 14 + TBIRD_UPDATE = 15 + RETWEETS_DELETION = 16 + GUANO_SCRIBE = 17 + MEDIA_DELETION = 18 + GEO_SEARCH_REQUEST_ID = 19 + SEARCH_THRIFT_ENQUEUE = 20 + RETWEET_ARCHIVAL_ENQUEUE = 21 +} + +# This struct is scribed to test_tweetypie_failed_async_write after +# an async-write action has failed multiple retries +struct FailedAsyncWrite { + 1: required AsyncWriteEventType event_type + 2: required AsyncWriteAction action + 3: optional tweet.Tweet tweet +} (persisted='true', hasPersonalData='true') + +# This struct is scribed to test_tweetypie_detached_retweets after +# attempting to read a retweet for which the source tweet has been deleted. +struct DetachedRetweet { + 1: required i64 tweet_id (personalDataType='TweetId') + 2: required i64 user_id (personalDataType='UserId') + 3: required i64 source_tweet_id (personalDataType='TweetId') +} (persisted='true', hasPersonalData='true') + +struct TweetCacheWrite { + 1: required i64 tweet_id (personalDataType = 'TweetId') + // If the tweet id is a snowflake id, this is an offset since tweet creation. 
+ // If it is not a snowflake id, then this is a Unix epoch time in + // milliseconds. (The idea is that for most tweets, this encoding will make + // it easier to see the interval between events and whether it occurred soon + // after tweet creation.) + 2: required i64 timestamp (personalDataType = 'TransactionTimestamp') + 3: required string action // One of "set", "add", "replace", "cas", "delete" + 4: required servo_repo.CachedValue cached_value // Contains metadata about the cached value + 5: optional CachedTweet cached_tweet +} (persisted='true', hasPersonalData='true') + +struct AsyncInsertRequest { + 12: required tweet.Tweet tweet + 18: required user.User user + 21: required i64 timestamp + // the cacheable version of tweet from field 12 + 29: required CachedTweet cached_tweet + # 13: obsolete tweet.Tweet internal_tweet + 19: optional tweet.Tweet source_tweet + 20: optional user.User source_user + // Used for quote tweet feature + 22: optional tweet.Tweet quoted_tweet + 23: optional user.User quoted_user + 28: optional i64 parent_user_id + // Used for delivering the requestId of a geotagged tweet + 24: optional string geo_search_request_id + # 7: obsolete + # if not specified, all async insert actions are performed. if specified, only + # the specified action is performed; this is used for retrying specific actions + # that failed on a previous attempt. + 10: optional AsyncWriteAction retry_action + # 11: obsolete: bool from_monorail = 0 + # 14: obsolete + 15: optional feature_context.FeatureContext feature_context + # 16: obsolete + # 17: obsolete + # 26: obsolete: optional tweet.Tweet debug_tweet_copy + 27: optional map additional_context + 30: optional transient_context.TransientCreateContext transient_context + // Used to check whether the same tweet has been quoted multiple + // times by a given user. 
+ 31: optional bool quoter_has_already_quoted_tweet + 32: optional InitialTweetUpdateRequest initialTweetUpdateRequest + // User ids of users mentioned in note tweet. Used for tls events + 33: optional list note_tweet_mentioned_user_ids +} + +struct AsyncUpdatePossiblySensitiveTweetRequest { + 1: required tweet.Tweet tweet + 2: required user.User user + 3: required i64 by_user_id + 4: required i64 timestamp + 5: optional bool nsfw_admin_change + 6: optional bool nsfw_user_change + 7: optional string note + 8: optional string host + 9: optional AsyncWriteAction action +} + +struct AsyncUpdateTweetMediaRequest { + 1: required i64 tweet_id + 2: required list orphaned_media + 3: optional AsyncWriteAction retry_action + 4: optional list media_keys +} + +struct AsyncSetAdditionalFieldsRequest { + 1: required tweet.Tweet additional_fields + 3: required i64 timestamp + 4: required i64 user_id + 2: optional AsyncWriteAction retry_action +} + +struct AsyncSetRetweetVisibilityRequest { + 1: required i64 retweet_id + // Whether to archive or unarchive(visible=true) the retweet_id edge in the RetweetsGraph. + 2: required bool visible + 3: required i64 src_id + 5: required i64 retweet_user_id + 6: required i64 source_tweet_user_id + 7: required i64 timestamp + 4: optional AsyncWriteAction retry_action +} + +struct SetRetweetVisibilityRequest { + 1: required i64 retweet_id + // Whether to archive or unarchive(visible=true) the retweet_id edge in the RetweetsGraph. + 2: required bool visible +} + +struct AsyncEraseUserTweetsRequest { + 1: required i64 user_id + 3: required i64 flock_cursor + 4: required i64 start_timestamp + 5: required i64 tweet_count +} + +struct AsyncDeleteRequest { + 4: required tweet.Tweet tweet + 11: required i64 timestamp + 2: optional user.User user + 9: optional i64 by_user_id + 12: optional tweet_audit.AuditDeleteTweet audit_passthrough + 13: optional i64 cascaded_from_tweet_id + # if not specified, all async-delete actions are performed. 
if specified, only + # the specified action is performed; this is used for retrying specific actions + # that failed on a previous attempt. + 3: optional AsyncWriteAction retry_action + 5: bool delete_media = 1 + 6: bool delete_retweets = 1 + 8: bool scribe_for_audit = 1 + 15: bool is_user_erasure = 0 + 17: bool is_bounce_delete = 0 + 18: optional bool is_last_quote_of_quoter + 19: optional bool is_admin_delete +} + +struct AsyncUndeleteTweetRequest { + 1: required tweet.Tweet tweet + 3: required user.User user + 4: required i64 timestamp + // the cacheable version of tweet from field 1 + 12: required CachedTweet cached_tweet + # 2: obsolete tweet.Tweet internal_tweet + 5: optional AsyncWriteAction retry_action + 6: optional i64 deleted_at + 7: optional tweet.Tweet source_tweet + 8: optional user.User source_user + 9: optional tweet.Tweet quoted_tweet + 10: optional user.User quoted_user + 11: optional i64 parent_user_id + 13: optional bool quoter_has_already_quoted_tweet +} + +struct AsyncIncrFavCountRequest { + 1: required i64 tweet_id + 2: required i32 delta +} + +struct AsyncIncrBookmarkCountRequest { + 1: required i64 tweet_id + 2: required i32 delta +} + +struct AsyncDeleteAdditionalFieldsRequest { + 6: required i64 tweet_id + 7: required list field_ids + 4: required i64 timestamp + 5: required i64 user_id + 3: optional AsyncWriteAction retry_action +} + +// Used for both tweet and user takedowns. +// user will be None for user takedowns because user is only used when scribe_for_audit or +// eventbus_enqueue are true, which is never the case for user takedown. +struct AsyncTakedownRequest { + 1: required tweet.Tweet tweet + + // Author of the tweet. Used when scribe_for_audit or eventbus_enqueue are true which is the case + // for tweet takedown but not user takedown. + 2: optional user.User user + + // This field is the resulting list of takedown reasons on the tweet after the + // reasons_to_add and reasons_to_remove changes have been applied. 
+ 13: list takedown_reasons = [] + + // This field is the list of takedown reasons to add to the tweet. + 14: list reasons_to_add = [] + + // This field is the list of takedown reasons to remove from the tweet. + 15: list reasons_to_remove = [] + + // This field determines whether or not Tweetypie should write takedown audits + // for this request to Guano. + 6: required bool scribe_for_audit + + // This field determines whether or not Tweetypie should enqueue a + // TweetTakedownEvent to EventBus and Hosebird for this request. + 7: required bool eventbus_enqueue + + // This field is sent as part of the takedown audit that's written to Guano, + // and is not persisted with the takedown itself. + 8: optional string audit_note + + // This field is the ID of the user who initiated the takedown. It is used + // when auditing the takedown in Guano. If unset, it will be logged as -1. + 9: optional i64 by_user_id + + // This field is the host where the request originated or the remote IP that + // is associated with the request. It is used when auditing the takedown in + // Guano. If unset, it will be logged as "". + 10: optional string host + + 11: optional AsyncWriteAction retry_action + 12: required i64 timestamp +} + +struct SetTweetUserTakedownRequest { + 1: required i64 tweet_id + 2: required bool has_takedown + 3: optional i64 user_id +} + +enum DataErrorCause { + UNKNOWN = 0 + // Returned on set_tweet_user_takedown when + // the SetTweetUserTakedownRequest.user_id does not match the author + // of the tweet identified by SetTweetUserTakedownRequest.tweet_id. + USER_TWEET_RELATIONSHIP = 1 +} + +/** + * DataError is returned for operations that perform data changes, + * but encountered an inconsistency, and the operation cannot + * be meaningfully performed. 
+ */ +exception DataError { + 1: required string message + 2: optional DataErrorCause errorCause +} + +struct ReplicatedDeleteAdditionalFieldsRequest { + /** is a map for backwards compatibility, but will only contain a single tweet id */ + 1: required map> fields_map +} + +struct CascadedDeleteTweetRequest { + 1: required i64 tweet_id + 2: required i64 cascaded_from_tweet_id + 3: optional tweet_audit.AuditDeleteTweet audit_passthrough +} + +struct QuotedTweetDeleteRequest { + 1: i64 quoting_tweet_id + 2: i64 quoted_tweet_id + 3: i64 quoted_user_id +} + +struct QuotedTweetTakedownRequest { + 1: i64 quoting_tweet_id + 2: i64 quoted_tweet_id + 3: i64 quoted_user_id + 4: list takedown_country_codes = [] + 5: list takedown_reasons = [] +} + +struct ReplicatedInsertTweet2Request { + 1: required CachedTweet cached_tweet + // Used to check whether the same tweet has been quoted by a user. + 2: optional bool quoter_has_already_quoted_tweet + 3: optional InitialTweetUpdateRequest initialTweetUpdateRequest +} + +struct ReplicatedDeleteTweet2Request { + 1: required tweet.Tweet tweet + 2: required bool is_erasure + 3: required bool is_bounce_delete + 4: optional bool is_last_quote_of_quoter +} + +struct ReplicatedSetRetweetVisibilityRequest { + 1: required i64 src_id + // Whether to archive or unarchive(visible=true) the retweet_id edge in the RetweetsGraph. 
+ 2: required bool visible +} + +struct ReplicatedUndeleteTweet2Request { + 1: required CachedTweet cached_tweet + 2: optional bool quoter_has_already_quoted_tweet +} + +struct GetStoredTweetsOptions { + 1: bool bypass_visibility_filtering = 0 + 2: optional i64 for_user_id + 3: list additional_field_ids = [] +} + +struct GetStoredTweetsRequest { + 1: required list tweet_ids + 2: optional GetStoredTweetsOptions options +} + +struct GetStoredTweetsResult { + 1: required stored_tweet_info.StoredTweetInfo stored_tweet +} + +struct GetStoredTweetsByUserOptions { + 1: bool bypass_visibility_filtering = 0 + 2: bool set_for_user_id = 0 + 3: optional i64 start_time_msec + 4: optional i64 end_time_msec + 5: optional i64 cursor + 6: bool start_from_oldest = 0 + 7: list additional_field_ids = [] +} + +struct GetStoredTweetsByUserRequest { + 1: required i64 user_id + 2: optional GetStoredTweetsByUserOptions options +} + +struct GetStoredTweetsByUserResult { + 1: required list stored_tweets + 2: optional i64 cursor +} + +/* This is a request to update an initial tweet based on the creation of a edit tweet + * initialTweetId: The tweet to be updated + * editTweetId: The tweet being created, which is an edit of initialTweetId + * selfPermalink: A self permalink for initialTweetId + */ +struct InitialTweetUpdateRequest { + 1: required i64 initialTweetId + 2: required i64 editTweetId + 3: optional tweet.ShortenedUrl selfPermalink +} + +service TweetServiceInternal extends tweet_service.TweetService { + + /** + * Performs the async portion of TweetService.erase_user_tweets. + * Only tweetypie itself can call this. + */ + void async_erase_user_tweets(1: AsyncEraseUserTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.post_tweet. + * Only tweetypie itself can call this. 
+ */ + void async_insert(1: AsyncInsertRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.delete_tweets. + * Only tweetypie itself can call this. + */ + void async_delete(1: AsyncDeleteRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.undelete_tweet. + * Only tweetypie itself can call this. + */ + void async_undelete_tweet(1: AsyncUndeleteTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.update_possibly_sensitive_tweet. + * Only tweetypie itself can call this. + */ + void async_update_possibly_sensitive_tweet(1: AsyncUpdatePossiblySensitiveTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.incr_tweet_fav_count. + * Only tweetypie itself can call this. + */ + void async_incr_fav_count(1: AsyncIncrFavCountRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.incr_tweet_bookmark_count. + * Only tweetypie itself can call this. + */ + void async_incr_bookmark_count(1: AsyncIncrBookmarkCountRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.set_additional_fields. + * Only tweetypie itself can call this. + */ + void async_set_additional_fields(1: AsyncSetAdditionalFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetServiceInternal.set_retweet_visibility. 
+ * Only tweetypie itself can call this. + */ + void async_set_retweet_visibility(1: AsyncSetRetweetVisibilityRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Set whether the specified retweet ID should be included in its source tweet's retweet count. + * This endpoint is invoked from a tweetypie-daemon to adjust retweet counts for all tweets a + * suspended or fraudulent (e.g. ROPO-'d) user has retweeted to disincentivize their false engagement. + */ + void set_retweet_visibility(1: SetRetweetVisibilityRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.delete_additional_fields. + * Only tweetypie itself can call this. + */ + void async_delete_additional_fields(1: AsyncDeleteAdditionalFieldsRequest field_delete) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.takedown. + * Only tweetypie itself can call this. + */ + void async_takedown(1: AsyncTakedownRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Update the tweet's takedown fields when a user is taken down. + * Only tweetypie's UserTakedownChange daemon can call this. + */ + void set_tweet_user_takedown(1: SetTweetUserTakedownRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error, + 3: DataError data_error) + + /** + * Cascade delete tweet is the logic for removing tweets that are detached + * from their dependency which has been deleted. They are already filtered + * out from serving, so this operation reconciles storage with the view + * presented by Tweetypie. + * This RPC call is delegated from daemons or batch jobs. 
Currently there + * are two use-cases when this call is issued: + * * Deleting detached retweets after the source tweet was deleted. + * This is done through RetweetsDeletion daemon and the + * CleanupDetachedRetweets job. + * * Deleting edits of an initial tweet that has been deleted. + * This is done by CascadedEditedTweetDelete daemon. + * Note that, when serving the original delete request for an edit, + * the initial tweet is only deleted, which makes all edits hidden. + */ + void cascaded_delete_tweet(1: CascadedDeleteTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Update the timestamp of the user's most recent request to delete + * location data on their tweets. This does not actually remove the + * geo information from the user's tweets, but it will prevent the geo + * information for this user's tweets from being returned by + * Tweetypie. + */ + void scrub_geo_update_user_timestamp(1: delete_location_data.DeleteLocationData request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Look up tweets quoting a tweet that has been deleted and enqueue a compliance event. + * Only tweetypie's QuotedTweetDelete daemon can call this. + **/ + void quoted_tweet_delete(1: QuotedTweetDeleteRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Look up tweets quoting a tweet that has been taken down and enqueue a compliance event. + * Only tweetypie's QuotedTweetTakedown daemon can call this. + **/ + void quoted_tweet_takedown(1: QuotedTweetTakedownRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates TweetService.get_tweet_counts from another cluster. 
+ */ + void replicated_get_tweet_counts(1: tweet_service.GetTweetCountsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates TweetService.get_tweet_fields from another cluster. + */ + void replicated_get_tweet_fields(1: tweet_service.GetTweetFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates TweetService.get_tweets from another cluster. + */ + void replicated_get_tweets(1: tweet_service.GetTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.post_tweet InsertTweet event from another cluster. + * Note: v1 version of this endpoint previously just took a Tweet which is why it was replaced + */ + void replicated_insert_tweet2(1: ReplicatedInsertTweet2Request request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.delete_tweets DeleteTweet event from another cluster. + */ + void replicated_delete_tweet2(1: ReplicatedDeleteTweet2Request request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.incr_tweet_fav_count event from another cluster. + */ + void replicated_incr_fav_count(1: i64 tweet_id, 2: i32 delta) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.incr_tweet_bookmark_count event from another cluster. + */ + void replicated_incr_bookmark_count(1: i64 tweet_id, 2: i32 delta) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetServiceInternal.set_retweet_visibility event from another cluster. 
+ */ + void replicated_set_retweet_visibility(1: ReplicatedSetRetweetVisibilityRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.scrub_geo from another cluster. + */ + void replicated_scrub_geo(1: list tweet_ids) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.set_additional_fields event from another cluster. + */ + void replicated_set_additional_fields( + 1: tweet_service.SetAdditionalFieldsRequest request + ) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.delete_additional_fields event from another cluster. + */ + void replicated_delete_additional_fields( + 1: ReplicatedDeleteAdditionalFieldsRequest request + ) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.undelete_tweet event from another cluster. + * Note: v1 version of this endpoint previously just took a Tweet which is why it was replaced + */ + void replicated_undelete_tweet2(1: ReplicatedUndeleteTweet2Request request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.takedown event from another cluster. + */ + void replicated_takedown(1: tweet.Tweet tweet) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.update_possibly_sensitive_tweet event from another cluster. + */ + void replicated_update_possibly_sensitive_tweet(1: tweet.Tweet tweet) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Fetches hydrated Tweets and some metadata irrespective of the Tweets' state. 
+ */ + list get_stored_tweets(1: GetStoredTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Fetches hydrated Tweets and some metadata for a particular user, irrespective of the Tweets' + * state. + */ + GetStoredTweetsByUserResult get_stored_tweets_by_user(1: GetStoredTweetsByUserRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) +} diff --git a/tweetypie/servo/README.md b/tweetypie/servo/README.md new file mode 100644 index 000000000..ff9d1e89d --- /dev/null +++ b/tweetypie/servo/README.md @@ -0,0 +1,3 @@ +# Servo + +Servo is a collection of classes and patterns for building services in Scala. It's a grab-bag of code that was deemed useful for service development. diff --git a/tweetypie/servo/decider/BUILD b/tweetypie/servo/decider/BUILD new file mode 100644 index 000000000..2da29494b --- /dev/null +++ b/tweetypie/servo/decider/BUILD @@ -0,0 +1,5 @@ +target( + dependencies = [ + "tweetypie/servo/decider/src/main/scala", + ], +) diff --git a/tweetypie/servo/decider/src/main/scala/BUILD b/tweetypie/servo/decider/src/main/scala/BUILD new file mode 100644 index 000000000..846ac3eb2 --- /dev/null +++ b/tweetypie/servo/decider/src/main/scala/BUILD @@ -0,0 +1,18 @@ +scala_library( + sources = ["**/*.scala"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-decider", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "decider", + "finagle/finagle-core/src/main", + "tweetypie/servo/util", + "twitter-server-internal", + "twitter-server/server/src/main/scala", + ], +) diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala new file mode 100644 index 000000000..e147ad2fe --- /dev/null +++ 
b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala @@ -0,0 +1,41 @@ +package com.twitter.servo.decider + +import com.twitter.decider.{Decider, Feature} +import com.twitter.servo.util.Gate +import com.twitter.servo.gate.DeciderGate + +/** + * Convenience syntax for creating decider gates + */ +class DeciderGateBuilder(decider: Decider) { + + /** + * idGate should be used when the result of the gate needs to be consistent between repeated + * invocations, with the condition that consistency is dependent upon passing identical + * parameters between the invocations. + */ + def idGate(key: DeciderKeyName): Gate[Long] = + DeciderGate.byId(keyToFeature(key)) + + /** + * linearGate should be used when the probability of the gate returning true needs to + * increase linearly with the availability of feature. + */ + def linearGate(key: DeciderKeyName): Gate[Unit] = + DeciderGate.linear(keyToFeature(key)) + + /** + * typedLinearGate is a linearGate that conforms to the gate of the specified type. + */ + def typedLinearGate[T](key: DeciderKeyName): Gate[T] = + linearGate(key).contramap[T] { _ => () } + + /** + * expGate should be used when the probability of the gate returning true needs to + * increase exponentially with the availability of feature. 
+ */ + def expGate(key: DeciderKeyName, exponent: Int): Gate[Unit] = + DeciderGate.exp(keyToFeature(key), exponent) + + def keyToFeature(key: DeciderKeyName): Feature = decider.feature(key.toString) +} diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala new file mode 100644 index 000000000..8f9e17dce --- /dev/null +++ b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala @@ -0,0 +1,3 @@ +package com.twitter.servo.decider + +trait DeciderKeyEnum extends Enumeration diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala new file mode 100644 index 000000000..86aa734cb --- /dev/null +++ b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala @@ -0,0 +1,5 @@ +package com.twitter.servo + +package object decider { + type DeciderKeyName = DeciderKeyEnum#Value +} diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala new file mode 100644 index 000000000..42874e20d --- /dev/null +++ b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala @@ -0,0 +1,34 @@ +package com.twitter.servo.gate + +import com.twitter.decider +import com.twitter.servo.util.Gate +import scala.annotation.tailrec + +object DeciderGate { + + /** + * Create a Gate[Unit] with a probability of returning true + * that increases linearly with the availability of feature. + */ + def linear(feature: decider.Feature): Gate[Unit] = + Gate(_ => feature.isAvailable, "DeciderGate.linear(%s)".format(feature)) + + /** + * Create a Gate[Unit] with a probability of returning true + * that increases exponentially with the availability of feature. 
+ */ + def exp(feature: decider.Feature, exponent: Int): Gate[Unit] = { + val gate = if (exponent >= 0) linear(feature) else !linear(feature) + + @tailrec + def go(exp: Int): Boolean = if (exp == 0) true else (gate() && go(exp - 1)) + + Gate(_ => go(math.abs(exponent)), "DeciderGate.exp(%s, %s)".format(feature, exponent)) + } + + /** + * Create a Gate[Long] that returns true if the given feature is available for an id. + */ + def byId(feature: decider.Feature): Gate[Long] = + Gate(id => feature.isAvailable(id), "DeciderGate.byId(%s)".format(feature)) +} diff --git a/tweetypie/servo/json/BUILD b/tweetypie/servo/json/BUILD new file mode 100644 index 000000000..9f49967ba --- /dev/null +++ b/tweetypie/servo/json/BUILD @@ -0,0 +1,5 @@ +target( + dependencies = [ + "tweetypie/servo/json/src/main/scala/com/twitter/servo/json", + ], +) diff --git a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD new file mode 100644 index 000000000..c641f0626 --- /dev/null +++ b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD @@ -0,0 +1,21 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-json", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "//:scala-reflect", + "3rdparty/jvm/com/fasterxml/jackson/core:jackson-core", + "3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind", + "3rdparty/jvm/com/googlecode/java-diff-utils:diffutils", + "3rdparty/jvm/org/apache/thrift:libthrift", + "scrooge/scrooge-core", + "scrooge/scrooge-serializer", + "util/util-codec/src/main/scala", + ], +) diff --git a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala new file mode 100644 index 000000000..cb9e65ee8 --- /dev/null +++ 
b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala @@ -0,0 +1,142 @@ +package com.twitter.servo.json + +import com.fasterxml.jackson.core.JsonParser +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.ObjectMapper +import com.twitter.scrooge.ThriftStruct +import com.twitter.scrooge.ThriftStructCodec +import com.twitter.scrooge.ThriftStructSerializer +import difflib.DiffUtils +import java.io.StringWriter +import org.apache.thrift.protocol.TField +import org.apache.thrift.protocol.TProtocol +import org.apache.thrift.protocol.TProtocolFactory +import org.apache.thrift.protocol.TSimpleJSONProtocol +import org.apache.thrift.transport.TTransport +import scala.collection.JavaConverters._ +import scala.language.experimental.macros +import scala.reflect.macros.blackbox.Context + +object ThriftJsonInspector { + private val mapper = new ObjectMapper() + mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) + private val factory = mapper.getFactory() + + private def mkSerializer[T <: ThriftStruct](_codec: ThriftStructCodec[T]) = + new ThriftStructSerializer[T] { + def codec = _codec + + def protocolFactory = + // Identical to TSimpleJSONProtocol.Factory except the TProtocol + // returned serializes Thrift pass-through fields with the name + // "(TField.id)" instead of empty string. + new TProtocolFactory { + def getProtocol(trans: TTransport): TProtocol = + new TSimpleJSONProtocol(trans) { + override def writeFieldBegin(field: TField): Unit = + writeString(if (field.name.isEmpty) s"(${field.id})" else field.name) + } + } + } + + def apply[T <: ThriftStruct](codec: ThriftStructCodec[T]) = new ThriftJsonInspector(codec) +} + +/** + * Helper for human inspection of Thrift objects. 
+ */ +class ThriftJsonInspector[T <: ThriftStruct](codec: ThriftStructCodec[T]) { + import ThriftJsonInspector._ + + private[this] val serializer = mkSerializer(codec) + + /** + * Convert the Thrift object to a JSON representation based on this + * object's codec, in the manner of TSimpleJSONProtocol. The resulting + * JSON will have human-readable field names that match the field + * names that were used in the Thrift definition that the codec was + * created from, but the conversion is lossy, and the JSON + * representation cannot be converted back. + */ + def toSimpleJson(t: T): JsonNode = + mapper.readTree(factory.createParser(serializer.toBytes(t))) + + /** + * Selects requested fields (matching against the JSON fields) from a + * Thrift-generated class. + * + * Paths are specified as slash-separated strings (e.g., + * "key1/key2/key3"). If the path specifies an array or object, it is + * included in the output in JSON format, otherwise the simple value is + * converted to a string. + */ + def select(item: T, paths: Seq[String]): Seq[String] = { + val jsonNode = toSimpleJson(item) + paths.map { + _.split("/").foldLeft(jsonNode)(_.findPath(_)) match { + case node if node.isMissingNode => "[invalid-path]" + case node if node.isContainerNode => node.toString + case node => node.asText + } + } + } + + /** + * Convert the given Thrift struct to a human-readable pretty-printed + * JSON representation. This JSON cannot be converted back into a + * struct. This output is intended for debug logging or interactive + * inspection of Thrift objects. + */ + def prettyPrint(t: T): String = print(t, true) + + def print(t: T, pretty: Boolean = false): String = { + val writer = new StringWriter() + val generator = factory.createGenerator(writer) + if (pretty) + generator.useDefaultPrettyPrinter() + generator.writeTree(toSimpleJson(t)) + writer.toString + } + + /** + * Produce a human-readable unified diff of the json pretty-printed + * representations of `a` and `b`. 
If the inputs have the same JSON + * representation, the result will be the empty string. + */ + def diff(a: T, b: T, contextLines: Int = 1): String = { + val linesA = prettyPrint(a).linesIterator.toList.asJava + val linesB = prettyPrint(b).linesIterator.toList.asJava + val patch = DiffUtils.diff(linesA, linesB) + DiffUtils.generateUnifiedDiff("a", "b", linesA, patch, contextLines).asScala.mkString("\n") + } +} + +object syntax { + private[this] object CompanionObjectLoader { + def load[T](c: Context)(implicit t: c.universe.WeakTypeTag[T]) = { + val tSym = t.tpe.typeSymbol + val companion = tSym.asClass.companion + if (companion == c.universe.NoSymbol) { + c.abort(c.enclosingPosition, s"${tSym} has no companion object") + } else { + c.universe.Ident(companion) + } + } + } + + /** + * Load the companion object of the named type parameter and require + * it to be a ThriftStructCodec. Compilation will fail if the + * companion object is not a ThriftStructCodec. + */ + implicit def thriftStructCodec[T <: ThriftStruct]: ThriftStructCodec[T] = + macro CompanionObjectLoader.load[T] + + implicit class ThriftJsonSyntax[T <: ThriftStruct](t: T)(implicit codec: ThriftStructCodec[T]) { + private[this] def inspector = ThriftJsonInspector(codec) + def toSimpleJson: JsonNode = inspector.toSimpleJson(t) + def prettyPrint: String = inspector.prettyPrint(t) + def diff(other: T, contextLines: Int = 1): String = + inspector.diff(t, other, contextLines) + } +} diff --git a/tweetypie/servo/repo/BUILD b/tweetypie/servo/repo/BUILD new file mode 100644 index 000000000..66618d7e0 --- /dev/null +++ b/tweetypie/servo/repo/BUILD @@ -0,0 +1,5 @@ +target( + dependencies = [ + "tweetypie/servo/repo/src/main/scala", + ], +) diff --git a/tweetypie/servo/repo/src/main/scala/BUILD b/tweetypie/servo/repo/src/main/scala/BUILD new file mode 100644 index 000000000..c50c57807 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/BUILD @@ -0,0 +1,29 @@ +scala_library( + sources = ["**/*.scala"], + 
platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-repo", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/google/guava", + "3rdparty/jvm/com/google/inject:guice", + "3rdparty/jvm/org/apache/thrift:libthrift", + "finagle/finagle-core/src/main", + "finagle/finagle-memcached/src/main/scala", + "finagle/finagle-mux/src/main/scala", + "finagle/finagle-thrift", + "scrooge/scrooge-core", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "util/util-codec/src/main/scala", + "util/util-hashing/src/main/scala", + "util/util-logging", + "util/util-security/src/main/scala/com/twitter/util/security", + "util/util-stats/src/main/scala", + ], +) diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala new file mode 100644 index 000000000..6a00220ef --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala @@ -0,0 +1,183 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.{Duration, Future} + +case class ByteCountingMemcacheFactory( + memcacheFactory: MemcacheFactory, + statsReceiver: StatsReceiver, + delimiter: String = constants.Colon, + checksumSize: Int = 8) // memcached checksums are u64s + extends MemcacheFactory { + + def apply() = + new ByteCountingMemcache(memcacheFactory(), statsReceiver, delimiter, checksumSize) +} + +/** + * A decorator around a Memcache that counts the rough number + * of bytes transferred, bucketed & rolled up by in/out, method name, + * and key prefix + */ +class ByteCountingMemcache( + underlying: Memcache, + statsReceiver: StatsReceiver, + delimiter: String, + checksumSize: Int) + extends Memcache { + val scopedReceiver = 
statsReceiver.scope("memcache").scope("bytes") + + val outStat = scopedReceiver.stat("out") + val outReceiver = scopedReceiver.scope("out") + + val inStat = scopedReceiver.stat("in") + val inReceiver = scopedReceiver.scope("in") + + val getOutStat = outReceiver.stat("get") + val getOutReceiver = outReceiver.scope("get") + + val getInStat = inReceiver.stat("get") + val getInReceiver = inReceiver.scope("get") + val getInHitsStat = getInReceiver.stat("hits") + val getInHitsReceiver = getInReceiver.scope("hits") + val getInMissesStat = getInReceiver.stat("misses") + val getInMissesReceiver = getInReceiver.scope("misses") + + val gwcOutStat = outReceiver.stat("get_with_checksum") + val gwcOutReceiver = outReceiver.scope("get_with_checksum") + + val gwcInStat = inReceiver.stat("get_with_checksum") + val gwcInReceiver = inReceiver.scope("get_with_checksum") + // hit/miss byte counts are inbound traffic, so they are scoped under "in" (like getIn* above) + val gwcInHitsStat = gwcInReceiver.stat("hits") + val gwcInHitsReceiver = gwcInReceiver.scope("hits") + val gwcInMissesStat = gwcInReceiver.stat("misses") + val gwcInMissesReceiver = gwcInReceiver.scope("misses") + + val addStat = outReceiver.stat("add") + val addReceiver = outReceiver.scope("add") + + val setStat = outReceiver.stat("set") + val setReceiver = outReceiver.scope("set") + + val replaceStat = outReceiver.stat("replace") + val replaceReceiver = outReceiver.scope("replace") + + val casStat = outReceiver.stat("check_and_set") + val casReceiver = outReceiver.scope("check_and_set") + + def release() = underlying.release() + + // Namespace of a key: the prefix before the last delimiter, with any remaining delimiters + // replaced by "_". Keys without a delimiter (and empty keys) map to the empty namespace. + protected[this] def ns(key: String) = { + // clamp to 0 so an empty key cannot produce a negative end index for substring + val idx = math.max(0, math.min(key.size - 1, key.lastIndexOf(delimiter))) + // literal replacement: the delimiter is plain text (as in lastIndexOf), not a regex + key.substring(0, idx).replace(delimiter, "_") + } + + override def get(keys: Seq[String]): Future[KeyValueResult[String, Array[Byte]]] = { + keys foreach { key => + val size = key.size + outStat.add(size) + getOutStat.add(size) + getOutReceiver.stat(ns(key)).add(size) + } + underlying.get(keys) onSuccess { lr => + lr.found foreach { +
case (key, bytes) => + val size = key.size + bytes.length + inStat.add(size) + getInStat.add(size) + getInHitsStat.add(size) + getInHitsReceiver.stat(ns(key)).add(size) + } + lr.notFound foreach { key => + val size = key.size + inStat.add(size) + getInStat.add(size) + getInMissesStat.add(size) + getInMissesReceiver.stat(ns(key)).add(size) + } + } + } + + override def getWithChecksum( + keys: Seq[String] + ): Future[CsKeyValueResult[String, Array[Byte]]] = { + keys foreach { key => + val size = key.size + outStat.add(size) + gwcOutStat.add(size) + gwcOutReceiver.stat(ns(key)).add(size) + } + underlying.getWithChecksum(keys) onSuccess { lr => + lr.found foreach { + case (key, (bytes, _)) => + val size = key.size + (bytes map { _.length } getOrElse (0)) + checksumSize + inStat.add(size) + gwcInStat.add(size) + gwcInHitsStat.add(size) + gwcInHitsReceiver.stat(ns(key)).add(size) + } + lr.notFound foreach { key => + val size = key.size + inStat.add(size) + gwcInStat.add(size) + gwcInMissesStat.add(size) + gwcInMissesReceiver.stat(ns(key)).add(size) + } + } + } + + override def add(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = { + val size = key.size + value.size + outStat.add(size) + addStat.add(size) + addReceiver.stat(ns(key)).add(size) + underlying.add(key, value, ttl) + } + + override def checkAndSet( + key: String, + value: Array[Byte], + checksum: Checksum, + ttl: Duration + ): Future[Boolean] = { + val size = key.size + value.size + checksumSize + outStat.add(size) + casStat.add(size) + casReceiver.stat(ns(key)).add(size) + underlying.checkAndSet(key, value, checksum, ttl) + } + + override def set(key: String, value: Array[Byte], ttl: Duration): Future[Unit] = { + val size = key.size + value.size + outStat.add(size) + setStat.add(size) + setReceiver.stat(ns(key)).add(size) + underlying.set(key, value, ttl) + } + + override def replace(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = { + val size = key.size + value.size + 
outStat.add(size) + replaceStat.add(size) + replaceReceiver.stat(ns(key)).add(size) + underlying.replace(key, value, ttl) + } + + override def delete(key: String): Future[Boolean] = { + outStat.add(key.size) + underlying.delete(key) + } + + override def incr(key: String, delta: Long = 1): Future[Option[Long]] = { + val size = key.size + 8 + outStat.add(size) + underlying.incr(key, delta) + } + + override def decr(key: String, delta: Long = 1): Future[Option[Long]] = { + val size = key.size + 8 + outStat.add(size) + underlying.decr(key, delta) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala new file mode 100644 index 000000000..c23e6e462 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala @@ -0,0 +1,275 @@ +package com.twitter.servo.cache + +import com.google.common.cache.CacheBuilder +import com.twitter.finagle.memcached.util.NotFound +import com.twitter.servo.util.ThreadLocalStringBuilder +import com.twitter.util.{Duration, Future, Return} +import java.util.concurrent.TimeUnit +import scala.collection.mutable +import scala.collection.JavaConverters._ + +/** + * opaque trait used for getWithChecksum calls. 
+ * the implementation should be private to the cache, + * to inhibit peeking + */ +trait Checksum extends Any + +object ScopedCacheKey { + private[ScopedCacheKey] val builder = new ThreadLocalStringBuilder(64) +} + +/** + * base class for cache keys needing scoping + * + * @param globalNamespace + * the project-level namespace + * @param cacheNamespace + * the cache-level namespace + * @param version + * the version of serialization for values + * @param scopes + * additional key scopes + */ +abstract class ScopedCacheKey( + globalNamespace: String, + cacheNamespace: String, + version: Int, + scopes: String*) { + import constants._ + + override lazy val toString = { + val builder = ScopedCacheKey + .builder() + .append(globalNamespace) + .append(Colon) + .append(cacheNamespace) + .append(Colon) + .append(version) + + scopes foreach { + builder.append(Colon).append(_) + } + + builder.toString + } +} + +/** + * Shared trait for reading from a cache + */ +trait ReadCache[K, V] { + def get(keys: Seq[K]): Future[KeyValueResult[K, V]] + + /** + * get the value with an opaque checksum that can be passed in + * a checkAndSet operation. 
If there is a deserialization error, + * the checksum is still returned + */ + def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] + + /** + * release any underlying resources + */ + def release(): Unit +} + +/** + * allows one ReadCache to wrap another + */ +trait ReadCacheWrapper[K, V, This <: ReadCache[K, V]] extends ReadCache[K, V] { + def underlyingCache: This + + override def get(keys: Seq[K]) = underlyingCache.get(keys) + + override def getWithChecksum(keys: Seq[K]) = underlyingCache.getWithChecksum(keys) + + override def release() = underlyingCache.release() +} + +/** + * Simple trait for a cache supporting multi-get and single set + */ +trait Cache[K, V] extends ReadCache[K, V] { + def add(key: K, value: V): Future[Boolean] + + def checkAndSet(key: K, value: V, checksum: Checksum): Future[Boolean] + + def set(key: K, value: V): Future[Unit] + + def set(pairs: Seq[(K, V)]): Future[Unit] = { + Future.join { + pairs map { + case (key, value) => set(key, value) + } + } + } + + /** + * Replaces the value for an existing key. If the key doesn't exist, this has no effect. + * @return true if replaced, false if not found + */ + def replace(key: K, value: V): Future[Boolean] + + /** + * Deletes a value from cache. 
+ * @return true if deleted, false if not found + */ + def delete(key: K): Future[Boolean] +} + +/** + * allows one cache to wrap another + */ +trait CacheWrapper[K, V] extends Cache[K, V] with ReadCacheWrapper[K, V, Cache[K, V]] { + override def add(key: K, value: V) = underlyingCache.add(key, value) + + override def checkAndSet(key: K, value: V, checksum: Checksum) = + underlyingCache.checkAndSet(key, value, checksum) + + override def set(key: K, value: V) = underlyingCache.set(key, value) + + override def replace(key: K, value: V) = underlyingCache.replace(key, value) + + override def delete(key: K) = underlyingCache.delete(key) +} + +/** + * Switch between two caches with a decider value + */ +class DeciderableCache[K, V](primary: Cache[K, V], secondary: Cache[K, V], isAvailable: => Boolean) + extends CacheWrapper[K, V] { + override def underlyingCache = if (isAvailable) primary else secondary +} + +private object MutableMapCache { + case class IntChecksum(i: Int) extends AnyVal with Checksum +} + +/** + * implementation of a Cache with a mutable.Map + */ +class MutableMapCache[K, V](underlying: mutable.Map[K, V]) extends Cache[K, V] { + import MutableMapCache.IntChecksum + + protected[this] def checksum(value: V): Checksum = IntChecksum(value.hashCode) + + override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = Future { + val founds = Map.newBuilder[K, V] + val iter = keys.iterator + while (iter.hasNext) { + val key = iter.next() + synchronized { + underlying.get(key) + } match { + case Some(v) => founds += key -> v + case None => + } + } + val found = founds.result() + val notFound = NotFound(keys, found.keySet) + KeyValueResult(found, notFound) + } + + override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = Future { + val founds = Map.newBuilder[K, (Return[V], Checksum)] + val iter = keys.iterator + while (iter.hasNext) { + val key = iter.next() + synchronized { + underlying.get(key) + } match { + case Some(value) => founds += key 
-> (Return(value), checksum(value)) + case None => + } + } + val found = founds.result() + val notFound = NotFound(keys, found.keySet) + KeyValueResult(found, notFound) + } + + override def add(key: K, value: V): Future[Boolean] = + synchronized { + underlying.get(key) match { + case Some(_) => + Future.False + case None => + underlying += key -> value + Future.True + } + } + + override def checkAndSet(key: K, value: V, cs: Checksum): Future[Boolean] = + synchronized { + underlying.get(key) match { + case Some(current) => + if (checksum(current) == cs) { + // checksums match, set value + underlying += key -> value + Future.True + } else { + // checksums didn't match, so no set + Future.False + } + case None => + // if nothing there, the checksums can't be compared + Future.False + } + } + + override def set(key: K, value: V): Future[Unit] = { + synchronized { + underlying += key -> value + } + Future.Done + } + + override def replace(key: K, value: V): Future[Boolean] = synchronized { + if (underlying.contains(key)) { + underlying(key) = value + Future.True + } else { + Future.False + } + } + + override def delete(key: K): Future[Boolean] = synchronized { + if (underlying.remove(key).nonEmpty) Future.True else Future.False + } + + override def release(): Unit = synchronized { + underlying.clear() + } +} + +/** + * In-memory implementation of a cache with LRU semantics and a TTL. 
+ */ +class ExpiringLruCache[K, V](ttl: Duration, maximumSize: Int) + extends MutableMapCache[K, V]( + // TODO: consider wiring the Cache interface directly to the + // Guava Cache, instead of introducing two layers of indirection + CacheBuilder.newBuilder + .asInstanceOf[CacheBuilder[K, V]] + .expireAfterWrite(ttl.inMilliseconds, TimeUnit.MILLISECONDS) + .initialCapacity(maximumSize) + .maximumSize(maximumSize) + .build[K, V]() + .asMap + .asScala + ) + +/** + * An empty cache that stays empty + */ +class NullCache[K, V] extends Cache[K, V] { + lazy val futureTrue = Future.value(true) + override def get(keys: Seq[K]) = Future.value(KeyValueResult(notFound = keys.toSet)) + override def getWithChecksum(keys: Seq[K]) = Future.value(KeyValueResult(notFound = keys.toSet)) + override def add(key: K, value: V) = futureTrue + override def checkAndSet(key: K, value: V, checksum: Checksum) = Future.value(true) + override def set(key: K, value: V) = Future.Done + override def replace(key: K, value: V) = futureTrue + override def delete(key: K) = futureTrue + override def release() = () +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala new file mode 100644 index 000000000..85359db1a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala @@ -0,0 +1,153 @@ +package com.twitter.servo.cache + +import com.twitter.util.Duration +import scala.collection.mutable + +/** + * Used to produce differently-typed caches with the same configuration + * and potentially with shared observation. + */ +trait CacheFactory { + def apply[K, V](serializer: Serializer[V], scopes: String*): Cache[K, V] +} + +/** + * Builds an instance of NullCache. 
+ */ +object NullCacheFactory extends CacheFactory { + val cache = new NullCache[Nothing, Nothing] + + override def apply[K, V](serializer: Serializer[V], scopes: String*): Cache[K, V] = + cache.asInstanceOf[NullCache[K, V]] +} + +/** + * Builds DeciderableCaches, which proxy to one of two caches built from the + * argument CacheFactories depending on a decider value. + */ +case class DeciderableCacheFactory( + primaryCacheFactory: CacheFactory, + secondaryCacheFactory: CacheFactory, + isAvailable: () => Boolean) + extends CacheFactory { + override def apply[K, V](serializer: Serializer[V], scopes: String*) = + new DeciderableCache( + primaryCacheFactory(serializer, scopes: _*), + secondaryCacheFactory(serializer, scopes: _*), + isAvailable() + ) +} + +/** + * Builds MigratingCaches, which support gradual migrations from one cache + * to another. See MigratingCache.scala for details. + */ +case class MigratingCacheFactory(cacheFactory: CacheFactory, darkCacheFactory: CacheFactory) + extends CacheFactory { + override def apply[K, V](serializer: Serializer[V], scopes: String*) = + new MigratingCache( + cacheFactory(serializer, scopes: _*), + darkCacheFactory(serializer, scopes: _*) + ) +} + +case class ObservableCacheFactory(cacheFactory: CacheFactory, cacheObserver: CacheObserver) + extends CacheFactory { + override def apply[K, V](serializer: Serializer[V], scopes: String*) = + new ObservableCache(cacheFactory(serializer), cacheObserver.scope(scopes: _*)) +} + +/** + * Builds in-memory caches with elements that never expire. 
+ */ +case class MutableMapCacheFactory( + serialize: Boolean = false, + useSharedCache: Boolean = false, + keyTransformerFactory: KeyTransformerFactory = ToStringKeyTransformerFactory) + extends CacheFactory { + lazy val sharedCache = mkCache + + def mkCache = { + new MutableMapCache[Object, Object](new mutable.HashMap) + } + + override def apply[K, V](serializer: Serializer[V], scopes: String*) = { + val cache = if (useSharedCache) sharedCache else mkCache + if (serialize) { + new KeyValueTransformingCache( + cache.asInstanceOf[Cache[String, Array[Byte]]], + serializer, + keyTransformerFactory() + ) + } else { + cache.asInstanceOf[Cache[K, V]] + } + } +} + +/** + * Builds in-memory caches with TTL'd entries and LRU eviction policies. + */ +case class InProcessLruCacheFactory( + ttl: Duration, + lruSize: Int, + serialize: Boolean = false, + useSharedCache: Boolean = false, + keyTransformerFactory: KeyTransformerFactory = ToStringKeyTransformerFactory) + extends CacheFactory { + def mkCache = new ExpiringLruCache[Object, Object](ttl, lruSize) + lazy val sharedCache = mkCache + + override def apply[K, V](serializer: Serializer[V], scopes: String*) = { + val cache = if (useSharedCache) sharedCache else mkCache + if (serialize) { + new KeyValueTransformingCache( + cache.asInstanceOf[Cache[String, Array[Byte]]], + serializer, + keyTransformerFactory() + ) + } else { + cache.asInstanceOf[Cache[K, V]] + } + } +} + +/** + * Builds MemcacheCaches, which applies serialization, key-transformation, + * and TTL mechanics to an underlying Memcache. + */ +case class MemcacheCacheFactory( + memcache: Memcache, + ttl: Duration, + keyTransformerFactory: KeyTransformerFactory = ToStringKeyTransformerFactory) + extends CacheFactory { + override def apply[K, V](serializer: Serializer[V], scopes: String*) = + new MemcacheCache(memcache, ttl, serializer, keyTransformerFactory[K]()) +} + +/** + * Builds KeyTransformers, which are required for constructing + * KeyValueTransformingCaches. 
+ */ +trait KeyTransformerFactory { + def apply[K](): KeyTransformer[K] +} + +/** + * Builds KeyTransformers by simply calling the keys' toString methods. + */ +object ToStringKeyTransformerFactory extends KeyTransformerFactory { + def apply[K]() = new ToStringKeyTransformer[K]() +} + +/** + * Builds KeyTransformers that prefix all keys generated by an underlying + * transformer with a string. + */ +case class PrefixKeyTransformerFactory( + prefix: String, + delimiter: String = constants.Colon, + underlying: KeyTransformerFactory = ToStringKeyTransformerFactory) + extends KeyTransformerFactory { + def apply[K]() = new PrefixKeyTransformer[K](prefix, delimiter, underlying[K]()) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala new file mode 100644 index 000000000..9956cb515 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala @@ -0,0 +1,261 @@ +package com.twitter.servo.cache + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo.cache.thriftscala.CachedValueStatus.DoNotCache +import com.twitter.servo.util.{Gate, Transformer} +import com.twitter.util.{Duration, Return, Throw, Time} +import java.nio.ByteBuffer + +object Cached { + + private[this] val millisToTime: Long => Time = + ms => Time.fromMilliseconds(ms) + + private val timeToMills: Time => Long = + time => time.inMilliseconds + + /** + * Deserialize a CachedValue to a Cached[V] + * + * If the ByteBuffer contained in the `cachedValue` is an unsliced view of its whole backing + * `Array[Byte]` (array offset 0, position 0, and the entire array remaining), we will apply the + * serializer directly to the backing array for performance reasons. Otherwise only the + * remaining bytes of the buffer are copied out and deserialized. + * + * If a value is present but cannot be deserialized, the resulting status is + * `CachedValueStatus.DeserializationFailed`; otherwise the status of `cachedValue` is kept. + * + * As such, the `Serializer[V]` the caller provides MUST NOT mutate the buffer it is given. + * This exhortation is also given in com.twitter.servo.util.Transformer, but repeated here.
+ */ + def apply[V](cachedValue: CachedValue, serializer: Serializer[V]): Cached[V] = { + val value: Option[V] = cachedValue.value match { + // fast path only when the buffer exposes exactly its backing array; a buffer made by + // ByteBuffer.wrap(array, offset, length) also has arrayOffset 0 but a narrower window + case Some(buf) + if buf.hasArray && buf.arrayOffset() == 0 && + buf.position() == 0 && buf.remaining == buf.array.length => + serializer.from(buf.array).toOption + case Some(buf) => + // copy just the readable window; read via a duplicate so buf's own position is untouched + val array = new Array[Byte](buf.remaining) + buf.duplicate.get(array) + serializer.from(array).toOption + case None => None + } + val status = + if (cachedValue.value.nonEmpty && value.isEmpty) + CachedValueStatus.DeserializationFailed + else + cachedValue.status + + Cached( + value, + status, + Time.fromMilliseconds(cachedValue.cachedAtMsec), + cachedValue.readThroughAtMsec.map(millisToTime), + cachedValue.writtenThroughAtMsec.map(millisToTime), + cachedValue.doNotCacheUntilMsec.map(millisToTime), + cachedValue.softTtlStep + ) + } +} + +/** + * A simple metadata wrapper for cached values. This is stored in the cache + * using the [[com.twitter.servo.cache.thriftscala.CachedValue]] struct, which is similar, but + * untyped. + */ +case class Cached[V]( + value: Option[V], + status: CachedValueStatus, + cachedAt: Time, + readThroughAt: Option[Time] = None, + writtenThroughAt: Option[Time] = None, + doNotCacheUntil: Option[Time] = None, + softTtlStep: Option[Short] = None) { + + /** + * produce a new cached value with the same metadata + */ + def map[W](f: V => W): Cached[W] = copy(value = value.map(f)) + + /** + * serialize to a CachedValue + */ + def toCachedValue(serializer: Serializer[V]): CachedValue = { + var serializedValue: Option[ByteBuffer] = None + val cachedValueStatus = value match { + case Some(v) => + serializer.to(v) match { + case Return(sv) => + serializedValue = Some(ByteBuffer.wrap(sv)) + status + case Throw(_) => CachedValueStatus.SerializationFailed + } + case None => status + } + + CachedValue( + serializedValue, + cachedValueStatus, + cachedAt.inMilliseconds, + readThroughAt.map(Cached.timeToMills), + writtenThroughAt.map(Cached.timeToMills), + doNotCacheUntil.map(Cached.timeToMills), +
softTtlStep + ) + } + + /** + * Resolves conflicts between a value being inserted into cache and a value already in cache by + * using the time a cached value was last updated. + * If the cached value has a writtenThroughAt, returns it plus `writtenThroughBuffer`. Otherwise + * returns readThroughAt, but if that doesn't exist, returns cachedAt. + * This makes it favor writes to reads in the event of a race condition. + */ + def effectiveUpdateTime[V](writtenThroughBuffer: Duration = 0.second): Time = { + this.writtenThroughAt match { + case Some(wta) => wta + writtenThroughBuffer + case None => + this.readThroughAt match { + case Some(rta) => rta + case None => this.cachedAt + } + } + } +} + +/** + * Switch between two cache pickers using a deciderable gate. The secondary picker is always + * evaluated; when the gate selects the primary, both results are compared and the + * match/mismatch is counted before the primary's result is returned. + */ +class DeciderablePicker[V]( + primaryPicker: LockingCache.Picker[Cached[V]], + secondaryPicker: LockingCache.Picker[Cached[V]], + usePrimary: Gate[Unit], + statsReceiver: StatsReceiver) + extends LockingCache.Picker[Cached[V]] { + private[this] val stats = statsReceiver.scope("deciderable_picker") + private[this] val pickerScope = stats.scope("picker") + private[this] val primaryPickerCount = pickerScope.counter("primary") + private[this] val secondaryPickerCount = pickerScope.counter("secondary") + + private[this] val pickedScope = stats.scope("picked_values") + private[this] val pickedValuesMatched = pickedScope.counter("matched") + private[this] val pickedValuesMismatched = pickedScope.counter("mismatched") + + override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { + val secondaryPickerValue = secondaryPicker(newValue, oldValue) + + if (usePrimary()) { + val primaryPickerValue = primaryPicker(newValue, oldValue) + + primaryPickerCount.incr() + if (primaryPickerValue == secondaryPickerValue) pickedValuesMatched.incr() + else pickedValuesMismatched.incr() + + primaryPickerValue + } else { + secondaryPickerCount.incr() + secondaryPickerValue + } + } + + override def toString(): String =
"DeciderablePicker" + +} + +/** + * It's similar to the PreferNewestCached picker, but it prefers written-through value + * over read-through as long as written-through value + writtenThroughExtra is + * newer than read-through value. Same as in PreferNewestCached, if values cached + * have the same cached method and time picker picks the new value. + * + * It intends to solve race condition when the read and write requests come at the + * same time, but write requests is getting cached first and then getting override with + * a stale value from the read request. + * + * If enabled gate is disabled, it falls back to PreferNewestCached logic. + * + */ +class PreferWrittenThroughCached[V]( + writtenThroughBuffer: Duration = 1.second) + extends PreferNewestCached[V] { + override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { + // the tie goes to newValue + if (oldValue.effectiveUpdateTime(writtenThroughBuffer) > newValue.effectiveUpdateTime( + writtenThroughBuffer)) + None + else + Some(newValue) + } + override def toString(): String = "PreferWrittenThroughCached" +} + +/** + * prefer one value over another based on Cached metadata + */ +class PreferNewestCached[V] extends LockingCache.Picker[Cached[V]] { + + override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { + if (oldValue.effectiveUpdateTime() > newValue.effectiveUpdateTime()) + None + else + Some(newValue) + } + + override def toString(): String = "PreferNewestCached" +} + +/** + * Prefer non-empty values. If a non-empty value is in cache, and the + * value to store is empty, return the non-empty value with a fresh cachedAt + * instead. 
+ */ +class PreferNewestNonEmptyCached[V] extends PreferNewestCached[V] { + override def apply(newValue: Cached[V], oldValue: Cached[V]) = { + (newValue.value, oldValue.value) match { + // Some/Some and None/None cases are handled by the super class + case (Some(_), Some(_)) => super.apply(newValue, oldValue) + case (None, None) => super.apply(newValue, oldValue) + case (Some(_), None) => Some(newValue) + case (None, Some(_)) => Some(oldValue.copy(cachedAt = Time.now)) + } + } +} + +/** + * Prefer do not cache entries if they're not expired. Otherwise uses fallbackPicker + * @param fallBackPicker the picker to use when the oldvalue isn't do not cache or is expired. + * Defaults to PreferNewestCache. + */ +class PreferDoNotCache[V]( + fallBackPicker: LockingCache.Picker[Cached[V]] = new PreferNewestCached[V]: PreferNewestCached[V], + statsReceiver: StatsReceiver) + extends LockingCache.Picker[Cached[V]] { + private[this] val pickDoNotCacheEntryCounter = statsReceiver.counter("pick_do_not_cache_entry") + private[this] val useFallbackCounter = statsReceiver.counter("use_fallback") + override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { + if (oldValue.status == DoNotCache && oldValue.doNotCacheUntil.forall( + _ > newValue.effectiveUpdateTime())) { // evaluates to true if dnc until is None + pickDoNotCacheEntryCounter.incr() + None + } else { + useFallbackCounter.incr() + fallBackPicker.apply(newValue, oldValue) + } + } +} + +/** + * A Transformer of Cached values composed of a Transformer of the underlying values. 
+ */ +class CachedTransformer[A, B](underlying: Transformer[A, B]) + extends Transformer[Cached[A], Cached[B]] { + def to(cachedA: Cached[A]) = cachedA.value match { + case None => Return(cachedA.copy(value = None)) + case Some(a) => + underlying.to(a) map { b => + cachedA.copy(value = Some(b)) + } + } + + def from(cachedB: Cached[B]) = cachedB.value match { + case None => Return(cachedB.copy(value = None)) + case Some(b) => + underlying.from(b) map { a => + cachedB.copy(value = Some(a)) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala new file mode 100644 index 000000000..5fa06185a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala @@ -0,0 +1,20 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Duration, Future} + +trait CounterCache[K] extends Cache[K, Long] { + def incr(key: K, delta: Int = 1): Future[Option[Long]] + def decr(key: K, delta: Int = 1): Future[Option[Long]] +} + +class MemcacheCounterCache[K]( + memcache: Memcache, + ttl: Duration, + transformKey: KeyTransformer[K] = ((k: K) => k.toString): (K => java.lang.String)) + extends MemcacheCache[K, Long](memcache, ttl, CounterSerializer, transformKey) + with CounterCache[K] + +class NullCounterCache[K] extends NullCache[K, Long] with CounterCache[K] { + override def incr(key: K, delta: Int = 1): Future[Option[Long]] = Future.value(Some(0L)) + override def decr(key: K, delta: Int = 1): Future[Option[Long]] = Future.value(Some(0L)) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala new file mode 100644 index 000000000..4711cc2ef --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala @@ -0,0 +1,114 @@ +package com.twitter.servo.cache 
+ +import com.google.common.base.Charsets +import com.twitter.util.Try + +/** + * Fast implementation of dealing with memcached counters. + * + * Memcache is funkytown for incr and decr. Basically, you store a number, + * as a STRING, and then incr and decr that. This abstracts over that detail. + * + * This implementation was quite a bit faster than the simple implementation + * of `new String(bytes, Charsets.US_ASCII).toLong()` + * and `Long.toString(value).getBytes()` + * + * Thread-safe. + */ +object CounterSerializer extends Serializer[Long] { + private[this] val Minus = '-'.toByte + // The lower bound + private[this] val Zero = '0'.toByte + // The upper bound + private[this] val Nine = '9'.toByte + + // Max length for our byte arrays that'll fit all positive longs + private[this] val MaxByteArrayLength = 19 + + override def to(long: Long): Try[Array[Byte]] = Try { + // NOTE: code based on Long.toString(value), but it avoids creating the + // intermediate String object and the charset encoding in String.getBytes + // This was about 12% faster than calling Long.toString(long).getBytes + if (long == Long.MinValue) { + "-9223372036854775808".getBytes(Charsets.US_ASCII) + } else { + val size = if (long < 0) stringSize(-long) + 1 else stringSize(long) + val bytes = new Array[Byte](size) + + var isNegative = false + var endAt = 0 + var currentLong = if (long < 0) { + isNegative = true + endAt = 1 + -long + } else { + long + } + + // Note: look at the implementation in Long.getChars(long, int, char[]) + // They can do 2 digits at a time for this, so we could speed this up + // See: Division by Invariant Integers using Multiplication + // http://gmplib.org/~tege/divcnst-pldi94.pdf + + // starting at the least significant digit and working our way up... 
+ var pos = size - 1 + do { + val byte = currentLong % 10 + bytes(pos) = (Zero + byte).toByte + currentLong /= 10 + pos -= 1 + } while (currentLong != 0) + + if (isNegative) { + assert(pos == 0, "For value " + long + ", pos " + pos) + bytes(0) = Minus + } + + bytes + } + } + + override def from(bytes: Array[Byte]): Try[Long] = Try { + // This implementation was about 4x faster than the simple: + // new String(bytes, Charsets.US_ASCII).toLong + + if (bytes.length < 1) + throw new NumberFormatException("Empty byte arrays are unsupported") + + val isNegative = bytes(0) == Minus + if (isNegative && bytes.length == 1) + throw new NumberFormatException(bytes.mkString(",")) + + // we count in negative numbers so we don't have problems at Long.MaxValue + var total = 0L + val endAt = bytes.length + var i = if (isNegative) 1 else 0 + while (i < endAt) { + val b = bytes(i) + if (b < Zero || b > Nine) + throw new NumberFormatException(bytes.mkString(",")) + + val int = b - Zero + total = (total * 10L) - int + + i += 1 + } + + if (isNegative) total else -total + } + + /** + * @param long must be non-negative; the bound `p` is a Long because 10^10 and above do not fit in an Int + */ + private[this] def stringSize(long: Long): Int = { + var p = 10L + var i = 1 + while (i < MaxByteArrayLength) { + if (long < p) return i + p *= 10 + i += 1 + } + MaxByteArrayLength + } + +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala new file mode 100644 index 000000000..0cd3153a7 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala @@ -0,0 +1,149 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.memcached.{CasResult, Client} +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.{Backoff, Memcached, TimeoutException, WriteException} +import com.twitter.hashing.KeyHasher +import com.twitter.io.Buf +import com.twitter.logging.Logger +import
com.twitter.util._ + +case class MemcacheRetryPolicy( + writeExceptionBackoffs: Backoff, + timeoutBackoffs: Backoff) + extends RetryPolicy[Try[Nothing]] { + override def apply(r: Try[Nothing]) = r match { + case Throw(_: WriteException) => onWriteException + case Throw(_: TimeoutException) => onTimeoutException + case _ => None + } + + private[this] def onTimeoutException = consume(timeoutBackoffs.toStream) { tail => + copy(timeoutBackoffs = Backoff.fromStream(tail)) + } + + private[this] def onWriteException = consume(writeExceptionBackoffs.toStream) { tail => + copy(writeExceptionBackoffs = Backoff.fromStream(tail)) + } + + private[this] def consume(s: Stream[Duration])(f: Stream[Duration] => MemcacheRetryPolicy) = { + s.headOption map { duration => + (duration, f(s.tail)) + } + } +} + +object FinagleMemcacheFactory { + val DefaultHashName = "fnv1-32" + + def apply(client: Memcached.Client, dest: String, hashName: String = DefaultHashName) = + new FinagleMemcacheFactory(client, dest, hashName) +} + +class FinagleMemcacheFactory private[cache] ( + client: Memcached.Client, + dest: String, + hashName: String) + extends MemcacheFactory { + + def apply(): Memcache = { + val keyHasher = KeyHasher.byName(hashName) + new FinagleMemcache(client.withKeyHasher(keyHasher).newTwemcacheClient(dest), hashName) + } +} + +object FinagleMemcache { + val NoFlags = 0 + val logger = Logger(getClass) +} + +/** + * Adapter for a [[Memcache]] (type alias for [[TtlCache]]) from a Finagle Memcached + * [[Client]]. 
+ */ +class FinagleMemcache(client: Client, hashName: String = FinagleMemcacheFactory.DefaultHashName) + extends Memcache { + + import FinagleMemcache.NoFlags + + private[this] case class BufferChecksum(buffer: Buf) extends Checksum + + def release(): Unit = { + client.close() + } + + override def get(keys: Seq[String]): Future[KeyValueResult[String, Array[Byte]]] = + client.getResult(keys).transform { + case Return(gr) => + val found = gr.hits.map { + case (key, v) => + val bytes = Buf.ByteArray.Owned.extract(v.value) + key -> bytes + } + Future.value(KeyValueResult(found, gr.misses, gr.failures)) + + case Throw(t) => + Future.value(KeyValueResult(failed = keys.map(_ -> t).toMap)) + } + + override def getWithChecksum(keys: Seq[String]): Future[CsKeyValueResult[String, Array[Byte]]] = + client.getsResult(keys).transform { + case Return(gr) => + try { + val hits = gr.hits map { + case (key, v) => + val bytes = Buf.ByteArray.Owned.extract(v.value) + key -> (Return(bytes), BufferChecksum( + v.casUnique.get + )) // TODO. what to do if missing? 
+ } + Future.value(KeyValueResult(hits, gr.misses, gr.failures)) + } catch { + case t: Throwable => + Future.value(KeyValueResult(failed = keys.map(_ -> t).toMap)) + } + case Throw(t) => + Future.value(KeyValueResult(failed = keys.map(_ -> t).toMap)) + } + + private val jb2sb: java.lang.Boolean => Boolean = _.booleanValue + private val jl2sl: java.lang.Long => Long = _.longValue + + override def add(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = + client.add(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value)) map jb2sb + + override def checkAndSet( + key: String, + value: Array[Byte], + checksum: Checksum, + ttl: Duration + ): Future[Boolean] = { + checksum match { + case BufferChecksum(cs) => + client.checkAndSet(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value), cs) map { + res: CasResult => + res.replaced + } + case _ => + Future.exception(new IllegalArgumentException("unrecognized checksum: " + checksum)) + } + } + + override def set(key: String, value: Array[Byte], ttl: Duration): Future[Unit] = + client.set(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value)) + + override def replace(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = + client.replace(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value)) map jb2sb + + override def delete(key: String): Future[Boolean] = + client.delete(key) map jb2sb + + def incr(key: String, delta: Long = 1): Future[Option[Long]] = + client.incr(key, delta) map { _ map jl2sl } + + def decr(key: String, delta: Long = 1): Future[Option[Long]] = + client.decr(key, delta) map { _ map jl2sl } + + // NOTE: This is the only reason that hashName is passed as a param to FinagleMemcache. 
+ override lazy val toString = "FinagleMemcache(%s)".format(hashName) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala new file mode 100644 index 000000000..86c7f495a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala @@ -0,0 +1,186 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Future, Return} +import scala.collection.mutable + +/** + * uses a forwarding cache to lookup a value by a secondary index. + * filters out values for which the requested secondary index does not + * match the actual secondary index (these are treated as a miss) + */ +class ForwardingCache[K, F, V]( + forwardingCache: Cache[K, Cached[F]], + underlyingCache: SecondaryIndexingCache[F, _, V], + primaryKey: V => F, + secondaryKey: SecondaryIndexingCache.IndexMapping[K, V], + lockingCacheFactory: LockingCacheFactory) + extends LockingCache[K, Cached[V]] { + protected[this] case class ForwardingChecksum( + forwardingChecksum: Checksum, + underlyingChecksum: Option[Checksum]) + extends Checksum + + protected[this] val lockingUnderlying = lockingCacheFactory(underlyingCache) + protected[this] val lockingForwarding = lockingCacheFactory(forwardingCache) + + override def get(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = { + forwardingCache.get(keys) flatMap { flr => + val (tombstones, notTombstones) = { + val tombstones = mutable.Map.empty[K, Cached[F]] + val notTombstones = mutable.Map.empty[F, K] + // split results into tombstoned keys and non-tombstoned key/pKeys + // while we're at it, produce a reverse-keymap of non-tombstones + flr.found foreach { + case (key, cachedPKey) => + cachedPKey.value match { + case Some(pKey) => notTombstones += pKey -> key + case None => tombstones += key -> cachedPKey + } + } + (tombstones.toMap, notTombstones.toMap) + } + + // only make call to underlyingCache 
if there are keys to lookup + val fromUnderlying = if (notTombstones.isEmpty) { + KeyValueResult.emptyFuture + } else { + // get non-tombstoned values from underlying cache + underlyingCache.get(notTombstones.keys.toSeq) map { lr => + val (goodValues, badValues) = lr.found partition { + case (pKey, cachedValue) => + // filter out values that somehow don't match the primary key and secondary key + cachedValue.value match { + case Some(value) => + secondaryKey(value) match { + case Return(Some(sKey)) => + pKey == primaryKey(value) && sKey == notTombstones(pKey) + case _ => false + } + case None => true + } + } + val found = goodValues map { case (k, v) => notTombstones(k) -> v } + val notFound = (lr.notFound ++ badValues.keySet) map { notTombstones(_) } + val failed = lr.failed map { case (k, t) => notTombstones(k) -> t } + KeyValueResult(found, notFound, failed) + } handle { + case t => + KeyValueResult(failed = notTombstones.values map { _ -> t } toMap) + } + } + + fromUnderlying map { lr => + // fill in tombstone values, copying the metadata from the Cached[F] + val withTombstones = tombstones map { + case (key, cachedPKey) => + key -> cachedPKey.copy[V](value = None) + } + val found = lr.found ++ withTombstones + val notFound = flr.notFound ++ lr.notFound + val failed = flr.failed ++ lr.failed + KeyValueResult(found, notFound, failed) + } + } + } + + // since we implement lockAndSet directly, we don't support getWithChecksum and checkAndSet. + // we should consider changing the class hierarchy of Cache/LockingCache so that this can + // be checked at compile time. 
+ + override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, Cached[V]]] = + Future.exception(new UnsupportedOperationException("Use lockAndSet directly")) + + override def checkAndSet(key: K, cachedValue: Cached[V], checksum: Checksum): Future[Boolean] = + Future.exception(new UnsupportedOperationException("Use lockAndSet directly")) + + protected[this] def maybeAddForwardingIndex( + key: K, + cachedPrimaryKey: Cached[F], + wasAdded: Boolean + ): Future[Boolean] = { + if (wasAdded) + forwardingCache.set(key, cachedPrimaryKey) map { _ => + true + } + else + Future.value(false) + } + + override def add(key: K, cachedValue: Cached[V]): Future[Boolean] = { + // copy the cache metadata to the primaryKey + val cachedPrimaryKey = cachedValue map { primaryKey(_) } + cachedPrimaryKey.value match { + case Some(pKey) => + // if a value can be derived from the key, use the underlying cache to add it + // the underlying cache will create the secondary index as a side-effect + underlyingCache.add(pKey, cachedValue) + case None => + // otherwise, we're just writing a tombstone, so we need to check if it exists + forwardingCache.add(key, cachedPrimaryKey) + } + } + + override def lockAndSet( + key: K, + handler: LockingCache.Handler[Cached[V]] + ): Future[Option[Cached[V]]] = { + handler(None) match { + case Some(cachedValue) => + cachedValue.value match { + case Some(value) => + // set on the underlying cache, and let it take care of adding + // the secondary index + val pKey = primaryKey(value) + lockingUnderlying.lockAndSet(pKey, handler) + case None => + // no underlying value to set, so just write the forwarding entry. + // secondaryIndexingCache doesn't lock for this set, so there's + // no point in our doing it. There's a slight risk of writing an + // errant tombstone in a race, but the only way to get around this + // would be to lock around *all* primary and secondary indexes, + // which could produce deadlocks, which is probably worse. 
+ val cachedEmptyPKey = cachedValue.copy[F](value = None) + forwardingCache.set(key, cachedEmptyPKey) map { _ => + Some(cachedValue) + } + } + case None => + // nothing to do here + Future.value(None) + } + } + + override def set(key: K, cachedValue: Cached[V]): Future[Unit] = { + cachedValue.value match { + case Some(value) => + // set on the underlying cache, and let it take care of adding + // the secondary index + val pKey = primaryKey(value) + underlyingCache.set(pKey, cachedValue) + case None => + // no underlying value to set, so just write the forwarding entry + forwardingCache.set(key, cachedValue.copy[F](value = None)) + } + } + + override def replace(key: K, cachedValue: Cached[V]): Future[Boolean] = { + cachedValue.value match { + case Some(value) => + // replace in the underlying cache, and let it take care of adding the secondary index + val pKey = primaryKey(value) + underlyingCache.replace(pKey, cachedValue) + case None => + // no underlying value to set, so just write the forwarding entry + forwardingCache.replace(key, cachedValue.copy[F](value = None)) + } + } + + override def delete(key: K): Future[Boolean] = { + forwardingCache.delete(key) + } + + override def release(): Unit = { + forwardingCache.release() + underlyingCache.release() + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala new file mode 100644 index 000000000..af29080e4 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala @@ -0,0 +1,109 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.memcached.Client +import com.twitter.finagle.memcached.protocol.Value +import com.twitter.finagle.memcached.GetResult +import com.twitter.finagle.memcached.ProxyClient +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.tracing.Trace +import com.twitter.io.Buf +import 
com.twitter.logging.Logger +import com.twitter.util.Future +import scala.collection.breakOut + +object HotKeyCachingCache { + private[cache] val logger = Logger.get(getClass) +} + +/** + * Wrapper for a [[com.twitter.finagle.Memcached.Client]] that handles in-process caching for + * values flagged for promotion ("hot keys") by a twemcache backend. + * + * This is similar conceptually to + * [[com.twitter.servo.repository.HotKeyCachingKeyValueRepository]] but differs because + * HotKeyCachingKeyValueRepository detects hot keys in the client, which requires tuning and + * becomes less effective as the number of instances in the cluster grows. [[HotKeyMemcacheClient]] + * uses detection in the memcache server, which is centralized and has a better view of frequently + * accessed keys. This is a custom feature in twemcache, Twitter's memcache fork, that is not + * enabled by default. Consult with the cache team if you want to use it. + * + * Usage: + * {{{ + * new HotKeyMemcacheClient( + * underlyingCache = Memcached.client. ... 
.newRichClient(destination), + * inProcessCache = ExpiringLruInProcessCache(ttl = 10.seconds, maximumSize = 100), + * statsReceiver = statsReceiver.scope("inprocess") + * ) + * }}} + */ +class HotKeyMemcacheClient( + override val proxyClient: Client, + inProcessCache: InProcessCache[String, Value], + statsReceiver: StatsReceiver, + label: Option[String] = None) + extends ProxyClient { + import HotKeyCachingCache._ + + private val promotions = statsReceiver.counter("promotions") + private val hits = statsReceiver.counter("hits") + private val misses = statsReceiver.counter("misses") + + private def cacheIfPromoted(key: String, value: Value): Unit = { + if (value.flags.exists(MemcacheFlags.shouldPromote)) { + logger.debug(s"Promoting hot-key $key flagged by memcached backend to in-process cache.") + Trace.recordBinary("hot_key_cache.hot_key_promoted", s"${label.getOrElse("")},$key") + promotions.incr() + inProcessCache.set(key, value) + } + } + + override def getResult(keys: Iterable[String]): Future[GetResult] = { + val resultsFromInProcessCache: Map[String, Value] = + keys.flatMap(k => inProcessCache.get(k).map(v => (k, v)))(breakOut) + val foundInProcess = resultsFromInProcessCache.keySet + val newKeys = keys.filterNot(foundInProcess.contains) + + hits.incr(foundInProcess.size) + misses.incr(newKeys.size) + + if (foundInProcess.nonEmpty) { + // If there are hot keys found in the cache, record a trace annotation with the format: + // hot key cache client label;the number of hits;number of misses;and the set of hot keys found in the cache. + Trace.recordBinary( + "hot_key_cache", + s"${label.getOrElse("")};${foundInProcess.size};${newKeys.size};${foundInProcess.mkString(",")}" + ) + } + + proxyClient.getResult(newKeys).map { result => + result.hits.foreach { case (k, v) => cacheIfPromoted(k, v) } + result.copy(hits = result.hits ++ resultsFromInProcessCache) + } + } + + /** + * Exposes whether or not a key was promoted to the in-process hot key cache. 
In most cases, users + * of [[HotKeyMemcacheClient]] should not need to know this. However, they may if hot key caching + * conflicts with other layers of caching they are using. + */ + def isHotKey(key: String): Boolean = inProcessCache.get(key).isDefined +} + +// TODO: May want to turn flags into a value class in com.twitter.finagle.memcached +// with methods for these operations +object MemcacheFlags { + val FrequencyBasedPromotion: Int = 1 + val BandwidthBasedPromotion: Int = 1 << 1 + val Promotable: Int = FrequencyBasedPromotion | BandwidthBasedPromotion + + /** + * Memcache flags are returned as an unsigned integer, represented as a decimal string. + * + * Check whether the bit in position 0 ([[FrequencyBasedPromotion]]) or the bit in position 1 + * ([[BandwidthBasedPromotion]]) is set to 1 (zero-index from least-significant bit). + */ + def shouldPromote(flagsBuf: Buf): Boolean = { + val flags = flagsBuf match { case Buf.Utf8(s) => s.toLong } // unsigned 32-bit value: may exceed Int.MaxValue + (flags & Promotable) != 0 + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala new file mode 100644 index 000000000..a47e0f7a1 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala @@ -0,0 +1,63 @@ +package com.twitter.servo.cache + +import com.google.common.cache.{CacheBuilder, RemovalListener} +import com.twitter.util.Duration +import java.util.concurrent.TimeUnit + +object InProcessCache { + + /** + * Apply a read filter to exclude items in an InProcessCache + */ + def withFilter[K, V]( + underlying: InProcessCache[K, V] + )( + shouldFilter: (K, V) => Boolean + ): InProcessCache[K, V] = + new InProcessCache[K, V] { + def get(key: K): Option[V] = underlying.get(key) filterNot { shouldFilter(key, _) } + def set(key: K, value: V) = underlying.set(key, value) + } +} + +/** + * An in-process cache interface.
It is distinct from a map in that: + * 1) All methods must be threadsafe + * 2) A value set in cache is not guaranteed to remain in the cache. + */ +trait InProcessCache[K, V] { + def get(key: K): Option[V] + def set(key: K, value: V): Unit +} + +/** + * In-process implementation of a cache with LRU semantics and a TTL. + */ +class ExpiringLruInProcessCache[K, V]( + ttl: Duration, + maximumSize: Int, + removalListener: Option[RemovalListener[K, V]] = None: None.type) + extends InProcessCache[K, V] { + + private[this] val cacheBuilder = + CacheBuilder.newBuilder + .asInstanceOf[CacheBuilder[K, V]] + .expireAfterWrite(ttl.inMilliseconds, TimeUnit.MILLISECONDS) + .initialCapacity(maximumSize) + .maximumSize(maximumSize) + + private[this] val cache = + removalListener match { + case Some(listener) => + cacheBuilder + .removalListener(listener) + .build[K, V]() + case None => + cacheBuilder + .build[K, V]() + } + + def get(key: K): Option[V] = Option(cache.getIfPresent(key)) + + def set(key: K, value: V): Unit = cache.put(key, value) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala new file mode 100644 index 000000000..0228b4a0f --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala @@ -0,0 +1,84 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Throw, Return, Try} +import java.io.{DataOutputStream, ByteArrayOutputStream} +import java.nio.ByteBuffer +import scala.collection.mutable +import scala.util.control.NonFatal + +object IterableSerializer { + // Serialized format for version 0: + // Header: + // 1 byte - Version + // 4 byte - number of items + // Data, 1 per item: + // 4 bytes - item length in bytes (n) + // n bytes - item data + val FormatVersion = 0 +} + +/** + * A `Serializer` for `Iterable[T]`s. 
+ * + * @param itemSerializer a Serializer for the individual elements. + * @param itemSizeEstimate estimated size in bytes of individual elements + */ +class IterableSerializer[T, C <: Iterable[T]]( + newBuilder: () => mutable.Builder[T, C], + itemSerializer: Serializer[T], + itemSizeEstimate: Int = 8) + extends Serializer[C] { + import IterableSerializer.FormatVersion + + if (itemSizeEstimate <= 0) { + throw new IllegalArgumentException( + "Item size estimate must be positive. Invalid estimate provided: " + itemSizeEstimate + ) + } + + override def to(iterable: C): Try[Array[Byte]] = Try { + assert(iterable.hasDefiniteSize, "Must have a definite size: %s".format(iterable)) + + val numItems = iterable.size + val baos = new ByteArrayOutputStream(1 + 4 + (numItems * (4 + itemSizeEstimate))) + val output = new DataOutputStream(baos) + + // Write serialization version format and set length. + output.writeByte(FormatVersion) + output.writeInt(numItems) + + iterable.foreach { item => + val itemBytes = itemSerializer.to(item).get() + output.writeInt(itemBytes.length) + output.write(itemBytes) + } + output.flush() + baos.toByteArray() + } + + override def from(bytes: Array[Byte]): Try[C] = { + try { + val buf = ByteBuffer.wrap(bytes) + val formatVersion = buf.get() + if (formatVersion < 0 || formatVersion > FormatVersion) { + Throw(new IllegalArgumentException("Invalid serialization format: " + formatVersion)) + } else { + val numItems = buf.getInt() + val builder = newBuilder() + builder.sizeHint(numItems) + + var i = 0 + while (i < numItems) { + val itemBytes = new Array[Byte](buf.getInt()) + buf.get(itemBytes) + val item = itemSerializer.from(itemBytes).get() + builder += item + i += 1 + } + Return(builder.result()) + } + } catch { + case NonFatal(e) => Throw(e) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala new file mode 
100644 index 000000000..8caea385a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala @@ -0,0 +1,51 @@ +package com.twitter.servo.cache + +import com.twitter.util.Future + +/** + * A cache wrapper that makes the underlying cache transparent to + * certain keys. + */ +class KeyFilteringCache[K, V](val underlyingCache: Cache[K, V], keyPredicate: K => Boolean) + extends CacheWrapper[K, V] { + override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = + underlyingCache.get(keys filter keyPredicate) + + override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = + underlyingCache.getWithChecksum(keys filter keyPredicate) + + override def add(key: K, value: V) = + if (keyPredicate(key)) { + underlyingCache.add(key, value) + } else { + Future.True + } + + override def checkAndSet(key: K, value: V, checksum: Checksum) = + if (keyPredicate(key)) { + underlyingCache.checkAndSet(key, value, checksum) + } else { + Future.True + } + + override def set(key: K, value: V) = + if (keyPredicate(key)) { + underlyingCache.set(key, value) + } else { + Future.Done + } + + override def replace(key: K, value: V) = + if (keyPredicate(key)) { + underlyingCache.replace(key, value) + } else { + Future.True + } + + override def delete(key: K) = + if (keyPredicate(key)) { + underlyingCache.delete(key) + } else { + Future.True + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala new file mode 100644 index 000000000..fb7641b9e --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala @@ -0,0 +1,21 @@ +package com.twitter.servo.cache + +/** + * Converts all keys to a string via .toString + */ +class ToStringKeyTransformer[K] extends KeyTransformer[K] { + override def apply(key: K) = key.toString +} + +/** + * Prefixes all keys with a string + */ +class 
PrefixKeyTransformer[K]( + prefix: String, + delimiter: String = constants.Colon, + underlying: KeyTransformer[K] = new ToStringKeyTransformer[K]: ToStringKeyTransformer[K]) + extends KeyTransformer[K] { + private[this] val fullPrefix = prefix + delimiter + + override def apply(key: K) = fullPrefix + underlying(key) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala new file mode 100644 index 000000000..caf990303 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala @@ -0,0 +1,486 @@ +package com.twitter.servo.cache + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.partitioning.FailureAccrualException +import com.twitter.finagle.Backoff +import com.twitter.finagle.stats.{NullStatsReceiver, Stat, StatsReceiver} +import com.twitter.logging.{Level, Logger} +import com.twitter.servo.util.{ExceptionCounter, RateLimitingLogger} +import com.twitter.util._ +import scala.util.control.NoStackTrace + +object LockingCache { + + /** + * first argument is value to store, second argument is value in cache, + * returns an Option of the value to be stored. None should be interpreted + * as "don't store anything" + */ + type Picker[V] = (V, V) => Option[V] + + /** + * argument is value, if any, in cache. + * return type is value, if any, to be stored in cache. + * returning None means nothing will be done. + */ + type Handler[V] = Option[V] => Option[V] + + case class AlwaysSetHandler[V](value: Option[V]) extends Handler[V] { + override def apply(ignored: Option[V]) = value + } + + case class PickingHandler[V](newValue: V, pick: Picker[V]) extends Handler[V] { + override def apply(inCache: Option[V]): Option[V] = + inCache match { + case None => + // if nothing in cache, go ahead and store! 
+ Some(newValue) + case Some(oldValue) => + // if something in cache, store a picked value based on + // what's in cache and what's being stored + pick(newValue, oldValue) + } + + // apparently case classes that extend functions don't get pretty toString methods + override lazy val toString = "PickingHandler(%s, %s)".format(newValue, pick) + } + + case class UpdateOnlyPickingHandler[V](newValue: V, pick: Picker[V]) extends Handler[V] { + override def apply(inCache: Option[V]): Option[V] = + inCache match { + case None => + // if nothing in cache, do not update + None + case Some(oldValue) => + // if something in cache, store a picked value based on + // what's in cache and what's being stored + pick(newValue, oldValue) + } + + // apparently case classes that extend functions don't get pretty toString methods + override lazy val toString = "UpdateOnlyPickingHandler(%s, %s)".format(newValue, pick) + } +} + +trait LockingCacheFactory { + def apply[K, V](cache: Cache[K, V]): LockingCache[K, V] + def scope(scopes: String*): LockingCacheFactory +} + +/** + * A cache that enforces a consistent view of values between the time when a set + * is initiated and when the value is actually updated in cache. + */ +trait LockingCache[K, V] extends Cache[K, V] { + + /** + * Look up a value and dispatch based on the result. The particular locking + * approach is defined by the implementing class. May call handler multiple + * times as part of more elaborate locking and retry looping. + * + * Overview of semantics: + * `handler(None)` is called if no value is present in cache. + * `handler(Some(value))` is called if a value is present. + * `handler(x)` should return None if nothing should be done and `Some(value)` + * if a value should be set. 
+ * + * @return the value that was actually set + */ + def lockAndSet(key: K, handler: LockingCache.Handler[V]): Future[Option[V]] +} + +class OptimisticLockingCacheObserver(statsReceiver: StatsReceiver) { + import OptimisticLockingCache._ + + private[this] val scopedReceiver = statsReceiver.scope("locking_cache") + + private[this] val successCounter = scopedReceiver.counter("success") + private[this] val failureCounter = scopedReceiver.counter("failure") + private[this] val exceptionCounter = new ExceptionCounter(scopedReceiver) + private[this] val lockAndSetStat = scopedReceiver.stat("lockAndSet") + + def time[V](f: => Future[Option[V]]): Future[Option[V]] = { + Stat.timeFuture(lockAndSetStat) { + f + } + } + + def success(attempts: Seq[FailedAttempt]): Unit = { + successCounter.incr() + countAttempts(attempts) + } + + def failure(attempts: Seq[FailedAttempt]): Unit = { + failureCounter.incr() + countAttempts(attempts) + } + + def scope(s: String*): OptimisticLockingCacheObserver = + s.toList match { + case Nil => this + case head :: tail => + new OptimisticLockingCacheObserver(statsReceiver.scope(head)).scope(tail: _*) + } + + private[this] def countAttempts(attempts: Seq[FailedAttempt]): Unit = { + attempts foreach { attempt => + val name = attempt.getClass.getSimpleName + scopedReceiver.counter(name).incr() + attempt.maybeThrowable foreach { t => + exceptionCounter(t) + scopedReceiver.scope(name).counter(t.getClass.getName).incr() + } + } + } +} + +case class OptimisticLockingCacheFactory( + backoffs: Backoff, + observer: OptimisticLockingCacheObserver = new OptimisticLockingCacheObserver(NullStatsReceiver), + timer: Timer = new NullTimer, + // Enabling key logging may unintentionally cause inclusion of sensitive data + // in service logs and any accompanying log sinks such as Splunk. By default, this is disabled, + // however may be optionally enabled for the purpose of debugging. Caution is warranted. 
+ enableKeyLogging: Boolean = false) + extends LockingCacheFactory { + def this( + backoffs: Backoff, + statsReceiver: StatsReceiver, + timer: Timer, + enableKeyLogging: Boolean + ) = this(backoffs, new OptimisticLockingCacheObserver(statsReceiver), timer, enableKeyLogging) + + override def apply[K, V](cache: Cache[K, V]): LockingCache[K, V] = { + new OptimisticLockingCache(cache, backoffs, observer, timer, enableKeyLogging) + } + + override def scope(scopes: String*): LockingCacheFactory = { + copy(observer = observer.scope(scopes: _*)) // keeps backoffs, timer and enableKeyLogging + } +} + +object OptimisticLockingCache { + private[this] val FutureNone = Future.value(None) + + def emptyFutureNone[V] = FutureNone.asInstanceOf[Future[Option[V]]] + + sealed abstract class FailedAttempt(val maybeThrowable: Option[Throwable]) + extends Exception + with NoStackTrace + case class GetWithChecksumException(t: Throwable) extends FailedAttempt(Some(t)) + case object GetWithChecksumEmpty extends FailedAttempt(None) + case object CheckAndSetFailed extends FailedAttempt(None) + case class CheckAndSetException(t: Throwable) extends FailedAttempt(Some(t)) + case class AddException(t: Throwable) extends FailedAttempt(Some(t)) + + case class LockAndSetFailure(str: String, attempts: Seq[FailedAttempt]) + extends Exception( + str, + // if the last exception was an RPC exception, try to recover the stack trace + attempts.lastOption.flatMap(_.maybeThrowable).orNull + ) + + private def retryPolicy(backoffs: Backoff): RetryPolicy[Try[Nothing]] = + RetryPolicy.backoff(backoffs) { + case Throw(_: FailureAccrualException) => false + case _ => true + } +} + +/** + * Implementation of a LockingCache using add/getWithChecksum/checkAndSet. 
+ */ +class OptimisticLockingCache[K, V]( + override val underlyingCache: Cache[K, V], + retryPolicy: RetryPolicy[Try[Nothing]], + observer: OptimisticLockingCacheObserver, + timer: Timer, + enableKeyLogging: Boolean) + extends LockingCache[K, V] + with CacheWrapper[K, V] { + import LockingCache._ + import OptimisticLockingCache._ + + def this( + underlyingCache: Cache[K, V], + retryPolicy: RetryPolicy[Try[Nothing]], + observer: OptimisticLockingCacheObserver, + timer: Timer, + ) = + this( + underlyingCache: Cache[K, V], + retryPolicy: RetryPolicy[Try[Nothing]], + observer: OptimisticLockingCacheObserver, + timer: Timer, + false + ) + + def this( + underlyingCache: Cache[K, V], + backoffs: Backoff, + observer: OptimisticLockingCacheObserver, + timer: Timer + ) = + this( + underlyingCache, + OptimisticLockingCache.retryPolicy(backoffs), + observer, + timer, + false + ) + + def this( + underlyingCache: Cache[K, V], + backoffs: Backoff, + observer: OptimisticLockingCacheObserver, + timer: Timer, + enableKeyLogging: Boolean + ) = + this( + underlyingCache, + OptimisticLockingCache.retryPolicy(backoffs), + observer, + timer, + enableKeyLogging + ) + + private[this] val log = Logger.get("OptimisticLockingCache") + private[this] val rateLimitedLogger = new RateLimitingLogger(logger = log) + + @deprecated("use RetryPolicy-based constructor", "0.1.2") + def this(underlyingCache: Cache[K, V], maxTries: Int = 10, enableKeyLogging: Boolean) = { + this( + underlyingCache, + Backoff.const(0.milliseconds).take(maxTries), + new OptimisticLockingCacheObserver(NullStatsReceiver), + new NullTimer, + enableKeyLogging + ) + } + + override def lockAndSet(key: K, handler: Handler[V]): Future[Option[V]] = { + observer.time { + dispatch(key, handler, retryPolicy, Nil) + } + } + + /** + * @param key + * The key to look up in cache + * @param handler + * The handler that is applied to values from cache + * @param retryPolicy + * Used to determine if more attempts should be made. 
+ * @param attempts + * Contains representations of the causes of previous dispatch failures + */ + protected[this] def retry( + key: K, + failure: Try[Nothing], + handler: Handler[V], + retryPolicy: RetryPolicy[Try[Nothing]], + attempts: Seq[FailedAttempt] + ): Future[Option[V]] = + retryPolicy(failure) match { + case None => + observer.failure(attempts) + if (enableKeyLogging) { + rateLimitedLogger.log( + s"failed attempts for ${key}:\n ${attempts.mkString("\n ")}", + level = Level.INFO) + Future.exception(LockAndSetFailure("lockAndSet failed for " + key, attempts)) + } else { + Future.exception(LockAndSetFailure("lockAndSet failed", attempts)) + } + + case Some((backoff, tailPolicy)) => + timer + .doLater(backoff) { + dispatch(key, handler, tailPolicy, attempts) + } + .flatten + } + + /** + * @param key + * The key to look up in cache + * @param handler + * The handler that is applied to values from cache + * @param retryPolicy + * Used to determine if more attempts should be made. + * @param attempts + * Contains representations of the causes of previous dispatch failures + */ + protected[this] def dispatch( + key: K, + handler: Handler[V], + retryPolicy: RetryPolicy[Try[Nothing]], + attempts: Seq[FailedAttempt] + ): Future[Option[V]] = { + // get the value if nothing's there + handler(None) match { + case None => + // if nothing should be done when missing, go straight to getAndConditionallySet, + // since there's nothing to attempt an add with + getAndConditionallySet(key, handler, retryPolicy, attempts) + + case some @ Some(value) => + // otherwise, try to do an atomic add, which will return false if something's there + underlyingCache.add(key, value) transform { + case Return(added) => + if (added) { + // if added, return the value + observer.success(attempts) + Future.value(some) + } else { + // otherwise, do a checkAndSet based on the current value + getAndConditionallySet(key, handler, retryPolicy, attempts) + } + + case Throw(t) => + // count exception 
against retries + if (enableKeyLogging) + rateLimitedLogger.logThrowable(t, s"add($key) returned exception. will retry") + retry(key, Throw(t), handler, retryPolicy, attempts :+ AddException(t)) + } + } + } + + /** + * @param key + * The key to look up in cache + * @param handler + * The handler that is applied to values from cache + * @param retryPolicy + * Used to determine if more attempts should be made. + * @param attempts + * Contains representations of the causes of previous dispatch failures + */ + protected[this] def getAndConditionallySet( + key: K, + handler: Handler[V], + retryPolicy: RetryPolicy[Try[Nothing]], + attempts: Seq[FailedAttempt] + ): Future[Option[V]] = { + // look in the cache to see what's there + underlyingCache.getWithChecksum(Seq(key)) handle { + case t => + // treat global failure as key-based failure + KeyValueResult(failed = Map(key -> t)) + } flatMap { lr => + lr(key) match { + case Return.None => + handler(None) match { + case Some(_) => + // if there's nothing in the cache now, but handler(None) return Some, + // that means something has changed since we attempted the add, so try again + val failure = GetWithChecksumEmpty + retry(key, Throw(failure), handler, retryPolicy, attempts :+ failure) + + case None => + // if there's nothing in the cache now, but handler(None) returns None, + // that means we don't want to store anything when there's nothing already + // in cache, so return None + observer.success(attempts) + emptyFutureNone + } + + case Return(Some((Return(current), checksum))) => + // the cache entry is present + dispatchCheckAndSet(Some(current), checksum, key, handler, retryPolicy, attempts) + + case Return(Some((Throw(t), checksum))) => + // the cache entry failed to deserialize; treat it as a None and overwrite. + if (enableKeyLogging) + rateLimitedLogger.logThrowable( + t, + s"getWithChecksum(${key}) returned a bad value. 
overwriting.") + dispatchCheckAndSet(None, checksum, key, handler, retryPolicy, attempts) + + case Throw(t) => + // lookup failure counts against numTries + if (enableKeyLogging) + rateLimitedLogger.logThrowable( + t, + s"getWithChecksum(${key}) returned exception. will retry.") + retry(key, Throw(t), handler, retryPolicy, attempts :+ GetWithChecksumException(t)) + } + } + } + + /** + * @param current + * The value currently cached under key `key`, if any + * @param checksum + * The checksum of the currently-cached value + * @param key + * The key mapping to `current` + * @param handler + * The handler that is applied to values from cache + * @param retryPolicy + * Used to determine if more attempts should be made. + * @param attempts + * Contains representations of the causes of previous dispatch failures + */ + protected[this] def dispatchCheckAndSet( + current: Option[V], + checksum: Checksum, + key: K, + handler: Handler[V], + retryPolicy: RetryPolicy[Try[Nothing]], + attempts: Seq[FailedAttempt] + ): Future[Option[V]] = { + handler(current) match { + case None => + // if nothing should be done based on the current value, don't do anything + observer.success(attempts) + emptyFutureNone + + case some @ Some(value) => + // otherwise, try a check and set with the checksum + underlyingCache.checkAndSet(key, value, checksum) transform { + case Return(added) => + if (added) { + // if added, return the value + observer.success(attempts) + Future.value(some) + } else { + // otherwise, something has changed, try again + val failure = CheckAndSetFailed + retry(key, Throw(failure), handler, retryPolicy, attempts :+ failure) + } + + case Throw(t) => + // count exception against retries + if (enableKeyLogging) + rateLimitedLogger.logThrowable( + t, + s"checkAndSet(${key}) returned exception. 
will retry.") + retry(key, Throw(t), handler, retryPolicy, attempts :+ CheckAndSetException(t)) + } + } + } +} + +object NonLockingCacheFactory extends LockingCacheFactory { + override def apply[K, V](cache: Cache[K, V]): LockingCache[K, V] = new NonLockingCache(cache) + override def scope(scopes: String*) = this +} + +class NonLockingCache[K, V](override val underlyingCache: Cache[K, V]) + extends LockingCache[K, V] + with CacheWrapper[K, V] { + override def lockAndSet(key: K, handler: LockingCache.Handler[V]): Future[Option[V]] = { + handler(None) match { + case None => + // if nothing should be done when nothing's there, don't do anything + Future.value(None) + + case some @ Some(value) => + set(key, value) map { _ => + some + } + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala new file mode 100644 index 000000000..8b0be8dcc --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala @@ -0,0 +1,59 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Duration, Future} + +/** + * [[Memcache]] is a Cache with types that reflect the memcached protocol. Keys are strings and + * values are byte arrays. 
+ */ +trait Memcache extends TtlCache[String, Array[Byte]] { + def incr(key: String, delta: Long = 1): Future[Option[Long]] + def decr(key: String, delta: Long = 1): Future[Option[Long]] +} + +/** + * allows one Memcache to wrap another + */ +trait MemcacheWrapper extends TtlCacheWrapper[String, Array[Byte]] with Memcache { + override def underlyingCache: Memcache + + override def incr(key: String, delta: Long = 1) = underlyingCache.incr(key, delta) + override def decr(key: String, delta: Long = 1) = underlyingCache.decr(key, delta) +} + +/** + * Switch between two caches with a decider value + */ +class DeciderableMemcache(primary: Memcache, secondary: Memcache, isAvailable: => Boolean) + extends MemcacheWrapper { + override def underlyingCache = if (isAvailable) primary else secondary +} + +/** + * [[MemcacheCache]] converts a [[Memcache]] to a [[Cache[K, V]]] using a [[Serializer]] for values + * and a [[KeyTransformer]] for keys. + * + * The value serializer is bidirectional. Keys are serialized using a one-way transformation + * method, which defaults to _.toString. 
+ */ +class MemcacheCache[K, V]( + memcache: Memcache, + ttl: Duration, + serializer: Serializer[V], + transformKey: KeyTransformer[K] = new ToStringKeyTransformer[K]: ToStringKeyTransformer[K]) + extends CacheWrapper[K, V] { + override val underlyingCache = new KeyValueTransformingCache( + new SimpleTtlCacheToCache(memcache, ttl), + serializer, + transformKey + ) + + def incr(key: K, delta: Int = 1): Future[Option[Long]] = { + if (delta >= 0) + memcache.incr(transformKey(key), delta) + else + memcache.decr(transformKey(key), -delta) + } + + def decr(key: K, delta: Int = 1): Future[Option[Long]] = incr(key, -delta) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala new file mode 100644 index 000000000..750dc913c --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala @@ -0,0 +1,245 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.Duration +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw + +/** + * MigratingReadCache supports a gradual migration from one cache to another. Reads from the + * cache are compared to reads from the darkCache and new values are written to the darkCache + * if necessary. 
+ */ +class MigratingReadCache[K, V]( + cache: ReadCache[K, V], + darkCache: Cache[K, V], + statsReceiver: StatsReceiver = NullStatsReceiver) + extends ReadCache[K, V] { + + private[this] val scopedStatsReceiver = statsReceiver.scope("migrating_read_cache") + private[this] val getScope = scopedStatsReceiver.scope("get") + private[this] val getMismatchedResultsCounter = getScope.counter("mismatched_results") + private[this] val getMissingResultsCounter = getScope.counter("missing_results") + private[this] val getUnexpectedResultsCounter = getScope.counter("unexpected_results") + private[this] val getMatchingResultsCounter = getScope.counter("matching_results") + + private[this] val getWithChecksumScope = scopedStatsReceiver.scope("get_with_cheksum") + private[this] val getWithChecksumMismatchedResultsCounter = + getWithChecksumScope.counter("mismatched_results") + private[this] val getWithChecksumMissingResultsCounter = + getWithChecksumScope.counter("missing_results") + private[this] val getWithChecksumUnexpectedResultsCounter = + getWithChecksumScope.counter("unexpected_results") + private[this] val getWithChecksumMatchingResultsCounter = + getWithChecksumScope.counter("matching_results") + + override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = { + cache.get(keys) onSuccess { result => + darkCache.get(keys) onSuccess { darkResult => + keys foreach { k => + (result(k), darkResult(k)) match { + // compare values, set if they differ + case (Return(Some(v)), Return(Some(dv))) if (v != dv) => + getMismatchedResultsCounter.incr() + darkCache.set(k, v) + // set a value if missing + case (Return(Some(v)), Return.None | Throw(_)) => + getMissingResultsCounter.incr() + darkCache.set(k, v) + // remove if necessary + case (Return.None, Return(Some(_)) | Throw(_)) => + getUnexpectedResultsCounter.incr() + darkCache.delete(k) + // do nothing otherwise + case _ => + getMatchingResultsCounter.incr() + () + } + } + } + } + } + + override def getWithChecksum(keys: 
Seq[K]): Future[CsKeyValueResult[K, V]] = { + cache.getWithChecksum(keys) onSuccess { result => + // no point in the getWithChecksum from the darkCache + darkCache.get(keys) onSuccess { darkResult => + keys foreach { k => + (result(k), darkResult(k)) match { + // compare values, set if they differ + case (Return(Some((Return(v), _))), Return(Some(dv))) if (v != dv) => + getWithChecksumMismatchedResultsCounter.incr() + darkCache.set(k, v) + // set a value if missing + case (Return(Some((Return(v), _))), Return.None | Throw(_)) => + getWithChecksumMissingResultsCounter.incr() + darkCache.set(k, v) + // remove if necessary + case (Return.None, Return(Some(_)) | Throw(_)) => + getWithChecksumUnexpectedResultsCounter.incr() + darkCache.delete(k) + // do nothing otherwise + case _ => + getWithChecksumMatchingResultsCounter.incr() + () + } + } + } + } + } + + override def release(): Unit = { + cache.release() + darkCache.release() + } +} + +/** + * MigratingCache supports a gradual migration from one cache to another. Writes to the cache + * are propagated to the darkCache. Reads from the cache are compared to reads from the darkCache + * and new values are written to the darkCache if necessary. + * + * Writes to the darkCache are not locking writes, so there is some risk of inconsistencies from + * race conditions. However, writes to the darkCache only occur if they succeed in the cache, so + * if a checkAndSet fails, for example, no write is issued to the darkCache. 
+ */ +class MigratingCache[K, V]( + cache: Cache[K, V], + darkCache: Cache[K, V], + statsReceiver: StatsReceiver = NullStatsReceiver) + extends MigratingReadCache(cache, darkCache, statsReceiver) + with Cache[K, V] { + override def add(key: K, value: V): Future[Boolean] = { + cache.add(key, value) onSuccess { wasAdded => + if (wasAdded) { + darkCache.set(key, value) + } + } + } + + override def checkAndSet(key: K, value: V, checksum: Checksum): Future[Boolean] = { + cache.checkAndSet(key, value, checksum) onSuccess { wasSet => + if (wasSet) { + darkCache.set(key, value) + } + } + } + + override def set(key: K, value: V): Future[Unit] = { + cache.set(key, value) onSuccess { _ => + darkCache.set(key, value) + } + } + + override def replace(key: K, value: V): Future[Boolean] = { + cache.replace(key, value) onSuccess { wasReplaced => + if (wasReplaced) { + darkCache.set(key, value) + } + } + } + + override def delete(key: K): Future[Boolean] = { + cache.delete(key) onSuccess { wasDeleted => + if (wasDeleted) { + darkCache.delete(key) + } + } + } +} + +/** + * Like MigratingCache but for TtlCaches + */ +class MigratingTtlCache[K, V]( + cache: TtlCache[K, V], + darkCache: TtlCache[K, V], + ttl: (K, V) => Duration) + extends MigratingReadCache(cache, new TtlCacheToCache(darkCache, ttl)) + with TtlCache[K, V] { + override def add(key: K, value: V, ttl: Duration): Future[Boolean] = { + cache.add(key, value, ttl) onSuccess { wasAdded => + if (wasAdded) { + darkCache.set(key, value, ttl) + } + } + } + + override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration): Future[Boolean] = { + cache.checkAndSet(key, value, checksum, ttl) onSuccess { wasSet => + if (wasSet) { + darkCache.set(key, value, ttl) + } + } + } + + override def set(key: K, value: V, ttl: Duration): Future[Unit] = { + cache.set(key, value, ttl) onSuccess { _ => + darkCache.set(key, value, ttl) + } + } + + override def replace(key: K, value: V, ttl: Duration): Future[Boolean] = { + 
cache.replace(key, value, ttl) onSuccess { wasReplaced => + if (wasReplaced) { + darkCache.set(key, value, ttl) + } + } + } + + override def delete(key: K): Future[Boolean] = { + cache.delete(key) onSuccess { wasDeleted => + if (wasDeleted) { + darkCache.delete(key) + } + } + } + + override def release(): Unit = { + cache.release() + darkCache.release() + } +} + +/** + * A MigratingTtlCache for Memcaches, implementing a migrating incr and decr. Race conditions + * are possible and may prevent the counts from being perfectly synchronized. + */ +class MigratingMemcache( + cache: Memcache, + darkCache: Memcache, + ttl: (String, Array[Byte]) => Duration) + extends MigratingTtlCache[String, Array[Byte]](cache, darkCache, ttl) + with Memcache { + def incr(key: String, delta: Long = 1): Future[Option[Long]] = { + cache.incr(key, delta) onSuccess { + case None => + darkCache.delete(key) + + case Some(value) => + darkCache.incr(key, delta) onSuccess { + case Some(`value`) => // same value! + case _ => + val b = value.toString.getBytes + darkCache.set(key, b, ttl(key, b)) + } + } + } + + def decr(key: String, delta: Long = 1): Future[Option[Long]] = { + cache.decr(key, delta) onSuccess { + case None => + darkCache.delete(key) + + case Some(value) => + darkCache.decr(key, delta) onSuccess { + case Some(`value`) => // same value! + case _ => + val b = value.toString.getBytes + darkCache.set(key, b, ttl(key, b)) + } + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala new file mode 100644 index 000000000..59acd28d0 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala @@ -0,0 +1,46 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.memcached.util.NotFound +import scala.util.Random + +/** + * wrap a ReadCache, forcing a miss rate. 
useful for playing back + * the same logs over and over, but simulating expected cache misses + */ +class MissingReadCache[K, V]( + underlyingCache: ReadCache[K, V], + hitRate: Float, + rand: Random = new Random) + extends ReadCache[K, V] { + assert(hitRate >= 0 && hitRate <= 1, "hitRate must be <= 1 and >= 0") + + protected def filterResult[W](lr: KeyValueResult[K, W]) = { + val found = lr.found.filter { _ => + rand.nextFloat <= hitRate + } + val notFound = lr.notFound ++ NotFound(lr.found.keySet, found.keySet) + KeyValueResult(found, notFound, lr.failed) + } + + override def get(keys: Seq[K]) = + underlyingCache.get(keys) map { filterResult(_) } + + override def getWithChecksum(keys: Seq[K]) = + underlyingCache.getWithChecksum(keys) map { filterResult(_) } + + override def release() = underlyingCache.release() +} + +class MissingCache[K, V]( + override val underlyingCache: Cache[K, V], + hitRate: Float, + rand: Random = new Random) + extends MissingReadCache[K, V](underlyingCache, hitRate, rand) + with CacheWrapper[K, V] + +class MissingTtlCache[K, V]( + override val underlyingCache: TtlCache[K, V], + hitRate: Float, + rand: Random = new Random) + extends MissingReadCache[K, V](underlyingCache, hitRate, rand) + with TtlCacheWrapper[K, V] diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala new file mode 100644 index 000000000..a3bed9624 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala @@ -0,0 +1,419 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.stats.{Stat, StatsReceiver} +import com.twitter.logging.{Level, Logger} +import com.twitter.servo.util.{ExceptionCounter, WindowedAverage} +import com.twitter.util._ + +/** + * track hits and misses in caches, time reads and writes + */ +trait CacheObserver { + + /** + * register a hit + */ + def hit(key: String): Unit + + /** + 
* register a miss + */ + def miss(key: String): Unit + + /** + * time the read, and automatically handle hits and misses from the KeyValueResult + */ + def read[K, T]( + name: String, + keys: Seq[K] + )( + f: => Future[KeyValueResult[K, T]] + ): Future[KeyValueResult[K, T]] + + /** + * time the write + */ + def write[K, T](name: String, key: K)(f: => Future[T]): Future[T] + + /** + * time the incr, and record the success/failure + */ + def incr[K](name: String, key: Seq[K])(f: => Future[Option[Long]]): Future[Option[Long]] + + /** + * produce a new CacheObserver with a nested scope + */ + def scope(s: String*): CacheObserver + + /** + * increment a counter tracking the number of expirations. + */ + def expired(delta: Int = 1): Unit + + /** + * Increment a counter tracking the number of failures. + */ + def failure(delta: Int = 1): Unit + + /** + * Increment a counter tracking the number of tombstones. + */ + def tombstone(delta: Int = 1): Unit + + /** + * Increment a counter tracking the number of not cached. 
+ */ + def noCache(delta: Int = 1): Unit +} + +object NullCacheObserver extends CacheObserver { + override def hit(key: String) = () + override def miss(key: String) = () + override def read[K, T](name: String, keys: Seq[K])(f: => Future[KeyValueResult[K, T]]) = f + override def write[K, T](name: String, key: K)(f: => Future[T]) = f + override def incr[K](name: String, key: Seq[K])(f: => Future[Option[Long]]) = f + override def scope(s: String*) = this + override def expired(delta: Int = 1) = () + override def failure(delta: Int = 1): Unit = {} + override def tombstone(delta: Int = 1): Unit = {} + override def noCache(delta: Int = 1): Unit = {} +} + +/** + * A CacheObserver that writes to a StatsReceiver + */ +class StatsReceiverCacheObserver( + stats: StatsReceiver, + windowSize: Long, + log: Logger, + disableLogging: Boolean = false) + extends CacheObserver { + + def this( + statsReceiver: StatsReceiver, + windowSize: Long, + scope: String + ) = + this( + statsReceiver.scope(scope), + windowSize, + Logger.get(scope.replaceAll("([a-z]+)([A-Z])", "$1_$2").toLowerCase) + ) + + def this( + statsReceiver: StatsReceiver, + windowSize: Long, + scope: String, + disableLogging: Boolean + ) = + this( + statsReceiver.scope(scope), + windowSize, + Logger.get(scope.replaceAll("([a-z]+)([A-Z])", "$1_$2").toLowerCase), + disableLogging + ) + + protected[this] val expirationCounter = stats.counter("expirations") + + // needed to make sure we hand out the same observer for each scope, + // so that the hit rates are properly calculated + protected[this] val children = Memoize { + new StatsReceiverCacheObserver(stats, windowSize, _: String, disableLogging) + } + + protected[this] val exceptionCounter = new ExceptionCounter(stats) + private[this] val hitCounter = stats.counter("hits") + private[this] val missCounter = stats.counter("misses") + private[this] val failuresCounter = stats.counter("failures") + private[this] val tombstonesCounter = stats.counter("tombstones") + 
private[this] val noCacheCounter = stats.counter("noCache") + + private[this] val windowedHitRate = new WindowedAverage(windowSize) + private[this] val windowedIncrHitRate = new WindowedAverage(windowSize) + + private[this] val hitRateGauge = stats.addGauge("hit_rate") { + windowedHitRate.value.getOrElse(1.0).toFloat + } + + private[this] val incrHitRateGauge = stats.addGauge("incr_hit_rate") { + windowedIncrHitRate.value.getOrElse(1.0).toFloat + } + + protected[this] def handleThrowable[K](name: String, t: Throwable, key: Option[K]): Unit = { + stats.counter(name + "_failures").incr() + exceptionCounter(t) + if (!disableLogging) { + lazy val suffix = key + .map { k => + "(" + k.toString + ")" + } + .getOrElse("") + log.warning("%s%s caught: %s", name, suffix, t.getClass.getName) + log.trace(t, "stack trace was: ") + } + } + + override def hit(key: String): Unit = { + hits(1) + if (!disableLogging) + log.trace("cache hit: %s", key) + } + + private[this] def hits(n: Int): Unit = { + windowedHitRate.record(n.toDouble, n.toDouble) + hitCounter.incr(n) + } + + override def miss(key: String): Unit = { + misses(1) + if (!disableLogging) + log.trace("cache miss: %s", key) + } + + private[this] def misses(n: Int): Unit = { + windowedHitRate.record(0.0F, n.toDouble) + missCounter.incr(n) + } + + override def read[K, T]( + name: String, + keys: Seq[K] + )( + f: => Future[KeyValueResult[K, T]] + ): Future[KeyValueResult[K, T]] = + Stat + .timeFuture(stats.stat(name)) { + stats.counter(name).incr() + f + } + .respond { + case Return(lr) => + if (log.isLoggable(Level.TRACE)) { + lr.found.keys.foreach { k => + hit(k.toString) + } + lr.notFound.foreach { k => + miss(k.toString) + } + } else { + hits(lr.found.keys.size) + misses(lr.notFound.size) + } + lr.failed foreach { + case (k, t) => + handleThrowable(name, t, Some(k)) + // count failures as misses + miss(k.toString) + failuresCounter.incr() + } + case Throw(t) => + handleThrowable(name, t, None) + // count failures as misses 
+ keys.foreach { k => + miss(k.toString) + } + failuresCounter.incr() + } + + override def write[K, T](name: String, key: K)(f: => Future[T]): Future[T] = + Stat.timeFuture(stats.stat(name)) { + stats.counter(name).incr() + f + } onFailure { + handleThrowable(name, _, Some(key)) + } + + override def incr[K](name: String, key: Seq[K])(f: => Future[Option[Long]]) = + Stat.timeFuture(stats.stat(name)) { + stats.counter(name).incr() + f + } onSuccess { optVal => + val hit = optVal.isDefined + windowedIncrHitRate.record(if (hit) 1F else 0F) + stats.counter(name + (if (hit) "_hits" else "_misses")).incr() + } + + override def scope(s: String*) = + s.toList match { + case Nil => this + case head :: tail => children(head).scope(tail: _*) + } + + override def expired(delta: Int = 1): Unit = { expirationCounter.incr(delta) } + override def failure(delta: Int = 1): Unit = { failuresCounter.incr(delta) } + override def tombstone(delta: Int = 1): Unit = { tombstonesCounter.incr(delta) } + override def noCache(delta: Int = 1): Unit = { noCacheCounter.incr(delta) } + +} + +/** + * Wraps an underlying cache with calls to a CacheObserver + */ +class ObservableReadCache[K, V](underlyingCache: ReadCache[K, V], observer: CacheObserver) + extends ReadCache[K, V] { + override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = { + observer.read("get", keys) { + underlyingCache.get(keys) + } + } + + override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = { + observer.read[K, (Try[V], Checksum)]("get_with_checksum", keys) { + underlyingCache.getWithChecksum(keys) + } + } + + override def release() = underlyingCache.release() +} + +object ObservableCache { + def apply[K, V]( + underlyingCache: Cache[K, V], + statsReceiver: StatsReceiver, + windowSize: Long, + name: String + ): Cache[K, V] = + new ObservableCache( + underlyingCache, + new StatsReceiverCacheObserver(statsReceiver, windowSize, name) + ) + + def apply[K, V]( + underlyingCache: Cache[K, V], + 
/**
 * A [[Cache]] decorator: reads are observed by [[ObservableReadCache]], and each write below
 * is forwarded to `underlyingCache` from inside an `observer.write` call named after the
 * operation ("add", "check_and_set", "set", "replace", "delete").
 */
class ObservableCache[K, V](underlyingCache: Cache[K, V], observer: CacheObserver)
    extends ObservableReadCache(underlyingCache, observer)
    with Cache[K, V] {

  override def add(key: K, value: V): Future[Boolean] =
    observer.write("add", key)(underlyingCache.add(key, value))

  override def checkAndSet(key: K, value: V, checksum: Checksum): Future[Boolean] =
    observer.write("check_and_set", key)(underlyingCache.checkAndSet(key, value, checksum))

  override def set(key: K, value: V): Future[Unit] =
    observer.write("set", key)(underlyingCache.set(key, value))

  override def replace(key: K, value: V): Future[Boolean] =
    observer.write("replace", key)(underlyingCache.replace(key, value))

  override def delete(key: K): Future[Boolean] =
    observer.write("delete", key)(underlyingCache.delete(key))
}
/**
 * A [[Memcache]] decorator. Reads and TTL writes are observed by [[ObservableTtlCache]];
 * `incr` and `decr` are forwarded to `underlyingCache` from inside `observer.incr`.
 *
 * @param underlyingCache the memcache that actually performs the operations
 * @param observer        receives an `incr` call named "incr" or "decr" per operation
 */
class ObservableMemcache(underlyingCache: Memcache, observer: CacheObserver)
    extends ObservableTtlCache[String, Array[Byte]](underlyingCache, observer)
    with Memcache {

  /**
   * Increments the counter stored at `key` by `delta` on the underlying cache.
   *
   * The observer's `incr` takes the keys as a `Seq[K]`. The key is wrapped explicitly so the
   * observer receives a one-element `Seq[String]`; passing the bare String would compile
   * only through the implicit String-to-Seq[Char] conversion and hand the observer the
   * key's characters instead of the key.
   */
  def incr(key: String, delta: Long = 1): Future[Option[Long]] =
    observer.incr("incr", Seq(key)) {
      underlyingCache.incr(key, delta)
    }

  /**
   * Decrements the counter stored at `key` by `delta` on the underlying cache.
   * Observed under the name "decr"; the key is wrapped for the same reason as in `incr`.
   */
  def decr(key: String, delta: Long = 1): Future[Option[Long]] =
    observer.incr("decr", Seq(key)) {
      underlyingCache.decr(key, delta)
    }
}
object SecondaryIndexingCache {

  /** Derives the secondary index (if any) of a value; a `Throw` means it could not be determined. */
  type IndexMapping[S, V] = V => Try[Option[S]]
}

/**
 * A cache wrapper that, after every successful write of a value to `underlyingCache`,
 * also writes a `secondary index -> primary key` entry to `secondaryIndexCache`.
 *
 * @param underlyingCache     the primary cache, keyed by `K`
 * @param secondaryIndexCache the cache that maps a secondary index `S` back to the key `K`
 * @param secondaryIndex      computes the secondary index of a value
 */
class SecondaryIndexingCache[K, S, V](
  override val underlyingCache: Cache[K, Cached[V]],
  secondaryIndexCache: Cache[S, Cached[K]],
  secondaryIndex: SecondaryIndexingCache.IndexMapping[S, V])
    extends CacheWrapper[K, Cached[V]] {
  protected[this] val log = Logger.get(getClass.getSimpleName)

  /**
   * Writes the secondary index entry for `cachedValue`, reusing its cache metadata with the
   * value replaced by `key`. Nothing is written when the entry holds no value (a tombstone),
   * when the mapping yields no index, or when the mapping fails (the failure is logged).
   */
  protected[this] def setSecondaryIndex(key: K, cachedValue: Cached[V]): Future[Unit] =
    cachedValue.value.map(secondaryIndex) match {
      case Some(Return(Some(index))) =>
        secondaryIndexCache.set(index, cachedValue.copy(value = Some(key)))
      case Some(Return(None)) =>
        Future.Done
      case Some(Throw(t)) =>
        log.error(t, "failed to determine secondary index for: %s", cachedValue)
        Future.Done
      case None =>
        // a tombstone has no value to derive an index from
        Future.Done
    }

  /** Indexes `cachedValue` only if the primary write reported success; passes the flag through. */
  private[this] def indexIfStored(
    key: K,
    cachedValue: Cached[V],
    stored: Boolean
  ): Future[Boolean] =
    if (!stored) Future.value(false)
    else setSecondaryIndex(key, cachedValue).map(_ => true)

  override def set(key: K, cachedValue: Cached[V]): Future[Unit] =
    super.set(key, cachedValue).flatMap(_ => setSecondaryIndex(key, cachedValue))

  // the secondary index is a plain set, performed only when the CAS succeeded
  override def checkAndSet(key: K, cachedValue: Cached[V], checksum: Checksum): Future[Boolean] =
    super.checkAndSet(key, cachedValue, checksum).flatMap { wasStored =>
      indexIfStored(key, cachedValue, wasStored)
    }

  // the secondary index is a plain set, performed only when the add succeeded
  override def add(key: K, cachedValue: Cached[V]): Future[Boolean] =
    super.add(key, cachedValue).flatMap { wasAdded =>
      indexIfStored(key, cachedValue, wasAdded)
    }

  override def replace(key: K, cachedValue: Cached[V]): Future[Boolean] =
    super.replace(key, cachedValue).flatMap { wasReplaced =>
      indexIfStored(key, cachedValue, wasReplaced)
    }

  override def release(): Unit = {
    underlyingCache.release()
    secondaryIndexCache.release()
  }

  /** Stacks another secondary index on top of this cache. */
  def withSecondaryIndex[T](
    secondaryIndexingCache: Cache[T, Cached[K]],
    secondaryIndex: SecondaryIndexingCache.IndexMapping[T, V]
  ): SecondaryIndexingCache[K, T, V] =
    new SecondaryIndexingCache[K, T, V](this, secondaryIndexingCache, secondaryIndex)
}
/**
 * A [[Cache]] backed by several underlying caches. Each single-key write is sent to the
 * cache that `underlyingCache(key)` selects for that key at call time.
 */
trait SelectedCacheWrapper[K, V]
    extends Cache[K, V]
    with SelectedReadCacheWrapper[K, V, Cache[K, V]] {

  override def add(key: K, value: V) = {
    val target = underlyingCache(key)
    target.add(key, value)
  }

  override def checkAndSet(key: K, value: V, checksum: Checksum) = {
    val target = underlyingCache(key)
    target.checkAndSet(key, value, checksum)
  }

  override def set(key: K, value: V) = {
    val target = underlyingCache(key)
    target.set(key, value)
  }

  override def replace(key: K, value: V) = {
    val target = underlyingCache(key)
    target.replace(key, value)
  }

  override def delete(key: K) = {
    val target = underlyingCache(key)
    target.delete(key)
  }
}
/**
 * A [[SelectedCacheWrapper]] that chooses between exactly two caches per key.
 *
 * @param primary    cache used for keys where `usePrimary` returns true
 * @param secondary  cache used for all other keys
 * @param usePrimary the per-key selector
 */
class SelectedCache[K, V](primary: Cache[K, V], secondary: Cache[K, V], usePrimary: K => Boolean)
    extends SelectedCacheWrapper[K, V] {

  override def underlyingCache(key: K) =
    usePrimary(key) match {
      case true => primary
      case false => secondary
    }

  // Always yields both caches, primary first; either key list may be empty.
  override def underlyingCacheForKeys(keys: Seq[K]) = {
    val (primaryKeys, secondaryKeys) = keys.partition(usePrimary)
    Seq(primary -> primaryKeys, secondary -> secondaryKeys)
  }

  override def underlyingCaches = Seq(primary, secondary)
}
/**
 * A Serializer of `Seq[T]`s.
 *
 * All of the work is delegated to [[IterableSerializer]]; this class only fixes the
 * collection type by supplying a fresh `Seq.newBuilder[T]` for each use.
 *
 * @param itemSerializer a Serializer for the individual elements.
 * @param itemSizeEstimate estimated size in bytes of individual elements (defaults to 8);
 *                         passed through unchanged to [[IterableSerializer]]
 */
class SeqSerializer[T](itemSerializer: Serializer[T], itemSizeEstimate: Int = 8)
    extends IterableSerializer[T, Seq[T]](() => Seq.newBuilder[T], itemSerializer, itemSizeEstimate)
/**
 * Copies the remaining bytes of `b` into a newly allocated array.
 *
 * The bytes are read through a duplicate of the buffer, so the position and limit of `b`
 * itself are left untouched. This lets the same buffer be converted repeatedly, and other
 * threads never observe an intermediate state of it.
 */
private[this] def byteBufferToArray(b: ByteBuffer): Array[Byte] = {
  val bytes = new Array[Byte](b.remaining)
  val cursor = b.duplicate
  cursor.get(bytes)
  bytes
}
/**
 * A Serializer for Thrift structs generated by Scrooge.
 *
 * Encoding and decoding come from the mixed-in `ThriftStructSerializer`; this class only
 * wraps its `toBytes` / `fromBytes` calls in a `Try` so that exceptions become `Throw`s.
 *
 * @param codec used to encode and decode structs for a given protocol
 * @param protocolFactory defines the serialization protocol to be used
 */
class ThriftSerializer[T <: ThriftStruct](
  val codec: ThriftStructCodec[T],
  val protocolFactory: TProtocolFactory)
    extends Serializer[T]
    with ThriftStructSerializer[T] {

  override def to(obj: T): Try[Array[Byte]] =
    Try {
      toBytes(obj)
    }

  override def from(bytes: Array[Byte]): Try[T] =
    Try {
      fromBytes(bytes)
    }
}
/**
 * A Serializer for Thrift structs generated by the Apache code generator.
 *
 * @param tFactory a factory for Thrift-defined objects of type T. Each deserialization asks
 *                 the factory for an object, reads the bytes into it and returns it.
 * @param protocolFactory defines the serialization protocol to be used
 */
class TBaseSerializer[T <: TBase[_, _]](tFactory: () => T, protocolFactory: TProtocolFactory)
    extends Serializer[T] {

  /** Writes `obj` into an in-memory stream; any exception is captured in the returned Try. */
  override def to(obj: T): Try[Array[Byte]] = Try {
    val sink = new ByteArrayOutputStream
    val protocol = protocolFactory.getProtocol(new TIOStreamTransport(sink))
    obj.write(protocol)
    sink.toByteArray
  }

  /** Populates a factory-made object from `bytes`; any exception is captured in the returned Try. */
  override def from(bytes: Array[Byte]): Try[T] = Try {
    val target = tFactory()
    val source = new ByteArrayInputStream(bytes)
    val protocol = protocolFactory.getProtocol(new TIOStreamTransport(source))
    target.read(protocol)
    target
  }
}
/**
 * A Serializer of [[Cached]] objects.
 *
 * A `Cached[T]` is converted to a `CachedValue` thrift struct (encoding the payload with
 * `valueSerializer`), and that struct is serialized with a [[ThriftSerializer]].
 * Deserialization runs the same two steps in reverse.
 *
 * @param valueSerializer an underlying serializer of the values to be cached.
 * @param protocolFactory defines the serialization protocol to be used
 */
class CachedSerializer[T](valueSerializer: Serializer[T], protocolFactory: TProtocolFactory)
    extends Serializer[Cached[T]] {
  private[this] val underlying = new ThriftSerializer(CachedValue, protocolFactory)

  private[this] def asCached(thriftValue: CachedValue): Cached[T] =
    Cached(thriftValue, valueSerializer)

  override def to(cached: Cached[T]): Try[Array[Byte]] = {
    val thriftValue = cached.toCachedValue(valueSerializer)
    underlying.to(thriftValue)
  }

  override def from(bytes: Array[Byte]): Try[Cached[T]] = {
    val decoded = underlying.from(bytes)
    decoded.map(asCached)
  }
}
object SimpleReplicatingCache {

  /**
   * Builds a SimpleReplicatingCache that writes a value multiple times to the same underlying
   * cache but under different keys. If the underlying cache is backed by enough shards, there
   * is a good chance that the different keys will end up on different shards, giving you similar
   * behavior to having multiple distinct caches.
   *
   * @param underlying    the single cache that stores every replica
   * @param keyReplicator maps a key and a replica number (0 until `replicas`) to the key
   *                      under which that replica is stored
   * @param replicas      number of copies to keep; must be positive
   * @throws IllegalArgumentException if `replicas` is not positive. A cache with no replicas
   *                                  has nothing to read from or write to, so it is rejected
   *                                  here with a clear message instead of failing on first use.
   */
  def apply[K, K2, V](
    underlying: LockingCache[K2, Cached[V]],
    keyReplicator: (K, Int) => K2,
    replicas: Int = 2
  ): SimpleReplicatingCache[K, V] = {
    require(replicas > 0, "replicas must be positive, but was " + replicas)
    new SimpleReplicatingCache(
      (0 until replicas) map { replica =>
        new KeyTransformingLockingCache(
          underlying,
          (key: K) => keyReplicator(key, replica)
        )
      }
    )
  }
}
/**
 * A very simple replicating cache implementation. It writes the same key/value pair to
 * multiple underlying caches. On read, each underlying cache is queried with the key; if the
 * results are not all the same for a given key, then the most recent value is chosen and
 * replicated to the caches that returned something else.
 *
 * Some cache operations are not currently supported, because their semantics are a little
 * fuzzy in the replication case. Specifically: add and checkAndSet.
 *
 * NOTE(review): `getAndReplicate`, `set` and `replace` call `max` / `head` on per-replica
 * results, so they assume `underlyingCaches` is non-empty.
 */
class SimpleReplicatingCache[K, V](underlyingCaches: Seq[LockingCache[K, Cached[V]]])
    extends LockingCache[K, Cached[V]] {
  // a value read from one replica (or the failure to decode it), paired with its checksum
  private type CsValue = (Try[Cached[V]], Checksum)

  private val cachedOrdering = new Ordering[Cached[V]] {
    // sort by ascending timestamp
    def compare(a: Cached[V], b: Cached[V]) = a.cachedAt.compare(b.cachedAt)
  }

  private val csValueOrdering = new Ordering[CsValue] {
    // order by Try[V], ignore checksum
    val subordering = TryOrdering(cachedOrdering)
    def compare(a: CsValue, b: CsValue) = subordering.compare(a._1, b._1)
  }

  // orderings over whole per-replica results; used to pick the "best" replica answer
  private val tryOptionCsValueOrdering = TryOrdering(OptionOrdering(csValueOrdering))
  private val tryOptionCachedOrdering = TryOrdering(OptionOrdering(cachedOrdering))

  /**
   * Releases every underlying cache.
   */
  def release(): Unit = {
    underlyingCaches foreach { _.release() }
  }

  /**
   * Same lookup as `getWithChecksum` (including the replication of the winning value), with
   * the checksums dropped: a found entry holding a `Return` is reported as found, a found
   * entry holding a `Throw` is reported as failed, and the not-found and failed keys are
   * passed through.
   */
  def get(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = {
    getWithChecksum(keys) map { csKvRes =>
      val resBldr = new KeyValueResultBuilder[K, Cached[V]]

      csKvRes.found foreach {
        case (k, (Return(v), _)) => resBldr.addFound(k, v)
        case (k, (Throw(t), _)) => resBldr.addFailed(k, t)
      }

      resBldr.addNotFound(csKvRes.notFound)
      resBldr.addFailed(csKvRes.failed)
      resBldr.result()
    }
  }

  /**
   * Fetches from all underlying caches in parallel, and if results differ, will choose a
   * winner and push updated results back to the stale caches. A winning value whose status
   * is `Evicted` is reported as a miss.
   */
  def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, Cached[V]]] = {
    Future.collect {
      underlyingCaches map { underlying =>
        underlying.getWithChecksum(keys)
      }
    } map { underlyingResults =>
      val resBldr = new KeyValueResultBuilder[K, CsValue]

      for (key <- keys) {
        // one result per underlying cache, in the same order as underlyingCaches
        val keyResults = underlyingResults map { _(key) }
        resBldr(key) = getAndReplicate(key, keyResults) map {
          // treat evictions as misses
          case Some((Return(c), _)) if c.status == CachedValueStatus.Evicted => None
          case v => v
        }
      }

      resBldr.result()
    }
  }

  /**
   * Looks at all the returned values for a given set of replication keys, returning the most recent
   * cached value if available, or indicate a miss if applicable, or return a failure if all
   * keys failed. If a cached value is returned, and some keys don't have that cached value,
   * the cached value will be replicated to those keys, possibly overwriting stale data.
   *
   * The replication writes are started but not waited for; their results are discarded.
   */
  private def getAndReplicate(
    key: K,
    keyResults: Seq[Try[Option[CsValue]]]
  ): Try[Option[CsValue]] = {
    val max = keyResults.max(tryOptionCsValueOrdering)

    max match {
      // if one of the replication keys returned a cached value, then make sure all replication
      // keys contain that cached value.
      case Return(Some((Return(cached), cs))) =>
        for ((underlying, keyResult) <- underlyingCaches zip keyResults) {
          if (keyResult != max) {
            replicate(key, cached, keyResult, underlying)
          }
        }
      case _ =>
    }

    max
  }

  /**
   * Pushes `cached` to one underlying cache, choosing the write operation from what that
   * cache returned (`current`) on the read that triggered the replication.
   */
  private def replicate(
    key: K,
    cached: Cached[V],
    current: Try[Option[CsValue]],
    underlying: LockingCache[K, Cached[V]]
  ): Future[Unit] = {
    current match {
      case Throw(_) =>
        // if we failed to read a particular value, we don't want to write to that key
        // because that key could potentially have the real newest value
        Future.Unit
      case Return(None) =>
        // add rather than set, and fail if another value is written first
        underlying.add(key, cached).unit
      case Return(Some((_, cs))) =>
        // overwrite only if the entry still has the checksum we read
        underlying.checkAndSet(key, cached, cs).unit
    }
  }

  /**
   * Currently not supported. Use set or lockAndSet.
   */
  def add(key: K, value: Cached[V]): Future[Boolean] = {
    Future.exception(new UnsupportedOperationException("use set or lockAndSet"))
  }

  /**
   * Currently not supported. Use set or lockAndSet.
   */
  def checkAndSet(key: K, value: Cached[V], checksum: Checksum): Future[Boolean] = {
    Future.exception(new UnsupportedOperationException("use set or lockAndSet"))
  }

  /**
   * Calls set on all underlying caches. If at least one set succeeds, Future.Unit is
   * returned. If all fail, the failure of the first underlying cache is returned.
   */
  def set(key: K, value: Cached[V]): Future[Unit] = {
    liftAndCollect {
      underlyingCaches map { _.set(key, value) }
    } flatMap { seqTryUnits =>
      // return Future.Unit if any underlying call succeeded, otherwise return
      // the first failure.
      if (seqTryUnits exists { _.isReturn })
        Future.Unit
      else
        Future.const(seqTryUnits.head)
    }
  }

  /**
   * Calls lockAndSet on the underlying cache for all replication keys and returns the
   * greatest result under `tryOptionCachedOrdering`. If at least one underlying call
   * succeeds, a successful result will be returned (presumably TryOrdering ranks a Return
   * above a Throw; confirm against its definition).
   */
  def lockAndSet(key: K, handler: LockingCache.Handler[Cached[V]]): Future[Option[Cached[V]]] = {
    liftAndCollect {
      underlyingCaches map { _.lockAndSet(key, handler) }
    } flatMap { seqTryOptionCached =>
      Future.const(seqTryOptionCached.max(tryOptionCachedOrdering))
    }
  }

  /**
   * Returns Future(true) if any of the underlying caches return Future(true); otherwise,
   * returns Future(false) if any of the underlying caches return Future(false); otherwise,
   * returns the first failure.
   */
  def replace(key: K, value: Cached[V]): Future[Boolean] = {
    liftAndCollect {
      underlyingCaches map { _.replace(key, value) }
    } flatMap { seqTryBools =>
      if (seqTryBools.contains(Return.True))
        Future.value(true)
      else if (seqTryBools.contains(Return.False))
        Future.value(false)
      else
        Future.const(seqTryBools.head)
    }
  }

  /**
   * Performing an actual deletion on the underlying caches is not a good idea in the face
   * of potential failure, because failing to remove all values would allow a cached value to
   * be resurrected. Instead, delete actually does a replace on the underlying caches with a
   * CachedValueStatus of Evicted, which will be treated as a miss on read.
   */
  def delete(key: K): Future[Boolean] = {
    replace(key, Cached(None, CachedValueStatus.Evicted, Time.now))
  }

  /**
   * Converts a Seq[Future[A]] into a Future[Seq[Try[A]]], isolating failures into Trys instead
   * of allowing the entire Future to fail.
   */
  private def liftAndCollect[A](seq: Seq[Future[A]]): Future[Seq[Try[A]]] = {
    Future.collect { seq map { _ transform { Future(_) } } }
  }
}
/**
 * Adaptor from a ReadCache[K1, V1] to an underlying ReadCache[K2, V2].
 *
 * Values are converted with a Transformer. Keys are converted with a one-way function, so
 * data can be stored under underlying keys that cannot easily be mapped back; each read
 * therefore remembers which caller key produced which underlying key.
 *
 * @param underlyingCache the cache that is actually queried
 * @param transformer     converts between V1 and the stored V2
 * @param underlyingKey   maps a caller key to the key used in the underlying cache
 */
class KeyValueTransformingReadCache[K1, K2, V1, V2](
  underlyingCache: ReadCache[K2, V2],
  transformer: Transformer[V1, V2],
  underlyingKey: K1 => K2)
    extends ReadCache[K1, V1] {

  /**
   * Maps every caller key to its underlying key, returning the underlying keys in request
   * order together with the reverse lookup used to translate results back.
   */
  private[this] def mappedKeys(
    keys: Seq[K1]
  ): (IndexedSeq[K2], Map[K2, K1]) = {
    val underlyingKeys = new ArrayBuffer[K2](keys.size)
    val reverse: Map[K2, K1] =
      keys.map { k1 =>
        val k2 = underlyingKey(k1)
        underlyingKeys += k2
        (k2, k1)
      }(breakOut)
    (underlyingKeys, reverse)
  }

  /**
   * Reads `keys` from the underlying cache. Found values that fail to convert are reported
   * as failed; if the read itself (or the translation of its result) fails, every requested
   * key is reported as failed with that exception.
   */
  override def get(keys: Seq[K1]): Future[KeyValueResult[K1, V1]] = {
    val (k2s, kMap) = mappedKeys(keys)

    underlyingCache
      .get(k2s)
      .map { lr =>
        // convert every found value once, keeping the outcome next to the caller key
        val decoded = lr.found.toSeq.map {
          case (k2, v2) =>
            val outcome = transformer.from(v2)
            kMap(k2) -> outcome
        }
        val found = decoded.collect { case (k1, Return(v1)) => k1 -> v1 }.toMap
        val conversionFailures = decoded.collect { case (k1, Throw(t)) => k1 -> t }
        // failures reported by the underlying cache are added last, so they win on a clash
        val underlyingFailures = lr.failed.toSeq.map { case (k2, t) => kMap(k2) -> t }
        val failed = (conversionFailures ++ underlyingFailures).toMap

        KeyValueResult(
          found,
          lr.notFound.map { kMap(_) },
          failed
        )
      }
      .handle {
        case t =>
          KeyValueResult(failed = keys.map(_ -> t).toMap)
      }
  }

  /**
   * Like `get`, but keeps the checksum of each found entry. A conversion failure stays
   * inside the found entry as a `Throw` rather than moving the key to `failed`.
   */
  override def getWithChecksum(keys: Seq[K1]): Future[CsKeyValueResult[K1, V1]] = {
    val (k2s, kMap) = mappedKeys(keys)

    underlyingCache
      .getWithChecksum(k2s)
      .map { clr =>
        val found = clr.found.map {
          case (k2, (stored, checksum)) =>
            kMap(k2) -> (stored.flatMap(transformer.from), checksum)
        }
        val notFound = clr.notFound map { kMap(_) }
        val failed = clr.failed map {
          case (k2, t) =>
            kMap(k2) -> t
        }
        KeyValueResult(found, notFound, failed)
      }
      .handle {
        case t =>
          KeyValueResult(failed = keys.map(_ -> t).toMap)
      }
  }

  override def release(): Unit = underlyingCache.release()
}
+ */ +class KeyValueTransformingCache[K1, K2, V1, V2]( + underlyingCache: Cache[K2, V2], + transformer: Transformer[V1, V2], + underlyingKey: K1 => K2) + extends KeyValueTransformingReadCache[K1, K2, V1, V2]( + underlyingCache, + transformer, + underlyingKey + ) + with Cache[K1, V1] { + private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) + + override def add(key: K1, value: V1): Future[Boolean] = + to(value) flatMap { underlyingCache.add(underlyingKey(key), _) } + + override def checkAndSet(key: K1, value: V1, checksum: Checksum): Future[Boolean] = + to(value) flatMap { underlyingCache.checkAndSet(underlyingKey(key), _, checksum) } + + override def set(key: K1, value: V1): Future[Unit] = + to(value) flatMap { underlyingCache.set(underlyingKey(key), _) } + + override def replace(key: K1, value: V1): Future[Boolean] = + to(value) flatMap { underlyingCache.replace(underlyingKey(key), _) } + + override def delete(key: K1): Future[Boolean] = + underlyingCache.delete(underlyingKey(key)) +} + +/** + * Adaptor from a TtlCache[K, V1] to an underlying TtlCache[K, V2] + * + * a Transformer is used to map between value types + */ +class ValueTransformingTtlCache[K, V1, V2]( + underlyingCache: TtlCache[K, V2], + transformer: Transformer[V1, V2]) + extends ValueTransformingReadCache[K, V1, V2](underlyingCache, transformer) + with TtlCache[K, V1] { + private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) + + override def add(key: K, value: V1, ttl: Duration): Future[Boolean] = + to(value) flatMap { underlyingCache.add(key, _, ttl) } + + override def checkAndSet( + key: K, + value: V1, + checksum: Checksum, + ttl: Duration + ): Future[Boolean] = + to(value) flatMap { underlyingCache.checkAndSet(key, _, checksum, ttl) } + + override def set(key: K, value: V1, ttl: Duration): Future[Unit] = + to(value) flatMap { underlyingCache.set(key, _, ttl) } + + override def replace(key: K, value: V1, ttl: Duration): Future[Boolean] = + to(value) 
flatMap { underlyingCache.replace(key, _, ttl) } + + override def delete(key: K): Future[Boolean] = + underlyingCache.delete(key) +} + +/** + * Adaptor from a TtlCache[K1, V1] to an underlying TtlCache[K2, V2] + * + * a Transformer is used to map between value types, and a + * one-way mapping is used for keys, making it possible to + * store data in the underlying cache using keys that can't + * easily be reverse-mapped. + */ +class KeyValueTransformingTtlCache[K1, K2, V1, V2]( + underlyingCache: TtlCache[K2, V2], + transformer: Transformer[V1, V2], + underlyingKey: K1 => K2) + extends KeyValueTransformingReadCache[K1, K2, V1, V2]( + underlyingCache, + transformer, + underlyingKey + ) + with TtlCache[K1, V1] { + private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) + + override def add(key: K1, value: V1, ttl: Duration): Future[Boolean] = + to(value) flatMap { underlyingCache.add(underlyingKey(key), _, ttl) } + + override def checkAndSet( + key: K1, + value: V1, + checksum: Checksum, + ttl: Duration + ): Future[Boolean] = + to(value) flatMap { underlyingCache.checkAndSet(underlyingKey(key), _, checksum, ttl) } + + override def set(key: K1, value: V1, ttl: Duration): Future[Unit] = + to(value) flatMap { underlyingCache.set(underlyingKey(key), _, ttl) } + + override def replace(key: K1, value: V1, ttl: Duration): Future[Boolean] = + to(value) flatMap { underlyingCache.replace(underlyingKey(key), _, ttl) } + + override def delete(key: K1): Future[Boolean] = + underlyingCache.delete(underlyingKey(key)) +} + +class KeyTransformingTtlCache[K1, K2, V](underlyingCache: TtlCache[K2, V], underlyingKey: K1 => K2) + extends KeyValueTransformingTtlCache[K1, K2, V, V]( + underlyingCache, + Transformer.identity, + underlyingKey + ) + +class KeyTransformingLockingCache[K1, K2, V]( + underlyingCache: LockingCache[K2, V], + underlyingKey: K1 => K2) + extends KeyValueTransformingCache[K1, K2, V, V]( + underlyingCache, + Transformer.identity, + underlyingKey + ) 
+ with LockingCache[K1, V] { + import LockingCache._ + + override def lockAndSet(key: K1, handler: Handler[V]): Future[Option[V]] = + underlyingCache.lockAndSet(underlyingKey(key), handler) +} + +class KeyTransformingCounterCache[K1, K2]( + underlyingCache: CounterCache[K2], + underlyingKey: K1 => K2) + extends KeyTransformingCache[K1, K2, Long](underlyingCache, underlyingKey) + with CounterCache[K1] { + override def incr(key: K1, delta: Int = 1): Future[Option[Long]] = { + underlyingCache.incr(underlyingKey(key), delta) + } + + override def decr(key: K1, delta: Int = 1): Future[Option[Long]] = { + underlyingCache.decr(underlyingKey(key), delta) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala new file mode 100644 index 000000000..d42766951 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala @@ -0,0 +1,95 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Duration, Future} + +/** + * a Cache that takes a TTL per set + */ +trait TtlCache[K, V] extends ReadCache[K, V] { + def add(key: K, value: V, ttl: Duration): Future[Boolean] + + def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration): Future[Boolean] + + def set(key: K, value: V, ttl: Duration): Future[Unit] + + /** + * Replaces the value for an existing key. If the key doesn't exist, this has no effect. + * @return true if replaced, false if not found + */ + def replace(key: K, value: V, ttl: Duration): Future[Boolean] + + /** + * Deletes a value from cache. 
+ * @return true if deleted, false if not found + */ + def delete(key: K): Future[Boolean] +} + +/** + * allows one TtlCache to wrap another + */ +trait TtlCacheWrapper[K, V] extends TtlCache[K, V] with ReadCacheWrapper[K, V, TtlCache[K, V]] { + override def add(key: K, value: V, ttl: Duration) = underlyingCache.add(key, value, ttl) + + override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration) = + underlyingCache.checkAndSet(key, value, checksum, ttl) + + override def set(key: K, value: V, ttl: Duration) = underlyingCache.set(key, value, ttl) + + override def replace(key: K, value: V, ttl: Duration) = underlyingCache.replace(key, value, ttl) + + override def delete(key: K) = underlyingCache.delete(key) +} + +class PerturbedTtlCache[K, V]( + override val underlyingCache: TtlCache[K, V], + perturbTtl: Duration => Duration) + extends TtlCacheWrapper[K, V] { + override def add(key: K, value: V, ttl: Duration) = + underlyingCache.add(key, value, perturbTtl(ttl)) + + override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration) = + underlyingCache.checkAndSet(key, value, checksum, perturbTtl(ttl)) + + override def set(key: K, value: V, ttl: Duration) = + underlyingCache.set(key, value, perturbTtl(ttl)) + + override def replace(key: K, value: V, ttl: Duration) = + underlyingCache.replace(key, value, perturbTtl(ttl)) +} + +/** + * an adaptor to wrap a Cache[K, V] interface around a TtlCache[K, V] + */ +class TtlCacheToCache[K, V](override val underlyingCache: TtlCache[K, V], ttl: (K, V) => Duration) + extends Cache[K, V] + with ReadCacheWrapper[K, V, TtlCache[K, V]] { + override def add(key: K, value: V) = underlyingCache.add(key, value, ttl(key, value)) + + override def checkAndSet(key: K, value: V, checksum: Checksum) = + underlyingCache.checkAndSet(key, value, checksum, ttl(key, value)) + + override def set(key: K, value: V) = underlyingCache.set(key, value, ttl(key, value)) + + override def replace(key: K, value: V) = 
underlyingCache.replace(key, value, ttl(key, value)) + + override def delete(key: K) = underlyingCache.delete(key) +} + +/** + * use a single TTL for all objects + */ +class SimpleTtlCacheToCache[K, V](underlyingTtlCache: TtlCache[K, V], ttl: Duration) + extends TtlCacheToCache[K, V](underlyingTtlCache, (k: K, v: V) => ttl) + +/** + * use a value-based TTL function + */ +class ValueBasedTtlCacheToCache[K, V](underlyingTtlCache: TtlCache[K, V], ttl: V => Duration) + extends TtlCacheToCache[K, V](underlyingTtlCache, (k: K, v: V) => ttl(v)) + +/** + * use a key-based TTL function + */ +class KeyBasedTtlCacheToCache[K, V](underlyingTtlCache: TtlCache[K, V], ttl: K => Duration) + extends TtlCacheToCache[K, V](underlyingTtlCache, (k: K, v: V) => ttl(k)) diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala new file mode 100644 index 000000000..f2e74624d --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala @@ -0,0 +1,36 @@ +package com.twitter.servo + +import com.twitter.finagle.partitioning.PartitionNode +import com.twitter.servo.util.Transformer +import com.twitter.util.Try + +package object cache { + type CachedValue = thriftscala.CachedValue + val CachedValue = thriftscala.CachedValue + type CachedValueStatus = thriftscala.CachedValueStatus + val CachedValueStatus = thriftscala.CachedValueStatus + + type KeyTransformer[K] = K => String + type CsKeyValueResult[K, V] = KeyValueResult[K, (Try[V], Checksum)] + + type KeyValueResult[K, V] = keyvalue.KeyValueResult[K, V] + val KeyValueResult = keyvalue.KeyValueResult + + @deprecated("Use com.twitter.finagle.partitioning.PartitionNode instead", "1/7/2013") + type WeightedHost = PartitionNode + + type Serializer[T] = Transformer[T, Array[Byte]] + + /** + * Like a companion object, but for a type alias! 
+ */ + val Serializer = Serializers + + type MemcacheFactory = (() => Memcache) +} + +package cache { + package object constants { + val Colon = ":" + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala new file mode 100644 index 000000000..647e9b3f0 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala @@ -0,0 +1,151 @@ +package com.twitter.servo.database + +import com.twitter.util.Time +import java.sql.{ResultSet, Timestamp} + +/** + * A base trait for transforming JDBC ResultSets. + * Designed to be used with the Accessors trait. + */ +trait ImplicitBuilder[T] extends Accessors { + def apply(implicit row: ResultSet): T +} + +object Accessors { + + /** + * helper to make it compile time error when trying to call getOption on types not supported + * instead of a runtime exception + */ + object SafeManifest { + implicit val booleanSafeManifest = new SafeManifest(implicitly[Manifest[Boolean]]) + implicit val doubleSafeManifest = new SafeManifest(implicitly[Manifest[Double]]) + implicit val intSafeManifest = new SafeManifest[Int](implicitly[Manifest[Int]]) + implicit val longSafeManifest = new SafeManifest[Long](implicitly[Manifest[Long]]) + implicit val stringSafeManifest = new SafeManifest[String](implicitly[Manifest[String]]) + implicit val timestampSafeManifest = + new SafeManifest[Timestamp](implicitly[Manifest[Timestamp]]) + } + + @deprecated("safe manifests no longer supported, use type-specific accessors instead", "1.1.1") + case class SafeManifest[T](mf: Manifest[T]) +} + +/** + * mixin to get ResultSet accessors for standard types + */ +trait Accessors { + import Accessors._ + + /** + * @return None when the column is null for the current row of the result set passed in + * Some[T] otherwise + * @throws UnsupportedOperationException if the return type expected is not supported, currently + 
* only Boolean, Double, Int, Long, String and Timestamp are supported + */ + @deprecated("use type-specific accessors instead", "1.1.1") + def getOption[T](column: String)(implicit row: ResultSet, sf: SafeManifest[T]): Option[T] = { + val res = { + if (classOf[Boolean] == sf.mf.erasure) { + row.getBoolean(column) + } else if (classOf[Double] == sf.mf.erasure) { + row.getDouble(column) + } else if (classOf[Int] == sf.mf.erasure) { + row.getInt(column) + } else if (classOf[Long] == sf.mf.erasure) { + row.getLong(column) + } else if (classOf[String] == sf.mf.erasure) { + row.getString(column) + } else if (classOf[Timestamp] == sf.mf.erasure) { + row.getTimestamp(column) + } else { + throw new UnsupportedOperationException("type not supported: " + sf.mf.erasure) + } + } + if (row.wasNull()) { + None + } else { + Some(res.asInstanceOf[T]) + } + } + + /** + * @param get the method to apply to the ResultSet + * @param row the implicit ResultSet on which to apply get + * @return None when the column is null for the current row of the result set passed in + * Some[T] otherwise + */ + def getOption[T](get: ResultSet => T)(implicit row: ResultSet): Option[T] = { + val result = get(row) + if (row.wasNull()) { + None + } else { + Some(result) + } + } + + def booleanOption(column: String)(implicit row: ResultSet): Option[Boolean] = + getOption((_: ResultSet).getBoolean(column)) + + def boolean(column: String, default: Boolean = false)(implicit row: ResultSet): Boolean = + booleanOption(column).getOrElse(default) + + def doubleOption(column: String)(implicit row: ResultSet): Option[Double] = + getOption((_: ResultSet).getDouble(column)) + + def double(column: String, default: Double = 0.0)(implicit row: ResultSet): Double = + doubleOption(column).getOrElse(default) + + def intOption(column: String)(implicit row: ResultSet): Option[Int] = + getOption((_: ResultSet).getInt(column)) + + def int(column: String, default: Int = 0)(implicit row: ResultSet): Int = + 
intOption(column).getOrElse(default) + + def longOption(column: String)(implicit row: ResultSet): Option[Long] = + getOption((_: ResultSet).getLong(column)) + + def long(column: String, default: Long = 0)(implicit row: ResultSet): Long = + longOption(column).getOrElse(default) + + def stringOption(column: String)(implicit row: ResultSet): Option[String] = + getOption((_: ResultSet).getString(column)) + + def string(column: String, default: String = "")(implicit row: ResultSet): String = + stringOption(column).getOrElse(default) + + def timestampOption(column: String)(implicit row: ResultSet): Option[Timestamp] = + getOption((_: ResultSet).getTimestamp(column)) + + def timestamp( + column: String, + default: Timestamp = new Timestamp(0) + )( + implicit row: ResultSet + ): Timestamp = + timestampOption(column).getOrElse(default) + + def datetimeOption(column: String)(implicit row: ResultSet): Option[Long] = + timestampOption(column) map { _.getTime } + + def datetime(column: String, default: Long = 0L)(implicit row: ResultSet): Long = + datetimeOption(column).getOrElse(default) + + def timeOption(column: String)(implicit row: ResultSet): Option[Time] = + datetimeOption(column) map { Time.fromMilliseconds(_) } + + def time(column: String, default: Time = Time.epoch)(implicit row: ResultSet): Time = + timeOption(column).getOrElse(default) + + def bytesOption(column: String)(implicit row: ResultSet): Option[Array[Byte]] = + getOption((_: ResultSet).getBytes(column)) + + def bytes( + column: String, + default: Array[Byte] = Array.empty[Byte] + )( + implicit row: ResultSet + ): Array[Byte] = + bytesOption(column).getOrElse(default) + +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala new file mode 100644 index 000000000..fafd0fb72 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala @@ -0,0 +1,56 @@ 
+package com.twitter.servo.database + +object Bitfield { + def multiValue(bits: Boolean*): Int = { + bits.foldLeft(0) { (accum, bit) => + (accum << 1) | (if (bit) 1 else 0) + } + } + + def multiValueLong(bits: Boolean*): Long = { + bits.foldLeft(0L) { (accum, bit) => + (accum << 1) | (if (bit) 1L else 0L) + } + } +} + +/** + * A mixin for unpacking bitfields. + */ +trait Bitfield { + val bitfield: Int + + /** + * Tests that a given position is set to 1. + */ + def isSet(position: Int): Boolean = { + (bitfield & (1 << position)) != 0 + } + + /** + * takes a sequence of booleans, from most to least significant + * and converts them to an integer. + * + * example: multiValue(true, false, true) yields 0b101 = 5 + */ + def multiValue(bits: Boolean*): Int = Bitfield.multiValue(bits: _*) +} + +trait LongBitfield { + val bitfield: Long + + /** + * Tests that a given position is set to 1. + */ + def isSet(position: Int): Boolean = { + (bitfield & (1L << position)) != 0 + } + + /** + * takes a sequence of booleans, from most to least significant + * and converts them to a long. 
+ * + * example: multiValue(true, false, true) yields 0b101 = 5L + */ + def multiValue(bits: Boolean*): Long = Bitfield.multiValueLong(bits: _*) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala new file mode 100644 index 000000000..b4eef7418 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala @@ -0,0 +1,22 @@ +package com.twitter.servo.database + +import com.twitter.util.security +import java.io.File + +sealed trait Credentials { + def username: String + def password: String +} + +case class InlineCredentials(username: String, password: String) extends Credentials + +case class FileCredentials( + path: String, + usernameField: String = "db_username", + passwordField: String = "db_password") + extends Credentials { + lazy val (username, password) = { + val credentials = security.Credentials(new File(path)) + (credentials(usernameField), credentials(passwordField)) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala new file mode 100644 index 000000000..3d9845c31 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala @@ -0,0 +1,201 @@ +package com.twitter.servo.database + +import com.twitter.servo.repository._ +import com.twitter.util.Future +import scala.collection.mutable.{HashMap, HashSet, ListBuffer} +import scala.collection.generic.Growable + +object Database { + + /** + * Construct a KeyValueRepository wrapping access to a database. + * + * Data retrieved as a row from the query is passed to a Builder producing a + * (Key, Row) tuple. 
Once all rows have been processed this way they are passed as a + * sequence to a post-query function that can perform actions (aggregation usually) + * and produce a final sequence of (Key, Value). + * + * @tparam Q + * how we'll be querying this repository + * + * @tparam K + * the key used for looking data up + * + * @tparam R + * each entry from the database will be represented as an instance of R + * + * @tparam V + * the repository will return a V produced by processing one or more Rs + * + * @param database + * A database used to back the KeyValueRepository being built. + * + * @param dbQuery + * A database query for fetching records to be parsed into objects of type + * Row. The query string can contain instances of the character '?' as + * placeholders for parameters passed into the `Database.select` calls. + * + * @param builder + * A Builder that builds (K, Row) pairs from ResultSets from the database + * + * @param postProcess + * A function which can manipulate the Seq[(K, Row)] that is returned from the + * database. Useful for aggregating multi-mapped K, V pairs where V holds a + * container with multiple values for the same key in the database. This function + * should not manipulate the list of keys; doing so will result in Return.None + * elements in the ensuing KeyValueResult. + * + * AggregateByKey has a basic implementation that groups R objects by a + * specified identifier and may be useful as a common impl. + * + * @param selectParams + * A function that is applied to the distinct keys in a repository query. + * The result is passed to `Database.select` to be used for filling in + * bind variables in dbQuery. By default, the repository query is passed + * directly to the select. The use cases for this function are situations + * where the SELECT statement takes multiple parameters. + * + * Example: + * // A repository that takes Seq[Long]s of userids and returns + * // Item objects of a parameterized item type. 
+ * Database.keyValueRepository[Seq[Long], Long, Item, Item]( + * database, + * "SELECT * FROM items WHERE user_id IN (?) AND item_type = ?;", + * ItemBuilder, + * selectParams = Seq(_: Seq[Long], itemType) + * ) + */ + def keyValueRepository[Q <: Seq[K], K, R, V]( + database: Database, + dbQuery: String, + builder: Builder[(K, R)], + postProcess: Seq[(K, R)] => Seq[(K, V)] = + (identity[Seq[(K, V)]] _): (Seq[(K, V)] => Seq[(K, V)]), + selectParams: Seq[K] => Seq[Any] = (Seq(_: Seq[K])): (Seq[K] => collection.Seq[Seq[K]]) + ): KeyValueRepository[Q, K, V] = + query => { + if (query.isEmpty) { + KeyValueResult.emptyFuture + } else { + val uniqueKeys = query.distinct + KeyValueResult.fromPairs(uniqueKeys) { + database.select(dbQuery, builder, selectParams(uniqueKeys): _*) map postProcess + } + } + } +} + +/** + * A thin trait for async interaction with a database. + */ +trait Database { + def select[A](query: String, builder: Builder[A], params: Any*): Future[Seq[A]] + def selectOne[A](query: String, builder: Builder[A], params: Any*): Future[Option[A]] + def execute(query: String, params: Any*): Future[Int] + def insert(query: String, params: Any*): Future[Long] + def release(): Unit +} + +object NullDatabase extends Database { + override def select[Unit](query: String, builder: Builder[Unit], params: Any*) = + Future.value(Seq.empty[Unit]) + + override def selectOne[Unit](query: String, builder: Builder[Unit], params: Any*) = + Future.value(None) + + override def release() = () + + override def execute(query: String, params: Any*) = + Future.value(0) + + override def insert(query: String, params: Any*) = + Future.value(0) +} + +object AggregateByKey { + def apply[K, R, A]( + extractKey: R => K, + reduce: Seq[R] => A, + pruneDuplicates: Boolean = false + ) = new AggregateByKey(extractKey, reduce, pruneDuplicates) + + /** + * In the event that the item type (V) does not carry an aggregation key then we can have + * the Builder return a tuple with some id attached. 
If that is done then each Row from the + * builder will look something like (SomeGroupId, SomeRowObject). Because we tend to minimize + * data duplication this seems to be a pretty common pattern and can be seen in + * SavedSearchesRepository, FacebookConnectionsRepository, and UserToRoleRepository. + * + * @tparam K + * The type for the key + * @tparam V + * The type of a single element of the list + * @tparam A + * The object we'll aggregate list items into + * @param reduce + * A function that combines a seq of V into A + * @param pruneDuplicates + * If set this ensures that, at most, one instance of any given V will be passed into reduce. + */ + def withKeyValuePairs[K, V, A]( + reduce: Seq[V] => A, + pruneDuplicates: Boolean + ): AggregateByKey[K, (K, V), A] = + new AggregateByKey( + { case (k, _) => k }, + values => reduce(values map { case (_, v) => v }), + pruneDuplicates + ) +} + +/** + * Basic aggregator that extracts keys from a Row, groups into a Seq by those keys, and + * performs some reduction step to mash those into an aggregated object. Order is not + * necessarily kept between retrieving the rows from the database and passing them into + * reduce. + * + * @tparam K + * the type used by the item on which we aggregate rows + * + * @tparam R + * object that a single row of the query will be represented as + * + * @tparam A + * what we collect groups of R into + * + * @param extractKey + * function to extract a key from a row object + * + * @param reduce + * function that can take a sequence of rows and combine them into an aggregate + * + * @param pruneDuplicates + * if set this will ensure that at most one copy of each R will be passed into reduce (as + * determined by R's equals method) but will pass the input through a set which will + * likely lose ordering. 
+ */ +class AggregateByKey[K, R, A]( + extractKey: R => K, + reduce: Seq[R] => A, + pruneDuplicates: Boolean = false) + extends (Seq[R] => Seq[(K, A)]) { + override def apply(input: Seq[R]): Seq[(K, A)] = { + val collectionMap = new HashMap[K, Growable[R] with Iterable[R]] + + def emptyCollection: Growable[R] with Iterable[R] = + if (pruneDuplicates) { + new HashSet[R] + } else { + new ListBuffer[R] + } + + input foreach { element => + (collectionMap.getOrElseUpdate(extractKey(element), emptyCollection)) += element + } + + collectionMap map { + case (key, items) => + key -> reduce(items toSeq) + } toSeq + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala new file mode 100644 index 000000000..6a1f41437 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala @@ -0,0 +1,19 @@ +package com.twitter.servo + +import com.twitter.util.Future +import java.sql.ResultSet + +package object database { + type DatabaseFactory = (() => Database) + + /** + * A function type for translating ResultSets into objects of the result type A. + */ + type Builder[A] = ResultSet => A + + /** + * A function type for asynchronously translating ResultSets into objects + * of the result type A. 
+ */ + type FutureBuilder[A] = Builder[Future[A]] +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala new file mode 100644 index 000000000..67feab329 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala @@ -0,0 +1,155 @@ +package com.twitter.servo.hydrator + +import com.twitter.servo.data.Mutation +import com.twitter.servo.util.{Effect, Gate} +import com.twitter.servo.repository._ +import com.twitter.util.{Future, Return, Try} + +object KeyValueHydrator { + // KeyValueHydrator extends this function type + type FunctionType[Q, K, V] = (Q, Future[KeyValueResult[K, V]]) => Future[Mutation[V]] + type Filter[Q, K, V] = (Q, Future[KeyValueResult[K, V]]) => Future[Boolean] + + private[this] val _unit = fromMutation[Any, Any, Any](Mutation.unit[Any]) + + /** + * A no-op hydrator. Forms a monoid with `also`. + */ + def unit[Q, K, V]: KeyValueHydrator[Q, K, V] = + _unit.asInstanceOf[KeyValueHydrator[Q, K, V]] + + /** + * Packages a function as a KeyValueHydrator + */ + def apply[Q, K, V](f: FunctionType[Q, K, V]): KeyValueHydrator[Q, K, V] = + new KeyValueHydrator[Q, K, V] { + override def apply(query: Q, futureResults: Future[KeyValueResult[K, V]]) = + f(query, futureResults) + } + + /** + * Creates a new KeyValueHydrator out of several underlying KVHydrators. The + * apply method is called on each KeyValueHydrator with the same + * futureResults, allowing each to kick-off some asynchronous work + * to produce a future Hydrated[Mutation]. When all the future + * Hydrated[Mutation]s are available, the results are folded, + * left-to-right, over the mutations, to build up the final + * results. 
+ */ + def inParallel[Q, K, V](hydrators: KeyValueHydrator[Q, K, V]*): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (query, futureResults) => + val futureMutations = hydrators map { t => + t(query, futureResults) + } + Future.collect(futureMutations) map Mutation.all + } + + def const[Q, K, V](futureMutation: Future[Mutation[V]]): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (_, _) => + futureMutation + } + + def fromMutation[Q, K, V](mutation: Mutation[V]): KeyValueHydrator[Q, K, V] = + const[Q, K, V](Future.value(mutation)) +} + +/** + * A KeyValueHydrator builds a Mutation to be applied to the values in a KeyValueResult, but does + * not itself apply the Mutation. This allows several KeyValueHydrators to be composed together to + * begin their work in parallel to build the Mutations, which can then be combined and applied + * to the results later (see asRepositoryFilter). + * + * Forms a monoid with KeyValueHydrator.unit as unit and `also` as the combining function. + */ +trait KeyValueHydrator[Q, K, V] extends KeyValueHydrator.FunctionType[Q, K, V] { + protected[this] val unitMutation = Mutation.unit[V] + protected[this] val futureUnitMutation = Future.value(unitMutation) + + /** + * Combines two KeyValueHydrators. Forms a monoid with KeyValueHydrator.unit + */ + def also(next: KeyValueHydrator[Q, K, V]): KeyValueHydrator[Q, K, V] = + KeyValueHydrator.inParallel(this, next) + + /** + * Turns a single KeyValueHydrator into a RepositoryFilter by applying the Mutation to + * found values in the KeyValueResult. If the mutation throws an exception, it will + * be caught and the resulting key/value pair moved to the failed map of the resulting 
+ */ + lazy val asRepositoryFilter: RepositoryFilter[Q, KeyValueResult[K, V], KeyValueResult[K, V]] = + (query, futureResults) => { + this(query, futureResults) flatMap { mutation => + val update = mutation.endo + futureResults map { results => + results.mapValues { + case Return(Some(value)) => Try(Some(update(value))) + case x => x + } + } + } + } + + /** + * Apply this hydrator to the result of a repository. + */ + def hydratedBy_:(repo: KeyValueRepository[Q, K, V]): KeyValueRepository[Q, K, V] = + Repository.composed(repo, asRepositoryFilter) + + /** + * Return a new hydrator that applies the same mutation as this + * hydrator, but can be enabled/disabled or dark enabled/disabled via Gates. The light + * gate takes precedence over the dark gate. This allows you to go from 0%->100% dark, + * and then from 0%->100% light without affecting backend traffic. + */ + @deprecated("Use enabledBy(() => Boolean, () => Boolean)", "2.5.1") + def enabledBy(light: Gate[Unit], dark: Gate[Unit] = Gate.False): KeyValueHydrator[Q, K, V] = + enabledBy( + { () => + light() + }, + { () => + dark() + }) + + /** + * Return a new hydrator that applies the same mutation as this + * hydrator, but can be enabled/disabled or dark enable/disabled via nullary boolean functions. + * The light function takes precedence over the dark function. + * This allows you to go from 0%->100% dark, and then from 0%->100% light + * without affecting backend traffic. 
+ */ + def enabledBy(light: () => Boolean, dark: () => Boolean): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (query, futureResults) => + val isLight = light() + val isDark = !isLight && dark() + if (!isLight && !isDark) { + futureUnitMutation + } else { + this(query, futureResults) map { + case mutation if isLight => mutation + case mutation if isDark => mutation.dark + } + } + } + + /** + * Build a new hydrator that will return the same result as the current hydrator, + * but will additionally perform the supplied effect on the result of hydration. + */ + def withEffect(effect: Effect[Option[V]]): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (query, futureResults) => + this(query, futureResults) map { _ withEffect effect } + } + + /** + * Builds a new hydrator that only attempts to hydrate if the + * supplied filter returns true. + */ + def filter(predicate: KeyValueHydrator.Filter[Q, K, V]): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (q, r) => + predicate(q, r) flatMap { t => + if (t) this(q, r) else futureUnitMutation + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala new file mode 100644 index 000000000..bcf49efb8 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala @@ -0,0 +1,473 @@ +package com.twitter.servo.keyvalue + +import com.twitter.finagle.memcached.util.NotFound +import com.twitter.util.{Future, Return, Throw, Try} +import scala.collection.immutable + +object KeyValueResult { + private[this] val Empty = KeyValueResult() + private[this] val EmptyFuture = Future.value(Empty) + + def empty[K, V]: KeyValueResult[K, V] = + Empty.asInstanceOf[KeyValueResult[K, V]] + + def emptyFuture[K, V]: Future[KeyValueResult[K, V]] = + EmptyFuture.asInstanceOf[Future[KeyValueResult[K, V]]] + + /** + * Builds a KeyValueResult using pairs of 
keys to Try[Option[V]]. These values are split + * out to build the separate found/notFound/failed collections. + */ + def build[K, V](data: (K, Try[Option[V]])*): KeyValueResult[K, V] = { + val bldr = new KeyValueResultBuilder[K, V] + data.foreach { case (k, v) => bldr.update(k, v) } + bldr.result() + } + + /** + * Builds a future KeyValueResult using a future sequence of key-value tuples. That + * sequence does not necessarily match up with the sequence of keys provided. The + * sequence of pairs represent the found results. notFound will be filled in from the + * missing keys. + */ + def fromPairs[K, V]( + keys: Iterable[K] = Nil: immutable.Nil.type + )( + futurePairs: Future[TraversableOnce[(K, V)]] + ): Future[KeyValueResult[K, V]] = { + fromMap(keys) { + futurePairs map { _.toMap } + } + } + + /** + * Builds a future KeyValueResult using a future map of found results. notFound will be filled + * in from the missing keys. + */ + def fromMap[K, V]( + keys: Iterable[K] = Nil: immutable.Nil.type + )( + futureMap: Future[Map[K, V]] + ): Future[KeyValueResult[K, V]] = { + futureMap map { found => + KeyValueResult[K, V](found = found, notFound = NotFound(keys.toSet, found.keySet)) + } handle { + case t => + KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) + } + } + + /** + * Builds a future KeyValueResult using a future sequence of optional results. That + * sequence must match up pair-wise with the given sequence of keys. A value of Some[V] is + * counted as a found result, a value of None is counted as a notFound result. 
+ */ + def fromSeqOption[K, V]( + keys: Iterable[K] + )( + futureSeq: Future[Seq[Option[V]]] + ): Future[KeyValueResult[K, V]] = { + futureSeq map { seq => + keys.zip(seq).foldLeft(new KeyValueResultBuilder[K, V]) { + case (bldr, (key, tryRes)) => + tryRes match { + case Some(value) => bldr.addFound(key, value) + case None => bldr.addNotFound(key) + } + } result () + } handle { + case t => + KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) + } + } + + /** + * Builds a future KeyValueResult using a future sequence of Try results. That + * sequence must match up pair-wise with the given sequence of keys. A value of Return[V] is + * counted as a found result, a value of Throw is counted as a failed result. + */ + def fromSeqTry[K, V]( + keys: Iterable[K] + )( + futureSeq: Future[Seq[Try[V]]] + ): Future[KeyValueResult[K, V]] = { + futureSeq map { seq => + keys.zip(seq).foldLeft(new KeyValueResultBuilder[K, V]) { + case (bldr, (key, tryRes)) => + tryRes match { + case Return(value) => bldr.addFound(key, value) + case Throw(t) => bldr.addFailed(key, t) + } + } result () + } handle { + case t => + KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) + } + } + + /** + * Builds a future KeyValueResult using a sequence of future options. That sequence must + * match up pair-wise with the given sequence of keys. A value of Some[V] is + * counted as a found result, a value of None is counted as a notFound result. + */ + def fromSeqFuture[K, V]( + keys: Iterable[K] + )( + futureSeq: Seq[Future[Option[V]]] + ): Future[KeyValueResult[K, V]] = { + fromSeqTryOptions(keys) { + Future.collect { + futureSeq map { _.transform(Future(_)) } + } + } + } + + /** + * Builds a future KeyValueResult using a future sequence of Try[Option[V]]. That sequence must + * match up pair-wise with the given sequence of keys. 
A value of Return[Some[V]] is + * counted as a found result, a value of Return[None] is counted as a notFound result, and a value + * of Throw[V] is counted as a failed result. + */ + def fromSeqTryOptions[K, V]( + keys: Iterable[K] + )( + futureSeq: Future[Seq[Try[Option[V]]]] + ): Future[KeyValueResult[K, V]] = { + futureSeq map { seq => + keys.zip(seq).foldLeft(new KeyValueResultBuilder[K, V]) { + case (bldr, (key, tryRes)) => + tryRes match { + case Return(Some(value)) => bldr.addFound(key, value) + case Return(None) => bldr.addNotFound(key) + case Throw(t) => bldr.addFailed(key, t) + } + } result () + } handle { + case t => + KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) + } + } + + /** + * Builds a future KeyValueResult using a future map with value Try[Option[V]]. A value of + * Return[Some[V]] is counted as a found result, a value of Return[None] is counted as a notFound + * result, and a value of Throw[V] is counted as a failed result. + * + * notFound will be filled in from the missing keys. Exceptions will be handled by counting all + * keys as failed. Values that are in map but not keys will be ignored. + */ + def fromMapTryOptions[K, V]( + keys: Iterable[K] + )( + futureMapTryOptions: Future[Map[K, Try[Option[V]]]] + ): Future[KeyValueResult[K, V]] = { + futureMapTryOptions map { mapTryOptions => + keys.foldLeft(new KeyValueResultBuilder[K, V]) { + case (builder, key) => + mapTryOptions.get(key) match { + case Some(Return(Some(value))) => builder.addFound(key, value) + case Some(Return(None)) | None => builder.addNotFound(key) + case Some(Throw(failure)) => builder.addFailed(key, failure) + } + } result () + } handle { + case t => + KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) + } + } + + /** + * Reduces several KeyValueResults down to just 1, by combining as if by ++, but + * more efficiently with fewer intermediate results. 
+ */ + def sum[K, V](results: Iterable[KeyValueResult[K, V]]): KeyValueResult[K, V] = { + val bldr = new KeyValueResultBuilder[K, V] + + results foreach { result => + bldr.addFound(result.found) + bldr.addNotFound(result.notFound) + bldr.addFailed(result.failed) + } + + val res = bldr.result() + + if (res.notFound.isEmpty && res.failed.isEmpty) { + res + } else { + val foundKeySet = res.found.keySet + val notFound = NotFound(res.notFound, foundKeySet) + val failed = NotFound(NotFound(res.failed, foundKeySet), res.notFound) + KeyValueResult(res.found, notFound, failed) + } + } +} + +case class KeyValueResult[K, +V]( + found: Map[K, V] = Map.empty[K, V]: immutable.Map[K, V], + notFound: Set[K] = Set.empty[K]: immutable.Set[K], + failed: Map[K, Throwable] = Map.empty[K, Throwable]: immutable.Map[K, Throwable]) + extends Iterable[(K, Try[Option[V]])] { + + /** + * A cheaper implementation of isEmpty than the default which relies + * on building an iterator. + */ + override def isEmpty = found.isEmpty && notFound.isEmpty && failed.isEmpty + + /** + * map over the keyspace to produce a new KeyValueResult + */ + def mapKeys[K2](f: K => K2): KeyValueResult[K2, V] = + copy( + found = found.map { case (k, v) => f(k) -> v }, + notFound = notFound.map(f), + failed = failed.map { case (k, t) => f(k) -> t } + ) + + /** + * Maps over found values to produce a new KeyValueResult. If the given function throws an + * exception for a particular value, that value will be moved to the `failed` bucket with + * the thrown exception. 
+ */ + def mapFound[V2](f: V => V2): KeyValueResult[K, V2] = { + val builder = new KeyValueResultBuilder[K, V2]() + + found.foreach { + case (k, v) => + builder.update(k, Try(Some(f(v)))) + } + builder.addNotFound(notFound) + builder.addFailed(failed) + + builder.result() + } + + /** + * map over the values provided by the iterator, to produce a new KeyValueResult + */ + def mapValues[V2](f: Try[Option[V]] => Try[Option[V2]]): KeyValueResult[K, V2] = { + val builder = new KeyValueResultBuilder[K, V2]() + + found.foreach { + case (k, v) => + builder.update(k, f(Return(Some(v)))) + } + notFound.foreach { k => + builder.update(k, f(Return.None)) + } + failed.foreach { + case (k, t) => + builder.update(k, f(Throw(t))) + } + + builder.result() + } + + /** + * Map over found values to create a new KVR with the existing notFound and failed keys intact. + */ + def mapFoundValues[V2](f: V => Try[Option[V2]]): KeyValueResult[K, V2] = { + val builder = new KeyValueResultBuilder[K, V2]() + + found.foreach { + case (k, v) => builder.update(k, f(v)) + } + builder.addNotFound(notFound) + builder.addFailed(failed) + + builder.result() + } + + /** + * map over the pairs of results, creating a new KeyValueResult based on the returned + * tuples from the provided function. 
+ */ + def mapPairs[K2, V2](f: (K, Try[Option[V]]) => (K2, Try[Option[V2]])): KeyValueResult[K2, V2] = { + val builder = new KeyValueResultBuilder[K2, V2] + + def update(k: K, v: Try[Option[V]]): Unit = + f(k, v) match { + case (k2, v2) => builder.update(k2, v2) + } + + found.foreach { + case (k, v) => + update(k, Return(Some(v))) + } + notFound.foreach { k => + update(k, Return.None) + } + failed.foreach { + case (k, t) => + update(k, Throw(t)) + } + + builder.result() + } + + /** + * filter the KeyValueResult, to produce a new KeyValueResult + */ + override def filter(p: ((K, Try[Option[V]])) => Boolean): KeyValueResult[K, V] = { + val builder = new KeyValueResultBuilder[K, V] + + def update(k: K, v: Try[Option[V]]): Unit = { + if (p((k, v))) + builder.update(k, v) + } + + found.foreach { + case (k, v) => + update(k, Return(Some(v))) + } + notFound.foreach { k => + update(k, Return.None) + } + failed.foreach { + case (k, t) => + update(k, Throw(t)) + } + + builder.result() + } + + /** + * filterNot the KeyValueResult, to produce a new KeyValueResult + */ + override def filterNot(p: ((K, Try[Option[V]])) => Boolean): KeyValueResult[K, V] = { + filter(!p(_)) + } + + /** + * Returns an Iterator that yields all found, notFound, and failed values + * represented in the combined Try[Option[V]] type. + */ + def iterator: Iterator[(K, Try[Option[V]])] = + (found.iterator map { case (k, v) => k -> Return(Some(v)) }) ++ + (notFound.iterator map { k => + k -> Return.None + }) ++ + (failed.iterator map { case (k, t) => k -> Throw(t) }) + + /** + * Returns a copy in which all failed entries are converted to misses. The specific + * failure information is lost. + */ + def convertFailedToNotFound = + copy( + notFound = notFound ++ failed.keySet, + failed = Map.empty[K, Throwable] + ) + + /** + * Returns a copy in which all not-found entries are converted to failures. 
+   */
+  def convertNotFoundToFailed(f: K => Throwable) =
+    copy(
+      notFound = Set.empty[K],
+      failed = failed ++ (notFound map { k =>
+        k -> f(k)
+      })
+    )
+
+  /**
+   * Returns a copy in which failures are repaired with the supplied handler.
+   *
+   * Found and notFound entries are carried over unchanged. For every failed entry,
+   * the result of `Throw(t) handle handler` is passed to the builder's `update`, which
+   * files a Return(Some(v)) under found, a Return(None) under notFound, and a Throw
+   * under failed. When there are no failures, `this` is returned as-is.
+   *
+   * @param handler
+   *   partial function offered each failure's Throwable
+   */
+  def repairFailed[V2 >: V](handler: PartialFunction[Throwable, Option[V2]]) =
+    if (failed.isEmpty) {
+      this
+    } else {
+      val builder = new KeyValueResultBuilder[K, V2]
+      builder.addFound(found)
+      builder.addNotFound(notFound)
+      // foreach rather than map: update is invoked purely for its effect on the builder,
+      // so there is no reason to materialize a collection of results.
+      failed foreach { case (k, t) => builder.update(k, Throw(t) handle handler) }
+      builder.result()
+    }
+
+  /**
+   * Combines two KeyValueResults.  Conflicting founds/notFounds are resolved
+   * as founds, and conflicting (found|notFound)/failures are resolved as (found|notFound).
+   */
+  def ++[K2 >: K, V2 >: V](that: KeyValueResult[K2, V2]): KeyValueResult[K2, V2] = {
+    if (this.isEmpty) that
+    else if (that.isEmpty) this.asInstanceOf[KeyValueResult[K2, V2]]
+    else {
+      val found = this.found ++ that.found
+      val notFound = NotFound(this.notFound ++ that.notFound, found.keySet)
+      val failed = NotFound(NotFound(this.failed ++ that.failed, found.keySet), notFound)
+      KeyValueResult(found, notFound, failed)
+    }
+  }
+
+  /**
+   * Looks up a result for a key.
+   */
+  def apply(key: K): Try[Option[V]] = {
+    found.get(key) match {
+      case some @ Some(_) => Return(some)
+      case None =>
+        failed.get(key) match {
+          case Some(t) => Throw(t)
+          case None => Return.None
+        }
+    }
+  }
+
+  /**
+   * Looks up a result for a key, returning a provided default if the key is not
+   * found or failed.
+   */
+  def getOrElse[V2 >: V](key: K, default: => V2): V2 =
+    found.getOrElse(key, default)
+
+  /**
+   * If any keys fail, will return the first failure.
Otherwise, + * will convert founds/notFounds to a Seq[Option[V]], ordered by + * the keys provided + */ + def toFutureSeqOfOptions(keys: Seq[K]): Future[Seq[Option[V]]] = { + failed.values.headOption match { + case Some(t) => Future.exception(t) + case None => Future.value(keys.map(found.get)) + } + } + + // This is unfortunate, but we end up pulling in Iterable's toString, + // which is not all that readable. + override def toString(): String = { + val sb = new StringBuilder(256) + sb.append("KeyValueResult(") + sb.append("found = ") + sb.append(found) + sb.append(", notFound = ") + sb.append(notFound) + sb.append(", failed = ") + sb.append(failed) + sb.append(')') + sb.toString() + } +} + +class KeyValueResultBuilder[K, V] { + private[this] val found = Map.newBuilder[K, V] + private[this] val notFound = Set.newBuilder[K] + private[this] val failed = Map.newBuilder[K, Throwable] + + def addFound(k: K, v: V) = { found += (k -> v); this } + def addNotFound(k: K) = { notFound += k; this } + def addFailed(k: K, t: Throwable) = { failed += (k -> t); this } + + def addFound(kvs: Iterable[(K, V)]) = { found ++= kvs; this } + def addNotFound(ks: Iterable[K]) = { notFound ++= ks; this } + def addFailed(kts: Iterable[(K, Throwable)]) = { failed ++= kts; this } + + def update(k: K, tryV: Try[Option[V]]) = { + tryV match { + case Throw(t) => addFailed(k, t) + case Return(None) => addNotFound(k) + case Return(Some(v)) => addFound(k, v) + } + } + + def result() = KeyValueResult(found.result(), notFound.result(), failed.result()) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala new file mode 100644 index 000000000..40f69b81a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala @@ -0,0 +1,44 @@ +package com.twitter.servo.repository + 
+import com.twitter.servo.cache._ +import com.twitter.util.Future + +class CachingCounterKeyValueRepository[K]( + underlying: CounterKeyValueRepository[K], + cache: CounterCache[K], + observer: CacheObserver = NullCacheObserver) + extends CounterKeyValueRepository[K] { + + def apply(keys: Seq[K]): Future[KeyValueResult[K, Long]] = { + val uniqueKeys = keys.distinct + cache.get(uniqueKeys) flatMap { cachedResults => + recordResults(cachedResults) + + val missed = cachedResults.notFound ++ cachedResults.failed.keySet + readThrough(missed.toSeq) map { readResults => + KeyValueResult(cachedResults.found) ++ readResults + } + } + } + + private def readThrough(keys: Seq[K]): Future[KeyValueResult[K, Long]] = + if (keys.isEmpty) { + KeyValueResult.emptyFuture + } else { + underlying(keys) onSuccess { readResults => + for ((k, v) <- readResults.found) { + cache.add(k, v) + } + } + } + + private def recordResults(cachedResults: KeyValueResult[K, Long]): Unit = { + cachedResults.found.keys foreach { key => + observer.hit(key.toString) + } + cachedResults.notFound foreach { key => + observer.miss(key.toString) + } + observer.failure(cachedResults.failed.size) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala new file mode 100644 index 000000000..fe6e257d2 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala @@ -0,0 +1,736 @@ +package com.twitter.servo.repository + +import com.twitter.logging.{Level, Logger} +import com.twitter.servo.cache._ +import com.twitter.servo.util.{Effect, Gate, RateLimitingLogger} +import com.twitter.util._ +import scala.collection.mutable +import scala.util.Random + +/** + * A set of classes that indicate how to handle cached results. 
+ */
+sealed abstract class CachedResultAction[+V]
+
+object CachedResultAction {
+
+  /** Indicates a key should be fetched from the underlying repo */
+  case object HandleAsMiss extends CachedResultAction[Nothing]
+
+  /** Indicates a key should be returned as not-found, and not fetched from the underlying repo */
+  case object HandleAsNotFound extends CachedResultAction[Nothing]
+
+  /** Indicates the value should be returned as found */
+  case class HandleAsFound[V](value: V) extends CachedResultAction[V]
+
+  /**
+   * Indicates the value should not be cached. In CachingKeyValueRepository.apply such keys
+   * are still read from the underlying repo, but are left out of the query used to write
+   * results back to cache.
+   */
+  case object HandleAsDoNotCache extends CachedResultAction[Nothing]
+
+  /**
+   * Indicates that the given action should be applied, and the given function applied to the
+   * resulting value.
+   * NOTE(review): CachingKeyValueRepository.apply applies `f` only to values found by the
+   * underlying read-through; confirm that is the intended scope.
+   */
+  case class TransformSubAction[V](action: CachedResultAction[V], f: V => V)
+      extends CachedResultAction[V]
+
+  /** Indicates the key should be returned as a failure */
+  case class HandleAsFailed(t: Throwable) extends CachedResultAction[Nothing]
+
+  /** Indicates that the value should be refetched asynchronously, but in the meantime be
+   * treated as the given action. */
+  case class SoftExpiration[V](action: CachedResultAction[V]) extends CachedResultAction[V]
+}
+
+/**
+ * A set of classes representing the various states for a cached result.
+ */ +sealed abstract class CachedResult[+K, +V] { + def key: K +} + +object CachedResult { + import CachedResultAction._ + + /** Indicates the key was not in cache */ + case class NotFound[K](key: K) extends CachedResult[K, Nothing] + + /** Indicates there was an error fetching the key */ + case class Failed[K](key: K, t: Throwable) extends CachedResult[K, Nothing] + + /** Indicates the cached value could not be deserialized */ + case class DeserializationFailed[K](key: K) extends CachedResult[K, Nothing] + + /** Indicates the cached value could not be serialized */ + case class SerializationFailed[K](key: K) extends CachedResult[K, Nothing] + + /** Indicates that a NotFound tombstone was found in cached */ + case class CachedNotFound[K]( + key: K, + cachedAt: Time, + softTtlStep: Option[Short] = None) + extends CachedResult[K, Nothing] + + /** Indicates that a Deleted tombstone was found in cached */ + case class CachedDeleted[K]( + key: K, + cachedAt: Time, + softTtlStep: Option[Short] = None) + extends CachedResult[K, Nothing] + + /** Indicates that value was found in cached */ + case class CachedFound[K, V]( + key: K, + value: V, + cachedAt: Time, + softTtlStep: Option[Short] = None) + extends CachedResult[K, V] + + /** Indicates that value should not be cached until */ + case class DoNotCache[K](key: K, until: Option[Time]) extends CachedResult[K, Nothing] + + type Handler[K, V] = CachedResult[K, V] => CachedResultAction[V] + + type PartialHandler[K, V] = CachedResult[K, V] => Option[CachedResultAction[V]] + + type HandlerFactory[Q, K, V] = Q => Handler[K, V] + + /** + * companion object for Handler type + */ + object Handler { + + /** + * terminate a PartialHandler to produce a new Handler + */ + def apply[K, V]( + partial: PartialHandler[K, V], + handler: Handler[K, V] = defaultHandler[K, V] + ): Handler[K, V] = { cachedResult => + partial(cachedResult) match { + case Some(s) => s + case None => handler(cachedResult) + } + } + } + + /** + * companion object 
for PartialHandler type + */ + object PartialHandler { + + /** + * Sugar to produce a PartialHandler from a PartialFunction. Successive calls to + * isDefined MUST return the same result. Otherwise, take the syntax hit and wire + * up your own PartialHandler. + */ + def apply[K, V]( + partial: PartialFunction[CachedResult[K, V], CachedResultAction[V]] + ): PartialHandler[K, V] = partial.lift + + /** + * chain one PartialHandler after another to produce a new PartialHandler + */ + def orElse[K, V]( + thisHandler: PartialHandler[K, V], + thatHandler: PartialHandler[K, V] + ): PartialHandler[K, V] = { cachedResult => + thisHandler(cachedResult) match { + case some @ Some(_) => some + case None => thatHandler(cachedResult) + } + } + } + + /** + * companion object for HandlerFactory type + */ + object HandlerFactory { + def apply[Q, K, V](handler: Handler[K, V]): HandlerFactory[Q, K, V] = _ => handler + } + + def defaultHandlerFactory[Q, K, V]: HandlerFactory[Q, K, V] = + HandlerFactory[Q, K, V](defaultHandler) + + /** + * This is the default Handler. Failures are treated as misses. + */ + def defaultHandler[K, V]: Handler[K, V] = { + case NotFound(_) | Failed(_, _) => HandleAsMiss + case DeserializationFailed(_) | SerializationFailed(_) => HandleAsMiss + case CachedNotFound(_, _, _) | CachedDeleted(_, _, _) => HandleAsNotFound + case CachedFound(_, value, _, _) => HandleAsFound(value) + case DoNotCache(_, Some(time)) if Time.now > time => HandleAsMiss + case DoNotCache(_, _) => HandleAsDoNotCache + } + + /** + * A PartialHandler that bubbles memcache failures up instead of converting + * those failures to misses. + */ + def failuresAreFailures[K, V] = PartialHandler[K, V] { + case Failed(_, t) => HandleAsFailed(t) + } + + /** + * A PartialHandler that doesn't attempt to write back to cache if the initial + * cache read failed, but still fetches from the underlying repo. 
+ */ + def failuresAreDoNotCache[K, V] = PartialHandler[K, V] { + case Failed(_, _) => HandleAsDoNotCache + } + + /** + * A function that takes a cachedAt time and ttl, and returns an expiry time. This function + * _must_ be deterministic with respect to the arguments provided, otherwise, you might get a + * MatchError when using this with softTtlExpiration. + */ + type Expiry = (Time, Duration) => Time + + /** + * An Expiry function with an epsilon of zero. + */ + val fixedExpiry: Expiry = (cachedAt: Time, ttl: Duration) => cachedAt + ttl + + /** + * A repeatable "random" expiry function that perturbs the ttl with a random value + * no greater than +/-(ttl * maxFactor). + */ + def randomExpiry(maxFactor: Float): Expiry = { + if (maxFactor == 0) { + fixedExpiry + } else { (cachedAt: Time, ttl: Duration) => + { + val factor = (2 * new Random(cachedAt.inMilliseconds).nextFloat - 1) * maxFactor + cachedAt + ttl + Duration.fromNanoseconds((factor * ttl.inNanoseconds).toLong) + } + } + } + + /** + * soft-expires CachedFound and CachedNotFound based on a ttl. + * + * @param ttl + * values older than this will be considered expired, but still + * returned, and asynchronously refreshed in cache. + * @param expiry + * (optional) function to compute the expiry time + */ + def softTtlExpiration[K, V]( + ttl: Duration, + expiry: Expiry = fixedExpiry + ): PartialHandler[K, V] = + softTtlExpiration(_ => ttl, expiry) + + /** + * soft-expires CachedFound and CachedNotFound based on a ttl derived from the value + * + * @param ttl + * values older than this will be considered expired, but still + * returned, and asynchronously refreshed in cache. 
+   * @param expiry
+   *   function to compute the expiry time (this overload declares no default for it)
+   */
+  def softTtlExpiration[K, V](
+    ttl: Option[V] => Duration,
+    expiry: Expiry
+  ): PartialHandler[K, V] = PartialHandler[K, V] {
+    case CachedFound(_, value, cachedAt, _) if expiry(cachedAt, ttl(Some(value))) < Time.now =>
+      SoftExpiration(HandleAsFound(value))
+    case CachedNotFound(_, cachedAt, _) if expiry(cachedAt, ttl(None)) < Time.now =>
+      SoftExpiration(HandleAsNotFound)
+  }
+
+  /**
+   * soft-expires CachedFound, CachedNotFound and CachedDeleted based on a ttl derived from
+   * both the value and the softTtlStep. Expired CachedNotFound and CachedDeleted results are
+   * both handled as SoftExpiration(HandleAsNotFound); the ttl function receives None as the
+   * value for them. Results that have not yet expired are not matched by this handler.
+   *
+   * @param ttl
+   *   values older than this will be considered expired, but still returned, and
+   *   asynchronously refreshed in cache.
+   * @param expiry
+   *   (optional) function to compute the expiry time
+   */
+  def steppedSoftTtlExpiration[K, V](
+    ttl: (Option[V], Option[Short]) => Duration,
+    expiry: Expiry = fixedExpiry
+  ): PartialHandler[K, V] = PartialHandler[K, V] {
+    case CachedFound(_, value, cachedAt, softTtlStep)
+        if expiry(cachedAt, ttl(Some(value), softTtlStep)) < Time.now =>
+      SoftExpiration(HandleAsFound(value))
+    case CachedNotFound(_, cachedAt, softTtlStep)
+        if expiry(cachedAt, ttl(None, softTtlStep)) < Time.now =>
+      SoftExpiration(HandleAsNotFound)
+    case CachedDeleted(_, cachedAt, softTtlStep)
+        if expiry(cachedAt, ttl(None, softTtlStep)) < Time.now =>
+      SoftExpiration(HandleAsNotFound)
+  }
+
+  /**
+   * hard-expires CachedFound and CachedNotFound based on a ttl.
+ * + * @param ttl + * values older than this will be considered a miss + * @param expiry + * (optional) function to compute the expiry time + */ + def hardTtlExpiration[K, V]( + ttl: Duration, + expiry: Expiry = fixedExpiry + ): PartialHandler[K, V] = + hardTtlExpiration(_ => ttl, expiry) + + /** + * hard-expires CachedFound and CachedNotFound based on a ttl derived from the value + * + * @param ttl + * values older than this will be considered a miss + * @param expiry + * (optional) function to compute the expiry time + */ + def hardTtlExpiration[K, V]( + ttl: Option[V] => Duration, + expiry: Expiry + ): PartialHandler[K, V] = PartialHandler[K, V] { + case CachedFound(_, value, cachedAt, _) if expiry(cachedAt, ttl(Some(value))) < Time.now => + HandleAsMiss + case CachedNotFound(_, cachedAt, _) if expiry(cachedAt, ttl(None)) < Time.now => + HandleAsMiss + } + + /** + * hard-expires a CachedNotFound tombstone based on a ttl + * + * @param ttl + * values older than this will be considered expired + * @param expiry + * (optional) function to compute the expiry time + */ + def notFoundHardTtlExpiration[K, V]( + ttl: Duration, + expiry: Expiry = fixedExpiry + ): PartialHandler[K, V] = PartialHandler[K, V] { + case CachedNotFound(_, cachedAt, _) => + if (expiry(cachedAt, ttl) < Time.now) + HandleAsMiss + else + HandleAsNotFound + } + + /** + * hard-expires a CachedDeleted tombstone based on a ttl + * + * @param ttl + * values older than this will be considered expired + * @param expiry + * (optional) function to compute the expiry time + */ + def deletedHardTtlExpiration[K, V]( + ttl: Duration, + expiry: Expiry = fixedExpiry + ): PartialHandler[K, V] = PartialHandler[K, V] { + case CachedDeleted(_, cachedAt, _) => + if (expiry(cachedAt, ttl) < Time.now) + HandleAsMiss + else + HandleAsNotFound + } + + /** + * read only from cache, never fall back to underlying KeyValueRepository + */ + def cacheOnly[K, V]: Handler[K, V] = { + case CachedFound(_, value, _, _) => 
HandleAsFound(value) + case _ => HandleAsNotFound + } + + /** + * use either primary or backup Handler, depending on usePrimary result + * + * @param primaryHandler + * the handler to be used if usePrimary evaluates to true + * @param backupHandler + * the handle to be used if usePrimary evaluates to false + * @param usePrimary + * evaluates the query to determine which handler to use + */ + def switchedHandlerFactory[Q, K, V]( + primaryHandler: Handler[K, V], + backupHandler: Handler[K, V], + usePrimary: Q => Boolean + ): HandlerFactory[Q, K, V] = { query => + if (usePrimary(query)) + primaryHandler + else + backupHandler + } +} + +object CacheResultObserver { + case class CachingRepositoryResult[K, V]( + resultFromCache: KeyValueResult[K, Cached[V]], + resultFromCacheMissReadthrough: KeyValueResult[K, V], + resultFromSoftTtlReadthrough: KeyValueResult[K, V]) + def unit[K, V] = Effect.unit[CachingRepositoryResult[K, V]] +} + +object CachingKeyValueRepository { + type CacheResultObserver[K, V] = Effect[CacheResultObserver.CachingRepositoryResult[K, V]] +} + +/** + * Reads keyed values from a LockingCache, and reads through to an underlying + * KeyValueRepository for misses. supports a "soft ttl", beyond which values + * will be read through out-of-band to the originating request + * + * @param underlying + * the underlying KeyValueRepository + * @param cache + * the locking cache to read from + * @param newQuery + * a function for converting a subset of the keys of the original query into a new + * query. this is used to construct the query passed to the underlying repository + * to fetch the cache misses. + * @param handlerFactory + * A factory to produce functions that specify policies about how to handle results + * from cache. (i.e. 
to handle failures as misses vs failures, etc)
+ * @param picker
+ *   used to choose between the value in cache and the value read from the DB when
+ *   storing values in the cache
+ * @param observer
+ *   a CacheObserver for collecting cache statistics
+ * @param writeSoftTtlStep
+ *   Write the soft_ttl_step value to indicate number of consistent reads from underlying store
+ * @param cacheResultObserver
+ *   An [[Effect]] of type [[CacheResultObserver.CachingRepositoryResult]] which is useful for examining
+ *   the results from the cache, underlying storage, and any later read-throughs. The effect is
+ *   executed asynchronously from the request path and has no bearing on the Future[KeyValueResult]
+ *   returned from this Repository.
+ */
+class CachingKeyValueRepository[Q <: Seq[K], K, V](
+  underlying: KeyValueRepository[Q, K, V],
+  val cache: LockingCache[K, Cached[V]],
+  newQuery: SubqueryBuilder[Q, K],
+  handlerFactory: CachedResult.HandlerFactory[Q, K, V] =
+    CachedResult.defaultHandlerFactory[Q, K, V],
+  picker: LockingCache.Picker[Cached[V]] = new PreferNewestCached[V]: PreferNewestCached[V],
+  observer: CacheObserver = NullCacheObserver,
+  writeSoftTtlStep: Gate[Unit] = Gate.False,
+  cacheResultObserver: CachingKeyValueRepository.CacheResultObserver[K, V] =
+    CacheResultObserver.unit[K, V]: Effect[CacheResultObserver.CachingRepositoryResult[K, V]])
+    extends KeyValueRepository[Q, K, V] {
+  import CachedResult._
+  import CachedResultAction._
+
+  protected[this] val log = Logger.get(getClass.getSimpleName)
+  private[this] val rateLimitedLogger = new RateLimitingLogger(logger = log)
+
+  protected[this] val effectiveCacheStats = observer.scope("effective")
+
+  /**
+   * Calculates the softTtlStep based on result from cache and underlying store.
+   * The softTtlStep indicates how many times we have
+   * performed & recorded a consistent read-through.
+   * A value of None is equivalent to Some(0) - it indicates zero consistent read-throughs.
+ */ + protected[this] def updateSoftTtlStep( + underlyingResult: Option[V], + cachedResult: Cached[V] + ): Option[Short] = { + if (writeSoftTtlStep() && underlyingResult == cachedResult.value) { + cachedResult.softTtlStep match { + case Some(step) if step < Short.MaxValue => Some((step + 1).toShort) + case Some(step) if step == Short.MaxValue => cachedResult.softTtlStep + case _ => Some(1) + } + } else { + None + } + } + + protected case class ProcessedCacheResult( + hits: Map[K, V], + misses: Seq[K], + doNotCache: Set[K], + failures: Map[K, Throwable], + tombstones: Set[K], + softExpirations: Seq[K], + transforms: Map[K, (V => V)]) + + override def apply(keys: Q): Future[KeyValueResult[K, V]] = { + getFromCache(keys).flatMap { cacheResult => + val ProcessedCacheResult( + hits, + misses, + doNotCache, + failures, + tombstones, + softExpirations, + transforms + ) = + process(keys, cacheResult) + + if (log.isLoggable(Level.TRACE)) { + log.trace( + "CachingKVR.apply keys %d hit %d miss %d noCache %d failure %d " + + "tombstone %d softexp %d", + keys.size, + hits.size, + misses.size, + doNotCache.size, + failures.size, + tombstones.size, + softExpirations.size + ) + } + recordCacheStats( + keys, + notFound = misses.toSet, + doNotCache = doNotCache, + expired = softExpirations.toSet, + numFailures = failures.size, + numTombstones = tombstones.size + ) + + // now read through all notFound + val underlyingQuery = newQuery(misses ++ doNotCache, keys) + val writeToCacheQuery = if (doNotCache.nonEmpty) newQuery(misses, keys) else underlyingQuery + val futureFromUnderlying = readThrough(underlyingQuery, writeToCacheQuery) + + // async read-through for the expired results, ignore results + val softExpirationQuery = newQuery(softExpirations, keys) + val futureFromSoftExpiry = readThrough(softExpirationQuery, softExpirationQuery, cacheResult) + + // merge all results together + for { + fromUnderlying <- futureFromUnderlying + fromCache = KeyValueResult(hits, tombstones, 
failures) + fromUnderlyingTransformed = transformResults(fromUnderlying, transforms) + } yield { + futureFromSoftExpiry.onSuccess { readThroughResults => + cacheResultObserver( + CacheResultObserver.CachingRepositoryResult( + cacheResult, + fromUnderlyingTransformed, + readThroughResults + ) + ) + } + KeyValueResult.sum(Seq(fromCache, fromUnderlyingTransformed)) + } + } + } + + /** + * Given results and a map of keys to transform functions, apply those transform functions + * to the found results. + */ + protected[this] def transformResults( + results: KeyValueResult[K, V], + transforms: Map[K, (V => V)] + ): KeyValueResult[K, V] = { + if (transforms.isEmpty) { + results + } else { + results.copy(found = results.found.map { + case (key, value) => + (key, transforms.get(key).map(_(value)).getOrElse(value)) + }) + } + } + + protected[this] def getFromCache(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = { + val uniqueKeys = keys.distinct + cache.get(uniqueKeys) handle { + case t: Throwable => + rateLimitedLogger.logThrowable(t, "exception caught in cache get") + + // treat total cache failure as a fetch that returned all failures + KeyValueResult(failed = uniqueKeys.map { _ -> t }.toMap) + } + } + + /** + * Buckets cache results according to the wishes of the CachedResultHandler + */ + protected[this] def process( + keys: Q, + cacheResult: KeyValueResult[K, Cached[V]] + ): ProcessedCacheResult = { + val cachedResultHandler = handlerFactory(keys) + + val hits = Map.newBuilder[K, V] + val misses = new mutable.ArrayBuffer[K] + val failures = Map.newBuilder[K, Throwable] + val tombstones = Set.newBuilder[K] + val softExpiredKeys = new mutable.ListBuffer[K] + val doNotCache = Set.newBuilder[K] + val transforms = Map.newBuilder[K, (V => V)] + + for (key <- keys) { + val cachedResult = cacheResult(key) match { + case Throw(t) => Failed(key, t) + case Return(None) => NotFound(key) + case Return(Some(cached)) => + cached.status match { + case CachedValueStatus.Found => 
+              cached.value match {
+                case None => NotFound(key)
+                case Some(value) =>
+                  CachedFound(
+                    key,
+                    value,
+                    cached.cachedAt,
+                    cached.softTtlStep
+                  )
+              }
+            // Tombstones carry their softTtlStep just like found values do: writeToCache
+            // records the step for NotFound results as well, and steppedSoftTtlExpiration
+            // reads it from CachedNotFound / CachedDeleted. Dropping it here would make
+            // every tombstone look like it has had zero consistent read-throughs.
+            case CachedValueStatus.NotFound =>
+              CachedNotFound(key, cached.cachedAt, cached.softTtlStep)
+            case CachedValueStatus.Deleted =>
+              CachedDeleted(key, cached.cachedAt, cached.softTtlStep)
+            case CachedValueStatus.SerializationFailed => SerializationFailed(key)
+            case CachedValueStatus.DeserializationFailed => DeserializationFailed(key)
+            case CachedValueStatus.Evicted => NotFound(key)
+            case CachedValueStatus.DoNotCache => DoNotCache(key, cached.doNotCacheUntil)
+          }
+      }
+
+      // Files the key into the bucket chosen by the handler. Wrapper actions
+      // (TransformSubAction, SoftExpiration) record their own bookkeeping and then
+      // recurse into the action they wrap.
+      def processAction(action: CachedResultAction[V]): Unit = {
+        action match {
+          case HandleAsMiss => misses += key
+          case HandleAsFound(value) => hits += key -> value
+          case HandleAsNotFound => tombstones += key
+          case HandleAsDoNotCache => doNotCache += key
+          case HandleAsFailed(t) => failures += key -> t
+          case TransformSubAction(subAction, f) =>
+            transforms += key -> f
+            processAction(subAction)
+          case SoftExpiration(subAction) =>
+            softExpiredKeys += key
+            processAction(subAction)
+        }
+      }
+
+      processAction(cachedResultHandler(cachedResult))
+    }
+
+    ProcessedCacheResult(
+      hits.result(),
+      misses,
+      doNotCache.result(),
+      failures.result(),
+      tombstones.result(),
+      softExpiredKeys,
+      transforms.result()
+    )
+  }
+
+  protected[this] def recordCacheStats(
+    keys: Seq[K],
+    notFound: Set[K],
+    doNotCache: Set[K],
+    expired: Set[K],
+    numFailures: Int,
+    numTombstones: Int
+  ): Unit = {
+    keys.foreach { key =>
+      val wasntFound = notFound.contains(key)
+      val keyString = key.toString
+      if (wasntFound || expired.contains(key))
+        effectiveCacheStats.miss(keyString)
+      else
+        effectiveCacheStats.hit(keyString)
+
+      if (wasntFound)
+        observer.miss(keyString)
+      else
+        observer.hit(keyString)
+    }
+    observer.expired(expired.size)
+    observer.failure(numFailures)
+    observer.tombstone(numTombstones)
+    observer.noCache(doNotCache.size)
+  }
+
+  /**
+   * read through to the underlying repository
+   *
+   * 
@param cacheKeys + * the keys to read and cache + */ + def readThrough(cacheKeys: Q): Future[KeyValueResult[K, V]] = { + readThrough(cacheKeys, cacheKeys) + } + + /** + * read through to the underlying repository + * + * @param writeToCacheQuery + * the query to pass to the writeToCache method after getting a result back from the + * underlying repository. this query can be exactly the same as underlyingQuery if + * all readThrough keys should be cached, or it may contain a subset of the keys if + * some keys should not be written back to cache. + * @param cacheResult + * the current cache results for underlyingQuery. + */ + def readThrough( + underlyingQuery: Q, + writeToCacheQuery: Q, + cacheResult: KeyValueResult[K, Cached[V]] = KeyValueResult.empty + ): Future[KeyValueResult[K, V]] = { + if (underlyingQuery.isEmpty) { + KeyValueResult.emptyFuture + } else { + underlying(underlyingQuery).onSuccess { result => + if (writeToCacheQuery.nonEmpty) { + writeToCache(writeToCacheQuery, result, cacheResult) + } + } + } + } + + /** + * Writes the contents of the given KeyValueResult to cache. + */ + def writeToCache( + keys: Q, + underlyingResult: KeyValueResult[K, V], + cacheResult: KeyValueResult[K, Cached[V]] = KeyValueResult[K, Cached[V]]() + ): Unit = { + lazy val cachedEmpty = { + val now = Time.now + Cached[V](None, CachedValueStatus.NotFound, now, Some(now), softTtlStep = None) + } + + keys.foreach { key => + // only cache Returns from the underlying repo, skip Throws. 
+ // iff cached value matches value from underlying store + // (for both NotFound and Found results), increment softTtlStep + // otherwise, set softTtlStep to None + underlyingResult(key) match { + case Return(optUnderlyingVal) => + val softTtlStep = + cacheResult(key) match { + case Return(Some(cacheVal)) => updateSoftTtlStep(optUnderlyingVal, cacheVal) + case _ => None + } + + val status = + optUnderlyingVal match { + case Some(_) => CachedValueStatus.Found + case None => CachedValueStatus.NotFound + } + + val cached = + cachedEmpty.copy( + value = optUnderlyingVal, + status = status, + softTtlStep = softTtlStep + ) + + cache + .lockAndSet(key, LockingCache.PickingHandler(cached, picker)) + .onFailure { + case t: Throwable => + rateLimitedLogger.logThrowable(t, "exception caught in lockAndSet") + } + + case Throw(_) => None + } + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala new file mode 100644 index 000000000..1816596fc --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala @@ -0,0 +1,50 @@ +package com.twitter.servo.repository + +object ChunkingStrategy { + + /** + * A chunking strategy for breaking a query into fixed size chunks, with the last + * chunk possibly being any size between 1 and chunkSize. + */ + def fixedSize[K](chunkSize: Int): Seq[K] => Seq[Seq[K]] = { + fixedSize(chunkSize, keysAsQuery[K]) + } + + /** + * A chunking strategy for breaking a query into fixed size chunks, with the last + * chunk possibly being any size between 1 and chunkSize. + */ + def fixedSize[Q <: Seq[K], K]( + chunkSize: Int, + newQuery: SubqueryBuilder[Q, K] + ): Q => Seq[Q] = { query => + query.distinct.grouped(chunkSize) map { newQuery(_, query) } toSeq + } + + /** + * A chunking strategy for breaking a query into roughly equal sized chunks no + * larger than maxSize. 
The last chunk may be slightly smaller due to rounding. + */ + def equalSize[K](maxSize: Int): Seq[K] => Seq[Seq[K]] = { + equalSize(maxSize, keysAsQuery[K]) + } + + /** + * A chunking strategy for breaking a query into roughly equal sized chunks no + * larger than maxSize. The last chunk may be slightly smaller due to rounding. + * + * A query with at most maxSize keys is returned unchanged as a single chunk. Otherwise the + * keys are de-duplicated, and both the chunk count and the chunk size are derived from the + * number of distinct keys, so duplicate keys cannot produce extra or unbalanced chunks. + */ + def equalSize[Q <: Seq[K], K]( + maxSize: Int, + newQuery: SubqueryBuilder[Q, K] + ): Q => Seq[Q] = { query => + { + if (query.size <= maxSize) { + Seq(query) + } else { + // size the chunks from the keys that are actually grouped, not from the raw query + val distinctKeys = query.distinct + val chunkCount = math.ceil(distinctKeys.size / maxSize.toDouble) + val chunkSize = math.ceil(distinctKeys.size / chunkCount).toInt + distinctKeys.grouped(chunkSize).map { newQuery(_, query) }.toSeq + } + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala new file mode 100644 index 000000000..f5c3f4c46 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala @@ -0,0 +1,161 @@ +package com.twitter.servo.repository + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo.cache.{CacheObserver, Cached, LockingCache} +import com.twitter.servo.repository +import com.twitter.servo.repository.CachedResult.{Handler, HandlerFactory} +import com.twitter.servo.util._ +import com.twitter.util._ + +import scala.util.control.NoStackTrace + +object DarkmodingKeyValueRepositoryFactory { + val DefaultEwmaHalfLife = 5.minutes + val DefaultRecentWindow = 10.seconds + val DefaultWindowSize = 5000 + val DefaultAvailabilityFromSuccessRate = + Availability.linearlyScaled(highWaterMark = 0.98, lowWaterMark = 0.75, minAvailability = 0.02) + + def DefaultEwmaTracker = new EwmaSuccessRateTracker(DefaultEwmaHalfLife) + def DefaultRecentWindowTracker = 
SuccessRateTracker.recentWindowed(DefaultRecentWindow) + def DefaultRollingWindowTracker = SuccessRateTracker.rollingWindow(DefaultWindowSize) + + /** + * Wraps an underlying repository, which can be manually or automatically darkmoded. + * + * Auto-darkmoding is based on success rate (SR) as reported by a [[SuccessRateTracker]]. + * + * @param readFromUnderlying Open: operate normally. Closed: read from backupRepo regardless of SR. + * @param autoDarkmode Open: auto-darkmoding kicks in based on SR. Closed: auto-darkmoding will not kick in regardless of SR. + * @param stats Used to record success rate and availability; often should be scoped to this repo for stats naming + * @param underlyingRepo The underlying repo; read from when not darkmoded + * @param backupRepo The repo to read from when darkmoded; defaults to an always-failing repo. + * @param successRateTracker Strategy for reporting SR, usually over a moving window + * @param availabilityFromSuccessRate Function to calculate availability based on success rate + * @param shouldIgnore don't count certain exceptions as failures, e.g. 
cancellations + */ + def darkmoding[Q <: Seq[K], K, V]( + readFromUnderlying: Gate[Unit], + autoDarkmode: Gate[Unit], + stats: StatsReceiver, + underlyingRepo: KeyValueRepository[Q, K, V], + backupRepo: KeyValueRepository[Q, K, V] = + KeyValueRepository.alwaysFailing[Q, K, V](DarkmodedException), + successRateTracker: SuccessRateTracker = DefaultRecentWindowTracker, + availabilityFromSuccessRate: Double => Double = DefaultAvailabilityFromSuccessRate, + shouldIgnore: Throwable => Boolean = SuccessRateTrackingRepository.isCancellation + ): KeyValueRepository[Q, K, V] = { + val (successRateTrackingRepoFactory, successRateGate) = + SuccessRateTrackingRepository.withGate[Q, K, V]( + stats, + availabilityFromSuccessRate, + successRateTracker.observed(stats), + shouldIgnore + ) + val gate = mkGate(successRateGate, readFromUnderlying, autoDarkmode) + + Repository.selected( + q => gate(()), + successRateTrackingRepoFactory(underlyingRepo), + backupRepo + ) + } + + /** + * Produces a caching repository around an underlying repository, which + * can be manually or automatically darkmoded. + * + * @param underlyingRepo The underlying repo from which to read + * @param cache The typed locking cache to fall back to when darkmoded + * @param picker Used to break ties when a value being written is already present in cache + * @param readFromUnderlying Open: operate normally. Closed: read from cache regardless of SR. + * @param autoDarkmode Open: auto-darkmoding kicks in based on SR. Closed: auto-darkmoding will not kick in regardless of SR. 
+ * @param cacheObserver Observes interactions with the cache; often should be scoped to this repo for stats naming + * @param stats Used to record various stats; often should be scoped to this repo for stats naming + * @param handler a [[Handler]] to use when not darkmoded + * @param successRateTracker Strategy for reporting SR, usually over a moving window + * @param availabilityFromSuccessRate Function to calculate availability based on success rate + * @param shouldIgnore don't count certain exceptions as failures, e.g. cancellations + */ + def darkmodingCaching[K, V, CacheKey]( + underlyingRepo: KeyValueRepository[Seq[K], K, V], + cache: LockingCache[K, Cached[V]], + picker: LockingCache.Picker[Cached[V]], + readFromUnderlying: Gate[Unit], + autoDarkmode: Gate[Unit], + cacheObserver: CacheObserver, + stats: StatsReceiver, + handler: Handler[K, V], + successRateTracker: SuccessRateTracker = DefaultRecentWindowTracker, + availabilityFromSuccessRate: Double => Double = DefaultAvailabilityFromSuccessRate, + shouldIgnore: Throwable => Boolean = SuccessRateTrackingRepository.isCancellation, + writeSoftTtlStep: Gate[Unit] = Gate.False, + cacheResultObserver: CachingKeyValueRepository.CacheResultObserver[K, V] = + CacheResultObserver.unit[K, V]: Effect[CacheResultObserver.CachingRepositoryResult[K, V]] + ): CachingKeyValueRepository[Seq[K], K, V] = { + val (successRateTrackingRepoFactory, successRateGate) = + SuccessRateTrackingRepository.withGate[Seq[K], K, V]( + stats, + availabilityFromSuccessRate, + successRateTracker.observed(stats), + shouldIgnore + ) + val gate = mkGate(successRateGate, readFromUnderlying, autoDarkmode) + + new CachingKeyValueRepository[Seq[K], K, V]( + successRateTrackingRepoFactory(underlyingRepo), + cache, + repository.keysAsQuery, + mkHandlerFactory(handler, gate), + picker, + cacheObserver, + writeSoftTtlStep = writeSoftTtlStep, + cacheResultObserver = cacheResultObserver + ) + } + + /** + * Create a composite gate suitable for controlling 
darkmoding, usually via decider + * + * @param successRate gate that should close and open according to success rate (SR) changes + * @param readFromUnderlying if open: returned gate operates normally. if closed: returned gate will be closed regardless of SR + * @param autoDarkMode if open: close gate according to SR. if closed: gate ignores SR changes + * @return the composite gate `readFromUnderlying & (successRate | !autoDarkMode)` + */ + def mkGate( + successRate: Gate[Unit], + readFromUnderlying: Gate[Unit], + autoDarkMode: Gate[Unit] + ): Gate[Unit] = + readFromUnderlying & (successRate | !autoDarkMode) + + /** + * Construct a [[CachedResult.Handler]] with sane defaults for use with a caching darkmoded repository. + * The handler is built from `CachedResult.failuresAreDoNotCache` and a handler wrapping + * `CachedResult.softTtlExpiration(softTtl, expiry)`. + * @param softTtl TTL for soft-expiration of values in the cache + * @param expiry Used to apply the softTTL (e.g. fixed vs randomly perturbed) + */ + def mkDefaultHandler[K, V]( + softTtl: Option[V] => Duration, + expiry: CachedResult.Expiry + ): Handler[K, V] = + CachedResult.Handler( + CachedResult.failuresAreDoNotCache, + CachedResult.Handler(CachedResult.softTtlExpiration(softTtl, expiry)) + ) + + /** + * Builds a [[HandlerFactory]] that ignores the query and picks a handler per call: + * the given handler when successRateGate evaluates to true, `CachedResult.cacheOnly` otherwise. + * The gate is evaluated on every invocation of the factory. The CacheKey type parameter is unused. + * @param handler the handler to use while the gate evaluates to true + * @param successRateGate gate deciding between handler and `CachedResult.cacheOnly` + */ + private[repository] def mkHandlerFactory[CacheKey, V, K]( + handler: Handler[K, V], + successRateGate: Gate[Unit] + ): HandlerFactory[Seq[K], K, V] = + query => + if (successRateGate(())) handler + else CachedResult.cacheOnly +} + +/** + * This exception is returned from a repository when it is auto-darkmoded due to low backend + * success rate, or darkmoded manually via gate (usually a decider). 
+ */ +class DarkmodedException extends Exception with NoStackTrace +object DarkmodedException extends DarkmodedException diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala new file mode 100644 index 000000000..f8df436d0 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala @@ -0,0 +1,74 @@ +package com.twitter.servo.repository + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging.Logger +import com.twitter.servo.cache.{InProcessCache, StatsReceiverCacheObserver} +import com.twitter.servo.util.FrequencyCounter +import com.twitter.util.Future + +/** + * A KeyValueRepository which uses a sliding window to track + * the frequency at which keys are requested and diverts requests + * for keys above the promotionThreshold through an in-memory request cache. + * + * @param underlyingRepo + * the underlying KeyValueRepository + * @param newQuery + * a function for converting a subset of the keys of the original query into a new query. 
+ * @param windowSize + * the number of previous requests to include in the window + * @param promotionThreshold + * the number of requests for the same key in the window required + * to divert the request through the request cache + * @param cacheFactory + * a function which constructs a future response cache of the given size + * @param statsReceiver + * records stats on the cache + * @param disableLogging + * disables logging in token cache for pdp purposes; when true, the debug log line that + * names a newly promoted key is suppressed as well + */ +object HotKeyCachingKeyValueRepository { + def apply[Q <: Seq[K], K, V]( + underlyingRepo: KeyValueRepository[Q, K, V], + newQuery: SubqueryBuilder[Q, K], + windowSize: Int, + promotionThreshold: Int, + cacheFactory: Int => InProcessCache[K, Future[Option[V]]], + statsReceiver: StatsReceiver, + disableLogging: Boolean = false + ): KeyValueRepository[Q, K, V] = { + val log = Logger.get(getClass.getSimpleName) + + val promotionsCounter = statsReceiver.counter("promotions") + + // The promotion is always counted; the key itself is only logged when logging is enabled. + val onPromotion = { (k: K) => + if (!disableLogging) { + log.debug("key %s promoted to HotKeyCache", k.toString) + } + promotionsCounter.incr() + } + + val frequencyCounter = new FrequencyCounter[K](windowSize, promotionThreshold, onPromotion) + + // Maximum cache size occurs in the event that every key in the buffer occurs + // `promotionThreshold` times. We apply a failure-refreshing filter to avoid + // caching failed responses. 
+ val cache = + InProcessCache.withFilter( + cacheFactory(windowSize / promotionThreshold) + )( + ResponseCachingKeyValueRepository.refreshFailures + ) + + val observer = + new StatsReceiverCacheObserver(statsReceiver, windowSize, "request_cache", disableLogging) + + val cachingRepo = + new ResponseCachingKeyValueRepository[Q, K, V](underlyingRepo, cache, newQuery, observer) + + KeyValueRepository.selected( + frequencyCounter.incr, + cachingRepo, + underlyingRepo, + newQuery + ) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala new file mode 100644 index 000000000..f1711e99c --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala @@ -0,0 +1,18 @@ +package com.twitter.servo.repository + +import com.twitter.util.{Future, Return, Throw, Try} + +class ImmutableKeyValueRepository[K, V](data: Map[K, Try[V]]) + extends KeyValueRepository[Seq[K], K, V] { + def apply(keys: Seq[K]) = Future { + val hits = keys flatMap { key => + data.get(key) map { key -> _ } + } toMap + + val found = hits collect { case (key, Return(value)) => key -> value } + val failed = hits collect { case (key, Throw(t)) => key -> t } + val notFound = keys.toSet -- found.keySet -- failed.keySet + + KeyValueResult(found, notFound, failed) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala new file mode 100644 index 000000000..82f6393f0 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala @@ -0,0 +1,192 @@ +package com.twitter.servo.repository + +import com.twitter.util.{Future, Try} + +object KeyValueRepository { + + /** + * Builds a KeyValueRepository that returns 
KeyValueResults in which all keys failed with the + * provided Throwable. + */ + def alwaysFailing[Q <: Seq[K], K, V](failure: Throwable): KeyValueRepository[Q, K, V] = + (query: Q) => + Future.value( + KeyValueResult[K, V]( + failed = query map { _ -> failure } toMap + ) + ) + + /** + * Builds an immutable KeyValueRepository + */ + def apply[K, V](data: Map[K, Try[V]]): KeyValueRepository[Seq[K], K, V] = + new ImmutableKeyValueRepository(data) + + /** + * Sets up a mapReduce type operation on a KeyValueRepository where the query mapping function + * breaks the query up into smaller chunks, and the reducing function is just KeyValueResult.sum. + */ + def chunked[Q, K, V]( + repo: KeyValueRepository[Q, K, V], + chunker: Q => Seq[Q] + ): KeyValueRepository[Q, K, V] = + Repository.mapReduced(repo, chunker, KeyValueResult.sum[K, V]) + + /** + * Wraps a KeyValueRepository with stats recording functionality. + */ + def observed[Q, K, V]( + repo: KeyValueRepository[Q, K, V], + observer: RepositoryObserver, + querySize: Q => Int + ): KeyValueRepository[Q, K, V] = + query => { + observer.time(querySize(query)) { + repo(query).respond(observer.observeKeyValueResult) + } + } + + /** + * Creates a new KeyValueRepository that dispatches to onTrueRepo if the key + * predicate returns true, dispatches to onFalseRepo otherwise. + */ + def selected[Q <: Seq[K], K, V]( + select: K => Boolean, + onTrueRepo: KeyValueRepository[Q, K, V], + onFalseRepo: KeyValueRepository[Q, K, V], + queryBuilder: SubqueryBuilder[Q, K] + ): KeyValueRepository[Q, K, V] = selectedByQuery( + predicateFactory = _ => select, + onTrueRepo = onTrueRepo, + onFalseRepo = onFalseRepo, + queryBuilder = queryBuilder + ) + + /** + * Creates a new KeyValueRepository that uses predicateFactory to create a key predicate, then + * dispatches to onTrueRepo if the key predicate returns true, dispatches to onFalseRepo + * otherwise. 
+ */ + def selectedByQuery[Q <: Seq[K], K, V]( + predicateFactory: Q => (K => Boolean), + onTrueRepo: KeyValueRepository[Q, K, V], + onFalseRepo: KeyValueRepository[Q, K, V], + queryBuilder: SubqueryBuilder[Q, K] + ): KeyValueRepository[Q, K, V] = { + // neither repo is invoked with an empty sub-query + val isEmptyQuery = (q: Q) => q.isEmpty + val trueRepo = shortCircuitEmpty(isEmptyQuery)(onTrueRepo) + val falseRepo = shortCircuitEmpty(isEmptyQuery)(onFalseRepo) + + (query: Q) => { + val (trueKeys, falseKeys) = query.partition(predicateFactory(query)) + // both lookups are issued before either result is combined + val trueResult = trueRepo(queryBuilder(trueKeys, query)) + val falseResult = falseRepo(queryBuilder(falseKeys, query)) + trueResult.flatMap { fromTrue => + falseResult.map { fromFalse => fromTrue ++ fromFalse } + } + } + } + + /** + * Builds a KeyValueRepository that evaluates the predicate on the whole query and + * forwards the query to onTrueRepo when it holds, to onFalseRepo when it does not. + */ + def choose[Q, K, V]( + predicate: Q => Boolean, + onTrueRepo: KeyValueRepository[Q, K, V], + onFalseRepo: KeyValueRepository[Q, K, V] + ): KeyValueRepository[Q, K, V] = + (query: Q) => { + val chosen = if (predicate(query)) onTrueRepo else onFalseRepo + chosen(query) + } + + /** + * Wraps a KeyValueRepository so that a query the caller-supplied isEmpty function + * reports as empty yields an empty KeyValueResult without calling the backend. + * + * The implementation of repo and isEmpty should satisfy: + * + * forAll { (q: Q) => !isEmpty(q) || (repo(q).get == KeyValueResult.empty[K, V]) } + */ + def shortCircuitEmpty[Q, K, V]( + isEmpty: Q => Boolean + )( + repo: KeyValueRepository[Q, K, V] + ): KeyValueRepository[Q, K, V] = { query => + if (!isEmpty(query)) repo(query) + else KeyValueResult.emptyFuture[K, V] + } + + /** + * Short-circuit a KeyValueRepository to return an empty + * KeyValueResult for any empty Traversable query rather than + * calling the backend. 
+ * + * The implementation of repo should satisfy: + * + * forAll { (q: Q) => !q.isEmpty || (repo(q).get == KeyValueResult.empty[K, V]) } + */ + def shortCircuitEmpty[Q <: Traversable[_], K, V]( + repo: KeyValueRepository[Q, K, V] + ): KeyValueRepository[Q, K, V] = shortCircuitEmpty[Q, K, V]((_: Q).isEmpty)(repo) + + /** + * Turns a bulking KeyValueRepository into a non-bulking Repository. The query to the + * KeyValueRepository must be nothing more than a Seq[K]. + */ + def singular[K, V](repo: KeyValueRepository[Seq[K], K, V]): Repository[K, Option[V]] = + singular(repo, (key: K) => Seq(key)) + + /** + * Turns a bulking KeyValueRepository into a non-bulking Repository. + */ + def singular[Q, K, V]( + repo: KeyValueRepository[Q, K, V], + queryBuilder: K => Q + ): Repository[K, Option[V]] = + key => { + repo(queryBuilder(key)) flatMap { results => + Future.const(results(key)) + } + } + + /** + * Converts a KeyValueRepository with value type V to one with value type + * V2 using a function that maps found values. + */ + def mapFound[Q, K, V, V2]( + repo: KeyValueRepository[Q, K, V], + f: V => V2 + ): KeyValueRepository[Q, K, V2] = + repo andThen { _ map { _ mapFound f } } + + /** + * Converts a KeyValueRepository with value type V to one with value type + * V2 using a function that maps over results. 
+ */ + def mapValues[Q, K, V, V2]( + repo: KeyValueRepository[Q, K, V], + f: Try[Option[V]] => Try[Option[V2]] + ): KeyValueRepository[Q, K, V2] = + repo andThen { _ map { _ mapValues f } } + + /** + * Turns a KeyValueRepository which may throw an exception to another + * KeyValueRepository which always returns Future.value(KeyValueResult) + * even when there is an exception + */ + def scatterExceptions[Q <: Traversable[K], K, V]( + repo: KeyValueRepository[Q, K, V] + ): KeyValueRepository[Q, K, V] = + q => + repo(q) handle { + case t => KeyValueResult[K, V](failed = q map { _ -> t } toMap) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala new file mode 100644 index 000000000..4f0fc1f42 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala @@ -0,0 +1,89 @@ +package com.twitter.servo.repository + +import com.twitter.finagle.stats.{StatsReceiver, Stat} +import com.twitter.servo.util.{ExceptionCounter, LogarithmicallyBucketedTimer} +import com.twitter.util.{Future, Return, Throw, Try} + +class RepositoryObserver( + statsReceiver: StatsReceiver, + bucketBySize: Boolean, + exceptionCounter: ExceptionCounter) { + protected[this] lazy val timer = new LogarithmicallyBucketedTimer(statsReceiver) + protected[this] val sizeStat = statsReceiver.stat("size") + protected[this] val foundStat = statsReceiver.counter("found") + protected[this] val notFoundStat = statsReceiver.counter("not_found") + protected[this] val total = statsReceiver.counter("total") + private[this] val timeStat = statsReceiver.stat(LogarithmicallyBucketedTimer.LatencyStatName) + + def this(statsReceiver: StatsReceiver, bucketBySize: Boolean = true) = + this(statsReceiver, bucketBySize, new ExceptionCounter(statsReceiver)) + + def time[T](size: Int = 1)(f: => Future[T]) = { + 
sizeStat.add(size) + if (bucketBySize) + timer(size)(f) + else + Stat.timeFuture(timeStat)(f) + } + + /** + * Increments the total counter by size. + */ + private[this] def total(size: Int = 1): Unit = total.incr(size) + + /** + * Records size found results: increments both the found counter and the total counter. + */ + def found(size: Int = 1): Unit = { + foundStat.incr(size) + total(size) + } + + /** + * Records size not-found results: increments both the notFound counter and the total counter. + */ + def notFound(size: Int = 1): Unit = { + notFoundStat.incr(size) + total(size) + } + + /** + * Passes the given throwables to the exceptionCounter and adds their number to the total counter. + */ + def exception(ts: Throwable*): Unit = { + exceptionCounter(ts) + total(ts.size) + } + + /** + * Seq-taking variant of exception; forwards all elements as varargs. + */ + def exceptions(ts: Seq[Throwable]): Unit = { + exception(ts: _*) + } + + /** + * Observes a Try: a Return is recorded as one found result, a Throw as one exception. + */ + def observeTry[V](tryObj: Try[V]): Unit = { + tryObj.respond { + case Return(_) => found() + case Throw(t) => exception(t) + } + } + + /** + * Observes a Try of an Option: Return(Some) is recorded as found, Return(None) as + * notFound, and a Throw as one exception. + */ + def observeOption[V](optionTry: Try[Option[V]]): Unit = { + optionTry.respond { + case Return(Some(_)) => found() + case Return(None) => notFound() + case Throw(t) => exception(t) + } + } + + /** + * Observes a Try of a KeyValueResult. For a Return, records the sizes of the result's + * found and notFound collections and every value of its failed collection as an exception; + * a Throw is recorded as a single exception. + */ + def observeKeyValueResult[K, V](resultTry: Try[KeyValueResult[K, V]]): Unit = { + resultTry.respond { + case Return(result) => + found(result.found.size) + notFound(result.notFound.size) + exceptions(result.failed.values.toSeq) + case Throw(t) => + exception(t) + } + } + + /** + * observeSeq observes the result of a fetch against a key-value repository + * when the returned value is a Seq of type V. When the fetch is completed, + * observes whether or not the returned Seq is empty, contains some number of + * items, or has failed in some way. 
+ */ + def observeSeq[V](seqTry: Try[Seq[V]]): Unit = { + seqTry.respond { + case Return(seq) if seq.isEmpty => notFound() + case Return(seq) => found(seq.length) + case Throw(t) => exception(t) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala new file mode 100644 index 000000000..5a62fe175 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala @@ -0,0 +1,133 @@ +package com.twitter.servo.repository + +import com.twitter.servo.util.RetryHandler +import com.twitter.util.{Duration, Future, Timer} + +object Repository { + + /** + * Composes a RepositoryFilter onto a Repository, producing a new Repository. + */ + def composed[Q, R1, R2]( + repo: Repository[Q, R1], + filter: RepositoryFilter[Q, R1, R2] + ): Repository[Q, R2] = + q => filter(q, repo(q)) + + /** + * Chains 2 or more RepositoryFilters together into a single RepositoryFilter. + */ + def chained[Q, R1, R2, R3]( + f1: RepositoryFilter[Q, R1, R2], + f2: RepositoryFilter[Q, R2, R3], + fs: RepositoryFilter[Q, R3, R3]* + ): RepositoryFilter[Q, R1, R3] = { + val first: RepositoryFilter[Q, R1, R3] = (q, r) => f2(q, f1(q, r)) + fs.toList match { + case Nil => first + case head :: tail => chained(first, head, tail: _*) + } + } + + /** + * Wraps a Repository with a function that transforms queries on the way in, and + * results on the way out. + */ + def transformed[Q, Q2, R, R2]( + repo: Repository[Q, R], + qmapper: Q2 => Q = (identity[Q] _): (Q => Q), + rmapper: R => R2 = (identity[R] _): (R => R) + ): Repository[Q2, R2] = + qmapper andThen repo andThen { _ map rmapper } + + /** + * Wraps a Repository with another Repository that explodes the query into multiple + * queries, executes those queries in parallel, then combines (reduces) results. 
+ */ + def mapReduced[Q, Q2, R, R2]( + repo: Repository[Q, R], + mapper: Q2 => Seq[Q], + reducer: Seq[R] => R2 + ): Repository[Q2, R2] = + mapReducedWithQuery(repo, mapper, (rs: Seq[(Q, R)]) => reducer(rs map { case (_, r) => r })) + + /** + * An extension of mapReduced that passes query and result to the reducer. + * + * The mapper splits the incoming query into sub-queries, each sub-query is sent to repo and + * paired with its result, and the reducer is applied to the collected (query, result) pairs. + */ + def mapReducedWithQuery[Q, Q2, R, R2]( + repo: Repository[Q, R], + mapper: Q2 => Seq[Q], + reducer: Seq[(Q, R)] => R2 + ): Repository[Q2, R2] = { + val queryRepo: Q => Future[(Q, R)] = q => repo(q) map { (q, _) } + q2 => Future.collect(mapper(q2) map queryRepo) map reducer + } + + /** + * Creates a new Repository that dispatches to onTrueRepo if the given query predicate returns + * true, and dispatches to onFalseRepo otherwise. + */ + def selected[Q, R]( + select: Q => Boolean, + onTrueRepo: Repository[Q, R], + onFalseRepo: Repository[Q, R] + ): Repository[Q, R] = + dispatched(select andThen { + case true => onTrueRepo + case false => onFalseRepo + }) + + /** + * Creates a new Repository that uses a function that selects an underlying repository + * based upon the query. + */ + def dispatched[Q, R](f: Q => Repository[Q, R]): Repository[Q, R] = + q => f(q)(q) + + /** + * Wraps a Repository with the given RetryHandler, which may automatically retry + * failed requests. + */ + def retrying[Q, R](handler: RetryHandler[R], repo: Repository[Q, R]): Repository[Q, R] = + handler.wrap(repo) + + /** + * Produces a new Repository where the returned Future must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. + * + * ''Note'': On timeout, the underlying future is not interrupted. + */ + def withTimeout[Q, R]( + timer: Timer, + timeout: Duration, + repo: Repository[Q, R] + ): Repository[Q, R] = + repo andThen { _.within(timer, timeout) } + + /** + * Produces a new Repository where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable. 
+ * + * ''Note'': On timeout, the underlying future is not interrupted. + */ + def withTimeout[Q, R]( + timer: Timer, + timeout: Duration, + exc: => Throwable, + repo: Repository[Q, R] + ): Repository[Q, R] = + repo.andThen(response => response.within(timer, timeout, exc)) + + /** + * Decorates a Repository so that every call is timed by the observer and its + * outcome is reported through observer.observeTry. + */ + def observed[Q, R]( + repo: Repository[Q, R], + observer: RepositoryObserver + ): Repository[Q, R] = + query => observer.time() { repo(query).respond(observer.observeTry) } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala new file mode 100644 index 000000000..efbd6f5a7 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala @@ -0,0 +1,103 @@ +package com.twitter.servo.repository + +import com.twitter.servo.cache._ +import com.twitter.util._ + +object ResponseCachingKeyValueRepository { + + /** + * A cache filter that excludes cached future responses that are already fulfilled. + * Using this policy ensures that this repository will only ever have one outstanding request for the same item. + */ + def refreshSatisfied[K, V]: (K, Future[Option[V]]) => Boolean = + (_, response) => response.isDefined + + /** + * A cache filter that excludes cached future responses that are failures. + * A response that has not completed yet is never excluded. + */ + def refreshFailures[K, V]: (K, Future[Option[V]]) => Boolean = + (_, response) => response.poll.exists(_.isThrow) +} + +/** + * A repository that caches (in-process) Future responses from an underlying KeyValueRepository. + * Each time a request for a key is made, the repository first checks + * if any Future responses for that key are already cached. + * If so, the Future response from cache is returned. 
+ * If not, a new Promise is placed in to cache, + * the underlying repository is queried to fulfill the Promise, + * and the new Promise is returned to the caller. + * @param underlying + * the underlying KeyValueRepository + * @param cache + * an inprocess cache of (future) responses + * @param newQuery + * a function which constructs a new query from a query and a set of keys + * @param observer + * a CacheObserver which records the hits/misses on the request cache + */ +class ResponseCachingKeyValueRepository[Q <: Seq[K], K, V]( + underlying: KeyValueRepository[Q, K, V], + cache: InProcessCache[K, Future[Option[V]]], + newQuery: SubqueryBuilder[Q, K], + observer: CacheObserver = NullCacheObserver) + extends KeyValueRepository[Q, K, V] { + private[this] def load(query: Q, promises: Seq[(K, Promise[Option[V]])]): Unit = { + if (promises.nonEmpty) { + underlying(newQuery(promises map { case (k, _) => k }, query)) respond { + case Throw(t) => promises foreach { case (_, p) => p.updateIfEmpty(Throw(t)) } + case Return(kvr) => promises foreach { case (k, p) => p.updateIfEmpty(kvr(k)) } + } + } + } + + sealed trait RefreshResult[K, V] { + def toInterruptible: Future[Option[V]] + } + + private case class CachedResult[K, V](result: Future[Option[V]]) extends RefreshResult[K, V] { + def toInterruptible = result.interruptible + } + + private case class LoadResult[K, V](keyToLoad: K, result: Promise[Option[V]]) + extends RefreshResult[K, V] { + def toInterruptible = result.interruptible + } + + private[this] def refresh(key: K): RefreshResult[K, V] = + synchronized { + cache.get(key) match { + case Some(updated) => + observer.hit(key.toString) + CachedResult(updated) + case None => + observer.miss(key.toString) + val promise = new Promise[Option[V]] + cache.set(key, promise) + LoadResult(key, promise) + } + } + + def apply(query: Q): Future[KeyValueResult[K, V]] = + KeyValueResult.fromSeqFuture(query) { + val result: Seq[RefreshResult[K, V]] = + query map { key => + 
cache.get(key) match { + case Some(value) => + observer.hit(key.toString) + CachedResult[K, V](value) + case None => + refresh(key) + } + } + + val toLoad = result collect { case LoadResult(k, p) => k -> p } + load(query, toLoad) + + result map { _.toInterruptible } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala new file mode 100644 index 000000000..9f2e315c7 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala @@ -0,0 +1,34 @@ +package com.twitter.servo.repository + +import scala.collection.SeqProxy + +/** + * RichQuery is a mixin trait for KeyValueRepository query objects that are more complex + * than Seq[K]. It extends SeqProxy to satisfy servo's requirements but provides Product-based + * implementations of equals and toString. (The query object is expected to be a case class + * and therefore implement Product.) + */ +trait RichQuery[K] extends SeqProxy[K] with Product { + // Compare to other RichQuery instances via Product; otherwise allow any sequence to + // match our proxied Seq (thereby matching the semantics of a case class that simply + // extends SeqProxy). 
+ override def equals(any: Any) = { + any match { + case null => false + + case other: RichQuery[_] => + ( + this.productArity == other.productArity && + this.productIterator.zip(other.productIterator).foldLeft(true) { + case (ok, (e1, e2)) => + ok && e1 == e2 + } + ) + + case other => other.equals(this) + } + } + + // Produce reasonable string for testing + override def toString = "%s(%s)".format(this.productPrefix, this.productIterator.mkString(",")) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala new file mode 100644 index 000000000..d4d9aed9d --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala @@ -0,0 +1,81 @@ +package com.twitter.servo.repository + +import com.twitter.finagle.mux.ClientDiscardedRequestException +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.{CancelledConnectionException, CancelledRequestException} +import com.twitter.servo.util.{Gate, SuccessRateTracker} +import com.twitter.util.Throwables.RootCause +import java.util.concurrent.CancellationException + +object SuccessRateTrackingRepository { + + /** + * (successes, failures) + */ + type SuccessRateObserver = (Int, Int) => Unit + + /** + * Identifies [[Throwable]]s that should not be counted as failures. + * + * This is a total function instead of a partial function so it can reliably recurse on itself + * to find a root cause. + */ + def isCancellation(t: Throwable): Boolean = + t match { + // We don't consider CancelledRequestExceptions or CancelledConnectionExceptions to be + // failures in order not to tarnish our success rate on upstream request cancellations. 
+ case _: CancelledRequestException => true + case _: CancelledConnectionException => true + // non-finagle backends can throw CancellationExceptions when their futures are cancelled. + case _: CancellationException => true + // Mux servers can return ClientDiscardedRequestException. + case _: ClientDiscardedRequestException => true + // Most of these exceptions can be wrapped in com.twitter.finagle.Failure + case RootCause(t) => isCancellation(t) + case _ => false + } + + /** + * Return a Success Rate (SR) tracking repository along with the gate controlling it. + * + * @param stats Provides availability gauge + * @param availabilityFromSuccessRate function to calculate availability given SR + * @param tracker strategy for tracking (usually recent) SR + * @param shouldIgnore don't count certain exceptions as failures, e.g. cancellations + * @return tuple of (SR tracking repo, gate closing if SR drops too far) + */ + def withGate[Q <: Seq[K], K, V]( + stats: StatsReceiver, + availabilityFromSuccessRate: Double => Double, + tracker: SuccessRateTracker, + shouldIgnore: Throwable => Boolean = isCancellation + ): (KeyValueRepository[Q, K, V] => KeyValueRepository[Q, K, V], Gate[Unit]) = { + val successRateGate = tracker.observedAvailabilityGate(availabilityFromSuccessRate, stats) + + (new SuccessRateTrackingRepository[Q, K, V](_, tracker.record, shouldIgnore), successRateGate) + } +} + +/** + * A KeyValueRepository that provides feedback on query success rate to + * a SuccessRateObserver. Both found and not found are considered successful + * responses, while failures are not. Cancellations are ignored by default. 
+ */ +class SuccessRateTrackingRepository[Q <: Seq[K], K, V]( + underlying: KeyValueRepository[Q, K, V], + observer: SuccessRateTrackingRepository.SuccessRateObserver, + shouldIgnore: Throwable => Boolean = SuccessRateTrackingRepository.isCancellation) + extends KeyValueRepository[Q, K, V] { + def apply(query: Q) = + underlying(query) onSuccess { kvr => + val nonIgnoredFailures = kvr.failed.values.foldLeft(0) { + case (count, t) if shouldIgnore(t) => count + case (count, _) => count + 1 + } + observer(kvr.found.size + kvr.notFound.size, nonIgnoredFailures) + } onFailure { t => + if (!shouldIgnore(t)) { + observer(0, query.size) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala new file mode 100644 index 000000000..4c4fe7e4d --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala @@ -0,0 +1,50 @@ +package com.twitter.servo + +import com.twitter.util.Future + +package object repository { + + /** + * Base repository type. Maps a Query to a future Result + */ + type Repository[-Q, +R] = Q => Future[R] + + /** + * RepositoryFilters can be chained onto Repositories to asynchronously apply transformations to + * Repository results. + */ + type RepositoryFilter[-Q, -R, +S] = (Q, Future[R]) => Future[S] + + type KeyValueResult[K, V] = keyvalue.KeyValueResult[K, V] + val KeyValueResult = keyvalue.KeyValueResult + + /** + * A KeyValueRepository is a type of repository that handles bulk gets of data. The query + * defines the values to fetch, and is usually made up of a Seq[K], possibly with other + * contextual information needed to perform the query. The result is a KeyValueResult, + * which contains a break-out of found, notFound, and failed key lookups. The set of + * keys may or may not be computable locally from the query.
This top-level type does not + * require that the keys are computable from the query, but certain instances, such as + * CachingKeyValueRepository, do require key-computability. + */ + type KeyValueRepository[Q, K, V] = Repository[Q, KeyValueResult[K, V]] + + type CounterKeyValueRepository[K] = KeyValueRepository[Seq[K], K, Long] + + /** + * For KeyValueRepository scenarios where the query is a sequence of keys, a SubqueryBuilder + * defines how to convert a sub-set of the keys from the query into a query. + */ + type SubqueryBuilder[Q <: Seq[K], K] = (Seq[K], Q) => Q + + /** + * A SubqueryBuilder where the query type is nothing more than a sequence of keys. + */ + @deprecated("use keysAsQuery", "1.1.0") + def KeysAsQuery[K]: SubqueryBuilder[Seq[K], K] = keysAsQuery[K] + + /** + * A SubqueryBuilder where the query type is nothing more than a sequence of keys. + */ + def keysAsQuery[K]: SubqueryBuilder[Seq[K], K] = (keys, parentQuery) => keys +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala new file mode 100644 index 000000000..a6dd69e26 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala @@ -0,0 +1,112 @@ +package com.twitter.servo.store + +import com.twitter.servo.cache.{Cached, CachedValueStatus, LockingCache} +import com.twitter.logging.Logger +import com.twitter.util.{Future, Time} + +/** + * Wraps a cache around an underlying store. + * + * CachingStore is a specialization of TransformingCachingStore where the store and cache are + * assumed to have the same key and value types. See TransformingCachingStore for a discussion + * of the arguments to CachingStore. 
+ */ +class CachingStore[K, V]( + cache: LockingCache[K, Cached[V]], + underlying: Store[K, V], + valuePicker: LockingCache.Picker[Cached[V]], + key: V => K) + extends TransformingCachingStore[K, V, K, V]( + cache, + underlying, + valuePicker, + key, + identity, + identity + ) + +/** + * Wraps a cache of differing key/value types around an underlying store. + * + * Updates are applied first (unmodified) to the underlying store and then + * the cache is updated after running the key/value through a one-way function + * to derive the key/value as expected by the cache. + * + * @param cache + * the wrapping cache + * + * @param underlying + * the underlying store + * + * @param valuePicker + * chooses between existing and new value + * + * @param key + * computes a key from the value being stored + * + * @param cacheKey + * transforms the store's key type to the cache's key type + * + * @param cacheValue + * transforms the store's value type to the cache's value type + */ +class TransformingCachingStore[K, V, CacheK, CacheV]( + cache: LockingCache[CacheK, Cached[CacheV]], + underlying: Store[K, V], + valuePicker: LockingCache.Picker[Cached[CacheV]], + key: V => K, + cacheKey: K => CacheK, + cacheValue: V => CacheV) + extends Store[K, V] { + protected[this] val log = Logger.get(getClass.getSimpleName) + + override def create(value: V): Future[V] = { + chainCacheOp[V]( + underlying.create(value), + result => cache(key(result), Some(result), CachedValueStatus.Found, "new") + ) + } + + override def update(value: V): Future[Unit] = { + chainCacheOp[Unit]( + underlying.update(value), + _ => cache(key(value), Some(value), CachedValueStatus.Found, "updated") + ) + } + + override def destroy(key: K): Future[Unit] = { + chainCacheOp[Unit]( + underlying.destroy(key), + _ => cache(key, None, CachedValueStatus.Deleted, "deleted") + ) + } + + /** + * Subclasses may override this to alter the relationship between the result + * of the underlying Store operation and the result of the 
Cache operation. + * By default, the cache operation occurs asynchronously and only upon success + * of the store operation. Cache operation failures are logged but otherwise + * ignored. + */ + protected[this] def chainCacheOp[Result]( + storeOp: Future[Result], + cacheOp: Result => Future[Unit] + ): Future[Result] = { + storeOp onSuccess { cacheOp(_) } + } + + protected[this] def cache( + key: K, + value: Option[V], + status: CachedValueStatus, + desc: String + ): Future[Unit] = { + val now = Time.now + val cached = Cached(value map { cacheValue(_) }, status, now, None, Some(now)) + val handler = LockingCache.PickingHandler(cached, valuePicker) + cache.lockAndSet(cacheKey(key), handler).unit onFailure { + case t => + log.error(t, "exception caught while caching %s value", desc) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala new file mode 100644 index 000000000..96866e854 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala @@ -0,0 +1,13 @@ +package com.twitter.servo.store + +import com.twitter.util.Future + +trait KeyValueStore[C, K, V, R] { + def put(ctx: C, key: K, value: Option[V]): Future[R] = multiPut(ctx, Seq((key -> value))) + def multiPut(ctx: C, kvs: Seq[(K, Option[V])]): Future[R] +} + +trait SimpleKeyValueStore[K, V] extends KeyValueStore[Unit, K, V, Unit] { + def put(key: K, value: Option[V]): Future[Unit] = multiPut((), Seq(key -> value)) + def multiPut(kvs: Seq[(K, Option[V])]): Future[Unit] = multiPut((), kvs) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala new file mode 100644 index 000000000..ae582c307 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala @@ -0,0 +1,32 @@ +package 
com.twitter.servo.store + +import com.twitter.finagle.stats.{StatsReceiver, Stat} +import com.twitter.servo.util.{ExceptionCounter, LogarithmicallyBucketedTimer} +import com.twitter.util.Future + +class StoreObserver(statsReceiver: StatsReceiver) { + protected[this] val exceptionCounter = new ExceptionCounter(statsReceiver) + + def time[T](f: => Future[T]) = { + Stat.timeFuture(statsReceiver.stat(LogarithmicallyBucketedTimer.LatencyStatName))(f) + } + + def exception(ts: Throwable*): Unit = exceptionCounter(ts) +} + +class ObservableStore[K, V](underlying: Store[K, V], statsReceiver: StatsReceiver) + extends Store[K, V] { + protected[this] val observer = new StoreObserver(statsReceiver) + + override def create(value: V) = observer.time { + underlying.create(value) onFailure { observer.exception(_) } + } + + override def update(value: V) = observer.time { + underlying.update(value) onFailure { observer.exception(_) } + } + + override def destroy(key: K) = observer.time { + underlying.destroy(key) onFailure { observer.exception(_) } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala new file mode 100644 index 000000000..a86283b82 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala @@ -0,0 +1,93 @@ +package com.twitter.servo.store + +import com.twitter.servo.util.Gate +import com.twitter.util.Future + +/** + * models a write-store of key/values + */ +trait Store[K, V] { + def create(value: V): Future[V] + def update(value: V): Future[Unit] + def destroy(key: K): Future[Unit] +} + +object Store { + + /** + * Filter store operations based on either the key or the value. 
If the gate passes then forward + * the operation to the underlying store, if not then forward the operation to a null store + * (effectively a no-op) + */ + def filtered[K, V](store: Store[K, V], filterKey: Gate[K], filterValue: Gate[V]) = + new GatedStore(store, new NullStore[K, V], filterKey, filterValue) + + /** + * A store type that selects between one of two underlying stores based on the key/value of the + * operation. If the key/value gate passes, forward the operation to the primary store, otherwise + * forward the operation to the secondary store. + */ + def gated[K, V]( + primary: Store[K, V], + secondary: Store[K, V], + usePrimaryKey: Gate[K], + usePrimaryValue: Gate[V] + ) = new GatedStore(primary, secondary, usePrimaryKey, usePrimaryValue) + + /** + * A store type that selects between one of two underlying stores based on a predicative value, + * which may change dynamically at runtime. + */ + def deciderable[K, V]( + primary: Store[K, V], + backup: Store[K, V], + primaryIsAvailable: => Boolean + ) = new DeciderableStore(primary, backup, primaryIsAvailable) +} + +trait StoreWrapper[K, V] extends Store[K, V] { + def underlyingStore: Store[K, V] + + override def create(value: V) = underlyingStore.create(value) + override def update(value: V) = underlyingStore.update(value) + override def destroy(key: K) = underlyingStore.destroy(key) +} + +class NullStore[K, V] extends Store[K, V] { + override def create(value: V) = Future.value(value) + override def update(value: V) = Future.Done + override def destroy(key: K) = Future.Done +} + +/** + * A Store type that selects between one of two underlying stores based + * on the key/value, which may change dynamically at runtime. 
+ */ +private[servo] class GatedStore[K, V]( + primary: Store[K, V], + secondary: Store[K, V], + usePrimaryKey: Gate[K], + usePrimaryValue: Gate[V]) + extends Store[K, V] { + private[this] def pick[T](item: T, gate: Gate[T]) = if (gate(item)) primary else secondary + + override def create(value: V) = pick(value, usePrimaryValue).create(value) + override def update(value: V) = pick(value, usePrimaryValue).update(value) + override def destroy(key: K) = pick(key, usePrimaryKey).destroy(key) +} + +/** + * A Store type that selects between one of two underlying stores based + * on a predicative value, which may change dynamically at runtime. + */ +class DeciderableStore[K, V]( + primary: Store[K, V], + backup: Store[K, V], + primaryIsAvailable: => Boolean) + extends Store[K, V] { + private[this] def pick = if (primaryIsAvailable) primary else backup + + override def create(value: V) = pick.create(value) + override def update(value: V) = pick.update(value) + override def destroy(key: K) = pick.destroy(key) +} diff --git a/tweetypie/servo/repo/src/main/thrift/BUILD b/tweetypie/servo/repo/src/main/thrift/BUILD new file mode 100644 index 000000000..6ad3c0873 --- /dev/null +++ b/tweetypie/servo/repo/src/main/thrift/BUILD @@ -0,0 +1,13 @@ +create_thrift_libraries( + base_name = "thrift", + sources = ["**/*.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "java", + "scala", + "strato", + ], + provides_java_name = "servo-repo-thrift-java", + provides_scala_name = "servo-repo-thrift-scala", +) diff --git a/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift b/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift new file mode 100644 index 000000000..51b7373f3 --- /dev/null +++ b/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift @@ -0,0 +1,39 @@ +#@namespace scala com.twitter.servo.cache.thriftscala +#@ namespace strato com.twitter.servo.cache +// the java 
namespace is unused, but appeases the thrift Linter gods +namespace java com.twitter.servo.cache.thriftjava + +enum CachedValueStatus { + FOUND = 0, + NOT_FOUND = 1, + DELETED = 2, + SERIALIZATION_FAILED = 3 + DESERIALIZATION_FAILED = 4, + EVICTED = 5, + DO_NOT_CACHE = 6 +} + +/** + * Caching metadata for a binary cache value + */ +struct CachedValue { + 1: optional binary value + // can be used to distinguish between deletion tombstones and not-found tombstones + 2: CachedValueStatus status + // when was the cache value written + 3: i64 cached_at_msec + // set if the cache was read through + 4: optional i64 read_through_at_msec + // set if the cache was written through + 5: optional i64 written_through_at_msec + // This optional field is only read when the CachedValueStatus is DO_NOT_CACHE. + // When CachedValueStatus is DO_NOT_CACHE and this field is not set, the key + // will not be cached without a time limit. If the client wants to cache + // immediately, they would not set DO_NOT_CACHE. + 6: optional i64 do_not_cache_until_msec + // Indicates how many times we've successfully checked + // the cached value against the backing store. Should be initially set to 0. + // The client may choose to increase the soft TTL duration based on this value.
+ // See http://go/gd-dynamic-cache-ttls and http://go/strato-progressive-ttls for some use cases + 7: optional i16 soft_ttl_step +} (persisted='true') diff --git a/tweetypie/servo/request/BUILD b/tweetypie/servo/request/BUILD new file mode 100644 index 000000000..434ab68f4 --- /dev/null +++ b/tweetypie/servo/request/BUILD @@ -0,0 +1,5 @@ +target( + dependencies = [ + "tweetypie/servo/request/src/main/scala", + ], +) diff --git a/tweetypie/servo/request/src/main/scala/BUILD b/tweetypie/servo/request/src/main/scala/BUILD new file mode 100644 index 000000000..2d50540e5 --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/BUILD @@ -0,0 +1,20 @@ +scala_library( + sources = ["**/*.scala"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-request", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "tweetypie/servo/util", + "twitter-config/yaml", + "util/util-stats/src/main/scala", + ], + exports = [ + "tweetypie/servo/util", + ], +) diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala new file mode 100644 index 000000000..1547adbbd --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala @@ -0,0 +1,172 @@ +package com.twitter.servo.request + +import com.twitter.servo.gate.RateLimitingGate +import com.twitter.servo.util.Gate +import com.twitter.util.Future + +/** + * Authorizes a request based on the name of the method being called and the caller's (optional) ClientId.
+ */ +trait ClientRequestAuthorizer extends ((String, Option[String]) => Future[Unit]) { self => + + /** + * @param methodName the name of the Service method being called + * @param clientIdStrOpt an Option of the string value of the originating + * request's ClientId + */ + def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] + + /** + * Compose this authorizer with another so that one is applied after the other. + * + * The resultant authorizer requires both underlying authorizers to succeed in + * order to authorize a request. + */ + def andThen(other: ClientRequestAuthorizer) = new ClientRequestAuthorizer { + override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = { + self.apply(methodName, clientIdStrOpt) flatMap { _ => + other(methodName, clientIdStrOpt) + } + } + } +} + +object ClientRequestAuthorizer { + case class UnauthorizedException(msg: String) extends Exception(msg) + + protected[this] val noClientIdException = + Future.exception(new UnauthorizedException("No ClientId specified")) + protected[this] val unauthorizedException = + new UnauthorizedException("Your ClientId is not authorized.") + protected[this] val overRateLimitException = + new UnauthorizedException("Your ClientId is over the allowed rate limit.") + + /** + * Increment stats counters for this request. + * + * Note that ClientRequestAuthorizer.observed doesn't compose in the same fashion + * as other authorizers via `andThen`. In order to observe authorization results, + * pass in an underlying authorizer as an argument to observed. 
+ */ + def observed( + underlyingAuthorizer: ClientRequestAuthorizer, + observer: ClientRequestObserver + ) = new ClientRequestAuthorizer { + override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = { + val clientIdStr = clientIdStrOpt.getOrElse("no_client_id") + + observer(methodName, clientIdStrOpt map { Seq(_) }) + + underlyingAuthorizer(methodName, clientIdStrOpt) onFailure { _ => + observer.unauthorized(methodName, clientIdStr) + } onSuccess { _ => + observer.authorized(methodName, clientIdStr) + } + } + } + + def observed(observer: ClientRequestObserver): ClientRequestAuthorizer = + observed(ClientRequestAuthorizer.permissive, observer) + + /** + * Lets all requests through. + */ + def permissive = new ClientRequestAuthorizer { + override def apply(methodName: String, clientIdStrOpt: Option[String]) = Future.Done + } + + /** + * A Generic Authorizer that allows you to pass in your own authorizer function (filter). + * The filter should take in methodName and clientId and return a Boolean decision + * + * Note: Requires requests to have ClientIds. + * @param exception return this exception if the request does not pass the filter + */ + def filtered( + filter: (String, String) => Boolean, + exception: Exception = unauthorizedException + ): ClientRequestAuthorizer = + new ClientRequestAuthorizer { + val futureException = Future.exception(exception) + + override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = { + clientIdStrOpt match { + case Some(clientIdStr) => + if (filter(methodName, clientIdStr)) + Future.Done + else + futureException + case None => + noClientIdException + } + } + } + + /** + * Authorizes client requests based on an allowlist of ClientId strings. + */ + def allowlisted(allowlist: Set[String]): ClientRequestAuthorizer = + filtered { (_, clientIdStr) => + allowlist.contains(clientIdStr) + } + + /** + * Authorizes requests if and only if they have an associated ClientId.
+ */ + def withClientId: ClientRequestAuthorizer = filtered { (_, _) => + true + } + + /** + * Consult a (presumably) Decider-backed predicate to authorize requests by ClientId. + * @param exception return this exception if the request does not pass the filter + */ + def deciderable( + isAvailable: String => Boolean, + exception: Exception = unauthorizedException + ): ClientRequestAuthorizer = + filtered( + { (_, clientIdStr) => + isAvailable(clientIdStr) + }, + exception + ) + + /** + * Simple rate limiter for unknown client ids. Useful for letting new clients + * send some traffic without the risk of being overrun by requests. + * + * @param limitPerSecond Number of calls per second we can tolerate + */ + def rateLimited(limitPerSecond: Double): ClientRequestAuthorizer = { + gated(RateLimitingGate.uniform(limitPerSecond), overRateLimitException) + } + + /** + * Simple Gate based authorizer, will authorize according to the result of the gate regardless + * of the client/method name + */ + def gated( + gate: Gate[Unit], + exception: Exception = unauthorizedException + ): ClientRequestAuthorizer = { + deciderable(_ => gate(), exception) + } + + /** + * @return A ClientRequestAuthorizer that switches between two provided + * ClientRequestAuthorizers depending on a decider. 
+ */ + def select( + decider: Gate[Unit], + ifTrue: ClientRequestAuthorizer, + ifFalse: ClientRequestAuthorizer + ): ClientRequestAuthorizer = + new ClientRequestAuthorizer { + override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = + decider.pick( + ifTrue(methodName, clientIdStrOpt), + ifFalse(methodName, clientIdStrOpt) + ) + } +} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala new file mode 100644 index 000000000..e7de2ab04 --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala @@ -0,0 +1,58 @@ +package com.twitter.servo.request + +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.Future + +object ClientRequestObserver { + private[request] val noClientIdKey = "no_client_id" +} + +/** + * Provides per-request stats based on Finagle ClientId. + * + * @param statsReceiver the StatsReceiver used for counting + * @param observeAuthorizationAttempts: if true (the default), observe all attempts. If false, + * only failures (unauthorized attempts) are observed. 
+ */ +class ClientRequestObserver( + statsReceiver: StatsReceiver, + observeAuthorizationAttempts: Boolean = true) + extends ((String, Option[Seq[String]]) => Future[Unit]) { + import ClientRequestObserver.noClientIdKey + + protected[this] val scopedReceiver = statsReceiver.scope("client_request") + protected[this] val unauthorizedReceiver = scopedReceiver.scope("unauthorized") + protected[this] val unauthorizedCounter = scopedReceiver.counter("unauthorized") + + /** + * @param methodName the name of the Service method being called + * @param clientIdScopesOpt optional sequence of scope strings representing the + * originating request's ClientId + */ + override def apply(methodName: String, clientIdScopesOpt: Option[Seq[String]]): Future[Unit] = { + if (observeAuthorizationAttempts) { + scopedReceiver.counter(methodName).incr() + clientIdScopesOpt match { + case Some(clientIdScopes) => + scopedReceiver.scope(methodName).counter(clientIdScopes: _*).incr() + + case None => + scopedReceiver.scope(methodName).counter(noClientIdKey).incr() + } + } + Future.Done + } + + /** + * Increments a counter for unauthorized requests. + */ + def unauthorized(methodName: String, clientIdStr: String): Unit = { + unauthorizedCounter.incr() + unauthorizedReceiver.scope(methodName).counter(clientIdStr).incr() + } + + def authorized(methodName: String, clientIdStr: String): Unit = {} +} + +object NullClientRequestObserver extends ClientRequestObserver(NullStatsReceiver) diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala new file mode 100644 index 000000000..5ccc171ed --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala @@ -0,0 +1,233 @@ +package com.twitter.servo.request + +import com.twitter.config.yaml.YamlMap +import com.twitter.util.Try + +/** + * Module for defining a set of permissions. 
This is similar to + * Enumeration in the scala standard library. + * + * To use, instantiate a subclass: + * + * {{{ + * object MyPermissions extends PermissionModule { + * val Eat = create("eat") + * val Drink = create("drink") + * } + * }}} + * + * Permissions only support one kind of authorization, which is that + * you can check whether a holder of permissions has all of the + * permissions in a particular set. + * + * {{{ + * val snack = MyPermissions.Eat + * val dinner = MyPermissions.Eat union MyPermissions.Drink + * val canEat = MyPermissions.Eat + * dinner satisfiedBy canEat // false + * snack satisfiedBy canEat // true + * }}} + * + * Each instance will have its own distinct permission type, so it is + * not possible to confuse the permissions defined in different + * modules. + * + * {{{ + * scala> object P1 extends PermissionModule { val Read = create("read") } + * scala> object P2 extends PermissionModule { val Read = create("read") } + * scala> P1.Read satisfiedBy P2.Read + * error: type mismatch; + * found : P2.Permissions + * required: P1.Permissions + * P1.Read satisfiedBy P2.Read + * }}} + * + * Once an instance has been created, it will not be possible to + * create new permissions. The intention is that all permissions will + * be created at object initialization time. + * + * Each instance also supplies functionality for accessing permissions + * by name, including parsing client permission maps from YAML. + */ +trait PermissionModule { + // This var is used during object initialization to collect all of + // the permissions that are created in the subclass. The lazy + // initializer for `All` will set this to null as a side-effect, so + // that further permission creations are not allowed. + @volatile private[this] var allPerms: Set[String] = Set.empty + + /** + * Create a new Permission with the given name. Note that "*" is a + * reserved string for `All` permissions, thus it cannot be + * used as the name of an individual permission.
+ * + * This method must be called before `All` is accessed. + * The intention is that it should be called as part of + * object initialization. + * + * Note that some methods of PermissionModule access `All`, so it is + * best to create all of your permissions before doing anything + * else. + * + * @throws RuntimeException: If it is called after `All` has been + * initialized. + */ + protected def create(name: String) = { + synchronized { + if (allPerms == null) { + throw new RuntimeException("Permission creation after initialization") + } + + allPerms = allPerms union Set(name) + } + + new Permissions(Set(name)) + } + + /** + * Get a set of permissions with this single permission by name. It + * will return None if there is no permission by that name. + * + * No permissions may be defined after this method is called. + */ + def get(name: String): Option[Permissions] = All.get(name) + + /** + * Get the set of permissions that contains that single permission + * by name. + * + * @throws RuntimeException if there is no defined permission with + * this name. + * + * No permissions may be defined after this method is called. + */ + def apply(name: String): Permissions = + get(name) match { + case None => throw new RuntimeException("Unknown permission: " + name) + case Some(p) => p + } + + /** + * No permissions (required or held) + */ + val Empty: Permissions = new Permissions(Set.empty) + + /** + * All defined permissions. + * + * No permissions may be defined after this value is initialized. + */ + lazy val All: Permissions = { + val p = new Permissions(allPerms) + allPerms = null + p + } + + /** + * Load permissions from a YAML map. + * + * No permissions may be defined after this method is called. + * + * @return a map from client identifier to permission set. + * @throws RuntimeException when the permission from the Map is not defined. 
+ */ + def fromYaml(m: YamlMap): Try[Map[String, Permissions]] = + Try { + m.keys.map { k => + k -> fromSeq((m yamlList k).map { _.toString }) + }.toMap + } + + /** + * Load permissions from map. + * + * No permissions may be defined after this method is called. + * + * @param m a map from client identifier to a set of permission strings + * + * @return a map from client identifier to permission set. + * @throws RuntimeException when the permission from the Map is not defined. + */ + def fromMap(m: Map[String, Seq[String]]): Try[Map[String, Permissions]] = + Try { + m.map { case (k, v) => k -> fromSeq(v) } + } + + /** + * Load permissions from seq. + * + * The string "all" stands for `All`, unless a permission named "all" + * has been created, in which case it is looked up like any other name. + * + * No permissions may be defined after this method is called. + * + * @param permissionStrings a Seq of permission strings + * + * @return a permission set. + * @throws RuntimeException when the permission is not defined. + */ + def fromSeq(permissionStrings: Seq[String]): Permissions = + permissionStrings.foldLeft(Empty) { (p, v) => + v match { + case "all" if get("all").isEmpty => All + case other => p union apply(other) + } + } + + /** + * Authorizer based on a Permissions for RPC method names. + * @param requiredPermissions + * map of RPC method names to Permissions required for that RPC + * @param clientPermissions + * map of ClientId to Permissions a client has + */ + def permissionBasedAuthorizer( + requiredPermissions: Map[String, Permissions], + clientPermissions: Map[String, Permissions] + ): ClientRequestAuthorizer = + ClientRequestAuthorizer.filtered { (methodName, clientId) => + requiredPermissions.get(methodName) exists { + _ satisfiedBy clientPermissions.getOrElse(clientId, Empty) + } + } + + /** + * A set of permissions. This can represent either permissions that + * are required to perform an action, or permissions that are held + * by a client. + * + * This type cannot be instantiated directly. Use the methods of + * your subclass of PermissionModule to do so.
+ */ + class Permissions private[PermissionModule] (private[PermissionModule] val permSet: Set[String]) { + + /** + * Does the supplied set of held permissions satisfy the + * requirements of this set of permissions? + * + * For example, if this set of permissions is Set("read"), and the + * other set of permissions is Set("read", "write"), then the + * other set of permissions satisfies this set. + */ + def satisfiedBy(other: Permissions): Boolean = permSet subsetOf other.permSet + + override def equals(other: Any): Boolean = + other match { + case p: Permissions => p.permSet == permSet + case _ => false + } + + override lazy val hashCode: Int = 5 + 37 * permSet.hashCode + + /** + * Get a single permission + */ + def get(permName: String): Option[Permissions] = + if (permSet contains permName) Some(new Permissions(Set(permName))) else None + + /** + * Create a new permission set that holds the permissions of this + * object as well as the permissions of the other object. + */ + def union(other: Permissions): Permissions = new Permissions(permSet union other.permSet) + + override def toString: String = "Permissions(%s)".format(permSet.mkString(", ")) + } +} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala new file mode 100644 index 000000000..e80044c2d --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala @@ -0,0 +1,120 @@ +package com.twitter.servo.request + +import com.twitter.finagle.tracing.TraceId +import com.twitter.servo.util.{FunctionArrow, Effect, FutureArrow, FutureEffect, Observable} +import com.twitter.util.{Future, Try} + +/** + * Useful mixins for request types. + */ +trait HasTraceId { + + /** + * The Finagle TraceId of the request. + */ + def traceId: TraceId +} + +/** + * A collection of RequestFilter factory functions. 
+ * + * type RequestFilter[A] = FutureArrow[A, A] + */ +object RequestFilter { + + /** + * Produce a RequestFilter from a function `A => Future[A]`. + */ + def apply[A](f: A => Future[A]): RequestFilter[A] = FutureArrow(f) + + /** + * Produce a RequestFilter from a function `A => Try[A]`. + * + * The Try is evaluated within a Future. Thus, Throw results are translated + * to `Future.exception`s. + */ + def fromTry[A](f: A => Try[A]): RequestFilter[A] = FutureArrow.fromTry(f) + + /** + * A no-op RequestFilter; it simply returns the request. + * + * This forms a monoid with `append`. + */ + def identity[A]: RequestFilter[A] = FutureArrow.identity + + /** + * Appends two RequestFilters together. + * + * This forms a monoid with 'identity'. + */ + def append[A](a: RequestFilter[A], b: RequestFilter[A]): RequestFilter[A] = + FutureArrow.append(a, b) + + /** + * Compose an ordered series of RequestFilters into a single object. + */ + def all[A](filters: RequestFilter[A]*): RequestFilter[A] = + filters.foldLeft(identity[A])(append) + + /** + * Produce a RequestFilter that applies a side-effect, returning the argument + * request as-is. + */ + def effect[A](effect: Effect[A]): RequestFilter[A] = + FutureArrow.fromFunctionArrow(FunctionArrow.effect(effect)) + + /** + * Produce a RequestFilter that applies a side-effect, returning the argument + * request as-is. + */ + def effect[A](effect: FutureEffect[A]): RequestFilter[A] = FutureArrow.effect(effect) + + /** + * Returns a new request filter where all Futures returned from `a` have their + * `masked` method called + */ + def masked[A](a: RequestFilter[A]): RequestFilter[A] = a.masked + + /** + * Produces a RequestFilter that proxies to one of two others, depending on a + * predicate. + */ + def choose[A]( + predicate: A => Boolean, + ifTrue: RequestFilter[A], + ifFalse: RequestFilter[A] + ): RequestFilter[A] = + FutureArrow.choose(predicate, ifTrue, ifFalse) + + /** + * Guard the application of a filter on a predicate. 
The filter is applied + * if the predicate returns true, otherwise, the request is simply returned. + */ + def onlyIf[A](predicate: A => Boolean, f: RequestFilter[A]): RequestFilter[A] = + FutureArrow.onlyIf(predicate, f) + + /** + * Produces a RequestFilter that authorizes requests by applying an + * authorization function `A => Future[Unit]`. If the authorizer function + * results in a Future exception, requests are failed. Otherwise, they pass. + */ + def authorized[A <: Observable](authorizer: ClientRequestAuthorizer): RequestFilter[A] = + RequestFilter[A] { request => + authorizer(request.requestName, request.clientIdString) map { _ => + request + } + } + + /** + * Produces a RequestFilter that applies a ClientRequestObserver to requests. + * + * Used to increment counters and track stats for requests. + */ + def observed[A <: Observable](observer: ClientRequestObserver): RequestFilter[A] = + RequestFilter[A] { request => + val clientIdScopesOpt = request.clientIdString map { Seq(_) } + observer(request.requestName, clientIdScopesOpt) map { _ => + request + } + } +} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala new file mode 100644 index 000000000..207999580 --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala @@ -0,0 +1,24 @@ +package com.twitter.servo.request + +/** + * A collection of RequestHandler factory functions. + * + * type RequestHandler[-A, +B] = FutureArrow[A, B] + */ +object RequestHandler { + + /** + * Terminate a RequestFilter with a RequestHandler, producing a new handler. 
+ */ + def apply[A, B <: A, C]( + filter: RequestFilter[A], + handler: RequestHandler[B, C] + ): RequestHandler[B, C] = + new RequestHandler[B, C] { + override def apply(request: B) = { + filter(request: A) flatMap { filteredRequest => + handler(filteredRequest.asInstanceOf[B]) + } + } + } +} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala new file mode 100644 index 000000000..c02b4161c --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala @@ -0,0 +1,35 @@ +package com.twitter.servo + +import com.twitter.servo.util.FutureArrow + +package object request { + + /** + * RequestFilters provide a mechanism for composing a chain of actions + * (e.g. logging, authentication, replication, etc) to be performed per + * request. The intention is for a series of RequestFilters to be terminated in a + * RequestHandler, which returns an object of some response type. + * + * Upon completion of a filter's work, the convention is to either: + * + * a) Return a Future of a request object of type `A` to be passed to the next + * member of the filter/handler chain. + * b) Return a Future response outright in cases where request handling must + * be halted at the current filter (i.e. returning `Future.exception(...)`). + * + * @tparam A + * A type encapsulating all context and data required to satisfy a request. + */ + type RequestFilter[A] = FutureArrow[A, A] + + /** + * A handler of requests parameterized on the request and response types. + * + * @tparam A + * A type encapsulating all context and data required to satisfy a request. + * + * @tparam B + * A response type.
+ */ + type RequestHandler[-A, +B] = FutureArrow[A, B] +} diff --git a/tweetypie/servo/util/BUILD b/tweetypie/servo/util/BUILD new file mode 100644 index 000000000..b27c20631 --- /dev/null +++ b/tweetypie/servo/util/BUILD @@ -0,0 +1,6 @@ +target( + tags = ["bazel-compatible"], + dependencies = [ + "tweetypie/servo/util/src/main/scala", + ], +) diff --git a/tweetypie/servo/util/src/main/scala/BUILD b/tweetypie/servo/util/src/main/scala/BUILD new file mode 100644 index 000000000..2a6d5f1c5 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/BUILD @@ -0,0 +1,53 @@ +EXCEPTION_SOURCES = [ + "com/twitter/servo/util/Effect.scala", + "com/twitter/servo/util/ExceptionCounter.scala", + "com/twitter/servo/util/Gate.scala", + "com/twitter/servo/util/ThrowableHelper.scala", + "com/twitter/servo/util/package.scala", +] + +scala_library( + sources = ["**/*.scala"] + exclude_globs(EXCEPTION_SOURCES), + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-util", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + ":exception", + "3rdparty/jvm/com/google/guava", + "3rdparty/jvm/com/google/inject:guice", + "finagle/finagle-core/src/main", + "finagle/finagle-mux/src/main/scala", + "scrooge/scrooge-core", + "scrooge/scrooge-serializer", + "util-internal/scribe", + "util/util-logging/src/main/scala/com/twitter/logging", + "util/util-stats/src/main/scala", + ], + exports = [ + ":exception", + "util/util-logging/src/main/scala/com/twitter/logging", + ], +) + +scala_library( + name = "exception", + sources = EXCEPTION_SOURCES, + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-util-exception", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "util/util-core:util-core-util", + "util/util-stats/src/main/scala", + ], +) diff --git 
a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala new file mode 100644 index 000000000..9396c38f7 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala @@ -0,0 +1,147 @@ +package com.twitter.servo.data + +import scala.language.existentials + +object Lens { + private[this] val _identity = iso[Any, Any](x => x, x => x) + + /** + * The identity lens. + */ + def identity[A] = _identity.asInstanceOf[Lens[A, A]] + + /** + * Convenience method for creating lenses with slightly more + * efficient setters. + */ + def checkEq[A, B](get: A => B, set: (A, B) => A) = Lens[A, B](get, set).checkEq + + /** + * Create a lens from an isomorphism. + */ + def iso[A, B](to: A => B, from: B => A) = Lens[A, B](to, (_, x) => from(x)) + + /** + * Using multiple lenses, copy multiple fields from one object to another, returning + * the updated result. + */ + def copyAll[A](lenses: Lens[A, _]*)(src: A, dst: A): A = + lenses.foldLeft(dst) { (t, l) => + l.copy(src, t) + } + + /** + * setAll can be used to set multiple values using multiple lenses on the same input + * value in one call, which is more readable than nested calls. For example, say + * that we have lenses (lensX: Lens[A, X]), (lensY: Lens[A, Y]), and (lensZ: Lens[A, Z]), + * then instead of writing: + * + * lensX.set(lensY.set(lensZ.set(a, z), y), x) + * + * you can write: + * + * Lens.setAll(a, lensX -> x, lensY -> y, lensZ -> z) + */ + def setAll[A](a: A, lensAndValues: ((Lens[A, B], B) forSome { type B })*): A = + lensAndValues.foldLeft(a) { case (a, (l, b)) => l.set(a, b) } + + /** + * Combines two lenses into one that gets and sets a tuple of values. 
+ */ + def join[A, B, C](lensB: Lens[A, B], lensC: Lens[A, C]): Lens[A, (B, C)] = + Lens[A, (B, C)]( + a => (lensB.get(a), lensC.get(a)), + { case (a, (b, c)) => lensC.set(lensB.set(a, b), c) } + ) + + /** + * Combines three lenses into one that gets and sets a tuple of values. + */ + def join[A, B, C, D]( + lensB: Lens[A, B], + lensC: Lens[A, C], + lensD: Lens[A, D] + ): Lens[A, (B, C, D)] = + Lens[A, (B, C, D)]( + a => (lensB.get(a), lensC.get(a), lensD.get(a)), + { case (a, (b, c, d)) => lensD.set(lensC.set(lensB.set(a, b), c), d) } + ) +} + +/** + * A Lens is a first-class getter/setter. The value of lenses is that + * they can be composed with other operations. + * + * Note that it is up to you to ensure that the functions you pass to + * Lens obey the following laws for all inputs: + * + * a => set(a, get(a)) == a + * (a, b) => get(set(a, b)) == b + * (a, b, b1) => set(set(a, b), b1) == set(a, b1) + * + * The intuition for the name Lens[A, B] is that you are "viewing" A + * through a Lens that lets you see (and manipulate) a B. + * + * See e.g. + * http://stackoverflow.com/questions/5767129/lenses-fclabels-data-accessor-which-library-for-structure-access-and-mutatio#answer-5769285 + * for a more in-depth explanation of lenses. + */ +case class Lens[A, B](get: A => B, set: (A, B) => A) { + + /** + * Get the field. + */ + def apply(a: A) = get(a) + + /** + * Compose with another lens, such that the setter updates the + * outermost structure, and the getter gets the innermost structure. + */ + def andThen[C](next: Lens[B, C]) = + Lens(get andThen next.get, (a: A, c: C) => set(a, next.set(get(a), c))) + + /** + * An operator alias for `andThen`. + */ + def >>[C](next: Lens[B, C]) = andThen(next) + + /** + * Lift the function on the viewed value to a function on the outer + * value. + */ + def update(f: B => B): A => A = a => set(a, f(get(a))) + + /** + * Copies the field from one object to another. 
+ */ + def copy(src: A, dst: A): A = set(dst, get(src)) + + /** + * Lift a mutation of the viewed value to a transform of the + * container. (E.g. a Mutation[Seq[UrlEntity]] to a Mutation[Tweet]) + */ + def mutation(m: Mutation[B]) = + Mutation[A] { a => + m(get(a)) map { set(a, _) } + } + + /** + * Create a new lens whose setter makes sure that the update would + * change the value. + * + * This should not change the meaning of the lens, but can possibly + * make it more efficient by avoiding copies when performing no-op + * sets. + * + * This is only worthwhile when the getter and equality comparison + * are cheap compared to the setter. + */ + def checkEq = Lens[A, B](get, (a, b) => if (get(a) == b) a else set(a, b)) + + /** + * Combines this lens and the given lens into one that gets and sets a tuple + * of values. + */ + def join[C](right: Lens[A, C]): Lens[A, (B, C)] = + Lens.join(this, right) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala new file mode 100644 index 000000000..78e08df74 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala @@ -0,0 +1,268 @@ +package com.twitter.servo.data + +import com.twitter.util.{Return, Throw, Try} +import com.twitter.finagle.stats.{Counter, StatsReceiver} +import com.twitter.servo.util.{Effect, Gate} + +object Mutation { + + /** + * A mutation that ignores its input and always returns the given + * value as new. Use checkEq if this value could be the same as the + * input. + */ + def const[T](x: T) = Mutation[T] { _ => + Some(x) + } + + private[this] val _unit = Mutation[Any] { _ => + None + } + + /** + * A "no-op" mutation that will never alter the value. + * + * For any Mutations A, (A also unit) == (unit also A) == A. + * + * Forms a monoid with also as the operation. 
+ */ + def unit[A]: Mutation[A] = _unit.asInstanceOf[Mutation[A]] + + /** + * Makes a Mutation out of a function. + */ + def apply[A](f: A => Option[A]): Mutation[A] = + new Mutation[A] { + override def apply(x: A) = f(x) + } + + /** + * Lift a function that returns the same type to a Mutation, using + * the type's notion of equality to detect when the mutation has + * not changed the value. + */ + def fromEndo[A](f: A => A): Mutation[A] = + Mutation[A] { x => + val y = f(x) + if (y == x) None else Some(y) + } + + /** + * Lift a partial function from A to A to a mutation. + */ + def fromPartial[A](f: PartialFunction[A, A]): Mutation[A] = Mutation[A](f.lift) + + /** + * Creates a new Mutation that applies all the given mutations in order. + */ + def all[A](mutations: Seq[Mutation[A]]): Mutation[A] = + mutations.foldLeft(unit[A])(_ also _) +} + +/** + * A Mutation encapsulates a computation that may optionally "mutate" a value, where + * "mutate" should be interpreted in the stateless/functional sense of making a copy with a + * change. If the value is unchanged, the mutation should return None. When mutations are + * composed with `also`, the final result will be None iff no mutation actually changed the + * value. + * + * Forms a monoid with Mutation.unit as unit and `also` as the + * combining operation. + * + * This abstraction is useful for composing changes to a value when + * some action (such as updating a cache) should be performed if the + * value has changed. + */ +trait Mutation[A] extends (A => Option[A]) { + + /** + * Convert this mutation to a function that always returns a + * result. If the mutation has no effect, it returns the original + * input. + * + * (convert to an endofunction on A) + */ + lazy val endo: A => A = + x => + apply(x) match { + case Some(v) => v + case None => x + } + + /** + * Apply this mutation, and then apply the next mutation to the + * result.
If this mutation leaves the value unchanged, the next + * mutation is invoked with the original input. + */ + def also(g: Mutation[A]): Mutation[A] = + Mutation[A] { x => + apply(x) match { + case None => g(x) + case someY @ Some(y) => + g(y) match { + case some @ Some(_) => some + case None => someY + } + } + } + + /** + * Apply this mutation, but refuse to return an altered value. This + * yields all of the effects of this mutation without affecting the + * final result. + */ + def dark: Mutation[A] = Mutation[A] { x => + apply(x); None + } + + /** + * Convert a Mutation on A to a Mutation on B by way of a pair of functions for + * converting from B to A and back. + */ + def xmap[B](f: B => A, g: A => B): Mutation[B] = + Mutation[B](f andThen this andThen { _ map g }) + + /** + * Converts a Mutation on A to a Mutation on Try[A], where the Mutation is only applied + * to Return values and any exceptions thrown by the underlying function are caught and + * returned as Some(Throw(_)). A Throw input is likewise returned as Some(Throw(_)). + */ + def tryable: Mutation[Try[A]] = + Mutation[Try[A]] { + case Throw(x) => Some(Throw(x)) + case Return(x) => + Try(apply(x)) match { + case Throw(y) => Some(Throw(y)) + case Return(None) => None + case Return(Some(y)) => Some(Return(y)) + } + } + + /** + * Perform this mutation only if the provided predicate returns true + * for the input. + */ + def onlyIf(predicate: A => Boolean): Mutation[A] = + Mutation[A] { x => + if (predicate(x)) this(x) else None + } + + /** + * Performs this mutation only if the given gate returns true. + */ + def enabledBy(enabled: Gate[Unit]): Mutation[A] = + enabledBy(() => enabled()) + + /** + * Performs this mutation only if the given function returns true. + */ + def enabledBy(enabled: () => Boolean): Mutation[A] = + onlyIf { _ => + enabled() + } + + /** + * A new mutation that returns the same result as this mutation, + * and additionally calls the specified Effect.
+ */ + def withEffect(effect: Effect[Option[A]]): Mutation[A] = + Mutation[A](this andThen effect.identity) + + /** + * Perform an equality check when a value is returned from the + * mutation. If the values are equal, then the mutation will yield + * None. + * + * This is useful for two reasons: + * + * 1. Any effects that are conditional upon mutation will not occur + * when the values are equal (e.g. updating a cache) + * + * 2. When using a Lens to lift a mutation to a mutation on a + * larger structure, checking equality on the smaller structure + * can prevent unnecessary copies of the larger structure. + */ + def checkEq = Mutation[A] { x => + this(x) match { + case someY @ Some(y) if y != x => someY + case _ => None + } + } + + /** + * Converts this mutation to a mutation of a different type, using a Lens to + * convert between types. + */ + def lensed[B](lens: Lens[B, A]): Mutation[B] = + Mutation[B](b => this(lens(b)).map(lens.set(b, _))) + + /** + * Convert this mutation to a mutation of a Seq of its type. It will + * yield None if no values are changed, or a Seq of both the changed + * and unchanged values if any value is mutated. + */ + def liftSeq = Mutation[Seq[A]] { xs => + var changed = false + val detectChange = Effect.fromPartial[Option[A]] { case Some(_) => changed = true } + val mutated = xs map (this withEffect detectChange).endo + if (changed) Some(mutated) else None + } + + /** + * Convert this mutation to a mutation of a Option of its type. It will yield + * None if the value is not changed, or a Some(Some(_)) if the value is mutated. + */ + def liftOption = Mutation[Option[A]] { + case None => None + case Some(x) => + this(x) match { + case None => None + case Some(y) => Some(Some(y)) + } + } + + /** + * Convert this mutation to a mutation of the values of a Map. It will + * yield None if no values are changed, or a Map with both the changed + * and unchanged values if any value is mutated. 
+ */ + def liftMapValues[K] = Mutation[Map[K, A]] { m => + var changed = false + val detectChange = Effect.fromPartial[Option[A]] { case Some(_) => changed = true } + val f = (this withEffect detectChange).endo + val mutated = m map { case (k, v) => (k, f(v)) } + if (changed) Some(mutated) else None + } + + /** + * Return a new mutation that returns the same result as this + * mutation, as well as incrementing the given counter when the + * value is mutated. + */ + def countMutations(c: Counter) = + this withEffect { Effect.fromPartial { case Some(_) => c.incr() } } + + /** + * Wrap a mutation in stats with the following counters: + * - none (mutation returned None) + * - noop (mutation returned a value equal to the input) + * - mutated (mutation returned a value different from the input) + */ + def withStats(stats: StatsReceiver): Mutation[A] = { + val none = stats.counter("none") + val noop = stats.counter("noop") + val mutated = stats.counter("mutated") + input: A => { + val result = apply(input) + result.fold(none.incr()) { output => + if (output == input) { + noop.incr() + } else { + mutated.incr() + } + } + result + } + } + +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala new file mode 100644 index 000000000..55d031784 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala @@ -0,0 +1,120 @@ +/** + * Provides the ability to partially tee traffic to a secondary + * service. + * + * This code was originally written to provide a way to provide + * production traffic to the TweetyPie staging cluster, selecting a + * consistent subset of tweet ids, to enable a production-like cache + * hit rate with a much smaller cache. + */ +package com.twitter.servo.forked + +import com.twitter.servo.data.Lens + +object Forked { + + /** + * A strategy for executing forked actions.
+ */ + type Executor = (() => Unit) => Unit + + /** + * Directly execute the forked action. + */ + val inlineExecutor: Executor = f => f() + + /** + * Produce objects of type A to send to a secondary target. + * Returning None signifies that nothing should be forked. + */ + type Fork[A] = A => Option[A] + + /** + * Fork the input unchanged, only when it passes the specified + * predicate. + * + * For instance, if your service has a get() method + */ + def forkWhen[T](f: T => Boolean): Fork[T] = + a => if (f(a)) Some(a) else None + + /** + * Fork a subset of the elements of the Seq, based on the supplied + * predicate. If the resulting Seq is empty, the secondary action + * will not be executed. + */ + def forkSeq[T](f: T => Boolean): Fork[Seq[T]] = { xs => + val newXs = xs filter f + if (newXs.nonEmpty) Some(newXs) else None + } + + /** + * Apply forking through lens. + */ + def forkLens[A, B](lens: Lens[A, B], f: Fork[B]): Fork[A] = + a => f(lens(a)).map(lens.set(a, _)) + + /** + * A factory for building actions that will partially tee their input + * to a secondary target. The executor is parameterized to make the + * execution strategy independent from the forking logic. + */ + def toSecondary[S](secondary: S, executor: Executor): S => Forked[S] = + primary => + new Forked[S] { + + /** + * Tee a subset of requests defined by the forking function to the + * secondary service. + */ + def apply[Q, R](fork: Forked.Fork[Q], action: (S, Q) => R): Q => R = { req => + fork(req) foreach { req => + executor(() => action(secondary, req)) + } + action(primary, req) + } + } + + /** + * A forked action builder that bypasses the forking altogether and + * just calls the supplied action on a service. + * + * This is useful for configurations that will sometimes have fork + * targets defined and sometimes not. 
+ */ + def notForked[S]: S => Forked[S] = + service => + new Forked[S] { + def apply[Q, R](unusedFork: Forked.Fork[Q], action: (S, Q) => R): Q => R = + action(service, _) + } +} + +/** + * Factory for forking functions, primarily useful for sending a copy + * of a stream of requests to a secondary service. + */ +trait Forked[S] { + import Forked._ + + /** + * Fork an action that takes two parameters, forking only on the + * first parameter, passing the second unchanged. + */ + def first[Q1, Q2, R]( + fork: Fork[Q1], + action: S => (Q1, Q2) => R + ): (Q1, Q2) => R = { + val f = + apply[(Q1, Q2), R]( + fork = p => + fork(p._1) map { q1 => + (q1, p._2) + }, + action = (svc, p) => action(svc)(p._1, p._2) + ) + (q1, q2) => f((q1, q2)) + } + + def apply[Q, R](fork: Fork[Q], action: (S, Q) => R): Q => R +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala new file mode 100644 index 000000000..5b2949e45 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala @@ -0,0 +1,82 @@ +package com.twitter.servo.forked + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging.Logger +import com.twitter.servo.util.ExceptionCounter +import com.twitter.util.{Duration, Time, Local, TimeoutException} +import java.util.concurrent.{LinkedBlockingQueue, TimeUnit, CountDownLatch} + +/** + * A forking action executor that executes the actions in a separate + * thread, using a bounded queue as the communication channel. If the + * queue is full (the secondary thread is slow to drain it), then the + * items will be dropped rather than enqueued. 
+ */ +class QueueExecutor(maxQueueSize: Int, stats: StatsReceiver) extends Forked.Executor { + private val forkExceptionsCounter = new ExceptionCounter(stats) + private val enqueuedCounter = stats.counter("forked_actions_enqueued") + private val droppedCounter = stats.counter("forked_actions_dropped") + private val log = Logger.get("Forked.QueueExecutor") + + @volatile private var isStopped = false + private val releaseCountDownLatch = new CountDownLatch(1) + private val queue = new LinkedBlockingQueue[() => Unit](maxQueueSize) + private val thread = new Thread { + override def run(): Unit = { + while (!isStopped) { + try { + queue.take()() + } catch { + // Ignore interrupts from other threads + case _: InterruptedException => + // TODO: handle fatal errors more seriously + case e: Throwable => + forkExceptionsCounter(e) + log.error(e, "Executing queued action") + } + } + releaseCountDownLatch.countDown() + } + } + + thread.setDaemon(true) + thread.start() + + /** + * Interrupts the thread and directs it to stop processing. This + * method will not return until the processing thread has finished + * or the timeout occurs. Ok to call multiple times. + */ + def release(timeout: Duration): Unit = { + if (!isStopped) { + isStopped = true + thread.interrupt() + releaseCountDownLatch.await(timeout.inMilliseconds, TimeUnit.MILLISECONDS) || { + throw new TimeoutException(timeout.toString) + } + } + } + + /** + * Blocks until all the items currently in the queue have been + * executed, or the timeout occurs. Mostly useful during testing. + */ + def waitForQueueToDrain(timeout: Duration): Unit = { + val latch = new CountDownLatch(1) + val start = Time.now + queue.offer(() => latch.countDown(), timeout.inMilliseconds, TimeUnit.MILLISECONDS) + val remaining = timeout - (Time.now - start) + latch.await(remaining.inMilliseconds, TimeUnit.MILLISECONDS) || { + throw new TimeoutException(remaining.toString) + } + } + + /** + * Queue the action for execution in this object's thread. 
+ */ + def apply(action: () => Unit) = + if (queue.offer(Local.closed(action))) + enqueuedCounter.incr() + else + droppedCounter.incr() +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala new file mode 100644 index 000000000..5cee23f22 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala @@ -0,0 +1,64 @@ +package com.twitter.servo.gate + +import com.google.common.annotations.VisibleForTesting +import com.google.common.util.concurrent.RateLimiter +import com.twitter.servo.util +import java.util.concurrent.TimeUnit + +/** + * A Rate Limiting Gate backed by com.google.common.util.concurrent.RateLimiter + * http://docs.guava-libraries.googlecode.com/git/javadoc/com/google/common/util/concurrent/RateLimiter.html + */ +object RateLimitingGate { + + /** + * Creates a Gate[Int] that returns true if acquiring number of permits + * from the ratelimiter succeeds. + */ + def weighted(permitsPerSecond: Double): util.Gate[Int] = { + val rateLimiter: RateLimiter = RateLimiter.create(permitsPerSecond) + util.Gate { rateLimiter.tryAcquire(_, 0, TimeUnit.SECONDS) } + } + + /** + * Creates a Gate[Unit] that returns true if acquiring a permit from the ratelimiter succeeds. + */ + def uniform(permitsPerSecond: Double): util.Gate[Unit] = { + weighted(permitsPerSecond) contramap { _ => + 1 + } + } + + /** + * Creates a Gate[Unit] with floating limit. Could be used with deciders. 
+ */ + def dynamic(permitsPerSecond: => Double): util.Gate[Unit] = + dynamic(RateLimiter.create, permitsPerSecond) + + @VisibleForTesting + def dynamic( + rateLimiterFactory: Double => RateLimiter, + permitsPerSecond: => Double + ): util.Gate[Unit] = { + val rateLimiter: RateLimiter = rateLimiterFactory(permitsPerSecond) + util.Gate { _ => + val currentRate = permitsPerSecond + if (rateLimiter.getRate != currentRate) { + rateLimiter.setRate(currentRate) + } + rateLimiter.tryAcquire(0L, TimeUnit.SECONDS) + } + } +} + +@deprecated("Use RateLimitingGate.uniform", "2.8.2") +class RateLimitingGate[T](permitsPerSecond: Double) extends util.Gate[T] { + private[this] val rateLimiter: RateLimiter = RateLimiter.create(permitsPerSecond) + + /** + * If a "permit" is available, this method acquires it and returns true + * Else returns false immediately without waiting + */ + override def apply[U](u: U)(implicit asT: <:<[U, T]): Boolean = + rateLimiter.tryAcquire(1, 0, TimeUnit.SECONDS) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.scala new file mode 100644 index 000000000..a23e9ed5f --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.scala @@ -0,0 +1,43 @@ +package com.twitter.servo.util + +/** + * Provides functions for computing prescribed feature availability based + * on some runtime condition(s). (e.g. watermark values) + */ +object Availability { + + /** + * Stay at 100% available down to a high watermark success rate. Then + * between high and low watermarks, dial down availability to a provided + * minimum. Never go below this level because we need some requests to + * track the success rate going back up. + * + * NOTE: watermarks and minAvailability must be between 0 and 1. 
+ */ + def linearlyScaled( + highWaterMark: Double, + lowWaterMark: Double, + minAvailability: Double + ): Double => Double = { + require( + highWaterMark >= lowWaterMark && highWaterMark <= 1, + s"highWaterMark ($highWaterMark) must be between lowWaterMark ($lowWaterMark) and 1, inclusive" + ) + require( + lowWaterMark >= minAvailability && lowWaterMark <= 1, + s"lowWaterMark ($lowWaterMark) must be between minAvailability ($minAvailability) and 1, inclusive" + ) + require( + minAvailability > 0 && minAvailability < 1, + s"minAvailability ($minAvailability) must be between 0 and 1, exclusive" + ) + + { + case sr if sr >= highWaterMark => 1.0 + case sr if sr <= lowWaterMark => minAvailability + case sr => + val linearFraction = (sr - lowWaterMark) / (highWaterMark - lowWaterMark) + minAvailability + (1.0 - minAvailability) * linearFraction + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.scala new file mode 100644 index 000000000..9aab6f25c --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.scala @@ -0,0 +1,116 @@ +package com.twitter.servo.util + +import com.twitter.util.{Duration, Time} + +/** + * Calculate a running average of data points + */ +trait Average { + def value: Option[Double] + def record(dataPoint: Double, count: Double = 1.0): Unit +} + +/** + * Calculates a running average using two windows of data points, a + * current one and a previous one. When the current window is full, + * it is rolled into the previous and the current window starts + * filling up again. 
+ */ +class WindowedAverage(val windowSize: Long, initialValue: Option[Double] = None) extends Average { + private[this] val average = new ResettableAverage(None) + private[this] var lastAverage: Option[Double] = initialValue + + def value: Option[Double] = + synchronized { + lastAverage match { + case Some(lastAvg) => + // currentCount can temporarily exceed windowSize + val currentWeight = (average.count / windowSize) min 1.0 + Some((1.0 - currentWeight) * lastAvg + currentWeight * average.value.getOrElse(0.0)) + case None => average.value + } + } + + def record(dataPoint: Double, count: Double = 1.0): Unit = + synchronized { + if (average.count >= windowSize) { + lastAverage = value + average.reset() + } + average.record(dataPoint, count) + } +} + +/** + * Calculates a recent average using the past windowDuration of data points. Old average is mixed + * with the new average during windowDuration. If new data points are not recorded the average + * will revert towards defaultAverage. + */ +class RecentAverage( + val windowDuration: Duration, + val defaultAverage: Double, + currentTime: Time = Time.now // passing in start time to simplify scalacheck tests +) extends Average { + private[this] val default = Some(defaultAverage) + private[this] val currentAverage = new ResettableAverage(Some(defaultAverage)) + private[this] var prevAverage: Option[Double] = None + private[this] var windowStart: Time = currentTime + + private[this] def mix(fractOfV2: Double, v1: Double, v2: Double): Double = { + val f = 0.0.max(1.0.min(fractOfV2)) + (1.0 - f) * v1 + f * v2 + } + + private[this] def timeFract: Double = + 0.0.max(windowStart.untilNow.inNanoseconds.toDouble / windowDuration.inNanoseconds) + + def value: Some[Double] = + synchronized { + timeFract match { + case f if f < 1.0 => + Some(mix(f, prevAverage.getOrElse(defaultAverage), currentAverage.getValue)) + case f if f < 2.0 => Some(mix(f - 1.0, currentAverage.getValue, defaultAverage)) + case f => default + } + } + + def 
getValue: Double = value.get + + def record(dataPoint: Double, count: Double = 1.0): Unit = + synchronized { + // if we're past windowDuration, roll average + val now = Time.now + if (now - windowStart > windowDuration) { + prevAverage = value + windowStart = now + currentAverage.reset() + } + currentAverage.record(dataPoint, count) + } + + override def toString = + s"RecentAverage(window=$windowDuration, default=$defaultAverage, " + + s"prevValue=$prevAverage, value=$value, timeFract=$timeFract)" +} + +private class ResettableAverage[DoubleOpt <: Option[Double]](defaultAverage: DoubleOpt) + extends Average { + private[this] var currentCount: Double = 0 + private[this] var currentValue: Double = 0 + def reset(): Unit = { + currentCount = 0 + currentValue = 0 + } + def record(dataPoint: Double, count: Double): Unit = { + currentCount += count + currentValue += dataPoint + } + def value: Option[Double] = + if (currentCount == 0) defaultAverage + else Some(currentValue / currentCount) + + def getValue(implicit ev: DoubleOpt <:< Some[Double]): Double = + value.get + + def count: Double = currentCount +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.scala new file mode 100644 index 000000000..827e371c2 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.scala @@ -0,0 +1,218 @@ +package com.twitter.servo.util + +import com.twitter.logging.Logger +import com.twitter.util.{Timer, Duration, Promise, Future, Return, Throw} +import java.util.concurrent.CancellationException +import scala.collection.mutable.ArrayBuffer + +@deprecated("Use `Future.batched`", "2.6.1") +trait BatchExecutorFactory { + def apply[In, Out](f: Seq[In] => Future[Seq[Out]]): BatchExecutor[In, Out] +} + +/** + * A BatchExecutorFactory allows you to specify the criteria in which a batch + * should be flushed prior to constructing a BatchExecutor. 
A BatchExecutor asks for a + * function that takes a Seq[In] and returns a Future[Seq[Out]]; in return it gives you + * an `In => Future[Out]` interface so that you can incrementally submit tasks to be + * performed when the criteria for batch flushing are met. + * + * Examples: + * val batcherFactory = BatchExecutorFactory.sized(10) + * def processBatch(reqs: Seq[Request]): Future[Seq[Response]] + * val batcher = batcherFactory(processBatch) + * + * val response: Future[Response] = batcher(new Request) + * + * The batcher will wait until 10 requests have been submitted, then delegate + * to the processBatch method to compute the responses. + * + * You can also construct a BatchExecutor that has a time-based threshold, or both: + * val batcherFactory = BatchExecutorFactory.sizedAndTimed( + * sizeThreshold = 10, timeThreshold = 10.milliseconds, timer = new JavaTimer(true)) + * + * A batcher's size can be controlled at runtime through a bufSizeFraction function + * that should return a float between 0.0 and 1.0 that represents the fractional size + * of the sizeThreshold that should be used for the next batch to be collected. 
+ * + */ +@deprecated("Use `Future.batched`", "2.6.1") +object BatchExecutorFactory { + final val DefaultBufSizeFraction = 1.0f + lazy val instant = sized(1) + + def sized(sizeThreshold: Int): BatchExecutorFactory = new BatchExecutorFactory { + override def apply[In, Out](f: Seq[In] => Future[Seq[Out]]) = { + new BatchExecutor(sizeThreshold, None, f, DefaultBufSizeFraction) + } + } + + def timed(timeThreshold: Duration, timer: Timer): BatchExecutorFactory = + sizedAndTimed(Int.MaxValue, timeThreshold, timer) + + def sizedAndTimed( + sizeThreshold: Int, + timeThreshold: Duration, + timer: Timer + ): BatchExecutorFactory = + dynamicSizedAndTimed(sizeThreshold, timeThreshold, timer, DefaultBufSizeFraction) + + def dynamicSizedAndTimed( + sizeThreshold: Int, + timeThreshold: Duration, + timer: Timer, + bufSizeFraction: => Float + ): BatchExecutorFactory = new BatchExecutorFactory { + override def apply[In, Out](f: (Seq[In]) => Future[Seq[Out]]) = { + new BatchExecutor(sizeThreshold, Some(timeThreshold, timer), f, bufSizeFraction) + } + } +} + +@deprecated("Use `Future.batched`", "2.6.1") +class BatchExecutor[In, Out] private[util] ( + maxSizeThreshold: Int, + timeThreshold: Option[(Duration, Timer)], + f: Seq[In] => Future[Seq[Out]], + bufSizeFraction: => Float) { batcher => + + private[this] class ScheduledFlush(after: Duration, timer: Timer) { + @volatile private[this] var cancelled = false + private[this] val task = timer.schedule(after.fromNow) { flush() } + + def cancel(): Unit = { + cancelled = true + task.cancel() + } + + private[this] def flush(): Unit = { + val doAfter = batcher.synchronized { + if (!cancelled) { + flushBatch() + } else { () => + () + } + } + + doAfter() + } + } + + private[this] val log = Logger.get("BatchExecutor") + + // operations on these are synchronized on `this` + private[this] val buf = new ArrayBuffer[(In, Promise[Out])](maxSizeThreshold) + private[this] var scheduled: Option[ScheduledFlush] = None + private[this] var 
currentBufThreshold = newBufThreshold + + private[this] def shouldSchedule = timeThreshold.isDefined && scheduled.isEmpty + + private[this] def currentBufFraction = { + val fract = bufSizeFraction + + if (fract > 1.0f) { + log.warning( + "value returned for BatchExecutor.bufSizeFraction (%f) was > 1.0f, using 1.0", + fract + ) + 1.0f + } else if (fract < 0.0f) { + log.warning( + "value returned for BatchExecutor.bufSizeFraction (%f) was negative, using 0.0f", + fract + ) + 0.0f + } else { + fract + } + } + + private[this] def newBufThreshold = { + val size: Int = math.round(currentBufFraction * maxSizeThreshold) + + if (size < 1) { + 1 + } else if (size >= maxSizeThreshold) { + maxSizeThreshold + } else { + size + } + } + + def apply(t: In): Future[Out] = { + enqueue(t) + } + + private[this] def enqueue(t: In): Future[Out] = { + val promise = new Promise[Out] + val doAfter = synchronized { + buf.append((t, promise)) + if (buf.size >= currentBufThreshold) { + flushBatch() + } else { + scheduleFlushIfNecessary() + () => () + } + } + + doAfter() + promise + } + + private[this] def scheduleFlushIfNecessary(): Unit = { + timeThreshold foreach { + case (duration, timer) => + if (shouldSchedule) { + scheduled = Some(new ScheduledFlush(duration, timer)) + } + } + } + + private[this] def flushBatch(): () => Unit = { + // this must be executed within a synchronize block + val prevBatch = new ArrayBuffer[(In, Promise[Out])](buf.length) + buf.copyToBuffer(prevBatch) + buf.clear() + + scheduled foreach { _.cancel() } + scheduled = None + currentBufThreshold = newBufThreshold // set the next batch's size + + () => + try { + executeBatch(prevBatch) + } catch { + case e: Throwable => + log.warning(e, "unhandled exception caught in BatchExecutor: %s", e.toString) + } + } + + private[this] def executeBatch(batch: Seq[(In, Promise[Out])]): Unit = { + val uncancelled = batch filter { + case (in, p) => + p.isInterrupted match { + case Some(_cause) => + p.setException(new 
CancellationException) + false + case None => true + } + } + + val ins = uncancelled map { case (in, _) => in } + // N.B. intentionally not linking cancellation of these promises to the execution of the batch + // because it seems that in most cases you would be canceling mostly uncanceled work for an + // outlier. + val promises = uncancelled map { case (_, promise) => promise } + + f(ins) respond { + case Return(outs) => + (outs zip promises) foreach { + case (out, p) => + p() = Return(out) + } + case Throw(e) => + val t = Throw(e) + promises foreach { _() = t } + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.scala new file mode 100644 index 000000000..ca3ebe151 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.scala @@ -0,0 +1,21 @@ +package com.twitter.servo.util + +import com.twitter.finagle.mux.stats.MuxCancelledCategorizer +import com.twitter.finagle.stats.CancelledCategorizer +import com.twitter.util.FutureCancelledException +import com.twitter.util.Throwables.RootCause + +/** + * Helper that consolidates various ways (nested and top level) cancel exceptions can be detected. 
+ */ +object CancelledExceptionExtractor { + def unapply(e: Throwable): Option[Throwable] = { + e match { + case _: FutureCancelledException => Some(e) + case MuxCancelledCategorizer(cause) => Some(cause) + case CancelledCategorizer(cause) => Some(cause) + case RootCause(CancelledExceptionExtractor(cause)) => Some(cause) + case _ => None + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.scala new file mode 100644 index 000000000..f8da5c5cf --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.scala @@ -0,0 +1,24 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.{Counter, MetricBuilder, StatsReceiver, StatsReceiverProxy} + +/** + * A StatsReceiver that initializes counters to zero. + * Wraps an underlying StatsReceiver so that every counter obtained through it + * is automatically initialized to 0. + * Until a counter performs its first incr() it is reported as "undefined", + * which means that an alert set on that counter + * will result in an error. + * Initializing eagerly also removes the need to manually initialize counters in order + * to work around the aforementioned problem. 
+ * @param self - underlying StatsReceiver + */ +class CounterInitializingStatsReceiver(protected val self: StatsReceiver) + extends StatsReceiverProxy { + + override def counter(metricBuilder: MetricBuilder): Counter = { + val counter = self.counter(metricBuilder) + counter.incr(0) + counter + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.scala new file mode 100644 index 000000000..00510a3e3 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.scala @@ -0,0 +1,83 @@ +package com.twitter.servo.util + +object Effect { + // a no-op effect + private[this] val _unit = Effect[Any] { _ => + () + } + + /** + * A "no-op" Effect. For any effect E, (E also unit) == (unit also E) == E. + * Forms a monoid with `also`. + */ + def unit[A]: Effect[A] = _unit.asInstanceOf[Effect[A]] + + /** + * Package a function as an Effect. + */ + def apply[A](f: A => Unit): Effect[A] = + new Effect[A] { + override def apply(value: A) = f(value) + } + + /** + * An effect that only applies to some values. + */ + def fromPartial[A](f: PartialFunction[A, Unit]): Effect[A] = + Effect[A] { x => + if (f.isDefinedAt(x)) f(x) + } +} + +/** + * Perform an effect with the given value, without altering the result. + * + * Forms a monoid with Effect.unit as unit and `also` as the combining operation. + */ +trait Effect[A] extends (A => Unit) { self => + + /** + * An identity function that executes this effect as a side-effect. + */ + lazy val identity: A => A = { value => + self(value); value + } + + /** + * Combine effects, so that both effects are performed. + * Forms a monoid with Effect.unit. + */ + def also(next: Effect[A]): Effect[A] = + Effect[A](identity andThen next) + + /** + * Convert an effect to an effect of a more general type by way + * of an extraction function. 
(contravariant map) + */ + def contramap[B](extract: B => A): Effect[B] = + Effect[B](extract andThen self) + + /** + * Perform this effect only if the provided gate returns true. + */ + @deprecated("Use enabledBy(() => Boolean)", "2.5.1") + def enabledBy(enabled: Gate[Unit]): Effect[A] = + enabledBy(() => enabled()) + + /** + * Perform this effect only if the provided gate returns true. + */ + def enabledBy(enabled: () => Boolean): Effect[A] = + onlyIf { _ => + enabled() + } + + /** + * Perform this effect only if the provided predicate returns true + * for the input. + */ + def onlyIf(predicate: A => Boolean) = + Effect[A] { x => + if (predicate(x)) this(x) else () + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.scala new file mode 100644 index 000000000..85e4ac996 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.scala @@ -0,0 +1,193 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.Future +import scala.collection.mutable + +/** + * Categorizes an exception according to some criteria. + * n.b. Implemented in terms of lift rather than apply to avoid extra allocations when + * used when lifting the effect. 
+ */ +trait ExceptionCategorizer { + import ExceptionCategorizer._ + + def lift(effect: Effect[Category]): Effect[Throwable] + + def apply(t: Throwable): Set[Category] = { + val s = mutable.Set.empty[Category] + lift(Effect(s += _))(t) + s.toSet + } + + /** + * construct a new categorizer that prepends scope to all categories returned by this categorizer + */ + def scoped(scope: Seq[String]): ExceptionCategorizer = + if (scope.isEmpty) { + this + } else { + val scopeIt: Category => Category = Memoize(scope ++ _) + fromLift(effect => lift(effect.contramap(scopeIt))) + } + + /** + * construct a new categorizer that returns the union of the categories returned by this and that + */ + def ++(that: ExceptionCategorizer): ExceptionCategorizer = + fromLift(effect => this.lift(effect).also(that.lift(effect))) + + /** + * construct a new categorizer that only returns categories for throwables matching pred + */ + def onlyIf(pred: Throwable => Boolean): ExceptionCategorizer = + fromLift(lift(_).onlyIf(pred)) +} + +object ExceptionCategorizer { + type Category = Seq[String] + + def const(categories: Set[Category]): ExceptionCategorizer = ExceptionCategorizer(_ => categories) + def const(c: Category): ExceptionCategorizer = const(Set(c)) + def const(s: String): ExceptionCategorizer = const(Seq(s)) + + def apply(fn: Throwable => Set[Category]): ExceptionCategorizer = + new ExceptionCategorizer { + def lift(effect: Effect[Category]) = Effect[Throwable](t => fn(t).foreach(effect)) + override def apply(t: Throwable) = fn(t) + } + + def fromLift(fn: Effect[Category] => Effect[Throwable]): ExceptionCategorizer = + new ExceptionCategorizer { + def lift(effect: Effect[Category]) = fn(effect) + } + + def singular(fn: Throwable => Category): ExceptionCategorizer = + fromLift(_.contramap(fn)) + + def simple(fn: Throwable => String): ExceptionCategorizer = + singular(fn.andThen(Seq(_))) + + def default( + name: Category = Seq("exceptions"), + sanitizeClassnameChain: Throwable => 
Seq[String] = ThrowableHelper.sanitizeClassnameChain + ): ExceptionCategorizer = + ExceptionCategorizer.const(name) ++ + ExceptionCategorizer.singular(sanitizeClassnameChain).scoped(name) +} + +/** + * Increments a counter for each category returned by the exception categorizer + * + * @param statsReceiver + * the unscoped statsReceiver on which to hang the counters + * @param categorizer + * A function that returns a list of category names that a throwable should be counted under. + */ +class ExceptionCounter(statsReceiver: StatsReceiver, categorizer: ExceptionCategorizer) { + + /** + * alternative constructor for backwards compatibility + * + * @param statsReceiver + * the unscoped statsReceiver on which to hang the counters + * @param name + * the counter name for total exceptions, and scope for individual + * exception counters. default value is `exceptions` + * @param sanitizeClassnameChain + * A function that can be used to cleanup classnames before passing them to the StatsReceiver. 
+ */ + def this( + statsReceiver: StatsReceiver, + name: String, + sanitizeClassnameChain: Throwable => Seq[String] + ) = + this(statsReceiver, ExceptionCategorizer.default(List(name), sanitizeClassnameChain)) + + /** + * provided for backwards compatibility + */ + def this(statsReceiver: StatsReceiver) = + this(statsReceiver, ExceptionCategorizer.default()) + + /** + * provided for backwards compatibility + */ + def this(statsReceiver: StatsReceiver, name: String) = + this(statsReceiver, ExceptionCategorizer.default(List(name))) + + /** + * provided for backwards compatibility + */ + def this(statsReceiver: StatsReceiver, sanitizeClassnameChain: Throwable => Seq[String]) = + this( + statsReceiver, + ExceptionCategorizer.default(sanitizeClassnameChain = sanitizeClassnameChain) + ) + + private[this] val counter = categorizer.lift(Effect(statsReceiver.counter(_: _*).incr())) + + /** + * count one or more throwables + */ + def apply(t: Throwable, throwables: Throwable*): Unit = { + counter(t) + if (throwables.nonEmpty) apply(throwables) + } + + /** + * count n throwables + */ + def apply(throwables: Iterable[Throwable]): Unit = { + throwables.foreach(counter) + } + + /** + * wrap around a Future to capture exceptions + */ + def apply[T](f: => Future[T]): Future[T] = { + f onFailure { case t => apply(t) } + } +} + +/** + * A memoized exception counter factory. + * + * @param stats + * the unscoped statsReceiver on which to hang the counters + * @param categorizer + * A function that returns a list of category names that a throwable should be counted under. + */ +class MemoizedExceptionCounterFactory(stats: StatsReceiver, categorizer: ExceptionCategorizer) { + + /** + * A memoized exception counter factory using the default categorizer. 
+ * + * @param stats + * the unscoped statsReceiver on which to hang the counters + */ + def this(stats: StatsReceiver) = + this(stats, ExceptionCategorizer.default()) + + /** + * A memoized exception counter factory using a categorizer with the given suffix. + * + * @param stats + * the unscoped statsReceiver on which to hang the counters + * @param suffix + * All created exception counters will have the + * specified suffix added. This allows compatibility with + * Servo's ExceptionCounter's name param (allows creating + * exception counters that default to the "exceptions" namespace + * as well as those with an otherwise-specified scope). + */ + def this(stats: StatsReceiver, suffix: Seq[String]) = + this(stats, ExceptionCategorizer.default(suffix)) + + private[this] val getCounter = + Memoize { (path: Seq[String]) => + new ExceptionCounter(stats, categorizer.scoped(path)) + } + + def apply(path: String*): ExceptionCounter = getCounter(path) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.scala new file mode 100644 index 000000000..2fecb6414 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.scala @@ -0,0 +1,51 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.{NullStatsReceiver, StatsReceiver} +import scala.collection.mutable + +/** + * Maintains a frequency counted circular buffer of objects. 
+ */ +class FrequencyCounter[Q]( + size: Int, + threshold: Int, + trigger: Q => Unit, + statsReceiver: StatsReceiver = NullStatsReceiver) { + require(threshold > 1) // in order to minimize work for the common case + private[this] val buffer = new mutable.ArraySeq[Q](size) + private[this] var index = 0 + private[this] val counts = mutable.Map[Q, Int]() + + private[this] val keyCountStat = statsReceiver.scope("frequencyCounter").stat("keyCount") + + /** + * Adds a new key to the circular buffer and updates frequency counts. + * Runs trigger if this key occurs exactly `threshold` times in the buffer. + * Returns true if this key occurs at least `threshold` times in the buffer. + */ + def incr(key: Q): Boolean = { + // TODO(aa): maybe write lock-free version + val count = synchronized { + counts(key) = counts.getOrElse(key, 0) + 1 + + Option(buffer(index)) foreach { oldKey => + val countVal = counts(oldKey) + if (countVal == 1) { + counts -= oldKey + } else { + counts(oldKey) = countVal - 1 + } + } + + buffer(index) = key + index = (index + 1) % size + counts(key) + } + keyCountStat.add(count) + if (count == threshold) { + trigger(key) + } + count >= threshold + } + +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.scala new file mode 100644 index 000000000..a9cc5be0e --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.scala @@ -0,0 +1,75 @@ +package com.twitter.servo.util + +/** + * A collection of FunctionArrow factory functions. + */ +object FunctionArrow { + def apply[A, B](f: A => B): FunctionArrow[A, B] = fromFunction(f) + + /** + * Produce a FunctionArrow from a function `A => B`. + */ + def fromFunction[A, B](f: A => B): FunctionArrow[A, B] = + new FunctionArrow[A, B] { + def apply(a: A): B = f(a) + } + + /** + * Produces a FunctionArrow with no side-effects that simply returns its argument. 
+ */ + def identity[A]: FunctionArrow[A, A] = apply(Predef.identity[A]) + + /** + * Appends two FunctionArrows together. + * + * This forms a category with 'identity'. + */ + def append[A, B, C](a: FunctionArrow[A, B], b: FunctionArrow[B, C]): FunctionArrow[A, C] = + a.andThen(b) + + /** + * Produce a FunctionArrow that applies an Effect, returning the argument + * value as-is. + */ + def effect[A](effect: Effect[A]): FunctionArrow[A, A] = apply { a => + effect(a); a + } + + /** + * Produces a FunctionArrow that proxies to one of two others, depending on a + * predicate. + */ + def choose[A, B]( + predicate: A => Boolean, + ifTrue: FunctionArrow[A, B], + ifFalse: FunctionArrow[A, B] + ): FunctionArrow[A, B] = + apply { a: A => + if (predicate(a)) ifTrue(a) else ifFalse(a) + } + + /** + * Produces a FunctionArrow whose application is guarded by a predicate. `f` is + * applied if the predicate returns true, otherwise the argument is simply + * returned. + */ + def onlyIf[A](predicate: A => Boolean, f: FunctionArrow[A, A]): FunctionArrow[A, A] = + choose(predicate, f, identity[A]) +} + +/** + * A function encapsulating a computation. + * + * Background on the Arrow abstraction: + * http://en.wikipedia.org/wiki/Arrow_(computer_science) + */ +trait FunctionArrow[-A, +B] extends (A => B) { self => + + /** + * Composes two FunctionArrows. Produces a new FunctionArrow that performs both in series. 
+ */ + def andThen[C](next: FunctionArrow[B, C]): FunctionArrow[A, C] = + new FunctionArrow[A, C] { + override def apply(a: A) = next.apply(self(a)) + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.scala new file mode 100644 index 000000000..ea3fb8959 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.scala @@ -0,0 +1,501 @@ +package com.twitter.servo.util + +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.tracing.Trace +import com.twitter.finagle.FailedFastException +import com.twitter.finagle.Filter +import com.twitter.finagle.Service +import com.twitter.util._ +import scala.util.control.NonFatal + +/** + * A collection of FutureArrow factory functions. + */ +object FutureArrow { + + /** + * Produce a FutureArrow from a function `A => Future[B]`. + */ + def apply[A, B](f: A => Future[B]): FutureArrow[A, B] = + new FutureArrow[A, B] { + override def apply(a: A): Future[B] = + try f(a) + catch { + case NonFatal(e) => Future.exception(e) + } + } + + /** + * Produce a FutureArrow that supports recursive calls. Recursing from a `Future` + * continuation is stack-safe, but direct recursion will use the stack, like a + * normal method invocation. + */ + def rec[A, B](f: FutureArrow[A, B] => A => Future[B]): FutureArrow[A, B] = + new FutureArrow[A, B] { self => + private val g: A => Future[B] = f(this) + override def apply(a: A): Future[B] = + try g(a) + catch { + case NonFatal(e) => Future.exception(e) + } + } + + /** + * Produce a FutureArrow from an FunctionArrow. + */ + def fromFunctionArrow[A, B](f: FunctionArrow[A, B]): FutureArrow[A, B] = + FutureArrow[A, B](a => Future(f(a))) + + /** + * Produce a FutureArrow from a function. 
+ */ + def fromFunction[A, B](f: A => B): FutureArrow[A, B] = fromFunctionArrow(FunctionArrow(f)) + + /** + * Produce a FutureArrow from a function `A => Try[B]`. + * + * The Try is evaluated within a Future. Thus, Throw results are translated + * to `Future.exception`s. + */ + def fromTry[A, B](f: A => Try[B]): FutureArrow[A, B] = + FutureArrow[A, B](a => Future.const(f(a))) + + /** + * A FutureArrow that simply returns a Future of its argument. + */ + def identity[A]: FutureArrow[A, A] = + FutureArrow[A, A](a => Future.value(a)) + + /** + * A FutureArrow with a constant result, regardless of input. + */ + def const[A, B](value: Future[B]): FutureArrow[A, B] = + FutureArrow[A, B](_ => value) + + /** + * Appends two FutureArrows together. + * + * This forms a category with 'identity'. + */ + def append[A, B, C](a: FutureArrow[A, B], b: FutureArrow[B, C]) = a.andThen(b) + + /** + * Produce a FutureArrow that applies an FutureEffect, returning the argument + * value as-is on success. If the effect returns an Future exception, then the + * result of the filter will also be that exception. + */ + def effect[A](effect: FutureEffect[A]): FutureArrow[A, A] = + apply(a => effect(a).map(_ => a)) + + /** + * Produces a FutureArrow that proxies to one of two others, depending on a + * predicate. + */ + def choose[A, B](predicate: A => Boolean, ifTrue: FutureArrow[A, B], ifFalse: FutureArrow[A, B]) = + FutureArrow[A, B](a => if (predicate(a)) ifTrue(a) else ifFalse(a)) + + /** + * Produces a FutureArrow whose application is guarded by a predicate. `f` is + * applied if the predicate returns true, otherwise the argument is simply + * returned. + */ + def onlyIf[A](predicate: A => Boolean, f: FutureArrow[A, A]) = + choose(predicate, f, identity[A]) + + /** + * Produces a FutureArrow that forwards to multiple FutureArrows and collects + * the results into a `Seq[B]`. Results are gathered via Future.collect, so + * failure semantics are inherited from that method. 
+ */ + def collect[A, B](arrows: Seq[FutureArrow[A, B]]): FutureArrow[A, Seq[B]] = + apply(a => Future.collect(arrows.map(arrow => arrow(a)))) + + private val RetryOnNonFailedFast: PartialFunction[Try[Any], Boolean] = { + case Throw(_: FailedFastException) => false + case Throw(_: Exception) => true + } +} + +/** + * A function encapsulating an asynchronous computation. + * + * Background on the Arrow abstraction: + * http://en.wikipedia.org/wiki/Arrow_(computer_science) + */ +trait FutureArrow[-A, +B] extends (A => Future[B]) { self => + + /** + * Composes two FutureArrows. Produces a new FutureArrow that performs both in + * series, depending on the success of the first. + */ + def andThen[C](next: FutureArrow[B, C]): FutureArrow[A, C] = + FutureArrow[A, C](a => self(a).flatMap(next.apply)) + + /** + * Combines this FutureArrow with another, producing one that translates a + * tuple of its constituents' arguments into a tuple of their results. + */ + def zipjoin[C, D](other: FutureArrow[C, D]): FutureArrow[(A, C), (B, D)] = + FutureArrow[(A, C), (B, D)] { + case (a, c) => self(a) join other(c) + } + + /** + * Converts a FutureArrow on a scalar input and output value into a FutureArrow on a + * Sequence of input values producing a pairwise sequence of output values. The elements + * of the input sequence are processed in parallel, so execution order is not guaranteed. + * Results are gathered via Future.collect, so failure semantics are inherited from that method. + */ + def liftSeq: FutureArrow[Seq[A], Seq[B]] = + FutureArrow[Seq[A], Seq[B]] { seqA => + Future.collect(seqA.map(this)) + } + + /** + * Converts this FutureArrow to a FutureEffect, where the result value is ignored. + */ + def asFutureEffect[A2 <: A]: FutureEffect[A2] = + FutureEffect(this.unit) + + /** + * Combines this FutureArrow with another, producing one that applies both + * in parallel, producing a tuple of their results. 
+ */ + def inParallel[A2 <: A, C](other: FutureArrow[A2, C]): FutureArrow[A2, (B, C)] = { + val paired = self.zipjoin(other) + FutureArrow[A2, (B, C)](a => paired((a, a))) + } + + /** + * Wrap a FutureArrow with an ExceptionCounter, thus providing + * observability into the arrow's success and failure. + */ + def countExceptions( + exceptionCounter: ExceptionCounter + ): FutureArrow[A, B] = + FutureArrow[A, B](request => exceptionCounter(self(request))) + + /** + * Returns a chained FutureArrow in which the given function will be called for any + * input that succeeds. `f` receives the input and the result. + */ + def onSuccess[A2 <: A](f: (A2, B) => Unit): FutureArrow[A2, B] = + FutureArrow[A2, B](a => self(a).onSuccess(b => f(a, b))) + + /** + * Returns a chained FutureArrow in which the given function will be called for any + * input that fails. `f` receives the input and the Throwable. + */ + def onFailure[A2 <: A](f: (A2, Throwable) => Unit): FutureArrow[A2, B] = + FutureArrow[A2, B](a => self(a).onFailure(t => f(a, t))) + + /** + * Translate exceptions returned by a FutureArrow according to a + * PartialFunction. Exceptions for which it is not defined are passed through unchanged. + */ + def translateExceptions( + translateException: PartialFunction[Throwable, Throwable] + ): FutureArrow[A, B] = + FutureArrow[A, B] { request => + self(request).rescue { + case t if translateException.isDefinedAt(t) => Future.exception(translateException(t)) + case t => Future.exception(t) + } + } + + /** + * Apply a FutureArrow, lifting any non-Future exceptions thrown into + * `Future.exception`s. + */ + def liftExceptions: FutureArrow[A, B] = + FutureArrow[A, B] { request => + // Flattening the Future[Future[Response]] is equivalent, but more concise + // than wrapping the arrow(request) call in a try/catch block that transforms + // the exception to a Future.exception, or at least was more concise before + // I added a four-line comment. + Future(self(request)).flatten + } + + /** + * Wrap a FutureArrow in exception-tracking and -translation.
Given a + * filter and a handler, exceptional results will be observed and translated + * according to the function passed in this function's second argument list. + */ + def cleanly( + exceptionCounter: ExceptionCounter + )( + translateException: PartialFunction[Throwable, Throwable] = { case t => t } + ): FutureArrow[A, B] = { + liftExceptions + .translateExceptions(translateException) + .countExceptions(exceptionCounter) + } + + /** + * Same as `trackLatency(statsReceiver, extractName)`, with the parameters swapped. + */ + @deprecated("use trackLatency(StatsReceiver, (A2 => String)", "2.11.1") + def trackLatency[A2 <: A]( + extractName: (A2 => String), + statsReceiver: StatsReceiver + ): FutureArrow[A2, B] = + trackLatency(statsReceiver, extractName) + + /** + * Produces a FutureArrow that times each application into the stat named (`extractName(request)`, "latency_ms"). + */ + def trackLatency[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String) + ): FutureArrow[A2, B] = + FutureArrow[A2, B] { request => + Stat.timeFuture(statsReceiver.stat(extractName(request), "latency_ms")) { + self(request) + } + } + + /** + * Produces a FutureArrow that tracks the outcome (i.e. success vs failure) of + * requests. Same as `trackOutcome(statsReceiver, extractName)`, with the parameters swapped. + */ + @deprecated("use trackOutcome(StatsReceiver, (A2 => String)", "2.11.1") + def trackOutcome[A2 <: A]( + extractName: (A2 => String), + statsReceiver: StatsReceiver + ): FutureArrow[A2, B] = + trackOutcome(statsReceiver, extractName) + /** Tracks request outcomes without an exception categorizer (the categorizer always returns None). */ + def trackOutcome[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String) + ): FutureArrow[A2, B] = + trackOutcome(statsReceiver, extractName, _ => None) + + /** + * Produces a FutureArrow that tracks the outcome (i.e. success vs failure) of + * requests.
+ */ + def trackOutcome[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String), + exceptionCategorizer: Throwable => Option[String] + ): FutureArrow[A2, B] = + FutureArrow[A2, B] { request => + val scope = statsReceiver.scope(extractName(request)) + + self(request).respond { r => + statsReceiver.counter("requests").incr() + scope.counter("requests").incr() + + r match { + case Return(_) => + statsReceiver.counter("success").incr() + scope.counter("success").incr() + + case Throw(t) => + val category = exceptionCategorizer(t).getOrElse("failures") + statsReceiver.counter(category).incr() + scope.counter(category).incr() + scope.scope(category).counter(ThrowableHelper.sanitizeClassnameChain(t): _*).incr() + } + } + } + + /** + * Observe latency and success rate for any FutureArrow[A, B] where A is Observable, with no exception categorizer. + */ + def observed[A2 <: A with Observable]( + statsReceiver: StatsReceiver + ): FutureArrow[A2, B] = + observed(statsReceiver, exceptionCategorizer = _ => None) + + /** + * Observe latency and success rate for any FutureArrow[A, B] where A is Observable. Stats use the "client_request" scope and the input's `requestName`. + */ + def observed[A2 <: A with Observable]( + statsReceiver: StatsReceiver, + exceptionCategorizer: Throwable => Option[String] + ): FutureArrow[A2, B] = + self.observed( + statsReceiver.scope("client_request"), + (a: A2) => a.requestName, + exceptionCategorizer + ) + + /** + * Observe latency and success rate for any FutureArrow (`trackLatency` followed by `trackOutcome`). + */ + def observed[A2 <: A]( + statsReceiver: StatsReceiver, + statsScope: A2 => String, + exceptionCategorizer: Throwable => Option[String] = _ => None + ): FutureArrow[A2, B] = + self + .trackLatency(statsReceiver, statsScope) + .trackOutcome(statsReceiver, statsScope, exceptionCategorizer) + + /** + * Trace the future arrow using local spans as documented here: + * https://docbird.twitter.biz/finagle/Tracing.html + */ + def traced[A2 <: A]( + traceScope: A2 => String + ): FutureArrow[A2, B] = { + FutureArrow[A2, B] { a => +
Trace.traceLocalFuture(traceScope(a))(self(a)) + } + } + + /** + * Produces a new FutureArrow where the given function is applied to the input, and the result + * passed to this FutureArrow. + */ + def contramap[C](f: C => A): FutureArrow[C, B] = + FutureArrow[C, B](f.andThen(self)) + + /** + * Produces a new FutureArrow where the given function is applied to the result value of this + * FutureArrow. + */ + def map[C](f: B => C): FutureArrow[A, C] = + mapResult(_.map(f)) + + /** + * Produces a new FutureArrow where the given function is applied to the resulting Future of + * this FutureArrow. + */ + def mapResult[C](f: Future[B] => Future[C]): FutureArrow[A, C] = + FutureArrow[A, C](a => f(self(a))) + + /** + * Produces a new FutureArrow whose resulting Future is rescued with `rescueException`. + */ + def rescue[B2 >: B]( + rescueException: PartialFunction[Throwable, Future[B2]] + ): FutureArrow[A, B2] = { + FutureArrow[A, B2] { a => + self(a).rescue(rescueException) + } + } + + /** + * Produces a new FutureArrow where the result value is ignored, and Unit is returned. + */ + def unit: FutureArrow[A, Unit] = + mapResult(_.unit) + + /** + * Returns a copy of this FutureArrow where the returned Future has its `.masked` + * method called. + */ + def masked: FutureArrow[A, B] = + mapResult(_.masked) + + /** + * Wraps this FutureArrow by passing the underlying operation to the given retry handler + * for possible retries. + */ + def retry(handler: RetryHandler[B]): FutureArrow[A, B] = + FutureArrow[A, B](a => handler(self(a))) + /** Like `retry(handler)`, with a RetryHandler built per call from `policy`, `timer` and stats scoped by `extractName(a)`. */ + def retry[A2 <: A]( + policy: RetryPolicy[Try[B]], + timer: Timer, + statsReceiver: StatsReceiver, + extractName: (A2 => String) + ): FutureArrow[A2, B] = + FutureArrow[A2, B] { a => + val scoped = statsReceiver.scope(extractName(a)) + RetryHandler(policy, timer, scoped)(self(a)) + } + + /** + * Produces a new FutureArrow where the returned Future[B] must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException.
+ * + * The [[timeout]] is passed by name to take advantage of deadlines passed in the request context. + * + * ''Note'': On timeout, the underlying future is NOT interrupted. + */ + def withTimeout(timer: Timer, timeout: => Duration): FutureArrow[A, B] = + mapResult(_.within(timer, timeout)) + + /** + * Produces a new FutureArrow where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable. + * + * The `timeout` is passed by name to take advantage of deadlines passed in the request context. + * + * ''Note'': On timeout, the underlying future is NOT interrupted. + */ + def withTimeout(timer: Timer, timeout: => Duration, exc: => Throwable): FutureArrow[A, B] = + mapResult(_.within(timer, timeout, exc)) + + /** + * Produces a new FutureArrow where the returned Future[B] must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. + * + * The `timeout` is passed by name to take advantage of deadlines passed in the request context. + * + * ''Note'': On timeout, the underlying future is interrupted. + */ + def raiseWithin(timer: Timer, timeout: => Duration): FutureArrow[A, B] = + mapResult(_.raiseWithin(timeout)(timer)) + + /** + * Produces a new FutureArrow where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable. + * + * The `timeout` is passed by name to take advantage of deadlines passed in the request context. + * + * ''Note'': On timeout, the underlying future is interrupted. + */ + def raiseWithin(timer: Timer, timeout: => Duration, exc: => Throwable): FutureArrow[A, B] = + mapResult(_.raiseWithin(timer, timeout, exc)) + + /** + * Produces a finagle.Service instance that invokes this arrow (via `Service.mk`). + */ + def asService: Service[A, B] = Service.mk(this) + + /** + * Produces a new FutureArrow with the given finagle.Filter applied to this instance.
+ */ + def withFilter[A2, B2](filter: Filter[A2, B2, A, B]): FutureArrow[A2, B2] = + FutureArrow[A2, B2](filter.andThen(asService)) + + /** + * Produces a new FutureArrow with the given timeout which retries as decided by `shouldRetry` (policy: `RetryPolicy.tries(numTries, shouldRetry)`) and + * records stats about the logical request. This is only appropriate for idempotent operations. + */ + def observedWithTimeoutAndRetry[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String), + timer: Timer, + timeout: Duration, + numTries: Int, + shouldRetry: PartialFunction[Try[B], Boolean] = FutureArrow.RetryOnNonFailedFast + ): FutureArrow[A2, B] = { + val retryPolicy = RetryPolicy.tries(numTries, shouldRetry) + withTimeout(timer, timeout) + .retry(retryPolicy, timer, statsReceiver, extractName) + .trackLatency(statsReceiver, extractName) + .trackOutcome(statsReceiver, extractName) + } + + /** + * Produces a new FutureArrow with the given timeout and records latency and outcome stats about the logical request. + * This does not retry and is appropriate for non-idempotent operations. + */ + def observedWithTimeout[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String), + timer: Timer, + timeout: Duration + ): FutureArrow[A2, B] = + withTimeout(timer, timeout) + .trackLatency(statsReceiver, extractName) + .trackOutcome(statsReceiver, extractName) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala new file mode 100644 index 000000000..aa20bcd9f --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala @@ -0,0 +1,379 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.{StatsReceiver, Stat} +import com.twitter.logging.{Logger, NullLogger} +import com.twitter.util._ + +object FutureEffect { + private[this] val _unit = FutureEffect[Any] { _ => + Future.Unit + } + + /** + * A FutureEffect that always succeeds.
+ */ + def unit[T]: FutureEffect[T] = + _unit.asInstanceOf[FutureEffect[T]] + + /** + * A FutureEffect that always fails with the given exception. + */ + def fail[T](ex: Throwable): FutureEffect[T] = + FutureEffect[T] { _ => + Future.exception(ex) + } + + /** + * Lift a function returning a Future[Unit] to a FutureEffect. + */ + def apply[T](f: T => Future[Unit]) = + new FutureEffect[T] { + override def apply(x: T) = f(x) + } + + /** + * Performs all of the effects in order. If any effect fails, the + * whole operation fails, and the subsequent effects are not + * attempted. With no effects, the result is `unit`. + */ + def sequentially[T](effects: FutureEffect[T]*): FutureEffect[T] = + effects.foldLeft[FutureEffect[T]](unit[T])(_ andThen _) + + /** + * Perform all of the effects concurrently. If any effect fails, the + * whole operation fails, but any of the effects may or may not have + * taken place. + */ + def inParallel[T](effects: FutureEffect[T]*): FutureEffect[T] = + FutureEffect[T] { t => + Future.join(effects map { _(t) }) + } + /** Lifts a PartialFunction to a FutureEffect that returns Future.Unit where `f` is not defined. */ + def fromPartial[T](f: PartialFunction[T, Future[Unit]]) = + FutureEffect[T] { x => + if (f.isDefinedAt(x)) f(x) else Future.Unit + } + + /** + * Combines two FutureEffects into one that dispatches according to a gate. If the gate is + * true, use `a`, otherwise, use `b`. The gate is evaluated on each application. + */ + def selected[T](condition: Gate[Unit], a: FutureEffect[T], b: FutureEffect[T]): FutureEffect[T] = + selected(() => condition(), a, b) + + /** + * Combines two FutureEffects into one that dispatches according to a nullary boolean function. + * If the function returns true, use `a`, otherwise, use `b`. It is evaluated on each application. + */ + def selected[T](f: () => Boolean, a: FutureEffect[T], b: FutureEffect[T]): FutureEffect[T] = + FutureEffect[T] { t => + if (f()) a(t) else b(t) + } +} + +/** + * A function whose only result is a future effect. This wrapper + * provides convenient combinators. + */ +trait FutureEffect[T] extends (T => Future[Unit]) { self => + + /** + * Simplified version of `apply` when type is `Unit`.
+ */ + def apply()(implicit ev: Unit <:< T): Future[Unit] = self(()) + + /** + * Combines two FutureEffects, performing this one first and + * performing the next one if this one succeeds. + */ + def andThen(next: FutureEffect[T]): FutureEffect[T] = + FutureEffect[T] { x => + self(x) flatMap { _ => + next(x) + } + } + + /** + * Wraps this FutureEffect with a failure handling function that will be chained to + * the Future returned by this FutureEffect. The effect chosen by `handler` is applied to the same input. + */ + def rescue( + handler: PartialFunction[Throwable, FutureEffect[T]] + ): FutureEffect[T] = + FutureEffect[T] { x => + self(x) rescue { + case t if handler.isDefinedAt(t) => + handler(t)(x) + } + } + + /** + * Combines two future effects, performing them both simultaneously. + * If either effect fails, the result will be failure, but the other + * effects will have occurred. + */ + def inParallel(other: FutureEffect[T]) = + FutureEffect[T] { x => + Future.join(Seq(self(x), other(x))) + } + + /** + * Perform this effect only if the provided gate returns true. + */ + def enabledBy(enabled: Gate[Unit]): FutureEffect[T] = + enabledBy(() => enabled()) + + /** + * Perform this effect only if the provided function returns true. + */ + def enabledBy(enabled: () => Boolean): FutureEffect[T] = + onlyIf { _ => + enabled() + } + + /** + * Perform this effect only if the provided predicate returns true + * for the input. Otherwise return Future.Unit. + */ + def onlyIf(predicate: T => Boolean) = + FutureEffect[T] { x => + if (predicate(x)) self(x) else Future.Unit + } + + /** + * Perform this effect with arg only if the condition is true (arg is by-name, so it is only evaluated then). Otherwise just return Future.Unit. + */ + def when(condition: Boolean)(arg: => T): Future[Unit] = + if (condition) self(arg) else Future.Unit + + /** + * Adapt this effect to take a different input via the provided conversion. + * + * (Contravariant map) + */ + def contramap[U](g: U => T) = FutureEffect[U] { u => + self(g(u)) + } + + /** + * Adapt this effect to take a different input via the provided conversion.
+ * + * (Contravariant map) + */ + def contramapFuture[U](g: U => Future[T]) = FutureEffect[U] { u => + g(u) flatMap self + } + + /** + * Adapt this effect to take a different input via the provided conversion. + * If the output value of the given function is None, the effect is a no-op. + */ + def contramapOption[U](g: U => Option[T]) = + FutureEffect[U] { + g andThen { + case None => Future.Unit + case Some(t) => self(t) + } + } + + /** + * Adapt this effect to take a different input via the provided conversion. + * If the given function returns a Future of None, the effect is a no-op. + * (Contravariant map) + */ + def contramapFutureOption[U](g: U => Future[Option[T]]) = + FutureEffect[U] { u => + g(u) flatMap { + case None => Future.Unit + case Some(x) => self(x) + } + } + + /** + * Adapt this effect to take a sequence of input values; it is applied to each element and the results are joined via Future.join. + */ + def liftSeq: FutureEffect[Seq[T]] = + FutureEffect[Seq[T]] { seqT => + Future.join(seqT.map(self)) + } + + /** + * Allow the effect to fail, but immediately return success. The + * effect is not guaranteed to have finished when its future is + * available. Exceptions thrown while invoking the effect are also ignored. + */ + def ignoreFailures: FutureEffect[T] = + FutureEffect[T] { x => + Try(self(x)); Future.Unit + } + + /** + * Allow the effect to fail but always return success. Unlike ignoreFailures, the + * effect is guaranteed to have finished when its future is available. + */ + def ignoreFailuresUponCompletion: FutureEffect[T] = + FutureEffect[T] { x => + Try(self(x)) match { + case Return(f) => f.handle { case _ => () } + case Throw(_) => Future.Unit + } + } + + /** + * Returns a chained FutureEffect in which the given function will be called for any + * input that succeeds. `f` receives the input. + */ + def onSuccess(f: T => Unit): FutureEffect[T] = + FutureEffect[T] { x => + self(x).onSuccess(_ => f(x)) + } + + /** + * Returns a chained FutureEffect in which the given function will be called for any + * input that fails.
+ */ + def onFailure(f: (T, Throwable) => Unit): FutureEffect[T] = + FutureEffect[T] { x => + self(x).onFailure(t => f(x, t)) + } + + /** + * Translate exceptions returned by a FutureEffect according to a + * PartialFunction. Exceptions for which it is not defined are passed through unchanged. + */ + def translateExceptions( + translateException: PartialFunction[Throwable, Throwable] + ): FutureEffect[T] = + FutureEffect[T] { request => + self(request) rescue { + case t if translateException.isDefinedAt(t) => Future.exception(translateException(t)) + case t => Future.exception(t) + } + } + + /** + * Wraps an effect with retry logic (via `RetryHandler.failuresOnly`). Will retry against any failure. + */ + def retry(backoffs: Stream[Duration], timer: Timer, stats: StatsReceiver): FutureEffect[T] = + retry(RetryHandler.failuresOnly(backoffs, timer, stats)) + + /** + * Returns a new FutureEffect that executes the effect within the given RetryHandler, which + * may retry the operation on failures. + */ + def retry(handler: RetryHandler[Unit]): FutureEffect[T] = + FutureEffect[T](handler.wrap(self)) + + @deprecated("use trackOutcome", "2.11.1") + def countExceptions(stats: StatsReceiver, getScope: T => String) = { + val exceptionCounterFactory = new MemoizedExceptionCounterFactory(stats) + FutureEffect[T] { t => + exceptionCounterFactory(getScope(t)) { self(t) } + } + } + + /** + * Produces a FutureEffect that times the underlying operation into the stat named (`extractName(t)`, "latency_ms"). + */ + def trackLatency(stats: StatsReceiver, extractName: T => String): FutureEffect[T] = + FutureEffect[T] { t => + Stat.timeFuture(stats.stat(extractName(t), "latency_ms")) { self(t) } + } + /** Tracks outcomes without an exception categorizer (the categorizer always returns None). */ + def trackOutcome( + stats: StatsReceiver, + extractName: T => String, + logger: Logger = NullLogger + ): FutureEffect[T] = trackOutcome(stats, extractName, logger, _ => None) + + /** + * Produces a FutureEffect that tracks the outcome (i.e. success vs failure) of + * requests, including counting exceptions by classname.
+ */ + def trackOutcome( + stats: StatsReceiver, + extractName: T => String, + logger: Logger, + exceptionCategorizer: Throwable => Option[String] + ): FutureEffect[T] = + FutureEffect[T] { t => + val name = extractName(t) + val scope = stats.scope(name) + + self(t) respond { r => + scope.counter("requests").incr() + + r match { + case Return(_) => + scope.counter("success").incr() + + case Throw(t) => + val category = exceptionCategorizer(t).getOrElse("failures") + scope.counter(category).incr() + scope.scope(category).counter(ThrowableHelper.sanitizeClassnameChain(t): _*).incr() + logger.warning(t, s"failure in $name") + } + } + } + + /** + * Observe latency and success rate for any FutureEffect (`trackLatency` followed by `trackOutcome`). + * @param statsScope a function to produce a parent stats scope from the argument + * to the FutureEffect + * @param exceptionCategorizer a function to assign different Throwables to custom stats scopes; "failures" is used when it returns None. + */ + def observed( + statsReceiver: StatsReceiver, + statsScope: T => String, + logger: Logger = NullLogger, + exceptionCategorizer: Throwable => Option[String] = _ => None + ): FutureEffect[T] = + self + .trackLatency(statsReceiver, statsScope) + .trackOutcome(statsReceiver, statsScope, logger, exceptionCategorizer) + + /** + * Produces a new FutureEffect where the given function is applied to the Future returned by this + * FutureEffect. + */ + def mapResult(f: Future[Unit] => Future[Unit]): FutureEffect[T] = + FutureEffect[T] { x => + f(self(x)) + } + + /** + * Produces a new FutureEffect where the returned Future must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. + * + * ''Note'': On timeout, the underlying future is NOT interrupted.
+ */ + def withTimeout(timer: Timer, timeout: Duration): FutureEffect[T] = + mapResult(_.within(timer, timeout)) + + /** + * Produces a new FutureEffect where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable (`exc`, passed by name). + * + * ''Note'': On timeout, the underlying future is NOT interrupted. + */ + def withTimeout(timer: Timer, timeout: Duration, exc: => Throwable): FutureEffect[T] = + mapResult(_.within(timer, timeout, exc)) + + /** + * Produces a new FutureEffect where the returned Future must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. + * + * ''Note'': On timeout, the underlying future is interrupted (uses `Future.raiseWithin`). + */ + def raiseWithin(timer: Timer, timeout: Duration): FutureEffect[T] = + mapResult(_.raiseWithin(timeout)(timer)) + + /** + * Produces a new FutureEffect where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable. + * + * ''Note'': On timeout, the underlying future is interrupted.
+ */ + def raiseWithin(timer: Timer, timeout: Duration, exc: => Throwable): FutureEffect[T] = + mapResult(_.raiseWithin(timer, timeout, exc)) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala new file mode 100644 index 000000000..7b1420bff --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala @@ -0,0 +1,210 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.{Duration, Time} +import java.util.concurrent.ThreadLocalRandom +import scala.language.implicitConversions + +object Gate { + + /** + * Construct a new Gate from a boolean function and a string representation (by-name; used as `toString`) + */ + def apply[T](f: T => Boolean, repr: => String): Gate[T] = + new Gate[T] { + override def apply[U](u: U)(implicit asT: <:<[U, T]): Boolean = f(asT(u)) + override def toString: String = repr + } + + /** + * Construct a new Gate from a boolean function + */ + def apply[T](f: T => Boolean): Gate[T] = Gate(f, "Gate(" + f + ")") + + /** + * Create a Gate[Any] with a probability of returning true + * that increases linearly with the availability, which should range from 0.0 to 1.0 (it is clamped to that range). + */ + def fromAvailability( + availability: => Double, + randomDouble: => Double = ThreadLocalRandom.current().nextDouble(), + repr: String = "Gate.fromAvailability" + ): Gate[Any] = + Gate(_ => randomDouble < math.max(math.min(availability, 1.0), 0.0), repr) + + /** + * Creates a Gate[Any] with a probability of returning true that + * increases linearly in time between startTime and (startTime + rampUpDuration).
+ */ + def linearRampUp( + startTime: Time, + rampUpDuration: Duration, + randomDouble: => Double = ThreadLocalRandom.current().nextDouble() + ): Gate[Any] = { + val availability = availabilityFromLinearRampUp(startTime, rampUpDuration) + + fromAvailability( + availability(Time.now), + randomDouble, + repr = "Gate.rampUp(" + startTime + ", " + rampUpDuration + ")" + ) + } + + /** + * Generates an availability function that maps a point in time to an availability value + * in the range of 0.0 - 1.0. Availability is 0 if the given time is at or before startTime, is + * 1 if it is at or after (startTime + rampUpDuration), and is otherwise linearly + * interpolated between 0.0 and 1.0 as the time moves through the two endpoints. + */ + def availabilityFromLinearRampUp(startTime: Time, rampUpDuration: Duration): Time => Double = { + val endTime = startTime + rampUpDuration + val rampUpMillis = rampUpDuration.inMilliseconds.toDouble + now => { + if (now >= endTime) { + 1.0 + } else if (now <= startTime) { + 0.0 + } else { + (now - startTime).inMilliseconds.toDouble / rampUpMillis + } + } + } + + /** + * Returns a gate that increments true / false counters for each Gate invocation. Counter names + * can be overridden with trueName and falseName. + */ + def observed[T]( + gate: Gate[T], + stats: StatsReceiver, + trueName: String = "true", + falseName: String = "false" + ): Gate[T] = { + val trueCount = stats.counter(trueName) + val falseCount = stats.counter(falseName) + gate + .onTrue[T] { _ => + trueCount.incr() + } + .onFalse[T] { _ => + falseCount.incr() + } + } + + /** + * Construct a new Gate from a boolean value; its string representation is `v.toString` + */ + def const(v: Boolean): Gate[Any] = Gate(_ => v, v.toString) + + /** + * Constructs a new Gate that returns true if any of the gates in the input list return true. + * Always returns false when the input list is empty.
+ */ + def any[T](gates: Gate[T]*): Gate[T] = gates.foldLeft[Gate[T]](Gate.False)(_ | _) + + /** + * Constructs a new Gate that returns true iff all the gates in the input list return true. + * Always returns true when the input list is empty. + */ + def all[T](gates: Gate[T]*): Gate[T] = gates.foldLeft[Gate[T]](Gate.True)(_ & _) + + /** + * Gates that always return true/false + */ + val True: Gate[Any] = const(true) + val False: Gate[Any] = const(false) + + // Implicit conversions to downcast Gate to a plain function + implicit def gate2function1[T](g: Gate[T]): T => Boolean = g(_) + implicit def gate2function0(g: Gate[Unit]): () => Boolean = () => g(()) +} + +/** + * A function from T to Boolean, composable with boolean-like operators. + * Also supports building higher-order functions + * for dispatching based upon the value of this function over values of type T. + * Note: Gate does not inherit from T => Boolean in order to enforce correct type checking + * in the apply method of Gate[Unit]. (Scala is overeager to convert the return type of + * an expression to Unit.) Instead, an implicit conversion allows Gate to be used in methods that + * require a function T => Boolean. + */ +trait Gate[-T] { + + /** + * A function from T => Boolean with strict type bounds + */ + def apply[U](u: U)(implicit asT: <:<[U, T]): Boolean + + /** + * A nullary variant of apply that can be used when Unit is a subtype of T + */ + def apply()(implicit isUnit: <:<[Unit, T]): Boolean = apply(isUnit(())) + + /** + * Return a new Gate which applies the given function and then calls this Gate + */ + def contramap[U](f: U => T): Gate[U] = Gate(f andThen this, "%s.contramap(%s)".format(this, f)) + + /** + * Returns a new Gate of the requested type that ignores its input + */ + def on[U](implicit isUnit: <:<[Unit, T]): Gate[U] = contramap((_: U) => ()) + + /** + * Returns a new Gate which returns true when this Gate returns false + */ + def unary_!
: Gate[T] = Gate(x => !this(x), "!%s".format(this)) + + /** + * Returns a new Gate which returns true when both this Gate and other Gate return true (other is not evaluated if this Gate returns false) + */ + def &[U <: T](other: Gate[U]): Gate[U] = + Gate(x => this(x) && other(x), "(%s & %s)".format(this, other)) + + /** + * Returns a new Gate which returns true when either this Gate or other Gate return true (other is not evaluated if this Gate returns true) + */ + def |[U <: T](other: Gate[U]): Gate[U] = + Gate(x => this(x) || other(x), "(%s | %s)".format(this, other)) + + /** + * Returns a new Gate which returns true when return values of this Gate and other Gate differ + */ + def ^[U <: T](other: Gate[U]): Gate[U] = + Gate(x => this(x) ^ other(x), "(%s ^ %s)".format(this, other)) + + /** + * Returns the first value when this Gate returns true, or the second value if it returns false. Only the chosen by-name value is evaluated. + */ + def pick[A](t: T, x: => A, y: => A): A = if (this(t)) x else y + + /** + * A variant of pick that doesn't require a value if Unit is a subtype of T + */ + def pick[A](x: => A, y: => A)(implicit isUnit: <:<[Unit, T]): A = pick(isUnit(()), x, y) + + /** + * Returns a 1-arg function that dynamically picks x or y based upon the function arg. + */ + def select[A](x: => A, y: => A): T => A = pick(_, x, y) + + /** + * Returns a version of this gate that runs the effect if the gate returns true. The gate's value is returned unchanged. + */ + def onTrue[U <: T](f: U => Unit): Gate[U] = + Gate { (t: U) => + val v = this(t) + if (v) f(t) + v + } + + /** + * Returns a version of this gate that runs the effect if the gate returns false.
+ */ + def onFalse[U <: T](f: U => Unit): Gate[U] = + Gate { (t: U) => + val v = this(t) + if (!v) f(t) + v + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala new file mode 100644 index 000000000..262ea1bab --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala @@ -0,0 +1,41 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.{StatsReceiver, Stat} +import com.twitter.util.Future + +object LogarithmicallyBucketedTimer { + val LatencyStatName = "latency_ms" +} + +/** + * Helper to bucket timings by quantity. It produces base10 and baseE log buckets. + */ +class LogarithmicallyBucketedTimer( + statsReceiver: StatsReceiver, + prefix: String = LogarithmicallyBucketedTimer.LatencyStatName) { + + protected[this] def base10Key(count: Int) = + prefix + "_log_10_" + math.floor(math.log10(count)).toInt + + protected[this] def baseEKey(count: Int) = + prefix + "_log_E_" + math.floor(math.log(count)).toInt + + /** + * Times `f` under the `prefix` stat; for positive counts, also under the stats for the + * floored base10 and baseE logs of the count + */ + def apply[T](count: Int = 0)(f: => Future[T]) = { + Stat.timeFuture(statsReceiver.stat(prefix)) { + // only bucketize for positive, non-zero counts + if (count > 0) { + Stat.timeFuture(statsReceiver.stat(base10Key(count))) { + Stat.timeFuture(statsReceiver.stat(baseEKey(count))) { + f + } + } + } else { + f + } + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala new file mode 100644 index 000000000..995d01906 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala @@ -0,0 +1,46 @@ +package com.twitter.servo.util + +import
com.twitter.finagle.stats._ + +/** + * Memoizes the scoped StatsReceivers, counters and stats of the wrapped StatsReceiver to avoid unnecessary object creation. Gauges are delegated without memoization. + */ +class MemoizingStatsReceiver(val self: StatsReceiver) + extends StatsReceiver + with DelegatingStatsReceiver + with Proxy { + def underlying: Seq[StatsReceiver] = Seq(self) + + val repr = self.repr + + private[this] lazy val scopeMemo = + Memoize[String, StatsReceiver] { name => + new MemoizingStatsReceiver(self.scope(name)) + } + + private[this] lazy val counterMemo = + Memoize[(Seq[String], Verbosity), Counter] { + case (names, verbosity) => + self.counter(verbosity, names: _*) + } + + private[this] lazy val statMemo = + Memoize[(Seq[String], Verbosity), Stat] { + case (names, verbosity) => + self.stat(verbosity, names: _*) + } + + def counter(metricBuilder: MetricBuilder): Counter = + counterMemo(metricBuilder.name -> metricBuilder.verbosity) + + def stat(metricBuilder: MetricBuilder): Stat = statMemo( + metricBuilder.name -> metricBuilder.verbosity) + + def addGauge(metricBuilder: MetricBuilder)(f: => Float): Gauge = { + // scalafix:off StoreGaugesAsMemberVariables + self.addGauge(metricBuilder)(f) + // scalafix:on StoreGaugesAsMemberVariables + } + + override def scope(name: String): StatsReceiver = scopeMemo(name) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala new file mode 100644 index 000000000..443911763 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala @@ -0,0 +1,22 @@ +package com.twitter.servo.util + +import com.twitter.finagle.thrift.ClientId + +/** + * A trait defining contextual information (request name and client id) necessary to authorize + * and observe a request. + */ +trait Observable { + val requestName: String + val clientId: Option[ClientId] + + /** + * An Option[String] representation of the request-issuer's ClientId.
+ */ + lazy val clientIdString: Option[String] = + // It's possible for `ClientId.name` to be `null`, so we wrap it in + // `Option()` to force such cases to be None. + clientId flatMap { cid => + Option(cid.name) + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala new file mode 100644 index 000000000..11635316a --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala @@ -0,0 +1,22 @@ +package com.twitter.servo.util + +object OptionOrdering { + + /** + * Creates an Ordering of Option objects. Nones are ordered before Somes, and two Somes + * are ordered according to the given value ordering. + */ + def apply[A](valueOrdering: Ordering[A]) = new Ordering[Option[A]] { + // Nones before Somes, for two Somes, use valueOrdering + def compare(x: Option[A], y: Option[A]): Int = { + x match { + case None => if (y.nonEmpty) -1 else 0 + case Some(xValue) => + y match { + case None => 1 + case Some(yValue) => valueOrdering.compare(xValue, yValue) + } + } + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala new file mode 100644 index 000000000..569538554 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala @@ -0,0 +1,16 @@ +package com.twitter.servo.util + +import com.twitter.util.Duration +import scala.util.Random + +/** + * A class for generating bounded random fluctuations around a given Duration. 
+ */ +class RandomPerturber(percentage: Float, rnd: Random = new Random) extends (Duration => Duration) { + assert(percentage > 0 && percentage < 1, "percentage must be > 0 and < 1") + + override def apply(dur: Duration): Duration = { + val ns = dur.inNanoseconds + Duration.fromNanoseconds((ns + ((2 * rnd.nextFloat - 1) * percentage * ns)).toLong) + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala new file mode 100644 index 000000000..749addcc7 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala @@ -0,0 +1,71 @@ +package com.twitter.servo.util + +import com.twitter.logging.{Level, Logger} +import com.twitter.util.{Duration, Time} +import com.twitter.conversions.DurationOps._ +import java.util.concurrent.atomic.AtomicLong + +object RateLimitingLogger { + private[util] val DefaultLoggerName = "servo" + private[util] val DefaultLogInterval = 500.milliseconds +} + +/** + * Class that makes it easier to rate-limit log messages, either by call site, or by + * logical grouping of messages. 
+ * @param interval the interval in which messages should be rate limited + * @param logger the logger to use + */ +class RateLimitingLogger( + interval: Duration = RateLimitingLogger.DefaultLogInterval, + logger: Logger = Logger(RateLimitingLogger.DefaultLoggerName)) { + private[this] val last: AtomicLong = new AtomicLong(0L) + private[this] val sinceLast: AtomicLong = new AtomicLong(0L) + + private[this] val intervalNanos = interval.inNanoseconds + private[this] val intervalMsString = interval.inMilliseconds.toString + + private[this] def limited(action: Long => Unit): Unit = { + val now = Time.now.inNanoseconds + val lastNanos = last.get() + if (now - lastNanos > intervalNanos) { + if (last.compareAndSet(lastNanos, now)) { + val currentSinceLast = sinceLast.getAndSet(0L) + action(currentSinceLast) + } + } else { + sinceLast.incrementAndGet() + } + } + + def log(msg: => String, level: Level = Level.ERROR): Unit = { + limited { currentSinceLast: Long => + logger( + level, + "%s (group is logged at most once every %s ms%s)".format( + msg, + intervalMsString, + if (currentSinceLast > 0) { + s", ${currentSinceLast} occurrences since last" + } else "" + ) + ) + } + } + + def logThrowable(t: Throwable, msg: => String, level: Level = Level.ERROR): Unit = { + limited { currentSinceLast: Long => + logger( + level, + t, + "%s (group is logged at most once every %s ms%s)".format( + msg, + intervalMsString, + if (currentSinceLast > 0) { + s", ${currentSinceLast} occurrences since last" + } else "" + ) + ) + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala new file mode 100644 index 000000000..164dc2561 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala @@ -0,0 +1,100 @@ +package com.twitter.servo.util + +import com.twitter.finagle.{Backoff, Service, TimeoutException, WriteException} +import 
com.twitter.finagle.service.{RetryExceptionsFilter, RetryPolicy} +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.util.DefaultTimer +import com.twitter.util.{Duration, Future, Throw, Timer, Try} + +/** + * Allows an action to be retried according to a backoff strategy. + * This is an adaption of the Finagle RetryExceptionsFilter, but with an + * arbitrary asynchronous computation. + */ +class Retry( + statsReceiver: StatsReceiver, + backoffs: Backoff, + private[this] val timer: Timer = DefaultTimer) { + + /** + * retry on specific exceptions + */ + def apply[T]( + f: () => Future[T] + )( + shouldRetry: PartialFunction[Throwable, Boolean] + ): Future[T] = { + val policy = RetryPolicy.backoff[Try[Nothing]](backoffs) { + case Throw(t) if shouldRetry.isDefinedAt(t) => shouldRetry(t) + } + + val service = new Service[Unit, T] { + override def apply(u: Unit): Future[T] = f() + } + + val retrying = new RetryExceptionsFilter(policy, timer, statsReceiver) andThen service + + retrying() + } + + @deprecated("release() has no function and will be removed", "2.8.2") + def release(): Unit = {} +} + +/** + * Use to configure separate backoffs for WriteExceptions, TimeoutExceptions, + * and service-specific exceptions + */ +class ServiceRetryPolicy( + writeExceptionBackoffs: Backoff, + timeoutBackoffs: Backoff, + serviceBackoffs: Backoff, + shouldRetryService: PartialFunction[Throwable, Boolean]) + extends RetryPolicy[Try[Nothing]] { + override def apply(r: Try[Nothing]) = r match { + case Throw(t) if shouldRetryService.isDefinedAt(t) => + if (shouldRetryService(t)) + onServiceException + else + None + case Throw(_: WriteException) => onWriteException + case Throw(_: TimeoutException) => onTimeoutException + case _ => None + } + + def copy( + writeExceptionBackoffs: Backoff = writeExceptionBackoffs, + timeoutBackoffs: Backoff = timeoutBackoffs, + serviceBackoffs: Backoff = serviceBackoffs, + shouldRetryService: PartialFunction[Throwable, Boolean] = 
shouldRetryService + ) = + new ServiceRetryPolicy( + writeExceptionBackoffs, + timeoutBackoffs, + serviceBackoffs, + shouldRetryService + ) + + private[this] def onWriteException = consume(writeExceptionBackoffs) { tail => + copy(writeExceptionBackoffs = tail) + } + + private[this] def onTimeoutException = consume(timeoutBackoffs) { tail => + copy(timeoutBackoffs = tail) + } + + private[this] def onServiceException = consume(serviceBackoffs) { tail => + copy(serviceBackoffs = tail) + } + + private[this] def consume(b: Backoff)(f: Backoff => ServiceRetryPolicy) = { + if (b.isExhausted) None + else Some((b.duration, f(b.next))) + } + + override val toString = "ServiceRetryPolicy(%s, %s, %s)".format( + writeExceptionBackoffs, + timeoutBackoffs, + serviceBackoffs + ) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala new file mode 100644 index 000000000..f1e02c641 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala @@ -0,0 +1,169 @@ +package com.twitter.servo.util + +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.{RetryBudget, RetryPolicy} +import com.twitter.finagle.stats.{Counter, StatsReceiver} +import com.twitter.util._ +import java.util.concurrent.CancellationException +import scala.util.control.NonFatal + +/** + * A RetryHandler can wrap an arbitrary Future-producing operation with retry logic, where the + * operation may conditionally be retried multiple times. + */ +trait RetryHandler[-A] { + + /** + * Executes the given operation and performs any applicable retries. + */ + def apply[A2 <: A](f: => Future[A2]): Future[A2] + + /** + * Wraps an arbitrary function with this RetryHandler's retrying logic. 
+ */ + def wrap[A2 <: A, B](f: B => Future[A2]): B => Future[A2] = + b => this(f(b)) +} + +object RetryHandler { + + /** + * Builds a RetryHandler that retries according to the given RetryPolicy. Retries, if any, + * will be scheduled on the given Timer to be executed after the appropriate backoff, if any. + * Retries will be limited according to the given `RetryBudget`. + */ + def apply[A]( + policy: RetryPolicy[Try[A]], + timer: Timer, + statsReceiver: StatsReceiver, + budget: RetryBudget = RetryBudget() + ): RetryHandler[A] = { + val firstTryCounter = statsReceiver.counter("first_try") + val retriesCounter = statsReceiver.counter("retries") + val budgetExhausedCounter = statsReceiver.counter("budget_exhausted") + + new RetryHandler[A] { + def apply[A2 <: A](f: => Future[A2]): Future[A2] = { + firstTryCounter.incr() + budget.deposit() + retry[A2](policy, timer, retriesCounter, budgetExhausedCounter, budget)(f) + } + } + } + + /** + * Builds a RetryHandler that will only retry on failures that are handled by the given policy, + * and does not consider any successful future for retries. + */ + def failuresOnly[A]( + policy: RetryPolicy[Try[Nothing]], + timer: Timer, + statsReceiver: StatsReceiver, + budget: RetryBudget = RetryBudget() + ): RetryHandler[A] = + apply(failureOnlyRetryPolicy(policy), timer, statsReceiver, budget) + + /** + * Builds a RetryHandler that will retry any failure according to the given backoff schedule, + * until either the operation succeeds or all backoffs are exhausted. + */ + def failuresOnly[A]( + backoffs: Stream[Duration], + timer: Timer, + stats: StatsReceiver, + budget: RetryBudget + ): RetryHandler[A] = + failuresOnly( + RetryPolicy.backoff[Try[Nothing]](Backoff.fromStream(backoffs)) { case Throw(_) => true }, + timer, + stats, + budget + ) + + /** + * Builds a RetryHandler that will retry any failure according to the given backoff schedule, + * until either the operation succeeds or all backoffs are exhausted.
+ */ + def failuresOnly[A]( + backoffs: Stream[Duration], + timer: Timer, + stats: StatsReceiver + ): RetryHandler[A] = + failuresOnly(backoffs, timer, stats, RetryBudget()) + + /** + * Converts a RetryPolicy that only handles failures (Throw) to a RetryPolicy that also + * handles successes (Return), by flagging that successes need not be retried. + */ + def failureOnlyRetryPolicy[A](policy: RetryPolicy[Try[Nothing]]): RetryPolicy[Try[A]] = + RetryPolicy[Try[A]] { + case Return(_) => None + case Throw(ex) => + policy(Throw(ex)) map { + case (backoff, p2) => (backoff, failureOnlyRetryPolicy(p2)) + } + } + + private[this] def retry[A]( + policy: RetryPolicy[Try[A]], + timer: Timer, + retriesCounter: Counter, + budgetExhausedCounter: Counter, + budget: RetryBudget + )( + f: => Future[A] + ): Future[A] = { + forceFuture(f).transform { transformed => + policy(transformed) match { + case Some((backoff, nextPolicy)) => + if (budget.tryWithdraw()) { + retriesCounter.incr() + schedule(backoff, timer) { + retry(nextPolicy, timer, retriesCounter, budgetExhausedCounter, budget)(f) + } + } else { + budgetExhausedCounter.incr() + Future.const(transformed) + } + case None => + Future.const(transformed) + } + } + } + + // similar to finagle's RetryExceptionsFilter + private[this] def schedule[A](d: Duration, timer: Timer)(f: => Future[A]) = { + if (d.inNanoseconds > 0) { + val promise = new Promise[A] + val task = timer.schedule(Time.now + d) { + if (!promise.isDefined) { + try { + promise.become(f) + } catch { + case NonFatal(cause) => + // Ignore any exceptions thrown by Promise#become(). This usually means that the promise + // was already defined and cannot be transformed. 
+ } + } + } + promise.setInterruptHandler { + case cause => + task.cancel() + val cancellation = new CancellationException + cancellation.initCause(cause) + promise.updateIfEmpty(Throw(cancellation)) + } + promise + } else forceFuture(f) + } + + // (Future { f } flatten), but without the allocation + private[this] def forceFuture[A](f: => Future[A]) = { + try { + f + } catch { + case NonFatal(cause) => + Future.exception(cause) + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala new file mode 100644 index 000000000..36b790760 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala @@ -0,0 +1,90 @@ +package com.twitter.servo.util + +import com.twitter.util.Future + +object RpcRetry { + + /** + * Provides a generic implementation of a retry logic to only a subset + * of requests according to a given predicate and returning the result + * in the original order after the retry. + * @param rpcs Methods that can transform a Seq[Request] to + * Future[Map[Request, Response]], they will be invoked in order + * while there are remaining rpcs to invoke AND some responses + * still return false to the predicate. + * @param isSuccess if true, keep the response, else retry. + * @tparam Req a request object + * @tparam Resp a response object + * @return an rpc function (Seq[Req] => Future[Map[Req, Resp]]) that performs + * the retries internally. + */ + def retryableRpc[Req, Resp]( + rpcs: Seq[Seq[Req] => Future[Map[Req, Resp]]], + isSuccess: Resp => Boolean + ): Seq[Req] => Future[Map[Req, Resp]] = { + requestRetryAndMerge[Req, Resp](_, isSuccess, rpcs.toStream) + } + + /** + * Provides a generic implementation of a retry logic to only a subset + * of requests according to a given predicate and returning the result + * in the original order after the retry. 
+ * @param rpcs Methods that can transform a Seq[Request] to + * Future[Seq[Response]], they will be invoked in order + * while there are remaining rpcs to invoke AND some responses + * still return false to the predicate. + * Note that all Request objects must adhere to hashCode/equals standards + * @param isSuccess if true, keep the response, else retry. + * @tparam Req a request object. Must adhere to hashCode/equals standards + * @tparam Resp a response object + * @return an rpc function (Seq[Req] => Future[Seq[Resp]]) that performs + * the retries internally. + */ + def retryableRpcSeq[Req, Resp]( + rpcs: Seq[Seq[Req] => Future[Seq[Resp]]], + isSuccess: Resp => Boolean + ): Seq[Req] => Future[Seq[Resp]] = { + requestRetryAndMergeSeq[Req, Resp](_, isSuccess, rpcs) + } + + private[this] def requestRetryAndMergeSeq[Req, Resp]( + requests: Seq[Req], + isSuccess: Resp => Boolean, + rpcs: Seq[Seq[Req] => Future[Seq[Resp]]] + ): Future[Seq[Resp]] = { + requestRetryAndMerge(requests, isSuccess, (rpcs map { rpcToMapResponse(_) }).toStream) map { + responseMap => + requests map { responseMap(_) } + } + } + + private[this] def requestRetryAndMerge[Req, Resp]( + requests: Seq[Req], + isSuccess: Resp => Boolean, + rpcs: Stream[Seq[Req] => Future[Map[Req, Resp]]] + ): Future[Map[Req, Resp]] = { + if (rpcs.isEmpty) { + Future.exception(new IllegalArgumentException("rpcs is empty.")) + } else { + val rpc = rpcs.head + rpc(requests) flatMap { responses => + val (keep, recurse) = responses partition { + case (_, rep) => isSuccess(rep) + } + if (rpcs.tail.nonEmpty && recurse.nonEmpty) { + requestRetryAndMerge(recurse.keys.toSeq, isSuccess, rpcs.tail) map { keep ++ _ } + } else { + Future.value(responses) + } + } + } + } + + private[this] def rpcToMapResponse[Req, Resp]( + rpc: Seq[Req] => Future[Seq[Resp]] + ): Seq[Req] => Future[Map[Req, Resp]] = { (reqs: Seq[Req]) => + rpc(reqs) map { reps => + (reqs zip reps).toMap + } + } +} diff --git 
a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala new file mode 100644 index 000000000..1d20842df --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala @@ -0,0 +1,80 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.{NullStatsReceiver, StatsReceiver} +import com.twitter.logging._ +import com.twitter.scrooge.{BinaryThriftStructSerializer, ThriftStruct, ThriftStructCodec} +import com.twitter.util.Future + +object Scribe { + + /** + * Returns a new FutureEffect for scribing text to the specified category. + */ + def apply( + category: String, + statsReceiver: StatsReceiver = NullStatsReceiver + ): FutureEffect[String] = + Scribe(loggingHandler(category = category, statsReceiver = statsReceiver)) + + /** + * Returns a new FutureEffect for scribing text to the specified logging handler. + */ + def apply(handler: Handler): FutureEffect[String] = + FutureEffect[String] { msg => + handler.publish(new LogRecord(handler.getLevel, msg)) + Future.Unit + } + + /** + * Returns a new FutureEffect for scribing thrift objects to the specified category. + * The thrift object will be serialized to binary then converted to Base64. + */ + def apply[T <: ThriftStruct]( + codec: ThriftStructCodec[T], + category: String + ): FutureEffect[T] = + Scribe(codec, Scribe(category = category)) + + /** + * Returns a new FutureEffect for scribing thrift objects to the specified category. + * The thrift object will be serialized to binary then converted to Base64. + */ + def apply[T <: ThriftStruct]( + codec: ThriftStructCodec[T], + category: String, + statsReceiver: StatsReceiver + ): FutureEffect[T] = + Scribe(codec, Scribe(category = category, statsReceiver = statsReceiver)) + + /** + * Returns a new FutureEffect for scribing thrift objects to the underlying scribe effect. 
+ * The thrift object will be serialized to binary then converted to Base64. + */ + def apply[T <: ThriftStruct]( + codec: ThriftStructCodec[T], + underlying: FutureEffect[String] + ): FutureEffect[T] = + underlying contramap serialize(codec) + + /** + * Builds a logging Handler that scribes log messages, wrapped with a QueueingHandler. + */ + def loggingHandler( + category: String, + formatter: Formatter = BareFormatter, + maxQueueSize: Int = 5000, + statsReceiver: StatsReceiver = NullStatsReceiver + ): Handler = + new QueueingHandler( + ScribeHandler(category = category, formatter = formatter, statsReceiver = statsReceiver)(), + maxQueueSize = maxQueueSize + ) + + /** + * Returns a function that serializes thrift structs to Base64. + */ + def serialize[T <: ThriftStruct](c: ThriftStructCodec[T]): T => String = { + val serializer = BinaryThriftStructSerializer(c) + t => serializer.toString(t) + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala new file mode 100644 index 000000000..4e84fb801 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala @@ -0,0 +1,179 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.{Duration, Local} + +/** + * A strategy for tracking success rate, usually over a window + */ +trait SuccessRateTracker { self => + def record(successes: Int, failures: Int): Unit + def successRate: Double + + /** + * A [[Gate]] whose availability is computed from the success rate (SR) reported by the tracker. 
+ * + * @param availabilityFromSuccessRate function to calculate availability of gate given SR + */ + def availabilityGate(availabilityFromSuccessRate: Double => Double): Gate[Unit] = + Gate.fromAvailability(availabilityFromSuccessRate(successRate)) + + /** + * A [[Gate]] whose availability is computed from the success rate reported by the tracker + * with stats attached. + */ + def observedAvailabilityGate( + availabilityFromSuccessRate: Double => Double, + stats: StatsReceiver + ): Gate[Unit] = + new Gate[Unit] { + val underlying = availabilityGate(availabilityFromSuccessRate) + val availabilityGauge = + stats.addGauge("availability") { availabilityFromSuccessRate(successRate).toFloat } + override def apply[U](u: U)(implicit asT: <:<[U, Unit]): Boolean = underlying.apply(u) + } + + /** + * Tracks number of successes and failures as counters, and success_rate as a gauge + */ + def observed(stats: StatsReceiver) = { + val successCounter = stats.counter("successes") + val failureCounter = stats.counter("failures") + new SuccessRateTracker { + private[this] val successRateGauge = stats.addGauge("success_rate")(successRate.toFloat) + override def record(successes: Int, failures: Int) = { + self.record(successes, failures) + successCounter.incr(successes) + failureCounter.incr(failures) + } + override def successRate = self.successRate + } + } +} + +object SuccessRateTracker { + + /** + * Track success rate (SR) using [[RecentAverage]] + * + * Defaults success rate to 100% which prevents early failures (or periods of 0 data points, + * e.g. tracking backend SR during failover) from producing dramatic drops in success rate. + * + * @param window Window size as duration + */ + def recentWindowed(window: Duration) = + new AverageSuccessRateTracker(new RecentAverage(window, defaultAverage = 1.0)) + + /** + * Track success rate using [[WindowedAverage]] + * + * Initializes the windowedAverage to one window's worth of successes. 
This prevents + * the problem where early failures produce dramatic drops in the success rate. + * + * @param windowSize Window size in number of data points + */ + def rollingWindow(windowSize: Int) = + new AverageSuccessRateTracker(new WindowedAverage(windowSize, initialValue = Some(1.0))) +} + +/** + * Tracks success rate using an [[Average]] + * + * @param average Strategy for recording an average, usually over a window + */ +class AverageSuccessRateTracker(average: Average) extends SuccessRateTracker { + def record(successes: Int, failures: Int): Unit = + average.record(successes, successes + failures) + + def successRate: Double = average.value.getOrElse(1) +} + +/** + * EwmaSuccessRateTracker computes a failure rate with exponential decay over a time bound. + * + * @param halfLife determines the rate of decay. Assuming a hypothetical service that is initially + * 100% successful and then instantly switches to 50% successful, it will take `halfLife` time + * for this tracker to report a success rate of ~75%. 
+ */ +class EwmaSuccessRateTracker(halfLife: Duration) extends SuccessRateTracker { + // math.exp(-x) = 0.50 when x == ln(2) + // math.exp(-x / Tau) == math.exp(-x / halfLife * ln(2)) therefore when x/halfLife == 1, the + // decay output is 0.5 + private[this] val Tau: Double = halfLife.inNanoseconds.toDouble / math.log(2.0) + + private[this] var stamp: Long = EwmaSuccessRateTracker.nanoTime() + private[this] var decayingFailureRate: Double = 0.0 + + def record(successes: Int, failures: Int): Unit = { + if (successes < 0 || failures < 0) return + + val total = successes + failures + if (total == 0) return + + val observation = (failures.toDouble / total) max 0.0 min 1.0 + + synchronized { + val time = EwmaSuccessRateTracker.nanoTime() + val delta = ((time - stamp) max 0L).toDouble + val weight = math.exp(-delta / Tau) + decayingFailureRate = (decayingFailureRate * weight) + (observation * (1.0 - weight)) + stamp = time + } + } + + /** + * The current success rate computed as the inverse of the failure rate. + */ + def successRate: Double = 1.0 - failureRate + + def failureRate = synchronized { decayingFailureRate } +} + +private[servo] trait NanoTimeControl { + def set(nanoTime: Long): Unit + def advance(delta: Long): Unit + def advance(delta: Duration): Unit = advance(delta.inNanoseconds) +} + +object EwmaSuccessRateTracker { + private[EwmaSuccessRateTracker] val localNanoTime = new Local[() => Long] + + private[EwmaSuccessRateTracker] def nanoTime(): Long = { + localNanoTime() match { + case None => System.nanoTime() + case Some(f) => f() + } + } + + /** + * Execute body with the time function replaced by `timeFunction` + * WARNING: This is only meant for testing purposes. 
+ */ + private[this] def withNanoTimeFunction[A]( + timeFunction: => Long + )( + body: NanoTimeControl => A + ): A = { + @volatile var tf = () => timeFunction + + localNanoTime.let(() => tf()) { + val timeControl = new NanoTimeControl { + def set(nanoTime: Long): Unit = { + tf = () => nanoTime + } + def advance(delta: Long): Unit = { + val newNanoTime = tf() + delta + tf = () => newNanoTime + } + } + + body(timeControl) + } + } + + private[this] def withNanoTimeAt[A](nanoTime: Long)(body: NanoTimeControl => A): A = + withNanoTimeFunction(nanoTime)(body) + + private[servo] def withCurrentNanoTimeFrozen[A](body: NanoTimeControl => A): A = + withNanoTimeAt(System.nanoTime())(body) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala new file mode 100644 index 000000000..a57d30533 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala @@ -0,0 +1,5 @@ +package com.twitter.servo.util + +import scala.collection.mutable + +class SynchronizedHashMap[K, V] extends mutable.HashMap[K, V] with mutable.SynchronizedMap[K, V] diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala new file mode 100644 index 000000000..3edd1cf31 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala @@ -0,0 +1,11 @@ +package com.twitter.servo.util + +class ThreadLocalStringBuilder(initialSize: Int) extends ThreadLocal[StringBuilder] { + override def initialValue = new StringBuilder(initialSize) + + def apply() = { + val buf = get + buf.setLength(0) + buf + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala 
b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala new file mode 100644 index 000000000..5feeaa7e7 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala @@ -0,0 +1,41 @@ +package com.twitter.servo.util + +import com.twitter.util.Throwables + +/** + * An object with some helper methods for dealing with exceptions + * (currently just classname cleanup) + */ +object ThrowableHelper { + + /** + * Returns a sanitized sequence of classnames for the given Throwable + * including root causes. + */ + def sanitizeClassnameChain(t: Throwable): Seq[String] = + Throwables.mkString(t).map(classnameTransform(_)) + + /** + * Returns a sanitized classname for the given Throwable. + */ + def sanitizeClassname(t: Throwable): String = + classnameTransform(t.getClass.getName) + + /** + * A function that applies a bunch of cleanup transformations to exception classnames + * (currently two suffix strips, `$Immutable` then `$`; there will likely be more!). + */ + private val classnameTransform: String => String = + Memoize { stripSuffix("$Immutable").andThen(stripSuffix("$")) } + + /** + * Generates a function that strips off the specified suffix from strings, if found.
+ */ + private def stripSuffix(suffix: String): String => String = + s => { + if (s.endsWith(suffix)) + s.substring(0, s.length - suffix.length) + else + s + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala new file mode 100644 index 000000000..d5cb14479 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala @@ -0,0 +1,227 @@ +package com.twitter.servo.util + +import com.google.common.base.Charsets +import com.google.common.primitives.{Ints, Longs} +import com.twitter.scrooge.{BinaryThriftStructSerializer, ThriftStructCodec, ThriftStruct} +import com.twitter.util.{Future, Return, Try, Throw} +import java.nio.{ByteBuffer, CharBuffer} +import java.nio.charset.{Charset, CharsetEncoder, CharsetDecoder} + +/** + * Transformer is a (possibly partial) bidirectional conversion + * between values of two types. It is particularly useful for + * serializing values for storage and reading them back out (see + * com.twitter.servo.cache.Serializer). + * + * In some implementations, the conversion may lose data (for example + * when used for storage in a cache). In general, any data that passes + * through a conversion should be preserved if the data is converted + * back. There is code to make it easy to check that your Transformer + * instance has this property in + * com.twitter.servo.util.TransformerLawSpec. + * + * Transformers should take care not to mutate their inputs when + * converting in either direction, in order to ensure that concurrent + * transformations of the same input yield the same result. + * + * Transformer forms a category with `andThen` and `identity`. 
+ */ +trait Transformer[A, B] { self => + def to(a: A): Try[B] + + def from(b: B): Try[A] + + @deprecated("Use Future.const(transformer.to(x))", "2.0.1") + def asyncTo(a: A): Future[B] = Future.const(to(a)) + + @deprecated("Use Future.const(transformer.from(x))", "2.0.1") + def asyncFrom(b: B): Future[A] = Future.const(from(b)) + + /** + * Compose this transformer with another. As long as both + * transformers follow the stated laws, the composed transformer + * will follow them. + */ + def andThen[C](t: Transformer[B, C]): Transformer[A, C] = + new Transformer[A, C] { + override def to(a: A) = self.to(a) andThen t.to + override def from(c: C) = t.from(c) andThen self.from + } + + /** + * Reverse the direction of this transformer. + * + * Law: t.flip.flip == t + */ + lazy val flip: Transformer[B, A] = + new Transformer[B, A] { + override lazy val flip = self + override def to(b: B) = self.from(b) + override def from(a: A) = self.to(a) + } +} + +object Transformer { + + /** + * Create a new Transformer from the supplied functions, catching + * exceptions and converting them to failures. + */ + def apply[A, B](tTo: A => B, tFrom: B => A): Transformer[A, B] = + new Transformer[A, B] { + override def to(a: A): Try[B] = Try { tTo(a) } + override def from(b: B): Try[A] = Try { tFrom(b) } + } + + def identity[A]: Transformer[A, A] = pure[A, A](a => a, a => a) + + /** + * Lift a pair of (total) conversion functions to a Transformer. The + * caller is responsible for ensuring that the resulting transformer + * follows the laws for Transformers. + */ + def pure[A, B](pureTo: A => B, pureFrom: B => A): Transformer[A, B] = + new Transformer[A, B] { + override def to(a: A): Try[B] = Return(pureTo(a)) + override def from(b: B): Try[A] = Return(pureFrom(b)) + } + + /** + * Lift a transformer to a transformer on optional values. + * + * None bypasses the underlying conversion (as it must, since there + * is no value to transform). 
+ */ + def optional[A, B](underlying: Transformer[A, B]): Transformer[Option[A], Option[B]] = + new Transformer[Option[A], Option[B]] { + override def to(optA: Option[A]) = optA match { + case None => Return.None + case Some(a) => underlying.to(a) map { Some(_) } + } + + override def from(optB: Option[B]) = optB match { + case None => Return.None + case Some(b) => underlying.from(b) map { Some(_) } + } + } + + ////////////////////////////////////////////////// + // Transformers for accessing/generating fields of a Map. + // + // These transformers are useful for serializing/deserializing to + // storage that stores Maps, for example Hamsa. + + /** + * Thrown by `requiredField` when the field is not present. + */ + case class MissingRequiredField[K](k: K) extends RuntimeException + + /** + * Get a value from the map, yielding MissingRequiredField when the + * value is not present in the map. + * + * The inverse transform yields a Map containing only the one value. + */ + def requiredField[K, V](k: K): Transformer[Map[K, V], V] = + new Transformer[Map[K, V], V] { + override def to(m: Map[K, V]) = + m get k match { + case Some(v) => Return(v) + case None => Throw(MissingRequiredField(k)) + } + + override def from(v: V) = Return(Map(k -> v)) + } + + /** + * Attempt to get a field from a Map, yielding None if the value is + * not present. + * + * The inverse transform will put the value in a Map if it is Some, + * and omit it if it is None. + */ + def optionalField[K, V](k: K): Transformer[Map[K, V], Option[V]] = + pure[Map[K, V], Option[V]](_.get(k), _.map { k -> _ }.toMap) + + /** + * Transforms an Option[T] to a T, using a default value for None. + * + * Note that the default value will be converted back to None by + * .from (.from will never return Some(default)). + */ + def default[T](value: T): Transformer[Option[T], T] = + pure[Option[T], T](_ getOrElse value, t => if (t == value) None else Some(t)) + + /** + * Transforms `Long`s to big-endian byte arrays. 
+   */
+  lazy val LongToBigEndian: Transformer[Long, Array[Byte]] =
+    apply[Long, Array[Byte]](
+      long => Longs.toByteArray(long),
+      bytes => Longs.fromByteArray(bytes)
+    )
+
+  /**
+   * Transforms `Int`s to big-endian byte arrays.
+   */
+  lazy val IntToBigEndian: Transformer[Int, Array[Byte]] =
+    apply[Int, Array[Byte]](
+      int => Ints.toByteArray(int),
+      bytes => Ints.fromByteArray(bytes)
+    )
+
+  /**
+   * Transforms UTF8-encoded strings to byte arrays.
+   */
+  lazy val Utf8ToBytes: Transformer[String, Array[Byte]] =
+    stringToBytes(Charsets.UTF_8)
+
+  /**
+   * Transforms strings to byte arrays (and back) using the given
+   * character set. Encoding or decoding errors are captured as
+   * failures by the surrounding `Try`.
+   */
+  private[util] def stringToBytes(charset: Charset): Transformer[String, Array[Byte]] =
+    new Transformer[String, Array[Byte]] {
+      // Each thread lazily gets its own encoder/decoder built from `charset`.
+      private[this] val encoders = new ThreadLocal[CharsetEncoder]() {
+        protected override def initialValue(): CharsetEncoder = charset.newEncoder
+      }
+
+      private[this] val decoders = new ThreadLocal[CharsetDecoder]() {
+        protected override def initialValue(): CharsetDecoder = charset.newDecoder
+      }
+
+      override def to(str: String): Try[Array[Byte]] = Try {
+        // Encode through a CharsetEncoder instead of `String.getBytes("UTF-8")`:
+        // getBytes silently replaces malformed input such as unpaired UTF-16
+        // surrogates, whereas the encoder throws, and that exception becomes
+        // a failed Try here.
+        val encoded = encoders.get.encode(CharBuffer.wrap(str))
+        val backing = encoded.array
+        // Keep only the bytes between position and limit of the result buffer.
+        backing.slice(encoded.position, encoded.limit)
+      }
+
+      override def from(bytes: Array[Byte]): Try[String] = Try {
+        val decoded = decoders.get.decode(ByteBuffer.wrap(bytes))
+        decoded.toString
+      }
+    }
+
+  /**
+   * Transforms a ThriftStruct to a byte-array using Thrift's TBinaryProtocol.
+   */
+  def thriftStructToBytes[T <: ThriftStruct](
+    c: ThriftStructCodec[T]
+  ): Transformer[T, Array[Byte]] = {
+    // One serializer per transformer, shared by both directions.
+    val serializer = BinaryThriftStructSerializer(c)
+    apply[T, Array[Byte]](
+      struct => serializer.toBytes(struct),
+      bytes => serializer.fromBytes(bytes)
+    )
+  }
+}
+
+/**
+ * Turns an Option[T] into a T by substituting `default` for None. In
+ * the reverse direction a value equal to `default` becomes None.
+ */
+@deprecated("Use Transformer.default", "2.0.1")
+class OptionToTypeTransformer[T](default: T) extends Transformer[Option[T], T] {
+  override def to(b: Option[T]): Try[T] = {
+    val resolved = b.getOrElse(default)
+    Return(resolved)
+  }
+
+  override def from(a: T): Try[Option[T]] =
+    if (default == a) Return.None
+    else Return(Some(a))
+}
diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala
new file mode 100644
index 000000000..d770be704
--- /dev/null
+++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala
@@ -0,0 +1,23 @@
+package com.twitter.servo.util
+
+import com.twitter.util.{Return, Throw, Try}
+
+object TryOrdering {
+
+  /**
+   * Creates an Ordering of Try objects. Throws are ordered before Returns, and two Returns
+   * are ordered according to the given value ordering.
+   */
+  def apply[A](valueOrdering: Ordering[A]): Ordering[Try[A]] =
+    new Ordering[Try[A]] {
+      def compare(x: Try[A], y: Try[A]): Int =
+        (x, y) match {
+          // Two failures are considered equal, whatever the exceptions are.
+          case (Throw(_), Throw(_)) => 0
+          // A failure sorts before any success.
+          case (Throw(_), Return(_)) => -1
+          case (Return(_), Throw(_)) => 1
+          // Two successes defer to the supplied ordering.
+          case (Return(left), Return(right)) => valueOrdering.compare(left, right)
+        }
+    }
+}
diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala
new file mode 100644
index 000000000..e76020098
--- /dev/null
+++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala
@@ -0,0 +1,60 @@
+package com.twitter.servo.util
+
+import com.twitter.finagle.util.DefaultTimer
+import com.twitter.finagle.{Addr, Name, Namer}
+import com.twitter.logging.Logger
+import com.twitter.util._
+import scala.collection.JavaConverters._
+
+/**
+ * A simple utility class to wait for serverset names to be resolved at startup.
+ *
+ * See [[com.twitter.finagle.client.ClientRegistry.expAllRegisteredClientsResolved()]] for an
+ * alternative way to wait for ServerSet resolution.
+ */
+object WaitForServerSets {
+  val log = Logger.get("WaitForServerSets")
+
+  /**
+   * Single-name convenience overload (handy from Java). Delegates to the
+   * Seq-based `ready`, passing Finagle's DefaultTimer.
+   */
+  def ready(name: Name, timeout: Duration): Future[Unit] = {
+    val single = Seq(name)
+    ready(single, timeout, DefaultTimer)
+  }
+
+  /**
+   * Java compatibility overload: accepts a java.util.List and forwards its
+   * Scala view to the Seq-based `ready`.
+   */
+  def ready(names: java.util.List[Name], timeout: Duration, timer: Timer): Future[Unit] = {
+    val scalaNames = names.asScala
+    ready(scalaNames, timeout, timer)
+  }
+
+  /**
+   * Returns a Future that is satisfied when no more names resolve to Addr.Pending,
+   * or the specified timeout expires.
+   *
+   * This ignores address resolution failures, so just because the Future is satisfied
+   * doesn't necessarily imply that all names are resolved to something useful.
+   *
+   * If the timeout expires first, the Future fails with a TimeoutException
+   * whose message lists the names that were still Addr.Pending at that moment.
+   *
+   * @param names   the names to wait for
+   * @param timeout how long to wait before failing the returned Future
+   * @param timer   the timer used to schedule the timeout
+   */
+  def ready(names: Seq[Name], timeout: Duration, timer: Timer): Future[Unit] = {
+    // Pair every name with the Var tracking its current address.
+    val vars: Var[Seq[(Name, Addr)]] = Var.collect(names.map {
+      case n @ Name.Path(v) => Namer.resolve(v).map((n, _))
+      case n @ Name.Bound(v) => v.map((n, _))
+    })
+
+    // Fires for the first observed state in which nothing is pending any more.
+    val allSettled = vars.changes.filter { resolved =>
+      resolved.forall { case (_, addr) => addr != Addr.Pending }
+    }
+
+    allSettled
+      .toFuture()
+      .unit
+      .within(
+        timer,
+        timeout, {
+          // NOTE(review): this relies on `within` taking the exception by name, so
+          // the snapshot reflects the state at timeout - confirm against util's Future.
+          val snapshot = vars.sample()
+          val stillPending = snapshot.collect { case (name, Addr.Pending) => name }
+          // Report only the names that are actually unresolved; if a late update
+          // left nothing pending, fall back to listing every name.
+          val reported =
+            if (stillPending.nonEmpty) stillPending
+            else snapshot.map { case (name, _) => name }
+          new TimeoutException("Failed to resolve: " + reported)
+        }
+      )
+  }
+}
diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala
new file mode 100644
index 000000000..e9afcacc1
--- /dev/null
+++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala
@@ -0,0 +1,6 @@
+package com.twitter.servo
+
+package object util {
+  /* aliases to preserve compatibility after classes moved to different package */
+  val Memoize = com.twitter.util.Memoize
+}