diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.docx new file mode 100644 index 000000000..26f7627c6 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala deleted file mode 100644 index dfacaa4a6..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala +++ /dev/null @@ -1,150 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.stitch.Stitch -import com.twitter.storage.client.manhattan.kv.DeniedManhattanException -import com.twitter.tweetypie.storage.Response.TweetResponseCode -import com.twitter.tweetypie.storage.TweetUtils._ -import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet -import com.twitter.tweetypie.thriftscala.DeletedTweet -import scala.util.control.NonFatal - -sealed trait DeleteState -object DeleteState { - - /** - * This tweet is deleted but has not been permanently deleted from Manhattan. Tweets in this state - * may be undeleted. - */ - case object SoftDeleted extends DeleteState - - /** - * This tweet is deleted after being bounced for violating the Twitter Rules but has not been - * permanently deleted from Manhattan. Tweets in this state may NOT be undeleted. - */ - case object BounceDeleted extends DeleteState - - /** - * This tweet has been permanently deleted from Manhattan. - */ - case object HardDeleted extends DeleteState - - /** - * There is no data in Manhattan to distinguish this tweet id from one that never existed. - */ - case object NotFound extends DeleteState - - /** - * This tweet exists and is not in a deleted state. 
- */ - case object NotDeleted extends DeleteState -} - -case class DeletedTweetResponse( - tweetId: TweetId, - overallResponse: TweetResponseCode, - deleteState: DeleteState, - tweet: Option[DeletedTweet]) - -object GetDeletedTweetsHandler { - def apply( - read: ManhattanOperations.Read, - stats: StatsReceiver - ): TweetStorageClient.GetDeletedTweets = - (unfilteredTweetIds: Seq[TweetId]) => { - val tweetIds = unfilteredTweetIds.filter(_ > 0) - - Stats.addWidthStat("getDeletedTweets", "tweetIds", tweetIds.size, stats) - - val stitches = tweetIds.map { tweetId => - read(tweetId) - .map { mhRecords => - val storedTweet = buildStoredTweet(tweetId, mhRecords) - - TweetStateRecord.mostRecent(mhRecords) match { - case Some(m: TweetStateRecord.SoftDeleted) => softDeleted(m, storedTweet) - case Some(m: TweetStateRecord.BounceDeleted) => bounceDeleted(m, storedTweet) - case Some(m: TweetStateRecord.HardDeleted) => hardDeleted(m, storedTweet) - case _ if storedTweet.getFieldBlobs(expectedFields).isEmpty => notFound(tweetId) - case _ => notDeleted(tweetId, storedTweet) - } - } - .handle { - case _: DeniedManhattanException => - DeletedTweetResponse( - tweetId, - TweetResponseCode.OverCapacity, - DeleteState.NotFound, - None - ) - - case NonFatal(ex) => - TweetUtils.log.warning( - ex, - s"Unhandled exception in GetDeletedTweetsHandler for tweetId: $tweetId" - ) - DeletedTweetResponse(tweetId, TweetResponseCode.Failure, DeleteState.NotFound, None) - } - } - - Stitch.collect(stitches) - } - - private def notFound(tweetId: TweetId) = - DeletedTweetResponse( - tweetId = tweetId, - overallResponse = TweetResponseCode.Success, - deleteState = DeleteState.NotFound, - tweet = None - ) - - private def softDeleted(record: TweetStateRecord.SoftDeleted, storedTweet: StoredTweet) = - DeletedTweetResponse( - record.tweetId, - TweetResponseCode.Success, - DeleteState.SoftDeleted, - Some( - StorageConversions - .toDeletedTweet(storedTweet) - .copy(deletedAtMsec = Some(record.createdAt)) - ) - 
) - - private def bounceDeleted(record: TweetStateRecord.BounceDeleted, storedTweet: StoredTweet) = - DeletedTweetResponse( - record.tweetId, - TweetResponseCode.Success, - DeleteState.BounceDeleted, - Some( - StorageConversions - .toDeletedTweet(storedTweet) - .copy(deletedAtMsec = Some(record.createdAt)) - ) - ) - - private def hardDeleted(record: TweetStateRecord.HardDeleted, storedTweet: StoredTweet) = - DeletedTweetResponse( - record.tweetId, - TweetResponseCode.Success, - DeleteState.HardDeleted, - Some( - StorageConversions - .toDeletedTweet(storedTweet) - .copy( - hardDeletedAtMsec = Some(record.createdAt), - deletedAtMsec = Some(record.deletedAt) - ) - ) - ) - - /** - * notDeleted returns a tweet to simplify tweetypie.handler.UndeleteTweetHandler - */ - private def notDeleted(tweetId: TweetId, storedTweet: StoredTweet) = - DeletedTweetResponse( - tweetId = tweetId, - overallResponse = TweetResponseCode.Success, - deleteState = DeleteState.NotDeleted, - tweet = Some(StorageConversions.toDeletedTweet(storedTweet)) - ) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.docx new file mode 100644 index 000000000..8a25cc180 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala deleted file mode 100644 index eafdda5e9..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala +++ /dev/null @@ -1,126 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.stitch.Stitch -import com.twitter.stitch.StitchSeqGroup -import 
com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet -import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Error -import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Response._ -import com.twitter.tweetypie.storage.TweetUtils._ -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.util.Time -import com.twitter.util.Try -import scala.collection.mutable - -object GetStoredTweetHandler { - private[this] object DeletedState { - def unapply(stateRecord: Option[TweetStateRecord]): Option[TweetStateRecord] = - stateRecord match { - case state @ (Some(_: TweetStateRecord.SoftDeleted) | Some( - _: TweetStateRecord.HardDeleted) | Some(_: TweetStateRecord.BounceDeleted)) => - state - case _ => None - } - } - - private[this] def deletedAtMs(stateRecord: Option[TweetStateRecord]): Option[Long] = - stateRecord match { - case Some(d: TweetStateRecord.SoftDeleted) => Some(d.createdAt) - case Some(d: TweetStateRecord.BounceDeleted) => Some(d.createdAt) - case Some(d: TweetStateRecord.HardDeleted) => Some(d.deletedAt) - case _ => None - } - - private[this] def tweetResponseFromRecords( - tweetId: TweetId, - mhRecords: Seq[TweetManhattanRecord], - statsReceiver: StatsReceiver, - ): GetStoredTweet.Response = { - val errs = - mutable.Buffer[Error]() - - val hasStoredTweetFields: Boolean = mhRecords.exists { - case TweetManhattanRecord(TweetKey(_, _: TweetKey.LKey.FieldKey), _) => true - case _ => false - } - - val storedTweet = if (hasStoredTweetFields) { - Try(buildStoredTweet(tweetId, mhRecords, includeScrubbed = true)) - .onFailure(_ => errs.append(Error.TweetIsCorrupt)) - .toOption - } else { - None - } - - val scrubbedFields: Set[FieldId] = extractScrubbedFields(mhRecords) - val tweet: Option[Tweet] = storedTweet.map(StorageConversions.fromStoredTweetAllowInvalid) - val stateRecords: Seq[TweetStateRecord] = TweetStateRecord.fromTweetMhRecords(mhRecords) - val tweetState: Option[TweetStateRecord] = 
TweetStateRecord.mostRecent(mhRecords) - - storedTweet.foreach { storedTweet => - val storedExpectedFields = storedTweet.getFieldBlobs(expectedFields) - val missingExpectedFields = expectedFields.filterNot(storedExpectedFields.contains) - if (missingExpectedFields.nonEmpty || !isValid(storedTweet)) { - errs.append(Error.TweetFieldsMissingOrInvalid) - } - - val invalidScrubbedFields = storedTweet.getFieldBlobs(scrubbedFields).keys - if (invalidScrubbedFields.nonEmpty) { - errs.append(Error.ScrubbedFieldsPresent) - } - - if (deletedAtMs(tweetState).exists(_ < Time.now.inMilliseconds - 14.days.inMilliseconds)) { - errs.append(Error.TweetShouldBeHardDeleted) - } - } - - val err = Option(errs.toList).filter(_.nonEmpty) - - (tweet, tweetState, err) match { - case (None, None, None) => - statsReceiver.counter("not_found").incr() - NotFound(tweetId) - - case (None, Some(tweetState: TweetStateRecord.HardDeleted), None) => - statsReceiver.counter("hard_deleted").incr() - HardDeleted(tweetId, Some(tweetState), stateRecords, scrubbedFields) - - case (None, _, Some(errs)) => - statsReceiver.counter("failed").incr() - Failed(tweetId, tweetState, stateRecords, scrubbedFields, errs) - - case (Some(tweet), _, Some(errs)) => - statsReceiver.counter("found_invalid").incr() - FoundWithErrors(tweet, tweetState, stateRecords, scrubbedFields, errs) - - case (Some(tweet), DeletedState(state), None) => - statsReceiver.counter("deleted").incr() - FoundDeleted(tweet, Some(state), stateRecords, scrubbedFields) - - case (Some(tweet), _, None) => - statsReceiver.counter("found").incr() - Found(tweet, tweetState, stateRecords, scrubbedFields) - } - } - - def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetStoredTweet = { - - object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] { - override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = { - Stats.addWidthStat("getStoredTweet", "tweetIds", tweetIds.size, statsReceiver) - 
Stitch.traverse(tweetIds)(read(_)) - } - } - - tweetId => - if (tweetId <= 0) { - Stitch.NotFound - } else { - Stitch - .call(tweetId, mhGroup) - .map(mhRecords => - tweetResponseFromRecords(tweetId, mhRecords, statsReceiver.scope("getStoredTweet"))) - } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.docx new file mode 100644 index 000000000..2db3d15c6 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala deleted file mode 100644 index f68025e2d..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala +++ /dev/null @@ -1,167 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.stats.Counter -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.logging.Logger -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.stitch.Stitch -import com.twitter.stitch.StitchSeqGroup -import com.twitter.storage.client.manhattan.kv.DeniedManhattanException -import com.twitter.storage.client.manhattan.kv.ManhattanException -import com.twitter.tweetypie.storage.TweetStateRecord.BounceDeleted -import com.twitter.tweetypie.storage.TweetStateRecord.HardDeleted -import com.twitter.tweetypie.storage.TweetStateRecord.SoftDeleted -import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet -import com.twitter.tweetypie.storage.TweetUtils._ -import com.twitter.util.Duration -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.util.Time - -object GetTweetHandler { - private[this] val logger = Logger(getClass) - - 
////////////////////////////////////////////////// - // Logging racy reads for later validation. - - val RacyTweetWindow: Duration = 10.seconds - - /** - * If this read is soon after the tweet was created, then we would usually - * expect it to be served from cache. This early read indicates that this - * tweet is prone to consistency issues, so we log what's present in - * Manhattan at the time of the read for later analysis. - */ - private[this] def logRacyRead(tweetId: TweetId, records: Seq[TweetManhattanRecord]): Unit = - if (SnowflakeId.isSnowflakeId(tweetId)) { - val tweetAge = Time.now.since(SnowflakeId(tweetId).time) - if (tweetAge <= RacyTweetWindow) { - val sb = new StringBuilder - sb.append("racy_tweet_read\t") - .append(tweetId) - .append('\t') - .append(tweetAge.inMilliseconds) // Log the age for analysis purposes - records.foreach { rec => - sb.append('\t') - .append(rec.lkey) - rec.value.timestamp.foreach { ts => - // If there is a timestamp for this key, log it so that we can tell - // later on whether a value should have been present. We expect - // keys written in a single write to have the same timestamp, and - // generally, keys written in separate writes will have different - // timestamps. The timestamp value is optional in Manhattan, but - // we expect there to always be a value for the timestamp. - sb.append(':') - .append(ts.inMilliseconds) - } - } - logger.info(sb.toString) - } - } - - /** - * Convert a set of records from Manhattan into a GetTweet.Response. 
- */ - def tweetResponseFromRecords( - tweetId: TweetId, - mhRecords: Seq[TweetManhattanRecord], - statsReceiver: StatsReceiver = NullStatsReceiver - ): GetTweet.Response = - if (mhRecords.isEmpty) { - GetTweet.Response.NotFound - } else { - // If no internal fields are present or no required fields present, we consider the tweet - // as not returnable (even if some additional fields are present) - def tweetFromRecords(tweetId: TweetId, mhRecords: Seq[TweetManhattanRecord]) = { - val storedTweet = buildStoredTweet(tweetId, mhRecords) - if (storedTweet.getFieldBlobs(expectedFields).nonEmpty) { - if (isValid(storedTweet)) { - statsReceiver.counter("valid").incr() - Some(StorageConversions.fromStoredTweet(storedTweet)) - } else { - log.info(s"Invalid Tweet Id: $tweetId") - statsReceiver.counter("invalid").incr() - None - } - } else { - // The Tweet contained none of the fields defined in `expectedFields` - log.info(s"Expected Fields Not Present Tweet Id: $tweetId") - statsReceiver.counter("expected_fields_not_present").incr() - None - } - } - - val stateRecord = TweetStateRecord.mostRecent(mhRecords) - stateRecord match { - // some other cases don't require an attempt to construct a Tweet - case Some(_: SoftDeleted) | Some(_: HardDeleted) => GetTweet.Response.Deleted - - // all other cases require an attempt to construct a Tweet, which may not be successful - case _ => - logRacyRead(tweetId, mhRecords) - (stateRecord, tweetFromRecords(tweetId, mhRecords)) match { - // BounceDeleted contains the Tweet data so that callers can access data on the the - // tweet (e.g. hard delete daemon requires conversationId and userId. There are no - // plans for Tweetypie server to make use of the returned tweet at this time. 
- case (Some(_: BounceDeleted), Some(tweet)) => GetTweet.Response.BounceDeleted(tweet) - case (Some(_: BounceDeleted), None) => GetTweet.Response.Deleted - case (_, Some(tweet)) => GetTweet.Response.Found(tweet) - case _ => GetTweet.Response.NotFound - } - } - } - - def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetTweet = { - - object stats { - val getTweetScope = statsReceiver.scope("getTweet") - val deniedCounter: Counter = getTweetScope.counter("mh_denied") - val mhExceptionCounter: Counter = getTweetScope.counter("mh_exception") - val nonFatalExceptionCounter: Counter = getTweetScope.counter("non_fatal_exception") - val notFoundCounter: Counter = getTweetScope.counter("not_found") - } - - object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] { - override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = { - Stats.addWidthStat("getTweet", "tweetIds", tweetIds.size, statsReceiver) - Stitch.traverse(tweetIds)(read(_)) - } - } - - tweetId => - if (tweetId <= 0) { - Stitch.NotFound - } else { - Stitch - .call(tweetId, mhGroup) - .map(mhRecords => tweetResponseFromRecords(tweetId, mhRecords, stats.getTweetScope)) - .liftToTry - .map { - case Throw(mhException: DeniedManhattanException) => - stats.deniedCounter.incr() - Throw(RateLimited("", mhException)) - - // Encountered some other Manhattan error - case t @ Throw(_: ManhattanException) => - stats.mhExceptionCounter.incr() - t - - // Something else happened - case t @ Throw(ex) => - stats.nonFatalExceptionCounter.incr() - TweetUtils.log - .warning(ex, s"Unhandled exception in GetTweetHandler for tweetId: $tweetId") - t - - case r @ Return(GetTweet.Response.NotFound) => - stats.notFoundCounter.incr() - r - - case r @ Return(_) => r - } - .lowerFromTry - } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.docx 
new file mode 100644 index 000000000..c32feefb1 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala deleted file mode 100644 index 8483926f4..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala +++ /dev/null @@ -1,153 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.storage.TweetKey.LKey.ForceAddedStateKey -import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet -import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet.Response._ -import com.twitter.tweetypie.storage.TweetUtils._ -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.util.Time -import com.twitter.util.Try - -object HardDeleteTweetHandler { - - /** - * When a tweet is removed lkeys with these prefixes will be deleted permanently. - */ - private[storage] def isKeyToBeDeleted(key: TweetKey): Boolean = - key.lKey match { - case (TweetKey.LKey.CoreFieldsKey | TweetKey.LKey.InternalFieldsKey(_) | - TweetKey.LKey.AdditionalFieldsKey(_) | TweetKey.LKey.SoftDeletionStateKey | - TweetKey.LKey.BounceDeletionStateKey | TweetKey.LKey.UnDeletionStateKey | - TweetKey.LKey.ForceAddedStateKey) => - true - case _ => false - } - - /** - * When hard deleting, there are two actions, writing the record and - * removing the tweet data. If we are performing any action, we will - * always try to remove the tweet data. If the tweet does not yet have a - * hard deletion record, then we will need to write one. This method - * returns the HardDeleted record if it needs to be written, and None - * if it has already been written. 
- * - * If the tweet is not in a deleted state we signal this with a - * Throw(NotDeleted). - */ - private[storage] def getHardDeleteStateRecord( - tweetId: TweetId, - records: Seq[TweetManhattanRecord], - mhTimestamp: Time, - stats: StatsReceiver - ): Try[Option[TweetStateRecord.HardDeleted]] = { - val mostRecent = TweetStateRecord.mostRecent(records) - val currentStateStr = mostRecent.map(_.name).getOrElse("no_tweet_state_record") - stats.counter(currentStateStr).incr() - - mostRecent match { - case Some( - record @ (TweetStateRecord.SoftDeleted(_, _) | TweetStateRecord.BounceDeleted(_, _))) => - Return( - Some( - TweetStateRecord.HardDeleted( - tweetId = tweetId, - // createdAt is the hard deletion timestamp when dealing with hard deletes in Manhattan - createdAt = mhTimestamp.inMillis, - // deletedAt is the soft deletion timestamp when dealing with hard deletes in Manhattan - deletedAt = record.createdAt - ) - ) - ) - - case Some(_: TweetStateRecord.HardDeleted) => - Return(None) - - case Some(_: TweetStateRecord.ForceAdded) => - Throw(NotDeleted(tweetId, Some(ForceAddedStateKey))) - - case Some(_: TweetStateRecord.Undeleted) => - Throw(NotDeleted(tweetId, Some(TweetKey.LKey.UnDeletionStateKey))) - - case None => - Throw(NotDeleted(tweetId, None)) - } - } - - /** - * This handler returns HardDeleteTweet.Response.Deleted if data associated with the tweet is deleted, - * either as a result of this request or a previous one. - * - * The most recently added record determines the tweet's state. This method will only delete data - * for tweets in the soft-delete or hard-delete state. (Calling hardDeleteTweet for tweets that have - * already been hard-deleted will remove any lkeys that may not have been deleted previously). 
- */ - def apply( - read: ManhattanOperations.Read, - insert: ManhattanOperations.Insert, - delete: ManhattanOperations.Delete, - scribe: Scribe, - stats: StatsReceiver - ): TweetId => Stitch[HardDeleteTweet.Response] = { - val hardDeleteStats = stats.scope("hardDeleteTweet") - val hardDeleteTweetCancelled = hardDeleteStats.counter("cancelled") - val beforeStateStats = hardDeleteStats.scope("before_state") - - def removeRecords(keys: Seq[TweetKey], mhTimestamp: Time): Stitch[Unit] = - Stitch - .collect(keys.map(key => delete(key, Some(mhTimestamp)).liftToTry)) - .map(collectWithRateLimitCheck) - .lowerFromTry - - def writeRecord(record: Option[TweetStateRecord.HardDeleted]): Stitch[Unit] = - record match { - case Some(r) => - insert(r.toTweetMhRecord).onSuccess { _ => - scribe.logRemoved( - r.tweetId, - Time.fromMilliseconds(r.createdAt), - isSoftDeleted = false - ) - } - case None => Stitch.Unit - } - - tweetId => - read(tweetId) - .flatMap { records => - val hardDeletionTimestamp = Time.now - - val keysToBeDeleted: Seq[TweetKey] = records.map(_.key).filter(isKeyToBeDeleted) - - getHardDeleteStateRecord( - tweetId, - records, - hardDeletionTimestamp, - beforeStateStats) match { - case Return(record) => - Stitch - .join( - writeRecord(record), - removeRecords(keysToBeDeleted, hardDeletionTimestamp) - ).map(_ => - // If the tweetId is non-snowflake and has previously been hard deleted - // there will be no coreData record to fall back on to get the tweet - // creation time and createdAtMillis will be None. 
- Deleted( - // deletedAtMillis: when the tweet was hard deleted - deletedAtMillis = Some(hardDeletionTimestamp.inMillis), - // createdAtMillis: when the tweet itself was created - // (as opposed to when the deletion record was created) - createdAtMillis = - TweetUtils.creationTimeFromTweetIdOrMHRecords(tweetId, records) - )) - case Throw(notDeleted: NotDeleted) => - hardDeleteTweetCancelled.incr() - Stitch.value(notDeleted) - case Throw(e) => Stitch.exception(e) // this should never happen - } - } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.docx new file mode 100644 index 000000000..0a0566984 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala deleted file mode 100644 index 113a749cb..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala +++ /dev/null @@ -1,228 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.google.common.base.CaseFormat -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.scrooge.TFieldBlob -import com.twitter.scrooge.ThriftStructFieldInfo -import com.twitter.stitch.Stitch -import com.twitter.storage.client.manhattan.kv._ -import com.twitter.tweetypie.additionalfields.AdditionalFields -import com.twitter.tweetypie.storage.ManhattanOperations.Read -import com.twitter.tweetypie.storage.TweetUtils._ -import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet -import com.twitter.tweetypie.thriftscala.{Tweet => TweetypieTweet} -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.Return -import com.twitter.util.Throw -import 
diffshow.Container -import diffshow.DiffShow -import diffshow.Expr -import org.apache.commons.codec.binary.Base64 -import scala.util.Try -import shapeless.Cached -import shapeless.Strict - -// This class is used by the Tweetypie Console to inspect tweet field content in Manhattan -class InspectFields(svcIdentifier: ServiceIdentifier) { - val mhApplicationId = "tbird_mh" - val mhDatasetName = "tbird_mh" - val mhDestinationName = "/s/manhattan/cylon.native-thrift" - val mhTimeout: Duration = 5000.milliseconds - - val localMhEndpoint: ManhattanKVEndpoint = - ManhattanKVEndpointBuilder( - ManhattanKVClient( - mhApplicationId, - mhDestinationName, - ManhattanKVClientMtlsParams(svcIdentifier))) - .defaultGuarantee(Guarantee.SoftDcReadMyWrites) - .defaultMaxTimeout(mhTimeout) - .build() - - val readOperation: Read = (new ManhattanOperations(mhDatasetName, localMhEndpoint)).read - - def lookup(tweetId: Long): Future[String] = { - val result = readOperation(tweetId).liftToTry.map { - case Return(mhRecords) => - prettyPrintManhattanRecords(tweetId, TweetKey.padTweetIdStr(tweetId), mhRecords) - case Throw(e) => e.toString - } - - Stitch.run(result) - } - - def storedTweet(tweetId: Long): Future[StoredTweet] = { - val result = readOperation(tweetId).liftToTry.map { - case Return(mhRecords) => - buildStoredTweet(tweetId, mhRecords) - case Throw(e) => - throw e - } - - Stitch.run(result) - } - - private[this] def prettyPrintManhattanRecords( - tweetId: Long, - pkey: String, - mhRecords: Seq[TweetManhattanRecord] - ): String = { - if (mhRecords.isEmpty) { - "Not Found" - } else { - val formattedRecords = getFormattedManhattanRecords(tweetId, mhRecords) - val keyFieldWidth = formattedRecords.map(_.key.length).max + 2 - val fieldNameFieldWidth = formattedRecords.map(_.fieldName.length).max + 2 - - val formatString = s" %-${keyFieldWidth}s %-${fieldNameFieldWidth}s %s" - - val recordsString = - formattedRecords - .map { record => - val content = record.content.replaceAll("\n", "\n" 
+ formatString.format("", "", "")) - formatString.format(record.key, record.fieldName, content) - } - .mkString("\n") - - "/tbird_mh/" + pkey + "/" + "\n" + recordsString - } - } - - private[this] def getFormattedManhattanRecords( - tweetId: Long, - mhRecords: Seq[TweetManhattanRecord] - ): Seq[FormattedManhattanRecord] = { - val storedTweet = buildStoredTweet(tweetId, mhRecords).copy(updatedAt = None) - val tweetypieTweet: Option[TweetypieTweet] = - Try(StorageConversions.fromStoredTweet(storedTweet)).toOption - - val blobMap: Map[String, TFieldBlob] = getStoredTweetBlobs(mhRecords).map { blob => - getFieldName(blob.field.id) -> blob - }.toMap - - mhRecords - .map { - case TweetManhattanRecord(fullKey, mhValue) => - FormattedManhattanRecord( - key = fullKey.lKey.toString, - fieldName = getFieldName(fullKey.lKey), - content = prettyPrintManhattanValue( - fullKey.lKey, - mhValue, - storedTweet, - tweetypieTweet, - tweetId, - blobMap - ) - ) - } - .sortBy(_.key.replace("external", "xternal")) // sort by key, with internal first - } - - private[this] def getFieldNameFromThrift( - fieldId: Short, - fieldInfos: List[ThriftStructFieldInfo] - ): String = - fieldInfos - .find(info => info.tfield.id == fieldId) - .map(_.tfield.name) - .getOrElse("") - - private[this] def isLkeyScrubbedField(lkey: String): Boolean = - lkey.split("/")(1) == "scrubbed_fields" - - private[this] def getFieldName(lkey: TweetKey.LKey): String = - lkey match { - case fieldKey: TweetKey.LKey.FieldKey => getFieldName(fieldKey.fieldId) - case _ => "" - } - - private[this] def getFieldName(fieldId: Short): String = - if (fieldId == 1) { - "core_fields" - } else if (AdditionalFields.isAdditionalFieldId(fieldId)) { - getFieldNameFromThrift(fieldId, TweetypieTweet.fieldInfos) - } else { - getFieldNameFromThrift(fieldId, StoredTweet.fieldInfos) - } - - private[this] def prettyPrintManhattanValue( - lkey: TweetKey.LKey, - mhValue: TweetManhattanValue, - storedTweet: StoredTweet, - tweetypieTweet: 
Option[TweetypieTweet], - tweetId: Long, - tfieldBlobs: Map[String, TFieldBlob] - ): String = { - val decoded = lkey match { - case _: TweetKey.LKey.MetadataKey => - decodeMetadata(mhValue) - - case fieldKey: TweetKey.LKey.FieldKey => - tfieldBlobs - .get(getFieldName(fieldKey.fieldId)) - .map(blob => decodeField(tweetId, blob, storedTweet, tweetypieTweet)) - - case _ => - None - } - - decoded.getOrElse { // If all else fails, encode the data as a base64 string - val contents = mhValue.contents.array - if (contents.isEmpty) { - "" - } else { - Base64.encodeBase64String(contents) - } - } - } - - private[this] def decodeMetadata(mhValue: TweetManhattanValue): Option[String] = { - val byteArray = ByteArrayCodec.fromByteBuffer(mhValue.contents) - Try(Json.decode(byteArray).toString).toOption - } - - private[this] def decodeField( - tweetId: Long, - blob: TFieldBlob, - storedTweet: StoredTweet, - tweetypieTweet: Option[TweetypieTweet] - ): String = { - val fieldId = blob.field.id - - if (fieldId == 1) { - coreFields(storedTweet) - } else if (AdditionalFields.isAdditionalFieldId(fieldId)) { - decodeTweetWithOneField(TweetypieTweet(tweetId).setField(blob)) - } else { - decodeTweetWithOneField(StoredTweet(tweetId).setField(blob)) - } - } - - // Takes a Tweet or StoredTweet with a single field set and returns the value of that field - private[this] def decodeTweetWithOneField[T]( - tweetWithOneField: T - )( - implicit ev: Cached[Strict[DiffShow[T]]] - ): String = { - val config = diffshow.Config(hideFieldWithEmptyVal = true) - val tree: Expr = config.transform(DiffShow.show(tweetWithOneField)) - - // matches a Tweet or StoredTweet with two values, the first being the id - val value = tree.transform { - case Container(_, List(diffshow.Field("id", _), diffshow.Field(_, value))) => value - } - - config.exprPrinter.apply(value, width = 80).render - } - - private[this] def coreFields(storedTweet: StoredTweet): String = - diffshow.show(CoreFieldsCodec.fromTweet(storedTweet), 
hideFieldWithEmptyVal = true) - - private[this] def toCamelCase(s: String): String = - CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, s) -} - -case class FormattedManhattanRecord(key: String, fieldName: String, content: String) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.docx new file mode 100644 index 000000000..69962ed81 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala deleted file mode 100644 index e5f087a34..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala +++ /dev/null @@ -1,17 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule - -object Json { - val TimestampKey = "timestamp" - val SoftDeleteTimestampKey = "softdelete_timestamp" - - private val mapper = new ObjectMapper - mapper.registerModule(DefaultScalaModule) - - def encode(m: Map[String, Any]): Array[Byte] = mapper.writeValueAsBytes(m) - - def decode(arr: Array[Byte]): Map[String, Any] = - mapper.readValue[Map[String, Any]](arr, classOf[Map[String, Any]]) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.docx new file mode 100644 index 000000000..81b424f38 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala deleted file mode 100644 index fed0af6c7..000000000 --- 
a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala +++ /dev/null @@ -1,103 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.bijection.Injection -import com.twitter.io.Buf -import com.twitter.stitch.Stitch -import com.twitter.storage.client.manhattan.bijections.Bijections.BufInjection -import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpoint -import com.twitter.storage.client.manhattan.kv.impl.DescriptorP1L1 -import com.twitter.storage.client.manhattan.kv.impl.Component -import com.twitter.storage.client.manhattan.kv.{impl => mh} -import com.twitter.storage.client.manhattan.bijections.Bijections.StringInjection -import com.twitter.util.Time -import java.nio.ByteBuffer -import scala.util.control.NonFatal - -case class TweetManhattanRecord(key: TweetKey, value: TweetManhattanValue) { - def pkey: TweetId = key.tweetId - def lkey: TweetKey.LKey = key.lKey - - /** - * Produces a representation that is human-readable, but contains - * all of the information from the record. It is not intended for - * producing machine-readable values. - * - * This conversion is relatively expensive, so beware of using it in - * hot code paths. 
- */ - override def toString: String = { - val valueString = - try { - key.lKey match { - case _: TweetKey.LKey.MetadataKey => - StringCodec.fromByteBuffer(value.contents) - - case _: TweetKey.LKey.FieldKey => - val tFieldBlob = TFieldBlobCodec.fromByteBuffer(value.contents) - s"TFieldBlob(${tFieldBlob.field}, 0x${Buf.slowHexString(tFieldBlob.content)})" - - case TweetKey.LKey.Unknown(_) => - "0x" + Buf.slowHexString(Buf.ByteBuffer.Shared(value.contents)) - } - } catch { - case NonFatal(e) => - val hexValue = Buf.slowHexString(Buf.ByteBuffer.Shared(value.contents)) - s"0x$hexValue (failed to decode due to $e)" - } - - s"$key => ${value.copy(contents = valueString)}" - } -} - -object ManhattanOperations { - type Read = TweetId => Stitch[Seq[TweetManhattanRecord]] - type Insert = TweetManhattanRecord => Stitch[Unit] - type Delete = (TweetKey, Option[Time]) => Stitch[Unit] - type DeleteRange = TweetId => Stitch[Unit] - - object PkeyInjection extends Injection[TweetId, String] { - override def apply(tweetId: TweetId): String = TweetKey.padTweetIdStr(tweetId) - override def invert(str: String): scala.util.Try[TweetId] = scala.util.Try(str.toLong) - } - - case class InvalidLkey(lkeyStr: String) extends Exception - - object LkeyInjection extends Injection[TweetKey.LKey, String] { - override def apply(lkey: TweetKey.LKey): String = lkey.toString - override def invert(str: String): scala.util.Try[TweetKey.LKey] = - scala.util.Success(TweetKey.LKey.fromString(str)) - } - - val KeyDescriptor: DescriptorP1L1.EmptyKey[TweetId, TweetKey.LKey] = - mh.KeyDescriptor( - Component(PkeyInjection.andThen(StringInjection)), - Component(LkeyInjection.andThen(StringInjection)) - ) - - val ValueDescriptor: mh.ValueDescriptor.EmptyValue[ByteBuffer] = mh.ValueDescriptor(BufInjection) -} - -class ManhattanOperations(dataset: String, mhEndpoint: ManhattanKVEndpoint) { - import ManhattanOperations._ - - private[this] def pkey(tweetId: TweetId) = 
KeyDescriptor.withDataset(dataset).withPkey(tweetId) - - def read: Read = { tweetId => - mhEndpoint.slice(pkey(tweetId).under(), ValueDescriptor).map { mhData => - mhData.map { - case (key, value) => TweetManhattanRecord(TweetKey(key.pkey, key.lkey), value) - } - } - } - - def insert: Insert = - record => { - val mhKey = pkey(record.key.tweetId).withLkey(record.key.lKey) - mhEndpoint.insert(mhKey, ValueDescriptor.withValue(record.value)) - } - - def delete: Delete = (key, time) => mhEndpoint.delete(pkey(key.tweetId).withLkey(key.lKey), time) - - def deleteRange: DeleteRange = - tweetId => mhEndpoint.deleteRange(KeyDescriptor.withDataset(dataset).withPkey(tweetId).under()) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.docx new file mode 100644 index 000000000..d33ebb40d Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala deleted file mode 100644 index daf6a3076..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala +++ /dev/null @@ -1,451 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.ssl.OpportunisticTls -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.logging.BareFormatter -import com.twitter.logging.Level -import com.twitter.logging.ScribeHandler -import com.twitter.logging._ -import com.twitter.stitch.Stitch -import 
com.twitter.storage.client.manhattan.bijections.Bijections._ -import com.twitter.storage.client.manhattan.kv._ -import com.twitter.storage.client.manhattan.kv.impl.ValueDescriptor -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.tweetypie.storage.Scribe.ScribeHandlerFactory -import com.twitter.tweetypie.storage.TweetStorageClient.BounceDelete -import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet -import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.tweetypie.util.StitchUtils -import com.twitter.util.Duration -import com.twitter.util.Return -import com.twitter.util.Throw -import scala.util.Random - -object ManhattanTweetStorageClient { - object Config { - - /** - * The Manhattan dataset where tweets are stored is not externally - * configurable because writing tweets to a non-production dataset - * requires great care. Staging instances using a different dataset will - * write tweets to a non-production store, but will publish events, log to - * HDFS, and cache data referencing tweets in that store which are not - * accessible by the rest of the production cluster. - * - * In a completely isolated environment it should be safe to write to - * other datasets for testing purposes. - */ - val Dataset = "tbird_mh" - - /** - * Once a tweet has been deleted it can only be undeleted within this time - * window, after which [[UndeleteHandler]] will return an error on - * undelete attempts. - */ - val UndeleteWindowHours = 240 - - /** - * Default label used for underlying Manhattan Thrift client metrics - * - * The finagle client metrics will be exported at clnt/:label. 
- */ - val ThriftClientLabel = "mh_cylon" - - /** - * Return the corresponding Wily path for the Cylon cluster in the "other" DC - */ - def remoteDestination(zone: String): String = - s"/srv#/prod/${remoteZone(zone)}/manhattan/cylon.native-thrift" - - private def remoteZone(zone: String) = zone match { - case "pdxa" => "atla" - case "atla" | "localhost" => "pdxa" - case _ => - throw new IllegalArgumentException(s"Cannot configure remote DC for unknown zone '$zone'") - } - } - - /** - * @param applicationId Manhattan application id used for quota accounting - * @param localDestination Wily path to local Manhattan cluster - * @param localTimeout Overall timeout (including retries) for all reads/writes to local cluster - * @param remoteDestination Wily path to remote Manhattan cluster, used for undelete and force add - * @param remoteTimeout Overall timeout (including retries) for all reads/writes to remote cluster - * @param undeleteWindowHours Amount of time during which a deleted tweet can be undeleted - * @param thriftClientLabel Label used to scope stats for Manhattan Thrift client - * @param maxRequestsPerBatch Configure the Stitch RequestGroup.Generator batch size - * @param serviceIdentifier The ServiceIdentifier to use when making connections to a Manhattan cluster - * @param opportunisticTlsLevel The level to use for opportunistic TLS for connections to the Manhattan cluster - */ - case class Config( - applicationId: String, - localDestination: String, - localTimeout: Duration, - remoteDestination: String, - remoteTimeout: Duration, - undeleteWindowHours: Int = Config.UndeleteWindowHours, - thriftClientLabel: String = Config.ThriftClientLabel, - maxRequestsPerBatch: Int = Int.MaxValue, - serviceIdentifier: ServiceIdentifier, - opportunisticTlsLevel: OpportunisticTls.Level) - - /** - * Sanitizes the input for APIs which take in a (Tweet, Seq[Field]) as input. 
- * - * NOTE: This function only applies sanity checks which are common to - * all APIs which take in a (Tweet, Seq[Field]) as input. API specific - * checks are not covered here. - * - * @param apiStitch the backing API call - * @tparam T the output type of the backing API call - * @return a stitch function which does some basic input sanity checking - */ - private[storage] def sanitizeTweetFields[T]( - apiStitch: (Tweet, Seq[Field]) => Stitch[T] - ): (Tweet, Seq[Field]) => Stitch[T] = - (tweet, fields) => { - require(fields.forall(_.id > 0), s"Field ids ${fields} are not positive numbers") - apiStitch(tweet, fields) - } - - // Returns a handler that asynchronously logs messages to Scribe using the BareFormatter which - // logs just the message without any additional metadata - def scribeHandler(categoryName: String): HandlerFactory = - ScribeHandler( - formatter = BareFormatter, - maxMessagesPerTransaction = 100, - category = categoryName, - level = Some(Level.TRACE) - ) - - /** - * A Config appropriate for interactive sessions and scripts. - */ - def develConfig(): Config = - Config( - applicationId = Option(System.getenv("USER")).getOrElse("") + ".devel", - localDestination = "/s/manhattan/cylon.native-thrift", - localTimeout = 10.seconds, - remoteDestination = "/s/manhattan/cylon.native-thrift", - remoteTimeout = 10.seconds, - undeleteWindowHours = Config.UndeleteWindowHours, - thriftClientLabel = Config.ThriftClientLabel, - maxRequestsPerBatch = Int.MaxValue, - serviceIdentifier = ServiceIdentifier(System.getenv("USER"), "tweetypie", "devel", "local"), - opportunisticTlsLevel = OpportunisticTls.Required - ) - - /** - * Build a Manhattan tweet storage client for use in interactive - * sessions and scripts. 
- */ - def devel(): TweetStorageClient = - new ManhattanTweetStorageClient( - develConfig(), - NullStatsReceiver, - ClientIdHelper.default, - ) -} - -class ManhattanTweetStorageClient( - config: ManhattanTweetStorageClient.Config, - statsReceiver: StatsReceiver, - private val clientIdHelper: ClientIdHelper) - extends TweetStorageClient { - import ManhattanTweetStorageClient._ - - lazy val scribeHandlerFactory: ScribeHandlerFactory = scribeHandler _ - val scribe: Scribe = new Scribe(scribeHandlerFactory, statsReceiver) - - def mkClient( - dest: String, - label: String - ): ManhattanKVClient = { - val mhMtlsParams = - if (config.serviceIdentifier == EmptyServiceIdentifier) NoMtlsParams - else - ManhattanKVClientMtlsParams( - serviceIdentifier = config.serviceIdentifier, - opportunisticTls = config.opportunisticTlsLevel - ) - - new ManhattanKVClient( - config.applicationId, - dest, - mhMtlsParams, - label, - Seq(Experiments.ApertureLoadBalancer)) - } - - val localClient: ManhattanKVClient = mkClient(config.localDestination, config.thriftClientLabel) - - val localMhEndpoint: ManhattanKVEndpoint = ManhattanKVEndpointBuilder(localClient) - .defaultGuarantee(Guarantee.SoftDcReadMyWrites) - .defaultMaxTimeout(config.localTimeout) - .maxRequestsPerBatch(config.maxRequestsPerBatch) - .build() - - val localManhattanOperations = new ManhattanOperations(Config.Dataset, localMhEndpoint) - - val remoteClient: ManhattanKVClient = - mkClient(config.remoteDestination, s"${config.thriftClientLabel}_remote") - - val remoteMhEndpoint: ManhattanKVEndpoint = ManhattanKVEndpointBuilder(remoteClient) - .defaultGuarantee(Guarantee.SoftDcReadMyWrites) - .defaultMaxTimeout(config.remoteTimeout) - .build() - - val remoteManhattanOperations = new ManhattanOperations(Config.Dataset, remoteMhEndpoint) - - /** - * Note: This translation is only useful for non-batch endpoints. Batch endpoints currently - * represent failure without propagating an exception - * (e.g. 
[[com.twitter.tweetypie.storage.Response.TweetResponseCode.Failure]]). - */ - private[this] def translateExceptions( - apiName: String, - statsReceiver: StatsReceiver - ): PartialFunction[Throwable, Throwable] = { - case e: IllegalArgumentException => ClientError(e.getMessage, e) - case e: DeniedManhattanException => RateLimited(e.getMessage, e) - case e: VersionMismatchError => - statsReceiver.scope(apiName).counter("mh_version_mismatches").incr() - e - case e: InternalError => - TweetUtils.log.error(e, s"Error processing $apiName request: ${e.getMessage}") - e - } - - /** - * Count requests per client id producing metrics of the form - * .../clients/:root_client_id/requests - */ - def observeClientId[A, B]( - apiStitch: A => Stitch[B], - statsReceiver: StatsReceiver, - clientIdHelper: ClientIdHelper, - ): A => Stitch[B] = { - val clients = statsReceiver.scope("clients") - - val incrementClientRequests = { args: A => - val clientId = clientIdHelper.effectiveClientIdRoot.getOrElse(ClientIdHelper.UnknownClientId) - clients.counter(clientId, "requests").incr - } - - a => { - incrementClientRequests(a) - apiStitch(a) - } - } - - /** - * Increment counters based on the overall response status of the returned [[GetTweet.Response]]. 
- */ - def observeGetTweetResponseCode[A]( - apiStitch: A => Stitch[GetTweet.Response], - statsReceiver: StatsReceiver - ): A => Stitch[GetTweet.Response] = { - val scope = statsReceiver.scope("response_code") - - val success = scope.counter("success") - val notFound = scope.counter("not_found") - val failure = scope.counter("failure") - val overCapacity = scope.counter("over_capacity") - val deleted = scope.counter("deleted") - val bounceDeleted = scope.counter("bounce_deleted") - - a => - apiStitch(a).respond { - case Return(_: GetTweet.Response.Found) => success.incr() - case Return(GetTweet.Response.NotFound) => notFound.incr() - case Return(_: GetTweet.Response.BounceDeleted) => bounceDeleted.incr() - case Return(GetTweet.Response.Deleted) => deleted.incr() - case Throw(_: RateLimited) => overCapacity.incr() - case Throw(_) => failure.incr() - } - } - - /** - * We do 3 things here: - * - * - Bookkeeping for overall requests - * - Bookkeeping for per api requests - * - Translate exceptions - * - * @param apiName the API being called - * @param apiStitch the implementation of the API - * @tparam A template for input type of API - * @tparam B template for output type of API - * @return Function which executes the given API call - */ - private[storage] def endpoint[A, B]( - apiName: String, - apiStitch: A => Stitch[B] - ): A => Stitch[B] = { - val translateException = translateExceptions(apiName, statsReceiver) - val observe = StitchUtils.observe[B](statsReceiver, apiName) - - a => - StitchUtils.translateExceptions( - observe(apiStitch(a)), - translateException - ) - } - - private[storage] def endpoint2[A, B, C]( - apiName: String, - apiStitch: (A, B) => Stitch[C], - clientIdHelper: ClientIdHelper, - ): (A, B) => Stitch[C] = - Function.untupled(endpoint(apiName, apiStitch.tupled)) - - val getTweet: TweetStorageClient.GetTweet = { - val stats = statsReceiver.scope("getTweet") - - observeClientId( - observeGetTweetResponseCode( - endpoint( - "getTweet", - 
GetTweetHandler( - read = localManhattanOperations.read, - statsReceiver = stats, - ) - ), - stats, - ), - stats, - clientIdHelper, - ) - } - - val getStoredTweet: TweetStorageClient.GetStoredTweet = { - val stats = statsReceiver.scope("getStoredTweet") - - observeClientId( - endpoint( - "getStoredTweet", - GetStoredTweetHandler( - read = localManhattanOperations.read, - statsReceiver = stats, - ) - ), - stats, - clientIdHelper, - ) - } - - val addTweet: TweetStorageClient.AddTweet = - endpoint( - "addTweet", - AddTweetHandler( - insert = localManhattanOperations.insert, - scribe = scribe, - stats = statsReceiver - ) - ) - - val updateTweet: TweetStorageClient.UpdateTweet = - endpoint2( - "updateTweet", - ManhattanTweetStorageClient.sanitizeTweetFields( - UpdateTweetHandler( - insert = localManhattanOperations.insert, - stats = statsReceiver, - ) - ), - clientIdHelper, - ) - - val softDelete: TweetStorageClient.SoftDelete = - endpoint( - "softDelete", - SoftDeleteHandler( - insert = localManhattanOperations.insert, - scribe = scribe - ) - ) - - val bounceDelete: BounceDelete = - endpoint( - "bounceDelete", - BounceDeleteHandler( - insert = localManhattanOperations.insert, - scribe = scribe - ) - ) - - val undelete: TweetStorageClient.Undelete = - endpoint( - "undelete", - UndeleteHandler( - read = localManhattanOperations.read, - localInsert = localManhattanOperations.insert, - remoteInsert = remoteManhattanOperations.insert, - delete = localManhattanOperations.delete, - undeleteWindowHours = config.undeleteWindowHours, - stats = statsReceiver - ) - ) - - val getDeletedTweets: TweetStorageClient.GetDeletedTweets = - endpoint( - "getDeletedTweets", - GetDeletedTweetsHandler( - read = localManhattanOperations.read, - stats = statsReceiver - ) - ) - - val deleteAdditionalFields: TweetStorageClient.DeleteAdditionalFields = - endpoint2( - "deleteAdditionalFields", - DeleteAdditionalFieldsHandler( - delete = localManhattanOperations.delete, - stats = statsReceiver, - ), 
- clientIdHelper, - ) - - val scrub: TweetStorageClient.Scrub = - endpoint2( - "scrub", - ScrubHandler( - insert = localManhattanOperations.insert, - delete = localManhattanOperations.delete, - scribe = scribe, - stats = statsReceiver, - ), - clientIdHelper, - ) - - val hardDeleteTweet: HardDeleteTweet = - endpoint( - "hardDeleteTweet", - HardDeleteTweetHandler( - read = localManhattanOperations.read, - insert = localManhattanOperations.insert, - delete = localManhattanOperations.delete, - scribe = scribe, - stats = statsReceiver - ) - ) - - val ping: TweetStorageClient.Ping = - () => - Stitch - .run( - localMhEndpoint - .get( - ManhattanOperations.KeyDescriptor - .withDataset(Config.Dataset) - .withPkey(Random.nextLong().abs) - .withLkey(TweetKey.LKey.CoreFieldsKey), // could be any lkey - ValueDescriptor(BufInjection) - ).unit - ) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.docx new file mode 100644 index 000000000..94d19f56e Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala deleted file mode 100644 index 8444a7d96..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.twitter.tweetypie.storage - -object Response { - case class TweetResponse( - tweetId: Long, - overallResponse: TweetResponseCode, - additionalFieldResponses: Option[Map[Short, FieldResponse]] = None) - - sealed trait TweetResponseCode - - object TweetResponseCode { - object Success extends TweetResponseCode - object Partial extends TweetResponseCode - object Failure extends TweetResponseCode - object OverCapacity extends TweetResponseCode - object Deleted extends TweetResponseCode - } - - case class 
FieldResponse(code: FieldResponseCode, message: Option[String] = None) - - sealed trait FieldResponseCode - - object FieldResponseCode { - object Success extends FieldResponseCode - object InvalidRequest extends FieldResponseCode - object ValueNotFound extends FieldResponseCode - object Timeout extends FieldResponseCode - object Error extends FieldResponseCode - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.docx new file mode 100644 index 000000000..a81afd996 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala deleted file mode 100644 index 89b3e8efc..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala +++ /dev/null @@ -1,85 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.servo.util.FutureEffect -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.logging._ -import com.twitter.scrooge.BinaryThriftStructSerializer -import com.twitter.servo.util.{Scribe => ServoScribe} -import com.twitter.tweetypie.storage_internal.thriftscala._ -import com.twitter.tbird.thriftscala.Added -import com.twitter.tbird.thriftscala.Removed -import com.twitter.tbird.thriftscala.Scrubbed -import com.twitter.util.Time - -/** - * Scribe is used to log tweet writes which are used to generate /tables/statuses in HDFS. 
- * - * Write Scribe Category Message - * ----- --------------- ------- - * add tbird_add_status [[com.twitter.tbird.thriftscala.Added]] - * remove tbird_remove_status [[com.twitter.tbird.thriftscala.Removed]] - * scrub tbird_scrub_status [[com.twitter.tbird.thriftscala.Scrubbed]] - * - * The thrift representation is encoded using binary thrift protocol format, followed by base64 - * encoding and converted to string using default character set (utf8). The logger uses BareFormatter. - * - * The thrift ops are scribed only after the write API call has succeeded. - * - * The class is thread safe except initial configuration and registration routines, - * and no exception is expected unless java heap is out of memory. - * - * If exception does get thrown, add/remove/scrub operations will fail and - * client will have to retry - */ -class Scribe(factory: Scribe.ScribeHandlerFactory, statsReceiver: StatsReceiver) { - import Scribe._ - - private val AddedSerializer = BinaryThriftStructSerializer(Added) - private val RemovedSerializer = BinaryThriftStructSerializer(Removed) - private val ScrubbedSerializer = BinaryThriftStructSerializer(Scrubbed) - - private val addCounter = statsReceiver.counter("scribe/add/count") - private val removeCounter = statsReceiver.counter("scribe/remove/count") - private val scrubCounter = statsReceiver.counter("scribe/scrub/count") - - val addHandler: FutureEffect[String] = ServoScribe(factory(scribeAddedCategory)()) - val removeHandler: FutureEffect[String] = ServoScribe(factory(scribeRemovedCategory)()) - val scrubHandler: FutureEffect[String] = ServoScribe(factory(scribeScrubbedCategory)()) - - private def addedToString(tweet: StoredTweet): String = - AddedSerializer.toString( - Added(StatusConversions.toTBirdStatus(tweet), Time.now.inMilliseconds, Some(false)) - ) - - private def removedToString(id: Long, at: Time, isSoftDeleted: Boolean): String = - RemovedSerializer.toString(Removed(id, at.inMilliseconds, Some(isSoftDeleted))) - - 
private def scrubbedToString(id: Long, cols: Seq[Int], at: Time): String = - ScrubbedSerializer.toString(Scrubbed(id, cols, at.inMilliseconds)) - - def logAdded(tweet: StoredTweet): Unit = { - addHandler(addedToString(tweet)) - addCounter.incr() - } - - def logRemoved(id: Long, at: Time, isSoftDeleted: Boolean): Unit = { - removeHandler(removedToString(id, at, isSoftDeleted)) - removeCounter.incr() - } - - def logScrubbed(id: Long, cols: Seq[Int], at: Time): Unit = { - scrubHandler(scrubbedToString(id, cols, at)) - scrubCounter.incr() - } -} - -object Scribe { - type ScribeHandlerFactory = (String) => HandlerFactory - - /** WARNING: These categories are white-listed. If you are changing them, the new categories should be white-listed. - * You should followup with CoreWorkflows team (CW) for that. - */ - private val scribeAddedCategory = "tbird_add_status" - private val scribeRemovedCategory = "tbird_remove_status" - private val scribeScrubbedCategory = "tbird_scrub_status" -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.docx new file mode 100644 index 000000000..75358d910 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala deleted file mode 100644 index 7bbae6251..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala +++ /dev/null @@ -1,71 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.stitch.Stitch -import com.twitter.storage.client.manhattan.kv.ManhattanValue -import com.twitter.tweetypie.storage.TweetUtils._ -import com.twitter.util.Time - -/** - * Deletes data for the scrubbed field and writes a metadata record. 
- * Provides scrub functionality. Right now, we only allow the scrubbing of the geo field. - * It should be simple to add more fields to the allowlist if needed. - */ -object ScrubHandler { - - val scrubFieldsAllowlist: Set[Field] = Set(Field.Geo) - - def apply( - insert: ManhattanOperations.Insert, - delete: ManhattanOperations.Delete, - scribe: Scribe, - stats: StatsReceiver - ): TweetStorageClient.Scrub = - (unfilteredTweetIds: Seq[TweetId], columns: Seq[Field]) => { - val tweetIds = unfilteredTweetIds.filter(_ > 0) - - require(columns.nonEmpty, "Must specify fields to scrub") - require( - columns.toSet.size == columns.size, - s"Duplicate fields to scrub specified: $columns" - ) - require( - columns.forall(scrubFieldsAllowlist.contains(_)), - s"Cannot scrub $columns; scrubbable fields are restricted to $scrubFieldsAllowlist" - ) - - Stats.addWidthStat("scrub", "ids", tweetIds.size, stats) - val mhTimestamp = Time.now - - val stitches = tweetIds.map { tweetId => - val deletionStitches = columns.map { field => - val mhKeyToDelete = TweetKey.fieldKey(tweetId, field.id) - delete(mhKeyToDelete, Some(mhTimestamp)).liftToTry - } - - val collectedStitch = - Stitch.collect(deletionStitches).map(collectWithRateLimitCheck).lowerFromTry - - collectedStitch - .flatMap { _ => - val scrubbedStitches = columns.map { column => - val scrubbedKey = TweetKey.scrubbedFieldKey(tweetId, column.id) - val record = - TweetManhattanRecord( - scrubbedKey, - ManhattanValue(StringCodec.toByteBuffer(""), Some(mhTimestamp)) - ) - - insert(record).liftToTry - } - - Stitch.collect(scrubbedStitches) - } - .map(collectWithRateLimitCheck) - } - - Stitch.collect(stitches).map(collectWithRateLimitCheck).lowerFromTry.onSuccess { _ => - tweetIds.foreach { id => scribe.logScrubbed(id, columns.map(_.id.toInt), mhTimestamp) } - } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.docx 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.docx new file mode 100644 index 000000000..663dedd4c Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala deleted file mode 100644 index ea350ccb9..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala +++ /dev/null @@ -1,20 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.util.Time - -object SoftDeleteHandler { - def apply( - insert: ManhattanOperations.Insert, - scribe: Scribe - ): TweetStorageClient.SoftDelete = - tweetId => { - val mhTimestamp = Time.now - val softDeleteRecord = TweetStateRecord - .SoftDeleted(tweetId, mhTimestamp.inMillis) - .toTweetMhRecord - - insert(softDeleteRecord).onSuccess { _ => - scribe.logRemoved(tweetId, mhTimestamp, isSoftDeleted = true) - } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.docx new file mode 100644 index 000000000..36a7fb116 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala deleted file mode 100644 index 87d8b41a1..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.finagle.stats.StatsReceiver - -object Stats { - // These two methods below (addWidthStat and updatePerFieldQpsCounters) are called per RPC call for most APIs, - // so we rely on the stats receiver that is passed in to the library to do memoization. 
- - private[storage] def addWidthStat( - rpcName: String, - paramName: String, - width: Int, - stats: StatsReceiver - ): Unit = - getStat(rpcName, paramName, stats).add(width) - - // Updates the counters for each Additional field. The idea here is to expose the QPS for each - // additional field - private[storage] def updatePerFieldQpsCounters( - rpcName: String, - fieldIds: Seq[FieldId], - count: Int, - stats: StatsReceiver - ): Unit = { - fieldIds.foreach { fieldId => getCounter(rpcName, fieldId, stats).incr(count) } - } - - private def getCounter(rpcName: String, fieldId: FieldId, stats: StatsReceiver) = - stats.scope(rpcName, "fields", fieldId.toString).counter("count") - - private def getStat(rpcName: String, paramName: String, stats: StatsReceiver) = - stats.scope(rpcName, paramName).stat("width") -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.docx new file mode 100644 index 000000000..f078cebce Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala deleted file mode 100644 index 77dfed9ba..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala +++ /dev/null @@ -1,129 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.tweetypie.storage_internal.thriftscala._ -import com.twitter.tbird.{thriftscala => tbird} - -object StatusConversions { - - /** - * This is used only in Scribe.scala, when scribing to tbird_add_status - * Once we remove that, we can also remove this. 
- */ - def toTBirdStatus(tweet: StoredTweet): tbird.Status = - tbird.Status( - id = tweet.id, - userId = tweet.userId.get, - text = tweet.text.get, - createdVia = tweet.createdVia.get, - createdAtSec = tweet.createdAtSec.get, - reply = tweet.reply.map(toTBirdReply), - share = tweet.share.map(toTBirdShare), - contributorId = tweet.contributorId, - geo = tweet.geo.map(toTBirdGeo), - hasTakedown = tweet.hasTakedown.getOrElse(false), - nsfwUser = tweet.nsfwUser.getOrElse(false), - nsfwAdmin = tweet.nsfwAdmin.getOrElse(false), - media = tweet.media.map(_.map(toTBirdMedia)).getOrElse(Seq()), - narrowcast = tweet.narrowcast.map(toTBirdNarrowcast), - nullcast = tweet.nullcast.getOrElse(false), - trackingId = tweet.trackingId - ) - - /** - * This is only used in a test, to verify that the above method `toTBirdStatus` - * works, so we can't remove it as long as the above method exists. - */ - def fromTBirdStatus(status: tbird.Status): StoredTweet = { - StoredTweet( - id = status.id, - userId = Some(status.userId), - text = Some(status.text), - createdVia = Some(status.createdVia), - createdAtSec = Some(status.createdAtSec), - reply = status.reply.map(fromTBirdReply), - share = status.share.map(fromTBirdShare), - contributorId = status.contributorId, - geo = status.geo.map(fromTBirdGeo), - hasTakedown = Some(status.hasTakedown), - nsfwUser = Some(status.nsfwUser), - nsfwAdmin = Some(status.nsfwAdmin), - media = Some(status.media.map(fromTBirdMedia)), - narrowcast = status.narrowcast.map(fromTBirdNarrowcast), - nullcast = Some(status.nullcast), - trackingId = status.trackingId - ) - } - - private def fromTBirdReply(reply: tbird.Reply): StoredReply = - StoredReply( - inReplyToStatusId = reply.inReplyToStatusId, - inReplyToUserId = reply.inReplyToUserId - ) - - private def fromTBirdShare(share: tbird.Share): StoredShare = - StoredShare( - sourceStatusId = share.sourceStatusId, - sourceUserId = share.sourceUserId, - parentStatusId = share.parentStatusId - ) - - private def 
fromTBirdGeo(geo: tbird.Geo): StoredGeo = - StoredGeo( - latitude = geo.latitude, - longitude = geo.longitude, - geoPrecision = geo.geoPrecision, - entityId = geo.entityId - ) - - private def fromTBirdMedia(media: tbird.MediaEntity): StoredMediaEntity = - StoredMediaEntity( - id = media.id, - mediaType = media.mediaType, - width = media.width, - height = media.height - ) - - private def fromTBirdNarrowcast(narrowcast: tbird.Narrowcast): StoredNarrowcast = - StoredNarrowcast( - language = Some(narrowcast.language), - location = Some(narrowcast.location), - ids = Some(narrowcast.ids) - ) - - private def toTBirdReply(reply: StoredReply): tbird.Reply = - tbird.Reply( - inReplyToStatusId = reply.inReplyToStatusId, - inReplyToUserId = reply.inReplyToUserId - ) - - private def toTBirdShare(share: StoredShare): tbird.Share = - tbird.Share( - sourceStatusId = share.sourceStatusId, - sourceUserId = share.sourceUserId, - parentStatusId = share.parentStatusId - ) - - private def toTBirdGeo(geo: StoredGeo): tbird.Geo = - tbird.Geo( - latitude = geo.latitude, - longitude = geo.longitude, - geoPrecision = geo.geoPrecision, - entityId = geo.entityId, - name = geo.name - ) - - private def toTBirdMedia(media: StoredMediaEntity): tbird.MediaEntity = - tbird.MediaEntity( - id = media.id, - mediaType = media.mediaType, - width = media.width, - height = media.height - ) - - private def toTBirdNarrowcast(narrowcast: StoredNarrowcast): tbird.Narrowcast = - tbird.Narrowcast( - language = narrowcast.language.getOrElse(Nil), - location = narrowcast.location.getOrElse(Nil), - ids = narrowcast.ids.getOrElse(Nil) - ) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.docx new file mode 100644 index 000000000..c0d853b6e Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.docx differ diff --git 
a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala deleted file mode 100644 index d424a8817..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala +++ /dev/null @@ -1,346 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ -import com.twitter.scrooge.TFieldBlob -import com.twitter.tweetypie.additionalfields.AdditionalFields -import com.twitter.tweetypie.storage_internal.thriftscala._ -import com.twitter.tweetypie.thriftscala._ -import com.twitter.tweetypie.util.TweetLenses - -object StorageConversions { - private val tbTweetCompiledAdditionalFieldIds = - StoredTweet.metaData.fields.map(_.id).filter(AdditionalFields.isAdditionalFieldId) - - def toStoredReply(reply: Reply, conversationId: Option[TweetId]): StoredReply = - StoredReply( - inReplyToStatusId = reply.inReplyToStatusId.getOrElse(0), - inReplyToUserId = reply.inReplyToUserId, - conversationId = conversationId - ) - - def toStoredShare(share: Share): StoredShare = - StoredShare( - share.sourceStatusId, - share.sourceUserId, - share.parentStatusId - ) - - def toStoredQuotedTweet(qt: QuotedTweet, text: String): Option[StoredQuotedTweet] = - qt.permalink - .filterNot { p => - text.contains(p.shortUrl) - } // omit StoredQuotedTweet when url already in text - .map { p => - StoredQuotedTweet( - qt.tweetId, - qt.userId, - p.shortUrl - ) - } - - def toStoredGeo(tweet: Tweet): Option[StoredGeo] = - TweetLenses.geoCoordinates.get(tweet) match { - case None => - TweetLenses.placeId.get(tweet) match { - case None => None - case Some(placeId) => - Some( - StoredGeo( - latitude = 0.0, - longitude = 0.0, - geoPrecision = 0, - entityId = 0, - name = Some(placeId) - ) - ) - } - case Some(coords) => - Some( - StoredGeo( - latitude = coords.latitude, - longitude = coords.longitude, - geoPrecision = 
coords.geoPrecision, - entityId = if (coords.display) 2 else 0, - name = TweetLenses.placeId.get(tweet) - ) - ) - } - - def toStoredMedia(mediaList: Seq[MediaEntity]): Seq[StoredMediaEntity] = - mediaList.filter(_.sourceStatusId.isEmpty).flatMap(toStoredMediaEntity) - - def toStoredMediaEntity(media: MediaEntity): Option[StoredMediaEntity] = - media.sizes.find(_.sizeType == MediaSizeType.Orig).map { origSize => - StoredMediaEntity( - id = media.mediaId, - mediaType = origSize.deprecatedContentType.value.toByte, - width = origSize.width.toShort, - height = origSize.height.toShort - ) - } - - // The language and ids fields are for compatibility with existing tweets stored in manhattan. - def toStoredNarrowcast(narrowcast: Narrowcast): StoredNarrowcast = - StoredNarrowcast( - language = Some(Seq.empty), - location = Some(narrowcast.location), - ids = Some(Seq.empty) - ) - - def toStoredAdditionalFields(from: Seq[TFieldBlob], to: StoredTweet): StoredTweet = - from.foldLeft(to) { case (t, f) => t.setField(f) } - - def toStoredAdditionalFields(from: Tweet, to: StoredTweet): StoredTweet = - toStoredAdditionalFields(AdditionalFields.additionalFields(from), to) - - def toStoredTweet(tweet: Tweet): StoredTweet = { - val storedTweet = - StoredTweet( - id = tweet.id, - userId = Some(TweetLenses.userId(tweet)), - text = Some(TweetLenses.text(tweet)), - createdVia = Some(TweetLenses.createdVia(tweet)), - createdAtSec = Some(TweetLenses.createdAt(tweet)), - reply = - TweetLenses.reply(tweet).map { r => toStoredReply(r, TweetLenses.conversationId(tweet)) }, - share = TweetLenses.share(tweet).map(toStoredShare), - contributorId = tweet.contributor.map(_.userId), - geo = toStoredGeo(tweet), - hasTakedown = Some(TweetLenses.hasTakedown(tweet)), - nsfwUser = Some(TweetLenses.nsfwUser(tweet)), - nsfwAdmin = Some(TweetLenses.nsfwAdmin(tweet)), - media = tweet.media.map(toStoredMedia), - narrowcast = TweetLenses.narrowcast(tweet).map(toStoredNarrowcast), - nullcast = 
Some(TweetLenses.nullcast(tweet)), - trackingId = TweetLenses.trackingId(tweet), - quotedTweet = TweetLenses.quotedTweet(tweet).flatMap { qt => - toStoredQuotedTweet(qt, TweetLenses.text(tweet)) - } - ) - toStoredAdditionalFields(tweet, storedTweet) - } - - /** - * Does not need core data to be set. Constructs on disk tweet by avoiding the TweetLenses object - * and only extracting the specified fields. - * - * NOTE: Assumes that specified fields are set in the tweet. - * - * @param tpTweet Tweetypie Tweet to be converted - * @param fields the fields to be populated in the on disk Tweet - * - * @return an on disk Tweet which has only the specified fields set - */ - def toStoredTweetForFields(tpTweet: Tweet, fields: Set[Field]): StoredTweet = { - - // Make sure all the passed in fields are known or additional fields - require( - (fields -- Field.AllUpdatableCompiledFields) - .forall(field => AdditionalFields.isAdditionalFieldId(field.id)) - ) - - val storedTweet = - StoredTweet( - id = tpTweet.id, - geo = if (fields.contains(Field.Geo)) { - tpTweet.coreData.get.coordinates match { - case None => - tpTweet.coreData.get.placeId match { - case None => None - case Some(placeId) => - Some( - StoredGeo( - latitude = 0.0, - longitude = 0.0, - geoPrecision = 0, - entityId = 0, - name = Some(placeId) - ) - ) - } - case Some(coords) => - Some( - StoredGeo( - latitude = coords.latitude, - longitude = coords.longitude, - geoPrecision = coords.geoPrecision, - entityId = if (coords.display) 2 else 0, - name = tpTweet.coreData.get.placeId - ) - ) - } - } else { - None - }, - hasTakedown = - if (fields.contains(Field.HasTakedown)) - Some(tpTweet.coreData.get.hasTakedown) - else - None, - nsfwUser = - if (fields.contains(Field.NsfwUser)) - Some(tpTweet.coreData.get.nsfwUser) - else - None, - nsfwAdmin = - if (fields.contains(Field.NsfwAdmin)) - Some(tpTweet.coreData.get.nsfwAdmin) - else - None - ) - - if (fields.map(_.id).exists(AdditionalFields.isAdditionalFieldId)) - 
toStoredAdditionalFields(tpTweet, storedTweet) - else - storedTweet - } - - def fromStoredReply(reply: StoredReply): Reply = - Reply( - Some(reply.inReplyToStatusId).filter(_ > 0), - reply.inReplyToUserId - ) - - def fromStoredShare(share: StoredShare): Share = - Share( - share.sourceStatusId, - share.sourceUserId, - share.parentStatusId - ) - - def fromStoredQuotedTweet(qt: StoredQuotedTweet): QuotedTweet = - QuotedTweet( - qt.tweetId, - qt.userId, - Some( - ShortenedUrl( - shortUrl = qt.shortUrl, - longUrl = "", // will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator - displayText = "" //will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator - ) - ) - ) - - def fromStoredGeo(geo: StoredGeo): GeoCoordinates = - GeoCoordinates( - latitude = geo.latitude, - longitude = geo.longitude, - geoPrecision = geo.geoPrecision, - display = geo.entityId == 2 - ) - - def fromStoredMediaEntity(media: StoredMediaEntity): MediaEntity = - MediaEntity( - fromIndex = -1, // will get filled in later - toIndex = -1, // will get filled in later - url = null, // will get filled in later - mediaPath = "", // field is obsolete - mediaUrl = null, // will get filled in later - mediaUrlHttps = null, // will get filled in later - displayUrl = null, // will get filled in later - expandedUrl = null, // will get filled in later - mediaId = media.id, - nsfw = false, - sizes = Set( - MediaSize( - sizeType = MediaSizeType.Orig, - resizeMethod = MediaResizeMethod.Fit, - deprecatedContentType = MediaContentType(media.mediaType), - width = media.width, - height = media.height - ) - ) - ) - - def fromStoredNarrowcast(narrowcast: StoredNarrowcast): Narrowcast = - Narrowcast( - location = narrowcast.location.getOrElse(Seq()) - ) - - def fromStoredTweet(storedTweet: StoredTweet): Tweet = { - val coreData = - TweetCoreData( - userId = storedTweet.userId.get, - text = storedTweet.text.get, - createdVia = storedTweet.createdVia.get, - createdAtSecs = storedTweet.createdAtSec.get, - 
reply = storedTweet.reply.map(fromStoredReply), - share = storedTweet.share.map(fromStoredShare), - hasTakedown = storedTweet.hasTakedown.getOrElse(false), - nsfwUser = storedTweet.nsfwUser.getOrElse(false), - nsfwAdmin = storedTweet.nsfwAdmin.getOrElse(false), - narrowcast = storedTweet.narrowcast.map(fromStoredNarrowcast), - nullcast = storedTweet.nullcast.getOrElse(false), - trackingId = storedTweet.trackingId, - conversationId = storedTweet.reply.flatMap(_.conversationId), - placeId = storedTweet.geo.flatMap(_.name), - coordinates = storedTweet.geo.map(fromStoredGeo), - hasMedia = if (storedTweet.media.exists(_.nonEmpty)) Some(true) else None - ) - - // retweets should never have their media, but some tweets incorrectly do. - val storedMedia = if (coreData.share.isDefined) Nil else storedTweet.media.toSeq - - val tpTweet = - Tweet( - id = storedTweet.id, - coreData = Some(coreData), - contributor = storedTweet.contributorId.map(Contributor(_)), - media = Some(storedMedia.flatten.map(fromStoredMediaEntity)), - mentions = Some(Seq.empty), - urls = Some(Seq.empty), - cashtags = Some(Seq.empty), - hashtags = Some(Seq.empty), - quotedTweet = storedTweet.quotedTweet.map(fromStoredQuotedTweet) - ) - fromStoredAdditionalFields(storedTweet, tpTweet) - } - - def fromStoredTweetAllowInvalid(storedTweet: StoredTweet): Tweet = { - fromStoredTweet( - storedTweet.copy( - userId = storedTweet.userId.orElse(Some(-1L)), - text = storedTweet.text.orElse(Some("")), - createdVia = storedTweet.createdVia.orElse(Some("")), - createdAtSec = storedTweet.createdAtSec.orElse(Some(-1L)) - )) - } - - def fromStoredAdditionalFields(from: StoredTweet, to: Tweet): Tweet = { - val passThroughAdditionalFields = - from._passthroughFields.filterKeys(AdditionalFields.isAdditionalFieldId) - val allAdditionalFields = - from.getFieldBlobs(tbTweetCompiledAdditionalFieldIds) ++ passThroughAdditionalFields - allAdditionalFields.values.foldLeft(to) { case (t, f) => t.setField(f) } - } - - def 
toDeletedTweet(storedTweet: StoredTweet): DeletedTweet = { - val noteTweetBlob = storedTweet.getFieldBlob(Tweet.NoteTweetField.id) - val noteTweetOption = noteTweetBlob.map(blob => NoteTweet.decode(blob.read)) - DeletedTweet( - id = storedTweet.id, - userId = storedTweet.userId, - text = storedTweet.text, - createdAtSecs = storedTweet.createdAtSec, - share = storedTweet.share.map(toDeletedShare), - media = storedTweet.media.map(_.map(toDeletedMediaEntity)), - noteTweetId = noteTweetOption.map(_.id), - isExpandable = noteTweetOption.flatMap(_.isExpandable) - ) - } - - def toDeletedShare(storedShare: StoredShare): DeletedTweetShare = - DeletedTweetShare( - sourceStatusId = storedShare.sourceStatusId, - sourceUserId = storedShare.sourceUserId, - parentStatusId = storedShare.parentStatusId - ) - - def toDeletedMediaEntity(storedMediaEntity: StoredMediaEntity): DeletedTweetMediaEntity = - DeletedTweetMediaEntity( - id = storedMediaEntity.id, - mediaType = storedMediaEntity.mediaType, - width = storedMediaEntity.width, - height = storedMediaEntity.height - ) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.docx new file mode 100644 index 000000000..a145a7cba Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala deleted file mode 100644 index 52e907594..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala +++ /dev/null @@ -1,92 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.util.Time -import com.twitter.util.Try -import java.util.Arrays -import scala.util.control.NoStackTrace -import 
scala.util.control.NonFatal - -sealed abstract class TimestampType(val keyName: String) -object TimestampType { - object Default extends TimestampType("timestamp") - object SoftDelete extends TimestampType("softdelete_timestamp") -} - -/** - * TimestampDecoder gets the timestamps associated with state records. The Manhattan timestamp is - * used for legacy records (with value "1"), otherwise the timestamp is extracted from the - * JSON value. - * - * See "Metadata" in README.md for further information about state records. - */ -object TimestampDecoder { - case class UnparsableJson(msg: String, t: Throwable) extends Exception(msg, t) with NoStackTrace - case class MissingJsonTimestamp(msg: String) extends Exception(msg) with NoStackTrace - case class UnexpectedJsonValue(msg: String) extends Exception(msg) with NoStackTrace - case class MissingManhattanTimestamp(msg: String) extends Exception(msg) with NoStackTrace - - private[storage] val LegacyValue: Array[Byte] = Array('1') - - /** - * The first backfill of tweet data to Manhattan supplied timestamps in milliseconds where - * nanoseconds were expected. The result is that some values have an incorrect Manhattan - * timestamp. For these bad timestamps, time.inNanoseconds is actually milliseconds. - * - * For example, the deletion record for tweet 22225781 has Manhattan timestamp 1970-01-01 00:23:24 +0000. - * Contrast with the deletion record for tweet 435404491999813632 with Manhattan timestamp 2014-11-09 14:24:04 +0000 - * - * This threshold value comes from the last time in milliseconds that was interpreted - * as nanoseconds, e.g. 
Time.fromNanoseconds(1438387200000L) == 1970-01-01 00:23:58 +0000 - */ - private[storage] val BadTimestampThreshold = Time.at("1970-01-01 00:23:58 +0000") - - def decode(record: TweetManhattanRecord, tsType: TimestampType): Try[Long] = - decode(record.value, tsType) - - def decode(mhValue: TweetManhattanValue, tsType: TimestampType): Try[Long] = { - val value = ByteArrayCodec.fromByteBuffer(mhValue.contents) - if (isLegacyRecord(value)) { - nativeManhattanTimestamp(mhValue) - } else { - jsonTimestamp(value, tsType) - } - } - - private def isLegacyRecord(value: Array[Byte]) = Arrays.equals(value, LegacyValue) - - private def nativeManhattanTimestamp(mhValue: TweetManhattanValue): Try[Long] = - mhValue.timestamp match { - case Some(ts) => Return(correctedTimestamp(ts)) - case None => - Throw(MissingManhattanTimestamp(s"Manhattan timestamp missing in value $mhValue")) - } - - private def jsonTimestamp(value: Array[Byte], tsType: TimestampType): Try[Long] = - Try { Json.decode(value) } - .rescue { case NonFatal(e) => Throw(UnparsableJson(e.getMessage, e)) } - .flatMap { m => - m.get(tsType.keyName) match { - case Some(v) => - v match { - case l: Long => Return(l) - case i: Integer => Return(i.toLong) - case _ => - Throw( - UnexpectedJsonValue(s"Unexpected value for ${tsType.keyName} in record data $m") - ) - } - case None => - Throw(MissingJsonTimestamp(s"Missing key ${tsType.keyName} in record data $m")) - } - } - - def correctedTime(t: Time): Time = - if (t < BadTimestampThreshold) Time.fromMilliseconds(t.inNanoseconds) else t - - def correctedTime(t: Long): Time = correctedTime(Time.fromNanoseconds(t)) - - def correctedTimestamp(t: Time): Long = - if (t < BadTimestampThreshold) t.inNanoseconds else t.inMilliseconds -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.docx new file mode 100644 index 000000000..1cee29c2c Binary files /dev/null and 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala deleted file mode 100644 index ed5d01141..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala +++ /dev/null @@ -1,164 +0,0 @@ -package com.twitter.tweetypie.storage - -/** - * Responsible for encoding/decoding Tweet records to/from Manhattan keys - * - * K/V Scheme: - * ----------- - * [TweetId] - * /metadata - * /delete_state (a.k.a. hard delete) - * /soft_delete_state - * /bounce_delete_state - * /undelete_state - * /force_added_state - * /scrubbed_fields/ - * /[ScrubbedFieldId_1] - * .. - * /[ScrubbedFieldId_M] - * /fields - * /internal - * /1 - * /9 - * .. - * /99 - * /external - * /100 - * .. - * - * IMPORTANT NOTE: - * 1) Field Ids 2 to 8 in Tweet thrift struct are considered "core fields" are 'packed' together - * into a TFieldBlob and stored under field id 1 (i.e [DatasetName]/[TweetId]/fields/internal/1). - * This is why we do not see keys from [DatasetName]/[TweetId]/fields/internal/2 to [DatasetName]/ - * [TweetId]/fields/internal/8) - * - * 2) Also, the tweet id (which is the field id 1 in Tweet thrift structure) is not explicitly stored - * in Manhattan. There is no need to explicitly store it since it is a part of the Pkey - */ -case class TweetKey(tweetId: TweetId, lKey: TweetKey.LKey) { - override def toString: String = - s"/${ManhattanOperations.PkeyInjection(tweetId)}/${ManhattanOperations.LkeyInjection(lKey)}" -} - -object TweetKey { - // Manhattan uses lexicographical order for keys. To make sure lexicographical order matches the - // numerical order, we should pad both tweet id and field ids with leading zeros. 
- // Since tweet id is long and field id is a short, the max width of each can be obtained by doing - // Long.MaxValue.toString.length and Short.MaxValue.toString.length respectively - private val TweetIdFormatStr = s"%0${Long.MaxValue.toString.length}d" - private val FieldIdFormatStr = s"%0${Short.MaxValue.toString.length}d" - private[storage] def padTweetIdStr(tweetId: Long): String = TweetIdFormatStr.format(tweetId) - private[storage] def padFieldIdStr(fieldId: Short): String = FieldIdFormatStr.format(fieldId) - - def coreFieldsKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.CoreFieldsKey) - def hardDeletionStateKey(tweetId: TweetId): TweetKey = - TweetKey(tweetId, LKey.HardDeletionStateKey) - def softDeletionStateKey(tweetId: TweetId): TweetKey = - TweetKey(tweetId, LKey.SoftDeletionStateKey) - def bounceDeletionStateKey(tweetId: TweetId): TweetKey = - TweetKey(tweetId, LKey.BounceDeletionStateKey) - def unDeletionStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.UnDeletionStateKey) - def forceAddedStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ForceAddedStateKey) - def scrubbedGeoFieldKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ScrubbedGeoFieldKey) - def fieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey = - TweetKey(tweetId, LKey.FieldKey(fieldId)) - def internalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey = - TweetKey(tweetId, LKey.InternalFieldsKey(fieldId)) - def additionalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey = - TweetKey(tweetId, LKey.AdditionalFieldsKey(fieldId)) - def scrubbedFieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey = - TweetKey(tweetId, LKey.ScrubbedFieldKey(fieldId)) - - // AllFieldsKeyPrefix: fields - // CoreFieldsKey: fields/internal/1 (Stores subset of StoredTweet fields which are - // "packed" into a single CoreFields record) - // HardDeletionStateKey: metadata/delete_state - // SoftDeletionStateKey: metadata/soft_delete_state - // 
BounceDeletionStateKey: metadata/bounce_delete_state - // UnDeletionStateKey: metadata/undelete_state - // ForceAddedStateKey: metadata/force_added_state - // FieldKey: fields// (where - // is 'internal' for field ids < 100 and 'external' for all other - // fields ids) - // InternalFieldsKeyPrefix: fields/internal - // PKey: - // ScrubbedFieldKey: metadata/scrubbed_fields/ - // ScrubbedFieldKeyPrefix: metadata/scrubbed_fields - sealed abstract class LKey(override val toString: String) - object LKey { - private val HardDeletionRecordLiteral = "delete_state" - private val SoftDeletionRecordLiteral = "soft_delete_state" - private val BounceDeletionRecordLiteral = "bounce_delete_state" - private val UnDeletionRecordLiteral = "undelete_state" - private val ForceAddRecordLiteral = "force_added_state" - private val ScrubbedFieldsGroup = "scrubbed_fields" - private val InternalFieldsGroup = "internal" - private val ExternalFieldsGroup = "external" - private val MetadataCategory = "metadata" - private val FieldsCategory = "fields" - private val InternalFieldsKeyPrefix = s"$FieldsCategory/$InternalFieldsGroup/" - private val ExternalFieldsKeyPrefix = s"$FieldsCategory/$ExternalFieldsGroup/" - private val ScrubbedFieldsKeyPrefix = s"$MetadataCategory/$ScrubbedFieldsGroup/" - - sealed abstract class MetadataKey(metadataType: String) - extends LKey(s"$MetadataCategory/$metadataType") - sealed abstract class StateKey(stateType: String) extends MetadataKey(stateType) - case object HardDeletionStateKey extends StateKey(s"$HardDeletionRecordLiteral") - case object SoftDeletionStateKey extends StateKey(s"$SoftDeletionRecordLiteral") - case object BounceDeletionStateKey extends StateKey(s"$BounceDeletionRecordLiteral") - case object UnDeletionStateKey extends StateKey(s"$UnDeletionRecordLiteral") - case object ForceAddedStateKey extends StateKey(s"$ForceAddRecordLiteral") - - case class ScrubbedFieldKey(fieldId: FieldId) - extends 
MetadataKey(s"$ScrubbedFieldsGroup/${padFieldIdStr(fieldId)}") - val ScrubbedGeoFieldKey: LKey.ScrubbedFieldKey = ScrubbedFieldKey(TweetFields.geoFieldId) - - /** - * LKey that has one of many possible fields id. This generalize over - * internal and additional fields key. - */ - sealed abstract class FieldKey(prefix: String) extends LKey(toString) { - def fieldId: FieldId - override val toString: String = prefix + padFieldIdStr(fieldId) - } - object FieldKey { - def apply(fieldId: FieldId): FieldKey = - fieldId match { - case id if id < TweetFields.firstAdditionalFieldId => InternalFieldsKey(fieldId) - case _ => AdditionalFieldsKey(fieldId) - } - } - - case class InternalFieldsKey(fieldId: FieldId) extends FieldKey(InternalFieldsKeyPrefix) { - assert(fieldId < TweetFields.firstAdditionalFieldId) - } - case class AdditionalFieldsKey(fieldId: FieldId) extends FieldKey(ExternalFieldsKeyPrefix) { - assert(fieldId >= TweetFields.firstAdditionalFieldId) - } - val CoreFieldsKey: LKey.InternalFieldsKey = InternalFieldsKey(TweetFields.rootCoreFieldId) - - case class Unknown private (str: String) extends LKey(str) - - def fromString(str: String): LKey = { - def extractFieldId(prefix: String): FieldId = - str.slice(prefix.length, str.length).toShort - - str match { - case CoreFieldsKey.toString => CoreFieldsKey - case HardDeletionStateKey.toString => HardDeletionStateKey - case SoftDeletionStateKey.toString => SoftDeletionStateKey - case BounceDeletionStateKey.toString => BounceDeletionStateKey - case UnDeletionStateKey.toString => UnDeletionStateKey - case ForceAddedStateKey.toString => ForceAddedStateKey - case ScrubbedGeoFieldKey.toString => ScrubbedGeoFieldKey - case _ if str.startsWith(InternalFieldsKeyPrefix) => - InternalFieldsKey(extractFieldId(InternalFieldsKeyPrefix)) - case _ if str.startsWith(ExternalFieldsKeyPrefix) => - AdditionalFieldsKey(extractFieldId(ExternalFieldsKeyPrefix)) - case _ if str.startsWith(ScrubbedFieldsKeyPrefix) => - 
ScrubbedFieldKey(extractFieldId(ScrubbedFieldsKeyPrefix)) - case _ => Unknown(str) - } - } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.docx new file mode 100644 index 000000000..50601ae0b Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala deleted file mode 100644 index a5d31a62d..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala +++ /dev/null @@ -1,90 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.storage.client.manhattan.kv.ManhattanValue -import com.twitter.util.Time - -/** - * A [[TweetStateRecord]] represents an action taken on a tweet and can be used to determine a tweet's state. - * - * The state is determined by the record with the most recent timestamp. In the absence of any - * record a tweet is considered found, which is to say the tweet has not been through the - * deletion process. - * - * The [[TweetStateRecord]] type is determined by the lkey of a tweet manhattan record: - * metadata/delete_state -> HardDeleted - * metadata/soft_delete_state -> SoftDeleted - * metadata/undelete_state -> Undeleted - * metadata/force_added_state -> ForceAdded - * - * See the README in this directory for more details about the state of a tweet. 
- */ -sealed trait TweetStateRecord { - def tweetId: TweetId - def createdAt: Long - def stateKey: TweetKey.LKey.StateKey - def values: Map[String, Long] = Map("timestamp" -> createdAt) - def name: String - - def toTweetMhRecord: TweetManhattanRecord = { - val valByteBuffer = ByteArrayCodec.toByteBuffer(Json.encode(values)) - val value = ManhattanValue(valByteBuffer, Some(Time.fromMilliseconds(createdAt))) - TweetManhattanRecord(TweetKey(tweetId, stateKey), value) - } -} - -object TweetStateRecord { - - /** When a soft-deleted or bounce deleted tweet is ultimately hard-deleted by an offline job. */ - case class HardDeleted(tweetId: TweetId, createdAt: Long, deletedAt: Long) - extends TweetStateRecord { - // timestamp in the mh backend is the hard deletion timestamp - override def values = Map("timestamp" -> createdAt, "softdelete_timestamp" -> deletedAt) - def stateKey = TweetKey.LKey.HardDeletionStateKey - def name = "hard_deleted" - } - - /** When a tweet is deleted by the user. It can still be undeleted while in the soft deleted state. */ - case class SoftDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord { - def stateKey = TweetKey.LKey.SoftDeletionStateKey - def name = "soft_deleted" - } - - /** When a tweet is deleted by go/bouncer for violating Twitter Rules. It MAY NOT be undeleted. */ - case class BounceDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord { - def stateKey = TweetKey.LKey.BounceDeletionStateKey - def name = "bounce_deleted" - } - - /** When a tweet is undeleted by an internal system. */ - case class Undeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord { - def stateKey = TweetKey.LKey.UnDeletionStateKey - def name = "undeleted" - } - - /** When a tweet is created using the forceAdd endpoint. 
*/ - case class ForceAdded(tweetId: TweetId, createdAt: Long) extends TweetStateRecord { - def stateKey = TweetKey.LKey.ForceAddedStateKey - def name = "force_added" - } - - def fromTweetMhRecord(record: TweetManhattanRecord): Option[TweetStateRecord] = { - def ts = TimestampDecoder.decode(record, TimestampType.Default).getOrElse(0L) - def sdts = TimestampDecoder.decode(record, TimestampType.SoftDelete).getOrElse(0L) - def tweetId = record.pkey - - record.lkey match { - case TweetKey.LKey.HardDeletionStateKey => Some(HardDeleted(tweetId, ts, sdts)) - case TweetKey.LKey.SoftDeletionStateKey => Some(SoftDeleted(tweetId, ts)) - case TweetKey.LKey.BounceDeletionStateKey => Some(BounceDeleted(tweetId, ts)) - case TweetKey.LKey.UnDeletionStateKey => Some(Undeleted(tweetId, ts)) - case TweetKey.LKey.ForceAddedStateKey => Some(ForceAdded(tweetId, ts)) - case _ => None - } - } - - def fromTweetMhRecords(records: Seq[TweetManhattanRecord]): Seq[TweetStateRecord] = - records.flatMap(fromTweetMhRecord) - - def mostRecent(records: Seq[TweetManhattanRecord]): Option[TweetStateRecord] = - fromTweetMhRecords(records).sortBy(_.createdAt).lastOption -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.docx new file mode 100644 index 000000000..32143db05 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala deleted file mode 100644 index 69023abc2..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala +++ /dev/null @@ -1,201 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.storage.Response.TweetResponse -import 
com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.util.Future - -/** - * Interface for reading and writing tweet data in Manhattan - */ -trait TweetStorageClient { - import TweetStorageClient._ - def addTweet: AddTweet - def deleteAdditionalFields: DeleteAdditionalFields - def getTweet: GetTweet - def getStoredTweet: GetStoredTweet - def getDeletedTweets: GetDeletedTweets - def undelete: Undelete - def updateTweet: UpdateTweet - def scrub: Scrub - def softDelete: SoftDelete - def bounceDelete: BounceDelete - def hardDeleteTweet: HardDeleteTweet - def ping: Ping -} - -object TweetStorageClient { - type GetTweet = TweetId => Stitch[GetTweet.Response] - - object GetTweet { - sealed trait Response - object Response { - case class Found(tweet: Tweet) extends Response - object NotFound extends Response - object Deleted extends Response - // On BounceDeleted, provide the full Tweet so that implementations - // (i.e. ManhattanTweetStorageClient) don't not need to be aware of the specific tweet - // fields required by callers for proper processing of bounced deleted tweets. 
- case class BounceDeleted(tweet: Tweet) extends Response - } - } - - type GetStoredTweet = TweetId => Stitch[GetStoredTweet.Response] - - object GetStoredTweet { - sealed abstract class Error(val message: String) { - override def toString: String = message - } - object Error { - case object TweetIsCorrupt extends Error("stored tweet data is corrupt and cannot be decoded") - - case object ScrubbedFieldsPresent - extends Error("stored tweet fields that should be scrubbed are still present") - - case object TweetFieldsMissingOrInvalid - extends Error("expected tweet fields are missing or contain invalid values") - - case object TweetShouldBeHardDeleted - extends Error("stored tweet that should be hard deleted is still present") - } - - sealed trait Response - object Response { - sealed trait StoredTweetMetadata { - def state: Option[TweetStateRecord] - def allStates: Seq[TweetStateRecord] - def scrubbedFields: Set[FieldId] - } - - sealed trait StoredTweetErrors { - def errs: Seq[Error] - } - - /** - * Tweet data was found, possibly state records and/or scrubbed field records. - */ - sealed trait FoundAny extends Response with StoredTweetMetadata { - def tweet: Tweet - } - - object FoundAny { - def unapply( - response: Response - ): Option[ - (Tweet, Option[TweetStateRecord], Seq[TweetStateRecord], Set[FieldId], Seq[Error]) - ] = - response match { - case f: FoundWithErrors => - Some((f.tweet, f.state, f.allStates, f.scrubbedFields, f.errs)) - case f: FoundAny => Some((f.tweet, f.state, f.allStates, f.scrubbedFields, Seq.empty)) - case _ => None - } - } - - /** - * No records for this tweet id were found in storage - */ - case class NotFound(id: TweetId) extends Response - - /** - * Data related to the Tweet id was found but could not be loaded successfully. The - * errs array contains details of the problems. 
- */ - case class Failed( - id: TweetId, - state: Option[TweetStateRecord], - allStates: Seq[TweetStateRecord], - scrubbedFields: Set[FieldId], - errs: Seq[Error], - ) extends Response - with StoredTweetMetadata - with StoredTweetErrors - - /** - * No Tweet data was found, and the most recent state record found is HardDeleted - */ - case class HardDeleted( - id: TweetId, - state: Option[TweetStateRecord.HardDeleted], - allStates: Seq[TweetStateRecord], - scrubbedFields: Set[FieldId], - ) extends Response - with StoredTweetMetadata - - /** - * Tweet data was found, and the most recent state record found, if any, is not - * any form of deletion record. - */ - case class Found( - tweet: Tweet, - state: Option[TweetStateRecord], - allStates: Seq[TweetStateRecord], - scrubbedFields: Set[FieldId], - ) extends FoundAny - - /** - * Tweet data was found, and the most recent state record found indicates deletion. - */ - case class FoundDeleted( - tweet: Tweet, - state: Option[TweetStateRecord], - allStates: Seq[TweetStateRecord], - scrubbedFields: Set[FieldId], - ) extends FoundAny - - /** - * Tweet data was found, however errors were detected in the stored data. Required - * fields may be missing from the Tweet struct (e.g. CoreData), stored fields that - * should be scrubbed remain present, or Tweets that should be hard-deleted remain - * in storage. The errs array contains details of the problems. 
- */ - case class FoundWithErrors( - tweet: Tweet, - state: Option[TweetStateRecord], - allStates: Seq[TweetStateRecord], - scrubbedFields: Set[FieldId], - errs: Seq[Error], - ) extends FoundAny - with StoredTweetErrors - } - } - - type HardDeleteTweet = TweetId => Stitch[HardDeleteTweet.Response] - type SoftDelete = TweetId => Stitch[Unit] - type BounceDelete = TweetId => Stitch[Unit] - - object HardDeleteTweet { - sealed trait Response - object Response { - case class Deleted(deletedAtMillis: Option[Long], createdAtMillis: Option[Long]) - extends Response - case class NotDeleted(id: TweetId, ineligibleLKey: Option[TweetKey.LKey]) - extends Throwable - with Response - } - } - - type Undelete = TweetId => Stitch[Undelete.Response] - object Undelete { - case class Response( - code: UndeleteResponseCode, - tweet: Option[Tweet] = None, - createdAtMillis: Option[Long] = None, - archivedAtMillis: Option[Long] = None) - - sealed trait UndeleteResponseCode - - object UndeleteResponseCode { - object Success extends UndeleteResponseCode - object BackupNotFound extends UndeleteResponseCode - object NotCreated extends UndeleteResponseCode - } - } - - type AddTweet = Tweet => Stitch[Unit] - type UpdateTweet = (Tweet, Seq[Field]) => Stitch[TweetResponse] - type GetDeletedTweets = Seq[TweetId] => Stitch[Seq[DeletedTweetResponse]] - type DeleteAdditionalFields = (Seq[TweetId], Seq[Field]) => Stitch[Seq[TweetResponse]] - type Scrub = (Seq[TweetId], Seq[Field]) => Stitch[Unit] - type Ping = () => Future[Unit] -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.docx new file mode 100644 index 000000000..7eb516699 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala deleted file mode 100644 index 7f1bd6b1e..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.tweetypie.storage - -import scala.util.control.NoStackTrace - -sealed abstract class TweetStorageException(message: String, cause: Throwable) - extends Exception(message, cause) - -/** - * The request was not properly formed and failed an assertion present in the code. Should not be - * retried without modification. - */ -case class ClientError(message: String, cause: Throwable) - extends TweetStorageException(message, cause) - with NoStackTrace - -/** - * Request was rejected by Manhattan or the in-process rate limiter. Should not be retried. - */ -case class RateLimited(message: String, cause: Throwable) - extends TweetStorageException(message, cause) - with NoStackTrace - -/** - * Corrupt tweets were requested from Manhattan - */ -case class VersionMismatchError(message: String, cause: Throwable = null) - extends TweetStorageException(message, cause) - with NoStackTrace - -/** - * All other unhandled exceptions. 
- */ -case class InternalError(message: String, cause: Throwable = null) - extends TweetStorageException(message, cause) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.docx new file mode 100644 index 000000000..e5a8e5071 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala deleted file mode 100644 index b10ef107d..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala +++ /dev/null @@ -1,265 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.logging.Logger -import com.twitter.scrooge.TFieldBlob -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.storage.client.manhattan.kv.DeniedManhattanException -import com.twitter.storage.client.manhattan.kv.ManhattanException -import com.twitter.tweetypie.storage.Response._ -import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.util.Try - -object TweetUtils { - val log: Logger = Logger("com.twitter.tweetypie.storage.TweetStorageLibrary") - import FieldResponseCodec.ValueNotFoundException - - /** - * It's rare, but we have seen tweets with userId=0, which is likely the result of a - * failed/partial delete. Treat these as invalid tweets, which are returned to callers - * as not found. - */ - def isValid(tweet: StoredTweet): Boolean = - tweet.userId.exists(_ != 0) && tweet.text.nonEmpty && - tweet.createdVia.nonEmpty && tweet.createdAtSec.nonEmpty - - /** - * Helper function to extract Scrubbed field Ids from the result returned by reading entire tweet prefix - * function. 
- * - * @param records The sequence of MH records for the given tweetId - * - * @return The set of scrubbed field ids - */ - private[tweetypie] def extractScrubbedFields(records: Seq[TweetManhattanRecord]): Set[Short] = - records - .map(r => r.lkey) - .collect { case TweetKey.LKey.ScrubbedFieldKey(fieldId) => fieldId } - .toSet - - private[tweetypie] val expectedFields = - TweetFields.requiredFieldIds.toSet - TweetFields.tweetIdField - - /** - * Find the timestamp from a tweetId and a list of MH records. This is used when - * you need a timestamp and you aren't sure that tweetId is a snowflake id. - * - * @param tweetId A tweetId you want the timestamp for. - * @param records Tbird_mh records keyed on tweetId, one of which should be the - * core fields record. - * @return A milliseconds timestamp if one could be found. - */ - private[tweetypie] def creationTimeFromTweetIdOrMHRecords( - tweetId: Long, - records: Seq[TweetManhattanRecord] - ): Option[Long] = - SnowflakeId - .unixTimeMillisOptFromId(tweetId).orElse({ - records - .find(_.lkey == TweetKey.LKey.CoreFieldsKey) - .flatMap { coreFields => - CoreFieldsCodec - .fromTFieldBlob( - TFieldBlobCodec.fromByteBuffer(coreFields.value.contents) - ).createdAtSec.map(seconds => seconds * 1000) - } - }) - - /** - * Helper function used to parse manhattan results for fields in a tweet (given in the form of - * Sequence of (FieldKey, Try[Unit]) pairs) and build a TweetResponse object. - * - * @param callerName The name of the caller function. Used for error messages - * @param tweetId Id of the Tweet for which TweetResponse is being built - * @param fieldResults Sequence of (FieldKey, Try[Unit]). 
- * - * @return TweetResponse object - */ - private[tweetypie] def buildTweetResponse( - callerName: String, - tweetId: Long, - fieldResults: Map[FieldId, Try[Unit]] - ): TweetResponse = { - // Count Found/Not Found - val successCount = - fieldResults.foldLeft(0) { - case (count, (_, Return(_))) => count + 1 - case (count, (_, Throw(_: ValueNotFoundException))) => count + 1 - case (count, _) => count - } - - val fieldResponsesMap = getFieldResponses(callerName, tweetId, fieldResults) - - val overallCode = if (successCount > 0 && successCount == fieldResults.size) { - TweetResponseCode.Success - } else { - - // If any field was rate limited, then we consider the entire tweet to be rate limited. So first we scan - // the field results to check such an occurrence. - val wasRateLimited = fieldResults.exists { fieldResult => - fieldResult._2 match { - case Throw(e: DeniedManhattanException) => true - case _ => false - } - } - - // Were we rate limited for any of the additional fields? - if (wasRateLimited) { - TweetResponseCode.OverCapacity - } else if (successCount == 0) { - // successCount is < fieldResults.size at this point. So if allOrNone is true or - // if successCount == 0 (i.e failed on all Fields), the overall code should be 'Failure' - TweetResponseCode.Failure - } else { - // allOrNone == false AND successCount > 0 at this point. Clearly the overallCode should be Partial - TweetResponseCode.Partial - } - } - - TweetResponse(tweetId, overallCode, Some(fieldResponsesMap)) - - } - - /** - * Helper function to convert manhattan results into a Map[FieldId, FieldResponse] - * - * @param fieldResults Sequence of (TweetKey, TFieldBlob). 
- */ - private[tweetypie] def getFieldResponses( - callerName: String, - tweetId: TweetId, - fieldResults: Map[FieldId, Try[_]] - ): Map[FieldId, FieldResponse] = - fieldResults.map { - case (fieldId, resp) => - def keyStr = TweetKey.fieldKey(tweetId, fieldId).toString - resp match { - case Return(_) => - fieldId -> FieldResponse(FieldResponseCode.Success, None) - case Throw(mhException: ManhattanException) => - val errMsg = s"Exception in $callerName. Key: $keyStr. Error: $mhException" - mhException match { - case _: ValueNotFoundException => // ValueNotFound is not an error - case _ => log.error(errMsg) - } - fieldId -> FieldResponseCodec.fromThrowable(mhException, Some(errMsg)) - case Throw(e) => - val errMsg = s"Exception in $callerName. Key: $keyStr. Error: $e" - log.error(errMsg) - fieldId -> FieldResponse(FieldResponseCode.Error, Some(errMsg)) - } - } - - /** - * Helper function to build a TweetResponse object when being rate limited. Its possible that only some of the fields - * got rate limited, so we indicate which fields got processed successfully, and which encountered some sort of error. - * - * @param tweetId Tweet id - * @param callerName name of API calling this function - * @param fieldResponses field responses for the case where - * - * @return The TweetResponse object - */ - private[tweetypie] def buildTweetOverCapacityResponse( - callerName: String, - tweetId: Long, - fieldResponses: Map[FieldId, Try[Unit]] - ) = { - val fieldResponsesMap = getFieldResponses(callerName, tweetId, fieldResponses) - TweetResponse(tweetId, TweetResponseCode.OverCapacity, Some(fieldResponsesMap)) - } - - /** - * Build a StoredTweet from a Seq of records. Core fields are handled specially. 
- */ - private[tweetypie] def buildStoredTweet( - tweetId: TweetId, - records: Seq[TweetManhattanRecord], - includeScrubbed: Boolean = false, - ): StoredTweet = { - getStoredTweetBlobs(records, includeScrubbed) - .flatMap { fieldBlob => - // When fieldId == TweetFields.rootCoreFieldId, we have further work to do since the - // 'value' is really serialized/packed version of all core fields. In this case we'll have - // to unpack it into many TFieldBlobs. - if (fieldBlob.id == TweetFields.rootCoreFieldId) { - // We won't throw any error in this function and instead let the caller function handle this - // condition (i.e If the caller function does not find any values for the core-fields in - // the returned map, it should assume that the tweet is not found) - CoreFieldsCodec.unpackFields(fieldBlob).values.toSeq - } else { - Seq(fieldBlob) - } - }.foldLeft(StoredTweet(tweetId))(_.setField(_)) - } - - private[tweetypie] def buildValidStoredTweet( - tweetId: TweetId, - records: Seq[TweetManhattanRecord] - ): Option[StoredTweet] = { - val storedTweet = buildStoredTweet(tweetId, records) - if (storedTweet.getFieldBlobs(expectedFields).nonEmpty && isValid(storedTweet)) { - Some(storedTweet) - } else { - None - } - } - - /** - * Return a TFieldBlob for each StoredTweet field defined in this set of records. - * @param includeScrubbed when false, result will not include scrubbed fields even - * if the data is present in the set of records. 
- */ - private[tweetypie] def getStoredTweetBlobs( - records: Seq[TweetManhattanRecord], - includeScrubbed: Boolean = false, - ): Seq[TFieldBlob] = { - val scrubbed = extractScrubbedFields(records) - - records - .flatMap { r => - // extract LKey.FieldKey records if they are not scrubbed and get their TFieldBlobs - r.key match { - case fullKey @ TweetKey(_, key: TweetKey.LKey.FieldKey) - if includeScrubbed || !scrubbed.contains(key.fieldId) => - try { - val fieldBlob = TFieldBlobCodec.fromByteBuffer(r.value.contents) - if (fieldBlob.field.id != key.fieldId) { - throw new AssertionError( - s"Blob stored for $fullKey has unexpected id ${fieldBlob.field.id}" - ) - } - Some(fieldBlob) - } catch { - case e: VersionMismatchError => - log.error( - s"Failed to decode bytebuffer for $fullKey: ${e.getMessage}" - ) - throw e - } - case _ => None - } - } - } - - /** - * Its important to bubble up rate limiting exceptions as they would likely be the root cause for other issues - * (timeouts etc.), so we scan for this particular exception, and if found, we bubble that up specifically - * - * @param seqOfTries The sequence of tries which may contain within it a rate limit exception - * - * @return if a rate limiting exn was detected, this will be a Throw(e: DeniedManhattanException) - * otherwise it will be a Return(_) only if all individual tries succeeded - */ - private[tweetypie] def collectWithRateLimitCheck(seqOfTries: Seq[Try[Unit]]): Try[Unit] = { - val rateLimitThrowOpt = seqOfTries.find { - case Throw(e: DeniedManhattanException) => true - case _ => false - } - - rateLimitThrowOpt.getOrElse( - Try.collect(seqOfTries).map(_ => ()) - ) // Operation is considered successful only if all the deletions are successful - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.docx new file mode 100644 index 000000000..db55e2f56 Binary files /dev/null and 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala deleted file mode 100644 index f0e14eb9d..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala +++ /dev/null @@ -1,106 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.storage.TweetStorageClient.Undelete -import com.twitter.tweetypie.storage.TweetUtils._ -import com.twitter.util.Time - -object UndeleteHandler { - def apply( - read: ManhattanOperations.Read, - localInsert: ManhattanOperations.Insert, - remoteInsert: ManhattanOperations.Insert, - delete: ManhattanOperations.Delete, - undeleteWindowHours: Int, - stats: StatsReceiver - ): Undelete = { - def withinUndeleteWindow(timestampMs: Long) = - (Time.now - Time.fromMilliseconds(timestampMs)).inHours < undeleteWindowHours - - def prepareUndelete( - tweetId: TweetId, - records: Seq[TweetManhattanRecord] - ): (Undelete.Response, Option[TweetManhattanRecord]) = { - val undeleteRecord = - Some(TweetStateRecord.Undeleted(tweetId, Time.now.inMillis).toTweetMhRecord) - - TweetStateRecord.mostRecent(records) match { - // check if we need to undo a soft deletion - case Some(TweetStateRecord.SoftDeleted(_, createdAt)) => - if (createdAt > 0) { - if (withinUndeleteWindow(createdAt)) { - ( - mkSuccessfulUndeleteResponse(tweetId, records, Some(createdAt)), - undeleteRecord - ) - } else { - (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None) - } - } else { - throw InternalError(s"Timestamp unavailable for $tweetId") - } - - // BounceDeleted tweets may not be undeleted. 
see go/bouncedtweet - case Some(_: TweetStateRecord.HardDeleted | _: TweetStateRecord.BounceDeleted) => - (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None) - - case Some(_: TweetStateRecord.Undeleted) => - // We still want to write the undelete record, because at this point, we only know that the local DC's - // winning record is not a soft/hard deletion record, while its possible that the remote DC's winning - // record might still be a soft deletion record. Having said that, we don't want to set it to true - // if the winning record is forceAdd, as the forceAdd call should have ensured that both DCs had the - // forceAdd record. - (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord) - - case Some(_: TweetStateRecord.ForceAdded) => - (mkSuccessfulUndeleteResponse(tweetId, records), None) - - // lets write the undeletion record just in case there is a softdeletion record in flight - case None => (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord) - } - } - - // Write the undelete record both locally and remotely to protect - // against races with hard delete replication. We only need this - // protection for the insertion of the undelete record. 
- def multiInsert(record: TweetManhattanRecord): Stitch[Unit] = - Stitch - .collect( - Seq( - localInsert(record).liftToTry, - remoteInsert(record).liftToTry - ) - ) - .map(collectWithRateLimitCheck) - .lowerFromTry - - def deleteSoftDeleteRecord(tweetId: TweetId): Stitch[Unit] = { - val mhKey = TweetKey.softDeletionStateKey(tweetId) - delete(mhKey, None) - } - - tweetId => - for { - records <- read(tweetId) - (response, undeleteRecord) = prepareUndelete(tweetId, records) - _ <- Stitch.collect(undeleteRecord.map(multiInsert)).unit - _ <- deleteSoftDeleteRecord(tweetId) - } yield { - response - } - } - - private[storage] def mkSuccessfulUndeleteResponse( - tweetId: TweetId, - records: Seq[TweetManhattanRecord], - timestampOpt: Option[Long] = None - ) = - Undelete.Response( - Undelete.UndeleteResponseCode.Success, - Some( - StorageConversions.fromStoredTweet(buildStoredTweet(tweetId, records)) - ), - archivedAtMillis = timestampOpt - ) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.docx new file mode 100644 index 000000000..97bd16e6e Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala deleted file mode 100644 index 7bf68f6ef..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.tweetypie.storage - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.stitch.Stitch -import com.twitter.storage.client.manhattan.kv.DeniedManhattanException -import com.twitter.storage.client.manhattan.kv.ManhattanValue -import com.twitter.tweetypie.storage.TweetUtils._ -import 
com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.util.Throw -import com.twitter.util.Time - -object UpdateTweetHandler { - def apply( - insert: ManhattanOperations.Insert, - stats: StatsReceiver - ): TweetStorageClient.UpdateTweet = { (tpTweet: Tweet, fields: Seq[Field]) => - require( - fields.forall(!TweetFields.coreFieldIds.contains(_)), - "Core fields cannot be modified by calling updateTweet; use addTweet instead." - ) - require( - areAllFieldsDefined(tpTweet, fields), - s"Input tweet $tpTweet does not have specified fields $fields set" - ) - - val now = Time.now - val storedTweet = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet) - val tweetId = storedTweet.id - Stats.updatePerFieldQpsCounters("updateTweet", fields.map(_.id), 1, stats) - - val (fieldIds, stitchesPerTweet) = - fields.map { field => - val fieldId = field.id - val tweetKey = TweetKey.fieldKey(tweetId, fieldId) - val blob = storedTweet.getFieldBlob(fieldId).get - val value = ManhattanValue(TFieldBlobCodec.toByteBuffer(blob), Some(now)) - val record = TweetManhattanRecord(tweetKey, value) - - (fieldId, insert(record).liftToTry) - }.unzip - - Stitch.collect(stitchesPerTweet).map { seqOfTries => - val fieldkeyAndMhResults = fieldIds.zip(seqOfTries).toMap - // If even a single field was rate limited, we will send an overall OverCapacity TweetResponse - val wasRateLimited = fieldkeyAndMhResults.exists { keyAndResult => - keyAndResult._2 match { - case Throw(e: DeniedManhattanException) => true - case _ => false - } - } - - if (wasRateLimited) { - buildTweetOverCapacityResponse("updateTweets", tweetId, fieldkeyAndMhResults) - } else { - buildTweetResponse("updateTweets", tweetId, fieldkeyAndMhResults) - } - } - } - - private def areAllFieldsDefined(tpTweet: Tweet, fields: Seq[Field]) = { - val storedTweet = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet) - fields.map(_.id).forall(storedTweet.getFieldBlob(_).isDefined) - } -} diff --git 
a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.docx new file mode 100644 index 000000000..6dfaf816a Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala deleted file mode 100644 index 57a02248b..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.tweetypie - -import com.twitter.storage.client.manhattan.kv.ManhattanValue -import java.nio.ByteBuffer - -package object storage { - type TweetId = Long - type FieldId = Short - - type TweetManhattanValue = ManhattanValue[ByteBuffer] -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD deleted file mode 100644 index e93c3b2ba..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "finagle/finagle-core/src/main", - "flock-client/src/main/scala", - "flock-client/src/main/thrift:thrift-scala", - "tweetypie/servo/util/src/main/scala", - "snowflake:id", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/servo:servo-exception-java", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", - "tweetypie/common/src/scala/com/twitter/tweetypie/util", - "util/util-core:scala", - ], -) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD.docx 
b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD.docx new file mode 100644 index 000000000..d4d9bbb01 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.docx new file mode 100644 index 000000000..86f4742e7 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala deleted file mode 100644 index 046ff226a..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala +++ /dev/null @@ -1,532 +0,0 @@ -/** Copyright 2010 Twitter, Inc. */ -package com.twitter.tweetypie -package tflock - -import com.twitter.finagle.stats.Counter -import com.twitter.flockdb.client._ -import com.twitter.flockdb.client.thriftscala.Priority -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.tweetypie.serverutil.StoredCard -import com.twitter.tweetypie.thriftscala._ -import com.twitter.util.Future -import scala.collection.mutable.ListBuffer - -object TFlockIndexer { - - /** - * Printable names for some edge types currently defined in [[com.twitter.flockdb.client]]. - * Used to defined stats counters for adding edges. 
- */ - val graphNames: Map[Int, String] = - Map( - CardTweetsGraph.id -> "card_tweets", - ConversationGraph.id -> "conversation", - DirectedAtUserIdGraph.id -> "directed_at_user_id", - InvitedUsersGraph.id -> "invited_users", - MediaTimelineGraph.id -> "media_timeline", - MentionsGraph.id -> "mentions", - NarrowcastSentTweetsGraph.id -> "narrowcast_sent_tweets", - NullcastedTweetsGraph.id -> "nullcasted_tweets", - QuotersGraph.id -> "quoters", - QuotesGraph.id -> "quotes", - QuoteTweetsIndexGraph.id -> "quote_tweets_index", - RepliesToTweetsGraph.id -> "replies_to_tweets", - RetweetsByMeGraph.id -> "retweets_by_me", - RetweetsGraph.id -> "retweets", - RetweetsOfMeGraph.id -> "retweets_of_me", - RetweetSourceGraph.id -> "retweet_source", - TweetsRetweetedGraph.id -> "tweets_retweeted", - UserTimelineGraph.id -> "user_timeline", - CreatorSubscriptionTimelineGraph.id -> "creator_subscription_timeline", - CreatorSubscriptionMediaTimelineGraph.id -> "creator_subscription_image_timeline", - ) - - /** - * On edge deletion, edges are either archived permanently or retained for 3 months, based on - * the retention policy in the above confluence page. - * - * These two retention policies correspond to the two deletion techniques: archive and remove. - * We call removeEdges for edges with a short retention policy and archiveEdges for edges with - * a permanent retention policy. - */ - val graphsWithRemovedEdges: Seq[Int] = - Seq( - CardTweetsGraph.id, - CuratedTimelineGraph.id, - CuratedTweetsGraph.id, - DirectedAtUserIdGraph.id, - MediaTimelineGraph.id, - MutedConversationsGraph.id, - QuotersGraph.id, - QuotesGraph.id, - QuoteTweetsIndexGraph.id, - ReportedTweetsGraph.id, - RetweetsOfMeGraph.id, - RetweetSourceGraph.id, - SoftLikesGraph.id, - TweetsRetweetedGraph.id, - CreatorSubscriptionTimelineGraph.id, - CreatorSubscriptionMediaTimelineGraph.id, - ) - - /** - * These edges should be left in place when bounced tweets are deleted. 
- * These edges are removed during hard deletion. - * - * This is done so external teams (timelines) can execute on these edges for - * tombstone feature. - */ - val bounceDeleteGraphIds: Set[Int] = - Set( - UserTimelineGraph.id, - ConversationGraph.id - ) - - def makeCounters(stats: StatsReceiver, operation: String): Map[Int, Counter] = { - TFlockIndexer.graphNames - .mapValues(stats.scope(_).counter(operation)) - .withDefaultValue(stats.scope("unknown").counter(operation)) - } -} - -/** - * @param backgroundIndexingPriority specifies the queue to use for - * background indexing operations. This is useful for making the - * effects of background indexing operations (such as deleting edges - * for deleted Tweets) available sooner in testing scenarios - * (end-to-end tests or development instances). It is set to - * Priority.Low in production to reduce the load on high priority - * queues that we use for prominently user-visible operations. - */ -class TFlockIndexer( - tflock: TFlockClient, - hasMedia: Tweet => Boolean, - backgroundIndexingPriority: Priority, - stats: StatsReceiver) - extends TweetIndexer { - private[this] val FutureNil = Future.Nil - - private[this] val archiveCounters = TFlockIndexer.makeCounters(stats, "archive") - private[this] val removeCounters = TFlockIndexer.makeCounters(stats, "remove") - private[this] val insertCounters = TFlockIndexer.makeCounters(stats, "insert") - private[this] val negateCounters = TFlockIndexer.makeCounters(stats, "negate") - - private[this] val foregroundIndexingPriority: Priority = Priority.High - - override def createIndex(tweet: Tweet): Future[Unit] = - createEdges(tweet, isUndelete = false) - - override def undeleteIndex(tweet: Tweet): Future[Unit] = - createEdges(tweet, isUndelete = true) - - private[this] case class PartitionedEdges( - longRetention: Seq[ExecuteEdge[StatusGraph]] = Nil, - shortRetention: Seq[ExecuteEdge[StatusGraph]] = Nil, - negate: Seq[ExecuteEdge[StatusGraph]] = Nil, - ignore: 
Seq[ExecuteEdge[StatusGraph]] = Nil) - - private[this] def partitionEdgesForDelete( - edges: Seq[ExecuteEdge[StatusGraph]], - isBounceDelete: Boolean - ) = - edges.foldLeft(PartitionedEdges()) { - // Two dependees of UserTimelineGraph edge states to satisfy: timelines & safety tools. - // Timelines show bounce-deleted tweets as tombstones; regular deletes are not shown. - // - i.e. timelineIds = UserTimelineGraph(Normal || Negative) - // Safety tools show deleted tweets to authorized internal review agents - // - i.e. deletedIds = UserTimelineGraph(Removed || Negative) - case (partitionedEdges, edge) if isBounceDelete && edge.graphId == UserTimelineGraph.id => - partitionedEdges.copy(negate = edge +: partitionedEdges.negate) - - case (partitionedEdges, edge) if isBounceDelete && edge.graphId == ConversationGraph.id => - // Bounce-deleted tweets remain rendered as tombstones in conversations, so do not modify - // the ConversationGraph edge state - partitionedEdges.copy(ignore = edge +: partitionedEdges.ignore) - - case (partitionedEdges, edge) - if TFlockIndexer.graphsWithRemovedEdges.contains(edge.graphId) => - partitionedEdges.copy(shortRetention = edge +: partitionedEdges.shortRetention) - - case (partitionedEdges, edge) => - partitionedEdges.copy(longRetention = edge +: partitionedEdges.longRetention) - } - - override def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] = - for { - edges <- getEdges(tweet, isCreate = false, isDelete = true, isUndelete = false) - partitionedEdges = partitionEdgesForDelete(edges, isBounceDelete) - () <- - Future - .join( - tflock - .archiveEdges(partitionedEdges.longRetention, backgroundIndexingPriority) - .onSuccess(_ => - partitionedEdges.longRetention.foreach(e => archiveCounters(e.graphId).incr())), - tflock - .removeEdges(partitionedEdges.shortRetention, backgroundIndexingPriority) - .onSuccess(_ => - partitionedEdges.shortRetention.foreach(e => removeCounters(e.graphId).incr())), - tflock - 
.negateEdges(partitionedEdges.negate, backgroundIndexingPriority) - .onSuccess(_ => - partitionedEdges.negate.foreach(e => negateCounters(e.graphId).incr())) - ) - .unit - } yield () - - /** - * This operation is called when a user is put into or taken out of - * a state in which their retweets should no longer be visible - * (e.g. suspended or ROPO). - */ - override def setRetweetVisibility(retweetId: TweetId, setVisible: Boolean): Future[Unit] = { - val retweetEdge = Seq(ExecuteEdge(retweetId, RetweetsGraph, None, Reverse)) - - if (setVisible) { - tflock - .insertEdges(retweetEdge, backgroundIndexingPriority) - .onSuccess(_ => insertCounters(RetweetsGraph.id).incr()) - } else { - tflock - .archiveEdges(retweetEdge, backgroundIndexingPriority) - .onSuccess(_ => archiveCounters(RetweetsGraph.id).incr()) - } - } - - private[this] def createEdges(tweet: Tweet, isUndelete: Boolean): Future[Unit] = - for { - edges <- getEdges(tweet = tweet, isCreate = true, isDelete = false, isUndelete = isUndelete) - () <- tflock.insertEdges(edges, foregroundIndexingPriority) - } yield { - // Count all the edges we've successfully added: - edges.foreach(e => insertCounters(e.graphId).incr()) - } - - private[this] def addRTEdges( - tweet: Tweet, - share: Share, - isCreate: Boolean, - edges: ListBuffer[ExecuteEdge[StatusGraph]], - futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]] - ): Unit = { - - edges += RetweetsOfMeGraph.edge(share.sourceUserId, tweet.id) - edges += RetweetsByMeGraph.edge(getUserId(tweet), tweet.id) - edges += RetweetsGraph.edge(share.sourceStatusId, tweet.id) - - if (isCreate) { - edges += ExecuteEdge( - sourceId = getUserId(tweet), - graph = RetweetSourceGraph, - destinationIds = Some(Seq(share.sourceStatusId)), - direction = Forward, - position = Some(SnowflakeId(tweet.id).time.inMillis) - ) - edges.append(TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId)) - } else { - edges += RetweetSourceGraph.edge(getUserId(tweet), 
share.sourceStatusId) - - // if this is the last retweet we need to remove it from the source user's - // tweets retweeted graph - futureEdges.append( - tflock.count(RetweetsGraph.from(share.sourceStatusId)).flatMap { count => - if (count <= 1) { - tflock.selectAll(RetweetsGraph.from(share.sourceStatusId)).map { tweets => - if (tweets.size <= 1) - Seq(TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId)) - else - Nil - } - } else { - FutureNil - } - } - ) - } - } - - private[this] def addReplyEdges( - tweet: Tweet, - edges: ListBuffer[ExecuteEdge[StatusGraph]] - ): Unit = { - getReply(tweet).foreach { reply => - reply.inReplyToStatusId.flatMap { inReplyToStatusId => - edges += RepliesToTweetsGraph.edge(inReplyToStatusId, tweet.id) - - // only index conversationId if this is a reply to another tweet - TweetLenses.conversationId.get(tweet).map { conversationId => - edges += ConversationGraph.edge(conversationId, tweet.id) - } - } - } - } - - private[this] def addDirectedAtEdges( - tweet: Tweet, - edges: ListBuffer[ExecuteEdge[StatusGraph]] - ): Unit = { - TweetLenses.directedAtUser.get(tweet).foreach { directedAtUser => - edges += DirectedAtUserIdGraph.edge(directedAtUser.userId, tweet.id) - } - } - - private[this] def addMentionEdges( - tweet: Tweet, - edges: ListBuffer[ExecuteEdge[StatusGraph]] - ): Unit = { - getMentions(tweet) - .flatMap(_.userId).foreach { mention => - edges += MentionsGraph.edge(mention, tweet.id) - } - } - - private[this] def addQTEdges( - tweet: Tweet, - edges: ListBuffer[ExecuteEdge[StatusGraph]], - futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]], - isCreate: Boolean - ): Unit = { - val userId = getUserId(tweet) - - tweet.quotedTweet.foreach { quotedTweet => - // Regardless of tweet creates/deletes, we add the corresponding edges to the - // following two graphs. Note that we're handling the case for - // the QuotersGraph slightly differently in the tweet delete case. 
- edges.append(QuotesGraph.edge(quotedTweet.userId, tweet.id)) - edges.append(QuoteTweetsIndexGraph.edge(quotedTweet.tweetId, tweet.id)) - if (isCreate) { - // As mentioned above, for tweet creates we go ahead and add an edge - // to the QuotersGraph without any additional checks. - edges.append(QuotersGraph.edge(quotedTweet.tweetId, userId)) - } else { - // For tweet deletes, we only add an edge to be deleted from the - // QuotersGraph if the tweeting user isn't quoting the tweet anymore - // i.e. if a user has quoted a tweet multiple times, we only delete - // an edge from the QuotersGraph if they've deleted all the quotes, - // otherwise an edge should exist by definition of what the QuotersGraph - // represents. - - // Note: There can be a potential edge case here due to a race condition - // in the following scenario. - // i) A quotes a tweet T twice resulting in tweets T1 and T2. - // ii) There should exist edges in the QuotersGraph from T -> A and T1 <-> T, T2 <-> T in - // the QuoteTweetsIndexGraph, but one of the edges haven't been written - // to the QuoteTweetsIndex graph in TFlock yet. - // iii) In this scenario, we shouldn't really be deleting an edge as we're doing below. - // The approach that we're taking below is a "best effort" approach similar to what we - // currently do for RTs. - - // Find all the quotes of the quoted tweet from the quoting user - val quotesFromQuotingUser = QuoteTweetsIndexGraph - .from(quotedTweet.tweetId) - .intersect(UserTimelineGraph.from(userId)) - futureEdges.append( - tflock - .count(quotesFromQuotingUser).flatMap { count => - // If this is the last quote of the quoted tweet from the quoting user, - // we go ahead and delete the edge from the QuotersGraph. 
- if (count <= 1) { - tflock.selectAll(quotesFromQuotingUser).map { tweets => - if (tweets.size <= 1) { - Seq(QuotersGraph.edge(quotedTweet.tweetId, userId)) - } else { - Nil - } - } - } else { - FutureNil - } - } - ) - } - } - } - - private[this] def addCardEdges( - tweet: Tweet, - edges: ListBuffer[ExecuteEdge[StatusGraph]] - ): Unit = { - // Note that we are indexing only the TOO "stored" cards - // (cardUri=card://). Rest of the cards are ignored here. - tweet.cardReference - .collect { - case StoredCard(id) => - edges.append(CardTweetsGraph.edge(id, tweet.id)) - }.getOrElse(()) - } - - // Note: on undelete, this method restores all archived edges, including those that may have - // been archived prior to the delete. This is incorrect behavior but in practice rarely - // causes problems, as undeletes are so rare. - private[this] def addEdgesForDeleteOrUndelete( - tweet: Tweet, - edges: ListBuffer[ExecuteEdge[StatusGraph]] - ): Unit = { - edges.appendAll( - Seq( - MentionsGraph.edges(tweet.id, None, Reverse), - RepliesToTweetsGraph.edges(tweet.id, None) - ) - ) - - // When we delete or undelete a conversation control root Tweet we want to archive or restore - // all the edges in InvitedUsersGraph from the Tweet id. - if (hasConversationControl(tweet) && isConversationRoot(tweet)) { - edges.append(InvitedUsersGraph.edges(tweet.id, None)) - } - } - - private[this] def addSimpleEdges( - tweet: Tweet, - edges: ListBuffer[ExecuteEdge[StatusGraph]] - ): Unit = { - if (TweetLenses.nullcast.get(tweet)) { - edges.append(NullcastedTweetsGraph.edge(getUserId(tweet), tweet.id)) - } else if (TweetLenses.narrowcast.get(tweet).isDefined) { - edges.append(NarrowcastSentTweetsGraph.edge(getUserId(tweet), tweet.id)) - } else { - edges.append(UserTimelineGraph.edge(getUserId(tweet), tweet.id)) - - if (hasMedia(tweet)) - edges.append(MediaTimelineGraph.edge(getUserId(tweet), tweet.id)) - - // Index root creator subscription tweets. 
- // Ignore replies because those are not necessarily visible to a user who subscribes to tweet author - val isRootTweet: Boolean = tweet.coreData match { - case Some(c) => c.reply.isEmpty && c.share.isEmpty - case None => true - } - - if (tweet.exclusiveTweetControl.isDefined && isRootTweet) { - edges.append(CreatorSubscriptionTimelineGraph.edge(getUserId(tweet), tweet.id)) - - if (hasMedia(tweet)) - edges.append(CreatorSubscriptionMediaTimelineGraph.edge(getUserId(tweet), tweet.id)) - } - } - } - - /** - * Issues edges for each mention of user in a conversation-controlled tweet. This way InvitedUsers - * graph accumulates complete set of ids for @mention-invited users, by conversation id. - */ - private def invitedUsersEdgesForCreate( - tweet: Tweet, - edges: ListBuffer[ExecuteEdge[StatusGraph]] - ): Unit = { - val conversationId: Long = getConversationId(tweet).getOrElse(tweet.id) - val mentions: Seq[UserId] = getMentions(tweet).flatMap(_.userId) - edges.appendAll(mentions.map(userId => InvitedUsersGraph.edge(conversationId, userId))) - } - - /** - * Issues edges of InviteUsersGraph that ought to be deleted for a conversation controlled reply. - * These are mentions of users in the given tweet, only if the user was not mentioned elsewhere - * in the conversation. This way for a conversation, InvitedUsersGraph would always hold a set - * of all users invited to the conversation, and an edge is removed only after the last mention of - * a user is deleted. 
- */ - private def invitedUsersEdgesForDelete( - tweet: Tweet, - futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]] - ): Unit = { - getConversationId(tweet).foreach { conversationId: Long => - val mentions: Seq[UserId] = getMentions(tweet).flatMap(_.userId) - mentions.foreach { userId => - val tweetIdsWithinConversation = ConversationGraph.from(conversationId) - val tweetIdsThatMentionUser = MentionsGraph.from(userId) - futureEdges.append( - tflock - .selectAll( - query = tweetIdsThatMentionUser.intersect(tweetIdsWithinConversation), - limit = Some(2), // Just need to know if it is >1 or <=1, so 2 are enough. - pageSize = None // Provide default, otherwise Mockito complains - ).map { tweetIds: Seq[Long] => - if (tweetIds.size <= 1) { - Seq(InvitedUsersGraph.edge(conversationId, userId)) - } else { - Nil - } - } - ) - } - } - } - - private def hasInviteViaMention(tweet: Tweet): Boolean = { - tweet.conversationControl match { - case Some(ConversationControl.ByInvitation(controls)) => - controls.inviteViaMention.getOrElse(false) - case Some(ConversationControl.Community(controls)) => - controls.inviteViaMention.getOrElse(false) - case Some(ConversationControl.Followers(followers)) => - followers.inviteViaMention.getOrElse(false) - case _ => - false - } - } - - private def hasConversationControl(tweet: Tweet): Boolean = - tweet.conversationControl.isDefined - - // If a Tweet has a ConversationControl, it must have a ConversationId associated with it so we - // can compare the ConversationId with the current Tweet ID to determine if it's the root of the - // conversation. 
See ConversationIdHydrator for more details - private def isConversationRoot(tweet: Tweet): Boolean = - getConversationId(tweet).get == tweet.id - - private def addInvitedUsersEdges( - tweet: Tweet, - isCreate: Boolean, - isUndelete: Boolean, - edges: ListBuffer[ExecuteEdge[StatusGraph]], - futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]] - ): Unit = { - if (hasConversationControl(tweet)) { - if (isCreate) { - if (isConversationRoot(tweet) && !isUndelete) { - // For root Tweets, only add edges for original creates, not for undeletes. - // Undeletes are handled by addEdgesForDeleteOrUndelete. - invitedUsersEdgesForCreate(tweet, edges) - } - if (!isConversationRoot(tweet) && hasInviteViaMention(tweet)) { - // For replies, only add edges when the conversation control is in inviteViaMention mode. - invitedUsersEdgesForCreate(tweet, edges) - } - } else { - if (!isConversationRoot(tweet)) { - invitedUsersEdgesForDelete(tweet, futureEdges) - } - } - } - } - - private[this] def getEdges( - tweet: Tweet, - isCreate: Boolean, - isDelete: Boolean, - isUndelete: Boolean - ): Future[Seq[ExecuteEdge[StatusGraph]]] = { - val edges = ListBuffer[ExecuteEdge[StatusGraph]]() - val futureEdges = ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]() - - addSimpleEdges(tweet, edges) - getShare(tweet) match { - case Some(share) => addRTEdges(tweet, share, isCreate, edges, futureEdges) - case _ => - addInvitedUsersEdges(tweet, isCreate, isUndelete, edges, futureEdges) - addReplyEdges(tweet, edges) - addDirectedAtEdges(tweet, edges) - addMentionEdges(tweet, edges) - addQTEdges(tweet, edges, futureEdges, isCreate) - addCardEdges(tweet, edges) - if (isDelete || isUndelete) { - addEdgesForDeleteOrUndelete(tweet, edges) - } - } - - Future - .collect(futureEdges) - .map { moreEdges => (edges ++= moreEdges.flatten).toList } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.docx 
b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.docx new file mode 100644 index 000000000..256a3d82f Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala deleted file mode 100644 index 9145a4362..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala +++ /dev/null @@ -1,30 +0,0 @@ -/** Copyright 2010 Twitter, Inc. */ -package com.twitter.tweetypie -package tflock - -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.util.Future - -trait TweetIndexer { - - /** - * Called at tweet-creation time, this method should set up all relevant indices on the tweet. - */ - def createIndex(tweet: Tweet): Future[Unit] = Future.Unit - - /** - * Called at tweet-undelete time (which isn't yet handled), this method should - * restore all relevant indices on the tweet. - */ - def undeleteIndex(tweet: Tweet): Future[Unit] = Future.Unit - - /** - * Called at tweet-delete time, this method should archive all relevant indices on the tweet. - */ - def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] = Future.Unit - - /** - * This method should archive or unarchive the retweet edge in TFlock RetweetsGraph. 
- */ - def setRetweetVisibility(retweetId: TweetId, visible: Boolean): Future[Unit] = Future.Unit -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD deleted file mode 100644 index c7ad2b832..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - platform = "java8", - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "finagle/finagle-core/src/main", - "scrooge/scrooge-core/src/main/scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "util/util-core:scala", - ], -) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD.docx new file mode 100644 index 000000000..5fd77b2de Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.docx new file mode 100644 index 000000000..288e152fa Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala deleted file mode 100644 index f450abd15..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala +++ /dev/null @@ -1,8 +0,0 @@ -package com.twitter.tweetypie.thriftscala - -import com.twitter.finagle.service.FailedService - -class NotImplementedTweetService - extends 
TweetService$FinagleClient( - new FailedService(new UnsupportedOperationException("not implemented")) - ) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.docx new file mode 100644 index 000000000..717e6bf58 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala deleted file mode 100644 index df3ca4362..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala +++ /dev/null @@ -1,79 +0,0 @@ -package com.twitter.tweetypie.thriftscala - -import com.twitter.util.Future - -/** - * A trait for TweetService implementations that wrap an underlying - * TweetService and need to modify only some of the methods. 
- */ -trait TweetServiceProxy extends TweetService.MethodPerEndpoint { - protected def underlying: TweetService.MethodPerEndpoint - - /** - * Default implementation simply passes through the Future but logic can be added to wrap each - * invocation to the underlying TweetService - */ - protected def wrap[A](f: => Future[A]): Future[A] = - f - - override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] = - wrap(underlying.getTweets(request)) - - override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] = - wrap(underlying.getTweetFields(request)) - - override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = - wrap(underlying.getTweetCounts(request)) - - override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = - wrap(underlying.setAdditionalFields(request)) - - override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] = - wrap(underlying.deleteAdditionalFields(request)) - - override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = - wrap(underlying.postTweet(request)) - - override def postRetweet(request: RetweetRequest): Future[PostTweetResult] = - wrap(underlying.postRetweet(request)) - - override def unretweet(request: UnretweetRequest): Future[UnretweetResult] = - wrap(underlying.unretweet(request)) - - override def getDeletedTweets( - request: GetDeletedTweetsRequest - ): Future[Seq[GetDeletedTweetResult]] = - wrap(underlying.getDeletedTweets(request)) - - override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] = - wrap(underlying.deleteTweets(request)) - - override def updatePossiblySensitiveTweet( - request: UpdatePossiblySensitiveTweetRequest - ): Future[Unit] = - wrap(underlying.updatePossiblySensitiveTweet(request)) - - override def undeleteTweet(request: UndeleteTweetRequest): Future[UndeleteTweetResponse] = - wrap(underlying.undeleteTweet(request)) - 
- override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] = - wrap(underlying.eraseUserTweets(request)) - - override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] = - wrap(underlying.incrTweetFavCount(request)) - - override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] = - wrap(underlying.deleteLocationData(request)) - - override def scrubGeo(request: GeoScrub): Future[Unit] = - wrap(underlying.scrubGeo(request)) - - override def takedown(request: TakedownRequest): Future[Unit] = - wrap(underlying.takedown(request)) - - override def flush(request: FlushRequest): Future[Unit] = - wrap(underlying.flush(request)) - - override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] = - wrap(underlying.incrTweetBookmarkCount(request)) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD deleted file mode 100644 index ff66fe5b2..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD +++ /dev/null @@ -1,15 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "tweetypie/servo/util", - "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "tco-util", - "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", - "tweetypie/common/src/scala/com/twitter/tweetypie/util", - "twitter-text/lib/java/src/main/java/com/twitter/twittertext", - ], -) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD.docx new file mode 100644 index 000000000..b703806a2 Binary files /dev/null and 
b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.docx new file mode 100644 index 000000000..ad66e5d96 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala deleted file mode 100644 index 09c0941ec..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.tweetypie.thriftscala.entities - -import com.twitter.tweetypie.thriftscala.CashtagEntity -import com.twitter.tweetypie.tweettext.TextEntity - -object CashtagTextEntity extends TextEntity[CashtagEntity] { - override def fromIndex(entity: CashtagEntity): Short = entity.fromIndex - override def toIndex(entity: CashtagEntity): Short = entity.toIndex - override def move(entity: CashtagEntity, fromIndex: Short, toIndex: Short): CashtagEntity = - entity.copy(fromIndex = fromIndex, toIndex = toIndex) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.docx new file mode 100644 index 000000000..9aca26a5b Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala deleted file mode 100644 index c9d7b30bc..000000000 
--- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala +++ /dev/null @@ -1,118 +0,0 @@ -package com.twitter.tweetypie.thriftscala.entities - -import com.twitter.servo.data.Mutation -import com.twitter.tco_util.TcoUrl -import com.twitter.tweetypie.thriftscala._ -import com.twitter.tweetypie.thriftscala.entities.Implicits._ -import com.twitter.tweetypie.tweettext.PartialHtmlEncoding -import com.twitter.tweetypie.tweettext.TextEntity -import com.twitter.tweetypie.tweettext.TextModification -import com.twitter.tweetypie.util.TweetLenses -import com.twitter.twittertext.Extractor -import scala.collection.JavaConverters._ - -/** - * Contains functions to collect urls, mentions, hashtags, and cashtags from the text of tweets and messages - */ -object EntityExtractor { - // We only use one configuration of com.twitter.twittertext.Extractor, so it's - // OK to share one global reference. The only available - // configuration option is whether to extract URLs without protocols - // (defaults to true) - private[this] val extractor = new Extractor - - // The twitter-text library operates on unencoded text, but we store - // and process HTML-encoded text. The TextModification returned - // from this function contains the decoded text which we will operate on, - // but also provides us with the ability to map the indices on - // the twitter-text entities back to the entities on the encoded text. 
- private val htmlEncodedTextToEncodeModification: String => TextModification = - text => - PartialHtmlEncoding - .decodeWithModification(text) - .getOrElse(TextModification.identity(text)) - .inverse - - private[this] val extractAllUrlsFromTextMod: TextModification => Seq[UrlEntity] = - extractUrls(false) - - val extractAllUrls: String => Seq[UrlEntity] = - htmlEncodedTextToEncodeModification.andThen(extractAllUrlsFromTextMod) - - private[this] val extractTcoUrls: TextModification => Seq[UrlEntity] = - extractUrls(true) - - private[this] def extractUrls(tcoOnly: Boolean): TextModification => Seq[UrlEntity] = - mkEntityExtractor[UrlEntity]( - extractor.extractURLsWithIndices(_).asScala.filter { e => - if (tcoOnly) TcoUrl.isTcoUrl(e.getValue) else true - }, - UrlEntity(_, _, _) - ) - - private[this] val extractMentionsFromTextMod: TextModification => Seq[MentionEntity] = - mkEntityExtractor[MentionEntity]( - extractor.extractMentionedScreennamesWithIndices(_).asScala, - MentionEntity(_, _, _) - ) - - val extractMentions: String => Seq[MentionEntity] = - htmlEncodedTextToEncodeModification.andThen(extractMentionsFromTextMod) - - private[this] val extractHashtagsFromTextMod: TextModification => Seq[HashtagEntity] = - mkEntityExtractor[HashtagEntity]( - extractor.extractHashtagsWithIndices(_).asScala, - HashtagEntity(_, _, _) - ) - - val extractHashtags: String => Seq[HashtagEntity] = - htmlEncodedTextToEncodeModification.andThen(extractHashtagsFromTextMod) - - private[this] val extractCashtagsFromTextMod: TextModification => Seq[CashtagEntity] = - mkEntityExtractor[CashtagEntity]( - extractor.extractCashtagsWithIndices(_).asScala, - CashtagEntity(_, _, _) - ) - - val extractCashtags: String => Seq[CashtagEntity] = - htmlEncodedTextToEncodeModification.andThen(extractCashtagsFromTextMod) - - private[this] def mkEntityExtractor[E: TextEntity]( - extract: String => Seq[Extractor.Entity], - construct: (Short, Short, String) => E - ): TextModification => Seq[E] = - 
htmlEncodedMod => { - val convert: Extractor.Entity => Option[E] = - e => - for { - start <- asShort(e.getStart.intValue) - end <- asShort(e.getEnd.intValue) - if e.getValue != null - res <- htmlEncodedMod.reindexEntity(construct(start, end, e.getValue)) - } yield res - - val entities = extract(htmlEncodedMod.original) - extractor.modifyIndicesFromUTF16ToUnicode(htmlEncodedMod.original, entities.asJava) - entities.map(convert).flatten - } - - private[this] def asShort(i: Int): Option[Short] = - if (i.isValidShort) Some(i.toShort) else None - - private[this] def mutation(extractUrls: Boolean): Mutation[Tweet] = - Mutation { tweet => - val htmlEncodedMod = htmlEncodedTextToEncodeModification(TweetLenses.text.get(tweet)) - - Some( - tweet.copy( - urls = if (extractUrls) Some(extractTcoUrls(htmlEncodedMod)) else tweet.urls, - mentions = Some(extractMentionsFromTextMod(htmlEncodedMod)), - hashtags = Some(extractHashtagsFromTextMod(htmlEncodedMod)), - cashtags = Some(extractCashtagsFromTextMod(htmlEncodedMod)) - ) - ) - } - - val mutationWithoutUrls: Mutation[Tweet] = mutation(false) - val mutationAll: Mutation[Tweet] = mutation(true) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.docx new file mode 100644 index 000000000..003499b4d Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala deleted file mode 100644 index 4ba86ebc8..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.tweetypie.thriftscala.entities - -import 
com.twitter.tweetypie.thriftscala.HashtagEntity -import com.twitter.tweetypie.tweettext.TextEntity - -object HashtagTextEntity extends TextEntity[HashtagEntity] { - override def fromIndex(entity: HashtagEntity): Short = entity.fromIndex - override def toIndex(entity: HashtagEntity): Short = entity.toIndex - override def move(entity: HashtagEntity, fromIndex: Short, toIndex: Short): HashtagEntity = - entity.copy(fromIndex = fromIndex, toIndex = toIndex) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.docx new file mode 100644 index 000000000..1bfa2cc32 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala deleted file mode 100644 index a68595dee..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala +++ /dev/null @@ -1,10 +0,0 @@ -package com.twitter.tweetypie.thriftscala.entities - -object Implicits { - implicit val hashtagTextEntity: HashtagTextEntity.type = HashtagTextEntity - implicit val cashtagTextEntity: CashtagTextEntity.type = CashtagTextEntity - implicit val mentionTextEntity: MentionTextEntity.type = MentionTextEntity - implicit val urlTextEntity: UrlTextEntity.type = UrlTextEntity - implicit val mediaTextEntity: MediaTextEntity.type = MediaTextEntity - implicit val textRangeTextEntity: TextRangeEntityAdapter.type = TextRangeEntityAdapter -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.docx new file mode 100644 index 000000000..e42e58415 Binary files /dev/null and 
b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala deleted file mode 100644 index 45c145399..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.tweetypie.thriftscala.entities - -import com.twitter.tweetypie.thriftscala.MediaEntity -import com.twitter.tweetypie.tweettext.TextEntity - -object MediaTextEntity extends TextEntity[MediaEntity] { - override def fromIndex(entity: MediaEntity): Short = entity.fromIndex - override def toIndex(entity: MediaEntity): Short = entity.toIndex - override def move(entity: MediaEntity, fromIndex: Short, toIndex: Short): MediaEntity = - entity.copy(fromIndex = fromIndex, toIndex = toIndex) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.docx new file mode 100644 index 000000000..bdc137676 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala deleted file mode 100644 index f4ce11a43..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.tweetypie.thriftscala.entities - -import com.twitter.tweetypie.thriftscala.MentionEntity -import com.twitter.tweetypie.tweettext.TextEntity - -object MentionTextEntity extends TextEntity[MentionEntity] { - override def 
fromIndex(entity: MentionEntity): Short = entity.fromIndex - override def toIndex(entity: MentionEntity): Short = entity.toIndex - override def move(entity: MentionEntity, fromIndex: Short, toIndex: Short): MentionEntity = - entity.copy(fromIndex = fromIndex, toIndex = toIndex) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.docx new file mode 100644 index 000000000..41248a0b8 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala deleted file mode 100644 index a0dd5be79..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.tweetypie.thriftscala.entities - -import com.twitter.tweetypie.thriftscala.TextRange -import com.twitter.tweetypie.tweettext.TextEntity - -object TextRangeEntityAdapter extends TextEntity[TextRange] { - override def fromIndex(entity: TextRange): Short = entity.fromIndex.toShort - override def toIndex(entity: TextRange): Short = entity.toIndex.toShort - override def move(entity: TextRange, fromIndex: Short, toIndex: Short): TextRange = - entity.copy(fromIndex = fromIndex, toIndex = toIndex) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.docx new file mode 100644 index 000000000..28f7cbc8e Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.docx differ diff --git 
a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala deleted file mode 100644 index 8ab52747a..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.tweetypie.thriftscala.entities - -import com.twitter.tweetypie.thriftscala.UrlEntity -import com.twitter.tweetypie.tweettext.TextEntity - -object UrlTextEntity extends TextEntity[UrlEntity] { - override def fromIndex(entity: UrlEntity): Short = entity.fromIndex - override def toIndex(entity: UrlEntity): Short = entity.toIndex - override def move(entity: UrlEntity, fromIndex: Short, toIndex: Short): UrlEntity = - entity.copy(fromIndex = fromIndex, toIndex = toIndex) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD deleted file mode 100644 index 0fb3b965a..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - platform = "java8", - provides = scala_artifact( - org = "com.twitter", - name = "tweetypie-tweettext", - repo = artifactory, - ), - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/ibm/icu:icu4j", - "twitter-text/lib/java/src/main/java/com/twitter/twittertext", - ], -) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD.docx new file mode 100644 index 000000000..8a668cfdc Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.docx 
b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.docx new file mode 100644 index 000000000..07083ac79 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala deleted file mode 100644 index e24076f55..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.twitter.tweetypie.tweettext - -import com.ibm.icu.text.BreakIterator - -/** - * Adapt the [[BreakIterator]] interface to a scala [[Iterator]] - * over the offsets of user-perceived characters in a String. - */ -object GraphemeIndexIterator { - - /** - * Produce an iterator over indices in the string that mark the end - * of a user-perceived character (grapheme) - */ - def ends(s: String): Iterator[Offset.CodeUnit] = - // The start of every grapheme but the first is also a grapheme - // end. The last grapheme ends at the end of the string. - starts(s).drop(1) ++ Iterator(Offset.CodeUnit.length(s)) - - /** - * Produce an iterator over indices in the string that mark the start - * of a user-perceived character (grapheme) - */ - def starts(s: String): Iterator[Offset.CodeUnit] = - new Iterator[Offset.CodeUnit] { - private[this] val it = BreakIterator.getCharacterInstance() - - it.setText(s) - - override def hasNext: Boolean = it.current < s.length - - override def next: Offset.CodeUnit = { - if (!hasNext) throw new IllegalArgumentException(s"${it.current()}, ${s.length}") - - // No matter what, we will be returning the value of `current`, - // which is the index of the start of the next grapheme. 
- val result = it.current() - - it.next() - - Offset.CodeUnit(result) - } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.docx new file mode 100644 index 000000000..c6ecedd7a Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala deleted file mode 100644 index 6a4cb0f5a..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala +++ /dev/null @@ -1,85 +0,0 @@ -package com.twitter.tweetypie.tweettext - -/** - * An efficient converter of indices between code points and code units. - */ -class IndexConverter(text: String) { - // Keep track of a single corresponding pair of code unit and code point - // offsets so that we can re-use counting work if the next requested - // entity is near the most recent entity. - private var codePointIndex = 0 - // The code unit index should never split a surrogate pair. - private var charIndex = 0 - - /** - * @param offset Index into the string measured in code units. - * @return The code point index that corresponds to the specified character index. - */ - def toCodePoints(offset: Offset.CodeUnit): Offset.CodePoint = - Offset.CodePoint(codeUnitsToCodePoints(offset.toInt)) - - /** - * @param charIndex Index into the string measured in code units. - * @return The code point index that corresponds to the specified character index. 
- */ - def codeUnitsToCodePoints(charIndex: Int): Int = { - if (charIndex < this.charIndex) { - this.codePointIndex -= text.codePointCount(charIndex, this.charIndex) - } else { - this.codePointIndex += text.codePointCount(this.charIndex, charIndex) - } - this.charIndex = charIndex - - // Make sure that charIndex never points to the second code unit of a - // surrogate pair. - if (charIndex > 0 && Character.isSupplementaryCodePoint(text.codePointAt(charIndex - 1))) { - this.charIndex -= 1 - this.codePointIndex -= 1 - } - - this.codePointIndex - } - - /** - * @param offset Index into the string measured in code points. - * @return the corresponding code unit index - */ - def toCodeUnits(offset: Offset.CodePoint): Offset.CodeUnit = { - this.charIndex = text.offsetByCodePoints(charIndex, offset.toInt - this.codePointIndex) - this.codePointIndex = offset.toInt - Offset.CodeUnit(this.charIndex) - } - - /** - * @param codePointIndex Index into the string measured in code points. - * @return the corresponding code unit index - */ - def codePointsToCodeUnits(codePointIndex: Int): Int = - toCodeUnits(Offset.CodePoint(codePointIndex)).toInt - - /** - * Returns a substring which begins at the specified code point `from` and extends to the - * code point `to`. Since String.substring only works with character, the method first - * converts code point offset to code unit offset. - */ - def substring(from: Offset.CodePoint, to: Offset.CodePoint): String = - text.substring(toCodeUnits(from).toInt, toCodeUnits(to).toInt) - - /** - * Returns a substring which begins at the specified code point `from` and extends to the - * code point `to`. Since String.substring only works with character, the method first - * converts code point offset to code unit offset. 
- */ - def substringByCodePoints(from: Int, to: Int): String = - substring(Offset.CodePoint(from), Offset.CodePoint(to)) - - /** - * Returns a substring which begins at the specified code point `from` and extends to the - * end of the string. Since String.substring only works with character, the method first - * converts code point offset to code unit offset. - */ - def substringByCodePoints(from: Int): String = { - val charFrom = codePointsToCodeUnits(from) - text.substring(charFrom) - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.docx new file mode 100644 index 000000000..557cd8e43 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala deleted file mode 100644 index 119458643..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala +++ /dev/null @@ -1,253 +0,0 @@ -package com.twitter.tweetypie.tweettext -import scala.collection.immutable - -/** - * An Offset is a typed index into a String. - */ -trait Offset[T] extends Ordering[T] { - def toInt(t: T): Int - def count(text: String, start: Offset.CodeUnit, end: Offset.CodeUnit): T - - def compare(t1: T, t2: T): Int = toInt(t1).compare(toInt(t2)) - def length(input: String): T = count(input, Offset.CodeUnit(0), Offset.CodeUnit.length(input)) -} - -object Offset { - - /** - * UTF-16 code unit offsets are the native offsets for Java/Scala - * Strings. 
- */ - case class CodeUnit(toInt: Int) extends AnyVal with Ordered[CodeUnit] { - def compare(other: CodeUnit): Int = toInt.compare(other.toInt) - def +(other: CodeUnit) = CodeUnit(toInt + other.toInt) - def -(other: CodeUnit) = CodeUnit(toInt - other.toInt) - def min(other: CodeUnit): CodeUnit = if (toInt < other.toInt) this else other - def max(other: CodeUnit): CodeUnit = if (toInt > other.toInt) this else other - def incr: CodeUnit = CodeUnit(toInt + 1) - def decr: CodeUnit = CodeUnit(toInt - 1) - def until(end: CodeUnit): immutable.IndexedSeq[CodeUnit] = - toInt.until(end.toInt).map(CodeUnit(_)) - - /** - * Converts this `CodeUnit` to the equivalent `CodePoint` within the - * given text. - */ - def toCodePoint(text: String): CodePoint = - CodePoint(text.codePointCount(0, toInt)) - - def offsetByCodePoints(text: String, codePoints: CodePoint): CodeUnit = - CodeUnit(text.offsetByCodePoints(toInt, codePoints.toInt)) - } - - implicit object CodeUnit extends Offset[CodeUnit] { - def toInt(u: CodeUnit): Int = u.toInt - override def length(text: String): CodeUnit = CodeUnit(text.length) - def count(text: String, start: CodeUnit, end: CodeUnit): CodeUnit = end - start - } - - /** - * Offsets in whole Unicode code points. Any CodePoint is a valid - * offset into the String as long as it is >= 0 and less than the - * number of code points in the string. 
- */ - case class CodePoint(toInt: Int) extends AnyVal with Ordered[CodePoint] { - def toShort: Short = toInt.toShort - def compare(other: CodePoint): Int = toInt.compare(other.toInt) - def +(other: CodePoint) = CodePoint(toInt + other.toInt) - def -(other: CodePoint) = CodePoint(toInt - other.toInt) - def min(other: CodePoint): CodePoint = if (toInt < other.toInt) this else other - def max(other: CodePoint): CodePoint = if (toInt > other.toInt) this else other - def until(end: CodePoint): immutable.IndexedSeq[CodePoint] = - toInt.until(end.toInt).map(CodePoint(_)) - - def toCodeUnit(text: String): CodeUnit = - CodeUnit(text.offsetByCodePoints(0, toInt)) - } - - implicit object CodePoint extends Offset[CodePoint] { - def toInt(p: CodePoint): Int = p.toInt - - def count(text: String, start: CodeUnit, end: CodeUnit): CodePoint = - CodePoint(text.codePointCount(start.toInt, end.toInt)) - } - - /** - * Offsets into the String as if the String were encoded as UTF-8. You - * cannot use a [[Utf8]] offset to index a String, because not all - * Utf8 indices are valid indices into the String. - */ - case class Utf8(toInt: Int) extends AnyVal with Ordered[Utf8] { - def compare(other: Utf8): Int = toInt.compare(other.toInt) - def +(other: Utf8) = Utf8(toInt + other.toInt) - def -(other: Utf8) = Utf8(toInt - other.toInt) - def min(other: Utf8): Utf8 = if (toInt < other.toInt) this else other - def max(other: Utf8): Utf8 = if (toInt > other.toInt) this else other - } - - implicit object Utf8 extends Offset[Utf8] { - def toInt(u: Utf8): Int = u.toInt - - /** - * Count how many bytes this section of text would be when encoded as - * UTF-8. 
- */ - def count(s: String, start: CodeUnit, end: CodeUnit): Utf8 = { - def go(i: CodeUnit, byteLength: Utf8): Utf8 = - if (i < end) { - val cp = s.codePointAt(i.toInt) - go(i + CodeUnit(Character.charCount(cp)), byteLength + forCodePoint(cp)) - } else { - byteLength - } - - go(start, Utf8(0)) - } - - /** - * Unfortunately, there is no convenient API for finding out how many - * bytes a unicode code point would take in UTF-8, so we have to - * explicitly calculate it. - * - * @see http://en.wikipedia.org/wiki/UTF-8#Description - */ - def forCodePoint(cp: Int): Utf8 = - Utf8 { - // if the code point is an unpaired surrogate, it will be converted - // into a 1 byte replacement character - if (Character.getType(cp) == Character.SURROGATE) 1 - else { - cp match { - case _ if cp < 0x80 => 1 - case _ if cp < 0x800 => 2 - case _ if cp < 0x10000 => 3 - case _ => 4 - } - } - } - } - - /** - * Display units count what we consider a "character" in a - * Tweet. [[DisplayUnit]] offsets are only valid for text that is - * NFC-normalized (See: http://www.unicode.org/reports/tr15) and - * HTML-encoded, though this interface cannot enforce that. - * - * Currently, a [[DisplayUnit]] is equivalent to a single Unicode code - * point combined with treating "<", ">", and "&" each as a - * single character (since they are displayed as '<', '>', and '&' - * respectively). This implementation is not directly exposed. - * - * It should be possible to change this definition without breaking - * code that uses the [[DisplayUnit]] interface e.g. to count - * user-perceived characters (graphemes) rather than code points, - * though any change has to be made in concert with changing the - * mobile client and Web implementations so that the user experience - * of character counting remains consistent. 
- */ - case class DisplayUnit(toInt: Int) extends AnyVal with Ordered[DisplayUnit] { - def compare(other: DisplayUnit): Int = toInt.compare(other.toInt) - def +(other: DisplayUnit) = DisplayUnit(toInt + other.toInt) - def -(other: DisplayUnit) = DisplayUnit(toInt - other.toInt) - def min(other: DisplayUnit): DisplayUnit = if (toInt < other.toInt) this else other - def max(other: DisplayUnit): DisplayUnit = if (toInt > other.toInt) this else other - } - - implicit object DisplayUnit extends Offset[DisplayUnit] { - def toInt(d: DisplayUnit): Int = d.toInt - - /** - * Returns the number of display units in the specified range of the - * given text. See [[DisplayUnit]] for a descrption of what we - * consider a display unit. - * - * The input string should already be NFC normalized to get - * consistent results. If partially html encoded, it will correctly - * count html entities as a single display unit. - * - * @param text the string containing the characters to count. - * @param the index to the first char of the text range - * @param the index after the last char of the text range. - */ - def count(text: String, start: CodeUnit, end: CodeUnit): DisplayUnit = { - val stop = end.min(CodeUnit.length(text)) - - @annotation.tailrec - def go(offset: CodeUnit, total: DisplayUnit): DisplayUnit = - if (offset >= stop) total - else go(offset + at(text, offset), total + DisplayUnit(1)) - - go(start, DisplayUnit(0)) - } - - /** - * Return the length of the display unit at the specified offset in - * the (NFC-normalized, HTML-encoded) text. - */ - def at(text: String, offset: CodeUnit): CodeUnit = - CodeUnit { - text.codePointAt(offset.toInt) match { - case '&' => - if (text.regionMatches(offset.toInt, "&", 0, 5)) 5 - else if (text.regionMatches(offset.toInt, "<", 0, 4)) 4 - else if (text.regionMatches(offset.toInt, ">", 0, 4)) 4 - else 1 - - case cp => Character.charCount(cp) - } - } - } - - /** - * Ranges of offsets, useful for avoiding slicing entities. 
- */ - sealed trait Ranges[T] { - def contains(t: T): Boolean - } - - object Ranges { - private[this] case class Impl[T](toSeq: Seq[(T, T)])(implicit off: Offset[T]) - extends Ranges[T] { - def contains(t: T): Boolean = toSeq.exists { case (lo, hi) => off.gt(t, lo) && off.lt(t, hi) } - } - - /** - * Non-inclusive range of offsets (matches values that are strictly - * between `hi` and `lo`) - */ - def between[T](lo: T, hi: T)(implicit off: Offset[T]): Ranges[T] = - if (off.toInt(hi) > off.toInt(lo) + 1 && off.toInt(lo) < Int.MaxValue) Impl(Seq((lo, hi))) - else Impl(Nil) - - /** - * The union of all of the specified ranges. - */ - def all[T](ranges: Seq[Ranges[T]])(implicit off: Offset[T]): Ranges[T] = - Impl( - // Preprocess the ranges so that each contains check is as cheap - // as possible. - ranges - .flatMap { case r: Impl[T] => r.toSeq } - .sortBy(_._1) - .foldLeft(Nil: List[(T, T)]) { - case ((a, b) :: out, (c, d)) if off.lt(c, b) => (a, d) :: out - case (out, r) => r :: out - } - ) - - def Empty[T: Offset]: Ranges[T] = Impl[T](Nil) - - private[this] val HtmlEscapes = """&(?:amp|lt|gt);""".r - - /** - * Match [[CodeUnit]]s that would split a HTML entity. 
- */ - def htmlEntities(s: String): Ranges[CodeUnit] = { - val it = HtmlEscapes.findAllIn(s) - all(it.map(_ => between(CodeUnit(it.start), CodeUnit(it.end))).toSeq) - } - - def fromCodePointPairs(pairs: Seq[(Int, Int)]): Ranges[CodePoint] = - all(pairs.map { case (lo, hi) => between(CodePoint(lo), CodePoint(hi)) }) - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.docx new file mode 100644 index 000000000..1899ef9b1 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala deleted file mode 100644 index 7f1f338c3..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala +++ /dev/null @@ -1,55 +0,0 @@ -package com.twitter.tweetypie.tweettext - -/** - * Code used to convert raw user-provided text into an allowable form. - */ -object PartialHtmlEncoding { - - /** - * Replaces all `<`, `>`, and '&' chars with "<", ">", and "&", respectively. - * - * Tweet text is HTML-encoded at tweet creation time, and is stored and processed in encoded form. 
- */ - def encode(text: String): String = { - val buf = new StringBuilder - - text.foreach { - case '<' => buf.append("<") - case '>' => buf.append(">") - case '&' => buf.append("&") - case c => buf.append(c) - } - - buf.toString - } - - private val AmpLtRegex = "<".r - private val AmpGtRegex = ">".r - private val AmpAmpRegex = "&".r - - private val partialHtmlDecoder: (String => String) = - ((s: String) => AmpLtRegex.replaceAllIn(s, "<")) - .andThen(s => AmpGtRegex.replaceAllIn(s, ">")) - .andThen(s => AmpAmpRegex.replaceAllIn(s, "&")) - - /** - * The opposite of encode, it replaces all "<", ">", and "&" with - * `<`, `>`, and '&', respectively. - */ - def decode(text: String): String = - decodeWithModification(text) match { - case Some(mod) => mod.updated - case None => text - } - - /** - * Decodes encoded entities, and returns a `TextModification` if the text was modified. - */ - def decodeWithModification(text: String): Option[TextModification] = - TextModification.replaceAll( - text, - AmpLtRegex -> "<", - AmpGtRegex -> ">", - AmpAmpRegex -> "&" - ) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.docx new file mode 100644 index 000000000..cb3720bcc Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala deleted file mode 100644 index 0e5c06915..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala +++ /dev/null @@ -1,251 +0,0 @@ -package com.twitter.tweetypie.tweettext -import scala.util.matching.Regex - -/** - * Code used to convert raw user-provided text into an allowable form. 
- */ -object Preprocessor { - import TweetText._ - import TextModification.replaceAll - - /** - * Regex for dos-style line endings. - */ - val DosLineEndingRegex: Regex = """\r\n""".r - - /** - * Converts \r\n to just \n. - */ - def normalizeNewlines(text: String): String = - DosLineEndingRegex.replaceAllIn(text, "\n") - - /** - * Characters to strip out of tweet text at write-time. - */ - val unicodeCharsToStrip: Seq[Char] = - Seq( - '\uFFFE', '\uFEFF', // BOM - '\uFFFF', // Special - '\u200E', '\u200F', // ltr, rtl - '\u202A', '\u202B', '\u202C', '\u202D', '\u202E', // Directional change - '\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', - '\u0009', '\u000B', '\u000C', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', - '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', - '\u001D', '\u001E', '\u001F', '\u007F', - '\u2065', - ) - - val UnicodeCharsToStripRegex: Regex = unicodeCharsToStrip.mkString("[", "", "]").r - - /** - * Strips out control characters and other non-textual unicode chars that can break xml and/or - * json rendering, or be used for exploits. - */ - def stripControlCharacters(text: String): String = - UnicodeCharsToStripRegex.replaceAllIn(text, "") - - val Tweetypie674UnicodeSequence: String = - "\u0633\u0645\u064e\u0640\u064e\u0651\u0648\u064f\u0648\u064f\u062d\u062e " + - "\u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e " + - "\u0627\u0645\u0627\u0631\u062a\u064a\u062e \u0337\u0334\u0310\u062e" - - val Tweetypie674UnicodeRegex: Regex = Tweetypie674UnicodeSequence.r - - /** - * Replace each `Tweetypie674UnicodeSequence` of this string to REPLACEMENT - * CHARACTER. - * - * Apple has a bug in its CoreText library. This aims to prevent - * ios clients from being crashed when a tweet contains the specific - * unicode sequence. 
- */ - def avoidCoreTextBug(text: String): String = - Tweetypie674UnicodeRegex.replaceAllIn(text, "\ufffd") - - /** - * Replace each `Tweetypie674UnicodeSequence` of this string to a REPLACEMENT - * CHARACTER, returns a TextModification object that provides information - * to also update entity indices. - */ - def replaceCoreTextBugModification(text: String): Option[TextModification] = - replaceAll(text, Tweetypie674UnicodeRegex, "\ufffd") - - private val preprocessor: String => String = - ((s: String) => nfcNormalize(s)) - .andThen(stripControlCharacters _) - .andThen(trimBlankCharacters _) - .andThen(normalizeNewlines _) - .andThen(collapseBlankLines _) - .andThen(avoidCoreTextBug _) - - /** - * Performs the text modifications that are necessary in the write-path before extracting URLs. - */ - def preprocessText(text: String): String = - preprocessor(text) - - /** - * Replaces all `<`, `>`, and '&' chars with "<", ">", and "&", respectively. - * - * The original purpose of this was presumably to prevent script injections when - * displaying tweets without proper escaping. Currently, tweets are encoded before - * they are stored in the database. - * - * Note that the pre-escaping of & < and > also happens in the rich text editor in javascript - */ - def partialHtmlEncode(text: String): String = - PartialHtmlEncoding.encode(text) - - /** - * The opposite of partialHtmlEncode, it replaces all "<", ">", and "&" with - * `<`, `>`, and '&', respectively. - */ - def partialHtmlDecode(text: String): String = - PartialHtmlEncoding.decode(text) - - /** - * - * Detects all forms of whitespace, considering as whitespace the following: - * This regex detects characters that always or often are rendered as blank space. We use - * this to prevent users from inserting excess blank lines and from tweeting effectively - * blank tweets. - * - * Note that these are not all semantically "whitespace", so this regex should not be used - * to process non-blank text, e.g. 
to separate words. - * - * Codepoints below and the `\p{Z}` regex character property alias are defined in the Unicode - * Character Database (UCD) at https://unicode.org/ucd/ and https://unicode.org/reports/tr44/ - * - * The `\p{Z}` regex character property alias is defined specifically in UCD as: - * - * Zs | Space_Separator | a space character (of various non-zero widths) - * Zl | Line_Separator | U+2028 LINE SEPARATOR only - * Zp | Paragraph_Separator | U+2029 PARAGRAPH SEPARATOR only - * Z | Separator | Zs | Zl | Zp - * ref: https://unicode.org/reports/tr44/#GC_Values_Table - * - * U+0009 Horizontal Tab (included in \s) - * U+000B Vertical Tab (included in \s) - * U+000C Form feed (included in \s) - * U+000D Carriage return (included in \s) - * U+0020 space (included in \s) - * U+0085 Next line (included in \u0085) - * U+061C arabic letter mark (included in \u061C) - * U+00A0 no-break space (included in \p{Z}) - * U+00AD soft-hyphen marker (included in \u00AD) - * U+1680 ogham space mark (included in \p{Z}) - * U+180E mongolian vowel separator (included in \p{Z} on jdk8 and included in \u180E on jdk11) - * U+2000 en quad (included in \p{Z}) - * U+2001 em quad (included in \p{Z}) - * U+2002 en space (included in \p{Z}) - * U+2003 em space (included in \p{Z}) - * U+2004 three-per-em space (included in \p{Z}) - * U+2005 four-per-em space (included in \p{Z}) - * U+2006 six-per-em space (included in \p{Z}) - * U+2007 figure space (included in \p{Z}) - * U+2008 punctuation space (included in \p{Z}) - * U+2009 thin space (included in \p{Z}) - * U+200A hair space (included in \p{Z}) - * U+200B zero-width (included in \u200B-\u200D) - * U+200C zero-width non-joiner (included in \u200B-\u200D) - * U+200D zero-width joiner (included in \u200B-\u200D) - * U+2028 line separator (included in \p{Z}) - * U+2029 paragraph separator (included in \p{Z}) - * U+202F narrow no-break space (included in \p{Z}) - * U+205F medium mathematical space (included in \p{Z}) - * U+2061 
function application (included in \u2061-\u2064) - * U+2062 invisible times (included in \u2061-\u2064) - * U+2063 invisible separator (included in \u2061-\u2064) - * U+2064 invisible plus (included in \u2061-\u2064) - * U+2066 left-to-right isolate (included in \u2066-\u2069) - * U+2067 right-to-left isolate (included in \u2066-\u2069) - * U+2068 first strong isolate (included in \u2066-\u2069) - * U+2069 pop directional isolate (included in \u2066-\u2069) - * U+206A inhibit symmetric swapping (included in \u206A-\u206F) - * U+206B activate symmetric swapping (included in \u206A-\u206F) - * U+206C inhibit arabic form shaping (included in \u206A-\u206F) - * U+206D activate arabic form shaping (included in \u206A-\u206F) - * U+206E national digit shapes (included in \u206A-\u206F) - * U+206F nominal digit shapes (included in \u206A-\u206F) - * U+2800 braille pattern blank (included in \u2800) - * U+3164 hongul filler (see UCD Ignorable_Code_Point) - * U+FFA0 halfwidth hongul filler (see UCD Ignorable_Code_Point) - * U+3000 ideographic space (included in \p{Z}) - * U+FEFF zero-width no-break space (explicitly included in \uFEFF) - */ - val BlankTextRegex: Regex = - """[\s\p{Z}\u180E\u0085\u00AD\u061C\u200B-\u200D\u2061-\u2064\u2066-\u2069\u206A-\u206F\u2800\u3164\uFEFF\uFFA0]*""".r - - /** - * Some of the above blank characters are valid at the start of a Tweet (and irrelevant at the end) - * such as characters that change the direction of text. When trimming from the start - * or end of text we use a smaller set of characters - */ - val BlankWhenLeadingOrTrailingRegex: Regex = """[\s\p{Z}\u180E\u0085\u200B\uFEFF]*""".r - - /** - * Matches consecutive blanks, starting at a newline. 
- */ - val ConsecutiveBlankLinesRegex: Regex = ("""\n(""" + BlankTextRegex + """\n){2,}""").r - - val LeadingBlankCharactersRegex: Regex = ("^" + BlankWhenLeadingOrTrailingRegex).r - val TrailingBlankCharactersRegex: Regex = (BlankWhenLeadingOrTrailingRegex + "$").r - - /** - * Is the given text empty or contains nothing but whitespace? - */ - def isBlank(text: String): Boolean = - BlankTextRegex.pattern.matcher(text).matches() - - /** - * See http://confluence.local.twitter.com/display/PROD/Displaying+line+breaks+in+Tweets - * - * Collapses consecutive blanks lines down to a single blank line. We can assume that - * all newlines have already been normalized to just \n, so we don't have to worry about - * \r\n. - */ - def collapseBlankLinesModification(text: String): Option[TextModification] = - replaceAll(text, ConsecutiveBlankLinesRegex, "\n\n") - - def collapseBlankLines(text: String): String = - ConsecutiveBlankLinesRegex.replaceAllIn(text, "\n\n") - - def trimBlankCharacters(text: String): String = - TrailingBlankCharactersRegex.replaceFirstIn( - LeadingBlankCharactersRegex.replaceFirstIn(text, ""), - "" - ) - - /** Characters that are not visible on their own. Some of these are used in combination with - * other visible characters, and therefore cannot be always stripped from tweets. 
- */ - private[tweettext] val InvisibleCharacters: Seq[Char] = - Seq( - '\u2060', '\u2061', '\u2062', '\u2063', '\u2064', '\u206A', '\u206B', '\u206C', '\u206D', - '\u206D', '\u206E', '\u206F', '\u200C', - '\u200D', // non-printing chars with valid use in Arabic - '\u2009', '\u200A', '\u200B', // include very skinny spaces too - '\ufe00', '\ufe01', '\ufe02', '\ufe03', '\ufe04', '\ufe05', '\ufe06', '\ufe07', '\ufe08', - '\ufe09', '\ufe0A', '\ufe0B', '\ufe0C', '\ufe0D', '\ufe0E', '\ufe0F', - ) - - private[tweetypie] val InvisibleUnicodePattern: Regex = - ("^[" + InvisibleCharacters.mkString + "]+$").r - - def isInvisibleChar(input: Char): Boolean = { - InvisibleCharacters contains input - } - - /** If string is only "invisible characters", replace full string with whitespace. - * The purpose of this method is to remove invisible characters when ONLY invisible characters - * appear between two urls, which can be a security vulnerability due to misleading behavior. These - * characters cannot be removed as a rule applied to the tweet, because they are used in - * conjuction with other characters. 
- */ - def replaceInvisiblesWithWhitespace(text: String): String = { - text match { - case invisible @ InvisibleUnicodePattern() => " " * TweetText.codePointLength(invisible) - case other => other - } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.docx new file mode 100644 index 000000000..d2ee1e007 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala deleted file mode 100644 index e24eb7061..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.tweetypie.tweettext - -/** - * A type class for entities found within a piece of tweet text. - */ -trait TextEntity[T] { - def fromIndex(entity: T): Short - def toIndex(entity: T): Short - def move(entity: T, fromIndex: Short, toIndex: Short): T -} - -object TextEntity { - def fromIndex[T: TextEntity](entity: T): Short = - implicitly[TextEntity[T]].fromIndex(entity) - - def toIndex[T: TextEntity](entity: T): Short = - implicitly[TextEntity[T]].toIndex(entity) - - def move[T: TextEntity](entity: T, fromIndex: Short, toIndex: Short): T = - implicitly[TextEntity[T]].move(entity, fromIndex, toIndex) - - def shift[T: TextEntity](entity: T, offset: Short): T = - move(entity, (fromIndex(entity) + offset).toShort, (toIndex(entity) + offset).toShort) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.docx new file mode 100644 index 000000000..30964e522 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.docx differ diff --git 
a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala deleted file mode 100644 index 053a4e115..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala +++ /dev/null @@ -1,232 +0,0 @@ -package com.twitter.tweetypie.tweettext - -import scala.util.matching.Regex - -object TextModification { - - /** - * Lift a text into a TextModification where `original` and `updated` text are the same - * and `replacements` is empty. - */ - def identity(text: String): TextModification = - TextModification(original = text, updated = text, replacements = Nil) - - /** - * Replace each substring that matches the regex with the substitution string, returns a - * TextModification object that contains the updated text and enough information to also - * update entity indices. - * - * This method should correctly be taking into account surrogate-pairs. The returned - * TextModification object has code-point offsets, instead of code-unit offsets. - */ - def replaceAll(text: String, regex: Regex, substitution: String): Option[TextModification] = - replaceAll(text, regex -> substitution) - - /** - * Replaces substrings that match the given `Regex` with the corresonding substitution - * string. Returns a `TextModification` that can be used to reindex entities. 
- */ - def replaceAll( - text: String, - regexAndSubstitutions: (Regex, String)* - ): Option[TextModification] = { - val matches = - (for { - (r, s) <- regexAndSubstitutions - m <- r.findAllIn(text).matchData - } yield (m, s)).sortBy { case (m, _) => m.start } - - if (matches.isEmpty) { - // no match found, return None to indicate no modifications made - None - } else { - val replacements = List.newBuilder[TextReplacement] - val indexConverter = new IndexConverter(text) - // contains the retained text, built up as we walk through the regex matches - val buf = new StringBuilder(text.length) - // the number of code-points copied into buf - var codePointsCopied = Offset.CodePoint(0) - // always holds the start code-unit offset to copy to buf when we encounter - // either a regex match or end-of-string. - var anchor = 0 - - import indexConverter.toCodePoints - - for ((m, sub) <- matches) { - val unchangedText = text.substring(anchor, m.start) - val unchangedLen = Offset.CodePoint.length(unchangedText) - val subLen = Offset.CodePoint.length(sub) - - // copies the text upto the regex match run, plus the replacement string - buf.append(unchangedText).append(sub) - codePointsCopied += unchangedLen + subLen - - // the offsets indicate the indices of the matched string in the original - // text, and the indices of the replacement string in the updated string - replacements += - TextReplacement( - originalFrom = toCodePoints(Offset.CodeUnit(m.start)), - originalTo = toCodePoints(Offset.CodeUnit(m.end)), - updatedFrom = codePointsCopied - subLen, - updatedTo = codePointsCopied - ) - - anchor = m.end - } - - buf.append(text.substring(anchor)) - - Some(TextModification(text, buf.toString, replacements.result())) - } - } - - /** - * Inserts a string at a specified code point offset. - * Returns a `TextModification` that can be used to reindex entities. 
- */ - def insertAt( - originalText: String, - insertAt: Offset.CodePoint, - textToInsert: String - ): TextModification = { - val insertAtCodeUnit = insertAt.toCodeUnit(originalText).toInt - val (before, after) = originalText.splitAt(insertAtCodeUnit) - val updatedText = s"$before$textToInsert$after" - val textToInsertLength = TweetText.codePointLength(textToInsert) - - TextModification( - original = originalText, - updated = updatedText, - replacements = List( - TextReplacement.fromCodePoints( - originalFrom = insertAt.toInt, - originalTo = insertAt.toInt, - updatedFrom = insertAt.toInt, - updatedTo = insertAt.toInt + textToInsertLength - )) - ) - } -} - -/** - * Encodes information about insertions/deletions/replacements made to a string, providing - * the original string, the updated string, and a list of TextReplacement objects - * that encode the indices of the segments that were changed. Using this information, - * it is possible to map an offset into the original string to an offset into the updated - * string, assuming the text at the offset was not within one of the modified segments. - * - * All offsets are code-points, not UTF6 code-units. - */ -case class TextModification( - original: String, - updated: String, - replacements: List[TextReplacement]) { - private val originalLen = Offset.CodePoint.length(original) - - /** - * Using an offset into the original String, computes the equivalent offset into the updated - * string. If the offset falls within a segment that was removed/replaced, None is returned. - */ - def reindex(index: Offset.CodePoint): Option[Offset.CodePoint] = - reindex(index, Offset.CodePoint(0), replacements) - - /** - * Reindexes an entity of type T. Returns the updated entity, or None if either the `fromIndex` - * or `toIndex` value is now out of range. 
- */ - def reindexEntity[T: TextEntity](e: T): Option[T] = - for { - from <- reindex(Offset.CodePoint(TextEntity.fromIndex(e))) - to <- reindex(Offset.CodePoint(TextEntity.toIndex(e) - 1)) - } yield TextEntity.move(e, from.toShort, (to.toShort + 1).toShort) - - /** - * Reindexes a sequence of entities of type T. Some entities could be filtered - * out if they span a region of text that has been removed. - */ - def reindexEntities[T: TextEntity](es: Seq[T]): Seq[T] = - for (e <- es; e2 <- reindexEntity(e)) yield e2 - - /** - * Swaps `original` and `updated` text and inverts all `TextReplacement` instances. - */ - def inverse: TextModification = - TextModification(updated, original, replacements.map(_.inverse)) - - // recursively walks through the list of TextReplacement objects computing - // offsets to add/substract from 'shift', which accumulates all changes and - // then gets added to index at the end. - private def reindex( - index: Offset.CodePoint, - shift: Offset.CodePoint, - reps: List[TextReplacement] - ): Option[Offset.CodePoint] = - reps match { - case Nil => - if (index.toInt >= 0 && index <= originalLen) - Some(index + shift) - else - None - case (r @ TextReplacement(fr, to, _, _)) :: tail => - if (index < fr) Some(index + shift) - else if (index < to) None - else reindex(index, shift + r.lengthDelta, tail) - } -} - -object TextReplacement { - def fromCodePoints( - originalFrom: Int, - originalTo: Int, - updatedFrom: Int, - updatedTo: Int - ): TextReplacement = - TextReplacement( - Offset.CodePoint(originalFrom), - Offset.CodePoint(originalTo), - Offset.CodePoint(updatedFrom), - Offset.CodePoint(updatedTo) - ) -} - -/** - * Encodes the indices of a segment of text in one string that maps to a replacement - * segment in an updated version of the text. The replacement segment could be empty - * (updatedTo == updatedFrom), indicating the segment was removed. - * - * All offsets are code-points, not UTF16 code-units. 
- * - * `originalFrom` and `updatedFrom` are inclusive. - * `originalTo` and `updatedTo` are exclusive. - */ -case class TextReplacement( - originalFrom: Offset.CodePoint, - originalTo: Offset.CodePoint, - updatedFrom: Offset.CodePoint, - updatedTo: Offset.CodePoint) { - def originalLength: Offset.CodePoint = originalTo - originalFrom - def updatedLength: Offset.CodePoint = updatedTo - updatedFrom - def lengthDelta: Offset.CodePoint = updatedLength - originalLength - - def shiftOriginal(offset: Offset.CodePoint): TextReplacement = - copy(originalFrom = originalFrom + offset, originalTo = originalTo + offset) - - def shiftUpdated(offset: Offset.CodePoint): TextReplacement = - copy(updatedFrom = updatedFrom + offset, updatedTo = updatedTo + offset) - - def shift(offset: Offset.CodePoint): TextReplacement = - TextReplacement( - originalFrom + offset, - originalTo + offset, - updatedFrom + offset, - updatedTo + offset - ) - - def inverse: TextReplacement = - TextReplacement( - originalFrom = updatedFrom, - originalTo = updatedTo, - updatedFrom = originalFrom, - updatedTo = originalTo - ) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.docx new file mode 100644 index 000000000..063c26fc6 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala deleted file mode 100644 index c9f6e28cc..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala +++ /dev/null @@ -1,159 +0,0 @@ -package com.twitter.tweetypie.tweettext - -import com.twitter.tweetypie.tweettext.TweetText._ -import com.twitter.twittertext.Extractor -import java.lang.Character -import scala.annotation.tailrec -import scala.collection.JavaConverters._ - 
-object Truncator { - val Ellipsis = "\u2026" - - /** - * Truncate tweet text for a retweet. If the text is longer than - * either of the length limits, code points are cut off from the end - * of the text and replaced with an ellipsis. We keep as much of the - * leading text as possible, subject to these constraints: - * - * - There are no more than `MaxDisplayLength` characters. - * - * - When converted to UTF-8, the result does not exceed `MaxByteLength`. - * - * - We do not break within a single grapheme cluster. - * - * The input is assumed to be partial HTML-encoded and may or may - * not be NFC normalized. The result will be partial HTML-encoded - * and will be NFC normalized. - */ - def truncateForRetweet(input: String): String = truncateWithEllipsis(input, Ellipsis) - - /** - * Truncate to [[com.twitter.tweetypie.tweettext.TweetText#OrginalMaxDisplayLength]] display - * units, using "..." as an ellipsis. The resulting text is guaranteed to pass our tweet length - * check, but it is not guaranteed to fit in a SMS message. - */ - def truncateForSms(input: String): String = truncateWithEllipsis(input, "...") - - /** - * Check the length of the given text, and truncate it if it is longer - * than the allowed length for a Tweet. The result of this method will - * always have: - * - * - Display length <= OriginalMaxDisplayLength. - * - Length when encoded as UTF-8 <= OriginalMaxUtf8Length. - * - * If the input would violate this, then the text will be - * truncated. When the text is truncated, it will be truncated such - * that: - * - * - Grapheme clusters will not be split. - * - The last character before the ellipsis will not be a whitespace - * character. - * - The ellipsis text will be appended to the end. 
- */ - private[this] def truncateWithEllipsis(input: String, ellipsis: String): String = { - val text = nfcNormalize(input) - val truncateAt = - truncationPoint(text, OriginalMaxDisplayLength, OriginalMaxUtf8Length, Some(ellipsis)) - if (truncateAt.codeUnitOffset.toInt == text.length) text - else text.take(truncateAt.codeUnitOffset.toInt) + ellipsis - } - - /** - * Indicates a potential TruncationPoint in piece of text. - * - * @param charOffset the utf-16 character offset of the truncation point - * @param codePointOffset the offset in code points - */ - case class TruncationPoint(codeUnitOffset: Offset.CodeUnit, codePointOffset: Offset.CodePoint) - - /** - * Computes a TruncationPoint for the given text and length constraints. If `truncated` on - * the result is `false`, it means the text will fit within the given constraints without - * truncation. Otherwise, the result indicates both the character and code-point offsets - * at which to perform the truncation, and the resulting display length and byte length of - * the truncated string. - * - * Text should be NFC normalized first for best results. - * - * @param withEllipsis if true, then the truncation point will be computed so that there is space - * to append an ellipsis and to still remain within the limits. The ellipsis is not counted - * in the returned display and byte lengths. - * - * @param atomicUnits may contain a list of ranges that should be treated as atomic unit and - * not split. each tuple is half-open range in code points. 
- */ - def truncationPoint( - text: String, - maxDisplayLength: Int = OriginalMaxDisplayLength, - maxByteLength: Int = OriginalMaxUtf8Length, - withEllipsis: Option[String] = None, - atomicUnits: Offset.Ranges[Offset.CodePoint] = Offset.Ranges.Empty - ): TruncationPoint = { - val breakPoints = - GraphemeIndexIterator - .ends(text) - .filterNot(Offset.Ranges.htmlEntities(text).contains) - - val ellipsisDisplayUnits = - withEllipsis.map(Offset.DisplayUnit.length).getOrElse(Offset.DisplayUnit(0)) - val maxTruncatedDisplayLength = Offset.DisplayUnit(maxDisplayLength) - ellipsisDisplayUnits - - val ellipsisByteLength = withEllipsis.map(Offset.Utf8.length).getOrElse(Offset.Utf8(0)) - val maxTruncatedByteLength = Offset.Utf8(maxByteLength) - ellipsisByteLength - - var codeUnit = Offset.CodeUnit(0) - var codePoint = Offset.CodePoint(0) - var displayLength = Offset.DisplayUnit(0) - var byteLength = Offset.Utf8(0) - var truncateCodeUnit = codeUnit - var truncateCodePoint = codePoint - - @tailrec def go(): TruncationPoint = - if (displayLength.toInt > maxDisplayLength || byteLength.toInt > maxByteLength) { - TruncationPoint(truncateCodeUnit, truncateCodePoint) - } else if (codeUnit != truncateCodeUnit && - displayLength <= maxTruncatedDisplayLength && - byteLength <= maxTruncatedByteLength && - (codeUnit.toInt == 0 || !Character.isWhitespace(text.codePointBefore(codeUnit.toInt))) && - !atomicUnits.contains(codePoint)) { - // we can advance the truncation point - truncateCodeUnit = codeUnit - truncateCodePoint = codePoint - go() - } else if (breakPoints.hasNext) { - // there are further truncation points to consider - val nextCodeUnit = breakPoints.next - codePoint += Offset.CodePoint.count(text, codeUnit, nextCodeUnit) - displayLength += Offset.DisplayUnit.count(text, codeUnit, nextCodeUnit) - byteLength += Offset.Utf8.count(text, codeUnit, nextCodeUnit) - codeUnit = nextCodeUnit - go() - } else { - TruncationPoint(codeUnit, codePoint) - } - - go() - } - - /** - * Truncate 
the given text, avoiding chopping HTML entities and tweet - * entities. This should only be used for testing because it performs - * entity extraction, and so is very inefficient. - */ - def truncateForTests( - input: String, - maxDisplayLength: Int = OriginalMaxDisplayLength, - maxByteLength: Int = OriginalMaxUtf8Length - ): String = { - val text = nfcNormalize(input) - val extractor = new Extractor - val entities = extractor.extractEntitiesWithIndices(text) - extractor.modifyIndicesFromUTF16ToUnicode(text, entities) - val avoid = Offset.Ranges.fromCodePointPairs( - entities.asScala.map(e => (e.getStart().intValue, e.getEnd().intValue)) - ) - val truncateAt = truncationPoint(text, maxDisplayLength, maxByteLength, None, avoid) - text.take(truncateAt.codeUnitOffset.toInt) - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.docx new file mode 100644 index 000000000..9e262504c Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala deleted file mode 100644 index cb2ae3069..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala +++ /dev/null @@ -1,62 +0,0 @@ -package com.twitter.tweetypie.tweettext - -import java.text.Normalizer - -object TweetText { - - /** The original maximum tweet length, taking into account normalization */ - private[tweetypie] val OriginalMaxDisplayLength = 140 - - /** Maximum number of visible code points allowed in a tweet when tweet length is counted by code - * points, taking into account normalization. See also [[MaxVisibleWeightedEmojiLength]]. 
- */ - private[tweetypie] val MaxVisibleWeightedLength = 280 - - /** Maximum number of visible code points allowed in a tweet when tweet length is counted by - * emoji, taking into account normalization. See also [[MaxVisibleWeightedLength]]. - * 140 is the max number of Emojis, visible, fully-weighted per Twitter's cramming rules - * 10 is the max number of Code Points per Emoji - */ - private[tweetypie] val MaxVisibleWeightedEmojiLength = 140 * 10 - - /** Maximum number of bytes when truncating tweet text for a retweet. Originally was the - * max UTF-8 length when tweets were at most 140 characters. - * See also [[OriginalMaxDisplayLength]]. - */ - private[tweetypie] val OriginalMaxUtf8Length = 600 - - /** Maximum number of bytes for tweet text using utf-8 encoding. - */ - private[tweetypie] val MaxUtf8Length = 5708 - - /** Maximum number of mentions allowed in tweet text. This is enforced at tweet creation time */ - private[tweetypie] val MaxMentions = 50 - - /** Maximum number of urls allowed in tweet text. This is enforced at tweet creation time */ - private[tweetypie] val MaxUrls = 10 - - /** Maximum number of hashtags allowed in tweet text. This is enforced at tweet creation time */ - private[tweetypie] val MaxHashtags = 50 - - /** Maximum number of cashtags allowed in tweet text. This is enforced at tweet creation time */ - private[tweetypie] val MaxCashtags = 50 - - /** Maximum length of a hashtag (not including the '#') */ - private[tweetypie] val MaxHashtagLength = 100 - - /** - * Normalizes the text according to the unicode NFC spec. - */ - def nfcNormalize(text: String): String = Normalizer.normalize(text, Normalizer.Form.NFC) - - /** - * Return the number of "characters" in this text. See - * [[Offset.DisplayUnit]]. - */ - def displayLength(text: String): Int = Offset.DisplayUnit.length(text).toInt - - /** - * Return the number of Unicode code points in this String. 
- */ - def codePointLength(text: String): Int = Offset.CodePoint.length(text).toInt -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD deleted file mode 100644 index 9a3c54773..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD +++ /dev/null @@ -1,76 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - platform = "java8", - provides = scala_artifact( - org = "com.twitter.tweetypie", - name = "util", - repo = artifactory, - ), - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "//:scala-reflect", - "3rdparty/jvm/commons-codec", - "3rdparty/jvm/org/apache/thrift:libthrift", - "finagle/finagle-core/src/main", - "mediaservices/commons/src/main/thrift:thrift-scala", - "scrooge/scrooge-serializer/src/main/scala", - "tweetypie/servo/repo", - "tweetypie/servo/util", - "tweetypie/servo/util/src/main/scala:exception", - "src/scala/com/twitter/takedown/util", - "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", - "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", - "src/thrift/com/twitter/expandodo:cards-scala", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/servo:servo-exception-scala", - "src/thrift/com/twitter/spam/rtf:safety-label-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "stitch/stitch-core", - "tweet-util", - "util/util-core:scala", - ], -) - -scala_library( - name = "EditControlUtil", - sources = [ - "EditControlUtil.scala", - "package.scala", - ], - compiler_option_sets = ["fatal_warnings"], - platform = "java8", - provides = scala_artifact( - org = "com.twitter.tweetypie", - name 
= "util-EditControlUtil", - repo = artifactory, - ), - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "//:scala-reflect", - "3rdparty/jvm/commons-codec", - "3rdparty/jvm/org/apache/thrift:libthrift", - "finagle/finagle-core/src/main", - "mediaservices/commons/src/main/thrift:thrift-scala", - "scrooge/scrooge-serializer/src/main/scala", - "tweetypie/servo/util/src/main/scala:exception", - "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", - "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", - "src/thrift/com/twitter/expandodo:cards-scala", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/servo:servo-exception-scala", - "src/thrift/com/twitter/spam/rtf:safety-label-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "stitch/stitch-core", - "tweet-util", - "util/util-core:scala", - ], -) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD.docx new file mode 100644 index 000000000..a586b339c Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.docx new file mode 100644 index 000000000..3f54a4dd5 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala deleted file mode 100644 index 6a89f6a3a..000000000 --- 
a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala +++ /dev/null @@ -1,29 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.escherbird.thriftscala.TweetEntityAnnotation -import com.twitter.tweetypie.thriftscala.EscherbirdEntityAnnotations -import com.twitter.tweetypie.thriftscala.Tweet - -object CommunityAnnotation { - - val groupId: Long = 8 - val domainId: Long = 31 - - def apply(communityId: Long): TweetEntityAnnotation = - TweetEntityAnnotation(groupId, domainId, entityId = communityId) - - def unapply(annotation: TweetEntityAnnotation): Option[Long] = - annotation match { - case TweetEntityAnnotation(`groupId`, `domainId`, entityId) => Some(entityId) - case _ => None - } - - // Returns None instead of Some(Seq()) when there are non-community annotations present - def additionalFieldsToCommunityIDs(additionalFields: Tweet): Option[Seq[Long]] = { - additionalFields.escherbirdEntityAnnotations - .map { - case EscherbirdEntityAnnotations(entityAnnotations) => - entityAnnotations.flatMap(CommunityAnnotation.unapply) - }.filter(_.nonEmpty) - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.docx new file mode 100644 index 000000000..ad84decf7 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala deleted file mode 100644 index a455fe3d8..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala +++ /dev/null @@ -1,19 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.tweetypie.thriftscala.Communities - -object CommunityUtil { - - def communityIds(maybeCommunities: Option[Communities]): Seq[Long] = { - maybeCommunities match { - case None => - Nil - 
case Some(Communities(seq)) => - seq - } - } - - def hasCommunity(maybeCommunities: Option[Communities]): Boolean = { - maybeCommunities.exists(_.communityIds.nonEmpty) - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.docx new file mode 100644 index 000000000..5ca52dcbe Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala deleted file mode 100644 index cb0ea84fb..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala +++ /dev/null @@ -1,112 +0,0 @@ -package com.twitter.tweetypie -package util - -import com.twitter.tweetypie.thriftscala._ - -object ConversationControls { - object Create { - def byInvitation( - inviteViaMention: Option[Boolean] = None - ): TweetCreateConversationControl.ByInvitation = TweetCreateConversationControl.ByInvitation( - TweetCreateConversationControlByInvitation(inviteViaMention = inviteViaMention) - ) - - def community( - inviteViaMention: Option[Boolean] = None - ): TweetCreateConversationControl.Community = TweetCreateConversationControl.Community( - TweetCreateConversationControlCommunity(inviteViaMention = inviteViaMention) - ) - - def followers( - inviteViaMention: Option[Boolean] = None - ): TweetCreateConversationControl.Followers = TweetCreateConversationControl.Followers( - TweetCreateConversationControlFollowers(inviteViaMention = inviteViaMention) - ) - } - - object Scenario { - case class CommonScenario( - createConversationControl: TweetCreateConversationControl, - descriptionSuffix: String, - expectedConversationControl: (UserId, Seq[UserId]) => ConversationControl, - inviteViaMention: Option[Boolean]) - - def 
mkCommunityScenario(inviteViaMention: Option[Boolean]): CommonScenario = - CommonScenario( - Create.community(inviteViaMention = inviteViaMention), - "community", - expectedConversationControl = (authorId, userIds) => { - community(userIds, authorId, inviteViaMention) - }, - inviteViaMention - ) - - def mkByInvitationScenario(inviteViaMention: Option[Boolean]): CommonScenario = - CommonScenario( - Create.byInvitation(inviteViaMention = inviteViaMention), - "invited users", - expectedConversationControl = (authorId, userIds) => { - byInvitation(userIds, authorId, inviteViaMention) - }, - inviteViaMention - ) - - def mkFollowersScenario(inviteViaMention: Option[Boolean]): CommonScenario = - CommonScenario( - Create.followers(inviteViaMention = inviteViaMention), - "followers", - expectedConversationControl = (authorId, userIds) => { - followers(userIds, authorId, inviteViaMention) - }, - inviteViaMention - ) - - val communityScenario = mkCommunityScenario(None) - val communityInviteViaMentionScenario = mkCommunityScenario(Some(true)) - - val byInvitationScenario = mkByInvitationScenario(None) - val byInvitationInviteViaMentionScenario = mkByInvitationScenario(Some(true)) - - val followersScenario = mkFollowersScenario(None) - val followersInviteViaMentionScenario = mkFollowersScenario(Some(true)) - } - - def byInvitation( - invitedUserIds: Seq[UserId], - conversationTweetAuthorId: UserId, - inviteViaMention: Option[Boolean] = None - ): ConversationControl = - ConversationControl.ByInvitation( - ConversationControlByInvitation( - conversationTweetAuthorId = conversationTweetAuthorId, - invitedUserIds = invitedUserIds, - inviteViaMention = inviteViaMention - ) - ) - - def community( - invitedUserIds: Seq[UserId], - conversationTweetAuthorId: UserId, - inviteViaMention: Option[Boolean] = None - ): ConversationControl = - ConversationControl.Community( - ConversationControlCommunity( - conversationTweetAuthorId = conversationTweetAuthorId, - invitedUserIds = 
invitedUserIds, - inviteViaMention = inviteViaMention - ) - ) - - def followers( - invitedUserIds: Seq[UserId], - conversationTweetAuthorId: UserId, - inviteViaMention: Option[Boolean] = None - ): ConversationControl = - ConversationControl.Followers( - ConversationControlFollowers( - conversationTweetAuthorId = conversationTweetAuthorId, - invitedUserIds = invitedUserIds, - inviteViaMention = inviteViaMention - ) - ) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.docx new file mode 100644 index 000000000..6883825a9 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala deleted file mode 100644 index 7135e9538..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala +++ /dev/null @@ -1,174 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.servo.util.Gate -import com.twitter.tweetypie.util.TweetEditFailure.TweetEditInvalidEditControlException -import com.twitter.tweetypie.util.TweetEditFailure.TweetEditUpdateEditControlException -import com.twitter.tweetypie.thriftscala.EditControl -import com.twitter.tweetypie.thriftscala.EditControlEdit -import com.twitter.tweetypie.thriftscala.EditControlInitial -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.util.Try -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.util.Time -import com.twitter.util.Duration - -object EditControlUtil { - - val maxTweetEditsAllowed = 5 - val oldEditTimeWindow = Duration.fromMinutes(30) - val editTimeWindow = Duration.fromMinutes(60) - - def editControlEdit( - initialTweetId: TweetId, - editControlInitial: Option[EditControlInitial] = None - ): EditControl.Edit 
= - EditControl.Edit( - EditControlEdit(initialTweetId = initialTweetId, editControlInitial = editControlInitial)) - - // EditControl for the tweet that is not an edit, that is, any regular tweet we create - // that can, potentially, be edited later. - def makeEditControlInitial( - tweetId: TweetId, - createdAt: Time, - setEditWindowToSixtyMinutes: Gate[Unit] = Gate(_ => false) - ): EditControl.Initial = { - val editWindow = if (setEditWindowToSixtyMinutes()) editTimeWindow else oldEditTimeWindow - val initial = EditControlInitial( - editTweetIds = Seq(tweetId), - editableUntilMsecs = Some(createdAt.plus(editWindow).inMilliseconds), - editsRemaining = Some(maxTweetEditsAllowed), - isEditEligible = defaultIsEditEligible, - ) - EditControl.Initial(initial) - } - - // Returns if a given latestTweetId is the latest edit in the EditControl - def isLatestEdit( - tweetEditControl: Option[EditControl], - latestTweetId: TweetId - ): Try[Boolean] = { - tweetEditControl match { - case Some(EditControl.Initial(initial)) => - isLatestEditFromEditControlInitial(Some(initial), latestTweetId) - case Some(EditControl.Edit(edit)) => - isLatestEditFromEditControlInitial( - edit.editControlInitial, - latestTweetId - ) - case _ => Throw(TweetEditInvalidEditControlException) - } - } - - // Returns if a given latestTweetId is the latest edit in the EditControlInitial - private def isLatestEditFromEditControlInitial( - initialTweetEditControl: Option[EditControlInitial], - latestTweetId: TweetId - ): Try[Boolean] = { - initialTweetEditControl match { - case Some(initial) => - Return(latestTweetId == initial.editTweetIds.last) - case _ => Throw(TweetEditInvalidEditControlException) - } - } - - /* Create an updated edit control for an initialTweet given the id of the new edit */ - def editControlForInitialTweet( - initialTweet: Tweet, - newEditId: TweetId - ): Try[EditControl.Initial] = { - initialTweet.editControl match { - case Some(EditControl.Initial(initial)) => - 
Return(EditControl.Initial(plusEdit(initial, newEditId))) - - case Some(EditControl.Edit(_)) => Throw(TweetEditUpdateEditControlException) - - case _ => - initialTweet.coreData match { - case Some(coreData) => - Return( - makeEditControlInitial( - tweetId = initialTweet.id, - createdAt = Time.fromMilliseconds(coreData.createdAtSecs * 1000), - setEditWindowToSixtyMinutes = Gate(_ => true) - ) - ) - case None => Throw(new Exception("Tweet Missing Required CoreData")) - } - } - } - - def updateEditControl(tweet: Tweet, newEditId: TweetId): Try[Tweet] = - editControlForInitialTweet(tweet, newEditId).map { editControl => - tweet.copy(editControl = Some(editControl)) - } - - def plusEdit(initial: EditControlInitial, newEditId: TweetId): EditControlInitial = { - val newEditTweetIds = (initial.editTweetIds :+ newEditId).distinct.sorted - val editsCount = newEditTweetIds.size - 1 // as there is the original tweet ID there too. - initial.copy( - editTweetIds = newEditTweetIds, - editsRemaining = Some(maxTweetEditsAllowed - editsCount), - ) - } - - // The ID of the initial Tweet if this is an edit - def getInitialTweetIdIfEdit(tweet: Tweet): Option[TweetId] = tweet.editControl match { - case Some(EditControl.Edit(edit)) => Some(edit.initialTweetId) - case _ => None - } - - // If this is the first tweet in an edit chain, return the same tweet id - // otherwise return the result of getInitialTweetId - def getInitialTweetId(tweet: Tweet): TweetId = - getInitialTweetIdIfEdit(tweet).getOrElse(tweet.id) - - def isInitialTweet(tweet: Tweet): Boolean = - getInitialTweetId(tweet) == tweet.id - - // Extracted just so that we can easily track where the values of isEditEligible is coming from. 
- private def defaultIsEditEligible: Option[Boolean] = Some(true) - - // returns true if it's an edit of a Tweet or an initial Tweet that's been edited - def isEditTweet(tweet: Tweet): Boolean = - tweet.editControl match { - case Some(eci: EditControl.Initial) if eci.initial.editTweetIds.size <= 1 => false - case Some(_: EditControl.Initial) | Some(_: EditControl.Edit) | Some( - EditControl.UnknownUnionField(_)) => - true - case None => false - } - - // returns true if editControl is from an edit of a Tweet - // returns false for any other state, including edit intial. - def isEditControlEdit(editControl: EditControl): Boolean = { - editControl match { - case _: EditControl.Edit | EditControl.UnknownUnionField(_) => true - case _ => false - } - } - - def getEditTweetIds(editControl: Option[EditControl]): Try[Seq[TweetId]] = { - editControl match { - case Some(EditControl.Edit(EditControlEdit(_, Some(eci)))) => - Return(eci.editTweetIds) - case Some(EditControl.Initial(initial)) => - Return(initial.editTweetIds) - case _ => - Throw(new Exception(s"EditControlInitial not found in $editControl")) - } - } -} - -object TweetEditFailure { - abstract class TweetEditException(msg: String) extends Exception(msg) - - case object TweetEditGetInitialEditControlException - extends TweetEditException("Initial EditControl not found") - - case object TweetEditInvalidEditControlException - extends TweetEditException("Invalid EditControl for initial_tweet") - - case object TweetEditUpdateEditControlException - extends TweetEditException("Invalid Edit Control Update") -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.docx new file mode 100644 index 000000000..a0411e8ca Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.docx differ diff --git 
a/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala deleted file mode 100644 index ce0b49079..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala +++ /dev/null @@ -1,45 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.finagle.Backoff -import com.twitter.finagle.service.RetryPolicy -import com.twitter.finagle.service.RetryPolicy.RetryableWriteException -import com.twitter.servo.exception.thriftscala.ServerError -import com.twitter.util.Duration -import com.twitter.util.Throw -import com.twitter.util.TimeoutException -import com.twitter.util.Try - -object RetryPolicyBuilder { - - /** - * Retry on any exception. - */ - def anyFailure[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = - RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { - case Throw(_) => true - } - - /** - * Retry on com.twitter.util.TimeoutException - */ - def timeouts[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = - RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { - case Throw(_: TimeoutException) => true - } - - /** - * Retry on com.twitter.finagle.service.RetryableWriteExceptions - */ - def writes[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = - RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { - case Throw(RetryableWriteException(_)) => true - } - - /** - * Retry on com.twitter.servo.exception.thriftscala.ServerError - */ - def servoServerError[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = - RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { - case Throw(ServerError(_)) => true - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.docx new file mode 100644 index 000000000..6c2da950d Binary files /dev/null and 
b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala deleted file mode 100644 index 7113beed5..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala +++ /dev/null @@ -1,54 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.finagle.stats.Stat -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.servo -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.stitch.Stitch - -object StitchUtils { - def trackLatency[T](latencyStat: Stat, s: => Stitch[T]): Stitch[T] = { - Stitch - .time(s) - .map { - case (res, duration) => - latencyStat.add(duration.inMillis) - res - } - .lowerFromTry - } - - def observe[T](statsReceiver: StatsReceiver, apiName: String): Stitch[T] => Stitch[T] = { - val stats = statsReceiver.scope(apiName) - - val requests = stats.counter("requests") - val success = stats.counter("success") - val latencyStat = stats.stat("latency_ms") - - val exceptionCounter = - new servo.util.ExceptionCounter(stats, "failures") - - stitch => - trackLatency(latencyStat, stitch) - .respond { - case Return(_) => - requests.incr() - success.incr() - - case Throw(e) => - exceptionCounter(e) - requests.incr() - } - } - - def translateExceptions[T]( - stitch: Stitch[T], - translateException: PartialFunction[Throwable, Throwable] - ): Stitch[T] = - stitch.rescue { - case t if translateException.isDefinedAt(t) => - Stitch.exception(translateException(t)) - case t => Stitch.exception(t) - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.docx new file mode 100644 index 000000000..7db5f1626 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.docx differ 
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala deleted file mode 100644 index ccddcf540..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.tweetypie.util - -/** - * Escape a String into Java or Scala String literal syntax (adds the - * surrounding quotes.) - * - * This is primarily for printing Strings for debugging or logging. - */ -object StringLiteral extends (String => String) { - private[this] val ControlLimit = ' ' - private[this] val PrintableLimit = '\u007e' - private[this] val Specials = - Map('\n' -> 'n', '\r' -> 'r', '\t' -> 't', '"' -> '"', '\'' -> '\'', '\\' -> '\\') - - def apply(str: String): String = { - val s = new StringBuilder(str.length) - s.append('"') - var i = 0 - while (i < str.length) { - val c = str(i) - Specials.get(c) match { - case None => - if (c >= ControlLimit && c <= PrintableLimit) s.append(c) - else s.append("\\u%04x".format(c.toInt)) - case Some(special) => s.append('\\').append(special) - } - i += 1 - } - s.append('"').result - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.docx new file mode 100644 index 000000000..3b07e09b6 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala deleted file mode 100644 index 643971969..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala +++ /dev/null @@ -1,49 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.takedown.util.TakedownReasons -import com.twitter.takedown.util.TakedownReasons.CountryCode -import 
com.twitter.tseng.withholding.thriftscala.TakedownReason -import com.twitter.tseng.withholding.thriftscala.UnspecifiedReason -import com.twitter.tweetypie.thriftscala.Tweet - -/** - * Contains tweetypie-specific utils for working with TakedownReasons. - */ -object Takedowns { - - type CountryCode = String - - /** - * Take a list of [[TakedownReason]] and return values to be saved on the [[Tweet]] in fields - * tweetypieOnlyTakedownCountryCode and tweetypieOnlyTakedownReason. - * - * - tweetypieOnlyTakedownCountryCode contains the country_code of all UnspecifiedReasons - * - tweetypieOnlyTakedownReason contains all other reasons - */ - def partitionReasons(reasons: Seq[TakedownReason]): (Seq[String], Seq[TakedownReason]) = { - val (unspecifiedReasons, specifiedReasons) = reasons.partition { - case TakedownReason.UnspecifiedReason(UnspecifiedReason(_)) => true - case _ => false - } - val unspecifiedCountryCodes = unspecifiedReasons.collect(TakedownReasons.reasonToCountryCode) - (unspecifiedCountryCodes, specifiedReasons) - } - - def fromTweet(t: Tweet): Takedowns = - Takedowns( - Seq - .concat( - t.tweetypieOnlyTakedownCountryCodes - .getOrElse(Nil).map(TakedownReasons.countryCodeToReason), - t.tweetypieOnlyTakedownReasons.getOrElse(Nil) - ).toSet - ) -} - -/** - * This class is used to ensure the caller has access to both the full list of reasons as well - * as the backwards-compatible list of country codes. 
- */ -case class Takedowns(reasons: Set[TakedownReason]) { - def countryCodes: Set[CountryCode] = reasons.collect(TakedownReasons.reasonToCountryCode) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.docx new file mode 100644 index 000000000..f318dd080 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala deleted file mode 100644 index 9fa6d77a0..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala +++ /dev/null @@ -1,17 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.tweetypie.thriftscala.TransientCreateContext -import com.twitter.tweetypie.thriftscala.TweetCreateContextKey -import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeCreatorId -import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeIsLive - -object TransientContextUtil { - - def toAdditionalContext(context: TransientCreateContext): Map[TweetCreateContextKey, String] = - Seq - .concat( - context.periscopeIsLive.map(PeriscopeIsLive -> _.toString), // "true" or "false" - context.periscopeCreatorId.map(PeriscopeCreatorId -> _.toString) // userId - ) - .toMap -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.docx new file mode 100644 index 000000000..c4944a416 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala 
deleted file mode 100644 index 06295fa25..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala +++ /dev/null @@ -1,203 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.conversions.DurationOps._ -import com.twitter.logging.Logger -import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata -import com.twitter.scrooge.BinaryThriftStructSerializer -import com.twitter.servo.cache.ScopedCacheKey -import com.twitter.servo.util.Transformer -import com.twitter.tweetypie.thriftscala.PostTweetRequest -import com.twitter.util.Base64Long -import com.twitter.util.Time -import java.nio.ByteBuffer -import java.security.MessageDigest -import org.apache.commons.codec.binary.Base64 -import scala.collection.immutable.SortedMap - -object TweetCreationLock { - case class Key private (userId: UserId, typeCode: String, idOrMd5: String) - extends ScopedCacheKey("t", "locker", 2, Base64Long.toBase64(userId), typeCode, idOrMd5) { - def uniquenessId: Option[String] = - if (typeCode == Key.TypeCode.UniquenessId) Some(idOrMd5) else None - } - - object Key { - private[this] val log = Logger(getClass) - - object TypeCode { - val SourceTweetId = "r" - val UniquenessId = "u" - val PostTweetRequest = "p" - } - - private[this] val serializer = BinaryThriftStructSerializer(PostTweetRequest) - - // normalize the representation of no media ids. - private[util] def sanitizeMediaUploadIds(mediaUploadIds: Option[Seq[Long]]) = - mediaUploadIds.filter(_.nonEmpty) - - /** - * Request deduplication depends on the hash of a serialized Thrift value. - * - * In order to guarantee that a Map has a reproducible serialized form, - * it's necessary to fix the ordering of its keys. 
- */ - private[util] def sanitizeMediaMetadata( - mediaMetadata: Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]] - ): Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]] = - mediaMetadata.map(m => SortedMap(m.toSeq: _*)) - - /** - * Make sure to sanitize request fields with map/set since serialized - * bytes ordering is not guaranteed for same thrift values. - */ - private[util] def sanitizeRequest(request: PostTweetRequest): PostTweetRequest = - PostTweetRequest( - userId = request.userId, - text = request.text, - createdVia = "", - inReplyToTweetId = request.inReplyToTweetId, - geo = request.geo, - mediaUploadIds = sanitizeMediaUploadIds(request.mediaUploadIds), - narrowcast = request.narrowcast, - nullcast = request.nullcast, - additionalFields = request.additionalFields, - attachmentUrl = request.attachmentUrl, - mediaMetadata = sanitizeMediaMetadata(request.mediaMetadata), - conversationControl = request.conversationControl, - underlyingCreativesContainerId = request.underlyingCreativesContainerId, - editOptions = request.editOptions, - noteTweetOptions = request.noteTweetOptions - ) - - def bySourceTweetId(userId: UserId, sourceTweetId: TweetId): Key = - Key(userId, TypeCode.SourceTweetId, Base64Long.toBase64(sourceTweetId)) - - def byRequest(request: PostTweetRequest): Key = - request.uniquenessId match { - case Some(uqid) => - byUniquenessId(request.userId, uqid) - case None => - val sanitized = sanitizeRequest(request) - val sanitizedBytes = serializer.toBytes(sanitized) - val digested = MessageDigest.getInstance("SHA-256").digest(sanitizedBytes) - val base64Digest = Base64.encodeBase64String(digested) - val key = Key(request.userId, TypeCode.PostTweetRequest, base64Digest) - log.ifDebug(s"Generated key $key from request:\n${sanitized}") - key - } - - /** - * Key for tweets that have a uniqueness id set. There is only one - * namespace of uniqueness ids, across all clients. 
They are - * expected to be Snowflake ids, in order to avoid cache - * collisions. - */ - def byUniquenessId(userId: UserId, uniquenessId: Long): Key = - Key(userId, TypeCode.UniquenessId, Base64Long.toBase64(uniquenessId)) - } - - /** - * The state of tweet creation for a given Key (request). - */ - sealed trait State - - object State { - - /** - * There is no tweet creation currently in progress. (This can - * either be represented by no entry in the cache, or this special - * marker. This lets us use checkAndSet for deletion to avoid - * accidentally overwriting other process' values.) - */ - case object Unlocked extends State - - /** - * Some process is attempting to create the tweet. - */ - case class InProgress(token: Long, timestamp: Time) extends State - - /** - * The tweet has already been successfully created, and has the - * specified id. - */ - case class AlreadyCreated(tweetId: TweetId, timestamp: Time) extends State - - /** - * When stored in cache, each state is prefixed by a byte - * indicating the type of the entry. - */ - object TypeCode { - val Unlocked: Byte = 0.toByte - val InProgress: Byte = 1.toByte // + random long + timestamp - val AlreadyCreated: Byte = 2.toByte // + tweet id + timestamp - } - - private[this] val BufferSize = 17 // type byte + 64-bit value + 64-bit timestamp - - // Constant buffer to use for storing the serialized form on - // Unlocked. - private[this] val UnlockedBuf = Array[Byte](TypeCode.Unlocked) - - // Store the serialization function in a ThreadLocal so that we can - // reuse the buffer between invocations. 
- private[this] val threadLocalSerialize = new ThreadLocal[State => Array[Byte]] { - override def initialValue(): State => Array[Byte] = { - // Allocate the thread-local state - val ary = new Array[Byte](BufferSize) - val buf = ByteBuffer.wrap(ary) - - { - case Unlocked => UnlockedBuf - case InProgress(token, timestamp) => - buf.clear() - buf - .put(TypeCode.InProgress) - .putLong(token) - .putLong(timestamp.sinceEpoch.inNanoseconds) - ary - case AlreadyCreated(tweetId, timestamp) => - buf.clear() - buf - .put(TypeCode.AlreadyCreated) - .putLong(tweetId) - .putLong(timestamp.sinceEpoch.inNanoseconds) - ary - } - } - } - - /** - * Convert this State to the cache representation. - */ - private[this] def toBytes(state: State): Array[Byte] = - threadLocalSerialize.get()(state) - - /** - * Convert this byte array into a LockState. - * - * @throws RuntimeException if the buffer is not of the right size - * and format - */ - private[this] def fromBytes(bytes: Array[Byte]): State = { - val buf = ByteBuffer.wrap(bytes) - val result = buf.get() match { - case TypeCode.Unlocked => Unlocked - case TypeCode.InProgress => InProgress(buf.getLong(), buf.getLong().nanoseconds.afterEpoch) - case TypeCode.AlreadyCreated => - AlreadyCreated(buf.getLong(), buf.getLong().nanoseconds.afterEpoch) - case other => throw new RuntimeException("Invalid type code: " + other) - } - if (buf.remaining != 0) { - throw new RuntimeException("Extra data in buffer: " + bytes) - } - result - } - - /** - * How to serialize the State for storage in cache. 
- */ - val Serializer: Transformer[State, Array[Byte]] = - Transformer[State, Array[Byte]](tTo = toBytes _, tFrom = fromBytes _) - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.docx new file mode 100644 index 000000000..f2e4e97b7 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala deleted file mode 100644 index 6334c5d43..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala +++ /dev/null @@ -1,506 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.dataproducts.enrichments.thriftscala.ProfileGeoEnrichment -import com.twitter.expandodo.thriftscala._ -import com.twitter.mediaservices.commons.thriftscala.MediaKey -import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ -import com.twitter.servo.data.Lens -import com.twitter.spam.rtf.thriftscala.SafetyLabel -import com.twitter.tseng.withholding.thriftscala.TakedownReason -import com.twitter.tweetypie.thriftscala._ -import com.twitter.tweetypie.unmentions.thriftscala.UnmentionData - -object TweetLenses { - import Lens.checkEq - - def requireSome[A, B](l: Lens[A, Option[B]]): Lens[A, B] = - checkEq[A, B]( - a => l.get(a).get, - (a, b) => l.set(a, Some(b)) - ) - - def tweetLens[A](get: Tweet => A, set: (Tweet, A) => Tweet): Lens[Tweet, A] = - checkEq[Tweet, A](get, set) - - val id: Lens[Tweet, TweetId] = - tweetLens[TweetId](_.id, (t, id) => t.copy(id = id)) - - val coreData: Lens[Tweet, Option[TweetCoreData]] = - tweetLens[Option[TweetCoreData]](_.coreData, (t, coreData) => t.copy(coreData = coreData)) - - val requiredCoreData: Lens[Tweet, TweetCoreData] = - requireSome(coreData) - - val optUrls: Lens[Tweet, Option[Seq[UrlEntity]]] = - 
tweetLens[Option[Seq[UrlEntity]]](_.urls, (t, urls) => t.copy(urls = urls)) - - val urls: Lens[Tweet, Seq[UrlEntity]] = - tweetLens[Seq[UrlEntity]](_.urls.toSeq.flatten, (t, urls) => t.copy(urls = Some(urls))) - - val optMentions: Lens[Tweet, Option[Seq[MentionEntity]]] = - tweetLens[Option[Seq[MentionEntity]]](_.mentions, (t, v) => t.copy(mentions = v)) - - val mentions: Lens[Tweet, Seq[MentionEntity]] = - tweetLens[Seq[MentionEntity]](_.mentions.toSeq.flatten, (t, v) => t.copy(mentions = Some(v))) - - val unmentionData: Lens[Tweet, Option[UnmentionData]] = - tweetLens[Option[UnmentionData]](_.unmentionData, (t, v) => t.copy(unmentionData = v)) - - val optHashtags: Lens[Tweet, Option[Seq[HashtagEntity]]] = - tweetLens[Option[Seq[HashtagEntity]]](_.hashtags, (t, v) => t.copy(hashtags = v)) - - val hashtags: Lens[Tweet, Seq[HashtagEntity]] = - tweetLens[Seq[HashtagEntity]](_.hashtags.toSeq.flatten, (t, v) => t.copy(hashtags = Some(v))) - - val optCashtags: Lens[Tweet, Option[Seq[CashtagEntity]]] = - tweetLens[Option[Seq[CashtagEntity]]](_.cashtags, (t, v) => t.copy(cashtags = v)) - - val cashtags: Lens[Tweet, Seq[CashtagEntity]] = - tweetLens[Seq[CashtagEntity]](_.cashtags.toSeq.flatten, (t, v) => t.copy(cashtags = Some(v))) - - val optMedia: Lens[Tweet, Option[Seq[MediaEntity]]] = - tweetLens[Option[Seq[MediaEntity]]](_.media, (t, v) => t.copy(media = v)) - - val media: Lens[Tweet, Seq[MediaEntity]] = - tweetLens[Seq[MediaEntity]](_.media.toSeq.flatten, (t, v) => t.copy(media = Some(v))) - - val mediaKeys: Lens[Tweet, Seq[MediaKey]] = - tweetLens[Seq[MediaKey]]( - _.mediaKeys.toSeq.flatten, - { - case (t, v) => t.copy(mediaKeys = Some(v)) - }) - - val place: Lens[Tweet, Option[Place]] = - tweetLens[Option[Place]]( - _.place, - { - case (t, v) => t.copy(place = v) - }) - - val quotedTweet: Lens[Tweet, Option[QuotedTweet]] = - tweetLens[Option[QuotedTweet]]( - _.quotedTweet, - { - case (t, v) => t.copy(quotedTweet = v) - }) - - val selfThreadMetadata: Lens[Tweet, 
Option[SelfThreadMetadata]] = - tweetLens[Option[SelfThreadMetadata]]( - _.selfThreadMetadata, - { - case (t, v) => t.copy(selfThreadMetadata = v) - }) - - val composerSource: Lens[Tweet, Option[ComposerSource]] = - tweetLens[Option[ComposerSource]]( - _.composerSource, - { - case (t, v) => t.copy(composerSource = v) - }) - - val deviceSource: Lens[Tweet, Option[DeviceSource]] = - tweetLens[Option[DeviceSource]]( - _.deviceSource, - { - case (t, v) => t.copy(deviceSource = v) - }) - - val perspective: Lens[Tweet, Option[StatusPerspective]] = - tweetLens[Option[StatusPerspective]]( - _.perspective, - { - case (t, v) => t.copy(perspective = v) - }) - - val cards: Lens[Tweet, Option[Seq[Card]]] = - tweetLens[Option[Seq[Card]]]( - _.cards, - { - case (t, v) => t.copy(cards = v) - }) - - val card2: Lens[Tweet, Option[Card2]] = - tweetLens[Option[Card2]]( - _.card2, - { - case (t, v) => t.copy(card2 = v) - }) - - val cardReference: Lens[Tweet, Option[CardReference]] = - tweetLens[Option[CardReference]]( - _.cardReference, - { - case (t, v) => t.copy(cardReference = v) - }) - - val spamLabel: Lens[Tweet, Option[SafetyLabel]] = - tweetLens[Option[SafetyLabel]]( - _.spamLabel, - { - case (t, v) => t.copy(spamLabel = v) - }) - - val lowQualityLabel: Lens[Tweet, Option[SafetyLabel]] = - tweetLens[Option[SafetyLabel]]( - _.lowQualityLabel, - { - case (t, v) => t.copy(lowQualityLabel = v) - }) - - val nsfwHighPrecisionLabel: Lens[Tweet, Option[SafetyLabel]] = - tweetLens[Option[SafetyLabel]]( - _.nsfwHighPrecisionLabel, - { - case (t, v) => t.copy(nsfwHighPrecisionLabel = v) - }) - - val bounceLabel: Lens[Tweet, Option[SafetyLabel]] = - tweetLens[Option[SafetyLabel]]( - _.bounceLabel, - { - case (t, v) => t.copy(bounceLabel = v) - }) - - val takedownCountryCodes: Lens[Tweet, Option[Seq[String]]] = - tweetLens[Option[Seq[String]]]( - _.takedownCountryCodes, - { - case (t, v) => t.copy(takedownCountryCodes = v) - }) - - val takedownReasons: Lens[Tweet, 
Option[Seq[TakedownReason]]] = - tweetLens[Option[Seq[TakedownReason]]]( - _.takedownReasons, - { - case (t, v) => t.copy(takedownReasons = v) - }) - - val contributor: Lens[Tweet, Option[Contributor]] = - tweetLens[Option[Contributor]]( - _.contributor, - { - case (t, v) => t.copy(contributor = v) - }) - - val mediaTags: Lens[Tweet, Option[TweetMediaTags]] = - tweetLens[Option[TweetMediaTags]]( - _.mediaTags, - { - case (t, v) => t.copy(mediaTags = v) - }) - - val mediaTagMap: Lens[Tweet, Map[MediaId, Seq[MediaTag]]] = - tweetLens[Map[MediaId, Seq[MediaTag]]]( - _.mediaTags.map { case TweetMediaTags(tagMap) => tagMap.toMap }.getOrElse(Map.empty), - (t, v) => { - val cleanMap = v.filter { case (_, tags) => tags.nonEmpty } - t.copy(mediaTags = if (cleanMap.nonEmpty) Some(TweetMediaTags(cleanMap)) else None) - } - ) - - val escherbirdEntityAnnotations: Lens[Tweet, Option[EscherbirdEntityAnnotations]] = - tweetLens[Option[EscherbirdEntityAnnotations]]( - _.escherbirdEntityAnnotations, - { - case (t, v) => t.copy(escherbirdEntityAnnotations = v) - }) - - val communities: Lens[Tweet, Option[Communities]] = - tweetLens[Option[Communities]]( - _.communities, - { - case (t, v) => t.copy(communities = v) - }) - - val tweetypieOnlyTakedownCountryCodes: Lens[Tweet, Option[Seq[String]]] = - tweetLens[Option[Seq[String]]]( - _.tweetypieOnlyTakedownCountryCodes, - { - case (t, v) => t.copy(tweetypieOnlyTakedownCountryCodes = v) - }) - - val tweetypieOnlyTakedownReasons: Lens[Tweet, Option[Seq[TakedownReason]]] = - tweetLens[Option[Seq[TakedownReason]]]( - _.tweetypieOnlyTakedownReasons, - { - case (t, v) => t.copy(tweetypieOnlyTakedownReasons = v) - }) - - val profileGeo: Lens[Tweet, Option[ProfileGeoEnrichment]] = - tweetLens[Option[ProfileGeoEnrichment]]( - _.profileGeoEnrichment, - (t, v) => t.copy(profileGeoEnrichment = v) - ) - - val visibleTextRange: Lens[Tweet, Option[TextRange]] = - tweetLens[Option[TextRange]]( - _.visibleTextRange, - { - case (t, v) => 
t.copy(visibleTextRange = v) - }) - - val selfPermalink: Lens[Tweet, Option[ShortenedUrl]] = - tweetLens[Option[ShortenedUrl]]( - _.selfPermalink, - { - case (t, v) => t.copy(selfPermalink = v) - }) - - val extendedTweetMetadata: Lens[Tweet, Option[ExtendedTweetMetadata]] = - tweetLens[Option[ExtendedTweetMetadata]]( - _.extendedTweetMetadata, - { - case (t, v) => t.copy(extendedTweetMetadata = v) - }) - - object TweetCoreData { - val userId: Lens[TweetCoreData, UserId] = checkEq[TweetCoreData, UserId]( - _.userId, - { (c, v) => - // Pleases the compiler: https://github.com/scala/bug/issues/9171 - val userId = v - c.copy(userId = userId) - }) - val text: Lens[TweetCoreData, String] = checkEq[TweetCoreData, String]( - _.text, - { (c, v) => - // Pleases the compiler: https://github.com/scala/bug/issues/9171 - val text = v - c.copy(text = text) - }) - val createdAt: Lens[TweetCoreData, TweetId] = - checkEq[TweetCoreData, Long](_.createdAtSecs, (c, v) => c.copy(createdAtSecs = v)) - val createdVia: Lens[TweetCoreData, String] = - checkEq[TweetCoreData, String]( - _.createdVia, - { - case (c, v) => c.copy(createdVia = v) - }) - val hasTakedown: Lens[TweetCoreData, Boolean] = - checkEq[TweetCoreData, Boolean]( - _.hasTakedown, - { - case (c, v) => c.copy(hasTakedown = v) - }) - val nullcast: Lens[TweetCoreData, Boolean] = - checkEq[TweetCoreData, Boolean]( - _.nullcast, - { - case (c, v) => c.copy(nullcast = v) - }) - val nsfwUser: Lens[TweetCoreData, Boolean] = - checkEq[TweetCoreData, Boolean]( - _.nsfwUser, - { - case (c, v) => c.copy(nsfwUser = v) - }) - val nsfwAdmin: Lens[TweetCoreData, Boolean] = - checkEq[TweetCoreData, Boolean]( - _.nsfwAdmin, - { - case (c, v) => c.copy(nsfwAdmin = v) - }) - val reply: Lens[TweetCoreData, Option[Reply]] = - checkEq[TweetCoreData, Option[Reply]]( - _.reply, - { - case (c, v) => c.copy(reply = v) - }) - val share: Lens[TweetCoreData, Option[Share]] = - checkEq[TweetCoreData, Option[Share]]( - _.share, - { - case (c, v) => 
c.copy(share = v) - }) - val narrowcast: Lens[TweetCoreData, Option[Narrowcast]] = - checkEq[TweetCoreData, Option[Narrowcast]]( - _.narrowcast, - { - case (c, v) => c.copy(narrowcast = v) - }) - val directedAtUser: Lens[TweetCoreData, Option[DirectedAtUser]] = - checkEq[TweetCoreData, Option[DirectedAtUser]]( - _.directedAtUser, - { - case (c, v) => c.copy(directedAtUser = v) - }) - val conversationId: Lens[TweetCoreData, Option[ConversationId]] = - checkEq[TweetCoreData, Option[ConversationId]]( - _.conversationId, - { - case (c, v) => c.copy(conversationId = v) - }) - val placeId: Lens[TweetCoreData, Option[String]] = - checkEq[TweetCoreData, Option[String]]( - _.placeId, - { - case (c, v) => c.copy(placeId = v) - }) - val geoCoordinates: Lens[TweetCoreData, Option[GeoCoordinates]] = - checkEq[TweetCoreData, Option[GeoCoordinates]]( - _.coordinates, - (c, v) => c.copy(coordinates = v) - ) - val trackingId: Lens[TweetCoreData, Option[TweetId]] = - checkEq[TweetCoreData, Option[Long]]( - _.trackingId, - { - case (c, v) => c.copy(trackingId = v) - }) - val hasMedia: Lens[TweetCoreData, Option[Boolean]] = - checkEq[TweetCoreData, Option[Boolean]]( - _.hasMedia, - { - case (c, v) => c.copy(hasMedia = v) - }) - } - - val counts: Lens[Tweet, Option[StatusCounts]] = - tweetLens[Option[StatusCounts]]( - _.counts, - { - case (t, v) => t.copy(counts = v) - }) - - object StatusCounts { - val retweetCount: Lens[StatusCounts, Option[TweetId]] = - checkEq[StatusCounts, Option[Long]]( - _.retweetCount, - (c, retweetCount) => c.copy(retweetCount = retweetCount) - ) - - val replyCount: Lens[StatusCounts, Option[TweetId]] = - checkEq[StatusCounts, Option[Long]]( - _.replyCount, - (c, replyCount) => c.copy(replyCount = replyCount) - ) - - val favoriteCount: Lens[StatusCounts, Option[TweetId]] = - checkEq[StatusCounts, Option[Long]]( - _.favoriteCount, - { - case (c, v) => c.copy(favoriteCount = v) - }) - - val quoteCount: Lens[StatusCounts, Option[TweetId]] = - 
checkEq[StatusCounts, Option[Long]]( - _.quoteCount, - { - case (c, v) => c.copy(quoteCount = v) - }) - } - - val userId: Lens[Tweet, UserId] = requiredCoreData andThen TweetCoreData.userId - val text: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.text - val createdVia: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.createdVia - val createdAt: Lens[Tweet, ConversationId] = requiredCoreData andThen TweetCoreData.createdAt - val reply: Lens[Tweet, Option[Reply]] = requiredCoreData andThen TweetCoreData.reply - val share: Lens[Tweet, Option[Share]] = requiredCoreData andThen TweetCoreData.share - val narrowcast: Lens[Tweet, Option[Narrowcast]] = - requiredCoreData andThen TweetCoreData.narrowcast - val directedAtUser: Lens[Tweet, Option[DirectedAtUser]] = - requiredCoreData andThen TweetCoreData.directedAtUser - val conversationId: Lens[Tweet, Option[ConversationId]] = - requiredCoreData andThen TweetCoreData.conversationId - val placeId: Lens[Tweet, Option[String]] = requiredCoreData andThen TweetCoreData.placeId - val geoCoordinates: Lens[Tweet, Option[GeoCoordinates]] = - requiredCoreData andThen TweetCoreData.geoCoordinates - val hasTakedown: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.hasTakedown - val nsfwAdmin: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwAdmin - val nsfwUser: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwUser - val nullcast: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nullcast - val trackingId: Lens[Tweet, Option[ConversationId]] = - requiredCoreData andThen TweetCoreData.trackingId - val hasMedia: Lens[Tweet, Option[Boolean]] = requiredCoreData andThen TweetCoreData.hasMedia - - object CashtagEntity { - val indices: Lens[CashtagEntity, (Short, Short)] = - checkEq[CashtagEntity, (Short, Short)]( - t => (t.fromIndex, t.toIndex), - (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) - ) - val text: Lens[CashtagEntity, String] = - 
checkEq[CashtagEntity, String](_.text, (t, text) => t.copy(text = text)) - } - - object HashtagEntity { - val indices: Lens[HashtagEntity, (Short, Short)] = - checkEq[HashtagEntity, (Short, Short)]( - t => (t.fromIndex, t.toIndex), - (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) - ) - val text: Lens[HashtagEntity, String] = - checkEq[HashtagEntity, String](_.text, (t, text) => t.copy(text = text)) - } - - object MediaEntity { - val indices: Lens[MediaEntity, (Short, Short)] = - checkEq[MediaEntity, (Short, Short)]( - t => (t.fromIndex, t.toIndex), - (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) - ) - val mediaSizes: Lens[MediaEntity, collection.Set[MediaSize]] = - checkEq[MediaEntity, scala.collection.Set[MediaSize]]( - _.sizes, - (m, sizes) => m.copy(sizes = sizes) - ) - val url: Lens[MediaEntity, String] = - checkEq[MediaEntity, String]( - _.url, - { - case (t, v) => t.copy(url = v) - }) - val mediaInfo: Lens[MediaEntity, Option[MediaInfo]] = - checkEq[MediaEntity, Option[MediaInfo]]( - _.mediaInfo, - { - case (t, v) => t.copy(mediaInfo = v) - }) - } - - object MentionEntity { - val indices: Lens[MentionEntity, (Short, Short)] = - checkEq[MentionEntity, (Short, Short)]( - t => (t.fromIndex, t.toIndex), - (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) - ) - val screenName: Lens[MentionEntity, String] = - checkEq[MentionEntity, String]( - _.screenName, - (t, screenName) => t.copy(screenName = screenName) - ) - } - - object UrlEntity { - val indices: Lens[UrlEntity, (Short, Short)] = - checkEq[UrlEntity, (Short, Short)]( - t => (t.fromIndex, t.toIndex), - (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) - ) - val url: Lens[UrlEntity, String] = - checkEq[UrlEntity, String](_.url, (t, url) => t.copy(url = url)) - } - - object Contributor { - val screenName: Lens[Contributor, Option[String]] = - checkEq[Contributor, Option[String]]( - _.screenName, - (c, screenName) => c.copy(screenName = screenName) - ) - } - - object Reply { - val inReplyToScreenName: 
Lens[Reply, Option[String]] = - checkEq[Reply, Option[String]]( - _.inReplyToScreenName, - (c, inReplyToScreenName) => c.copy(inReplyToScreenName = inReplyToScreenName) - ) - - val inReplyToStatusId: Lens[Reply, Option[TweetId]] = - checkEq[Reply, Option[TweetId]]( - _.inReplyToStatusId, - (c, inReplyToStatusId) => c.copy(inReplyToStatusId = inReplyToStatusId) - ) - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.docx new file mode 100644 index 000000000..a9d846767 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala deleted file mode 100644 index 5a0bbcb2d..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.tweetutil.TweetPermalink -import com.twitter.tweetypie.thriftscala._ - -object TweetPermalinkUtil { - def lastQuotedTweetPermalink(tweet: Tweet): Option[(UrlEntity, TweetPermalink)] = - lastQuotedTweetPermalink(TweetLenses.urls.get(tweet)) - - def lastQuotedTweetPermalink(urls: Seq[UrlEntity]): Option[(UrlEntity, TweetPermalink)] = - urls.flatMap(matchQuotedTweetPermalink).lastOption - - def matchQuotedTweetPermalink(entity: UrlEntity): Option[(UrlEntity, TweetPermalink)] = - for { - expanded <- entity.expanded - permalink <- TweetPermalink.parse(expanded) - } yield (entity, permalink) -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.docx new file mode 100644 index 000000000..5297f319c Binary files /dev/null and 
b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala deleted file mode 100644 index a9b9c8748..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala +++ /dev/null @@ -1,128 +0,0 @@ -package com.twitter.tweetypie.util - -import com.twitter.tweetypie.thriftscala._ - -object TweetTransformer { - def toStatus(tweet: Tweet): Status = { - assert(tweet.coreData.nonEmpty, "tweet core data is missing") - val coreData = tweet.coreData.get - - val toGeo: Option[Geo] = - coreData.coordinates match { - case Some(coords) => - Some( - Geo( - latitude = coords.latitude, - longitude = coords.longitude, - geoPrecision = coords.geoPrecision, - entityId = if (coords.display) 2 else 0, - name = coreData.placeId, - place = tweet.place, - placeId = coreData.placeId, - coordinates = Some(coords) - ) - ) - case _ => - coreData.placeId match { - case None => None - case Some(_) => - Some(Geo(name = coreData.placeId, place = tweet.place, placeId = coreData.placeId)) - } - } - - Status( - id = tweet.id, - userId = coreData.userId, - text = coreData.text, - createdVia = coreData.createdVia, - createdAt = coreData.createdAtSecs, - urls = tweet.urls.getOrElse(Seq.empty), - mentions = tweet.mentions.getOrElse(Seq.empty), - hashtags = tweet.hashtags.getOrElse(Seq.empty), - cashtags = tweet.cashtags.getOrElse(Seq.empty), - media = tweet.media.getOrElse(Seq.empty), - reply = tweet.coreData.flatMap(_.reply), - directedAtUser = tweet.coreData.flatMap(_.directedAtUser), - share = tweet.coreData.flatMap(_.share), - quotedTweet = tweet.quotedTweet, - geo = toGeo, - hasTakedown = coreData.hasTakedown, - nsfwUser = coreData.nsfwUser, - nsfwAdmin = coreData.nsfwAdmin, - counts = tweet.counts, - deviceSource = tweet.deviceSource, - narrowcast = coreData.narrowcast, - 
takedownCountryCodes = tweet.takedownCountryCodes, - perspective = tweet.perspective, - cards = tweet.cards, - card2 = tweet.card2, - nullcast = coreData.nullcast, - conversationId = coreData.conversationId, - language = tweet.language, - trackingId = coreData.trackingId, - spamLabels = tweet.spamLabels, - hasMedia = coreData.hasMedia, - contributor = tweet.contributor, - mediaTags = tweet.mediaTags - ) - } - - def toTweet(status: Status): Tweet = { - val coreData = - TweetCoreData( - userId = status.userId, - text = status.text, - createdVia = status.createdVia, - createdAtSecs = status.createdAt, - reply = status.reply, - directedAtUser = status.directedAtUser, - share = status.share, - hasTakedown = status.hasTakedown, - nsfwUser = status.nsfwUser, - nsfwAdmin = status.nsfwAdmin, - nullcast = status.nullcast, - narrowcast = status.narrowcast, - trackingId = status.trackingId, - conversationId = status.conversationId, - hasMedia = status.hasMedia, - coordinates = toCoords(status), - placeId = status.geo.flatMap(_.placeId) - ) - - Tweet( - id = status.id, - coreData = Some(coreData), - urls = Some(status.urls), - mentions = Some(status.mentions), - hashtags = Some(status.hashtags), - cashtags = Some(status.cashtags), - media = Some(status.media), - place = status.geo.flatMap(_.place), - quotedTweet = status.quotedTweet, - takedownCountryCodes = status.takedownCountryCodes, - counts = status.counts, - deviceSource = status.deviceSource, - perspective = status.perspective, - cards = status.cards, - card2 = status.card2, - language = status.language, - spamLabels = status.spamLabels, - contributor = status.contributor, - mediaTags = status.mediaTags - ) - } - - private def toCoords(status: Status): Option[GeoCoordinates] = - status.geo.map { geo => - if (geo.coordinates.nonEmpty) geo.coordinates.get - // Status from monorail have the coordinates as the top level fields in Geo, - // while the nested struct is empty. So we need to copy from the flat fields. 
- else - GeoCoordinates( - latitude = geo.latitude, - longitude = geo.longitude, - geoPrecision = geo.geoPrecision, - display = geo.entityId == 2 - ) - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.docx new file mode 100644 index 000000000..d643e9507 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala deleted file mode 100644 index 0dae0bfdc..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.tweetypie.util.logging - -import ch.qos.logback.classic.spi.ILoggingEvent -import ch.qos.logback.classic.spi.ThrowableProxy -import ch.qos.logback.core.filter.Filter -import ch.qos.logback.core.spi.FilterReply -import com.twitter.tweetypie.serverutil.ExceptionCounter.isAlertable - -/** - * This class is currently being used by logback to log alertable exceptions to a seperate file. - * - * Filters do not change the log levels of individual loggers. Filters filter out specific messages - * for specific appenders. This allows us to have a log file with lots of information you will - * mostly not need and a log file with only important information. This type of filtering cannot be - * accomplished by changing the log levels of loggers, because the logger levels are global. We want - * to change the semantics for specific destinations (appenders). 
- */ -class AlertableExceptionLoggingFilter extends Filter[ILoggingEvent] { - private[this] val IgnorableLoggers: Set[String] = - Set( - "com.github.benmanes.caffeine.cache.BoundedLocalCache", - "abdecider", - "org.apache.kafka.common.network.SaslChannelBuilder", - "com.twitter.finagle.netty4.channel.ChannelStatsHandler$" - ) - - def include(proxy: ThrowableProxy, event: ILoggingEvent): Boolean = - isAlertable(proxy.getThrowable()) && !IgnorableLoggers(event.getLoggerName) - - override def decide(event: ILoggingEvent): FilterReply = - if (!isStarted) { - FilterReply.NEUTRAL - } else { - event.getThrowableProxy() match { - case proxy: ThrowableProxy if include(proxy, event) => - FilterReply.NEUTRAL - case _ => - FilterReply.DENY - } - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD deleted file mode 100644 index 68702d3cf..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD +++ /dev/null @@ -1,17 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = [ - "bazel-compatible", - "logging_impl_check_whitelisted_target", - ], - dependencies = [ - "3rdparty/jvm/ch/qos/logback:logback-classic", - "3rdparty/jvm/com/google/guava", - "finagle/finagle-memcached/src/main/scala", - "src/thrift/com/twitter/servo:servo-exception-java", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", - "util/util-stats/src/main/scala/com/twitter/finagle/stats", - ], -) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD.docx new file mode 100644 index 000000000..d6e670d7d Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD.docx differ diff --git 
a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.docx new file mode 100644 index 000000000..c125af718 Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala deleted file mode 100644 index fe035bddf..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.twitter.tweetypie.util.logging - -import ch.qos.logback.classic.Level -import ch.qos.logback.classic.spi.ILoggingEvent -import ch.qos.logback.core.filter.Filter -import ch.qos.logback.core.spi.FilterReply - -/** - * This class is currently being used by logback to log statements from tweetypie at one level and - * log statements from other packages at another. - * - * Filters do not change the log levels of individual loggers. Filters filter out specific messages - * for specific appenders. This allows us to have a log file with lots of information you will - * mostly not need and a log file with only important information. This type of filtering cannot be - * accomplished by changing the log levels of loggers, because the logger levels are global. We want - * to change the semantics for specific destinations (appenders). 
- */ -class OnlyImportantLogsLoggingFilter extends Filter[ILoggingEvent] { - private[this] def notImportant(loggerName: String): Boolean = - !loggerName.startsWith("com.twitter.tweetypie") - - override def decide(event: ILoggingEvent): FilterReply = - if (!isStarted || event.getLevel.isGreaterOrEqual(Level.WARN)) { - FilterReply.NEUTRAL - } else if (notImportant(event.getLoggerName())) { - FilterReply.DENY - } else { - FilterReply.NEUTRAL - } -} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.docx b/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.docx new file mode 100644 index 000000000..9c2c1f0ca Binary files /dev/null and b/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.docx differ diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala deleted file mode 100644 index c99d3afa7..000000000 --- a/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala +++ /dev/null @@ -1,9 +0,0 @@ -package com.twitter.tweetypie - -package object util { - type TweetId = Long - type UserId = Long - type MediaId = Long - type ConversationId = Long - type PlaceId = String -} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD b/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD deleted file mode 100644 index 1ccf63deb..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD +++ /dev/null @@ -1,353 +0,0 @@ -create_thrift_libraries( - base_name = "media-entity", - sources = ["media_entity.thrift"], - platform = "java8", - tags = ["bazel-compatible"], - dependency_roots = [ - "mediaservices/commons/src/main/thrift", - ], - export_roots = [ - "mediaservices/commons/src/main/thrift:thrift", - ], - generate_languages = [ - "go", - "java", - "lua", - "python", - "ruby", - "scala", - "strato", - ], - provides_java_name = "tweetypie-media-entity-thrift-java", - provides_scala_name = 
"tweetypie-media-entity-thrift-scala", -) - -create_thrift_libraries( - base_name = "edit-control", - sources = ["edit_control.thrift"], - platform = "java8", - tags = ["bazel-compatible"], - generate_languages = [ - "go", - "java", - "lua", - "python", - "ruby", - "scala", - "strato", - ], - provides_java_name = "tweetypie-edit-control-thrift-java", - provides_scala_name = "tweetypie-edit-control-thrift-scala", -) - -create_thrift_libraries( - base_name = "api-fields", - sources = ["api_fields.thrift"], - platform = "java8", - tags = ["bazel-compatible"], - generate_languages = [ - "go", - "java", - "lua", - "python", - "ruby", - "scala", - "strato", - ], - provides_java_name = "tweetypie-api-fields-thrift-java", - provides_scala_name = "tweetypie-api-fields-thrift-scala", -) - -create_thrift_libraries( - base_name = "note-tweet", - sources = ["note_tweet.thrift"], - platform = "java8", - tags = ["bazel-compatible"], - generate_languages = [ - "go", - "java", - "lua", - "python", - "ruby", - "scala", - "strato", - ], - provides_java_name = "tweetypie-note-tweet-thrift-java", - provides_scala_name = "tweetypie-note-tweet-thrift-scala", -) - -create_thrift_libraries( - base_name = "tweet", - sources = [ - "creative-entity-enrichments/creative_entity_enrichments.thrift", - "geo/tweet_location_info.thrift", - "media/media_ref.thrift", - "tweet.thrift", - "unmentions/unmentions.thrift", - ], - platform = "java8", - tags = ["bazel-compatible"], - dependency_roots = [ - ":api-fields", - ":edit-control", - ":media-entity", - ":note-tweet", - "mediaservices/commons/src/main/thrift", - "src/thrift/com/twitter/content-health/toxicreplyfilter", - "src/thrift/com/twitter/dataproducts:enrichments_profilegeo", - "src/thrift/com/twitter/escherbird:tweet-annotation", - "src/thrift/com/twitter/expandodo:cards", - "src/thrift/com/twitter/geoduck", - "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions", - "src/thrift/com/twitter/spam/rtf:safety-label", - 
"src/thrift/com/twitter/timelines/self_thread:thrift", - "src/thrift/com/twitter/tseng/withholding:thrift", - "src/thrift/com/twitter/tweet_pivots:tweet-pivots", - "strato/config/src/thrift/com/twitter/strato/columns/creative_entity_enrichments", - "unified-cards/thrift/src/main/thrift:thrift-contract", - ], - export_roots = [ - ":api-fields", - ":edit-control", - ":media-entity", - ":note-tweet", - "mediaservices/commons/src/main/thrift:thrift", - "src/thrift/com/twitter/content-health/toxicreplyfilter", - "src/thrift/com/twitter/dataproducts:enrichments_profilegeo", - "src/thrift/com/twitter/escherbird:tweet-annotation", - "src/thrift/com/twitter/expandodo:cards", - "src/thrift/com/twitter/geoduck:geoduck", - "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions", - "src/thrift/com/twitter/spam/rtf:safety-label", - "src/thrift/com/twitter/timelines/self_thread:thrift", - "src/thrift/com/twitter/tseng/withholding:thrift", - "src/thrift/com/twitter/tweet_pivots:tweet-pivots", - "strato/config/src/thrift/com/twitter/strato/columns/creative_entity_enrichments", - ], - generate_languages = [ - "go", - "java", - "lua", - "python", - "scala", - "strato", - ], - provides_java_name = "tweetypie-tweet-thrift-java", - provides_python_name = "tweetypie-tweet-thrift-python", - provides_scala_name = "tweetypie-tweet-thrift-scala", -) - -create_thrift_libraries( - base_name = "service", - sources = [ - "deleted_tweet.thrift", - "tweet_service.thrift", - ], - platform = "java8", - tags = ["bazel-compatible"], - dependency_roots = [ - ":audit", - ":transient_context", - ":tweet", - "carousel/service/thrift:service", - "incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift", - "mediaservices/commons/src/main/thrift", - "src/thrift/com/twitter/bouncer:bounce-action-thrift", - "src/thrift/com/twitter/context:feature-context", - "src/thrift/com/twitter/servo:servo-exception", - "src/thrift/com/twitter/spam/features:safety-meta-data", - 
"src/thrift/com/twitter/spam/rtf:safety-label", - "src/thrift/com/twitter/spam/rtf:safety-level", - "src/thrift/com/twitter/spam/rtf:safety-result", - "src/thrift/com/twitter/tseng/withholding:thrift", - ], - export_roots = [ - ":transient_context", - ":tweet", - "carousel/service/thrift:service", - "incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift", - "src/thrift/com/twitter/bouncer:bounce-action-thrift", - "src/thrift/com/twitter/context:feature-context", - "src/thrift/com/twitter/spam/features:safety-meta-data", - "src/thrift/com/twitter/spam/rtf:safety-level", - "src/thrift/com/twitter/spam/rtf:safety-result", - ], - generate_languages = [ - "go", - "java", - "python", - "scala", - "strato", - ], - provides_java_name = "tweetypie-service-thrift-java", - provides_python_name = "tweetypie-service-thrift-python", - provides_scala_name = "tweetypie-service-thrift-scala", -) - -create_thrift_libraries( - base_name = "events", - sources = [ - "retweet_archival_event.thrift", - "tweet_events.thrift", - ], - platform = "java8", - tags = ["bazel-compatible"], - dependency_roots = [ - ":audit", - ":transient_context", - ":tweet", - "src/thrift/com/twitter/gizmoduck:user-thrift", - ], - export_roots = [ - ":audit", - ":transient_context", - ":tweet", - "src/thrift/com/twitter/gizmoduck:user-thrift", - ], - generate_languages = [ - "java", - "python", - "scala", - "strato", - ], - provides_java_name = "tweetypie-events-thrift-java", - provides_scala_name = "tweetypie-events-thrift-scala", -) - -create_thrift_libraries( - base_name = "audit", - sources = ["tweet_audit.thrift"], - platform = "java8", - tags = ["bazel-compatible"], - generate_languages = [ - "go", - "java", - "lua", - "python", - "scala", - "strato", - ], - provides_java_name = "tweetypie-audit-thrift-java", - provides_scala_name = "tweetypie-audit-thrift-scala", -) - -create_thrift_libraries( - base_name = "deprecated", - sources = ["deprecated.thrift"], - platform = "java8", - tags = 
["bazel-compatible"], - dependency_roots = [ - ":service", - ":tweet", - "mediaservices/commons/src/main/thrift", - "src/thrift/com/twitter/expandodo:cards", - "src/thrift/com/twitter/gizmoduck:user-thrift", - "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity", - ], - generate_languages = [ - "java", - "python", - "scala", - "strato", - ], - provides_java_name = "tweetypie-deprecated-thrift-java", - provides_scala_name = "tweetypie-deprecated-thrift-scala", -) - -create_thrift_libraries( - base_name = "delete_location_data", - sources = ["delete_location_data.thrift"], - tags = ["bazel-compatible"], - provides_java_name = "delete-location-data-java", - provides_scala_name = "delete-location-data-scala", -) - -create_thrift_libraries( - base_name = "transient_context", - sources = ["transient_context.thrift"], - platform = "java8", - tags = ["bazel-compatible"], - dependency_roots = [ - ":tweet", - ], - generate_languages = [ - "go", - "java", - "lua", - "python", - "scala", - "strato", - ], - provides_java_name = "transient-context-java", - provides_scala_name = "transient-context-scala", -) - -create_thrift_libraries( - base_name = "tweet_comparison_service", - sources = ["tweet_comparison_service.thrift"], - tags = ["bazel-compatible"], - dependency_roots = [ - ":service", - "src/thrift/com/twitter/context:twitter-context", - ], - generate_languages = [ - "java", - "scala", - ], - provides_java_name = "tweet-comparison-service-thrift-java", - provides_scala_name = "tweet-comparison-service-thrift-scala", -) - -create_thrift_libraries( - base_name = "tweet_service_graphql", - sources = ["tweet_service_graphql.thrift"], - platform = "java8", - tags = ["bazel-compatible"], - dependency_roots = [ - "src/thrift/com/twitter/ads/callback:engagement_request", - "strato/config/src/thrift/com/twitter/strato/graphql", - ], - generate_languages = [ - "scala", - "strato", - ], - provides_scala_name = "tweet-service-graphql-scala", -) - -create_thrift_libraries( 
- base_name = "stored-tweet-info", - sources = [ - "stored_tweet_info.thrift", - ], - platform = "java8", - tags = ["bazel-compatible"], - dependency_roots = [ - ":tweet", - ], - generate_languages = [ - "java", - "scala", - "strato", - ], - provides_java_name = "tweetypie-stored-tweet-info-thrift-java", - provides_scala_name = "tweetypie-stored-tweet-info-thrift-scala", -) - -create_thrift_libraries( - base_name = "tweet-service-federated", - sources = [ - "tweet_service_federated.thrift", - ], - platform = "java8", - tags = ["bazel-compatible"], - dependency_roots = [ - ":stored-tweet-info", - ], - generate_languages = [ - "java", - "scala", - "strato", - ], - provides_java_name = "tweetypie-service-federated-thrift-java", - provides_scala_name = "tweetypie-service-federated-thrift-scala", -) diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD.docx new file mode 100644 index 000000000..225a980cc Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.docx new file mode 100644 index 000000000..76300febb Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift deleted file mode 100644 index d48cbf171..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift +++ /dev/null @@ -1,18 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace py gen.twitter.tweetypie.api_fields -namespace rb TweetyPie -// Specific namespace to avoid golang circular import -namespace go tweetypie.tweet - -// 
Structs used specifically for rendering through graphql. - -/** - * Perspective of a Tweet from the point of view of a User. - */ -struct TweetPerspective { - 1: bool favorited - 2: bool retweeted - 3: optional bool bookmarked -}(persisted='true', hasPersonalData = 'false', strato.graphql.typename='TweetPerspective') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.docx new file mode 100644 index 000000000..5ce072d1b Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift deleted file mode 100644 index 48a50ca03..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift +++ /dev/null @@ -1,21 +0,0 @@ -namespace java com.twitter.tweetypie.creative_entity_enrichments.thriftjava -#@ namespace scala com.twitter.tweetypie.creative_entity_enrichments.thriftscala -#@ namespace strato com.twitter.tweetypie.creative_entity_enrichments -namespace py gen.twitter.tweetypie.creative_entity_enrichments - -include "com/twitter/strato/columns/creative_entity_enrichments/enrichments.thrift" - -struct CreativeEntityEnrichmentRef { - 1: required i64 enrichmentId -}(persisted='true', hasPersonalData='false') - -/** - * This struct represents a collection of enrichments applied to a tweet. - * The enrichment for a tweet is just a metadata attached to a tweet - * Each enrichment has a unique id (EnrichmentId) to uniquely identify an enrichment. 
- * - * enrichment_type signifies the type of an enrichment (eg: Interactive Text). - */ -struct CreativeEntityEnrichments { - 1: required map enrichment_type_to_ref -}(persisted='true', hasPersonalData='false') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.docx new file mode 100644 index 000000000..a105982ec Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift deleted file mode 100644 index 35f68dd10..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift +++ /dev/null @@ -1,32 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -namespace py gen.twitter.tweetypie -namespace rb TweetyPie -namespace go tweetypie - -/** - * Event that triggers deletion of the geo information on tweets created - * at timestamp_ms or earlier. - */ -struct DeleteLocationData { - /** - * The id of the user whose tweets should have their geo information - * removed. - */ - 1: required i64 user_id (personalDataType='UserId') - - /** - * The time at which this request was initiated. Tweets by this user - * whose snowflake ids contain timestamps less than or equal to this - * value will no longer be returned with geo information. - */ - 2: required i64 timestamp_ms - - /** - * The last time this user requested deletion of location data prior - * to this request. This value may be omitted, but should be included - * if available for implementation efficiency, since it eliminates the - * need to scan tweets older than this value for geo information. 
- */ - 3: optional i64 last_timestamp_ms -}(persisted='true', hasPersonalData='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.docx new file mode 100644 index 000000000..20fdb0771 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift deleted file mode 100644 index cedf451d5..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift +++ /dev/null @@ -1,86 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace py gen.twitter.tweetypie.deletedtweet -namespace rb TweetyPie -namespace go tweetypie - -// Structs used for response from getDeletedTweets - -struct DeletedTweetMediaEntity { - 1: required i64 id - 2: required i8 mediaType - 3: required i16 width - 4: required i16 height -} (persisted = 'true') - -struct DeletedTweetShare { - 1: required i64 sourceStatusId - 2: required i64 sourceUserId - 3: required i64 parentStatusId -} (persisted = 'true') - -/** - * A tweet that has been soft- or hard-deleted. - * - * Originally DeletedTweet used the same field ids as tbird.Status. - * This is no longer the case. - */ -struct DeletedTweet { - // Uses the same field ids as tbird.thrift so we can easily map and add fields later - 1: required i64 id - - /** - * User who created the tweet. Only available for soft-deleted tweets. - */ - 2: optional i64 userId - - /** - * Content of the tweet. Only available for soft-deleted tweets. - */ - 3: optional string text - - /** - * When the tweet was created. Only available for soft-deleted tweets. 
- */ - 5: optional i64 createdAtSecs - - /** - * Retweet information if the deleted tweet was a retweet. Only available - * for soft-deleted tweets. - */ - 7: optional DeletedTweetShare share - - /** - * Media metadata if the deleted tweet included media. Only available for - * soft-deleted tweets. - */ - 14: optional list media - - /** - * The time when this tweet was deleted by a user, in epoch milliseconds, either normally (aka - * "softDelete") or via a bouncer flow (aka "bounceDelete"). - * - * This data is not available for all deleted tweets. - */ - 18: optional i64 deletedAtMsec - - /** - * The time when this tweet was permanently deleted, in epoch milliseconds. - * - * This data is not available for all deleted tweets. - */ - 19: optional i64 hardDeletedAtMsec - - /** - * The ID of the NoteTweet associated with this Tweet if one exists. This is used by safety tools - * to fetch the NoteTweet content when viewing soft deleted Tweets. - */ - 20: optional i64 noteTweetId - - /** - * Specifies if the Tweet can be expanded into the NoteTweet, or if they have the same text. Can - * be used to distinguish between Longer Tweets and RichText Tweets. 
- */ - 21: optional bool isExpandable -} (persisted = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.docx new file mode 100644 index 000000000..7575bd8c4 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift deleted file mode 100644 index 55cdde2d7..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift +++ /dev/null @@ -1,99 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace py gen.twitter.tweetypie.deprecated -namespace rb TweetyPie - -include "com/twitter/expandodo/cards.thrift" -include "com/twitter/gizmoduck/user.thrift" -include "com/twitter/tweetypie/media_entity.thrift" -include "com/twitter/tweetypie/tweet.thrift" -include "com/twitter/tweetypie/tweet_service.thrift" - -/** - * @deprecated Use Place - */ -struct Geo { - /** - * @deprecated Use coordinates.latitude - */ - 1: double latitude = 0.0 (personalDataType = 'GpsCoordinates') - - /** - * @deprecated Use coordinates.longitude - */ - 2: double longitude = 0.0 (personalDataType = 'GpsCoordinates') - - /** - * @deprecated Use coordinates.geo_precision - */ - 3: i32 geo_precision = 0 - - /** - * 0: don't show lat/long - * 2: show - * - * @deprecated - */ - 4: i64 entity_id = 0 - - /** - * @deprecated Use place_id - */ - 5: optional string name (personalDataType = 'PublishedCoarseLocationTweet') - - 6: optional tweet.Place place // provided if StatusRequestOptions.load_places is set - 7: optional string place_id // ex: ad2f50942562790b - 8: optional tweet.GeoCoordinates coordinates -}(persisted = 'true', hasPersonalData = 'true') - -/** - * @deprecated Use Tweet and APIs 
that accept or return Tweet. - */ -struct Status { - 1: i64 id (personalDataType = 'TweetId') - 2: i64 user_id (personalDataType = 'UserId') - 3: string text (personalDataType = 'PrivateTweets, PublicTweets') - 4: string created_via (personalDataType = 'ClientType') - 5: i64 created_at // in seconds - 6: list urls = [] - 7: list mentions = [] - 8: list hashtags = [] - 29: list cashtags = [] - 9: list media = [] - 10: optional tweet.Reply reply - 31: optional tweet.DirectedAtUser directed_at_user - 11: optional tweet.Share share - 32: optional tweet.QuotedTweet quoted_tweet - 12: optional tweet.Contributor contributor - 13: optional Geo geo - // has_takedown indicates if there is a takedown specifically on this tweet. - // takedown_country_codes contains takedown countries for both the tweet and the user, - // so has_takedown might be false while takedown_country_codes is non-empty. - 14: bool has_takedown = 0 - 15: bool nsfw_user = 0 - 16: bool nsfw_admin = 0 - 17: optional tweet.StatusCounts counts - // 18: obsoleted - 19: optional tweet.DeviceSource device_source // not set on DB failure - 20: optional tweet.Narrowcast narrowcast - 21: optional list takedown_country_codes (personalDataType = 'ContentRestrictionStatus') - 22: optional tweet.StatusPerspective perspective // not set if no user ID or on TLS failure - 23: optional list cards // only included if StatusRequestOptions.include_cards == true - // only included when StatusRequestOptions.include_cards == true - // and StatusRequestOptions.cards_platform_key is set to valid value - 30: optional cards.Card2 card2 - 24: bool nullcast = 0 - 25: optional i64 conversation_id (personalDataType = 'TweetId') - 26: optional tweet.Language language - 27: optional i64 tracking_id (personalDataType = 'ImpressionId') - 28: optional map spam_labels - 33: optional bool has_media - // obsolete 34: optional list topic_labels - // Additional fields for flexible schema - 101: optional tweet.TweetMediaTags media_tags - 103: 
optional tweet.CardBindingValues binding_values - 104: optional tweet.ReplyAddresses reply_addresses - 105: optional tweet.TwitterSuggestInfo twitter_suggest_info -}(persisted = 'true', hasPersonalData = 'true') - diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.docx new file mode 100644 index 000000000..355adb72d Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift deleted file mode 100644 index d1eb83a33..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift +++ /dev/null @@ -1,71 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace py gen.twitter.tweetypie.edit_control -namespace rb TweetyPie -// Specific namespace to avoid golang circular import -namespace go tweetypie.tweet - -/** - * EditControlInitial is present on all new Tweets. Initially, edit_tweet_ids will only contain the id of the new Tweet. - * Subsequent edits will append the edited Tweet ids to edit_tweet_ids. -**/ -struct EditControlInitial { - /** - * A list of all edits of this initial Tweet, including the initial Tweet id, - * and in ascending time order (the oldest revision first). - */ - 1: required list edit_tweet_ids = [] (personalDataType = 'TweetId', strato.json.numbers.type = 'string') - /** - * Epoch timestamp in milli-seconds (UTC) after which the tweet will no longer be editable. - */ - 2: optional i64 editable_until_msecs (strato.json.numbers.type = 'string') - /** - * Number of edits that are available for this Tweet. This starts at 5 and decrements with each edit. 
- */ - 3: optional i64 edits_remaining (strato.json.numbers.type = 'string') - - /** - * Specifies whether the Tweet has any intrinsic properties that mean it can't be edited - * (for example, we have a business rule that poll Tweets can't be edited). - * - * If a Tweet edit expires due to time frame or number of edits, this field still is set - * to true for Tweets that could have been edited. - */ - 4: optional bool is_edit_eligible -}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='EditControlInitial') - -/** - * EditControlEdit is present for any Tweets that are an edit of another Tweet. The full list of edits can be retrieved - * from the edit_control_initial field, which will always be hydrated. -**/ -struct EditControlEdit { - /** - * The id of the initial Tweet in an edit chain - */ - 1: required i64 initial_tweet_id (personalDataType = 'TweetId', strato.json.numbers.type = 'string') - /** - * This field is only used during hydration to return the EditControl of the initial Tweet for - * a subsequently edited version. - */ - 2: optional EditControlInitial edit_control_initial -}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='EditControlEdit') - - -/** - * Tweet metadata about edits of a Tweet. A list of edits to a Tweet are represented as a chain of - * Tweets linked to each other using the EditControl field. - * - * EditControl can be either EditControlInitial which means that the Tweet is unedited or the first Tweet in - * an edit chain, or EditControlEdit which means it is a Tweet in the edit chain after the first - * Tweet. 
- */ -union EditControl { - 1: EditControlInitial initial - 2: EditControlEdit edit -}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='EditControl') - - -service FederatedServiceBase { - EditControl getEditControl(1: required i64 tweetId) -} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.docx new file mode 100644 index 000000000..00885ce6b Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift deleted file mode 100644 index 500e9ffcf..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift +++ /dev/null @@ -1,72 +0,0 @@ -namespace java com.twitter.tweetypie.geo.thriftjava -#@namespace scala com.twitter.tweetypie.geo.thriftscala -#@namespace strato com.twitter.tweetypie.geo -namespace py gen.twitter.tweetypie.geo -namespace rb TweetyPie - -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// // -// This file contains type definitions to support the Geo field added to Tweet flexible schema ONLY. // -// It is unlikely to be re-usable so treat it them as private outside the subpackage defined here. // -// // -// In respect to back storage, consider it has limited capacity, provisioned to address particular use cases. // -// There is no free resources outside its current usage plus a future projection (see Storage Capacity below). // -// For example: // -// 1- Adding extra fields to TweetLocationInfo will likely require extra storage. // -// 2- Increase on front-load QPS (read or write) may require extra sharding to not impact delay percentiles. 
// -// Failure to observe these may impact Tweetypie write-path and read-path. // -// // -////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -/** - * Flags how a _Place_ is published into a tweet (a.k.a. geotagging). - */ -enum GeoTagPlaceSource { - /** - * Tweet is tagged to a place but it is impossible to determine its source. - * E.g.: created from non-TOO clients or legacy TOO clients - */ - UNKNOWN = 0 - /** - * Tweet is tagged to a Place by reverse geocoding its coordinates. - */ - COORDINATES = 1 - /** - * Tweet is tagged to a Place by the client application on user's behalf. - * N.B.: COORDINATES is not AUTO because the API request doesn't publish a Place - */ - AUTO = 2 - EXPLICIT = 3 - - // free to use, added for backwards compatibility on client code. - RESERVED_4 = 4 - RESERVED_5 = 5 - RESERVED_6 = 6 - RESERVED_7 = 7 -} - -/** - * Information about Tweet's Location(s). - * Designed to enable custom consumption experiences of the Tweet's location(s). - * E.g.: Tweet's perspectival view of a Location entity - * - * To guarantee user's rights of privacy: - * - * - Only include user's published location data or unpublished location data that - * is EXPLICITLY set as publicly available by the user. - * - * - Never include user's unpublished (aka shared) location data that - * is NOT EXPLICITLY set as publicly available by the user. - * - * E.g.: User is asked to share their GPS coordinates with Twitter from mobile client, - * under the guarantee it won't be made publicly available. - * - * Design notes: - * - Tweet's geotagged Place is represented by Tweet.place instead of being a field here. - */ -struct TweetLocationInfo { - /** - * Represents how the Tweet author published the "from" location in a Tweet (a.k.a geo-tagged). 
- */ - 1: optional GeoTagPlaceSource geotag_place_source -}(persisted='true', hasPersonalData='false') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.docx new file mode 100644 index 000000000..1896060dd Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift deleted file mode 100644 index f2a739094..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift +++ /dev/null @@ -1,20 +0,0 @@ -namespace java com.twitter.tweetypie.media.thriftjava -#@namespace scala com.twitter.tweetypie.media.thriftscala -#@namespace strato com.twitter.tweetypie.media -namespace py gen.twitter.tweetypie.media -namespace rb TweetyPie - - -/** -* A MediaRef represents a reference to a piece of media in MediaInfoService, along with metadata -* about the source Tweet that the media came from in case of pasted media. 
-**/ -struct MediaRef { - 1: string generic_media_key (personalDataType = 'MediaId') - - // For Tweets with pasted media, the id of the Tweet where this media was copied from - 2: optional i64 source_tweet_id (personalDataType = 'TweetId') - - // The author of source_tweet_id - 3: optional i64 source_user_id (personalDataType = 'UserId') -}(persisted='true', hasPersonalData='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.docx new file mode 100644 index 000000000..2d0886bf6 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift deleted file mode 100644 index c5b411710..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift +++ /dev/null @@ -1,135 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace py gen.twitter.tweetypie.media_entity -namespace rb TweetyPie.media_entity -namespace go tweetypie.media_entity - -include "com/twitter/mediaservices/commons/MediaInformation.thrift" -include "com/twitter/mediaservices/commons/MediaCommon.thrift" -include "com/twitter/mediaservices/commons/TweetMedia.thrift" - -/** - * DEPRECATED - * An RGB color. - * - * Each i8 should be interpreted as unsigned, ranging in value from 0 to - * 255. Borrowed from gizmoduck/user.thrift. - * - * The way in which we use ColorValue here is as metadata for a media file, - * so it needs to be annotated as having personal data. 
Fields that are of - * structured types cannot be annotated, so we have to put the annotation - * on the structure itself's fields even though it's more confusing to do so - * and could introduce issues if someone else reuses ColorValue outside of - * the context of a media file. - */ -struct ColorValue { - 1: i8 red (personalDataType = 'MediaFile') - 2: i8 green (personalDataType = 'MediaFile') - 3: i8 blue (personalDataType = 'MediaFile') -}(persisted = 'true', hasPersonalData = 'true') - -struct MediaEntity { - 1: i16 from_index (personalDataType = 'MediaFile') - 2: i16 to_index (personalDataType = 'MediaFile') - - /** - * The shortened t.co url found in the tweet text. - */ - 3: string url (personalDataType = 'ShortUrl') - - /** - * The text to display in place of the shortened url. - */ - 4: string display_url (personalDataType = 'LongUrl') - - /** - * The url to the media asset (a preview image in the case of a video). - */ - 5: string media_url (personalDataType = 'LongUrl') - - /** - * The https version of media_url. - */ - 6: string media_url_https (personalDataType = 'LongUrl') - - /** - * The expanded media permalink. - */ - 7: string expanded_url (personalDataType = 'LongUrl') - - 8: MediaCommon.MediaId media_id (strato.space = "Media", strato.name = "media", personalDataType = 'MediaId') - 9: bool nsfw - 10: set sizes - 11: string media_path - 12: optional bool is_protected - - /** - * The tweet that this MediaEntity was originally attached to. This value will be set if this - * MediaEntity is either on a retweet or a tweet with pasted-pic. - */ - 13: optional i64 source_status_id (strato.space = "Tweet", strato.name = "sourceStatus", personalDataType = 'TweetId') - - - /** - * The user to attribute views of the media to. - * - * This field should be set when the media's attributableUserId field does not match the current - * Tweet's owner. Retweets of a Tweet with media and "managed media" are some reasons this may - * occur. 
When the value is None any views should be attributed to the tweet's owner. - **/ - 14: optional i64 source_user_id (strato.space = "User", strato.name = "sourceUser", personalDataType = 'UserId') - - /** - * Additional information specific to the media type. - * - * This field is optional with images (as the image information is in the - * previous fields), but required for animated GIF and native video (as, in - * this case, the previous fields only describe the preview image). - */ - 15: optional TweetMedia.MediaInfo media_info - - /** - * DEPRECATED - * The dominant color for the entire image (or keyframe for video or GIF). - * - * This can be used for placeholders while the media downloads (either a - * solid color or a gradient using the grid). - */ - 16: optional ColorValue dominant_color_overall - - /** - * DEPRECATED - * Dominant color of each quadrant of the image (keyframe for video or GIF). - * - * If present this list should have 4 elements, corresponding to - * [top_left, top_right, bottom_left, bottom_right] - */ - 17: optional list dominant_color_grid - - // obsolete 18: optional map extensions - - /** - * Stratostore extension points data encoded as a Strato record. - */ - 19: optional binary extensions_reply - - /** - * Holds metadata defined by the user for the tweet-asset relationship. - */ - 20: optional MediaInformation.UserDefinedProductMetadata metadata - - /** - * Media key used to interact with the media systems. - */ - 21: optional MediaCommon.MediaKey media_key - - /** - * Flexible structure for additional media metadata. This field is only - * included in a read-path request if specifically requested. It will - * always be included, when applicable, in write-path responses. 
- */ - 22: optional MediaInformation.AdditionalMetadata additional_metadata - -}(persisted='true', hasPersonalData = 'true') - diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.docx new file mode 100644 index 000000000..662578aad Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift deleted file mode 100644 index e8313a924..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift +++ /dev/null @@ -1,13 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace py gen.twitter.tweetypie.tweet_note -namespace rb TweetyPie -// Specific namespace to avoid golang circular import -namespace go tweetypie.tweet - -// Struct representing a NoteTweet associated with a Tweet -struct NoteTweet { - 1: required i64 id (strato.space = 'NoteTweet', strato.name = "note_tweet", personalDataType = 'TwitterArticleID') - 2: optional bool is_expandable (strato.name = "is_expandable") -} (persisted='true', hasPersonalData = 'true', strato.graphql.typename = 'NoteTweetData') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.docx new file mode 100644 index 000000000..5be4ef46e Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift deleted file mode 100644 index 0476dbded..000000000 --- 
a/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift +++ /dev/null @@ -1,30 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -namespace py gen.twitter.tweetypie.retweet_archival_event -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace rb TweetyPie -namespace go tweetypie - -/** - * This event is published to "retweet_archival_events" when Tweetypie processes an - * AsyncSetRetweetVisibilityRequest. - * - * This is useful for services (Interaction Counter, Insights Track) that need to - * know when the retweet engagement count of a tweet has been modified due to the - * retweeting user being put in to or out of suspension or read-only mode. - */ -struct RetweetArchivalEvent { - // The retweet id affected by this archival event. - 1: required i64 retweet_id (personalDataType = 'TweetId') - // The source tweet id for the retweet. This tweet had its retweet count modified. - 2: required i64 src_tweet_id (personalDataType = 'TweetId') - 3: required i64 retweet_user_id (personalDataType = 'UserId') - 4: required i64 src_tweet_user_id (personalDataType = 'UserId') - // Approximate time in milliseconds for when the count modification occurred, based on - // Unix Epoch (1 January 1970 00:00:00 UTC). Tweetypie will use the time when it is - // about to send the asynchronous write request to tflock for this timestamp. - 5: required i64 timestamp_ms - // Marks if this event is for archiving(True) or unarchiving(False) action. - // Archiving indicates an engagement count decrement occurred and unarchiving indicates an incremental. 
- 6: optional bool is_archiving_action -}(persisted='true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD deleted file mode 100644 index c619298c4..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD +++ /dev/null @@ -1,11 +0,0 @@ -create_thrift_libraries( - base_name = "storage_internal", - sources = ["*.thrift"], - platform = "java8", - strict_deps = True, - tags = ["bazel-compatible"], - generate_languages = [ - "java", - "scala", - ], -) diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD.docx new file mode 100644 index 000000000..4bfba1732 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.docx new file mode 100644 index 000000000..fa4ed7b5b Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift deleted file mode 100644 index f614fa762..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift +++ /dev/null @@ -1,79 +0,0 @@ -namespace java com.twitter.tweetypie.storage_internal.thriftjava -#@namespace scala com.twitter.tweetypie.storage_internal.thriftscala - -struct StoredReply { - 1: i64 in_reply_to_status_id (personalDataType = 'TweetId') - 2: i64 in_reply_to_user_id (personalDataType = 'UserId') - 3: optional i64 
conversation_id (personalDataType = 'TweetId') -} (hasPersonalData = 'true', persisted='true') - -struct StoredShare { - 1: i64 source_status_id (personalDataType = 'TweetId') - 2: i64 source_user_id (personalDataType = 'UserId') - 3: i64 parent_status_id (personalDataType = 'TweetId') -} (hasPersonalData = 'true', persisted='true') - -struct StoredGeo { - 1: double latitude (personalDataType = 'GpsCoordinates') - 2: double longitude (personalDataType = 'GpsCoordinates') - 3: i32 geo_precision (personalDataType = 'GpsCoordinates') - 4: i64 entity_id (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') - 5: optional string name (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') -} (hasPersonalData = 'true', persisted='true') - -struct StoredMediaEntity { - 1: i64 id (personalDataType = 'MediaId') - 2: i8 media_type (personalDataType = 'ContentTypeTweetMedia') - 3: i16 width - 4: i16 height -} (hasPersonalData = 'true', persisted='true') - -struct StoredNarrowcast { - 1: optional list language (personalDataType = 'InferredLanguage') - 2: optional list location (personalDataType = 'PublishedCoarseLocationTweet') - 3: optional list ids (personalDataType = 'TweetId') -} (hasPersonalData = 'true', persisted='true') - -struct StoredQuotedTweet { - 1: i64 tweet_id (personalDataType = 'TweetId') // the tweet id being quoted - 2: i64 user_id (personalDataType = 'UserId') // the user id being quoted - 3: string short_url (personalDataType = 'ShortUrl') // tco url - used when rendering in backwards-compat mode -} (hasPersonalData = 'true', persisted='true') - -struct StoredTweet { - 1: i64 id (personalDataType = 'TweetId') - 2: optional i64 user_id (personalDataType = 'UserId') - 3: optional string text (personalDataType = 'PrivateTweets, PublicTweets') - 4: optional string created_via (personalDataType = 'ClientType') - 5: optional i64 created_at_sec (personalDataType = 'PrivateTimestamp, PublicTimestamp') // in 
seconds - - 6: optional StoredReply reply - 7: optional StoredShare share - 8: optional i64 contributor_id (personalDataType = 'Contributor') - 9: optional StoredGeo geo - 11: optional bool has_takedown - 12: optional bool nsfw_user (personalDataType = 'TweetSafetyLabels') - 13: optional bool nsfw_admin (personalDataType = 'TweetSafetyLabels') - 14: optional list media - 15: optional StoredNarrowcast narrowcast - 16: optional bool nullcast - 17: optional i64 tracking_id (personalDataType = 'ImpressionId') - 18: optional i64 updated_at (personalDataType = 'PrivateTimestamp, PublicTimestamp') - 19: optional StoredQuotedTweet quoted_tweet -} (hasPersonalData = 'true', persisted='true') - -struct CoreFields { - 2: optional i64 user_id (personalDataType = 'UserId') - 3: optional string text (personalDataType = 'PrivateTweets, PublicTweets') - 4: optional string created_via (personalDataType = 'ClientType') - 5: optional i64 created_at_sec (personalDataType = 'PrivateTimestamp, PublicTimestamp') - - 6: optional StoredReply reply - 7: optional StoredShare share - 8: optional i64 contributor_id (personalDataType = 'Contributor') - 19: optional StoredQuotedTweet quoted_tweet -} (hasPersonalData = 'true', persisted='true') - -struct InternalTweet { - 1: optional CoreFields core_fields -} (hasPersonalData = 'true', persisted='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.docx new file mode 100644 index 000000000..0eb8c6c42 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift deleted file mode 100644 index 4c37451fc..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift +++ /dev/null @@ -1,52 +0,0 @@ 
-namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie - -include "com/twitter/tweetypie/tweet.thrift" - -struct HardDeleted { - 1: i64 soft_deleted_timestamp_msec - 2: i64 timestamp_msec -} - -struct SoftDeleted { - 1: i64 timestamp_msec -} - -struct BounceDeleted { - 1: i64 timestamp_msec -} - -struct Undeleted { - 1: i64 timestamp_msec -} - -struct ForceAdded { - 1: i64 timestamp_msec -} - -struct NotFound {} - -union StoredTweetState { - 1: HardDeleted hard_deleted - 2: SoftDeleted soft_deleted - 3: BounceDeleted bounce_deleted - 4: Undeleted undeleted - 5: ForceAdded force_added - 6: NotFound not_found -} - -enum StoredTweetError { - CORRUPT = 1, - SCRUBBED_FIELDS_PRESENT = 2, - FIELDS_MISSING_OR_INVALID = 3, - SHOULD_BE_HARD_DELETED = 4, - FAILED_FETCH = 5 -} - -struct StoredTweetInfo { - 1: required i64 tweet_id - 2: optional tweet.Tweet tweet - 3: optional StoredTweetState stored_tweet_state - 4: required list errors = [] -} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.docx new file mode 100644 index 000000000..f3b65efc2 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift deleted file mode 100644 index 942e42d35..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift +++ /dev/null @@ -1,64 +0,0 @@ -/** - * This file contains definitions for transient, passthrough structured data. - * - * If you need to add structured data that Tweetypie accepts in a request - * and passes the data through to one or more backends (eg. EventBus), this - * is the place to put it. 
Tweetypie may or may not inspect the data and - * alter the behavior based on it, but it won't change it. - */ - -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace py gen.twitter.tweetypie.transient_context -namespace rb TweetyPie -namespace go tweetypie - -include "com/twitter/tweetypie/tweet.thrift" - -enum BatchComposeMode { - /** - * This is the first Tweet in a batch. - */ - BATCH_FIRST = 1 - - /** - * This is any of the subsequent Tweets in a batch. - */ - BATCH_SUBSEQUENT = 2 -} - -/** - * Data supplied at Tweet creation time that is not served by Tweetypie, but - * is passed through to consumers of the tweet_events eventbus stream as part - * of TweetCreateEvent. - * This is different from additional_context in that Tweetypie - * inspects this data as well, and we prefer structs over strings. - * If adding a new field that will be passed through to eventbus, prefer this - * over additional_context. - */ -struct TransientCreateContext { - /** - * Indicates whether a Tweet was created using a batch composer, and if so - * position of a Tweet within the batch. - * - * A value of 'None' indicates that the tweet was not created in a batch. - * - * More info: https://docs.google.com/document/d/1dJ9K0KzXPzhk0V-Nsekt0CAdOvyVI8sH9ESEiA2eDW4/edit - */ - 1: optional BatchComposeMode batch_compose - - /** - * Indicates if the tweet contains a live Periscope streaming video. - * - * This enables Periscope LiveFollow. - */ - 2: optional bool periscope_is_live - - /** - * Indicates the userId of the live Periscope streaming video. - * - * This enables Periscope LiveFollow. 
- */ - 3: optional i64 periscope_creator_id (personalDataType='UserId') -}(persisted='true', hasPersonalData='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.docx new file mode 100644 index 000000000..a828388cd Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift deleted file mode 100644 index bffca50c5..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift +++ /dev/null @@ -1,1652 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace py gen.twitter.tweetypie.tweet -namespace rb TweetyPie -// Specific namespace to avoid golang circular import -namespace go tweetypie.tweet - -include "com/twitter/escherbird/tweet_annotation.thrift" -include "com/twitter/expandodo/cards.thrift" -include "com/twitter/content-health/toxicreplyfilter/filtered_reply_details.thrift" -include "com/twitter/dataproducts/enrichments_profilegeo.thrift" -include "com/twitter/geoduck/public/thriftv1/geoduck_common.thrift" -include "com/twitter/mediaservices/commons/MediaCommon.thrift" -include "com/twitter/mediaservices/commons/MediaInformation.thrift" -include "com/twitter/tweetypie/api_fields.thrift" -include "com/twitter/tweetypie/edit_control.thrift" -include "com/twitter/tweetypie/media_entity.thrift" -include "com/twitter/tweetypie/note_tweet.thrift" -include "com/twitter/service/scarecrow/gen/tiered_actions.thrift" -include "com/twitter/spam/rtf/safety_label.thrift" -include "com/twitter/timelines/self_thread/self_thread.thrift" -include "com/twitter/tseng/withholding/withholding.thrift" -include "com/twitter/tweet_pivots/tweet_pivots.thrift" -include 
"com/twitter/tweetypie/geo/tweet_location_info.thrift" -include "com/twitter/tweetypie/media/media_ref.thrift" -include "unified_cards_contract.thrift" -include "com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift" -include "com/twitter/tweetypie/unmentions/unmentions.thrift" - -/** - * IDs are annotated with their corresponding space for Strato. - */ - -/** - * A Reply is data about a tweet in response to another tweet or a - * user. - * - * This struct will be present if: - * 1. This tweet is a reply to another tweet, or - * 2. This tweet is directed at a user (the tweet's text begins with - * an @mention). - */ -struct Reply { - /** - * The id of the tweet that this tweet is replying to. - * - * This field will be missing for directed-at tweets (tweets whose - * text begins with an @mention) that are not replying to another - * tweet. - */ - 1: optional i64 in_reply_to_status_id (strato.space = "Tweet", strato.name = "inReplyToStatus", personalDataType = 'TweetId', tweetEditAllowed='false') - - /** - * The user to whom this tweet is directed. - * - * If in_reply_to_status_id is set, this field is the author of that tweet. - * If in_reply_to_status_id is not set, this field is the user mentioned at - * the beginning of the tweet. - */ - 2: i64 in_reply_to_user_id (strato.space = "User", strato.name = "inReplyToUser", personalDataType = 'UserId') - - /** - * The current username of in_reply_to_user_id. - * - * This field is not set when Gizmoduck returns a failure to Tweetypie. - */ - 3: optional string in_reply_to_screen_name (personalDataType = 'Username') -}(persisted='true', hasPersonalData = 'true') - -/** - * Includes information about the user a tweet is directed at (when a tweet - * begins with @mention). - * - * Tweets with a DirectedAtUser are delivered to users who follow both the - * author and the DirectedAtUser. 
Normally the DirectedAtUser will be the same - * as Reply.in_reply_to_user_id, but will be different if the tweet's author - * rearranges the @mentions in a reply. - */ -struct DirectedAtUser { - 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') - 2: string screen_name (personalDataType = 'Username') -}(persisted='true', hasPersonalData = 'true') - -/** - * A Share is data about the source tweet of a retweet. - * - * Share was the internal name for the retweet feature. - */ -struct Share { - /** - * The id of the original tweet that was retweeted. - * - * This is always a tweet and never a retweet (unlike parent_status_id). - */ - 1: i64 source_status_id (strato.space = "Tweet", strato.name = "sourceStatus", personalDataType = 'TweetId') - - /** - * The user id of the original tweet's author. - */ - 2: i64 source_user_id (strato.space = "User", strato.name = "sourceUser", personalDataType = 'UserId') - - /** - * The id of the tweet that the user retweeted. - * - * Often this is the same as source_status_id, but it is different when a - * user retweets via another retweet. For example, user A posts tweet id 1, - * user B retweets it, creating tweet 2. If user C sees B's retweet and - * retweets it, the result is another retweet of tweet id 1, with the parent - * status id of tweet 2. - */ - 3: i64 parent_status_id (strato.space = "Tweet", strato.name = "parentStatus", personalDataType = 'TweetId') -}(persisted='true', hasPersonalData = 'true') - -/** - * A record mapping a shortened URL (usually t.co) to a long url, and a prettified - * display text. This is similar to data found in UrlEntity, and may replace that - * data in the future. - */ -struct ShortenedUrl { - /** - * Shortened t.co URL. - */ - 1: string short_url (personalDataType = 'ShortUrl') - - /** - * Original, full-length URL.
- */ - 2: string long_url (personalDataType = 'LongUrl') - - /** - * Truncated version of expanded URL that does not include protocol and is - * limited to 27 characters. - */ - 3: string display_text (personalDataType = 'LongUrl') -}(persisted='true', hasPersonalData = 'true') - -/** - * A QuotedTweet is data about a tweet referenced within another tweet. - * - * QuotedTweet is included if Tweet.QuotedTweetField is requested, and the - * linked-to tweet is public and visible at the time that the linking tweet - * is hydrated, which can be during write-time or later after a cache-miss - * read. Since linked-to tweets can be deleted, and users can become - * suspended, deactivated, or protected, the presence of this value is not a - * guarantee that the quoted tweet is still public and visible. - * - * Because a tweet quoting another tweet may not require a permalink URL in - * the tweet's text, the URLs in ShortenedUrl may be useful to clients that - * require maintaining a legacy-rendering of the tweet's text with the permalink. - * See ShortenedUrl for details. Clients should avoid reading permalink whenever - * possible and prefer the QuotedTweet's tweet_id and user_id instead. - * - * we always populate the permalink on tweet hydration unless there are partial - * hydration errors or inner quoted tweet is filtered due to visibility rules. - * - */ -struct QuotedTweet { - 1: i64 tweet_id (strato.space = "Tweet", strato.name = "tweet", personalDataType = 'TweetId') - 2: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') - 3: optional ShortenedUrl permalink // URLs to access the quoted-tweet -}(persisted='true', hasPersonalData = 'true') - -/** - * A Contributor is a user who has access to another user's account. 
- */ -struct Contributor { - 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') - 2: optional string screen_name (personalDataType = 'Username')// not set on Gizmoduck failure -}(persisted='true', hasPersonalData = 'true') - -struct GeoCoordinates { - 1: double latitude (personalDataType = 'GpsCoordinates') - 2: double longitude (personalDataType = 'GpsCoordinates') - 3: i32 geo_precision = 0 (personalDataType = 'GpsCoordinates') - - /** - * Whether or not to make the coordinates public. - * - * This parameter is needed because coordinates are not typically published - * by the author. If false, the tweet has shared geo coordinates but does - * not make them public. - */ - 4: bool display = 1 -}(persisted='true', hasPersonalData = 'true') - -enum PlaceType { - UNKNOWN = 0 - COUNTRY = 1 - ADMIN = 2 - CITY = 3 - NEIGHBORHOOD = 4 - POI = 5 -} - -enum PlaceNameType { - NORMAL = 0 - ABBREVIATION = 1 - SYNONYM = 2 -} - -struct PlaceName { - 1: string name - 2: string language = "" - 3: PlaceNameType type - 4: bool preferred -}(persisted='true', hasPersonalData='false') - -/** - * A Place is the physical and political properties of a location on Earth. - */ -struct Place { - /** - * Geo service identifier. - */ - 1: string id (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') - - /** - * Granularity of place. - */ - 2: PlaceType type - - /** - * The name of this place composed with its parent locations. - * - * For example, the full name for "Brooklyn" would be "Brooklyn, NY". This - * name is returned in the language specified by - * GetTweetOptions.language_tag. - */ - 3: string full_name (personalDataType = 'InferredLocation') - - /** - * The best name for this place as determined by geoduck heuristics. - * - * This name is returned in the language specified by - * GetTweetOptions.language_tag.
- * - * @see com.twitter.geoduck.util.primitives.bestPlaceNameMatchingFilter - */ - 4: string name (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') - - /** - * Arbitrary key/value data from the geoduck PlaceAttributes for this place. - */ - 5: map attributes (personalDataTypeKey = 'PostalCode') - - 7: set names - - /** - * The ISO 3166-1 alpha-2 code for the country containing this place. - */ - 9: optional string country_code (personalDataType = 'PublishedCoarseLocationTweet') - - /** - * The best name for the country containing this place as determined by - * geoduck heuristics. - * - * This name is returned in the language specified by - * GetTweetOptions.language_tag. - */ - 10: optional string country_name (personalDataType = 'PublishedCoarseLocationTweet') - - /** - * A simplified polygon that encompasses the place's geometry. - */ - 11: optional list bounding_box - - /** - * An unordered list of geo service identifiers for places that contain this - * one from the most immediate parent up to the country. - */ - 12: optional set containers (personalDataType = 'PublishedCoarseLocationTweet') - - /** - * A centroid-like coordinate that is within the geometry of the place. - */ - 13: optional GeoCoordinates centroid - - /** - * Reason this place is being suppressed from display. - * - * This field is present when we previously had a place for this ID, but are - * now choosing not to hydrate it and instead providing fake place metadata - * along with a reason for not including place information. - */ - 14: optional geoduck_common.WithheldReason withheldReason -}(persisted='true', hasPersonalData='true') - -/** - * A UrlEntity is the position and content of a t.co shortened URL in the - * tweet's text. - * - * If Talon returns an error to Tweetypie during tweet hydration, the - * UrlEntity will be omitted from the response. 
UrlEntities are not included - * for non-t.co-wrapped URLs found in older tweets, for spam and user safety - * reasons. -*/ -struct UrlEntity { - /** - * The position of this entity's first character, in zero-indexed Unicode - * code points. - */ - 1: i16 from_index - - /** - * The position after this entity's last character, in zero-indexed Unicode - * code points. - */ - 2: i16 to_index - - /** - * Shortened t.co URL. - */ - 3: string url (personalDataType = 'ShortUrl') - - /** - * Original, full-length URL. - * - * This field will always be present on URL entities returned by - * Tweetypie; it is optional as an implementation artifact. - */ - 4: optional string expanded (personalDataType = 'LongUrl') - - /** - * Truncated version of expanded URL that does not include protocol and is - * limited to 27 characters. - * - * This field will always be present on URL entities returned by - * Tweetypie; it is optional as an implementation artifact. - */ - 5: optional string display (personalDataType = 'LongUrl') - - 6: optional i64 click_count (personalDataType = 'CountOfTweetEntitiesClicked') -}(persisted = 'true', hasPersonalData = 'true') - -/** - * A MentionEntity is the position and content of a mention, (the "@" - * character followed by the name of another valid user) in a tweet's text. - * - * If Gizmoduck returns an error to Tweetypie during tweet hydration that - * MentionEntity will be omitted from the response. - */ -struct MentionEntity { - /** - * The position of this entity's first character ("@"), in zero-indexed - * Unicode code points. - */ - 1: i16 from_index - - /** - * The position after this entity's last character, in zero-indexed Unicode - * code points. - */ - 2: i16 to_index - - /** - * Contents of the mention without the leading "@". - */ - 3: string screen_name (personalDataType = 'Username') - - /** - * User id of the current user with the mentioned screen name. 
- * - * In the current implementation user id does not necessarily identify the - * user who was originally mentioned when the tweet was created, only the - * user who owns the mentioned screen name at the time of hydration. If a - * mentioned user changes their screen name and a second user takes the old - * name, this field identifies the second user. - * - * This field will always be present on mention entities returned by - * Tweetypie; it is optional as an implementation artifact. - */ - 4: optional i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') - - /** - * Display name of the current user with the mentioned screen name. - * - * See user_id for caveats about which user's name is used here. This field - * will always be present on mention entities returned by Tweetypie; it is - * optional as an implementation artifact. - */ - 5: optional string name (personalDataType = 'DisplayName') - - /** - * Indicates if the user referred to by this MentionEntity has been unmentioned - * from the conversation. If this field is set to true, the fromIndex and toIndex - * fields will have a value of 0. - * - * @deprecated isUnmentioned is no longer being populated - */ - 6: optional bool isUnmentioned (personalDataType = 'ContentPrivacySettings') -}(persisted = 'true', hasPersonalData = 'true') - -/** - * A list of users that are mentioned in the tweet and have a blocking - * relationship with the tweet author. Mentions for these users will be unlinked - * in the tweet. - */ -struct BlockingUnmentions { - 1: optional list unmentioned_user_ids (strato.space = 'User', strato.name = 'users', personalDataType = 'UserId') -}(persisted = 'true', hasPersonalData = 'true', strato.graphql.typename = 'BlockingUnmentions') - -/** - * A list of users that are mentioned in the tweet and have indicated they do not want - * to be mentioned via their mention settings. 
Mentions for these users will be unlinked - * in the tweet by Twitter owned and operated clients. - */ -struct SettingsUnmentions { - 1: optional list unmentioned_user_ids (strato.space = 'User', strato.name = 'users', personalDataType = 'UserId') -}(persisted = 'true', hasPersonalData = 'true', strato.graphql.typename = 'SettingsUnmentions') - -/** - * A HashtagEntity is the position and content of a hashtag (a term starting - * with "#") in a tweet's text. - */ -struct HashtagEntity { - /** - * The position of this entity's first character ("#"), in zero-indexed - * Unicode code points. - */ - 1: i16 from_index - - /** - * The position after this entity's last character, in zero-indexed Unicode - * code points. - */ - 2: i16 to_index - - /** - * Contents of the hashtag without the leading "#". - */ - 3: string text (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') -}(persisted = 'true', hasPersonalData = 'true') - -/** - * A CashtagEntity is the position and content of a cashtag (a term starting - * with "$") in a tweet's text. - */ -struct CashtagEntity { - /** - * The position of this entity's first character, in zero-indexed Unicode - * code points. - */ - 1: i16 from_index - - /** - * The position after this entity's last character, in zero-indexed Unicode - * code points. 
- */ - 2: i16 to_index - - /** - * Contents of the cashtag without the leading "$" - */ - 3: string text (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') -}(persisted = 'true', hasPersonalData = 'true') - -enum MediaTagType { - USER = 0 - RESERVED_1 = 1 - RESERVED_2 = 2 - RESERVED_3 = 3 - RESERVED_4 = 4 -} - -struct MediaTag { - 1: MediaTagType tag_type - 2: optional i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') - 3: optional string screen_name (personalDataType = 'Username') - 4: optional string name (personalDataType = 'DisplayName') -}(persisted='true', hasPersonalData = 'true') - -struct TweetMediaTags { - 1: map> tag_map -}(persisted='true', hasPersonalData = 'true') - -/** - * A UserMention is a user reference not stored in the tweet text. - * - * @deprecated Was used only in ReplyAddresses - */ -struct UserMention { - 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') - 2: optional string screen_name (personalDataType = 'Username') - 3: optional string name (personalDataType = 'DisplayName') -}(persisted='true', hasPersonalData = 'true') - -/** - * ReplyAddresses is a list of reply entities which are stored outside of the - * text. - * - * @deprecated - */ -struct ReplyAddresses { - 1: list users = [] -}(persisted='true', hasPersonalData = 'true') - -/** - * SchedulingInfo is metadata about tweets created by the tweet scheduling - * service. - */ -// -struct SchedulingInfo { - /** - * Id of the corresponding scheduled tweet before it was created as a real - * tweet. 
- */ - 1: i64 scheduled_tweet_id (personalDataType = 'TweetId') -}(persisted='true', hasPersonalData = 'true') - -/** - * @deprecated - */ -enum SuggestType { - WTF_CARD = 0 - WORLD_CUP = 1 - WTD_CARD = 2 - NEWS_CARD = 3 - RESERVED_4 = 4 - RESERVED_5 = 5 - RESERVED_6 = 6 - RESERVED_7 = 7 - RESERVED_8 = 8 - RESERVED_9 = 9 - RESERVED_10 = 10 - RESERVED_11 = 11 -} - -/** - * @deprecated - */ -enum TwitterSuggestsVisibilityType { - /** - * Always public to everyone - */ - PUBLIC = 1 - - /** - * Inherits visibility rules of personalized_for_user_id. - */ - RESTRICTED = 2 - - /** - * Only visible to personalized_for_user_id (and author). - */ - PRIVATE = 3 -} - -/** - * TwitterSuggestInfo is details about a synthetic tweet generated by an early - * version of Twitter Suggests. - * - * @deprecated - */ -struct TwitterSuggestInfo { - 1: SuggestType suggest_type - 2: TwitterSuggestsVisibilityType visibility_type - 3: optional i64 personalized_for_user_id (strato.space = "User", strato.name = "personalizedForUser", personalDataType = 'UserId') - 4: optional i64 display_timestamp_secs (personalDataType = 'PublicTimestamp') -}(persisted='true', hasPersonalData = 'true') - -/** - * A DeviceSource contains information about the client application from which - * a tweet was sent. - * - * This information is stored in Passbird. The developer that owns a client - * application provides this information on https://apps.twitter.com. - */ -struct DeviceSource { - - /** - * The id of the client in the now deprecated device_sources MySQL table. - * - * Today this value will always be 0. - * - * @deprecated Use client_app_id - */ - 1: required i64 id (personalDataType = 'AppId') - - /** - * Identifier for the client in the format "oauth:" - */ - 2: string parameter - - /** - * Identifier for the client in the format "oauth:" - */ - 3: string internal_name - - /** - * Developer-provided name of the client application. 
- */ - 4: string name - - /** - * Developer-provided publicly accessible home page for the client - * application. - */ - 5: string url - - /** - * HTML fragment with a link to the client-provided URL - */ - 6: string display - - /** - * This field is marked optional for backwards compatibility but will always - * be populated by Tweetypie. - */ - 7: optional i64 client_app_id (personalDataType = 'AppId') -}(persisted='true', hasPersonalData = 'true') - -/** - * A Narrowcast restricts delivery of a tweet geographically. - * - * Narrowcasts allow multi-national advertisers to create geo-relevant content - * from a central handle that is only delivered to followers in a - * particular country or set of countries. - */ -struct Narrowcast { - 2: list location = [] (personalDataType = 'PublishedCoarseLocationTweet') -}(persisted='true', hasPersonalData = 'true') - -/** - * StatusCounts is a summary of engagement metrics for a tweet. - * - * These metrics are loaded from TFlock. - */ -struct StatusCounts { - - /** - * Number of times this tweet has been retweeted. - * - * This number may not match the list of users who have retweeted because it - * includes retweets from protected and suspended users who are not listed. - */ - 1: optional i64 retweet_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets', strato.json.numbers.type = 'int53') - - /** - * Number of direct replies to this tweet. - * - * This number does not include replies to replies. - */ - 2: optional i64 reply_count (personalDataType = 'CountOfPrivateReplies, CountOfPublicReplies', strato.json.numbers.type = 'int53') - - /** - * Number of favorites this tweet has received. - * - * This number may not match the list of users who have favorited a tweet - * because it includes favorites from protected and suspended users who are - * not listed.
- */ - 3: optional i64 favorite_count (personalDataType = 'CountOfPrivateLikes, CountOfPublicLikes', strato.json.numbers.type = 'int53') - - /** - * @deprecated - */ - 4: optional i64 unique_users_impressed_count (strato.json.numbers.type = 'int53') - - /** - * Number of replies to this tweet including replies to replies. - * - * @deprecated - */ - 5: optional i64 descendent_reply_count (personalDataType = 'CountOfPrivateReplies, CountOfPublicReplies', strato.json.numbers.type = 'int53') - - /** - * Number of times this tweet has been quote tweeted. - * - * This number may not match the list of users who have quote tweeted because it - * includes quote tweets from protected and suspended users who are not listed. - */ - 6: optional i64 quote_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets', strato.json.numbers.type = 'int53') - - /** - * Number of bookmarks this tweet has received. - */ - 7: optional i64 bookmark_count (personalDataType = 'CountOfPrivateLikes', strato.json.numbers.type = 'int53') - -}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='StatusCounts') - -/** - * A StatusPerspective is a tweet's properties from one user's point of view. - */ -struct StatusPerspective { - 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') - - /** - * Whether user_id has favorited this tweet. - */ - 2: bool favorited - - /** - * Whether user_id has retweeted this tweet. - */ - 3: bool retweeted - - /** - * If user_id has retweeted this tweet, retweet_id identifies that tweet. - */ - 4: optional i64 retweet_id (strato.space = "Tweet", strato.name = "retweet", personalDataType = 'TweetId') - - /** - * Whether user_id has reported this tweet as spam, offensive, or otherwise - * objectionable. - */ - 5: bool reported - - /** - * Whether user_id has bookmarked this tweet.
- */ - 6: optional bool bookmarked -}(persisted='true', hasPersonalData = 'true') - -/** - * A Language is a guess about the human language of a tweet's text. - * - * Language is determined by TwitterLanguageIdentifier from the - * com.twitter.common.text package (commonly called "Penguin"). - */ -struct Language { - /** - * Language code in BCP-47 format. - */ - 1: required string language (personalDataType = 'InferredLanguage') - - /** - * Language direction. - */ - 2: bool right_to_left - - /** - * Confidence level of the detected language. - */ - 3: double confidence = 1.0 - - /** - * Other possible languages and their confidence levels. - */ - 4: optional map other_candidates -}(persisted='true', hasPersonalData = 'true') - -/** - * A SupplementalLanguage is a guess about the human language of a tweet's - * text. - * - * SupplementalLanguage is typically determined by a third-party translation - * service. It is only stored when the service detects a different language - * than TwitterLanguageIdentifier. - * - * @deprecated 2020-07-08 no longer populated. - */ -struct SupplementalLanguage { - /** - * Language code in BCP-47 format. - */ - 1: required string language (personalDataType = 'InferredLanguage') -}(persisted='true', hasPersonalData = 'true') - -/** - * A SpamLabel is a collection of spam actions for a tweet. 
- * - * Absence of a SpamLabel indicates that no action needs to be taken - */ -struct SpamLabel { - /** - * Filter this content at render-time - * - * @deprecated 2014-05-19 Use filter_renders - */ - 1: bool spam = 0 - - 2: optional set actions; -}(persisted='true') - - -/** - * The available types of spam signal - * - * @deprecated - */ -enum SpamSignalType { - MENTION = 1 - SEARCH = 2 - STREAMING = 4 - # OBSOLETE HOME_TIMELINE = 3 - # OBSOLETE NOTIFICATION = 5 - # OBSOLETE CONVERSATION = 6 - # OBSOLETE CREATION = 7 - RESERVED_VALUE_8 = 8 - RESERVED_VALUE_9 = 9 - RESERVED_VALUE_10 = 10 -} - -/** - * @deprecated - * CardBindingValues is a collection of key-value pairs used to render a card. - */ -struct CardBindingValues { - 1: list pairs = [] -}(persisted='true') - -/** - * A CardReference is a mechanism for explicitly associating a card with a - * tweet. - */ -struct CardReference { - /** - * Link to the card to associate with a tweet. - * - * This URI may reference either a card stored in the card service, or - * another resource, such as a crawled web page URL. This value supersedes - * any URL present in tweet text. - */ - 1: string card_uri -}(persisted='true') - -/** - * A TweetPivot is a semantic entity related to a tweet. - * - * TweetPivots are used to direct the user to another related location. For - * example, a "See more about <entity>" UI element that takes the user to - * the related location when clicked. - */ -struct TweetPivot { - 1: required tweet_annotation.TweetEntityAnnotation annotation - 2: required tweet_pivots.TweetPivotData data -}(persisted='true') - -struct TweetPivots { - 1: required list tweet_pivots -}(persisted='true') - -struct EscherbirdEntityAnnotations { - 1: list entity_annotations -}(persisted='true') - -struct TextRange { - /** - * The inclusive index of the start of the range, in zero-indexed Unicode - * code points. - */ - 1: required i32 from_index - - /** - * The exclusive index of the end of the range, in zero-indexed Unicode - * code points.
- */ - 2: required i32 to_index -}(persisted='true') - -struct TweetCoreData { - 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId', tweetEditAllowed='false') - - /** - * The body of the tweet consisting of the user-supplied displayable message - * and: - * - an optional prefix list of @mentions - * - an optional suffix attachment url. - * - * The indices from visible_text_range specify the substring of text intended - * to be displayed, whose length is limited to 140 display characters. Note - * that the visible substring may be longer than 140 characters due to HTML - * entity encoding of &, <, and > . - - * For retweets the text is that of the original tweet, prepended with "RT - * @username: " and truncated to 140 characters. - */ - 2: string text (personalDataType = 'PrivateTweets, PublicTweets') - - /** - * The client from which this tweet was created - * - * The format of this value is oauth:. - */ - 3: string created_via (personalDataType = 'ClientType') - - /** - * Time this tweet was created. - * - * This value is seconds since the Unix epoch. For tweets with Snowflake IDs - * this value is redundant, since a millisecond-precision timestamp is part - * of the id. - */ - 4: i64 created_at_secs - - /** - * Present when this tweet is a reply to another tweet or another user. - */ - 5: optional Reply reply - - /** - * Present when a tweet begins with an @mention or has metadata indicating the directed-at user. - */ - 6: optional DirectedAtUser directed_at_user - - /** - * Present when this tweet is a retweet. - */ - 7: optional Share share - - /** - * Whether there is a takedown country code or takedown reason set for this specific tweet. - * - * See takedown_country_codes for the countries where the takedown is active. (deprecated) - * See takedown_reasons for a list of reasons why the tweet is taken down. - * - * has_takedown will be set to true if either this specific tweet or the author has a - * takedown active.
- */ - 8: bool has_takedown = 0 - - /** - * Whether this tweet might be not-safe-for-work, judged by the tweet author. - * - * Users can flag their own accounts as not-safe-for-work in account - * preferences by selecting "Mark media I tweet as containing material that - * may be sensitive" and each tweet created after that point will have - * this flag set. - * - * The value can also be updated after tweet create time via the - * update_possibly_sensitive_tweet method. - */ - 9: bool nsfw_user = 0 - - /** - * Whether this tweet might be not-safe-for-work, judged by an internal Twitter - * support agent. - * - * This tweet value originates from the user's nsfw_admin flag at - * tweet create time but can be updated afterwards using the - * update_possibly_sensitive_tweet method. - */ - 10: bool nsfw_admin = 0 - - /** - * When nullcast is true a tweet is not delivered to a user's followers, not - * shown in the user's timeline, and does not appear in search results. - * - * This is primarily used to create tweets that can be used as ads without - * broadcasting them to an advertiser's followers. - */ - 11: bool nullcast = 0 (tweetEditAllowed='false') - - /** - * Narrowcast limits delivery of a tweet to followers in specific geographic - * regions. - */ - 12: optional Narrowcast narrowcast (tweetEditAllowed='false') - - /** - * The impression id of the ad from which this tweet was created. - * - * This is set when a user retweets or replies to a promoted tweet. It is - * used to attribute the "earned" exposure of an advertisement. - */ - 13: optional i64 tracking_id (personalDataType = 'ImpressionId', tweetEditAllowed='false') - - /** - * A shared identifier among all the tweets in the reply chain for a single - * tweet. - * - * The conversation id is the id of the tweet that started the conversation. 
- */ - 14: optional i64 conversation_id (strato.space = "Tweet", strato.name = "conversation", personalDataType = 'TweetId') - - /** - * Whether this tweet has media of any type. - * - * Media can be in the form of media entities, media cards, or URLs in the - * tweet text that link to media partners. - * - * @see MediaIndexHelper - */ - 15: optional bool has_media - - /** - * Supported for legacy clients to associate a location with a Tweet. - * - * Twitter owned clients must use place_id REST API param for geo-tagging. - * - * @deprecated Use place_id REST API param - */ - 16: optional GeoCoordinates coordinates (personalDataType = 'GpsCoordinates', tweetEditAllowed='false') - - /** - * The location where a tweet was sent from. - * - * Place is either published in API request explicitly or implicitly reverse - * geocoded from API lat/lon coordinates params. - * - * Tweetypie implementation notes: - * - Currently, if both place_id and coordinates are specified, coordinates - * takes precedence in geo-tagging. I.e.: Place returned rgc(coordinates) - * sets the place_id field. - * - place_id is reverse geocoded on write-path. - */ - 17: optional string place_id (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') -}(persisted='true', hasPersonalData = 'true', tweetEditAllowed='false') - -/** - * List of community ID's the tweet belongs to. - */ -struct Communities { - 1: required list community_ids (personalDataType = 'EngagementId') -}(persisted='true') - -/** - * Tweet metadata that is present on extended tweets, a tweet whose total text length is greater - * than the classic limit of 140 characters. - */ -struct ExtendedTweetMetadata { - /** - * @deprecated was display_count - */ - 1: i32 unused1 = 0 - - /** - * The index, in unicode code points, at which the tweet text should be truncated - * for rendering in a public API backwards-compatible mode. 
Once truncated to this - * point, the text should be appended with an ellipsis, a space, and the short_url - * from self_permalink. The resulting text must conform to the 140 display glyph - * limit. - */ - 2: required i32 api_compatible_truncation_index - - /** - * @deprecated was default_display_truncation_index - */ - 3: i32 unused3 = 0 - - /** - * @deprecated was is_long_form - */ - 4: bool unused4 = 0 - - /** - * @deprecated was preview_range - */ - 5: optional TextRange unused5 - - /** - * @deprecated was extended_preview_range - */ - 6: optional TextRange unused6 -}(persisted='true') - -/** - * @deprecated use TransientCreateContext instead - */ -enum TweetCreateContextKey { - PERISCOPE_IS_LIVE = 0, - PERISCOPE_CREATOR_ID = 1 -} - -/** - * DirectedAtUserMetadata is a tweetypie-internal structure that can be used to store metadata about - * a directed-at user on the tweet. - * - * Note: absence of this field does not imply the tweet does not have a DirectedAtUser, see - * tweet.directedAtUserMetadata for more information. - */ -struct DirectedAtUserMetadata { - /** - * ID of the user a tweet is directed-at. - */ - 1: optional i64 user_id (personalDataType = 'UserId') -}(persisted='true', hasPersonalData = 'true') - -/** - * Tweet metadata that may be present on tweets in a self-thread (tweetstorm). - * - * A self-thread is a tree of self-replies that may either: - * 1. begin as a reply to another user's tweet (called a non-root self-thread) or - * 2. stand alone (called root self-thread). - * - * Note that not all self-threads have SelfThreadMetadata. - */ -struct SelfThreadMetadata { - /** - * A shared identifier among all the tweets in the self-thread (tweetstorm). - * - * The tweetstorm id is the id of the tweet that started the self thread. - * - * If the id matches the tweet's conversation_id then it is a root self-thread, otherwise it is - * a non-root self-thread. 
- */ - 1: required i64 id (personalDataType = 'TweetId') - - /** - * Indicates if the tweet with this SelfThreadMetadata is a leaf in the self-thread tree. - * This flag might be used to encourage the author to extend their tweetstorm at the end. - */ - 2: bool isLeaf = 0 -}(persisted='true', hasPersonalData = 'true') - -/** - * Composer flow used to create this tweet. Unless using the News Camera (go/newscamera) - * flow, this should be `STANDARD`. - * - * When set to `CAMERA`, clients are expected to display the tweet with a different UI - * to emphasize attached media. - */ -enum ComposerSource { - STANDARD = 1 - CAMERA = 2 -} - - -/** - * The conversation owner and users in invited_user_ids can reply - **/ -struct ConversationControlByInvitation { - 1: required list invited_user_ids (personalDataType = 'UserId') - 2: required i64 conversation_tweet_author_id (personalDataType = 'UserId') - 3: optional bool invite_via_mention -}(persisted='true', hasPersonalData = 'true') - -/** - * The conversation owner, users in invited_user_ids, and users who the conversation owner follows can reply - **/ -struct ConversationControlCommunity { - 1: required list invited_user_ids (personalDataType = 'UserId') - 2: required i64 conversation_tweet_author_id (personalDataType = 'UserId') - 3: optional bool invite_via_mention -}(persisted='true', hasPersonalData = 'true') - -/** - * The conversation owner, users in invited_user_ids, and users who follows the conversation owner can reply - **/ -struct ConversationControlFollowers { - 1: required list invited_user_ids (personalDataType = 'UserId') - 2: required i64 conversation_tweet_author_id (personalDataType = 'UserId') - 3: optional bool invite_via_mention -}(persisted='true', hasPersonalData = 'true') - -/** -* This tweet metadata captures restrictions on who is allowed to reply in a conversation. 
-*/ -union ConversationControl { - - 1: ConversationControlCommunity community - - 2: ConversationControlByInvitation byInvitation - - 3: ConversationControlFollowers followers -}(persisted='true', hasPersonalData = 'true') - -// This tweet metadata shows the exclusivity of a tweet and is used to determine -// whether replies / visibility of a tweet is limited -struct ExclusiveTweetControl { - 1: required i64 conversation_author_id (personalDataType = 'UserId') -}(persisted='true', hasPersonalData = 'true') - -/** - * Tweet metadata for a Trusted Friends tweet. - * - * A Trusted Friends tweet is a tweet whose visibility is restricted to members - * of an author-specified list. - * - * Replies to a Trusted Friends tweet will inherit a copy of this metadata from - * the root tweet. - */ -struct TrustedFriendsControl { - /** - * The ID of the Trusted Friends List whose members can view this tweet. - */ - 1: required i64 trusted_friends_list_id (personalDataType = 'TrustedFriendsListMetadata') -}(persisted='true', hasPersonalData = 'true') - -enum CollabInvitationStatus { - PENDING = 0 - ACCEPTED = 1 - REJECTED = 2 -} - -/** - * Represents a user who has been invited to collaborate on a CollabTweet, associated with whether - * they have accepted or rejected collaboration - */ -struct InvitedCollaborator { - 1: required i64 collaborator_user_id (personalDataType = 'UserId') - 2: required CollabInvitationStatus collab_invitation_status -}(persisted='true', hasPersonalData='true') - -/** - * Present if Tweet is a CollabInvitation awaiting publishing, stores list of invited Collaborators - */ -struct CollabInvitation { - 1: required list invited_collaborators -}(persisted='true', hasPersonalData='true') - -/** - * Present if Tweet is a published CollabTweet, stores list of Collaborators - */ -struct CollabTweet { - 1: required list collaborator_user_ids (personalDataType = 'UserId') -}(persisted='true', hasPersonalData='true') - -/** - * CollabTweets treat multiple users 
as co-authors or "Collaborators" of a single "Collab Tweet". - * - * When creating a Collab Tweet, the original author will begin by creating a CollabInvitation which - * is sent to another Collaborator to accept or reject collaboration. If and when other - * Collaborators have accepted, the CollabInvitation is replaced by a CollabTweet which is published - * publicly and fanned out to followers of all Collaborators. A CollabInvitation will be hidden from - * anyone except the list of Collaborators using VF. The CollabTweet will then be fanned out like - * a regular Tweet to the profiles and combined audiences of all Collaborators. - * - * A Tweet representing a CollabTweet or CollabInvitation is denoted by the presence of a - * CollabControl field on a Tweet. - */ -union CollabControl { - 1: CollabInvitation collab_invitation - 2: CollabTweet collab_tweet -}(persisted='true', hasPersonalData='true') - -/** - * A Tweet is a message that belongs to a Twitter user. - * - * The Tweet struct replaces the deprecated Status struct. All fields except - * id are optional. - * - * This struct supports the additional fields flexible schema. Additional fields are - * defined starting from field 101. - * - * The guidelines for adding a new Additional field: - * 1. It's required to define the additional field as an optional struct. - * Inside the struct, define optional or non-optional field(s) according - * to your needs. - * 2. If you have several immutable piece of data that are always accessed - * together, you should define them in the same struct for better storage - * locality. - * 3. If your data model has several mutable pieces, and different piece can - * be updated in a close succession, you should group them into - * separate structs and each struct contains one mutable piece. - */ -struct Tweet { - /** - * The primary key for a tweet. - * - * A tweet's id is assigned by the tweet service at creation time. 
Since - * 2010-11-04 tweet ids have been generated using Snowflake. Prior to this - * ids were assigned sequentially by MySQL AUTOINCREMENT. - */ - 1: i64 id (personalDataType = 'TweetId') - - /** - * The essential properties of a tweet. - * - * This field will always be present on tweets returned by Tweetypie. It is - * marked optional so an empty tweet can be provided to write additional - * fields. - */ - 2: optional TweetCoreData core_data - - /** - * URLs extracted from the tweet's text. - */ - 3: optional list urls - - /** - * Mentions extracted from the tweet's text. - */ - 4: optional list mentions - - /** - * Hashtags extracted from the tweet's text. - */ - 5: optional list hashtags - - /** - * Cashtags extracted from the tweet's text - */ - 6: optional list cashtags - - 7: optional list media - - /** - * Place identified by Tweet.core_data.place_id. - */ - 10: optional Place place - - 11: optional QuotedTweet quoted_tweet - - /** - * The list of countries where this tweet will not be shown. - * - * This field contains countries for both the tweet and the user, so it may - * contain values even if has_takedown is false. - * - * @deprecated, use field 30 takedown_reasons which includes the same information and more - */ - 12: optional list takedown_country_codes (personalDataType = 'ContentRestrictionStatus') - - /** - * Interaction metrics for this tweet. - * - * Included when one of GetTweetOptions.load_retweet_count, - * GetTweetOptions.load_reply_count, or GetTweetOptions.load_favorite_count - * is set. This can be missing in a PARTIAL response if the TFlock request - * fails. - */ - 13: optional StatusCounts counts - - /** - * Properties of the client from which the tweet was sent. - * - * This can be missing in a PARTIAL response if the Passbird request fails. - */ - 14: optional DeviceSource device_source - - /** - * Properties of this tweet from the point of view of - * GetTweetOptions.for_user_id. 
- * - * This field is included only when for_user_id is provided and - * include_perspective == true This can be missing in a PARTIAL response if - * the timeline service request fails. - */ - 15: optional StatusPerspective perspective - - /** - * Version 1 cards. - * - * This field is included only when GetTweetOptions.include_cards == true. - */ - 16: optional list cards - - /** - * Version 2 cards. - * - * This field is included only included when GetTweetOptions.include_cards - * == true and GetTweetOptions.cards_platform_key is set to valid value. - */ - 17: optional cards.Card2 card2 - - /** - * Human language of tweet text as determined by TwitterLanguageIdentifier. - */ - 18: optional Language language - - /** - * @deprecated - */ - 19: optional map spam_labels - - /** - * User responsible for creating this tweet when it is not the same as the - * core_data.user_id. - * - * This is sensitive information and must not be shared externally (via UI, - * API, or streaming) except to the the owner of the tweet - * (core_data.user_id) or a contributor to the owner's account. - */ - 20: optional Contributor contributor - - // obsolete 21: optional list topic_labels - - 22: optional enrichments_profilegeo.ProfileGeoEnrichment profile_geo_enrichment - - // Maps extension name to value; only populated if the request contained an extension on tweets. - // obsolete 24: optional map extensions - - /** - * Deprecated. - * Semantic entities that are related to this tweet. - */ - 25: optional TweetPivots tweet_pivots - - /** - * @deprecated - * Strato Tweet Extensions support has moved to birdherd. - * - * Internal thrift clients should query strato columns directly and - * not rely upon ext/*.Tweet columns which are designed to serve - * client APIs. - */ - 26: optional binary extensions_reply - - /** - * Has the requesting user muted the conversation referred to by - * `conversation_id`? When this field is absent, the conversation may - * or may not be muted. 
Use the `include_conversation_muted` field in - * GetTweetOptions to request this field. - * - * If this field has a value, the value applies to the user in the - * `for_user_id` field of the requesting `GetTweetOptions`. - */ - 27: optional bool conversation_muted - - /** - * The user id of the tweet referenced by conversation_id - * - * @deprecated Was conversation_owner_id. This was never implemented. - */ - 28: optional i64 unused28 - - /** - * Has this tweet been removed from its conversation by the conversation owner? - * - * @deprecated Was is_removed_from_conversation. This was never implemented. - */ - 29: optional bool unused29 - - /** - * A list of takedown reasons indicating which country and reason this tweet was taken down. - */ - 30: optional list takedown_reasons - - /** - * @obsolete, self-thread metadata is now stored in field 151, self_thread_metadata - */ - 31: optional self_thread.SelfThreadInfo self_thread_info - - // field 32 to 99 are reserved - // field 100 is used for flexible schema proof of concept - // additional fields - // these fields are stored in Manhattan flexible schema - 101: optional TweetMediaTags media_tags - 102: optional SchedulingInfo scheduling_info - - /** - * @deprecated - */ - 103: optional CardBindingValues binding_values - - /** - * @deprecated - */ - 104: optional ReplyAddresses reply_addresses - - /** - * OBSOLETE, but originally contained information about synthetic tweets created by the first - * version of Twitter Suggests. 
- * - * @deprecated - */ - 105: optional TwitterSuggestInfo obsolete_twitter_suggest_info - - 106: optional EscherbirdEntityAnnotations escherbird_entity_annotations (personalDataType = 'AnnotationValue') - - // @deprecated 2021-07-19 - 107: optional safety_label.SafetyLabel spam_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 108: optional safety_label.SafetyLabel abusive_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 109: optional safety_label.SafetyLabel low_quality_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 110: optional safety_label.SafetyLabel nsfw_high_precision_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 111: optional safety_label.SafetyLabel nsfw_high_recall_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 112: optional safety_label.SafetyLabel abusive_high_recall_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 113: optional safety_label.SafetyLabel low_quality_high_recall_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 114: optional safety_label.SafetyLabel persona_non_grata_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 115: optional safety_label.SafetyLabel recommendations_low_quality_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 116: optional safety_label.SafetyLabel experimentation_label (personalDataType = 'TweetSafetyLabels') - - 117: optional tweet_location_info.TweetLocationInfo tweet_location_info - 118: optional CardReference card_reference - - /** - * @deprecated 2020-07-08 no longer populated. - */ - 119: optional SupplementalLanguage supplemental_language - - // field 120, additional_media_metadata, is deprecated. 
- // field 121, media_metadatas, is deprecated - - // under certain circumstances, including long form tweets, we create and store a self-permalink - // to this tweet. in the case of a long-form tweet, this will be used in a truncated version - // of the tweet text. - 122: optional ShortenedUrl self_permalink - - // metadata that is present on extended tweets. - 123: optional ExtendedTweetMetadata extended_tweet_metadata - - // obsolete 124: crosspost_destinations.CrosspostDestinations crosspost_destinations - - // Communities associated with a tweet - 125: optional Communities communities (personalDataType = 'PrivateTweetEntitiesAndMetadata', tweetEditAllowed='false') - - // If some text at the beginning or end of the tweet should be hidden, then this - // field indicates the range of text that should be shown in clients. - 126: optional TextRange visible_text_range - - // @deprecated 2021-07-19 - 127: optional safety_label.SafetyLabel spam_high_recall_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 128: optional safety_label.SafetyLabel duplicate_content_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 129: optional safety_label.SafetyLabel live_low_quality_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 130: optional safety_label.SafetyLabel nsfa_high_recall_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 131: optional safety_label.SafetyLabel pdna_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 132: optional safety_label.SafetyLabel search_blacklist_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 133: optional safety_label.SafetyLabel low_quality_mention_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 134: optional safety_label.SafetyLabel bystander_abusive_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 135: optional 
safety_label.SafetyLabel automation_high_recall_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 136: optional safety_label.SafetyLabel gore_and_violence_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 137: optional safety_label.SafetyLabel untrusted_url_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 138: optional safety_label.SafetyLabel gore_and_violence_high_recall_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 139: optional safety_label.SafetyLabel nsfw_video_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 140: optional safety_label.SafetyLabel nsfw_near_perfect_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 141: optional safety_label.SafetyLabel automation_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 142: optional safety_label.SafetyLabel nsfw_card_image_label (personalDataType = 'TweetSafetyLabels') - // @deprecated 2021-07-19 - 143: optional safety_label.SafetyLabel duplicate_mention_label (personalDataType = 'TweetSafetyLabels') - - // @deprecated 2021-07-19 - 144: optional safety_label.SafetyLabel bounce_label (personalDataType = 'TweetSafetyLabels') - // field 145 to 150 is reserved for safety labels - - /** - * If this tweet is part of a self_thread (tweetstorm) then this value may be set. - * See SelfThreadMetadata for details. - */ - 151: optional SelfThreadMetadata self_thread_metadata - // field 152 has been deprecated - - // The composer used to create this tweet. Either via the standard tweet creator or the - // Camera flow (go/newscamera). - // - // NOTE: this field is only set if a client passed an explicit ComposerSource in the PostTweetRequest. - // News Camera is deprecated and we no longer set ComposerSource in the PostTweetRequest so no new Tweets will - // have this field. 
- 153: optional ComposerSource composer_source - - // Present if replies are restricted, see ConversationControl for more details - 154: optional ConversationControl conversation_control - - // Determines the super follows requirements for being able to view a tweet. - 155: optional ExclusiveTweetControl exclusive_tweet_control (tweetEditAllowed='false') - - // Present for a Trusted Friends tweet, see TrustedFriendsControl for more details. - 156: optional TrustedFriendsControl trusted_friends_control (tweetEditAllowed='false') - - // Data about edits and editability. See EditControl for more details. - 157: optional edit_control.EditControl edit_control - - // Present for a CollabTweet or CollabInvitation, see CollabControl for more details. - 158: optional CollabControl collab_control (tweetEditAllowed='false') - - // Present for a 3rd-party developer-built card. See http://go/developer-built-cards-prd - 159: optional i64 developer_built_card_id (personalDataType = 'CardId') - - // Data about enrichments attached to a tweet. - 160: optional creative_entity_enrichments.CreativeEntityEnrichments creative_entity_enrichments_for_tweet - - // This field includes summed engagements from the previous tweets in the edit chain. - 161: optional StatusCounts previous_counts - - // A list of media references, including information about the source Tweet for pasted media. - // Prefer this field to media_keys, as media_keys is not present for old Tweets or pasted media Tweets. - 162: optional list media_refs - - // Whether this tweet is a 'backend tweet' to be referenced only by the creatives containers service - // go/cea-cc-integration for more details - 163: optional bool is_creatives_container_backend_tweet - - /** - * Aggregated perspective of this tweet and all other versions from the point of view of the - * user specified in for_user_id. 
- * - * This field is included only when for_user_id is provided and can be missing in a PARTIAL response - * if the timeline service request fails. - */ - 164: optional api_fields.TweetPerspective edit_perspective - - // Visibility controls related to Toxic Reply Filtering - // go/toxrf for more details - 165: optional filtered_reply_details.FilteredReplyDetails filtered_reply_details - - // The list of mentions that have unmentioned from the tweet's associated conversation - 166: optional unmentions.UnmentionData unmention_data - - /** - * A list of users that were mentioned in the tweet and have a blocking - * relationship with the author. - */ - 167: optional BlockingUnmentions blocking_unmentions - - /** - * A list of users that were mentioned in the tweet and should be unmentioned - * based on their mention setttings - */ - 168: optional SettingsUnmentions settings_unmentions - - /** - * A Note associated with this Tweet. - */ - 169: optional note_tweet.NoteTweet note_tweet - - // For additional fields, the next available field id is 169. - // NOTE: when adding a new additional field, please also update UnrequestedFieldScrubber.scrubKnownFields - - /** - * INTERNAL FIELDS - * - * These fields are used by tweetypie only and should not be accessed externally. - * The field ids are in descending order, starting with `32767`. - */ - - /** - * Present if tweet data is provided creatives container service instead of tweetypie storage, - * with encapsulated tweets or customized data. - */ - 32763: optional i64 underlying_creatives_container_id - - /** - * Stores tweetypie-internal metadata about a DirectedAtUser. - * - * A tweet's DirectedAtUser is hydrated as follows: - * 1. if this field is present, then DirectedAtUserMetadata.userId is the directed-at user - * 2. if this field is absent, then if the tweet has a reply and has a mention starting at text - * index 0 then that user is the directed-at user. 
- * - * Note: External clients should use CoreData.directed_at_user. - */ - 32764: optional DirectedAtUserMetadata directed_at_user_metadata - - // list of takedowns that are applied directly to the tweet - 32765: optional list tweetypie_only_takedown_reasons - - // Stores the media keys used to interact with the media platform systems. - // Prefer `media_refs` which will always have media data, unlike this field which is empty for - // older Tweets and Tweets with pasted media. - 32766: optional list media_keys - - // field 32767 is the list of takedowns that are applied directly to the tweet - 32767: optional list tweetypie_only_takedown_country_codes (personalDataType = 'ContentRestrictionStatus') - - - // for internal fields, the next available field id is 32765 (counting down) -}(persisted='true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.docx new file mode 100644 index 000000000..f704dc6d3 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift deleted file mode 100644 index db8361805..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift +++ /dev/null @@ -1,32 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -namespace py gen.twitter.tweetypie.tweet_audit -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace rb TweetyPie -namespace go tweetypie - -// Copied from UserActionReason in guano.thrift - this should be kept in sync (though upper cased) -enum AuditUserActionReason { - SPAM - CHURNING - OTHER - PHISHING - BOUNCING - - RESERVED_1 - RESERVED_2 -} - -// This struct contains all fields of DestroyStatus in guano.thrift that can be set per 
remove/deleteTweets invocation -// Values are passed through TweetyPie as-is to guano scribe and not used by TweetyPie. -struct AuditDeleteTweet { - 1: optional string host (personalDataType = 'IpAddress') - 2: optional string bulk_id - 3: optional AuditUserActionReason reason - 4: optional string note - 5: optional bool done - 6: optional string run_id - // OBSOLETE 7: optional i64 id - 8: optional i64 client_application_id (personalDataType = 'AppId') - 9: optional string user_agent (personalDataType = 'UserAgent') -}(persisted = 'true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.docx new file mode 100644 index 000000000..06c8138a9 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift deleted file mode 100644 index 4ad96e564..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift +++ /dev/null @@ -1,28 +0,0 @@ -namespace java com.twitter.tweetypiecomparison.thriftjava -#@namespace scala com.twitter.tweetypiecomparison.thriftscala -#@namespace strato com.twitter.tweetypiecomparison - -include "com/twitter/tweetypie/tweet_service.thrift" -include "com/twitter/context/viewer.thrift" - -service TweetComparisonService { - void compare_retweet( - 1: tweet_service.RetweetRequest request, - 2: optional viewer.Viewer viewer - ) - - void compare_post_tweet( - 1: tweet_service.PostTweetRequest request, - 2: optional viewer.Viewer viewer - ) - - void compare_unretweet( - 1: tweet_service.UnretweetRequest request, - 2: optional viewer.Viewer viewer - ) - - void compare_delete_tweets( - 1: tweet_service.DeleteTweetsRequest request, - 2: optional 
viewer.Viewer viewer - ) -} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.docx new file mode 100644 index 000000000..ec3f5988d Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift deleted file mode 100644 index a80a74bf9..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift +++ /dev/null @@ -1,277 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -namespace py gen.twitter.tweetypie.tweet_events -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace rb TweetyPie -namespace go tweetypie - -include "com/twitter/tseng/withholding/withholding.thrift" -include "com/twitter/tweetypie/transient_context.thrift" -include "com/twitter/tweetypie/tweet.thrift" -include "com/twitter/tweetypie/tweet_audit.thrift" -include "com/twitter/gizmoduck/user.thrift" - -/** - * SafetyType encodes the event user's safety state in an enum so downstream - * event processors can filter events without having to load the user. - */ -enum SafetyType { - PRIVATE = 0 // user.safety.isProtected - RESTRICTED = 1 // !PRIVATE && user.safety.suspended - PUBLIC = 2 // !(PRIVATE || RESTRICTED) - RESERVED0 = 3 - RESERVED1 = 4 - RESERVED2 = 5 - RESERVED3 = 6 -} - -struct TweetCreateEvent { - /** - * The tweet that has been created. - */ - 1: tweet.Tweet tweet - - /** - * The user who owns the created tweet. - */ - 2: user.User user - - /** - * The tweet being retweeted. - */ - 3: optional tweet.Tweet source_tweet - - /** - * The user who owns source_tweet. - */ - 4: optional user.User source_user - - /** - * The user whose tweet or retweet is being retweeted. 
- * - * This is the id of the user who owns - * tweet.core_data.share.parent_status_id. In many cases this will be the - * same as source_user.id; it is different when the tweet is created via - * another retweet. See the explanation of source_user_id and parent_user_id - * in Share for examples. - */ - 5: optional i64 retweet_parent_user_id (personalDataType = 'UserId') - - /** - * The tweet quoted in the created tweet. - */ - 6: optional tweet.Tweet quoted_tweet - - /** - * The user who owns quoted_tweet. - */ - 7: optional user.User quoted_user - - /** - * Arbitrary passthrough metadata about tweet creation. - * - * See TweetCreateContextKey for more details about the data that may be - * present here. - */ - 8: optional map additional_context (personalDataTypeValue='UserId') - - /** - * Additional request arguments passed through to consumers. - */ - 9: optional transient_context.TransientCreateContext transient_context - - /** - * Flag exposing if a quoted tweet has been quoted by the user previously. - **/ - 10: optional bool quoter_has_already_quoted_tweet -}(persisted='true', hasPersonalData = 'true') - -struct TweetDeleteEvent { - /** - * The tweet being deleted. - */ - 1: tweet.Tweet tweet - - /** - * The user who owns the deleted tweet. - */ - 2: optional user.User user - - /** - * Whether this tweet was deleted as part of user erasure (the process of deleting tweets - * belonging to deactivated accounts). - * - * These deletions occur in high volume spikes and the tweets have already been made invisible - * externally. You may wish to process them in batches or offline. - */ - 3: optional bool is_user_erasure - - /** - * Audit information from the DeleteTweetRequest that caused this deletion. - * - * This field is used to track the reason for deletion in non-user-initiated - * tweet deletions, like Twitter support agents deleting tweets or spam - * cleanup. 
- */ - 4: optional tweet_audit.AuditDeleteTweet audit - - /** - * Id of the user initiating this request. - * It could be either the owner of the tweet or an admin. - * It is used for scrubbing. - */ - 5: optional i64 by_user_id (personalDataType = 'UserId') - - /** - * Whether this tweet was deleted by an admin user or not - * - * It is used for scrubbing. - */ - 6: optional bool is_admin_delete -}(persisted='true', hasPersonalData = 'true') - -struct TweetUndeleteEvent { - 1: tweet.Tweet tweet - 2: optional user.User user - 3: optional tweet.Tweet source_tweet - 4: optional user.User source_user - 5: optional i64 retweet_parent_user_id (personalDataType = 'UserId') - 6: optional tweet.Tweet quoted_tweet - 7: optional user.User quoted_user - // timestamp of the deletion that this undelete is reversing - 8: optional i64 deleted_at_msec -}(persisted='true', hasPersonalData = 'true') - -/** - * When a user deletes the location information for their tweets, we send one - * TweetScrubGeoEvent for every tweet from which the location is removed. - * - * Users cause this by selecting "Delete location information" in Settings -> - * Privacy. - */ -struct TweetScrubGeoEvent { - 1: i64 tweet_id (personalDataType = 'TweetId') - 2: i64 user_id (personalDataType = 'UserId') -}(persisted='true', hasPersonalData = 'true') - -/** - * When a user deletes the location information for their tweets, we send one - * UserScrubGeoEvent with the max tweet ID that was scrubbed (in addition to - * sending multiple TweetScrubGeoEvents as described above). - * - * Users cause this by selecting "Delete location information" in Settings -> - * Privacy. This additional event is sent to maintain backwards compatibility - * with Hosebird. 
- */ -struct UserScrubGeoEvent { - 1: i64 user_id (personalDataType = 'UserId') - 2: i64 max_tweet_id (personalDataType = 'TweetId') -}(persisted='true', hasPersonalData = 'true') - -struct TweetTakedownEvent { - 1: i64 tweet_id (personalDataType = 'TweetId') - 2: i64 user_id (personalDataType = 'UserId') - // This is the complete list of takedown country codes for the tweet, - // including whatever modifications were made to trigger this event. - // @deprecated Prefer takedown_reasons once TWEETYPIE-4329 deployed - 3: list takedown_country_codes = [] - // This is the complete list of takedown reasons for the tweet, - // including whatever modifications were made to trigger this event. - 4: list takedown_reasons = [] -}(persisted='true', hasPersonalData = 'true') - -struct AdditionalFieldUpdateEvent { - // Only contains the tweet id and modified or newly added fields on that tweet. - // Unchanged fields and tweet core data are omitted. - 1: tweet.Tweet updated_fields - 2: optional i64 user_id (personalDataType = 'UserId') -}(persisted='true', hasPersonalData = 'true') - -struct AdditionalFieldDeleteEvent { - // a map from tweet id to deleted field ids - // Each event will only contain one tweet. - 1: map> deleted_fields (personalDataTypeKey='TweetId') - 2: optional i64 user_id (personalDataType = 'UserId') -}(persisted='true', hasPersonalData = 'true') - -// This event is only logged to scribe not sent to EventBus -struct TweetMediaTagEvent { - 1: i64 tweet_id (personalDataType = 'TweetId') - 2: i64 user_id (personalDataType = 'UserId') - 3: set tagged_user_ids (personalDataType = 'UserId') - 4: optional i64 timestamp_ms -}(persisted='true', hasPersonalData = 'true') - -struct TweetPossiblySensitiveUpdateEvent { - 1: i64 tweet_id (personalDataType = 'TweetId') - 2: i64 user_id (personalDataType = 'UserId') - // The below two fields contain the results of the update. 
- 3: bool nsfw_admin - 4: bool nsfw_user -}(persisted='true', hasPersonalData = 'true') - -struct QuotedTweetDeleteEvent { - 1: i64 quoting_tweet_id (personalDataType = 'TweetId') - 2: i64 quoting_user_id (personalDataType = 'UserId') - 3: i64 quoted_tweet_id (personalDataType = 'TweetId') - 4: i64 quoted_user_id (personalDataType = 'UserId') -}(persisted='true', hasPersonalData = 'true') - -struct QuotedTweetTakedownEvent { - 1: i64 quoting_tweet_id (personalDataType = 'TweetId') - 2: i64 quoting_user_id (personalDataType = 'UserId') - 3: i64 quoted_tweet_id (personalDataType = 'TweetId') - 4: i64 quoted_user_id (personalDataType = 'UserId') - // This is the complete list of takedown country codes for the tweet, - // including whatever modifications were made to trigger this event. - // @deprecated Prefer takedown_reasons - 5: list takedown_country_codes = [] - // This is the complete list of takedown reasons for the tweet, - // including whatever modifications were made to trigger this event. - 6: list takedown_reasons = [] -}(persisted='true', hasPersonalData = 'true') - -union TweetEventData { - 1: TweetCreateEvent tweet_create_event - 2: TweetDeleteEvent tweet_delete_event - 3: AdditionalFieldUpdateEvent additional_field_update_event - 4: AdditionalFieldDeleteEvent additional_field_delete_event - 5: TweetUndeleteEvent tweet_undelete_event - 6: TweetScrubGeoEvent tweet_scrub_geo_event - 7: TweetTakedownEvent tweet_takedown_event - 8: UserScrubGeoEvent user_scrub_geo_event - 9: TweetPossiblySensitiveUpdateEvent tweet_possibly_sensitive_update_event - 10: QuotedTweetDeleteEvent quoted_tweet_delete_event - 11: QuotedTweetTakedownEvent quoted_tweet_takedown_event -}(persisted='true', hasPersonalData = 'true') - -/** - * @deprecated - */ -struct Checksum { - 1: i32 checksum -}(persisted='true') - -struct TweetEventFlags { - /** - * @deprecated Was dark_for_service. 
- */ - 1: list unused1 = [] - - 2: i64 timestamp_ms - - 3: optional SafetyType safety_type - - /** - * @deprecated Was checksum. - */ - 4: optional Checksum unused4 -}(persisted='true') - -/** - * A TweetEvent is a notification published to the tweet_events stream. - */ -struct TweetEvent { - 1: TweetEventData data - 2: TweetEventFlags flags -}(persisted='true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.docx new file mode 100644 index 000000000..2bdf338d9 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift deleted file mode 100644 index 3be5f3b12..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift +++ /dev/null @@ -1,2320 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala -#@namespace strato com.twitter.tweetypie -namespace py gen.twitter.tweetypie.service -namespace rb TweetyPie -namespace go tweetypie - -include "com/twitter/bouncer/bounce.thrift" -include "com/twitter/carousel/service/carousel_service.thrift" -include "com/twitter/context/feature_context.thrift" -include "com/twitter/mediaservices/commons/MediaCommon.thrift" -include "com/twitter/mediaservices/commons/MediaInformation.thrift" -include "com/twitter/servo/exceptions.thrift" -include "com/twitter/spam/features/safety_meta_data.thrift" -include "com/twitter/spam/rtf/safety_label.thrift" -include "com/twitter/spam/rtf/safety_level.thrift" -include "com/twitter/spam/rtf/safety_result.thrift" -include "com/twitter/tseng/withholding/withholding.thrift" -include "com/twitter/tweetypie/deleted_tweet.thrift" -include "com/twitter/tweetypie/transient_context.thrift" -include 
"com/twitter/tweetypie/tweet.thrift" -include "com/twitter/tweetypie/tweet_audit.thrift" -include "com/twitter/incentives/jiminy/jiminy.thrift" -include "unified_cards_contract.thrift" - -typedef i16 FieldId - -struct TweetGeoSearchRequestID { - 1: required string id (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') -}(hasPersonalData = 'true') - -struct TweetCreateGeo { - 1: optional tweet.GeoCoordinates coordinates - 2: optional string place_id (personalDataType = 'InferredLocation') - 3: optional map place_metadata (personalDataTypeKey = 'InferredLocation', personalDataTypeValue = 'InferredLocation') - 4: bool auto_create_place = 1 - // deprecated; use tweet.GeoCoordinates.display - 5: bool display_coordinates = 1 - 6: bool override_user_geo_setting = 0 - 7: optional TweetGeoSearchRequestID geo_search_request_id -}(hasPersonalData = 'true') - -enum StatusState { - /** - * The tweet was found and successfully hydrated. - */ - FOUND = 0 - - /** - * The tweet was not found. It may have been deleted, or could just be an invalid or - * unused tweet id. - */ - NOT_FOUND = 1 - - /** - * The tweet was found, but there was at least one error hydrating some data on the tweet. - * GetTweetResult.missing_fields indicates which fields may have not been hydrated completely. - */ - PARTIAL = 2 - - /** - * @deprecated All failures, including time outs, are indicated by `Failed`. - */ - TIMED_OUT = 3 - - /** - * There was an upstream or internal failure reading this tweet. Usually indicates a - * transient issue that is safe to retry immediately. - */ - FAILED = 4 - - /** - * @deprecated tweets from deactivated users will soon be indicated via `Drop` with - * a `FilteredReason` of `authorAccountIsInactive`. - */ - DEACTIVATED_USER = 5 - - /** - * @deprecated tweets from suspended users will soon be indicated via `Drop` with - * a `FilteredReason` of `authorAccountIsInactive`. 
- */ - SUSPENDED_USER = 6 - - /** - * @deprecated tweets from protected users that the viewer can't see will soon be - * indicated via `Drop` with a `FilteredReason` of `authorIsProtected`. - */ - PROTECTED_USER = 7 - /** - * @deprecated tweets that have been reported by the viewer will soon be indicated - * via `Drop` or `Suppress` with a `FilteredReason` of `reportedTweet`. - */ - REPORTED_TWEET = 8 - - // PrivateTweet was originally used for TwitterSuggest v1 but has since been removed - // obsolete: PRIVATE_TWEET = 9 - - /** - * Could not return this tweet because of backpressure, should - * not be retried immediately; try again later - */ - OVER_CAPACITY = 10 - - /** - * Returned when the requesting client is considered to not be - * able to render the tweet properly - */ - UNSUPPORTED_CLIENT = 11 - - /** - * The tweet exists, but was not returned because it should not be seen by the - * viewer. The reason for the tweet being filtered is indicated via - * GetTweetResult.filtered_reason. - */ - DROP = 12 - - /** - * The tweet exists and was returned, but should not be directly shown to the - * user without additional user intent to see the tweet, as it may be offensive. - * The reason for the suppression is indicated via GetTweetResult.filtered_reason. - */ - SUPPRESS = 13 - - /** - * The tweet once existed and has been deleted. - * When GetTweetOptions.enable_deleted_state is true, deleted tweets - * will be returned as DELETED - * When GetTweetOptions.enable_deleted_state is false, deleted tweets - * will be returned as NOT_FOUND. - */ - DELETED = 14 - - /** - * The tweet once existed, had violated Twitter Rules, and has been deleted. - * When GetTweetOptions.enable_deleted_state is true, bounce-deleted tweets - * will be returned as BOUNCE_DELETED - * When GetTweetOptions.enable_deleted_state is false, bounce-deleted tweets - * will be returned as NOT_FOUND. 
- */ - BOUNCE_DELETED = 15 - - RESERVED_1 = 16 - RESERVED_2 = 17 - RESERVED_3 = 18 - RESERVED_4 = 19 -} - -enum TweetCreateState { - /** - * Tweet was created successfully. - */ - OK = 0, - - /** - * The user_id field from the creation request does not correspond to a user. - */ - USER_NOT_FOUND = 1, - - SOURCE_TWEET_NOT_FOUND = 2, - SOURCE_USER_NOT_FOUND = 3, - - /** - * @deprecated Users can now retweet their own tweets. - */ - CANNOT_RETWEET_OWN_TWEET = 4, - - CANNOT_RETWEET_PROTECTED_TWEET = 5, - CANNOT_RETWEET_SUSPENDED_USER = 6, - CANNOT_RETWEET_DEACTIVATED_USER = 7, - CANNOT_RETWEET_BLOCKING_USER = 8, - - ALREADY_RETWEETED = 9, - CONTRIBUTOR_NOT_SUPPORTED = 10, - - /** - * The created_via field from the creation request does not correspond to a - * known client application. - */ - DEVICE_SOURCE_NOT_FOUND = 11, - - MALWARE_URL = 12, - INVALID_URL = 13, - USER_DEACTIVATED = 14, - USER_SUSPENDED = 15, - TEXT_TOO_LONG = 16, - TEXT_CANNOT_BE_BLANK = 17, - DUPLICATE = 18, - - /** - * PostTweetRequest.in_reply_to_tweet_id was set to a tweet that cannot be found. - * - * This usually means that the tweet was recently deleted, but could also - * mean that the tweet isn't visible to the reply author. (This is the - * case for replies by blocked users.) - */ - IN_REPLY_TO_TWEET_NOT_FOUND = 19, - - INVALID_IMAGE = 20, - INVALID_ADDITIONAL_FIELD = 21, - RATE_LIMIT_EXCEEDED = 22, - INVALID_NARROWCAST = 23, - - /** - * Antispam systems (Scarecrow) denied the request. - * - * This happens for tweets that are probably spam, but there is some - * uncertainty. Tweets that Scarecrow is certain are spammy will appear to - * succeed, but will not be added to backends. - */ - SPAM = 24, - SPAM_CAPTCHA = 25, - - /** - * A provided media upload ID can't be resolved. - */ - MEDIA_NOT_FOUND = 26, - - /** - * Catch-all for when uploaded media violate some condition. 
- * - * For example, too many photos in a multi-photo-set, or including an - * animated gif or video in a multi-photo-set. - */ - INVALID_MEDIA = 27, - - /** - * Returned when Scarecrow tell us to rate limit a tweet request. - * - * Non verified users (i.e., phone verified, email verified) have more - * strict rate limit. - */ - SAFETY_RATE_LIMIT_EXCEEDED = 28, - - /** - * Scarecrow has rejected the creation request until the user completes the - * bounce assignment. - * - * This flag indicates that PostTweetResult.bounce will contain a Bounce - * struct to be propagated to the client. - */ - BOUNCE = 29, - - /** - * Tweet creation was denied because the user is in ReadOnly mode. - * - * As with SPAM, tweets will appear to succeed but will not be actually - * created. - */ - USER_READONLY = 30, - - /** - * Maximum number of mentions allowed in a tweet was exceeded. - */ - MENTION_LIMIT_EXCEEDED = 31, - - /** - * Maximum number of URLs allowed in a tweet was exceeded. - */ - URL_LIMIT_EXCEEDED = 32, - - /** - * Maximum number of hashtags allowed in a tweet was exceeded. - */ - HASHTAG_LIMIT_EXCEEDED = 33, - - /** - * Maximum number of cashtags allowed in a tweet was exceeded. - */ - CASHTAG_LIMIT_EXCEEDED = 34, - - /** - * Maximum length of a hashtag was exceeded. - */ - HASHTAG_LENGTH_LIMIT_EXCEEDED = 35, - - /** - * Returned if a request contains more than one attachment type, which - * includes media, attachment_url, and card_reference. - */ - TOO_MANY_ATTACHMENT_TYPES = 36, - - /** - * Returned if the request contained an attachment URL that isn't allowed. - */ - INVALID_ATTACHMENT_URL = 37, - - /** - * We don't allow users without screen names to be retweeted. 
- */ - CANNOT_RETWEET_USER_WITHOUT_SCREEN_NAME = 38, - - /** - * Tweets may not be allowed if replying or retweeting IPI'd tweets - * See go/tp-ipi-tdd for more details - */ - DISABLED_BY_IPI_POLICY = 39, - - /** - * This state expands our transparency around which URLs are blacklisted or limited - */ - URL_SPAM = 40, - - // Conversation controls are only valid when present on a root - // conversation tweet and quoted tweets. - INVALID_CONVERSATION_CONTROL = 41, - - // Reply Tweet is limited due to conversation controls state set on - // root conversation Tweet. - REPLY_TWEET_NOT_ALLOWED = 42, - - // Nudge is returned when the client provides nudgeOptions and tweetypie receives a nudge - // from the Jiminy strato column. - NUDGE = 43, - - // ApiError BadRequest (400) "Reply to a community tweet must also be a community tweet" - // -- Triggered when a user tries replying to a community tweet with a non community tweet. - COMMUNITY_REPLY_TWEET_NOT_ALLOWED = 44, - // ApiError Forbidden (403) "User is not authorized to post to this community" - // -- Triggered when a user tries posting to a public/closed community that they are not part of. - COMMUNITY_USER_NOT_AUTHORIZED = 45, - // ApiError NotFound (404) "Community does not exist" -- Triggered when: - // a) A user tries posting to a private community they are not a part of. - // b) A user tries posting to a non existent community - COMMUNITY_NOT_FOUND = 46, - // ApiError BadRequest (400) "Cannot retweet a community tweet" - // -- Triggered when a user tries to retweet a community tweet. Community tweets can not be retweeted. - COMMUNITY_RETWEET_NOT_ALLOWED = 47, - - // Attempt to tweet with Conversation Controls was rejected, e.g. due to feature switch authorization. - CONVERSATION_CONTROL_NOT_ALLOWED = 48, - - // Super follow tweets require a special permission to create. - SUPER_FOLLOWS_CREATE_NOT_AUTHORIZED = 49, - - // Not all params can go together. E.g. super follow tweets can not be community tweets. 
- SUPER_FOLLOWS_INVALID_PARAMS = 50, - - // ApiError Forbidden (403) "Protected user can not post to communities" - // -- Triggered when a protected user tries tweeting or replying - // to a community tweet. They are not allowed to create community tweets. - COMMUNITY_PROTECTED_USER_CANNOT_TWEET = 51, - - // ApiError Forbidden (451) "User is not permitted to engage with this exclusive tweet." - // -- Triggered when a user tries to reply to an exclusive tweet without being - // a superfollower of the tweet author. Could be used for other engagements in the future (e.g. favorite) - EXCLUSIVE_TWEET_ENGAGEMENT_NOT_ALLOWED = 52 - - /** - * ApiError BadRequest (400) "Invalid parameters on Trusted Friends tweet creation" - * - * Returned when either of the following occur: - * a) A user tries setting Trusted Friends Control on a reply - * b) A user tries setting Trusted Friends Control on a tweet with any of the following set: - * i) Conversation Control - * ii) Community - * iii) Exclusive Tweet Control - */ - TRUSTED_FRIENDS_INVALID_PARAMS = 53, - - /** - * ApiError Forbidden (403) - * - * Returned when a user tries to retweet a Trusted Friends tweet. - */ - TRUSTED_FRIENDS_RETWEET_NOT_ALLOWED = 54, - - /** - * ApiError Forbidden (457) - * - * Returned when a user tries to reply to a Trusted Friends tweet - * and they are not a trusted friend. 
- */ - TRUSTED_FRIENDS_ENGAGEMENT_NOT_ALLOWED = 55, - - /** - * ApiError BadRequest (400) "Invalid parameters for creating a CollabTweet or CollabInvitation" - * - * Returned when any of the following are true: - * a) A user tries setting Collab Control on a reply - * b) A user tries setting Collab Control on a tweet with any of the following set: - * i) Conversation Control - * ii) Community - * iii) Exclusive Tweet Control - * iv) Trusted Friends Control - **/ - COLLAB_TWEET_INVALID_PARAMS = 56, - - /** - * ApiError Forbidden (457) - * - * Returned when a user tries to create a Trusted Friends tweet but they are not allowed to tweet - * to the requested Trusted Friends list. - */ - TRUSTED_FRIENDS_CREATE_NOT_ALLOWED = 57, - - /** - * Returned when the current user is not allowed to edit in general, this might be due to missing - * roles during development, or a missing subscription. - */ - EDIT_TWEET_USER_NOT_AUTHORIZED = 58, - - /** - * Returned when a user tries to edit a Tweet which they didn't author. - */ - EDIT_TWEET_USER_NOT_AUTHOR = 59, - - /** - * Returned when a user tries edit a stale tweet, meaning a tweet which has already been edited. - */ - EDIT_TWEET_NOT_LATEST_VERSION = 60, - - /** - * ApiError Forbidden (460) - * - * Returned when a user tries to create a Trusted Friends tweet that quotes tweets a Trusted - * Friends tweet. - */ - TRUSTED_FRIENDS_QUOTE_TWEET_NOT_ALLOWED = 61, - - /** - * Returned when a user tries edit a tweet for which the editing time has already expired. - */ - EDIT_TIME_LIMIT_REACHED = 62, - - /** - * Returned when a user tries edit a tweet which has been already edited maximum number of times. 
- */ - EDIT_COUNT_LIMIT_REACHED = 63, - - /* Returned when a user tries to edit a field that is not allowed to be edited */ - FIELD_EDIT_NOT_ALLOWED = 64, - - /* Returned when the initial Tweet could not be found when trying to validate an edit */ - INITIAL_TWEET_NOT_FOUND = 65, - - /** - * ApiError Forbidden (457) - * - * Returned when a user tries to reply to a stale tweet - */ - STALE_TWEET_ENGAGEMENT_NOT_ALLOWED = 66, - - /** - * ApiError Forbidden (460) - * - * Returned when a user tries to create a tweet that quotes tweets a stale tweet - */ - STALE_TWEET_QUOTE_TWEET_NOT_ALLOWED = 67, - - /* Tweet cannot be edited because the initial tweet is - * marked as not edit eligible */ - NOT_ELIGIBLE_FOR_EDIT = 68, - - /* A stale version of an edit tweet cannot be retweeted - * Only latest version of an edit chain should be allowed to be retweeted. */ - STALE_TWEET_RETWEET_NOT_ALLOWED = 69, - - RESERVED_32 = 70, - RESERVED_33 = 71, - RESERVED_34 = 72, - RESERVED_35 = 73, - RESERVED_36 = 74, - RESERVED_37 = 75, -} - -enum UndeleteTweetState { - /** - * The Tweet was successfully undeleted. - */ - SUCCESS = 0, - - /** - * The Tweet was deleted and is still deleted. It cannot be undeleted - * because the tweet is no longer in the soft delete archive. - */ - SOFT_DELETE_EXPIRED = 1, - - /** - * The Tweet likely has never existed, and therefore cannot be undeleted. - */ - TWEET_NOT_FOUND = 2, - - /** - * The Tweet could not be undeleted because it was not deleted in - * the first place. - */ - TWEET_ALREADY_EXISTS = 3, - - /** - * The user who created the Tweet being undeleted could not be found. - */ - USER_NOT_FOUND = 4, - - /** - * The Tweet could not be undeleted because it is a retweet and the original - * tweet is gone. - */ - SOURCE_TWEET_NOT_FOUND = 5, - - /** - * The Tweet could not be undeleted because it is a retweet and the author - * of the original tweet is gone. - */ - SOURCE_USER_NOT_FOUND = 6, - - /** - * The Tweet was deleted and is still deleted. 
It cannot be undeleted - * because the tweet has been bounce deleted. Bounce deleted tweet - * has been found to violate Twitter Rules. go/bouncer go/bounced-tweet - */ - TWEET_IS_BOUNCE_DELETED = 7, - - /** - * This tweet cannot be undeleted because the tweet was created by a - * user when they were under 13. - **/ - TWEET_IS_U13_TWEET = 8, - - RESERVED_2 = 9, - RESERVED_3 = 10 -} - -enum TweetDeleteState { - /** - * Tweet was deleted successfully. - */ - OK = 0, - - /** - * Tweet was not deleted because of the associated user. - * - * The DeleteTweetsRequest.by_user_id must match the tweet owner or be an - * admin user. - */ - PERMISSION_ERROR = 1, - - /** - * The expected_user_id provided in DeleteTweetsRequest does not match the - * user_id of the tweet owner. - */ - EXPECTED_USER_ID_MISMATCH = 2, - - /** - * @deprecated. - * - * is_user_erasure was set in DeleteTweetsRequest but the user was not in - * the erased state. - */ - USER_NOT_IN_ERASED_STATE = 3, - - /** - * Failed to Load the source Tweet while unretweeting stale revisions in an edit chain. - */ - SOURCE_TWEET_NOT_FOUND = 4, - - RESERVED_4 = 5, - RESERVED_5 = 6, - RESERVED_6 = 7, - RESERVED_7 = 8 -} - -enum DeletedTweetState { - /** - * The tweet has been marked as deleted but has not been permanently deleted. - */ - SOFT_DELETED = 1 - - /** - * The tweet has never existed. - */ - NOT_FOUND = 2 - - /** - * The tweet has been permanently deleted. - */ - HARD_DELETED = 3 - - /** - * The tweet exists and is not currently deleted. - */ - NOT_DELETED = 4 - - RESERVED1 = 5 - RESERVED2 = 6 - RESERVED3 = 7 -} - -/** - * Hydrations to perform on the Tweet returned by post_tweet and post_retweet. 
- */ -struct WritePathHydrationOptions { - /** - * Return cards for tweets with cards in Tweet.cards or Tweet.card2 - * - * card2 also requires setting a valid cards_platform_key - */ - 1: bool include_cards = 0 - - /** - * The card format version supported by the requesting client - */ - 2: optional string cards_platform_key - - # 3: obsolete - # 4: obsolete - - /** - * The argument passed to the Stratostore extension points mechanism. - */ - 5: optional binary extensions_args - - /** - * When returning a tweet that quotes another tweet, do not include - * the URL to the quoted tweet in the tweet text and url entities. - * This is intended for clients that use the quoted_tweet field of - * the tweet to display quoted tweets. Also see simple_quoted_tweet - * field in GetTweetOptions and GetTweetFieldsOptions - */ - 6: bool simple_quoted_tweet = 0 -} - -struct RetweetRequest { - /** - * Id of the tweet being retweeted. - */ - 1: required i64 source_status_id (personalDataType = 'TweetId') - - /** - * User creating the retweet. 
- */ - 2: required i64 user_id (personalDataType = 'UserId') - - /** - * @see PostTweetRequest.created_via - */ - 3: required string created_via (personalDataType = 'ClientType') - 4: optional i64 contributor_user_id (personalDataType = 'UserId') // no longer supported - - /** - * @see PostTweetRequest.tracking_id - */ - 5: optional i64 tracking_id (personalDataType = 'ImpressionId') - 6: optional tweet.Narrowcast narrowcast - - /** - * @see PostTweetRequest.nullcast - */ - 7: bool nullcast = 0 - - /** - * @see PostTweetRequest.dark - */ - 8: bool dark = 0 - - // OBSOLETE 9: bool send_retweet_sms_push = 0 - - 10: optional WritePathHydrationOptions hydration_options - - /** - * @see PostTweetRequest.additional_fields - */ - 11: optional tweet.Tweet additional_fields - - /** - * @see PostTweetRequest.uniqueness_id - */ - 12: optional i64 uniqueness_id (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') - - 13: optional feature_context.FeatureContext feature_context - - 14: bool return_success_on_duplicate = 0 - - /** - * Passthrough data for Scarecrow that is used for safety checks. - */ - 15: optional safety_meta_data.SafetyMetaData safety_meta_data - - /** - * This is a unique identifier used in both the REST and GraphQL-dark - * requests that will be used to correlate the GraphQL mutation requests to the REST requests - * during a transition period when clients will be moving toward tweet creation via GraphQL. - * See also, the "Comparison Testing" section at go/tweet-create-on-graphql-tdd for additional - * context. - */ - 16: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') -}(hasPersonalData = 'true') - -/** - * A request to set or unset nsfw_admin and/or nsfw_user. - */ -struct UpdatePossiblySensitiveTweetRequest { - /** - * Id of tweet being updated - */ - 1: required i64 tweet_id (personalDataType = 'TweetId') - - /** - * Id of the user initiating this request. 
- * - * It could be either the owner of the tweet or an admin. It is used when - * auditing the request in Guano. - */ - 2: required i64 by_user_id (personalDataType = 'UserId') - - /** - * New value for tweet.core_data.nsfw_admin. - */ - 3: optional bool nsfw_admin - - /** - * New value for tweet.core_data.nsfw_user. - */ - 4: optional bool nsfw_user - - /** - * Host or remote IP where the request originated. - * - * This data is used when auditing the request in Guano. If unset, it will - * be logged as "". - */ - 5: optional string host (personalDataType = 'IpAddress') - - /** - * Pass-through message sent to the audit service. - */ - 6: optional string note -}(hasPersonalData = 'true') - -struct UpdateTweetMediaRequest { - /** - * The tweet id that's being updated - */ - 1: required i64 tweet_id (personalDataType = 'TweetId') - - /** - * A mapping from old (existing) media ids on the tweet to new media ids. - * - * Existing tweet media not in this map will remain unchanged. - */ - 2: required map old_to_new_media_ids (personalDataTypeKey = 'MediaId', personalDataTypeValue = 'MediaId') -}(hasPersonalData = 'true') - -struct TakedownRequest { - 1: required i64 tweet_id (personalDataType = 'TweetId') - - /** - * The list of takedown country codes to add to the tweet. - * - * DEPRECATED, reasons_to_add should be used instead. - */ - 2: list countries_to_add = [] (personalDataType = 'ContentRestrictionStatus') - - /** - * This field is the list of takedown country codes to remove from the tweet. - * - * DEPRECATED, reasons_to_remove should be used instead. - */ - 3: list countries_to_remove = [] (personalDataType = 'ContentRestrictionStatus') - - /** - * This field is the list of takedown reasons to add to the tweet. - */ - 11: list reasons_to_add = [] - - /** - * This field is the list of takedown reasons to remove from the tweet. - */ - 12: list reasons_to_remove = [] - - /** - * Motivation for the takedown which is written to the audit service. 
- * - * This data is not persisted with the takedown itself. - */ - 4: optional string audit_note (personalDataType = 'AuditMessage') - - /** - * Whether to send this request to the audit service. - */ - 5: bool scribe_for_audit = 1 - - // DEPRECATED, this field is no longer used. - 6: bool set_has_takedown = 1 - - // DEPRECATED, this field is no longer used. - 7: optional list previous_takedown_country_codes (personalDataType = 'ContentRestrictionStatus') - - /** - * Whether this request should enqueue a TweetTakedownEvent to EventBus and - * Hosebird. - */ - 8: bool eventbus_enqueue = 1 - - /** - * ID of the user who initiated the takedown. - * - * This is used when writing the takedown to the audit service. If unset, it - * will be logged as -1. - */ - 9: optional i64 by_user_id (personalDataType = 'UserId') - - /** - * Host or remote IP where the request originated. - * - * This data is used when auditing the request in Guano. If unset, it will - * be logged as "". - */ - 10: optional string host (personalDataType = 'IpAddress') -}(hasPersonalData = 'true') - -// Arguments to delete_location_data -struct DeleteLocationDataRequest { - 1: i64 user_id (personalDataType = 'UserId') -}(hasPersonalData = 'true') - -// structs for API V2 (flexible schema) - -struct GetTweetOptions { - /** - * Return the original tweet in GetTweetResult.source_tweet for retweets. - */ - 1: bool include_source_tweet = 1 - - /** - * Return the hydrated Place object in Tweet.place for tweets with geolocation. - */ - 2: bool include_places = 0 - - /** - * Language used for place names when include_places is true. Also passed to - * the cards service, if cards are hydrated for the request. 
- */ - 3: string language_tag = "en" - - /** - * Return cards for tweets with cards in Tweet.cards or Tweet.card2 - * - * card2 also requires setting a valid cards_platform_key - */ - 4: bool include_cards = 0 - - /** - * Return the number of times a tweet has been retweeted in - * Tweet.counts.retweet_count. - */ - 5: bool include_retweet_count = 0 - - /** - * Return the number of direct replies to a tweet in - * Tweet.counts.reply_count. - */ - 6: bool include_reply_count = 0 - - /** - * Return the number of favorites a tweet has received in - * Tweet.counts.favorite_count. - */ - 7: bool include_favorite_count = 0 - - # OBSOLETE 8: bool include_unique_users_impressed_count = 0 - # OBSOLETE 9: bool include_click_count = 0 - # OBSOLETE 10: bool include_descendent_reply_count = 0 - - /** - * @deprecated Use safety_level for spam filtering. - */ - 11: optional tweet.SpamSignalType spam_signal_type - - /** - * If the requested tweet is not already in cache, do not add it. - * - * You should set do_not_cache to true if you are requesting old tweets - * (older than 30 days) and they are unlikely to be requested again. - */ - 12: bool do_not_cache = 0 - - /** - * The card format version supported by the requesting client - */ - 13: optional string cards_platform_key (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') - - /** - * The user for whose perspective this request should be processed. - * - * If you are requesting tweets on behalf of a user, set this to their user - * id. The effect of setting this option is: - * - * - Tweetypie will return protected tweets that the user is allowed to - * access, rather than filtering out protected tweets. - * - * - If this field is set *and* `include_perspectivals` is set, then the - * tweets will have the `perspective` field set to a struct with flags - * that indicate whether the user has favorited, retweeted, or reported - * the tweet in question. 
- * - * If you have a specific need to access all protected tweets (not - * just tweets that should be accessible to the current user), see the - * documentation for `include_protected`. - */ - 14: optional i64 for_user_id (personalDataType = 'UserId') - - /** - * Do not enforce normal filtering for protected tweets, blocked quote tweets, - * contributor data, etc. This does not affect Visibility Library (http://go/vf) - * based filtering which executes when safety_level is specified, see request - * field 24 safety_level below - * - * If `bypass_visibility_filtering` is true, Tweetypie will not enforce filtering - * for protected tweets, blocked quote tweets, contributor data, etc. and your client - * will receive all tweets regardless of follow relationship. You will also be able - * to access tweets from deactivated and suspended users. This is only necessary - * for special cases, such as indexing or analyzing tweets, or administrator access. - * Since this elevated access is usually unnecessary, and is a security risk, you will - * need to get your client id whitelisted to access this feature. - * - * If you are accessing tweets on behalf of a user, set - * `bypass_visibility_filtering` to false and set `for_user_id`. This will - * allow access to exactly the set of tweets that that user is authorized to - * access, and filter out tweets the user should not be authorized to access - * (returned with a StatusState of PROTECTED_USER). - */ - 15: bool bypass_visibility_filtering = 0 - - /** - * Return the user-specific view of a tweet in Tweet.perspective - * - * for_user_id must also be set. - */ - 16: bool include_perspectivals = 0 - - // OBSOLETE media faces are always included - 17: bool include_media_faces = 0 - - /** - * The flexible schema fields of the tweet to return. - * - * Fields of tweets in the 100+ range will only be returned if they are - * explicitly requested. 
- */ - 18: list additional_field_ids = [] - - // OBSOLETE 19: bool include_topic_labels = 0 - - /** - * Exclude user-reported tweets from this request. Only applicable if - * forUserId is set. - * - * Users can report individual tweets in the UI as uninteresting, spam, - * sensitive, or abusive. - */ - 20: bool exclude_reported = 0 - - // if set to true, disables suggested tweet visibility checks - // OBSOLETE (TwitterSuggestInfo version of suggested tweets has been removed) - 21: bool obsolete_skip_twitter_suggests_visibility_check = 0 - // OBSOLETE 22: optional set spam_signal_types - - /** - * Return the quoted tweet in GetTweetResult.quoted_tweet - */ - 23: bool include_quoted_tweet = 0 - - /** - * Content filtering policy that will be used to drop or suppress tweets - * from response. The filtering is based on the result of Visibility Library - * and does not affect filtering of tweets from blocked or non-followed protected users, see - * request field 15 bypass_visibility_filtering above - * - * If not specified SafetyLevel.FilterDefault will be used. - */ - 24: optional safety_level.SafetyLevel safety_level - - // obsolete 25: bool include_animated_gif_media_entities = 0 - 26: bool include_profile_geo_enrichment = 0 - // obsolete 27: optional set extensions - 28: bool include_tweet_pivots = 0 - - /** - * The argument passed to the Stratostore extension points mechanism. - */ - 29: optional binary extensions_args - - /** - * Return the number of times a tweet has been quoted in Tweet.counts.quote_count - */ - 30: bool include_quote_count = 0 - - /** - * Return media metadata from MediaInfoService in MediaEntity.additional_metadata - */ - 31: bool include_media_additional_metadata = 0 - - /** - * Populate the conversation_muted field of the Tweet for the requesting - * user. - * - * Setting this to true will have no effect unless for_user_id is set. 
- */ - 32: bool include_conversation_muted = 0 - - /** - * @deprecated go/sunsetting-carousels - */ - 33: bool include_carousels = 0 - - /** - * When enable_deleted_state is true and we have evidence that the - * tweet once existed and was deleted, Tweetypie returns - * StatusState.DELETED or StatusState.BOUNCE_DELETED. (See comments - * on StatusState for details on these two states.) - * - * When enable_deleted_state is false, deleted tweets are - * returned as StatusState.NOT_FOUND. - * - * Note: even when enable_deleted_state is true, a deleted tweet may - * still be returned as StatusState.NOT_FOUND due to eventual - * consistency. - * - * This option is false by default for compatibility with clients - * expecting StatusState.NOT_FOUND. - */ - 34: bool enable_deleted_state = 0 - - /** - * Populate the conversation_owner_id field of the Tweet for the requesting - * user. Which translate into is_conversation_owner in birdherd - * - */ - // obsolete 35: bool include_conversation_owner_id = 0 - - /** - * Populate the is_removed_from_conversation field of the Tweet for the requesting - * user. - * - */ - // obsolete 36: bool include_is_removed_from_conversation = 0 - - // To retrieve self-thread metadata request field Tweet.SelfThreadMetadataField - // obsolete 37: bool include_self_thread_info = 0 - - /** - * This option surfaces CardReference field (118) in Tweet thrift object. - * We use card_uri present in card reference, to get access to stored card information. - */ - 37: bool include_card_uri = 0 - - /** - * When returning a tweet that quotes another tweet, do not include - * the URL to the quoted tweet in the tweet text and url entities. - * This is intended for clients that use the quoted_tweet field of - * the tweet to display quoted tweets. - */ - 38: bool simple_quoted_tweet = 0 - - /** - * This flag is used and only take affect if the requested tweet is creatives container backed - * tweet. 
This will suprress the tweet materialization and return tweet not found. - * - * go/creatives-containers-tdd - **/ - 39: bool disable_tweet_materialization = 0 - - - /** - * Used for load shedding. If set to true, Tweetypie service might shed the request, if the service - * is struggling. - **/ - 40: optional bool is_request_sheddable - -}(hasPersonalData = 'true') - -struct GetTweetsRequest { - 1: required list tweet_ids (personalDataType = 'TweetId') - // @deprecated unused - 2: optional list source_tweet_id_hints (personalDataType = 'TweetId') - 3: optional GetTweetOptions options - // @deprecated unused - 4: optional list quoted_tweet_id_hints (personalDataType = 'TweetId') -}(hasPersonalData = 'true') - -/** - * Can be used to reference an arbitrary nested field of some struct via - * a list of field IDs describing the path of fields to reach the referenced - * field. - */ -struct FieldByPath { - 1: required list field_id_path -} - -struct GetTweetResult { - 1: required i64 tweet_id (personalDataType = 'TweetId') - - /** - * Indicates what happened when the tweet was loaded. - */ - 2: required StatusState tweet_state - - /** - * The requested tweet when tweet_state is `FOUND`, `PARTIAL`, or `SUPPRESS`. - * - * This field will be set if the tweet exists, access is authorized, - * and enough data about the tweet is available to materialize a - * tweet. When this field is set, you should look at the tweet_state - * field to determine how to treat this tweet. - * - * If tweet_state is FOUND, then this tweet is complete and passes the - * authorization checks requested in GetTweetOptions. (See - * GetTweetOptions.for_user_id for more information about authorization.) 
- * - * If tweet_state is PARTIAL, then enough data was available to return - * a tweet, but there was an error when loading the tweet that prevented - * some data from being returned (for example, if a request to the cards - * service times out when cards were requested, then the tweet will be - * marked PARTIAL). `missing_fields` indicates which parts of the tweet - * failed to load. When you receive a PARTIAL tweet, it is up to you - * whether to proceed with the degraded tweet data or to consider it a - * failure. For example, a mobile client might choose to display a - * PARTIAL tweet to the user, but not store it in an internal cache. - * - * If tweet_state is SUPPRESS, then the tweet is complete, but soft - * filtering is enabled. This state is intended to hide potentially - * harmful tweets from user's view while not taking away the option for - * the user to override our filtering decision. See http://go/rtf - * (render-time filtering) for more information about how to treat these - * tweets. - */ - 3: optional tweet.Tweet tweet - - /** - * The tweet fields that could not be loaded when tweet_state is `PARTIAL` - * or `SUPPRESS`. - * - * This field will be set when the `tweet_state` is `PARTIAL`, and may - * be set when `tweet_state` is SUPPRESS. It indicates degraded data in - * the `tweet`. Each entry in `missing_fields` indicates a traversal of - * the `Tweet` thrift object terminating at the field that is - * missing. For most non-core fields, the path will just be the field id - * of the field that is missing. - * - * For example, if card2 failed to load for a tweet, the `tweet_state` - * will be `PARTIAL`, the `tweet` field will be set, the Tweet's `card2` - * field will be empty, and this field will be set to: - * - * Set(FieldByPath(Seq(17))) - */ - 4: optional set missing_fields - - /** - * The original tweet when `tweet` is a retweet and - * GetTweetOptions.include_source_tweet is true. 
- */ - 5: optional tweet.Tweet source_tweet - - /** - * The retweet fields that could not be loaded when tweet_state is `PARTIAL`. - */ - 6: optional set source_tweet_missing_fields - - /** - * The quoted tweet when `tweet` is a quote tweet and - * GetTweetOptions.include_quoted_tweet is true. - */ - 7: optional tweet.Tweet quoted_tweet - - /** - * The quoted tweet fields that could not be loaded when tweet_state is `PARTIAL`. - */ - 8: optional set quoted_tweet_missing_fields - - /** - * The reason that a tweet should not be displayed when tweet_state is - * `SUPPRESS` or `DROP`. - */ - 9: optional safety_result.FilteredReason filtered_reason - - /** - * Hydrated carousel if the tweet contains a carousel URL and the - * GetTweetOptions.include_carousel is true. - * - * In this case Carousel Service is requested to hydrate the carousel, and - * the result stored in this field. - * - * @deprecated go/sunsetting-carousels - */ - 10: optional carousel_service.GetCarouselResult carousel_result - - /** - * If a quoted tweet would be present, but it was filtered out, then - * this field will be set to the reason that it was filtered. - */ - 11: optional safety_result.FilteredReason quoted_tweet_filtered_reason -}(hasPersonalData = 'true') - -union TweetInclude { - /** - * Field ID within the `Tweet` struct to include. All fields may be optionally included - * except for the `id` field. - */ - 1: FieldId tweetFieldId - - /** - * Field ID within the `StatusCounts` struct to include. Only specifically requested - * count fields will be included. Including any `countsFieldIds` values automatically - * implies including `Tweet.counts`. - * - */ - 2: FieldId countsFieldId - - /** - * Field ID within the `MediaEntity` struct to include. 
Currently, only `MediaEntity.additionalMetadata` - * may be optionally included (i.e., it will not be included by default if you include - * `tweetFieldId` = `Tweet.media` without also including `mediaEntityFieldId` = - * `MediaEntity.additionalMetadata`. Including any `mediaEntityFieldId` values automatically - * implies include `Tweet.media`. - */ - 3: FieldId mediaEntityFieldId -} - -/** - * An enumeration of policy options indicating how tweets should be filtered (protected tweets, blocked quote tweets, - * contributor data, etc.). This does not affect Visibility Library (http://go/vf) based filtering. - * This is equivalent to `bypass_visibility_filtering` in get_tweets() call. This means that - * `TweetVisibilityPolicy.NO_FILTERING` is equivalent to `bypass_visibility_filtering` = true - */ -enum TweetVisibilityPolicy { - /** - * only return tweets that should be visible to either the `forUserId` user, if specified, - * or from the perspective of a logged-out user if `forUserId` is not specified. This option - * should always be used if requesting data to be returned via the public API. - */ - USER_VISIBLE = 1, - - /** - * returns all tweets that can be found, regardless of user visibility. This option should - * never be used when gather data to be return in an API, and should only be used for internal - * processing. because this option allows access to potentially sensitive data, clients - * must be whitelisted to use it. - */ - NO_FILTERING = 2 -} - -struct GetTweetFieldsOptions { - /** - * Identifies which `Tweet` or nested fields to include in the response. - */ - 1: required set tweet_includes - - /** - * If true and the requested tweet is a retweet, then a `Tweet` - * containing the requested fields for the retweeted tweet will be - * included in the response. 
- */ - 2: bool includeRetweetedTweet = 0 - - /** - * If true and the requested tweet is a quote-tweet, then the quoted - * tweet will also be queried and the result for the quoted tweet - * included in `GetTweetFieldsResult.quotedTweetResult`. - */ - 3: bool includeQuotedTweet = 0 - - /** - * If true and the requested tweet contains a carousel URL, then the - * carousel will also be queried and the result for the carousel - * included in `GetTweetFieldsResult.carouselResult`. - * - * @deprecated go/sunsetting-carousels - */ - 4: bool includeCarousel = 0 - - /** - * If you are requesting tweets on behalf of a user, set this to their - * user id. The effect of setting this option is: - * - * - Tweetypie will return protected tweets that the user is allowed - * to access, rather than filtering out protected tweets, when `visibility_policy` - * is set to `USER_VISIBLE`. - * - * - If this field is set *and* `Tweet.perspective` is requested, then - * the tweets will have the `perspective` field set to a struct with - * flags that indicate whether the user has favorited, retweeted, or - * reported the tweet in question. - */ - 10: optional i64 forUserId (personalDataType = 'UserId') - - /** - * language_tag is used when hydrating a `Place` object, to get localized names. - * Also passed to the cards service, if cards are hydrated for the request. - */ - 11: optional string languageTag (personalDataType = 'InferredLanguage') - - /** - * if requesting card2 cards, you must specify the platform key - */ - 12: optional string cardsPlatformKey (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') - - /** - * The argument passed to the Stratostore extension points mechanism. - */ - 13: optional binary extensionsArgs - - /** - * the policy to use when filtering tweets for basic visibility. 
- */ - 20: TweetVisibilityPolicy visibilityPolicy = TweetVisibilityPolicy.USER_VISIBLE - - /** - * Content filtering policy that will be used to drop or suppress tweets from response. - * The filtering is based on the result of Visibility Library (http://go/vf) - * and does not affect filtering of tweets from blocked or non-followed protected users, see - * request field 20 visibilityPolicy above - * - * If not specified SafetyLevel.FilterNone will be used. - */ - 21: optional safety_level.SafetyLevel safetyLevel - - /** - * The tweet result won't be cached by Tweetypie if doNotCache is true. - * You should set it as true if old tweets (older than 30 days) are requested, - * and they are unlikely to be requested again. - */ - 30: bool doNotCache = 0 - - /** - * When returning a tweet that quotes another tweet, do not include - * the URL to the quoted tweet in the tweet text and url entities. - * This is intended for clients that use the quoted_tweet field of - * the tweet to display quoted tweets. - * - */ - 31: bool simple_quoted_tweet = 0 - - /** - * This flag is used and only take affect if the requested tweet is creatives container backed - * tweet. This will suprress the tweet materialization and return tweet not found. - * - * go/creatives-containers-tdd - **/ - 32: bool disable_tweet_materialization = 0 - - /** - * Used for load shedding. If set to true, Tweetypie service might shed the request, if the service - * is struggling. - **/ - 33: optional bool is_request_sheddable -}(hasPersonalData = 'true') - -struct GetTweetFieldsRequest { - 1: required list tweetIds (personalDataType = 'TweetId') - 2: required GetTweetFieldsOptions options -} (hasPersonalData = 'true') - -/** - * Used in `TweetFieldsResultState` when the requested tweet is found. - */ -struct TweetFieldsResultFound { - 1: required tweet.Tweet tweet - - /** - * If `tweet` is a retweet, `retweetedTweet` will be the retweeted tweet. 
- * Just like with the requested tweet, only the requested fields will be - * hydrated and set on the retweeted tweet. - */ - 2: optional tweet.Tweet retweetedTweet - - /** - * If specified, then the tweet should be soft filtered. - */ - 3: optional safety_result.FilteredReason suppressReason -} - -/** - * Used in `TweetFieldsResultState` when the requested tweet is not found. - */ -struct TweetFieldsResultNotFound { - // If this field is true, then we know that the tweet once existed and - // has since been deleted. - 1: bool deleted = 0 - - // This tweet is deleted after being bounced for violating the Twitter - // Rules and should never be rendered or undeleted. see go/bounced-tweet - // In certain timelines we render a tombstone in its place. - 2: bool bounceDeleted = 0 - - // The reason that a tweet should not be displayed. See go/vf-tombstones-in-tweetypie - // Tweets that are not found do not going through Visibility Filtering rule evaluation and thus - // are not `TweetFieldsResultFiltered`, but may still have a filtered_reason that distinguishes - // whether the unavailable tweet should be tombstoned or hard-filtered based on the Safety Level. - 3: optional safety_result.FilteredReason filtered_reason -} - -struct TweetFieldsPartial { - 1: required TweetFieldsResultFound found - - /** - * The tweet fields that could not be loaded when hydration fails - * and a backend fails with an exception. This field is populated - * when a tweet is "partially" hydrated, i.e. some fields were - * successfully fetched while others were not. - * - * It indicates degraded data in the `tweet`. Each entry in `missing_fields` - * indicates a traversal of the `Tweet` thrift object terminating at - * the field that is missing. For most non-core fields, the path will - * just be the field id of the field that is missing. 
- * - * For example, if card2 failed to load for a tweet, the tweet is marked "partial", - * the `tweet` field will be set, the Tweet's `card2` - * field will be empty, and this field will be set to: - * - * Set(FieldByPath(Seq(17))) - */ - 2: required set missingFields - - /** - * Same as `missing_fields` but for the source tweet in case the requested tweet - * was a retweet. - */ - 3: required set sourceTweetMissingFields -} -/** - * Used in `TweetFieldsResultState` when there was a failure loading the requested tweet. - */ -struct TweetFieldsResultFailed { - /** - * If true, the failure was the result of backpressure, which means the request - * should not be immediately retried. It is safe to retry again later. - * - * If false, the failure is probably transient and safe to retry immediately. - */ - 1: required bool overCapacity - - /** - * An optional message about the cause of the failure. - */ - 2: optional string message - - /** - * This field is populated when some tweet fields fail to load and the - * tweet is marked "partial" in tweetypie. It contains the tweet/RT - * information along with the set of tweet fields that failed to - * get populated. - */ - 3: optional TweetFieldsPartial partial -} - -/** - * Used in `TweetFieldsResultState` when the requested tweet has been filtered out. - */ -struct TweetFieldsResultFiltered { - 1: required safety_result.FilteredReason reason -} - -/** - * A union of the different possible outcomes of a fetching a single tweet. - */ -union TweetFieldsResultState { - 1: TweetFieldsResultFound found - 2: TweetFieldsResultNotFound notFound - 3: TweetFieldsResultFailed failed - 4: TweetFieldsResultFiltered filtered -} - -/** - * The response to get_tweet_fields will include a TweetFieldsResultRow for each - * requested tweet id. - */ -struct GetTweetFieldsResult { - /** - * The id of the requested tweet. 
- */ - 1: required i64 tweetId (personalDataType = 'TweetId') - - /** - * the result for the requested tweet - */ - 2: required TweetFieldsResultState tweetResult - - /** - * If quoted-tweets were requested and the primary tweet was found, - * this field will contain the result state for the quoted tweeted. - */ - 3: optional TweetFieldsResultState quotedTweetResult - - /** - * If the primary tweet was found, carousels were requested and there - * was a carousel URL in the primary tweet, this field will contain the - * result for the carousel. - * - * @deprecated - */ - 4: optional carousel_service.GetCarouselResult carouselResult -} - -struct TweetCreateConversationControlByInvitation { - 1: optional bool invite_via_mention -} - -struct TweetCreateConversationControlCommunity { - 1: optional bool invite_via_mention -} - -struct TweetCreateConversationControlFollowers { - 1: optional bool invite_via_mention -} - -/** - * Specify limits on user participation in a conversation. - * - * This is a union rather than a struct to support adding conversation - * controls that require carrying metadata along with them, such as a list id. 
- * - * See also: - * Tweet.conversation_control - * PostTweetRequest.conversation_control - */ -union TweetCreateConversationControl { - 1: TweetCreateConversationControlCommunity community - 2: TweetCreateConversationControlByInvitation byInvitation - 3: TweetCreateConversationControlFollowers followers -} - -/* - * Specifies the exclusivity of a tweet - * This limits the audience of the tweet to the author - * and the author's super followers - * While empty now, we are expecting to add additional fields in v1+ - */ -struct ExclusiveTweetControlOptions {} - -struct TrustedFriendsControlOptions { - 1: i64 trusted_friends_list_id = 0 (personalDataType = 'TrustedFriendsListMetadata') -}(hasPersonalData = 'true') - -struct CollabInvitationOptions { - 1: required list collaborator_user_ids (personalDataType = 'UserId') - // Note: status not sent here, will be added in TweetBuilder to set all but author as PENDING -} - -struct CollabTweetOptions { - 1: required list collaborator_user_ids (personalDataType = 'UserId') -} - -union CollabControlOptions { - 1: CollabInvitationOptions collabInvitation - 2: CollabTweetOptions collabTweet -} - -/** - * When this struct is supplied, this PostTweetRequest is interpreted as - * an edit of the Tweet whose latest version is represented by previous_tweet_id. - * If this is the first edit of a Tweet, this will be the same as the initial_tweet_id. - **/ -struct EditOptions { - /** - * The ID of the previous latest version of the Tweet that is being edited. - * If this is the first edit, this will be the same as the initial_tweet_id. - **/ - 1: required i64 previous_tweet_id (personalDataType = 'TweetId') -} - -struct NoteTweetOptions { - /** - * The ID of the NoteTweet to be associated with this Tweet. 
- **/ - 1: required i64 note_tweet_id (personalDataType = 'TwitterArticleID') - // Deprecated - 2: optional list mentioned_screen_names (personalDataType = 'Username') - /** - * The user IDs of the mentioned users - **/ - 3: optional list mentioned_user_ids (personalDataType = 'UserId') - /** - * Specifies if the Tweet can be expanded into the NoteTweet, or if they have the same text - **/ - 4: optional bool is_expandable -} - -struct PostTweetRequest { - /** - * Id of the user creating the tweet. - */ - 1: required i64 user_id (personalDataType = 'UserId') - - /** - * The user-supplied text of the tweet. - */ - 2: required string text (personalDataType = 'PrivateTweets, PublicTweets') - - /** - * The OAuth client application from which the creation request originated. - * - * This must be in the format "oauth:". For requests - * from a user this is the application id of their client; for internal - * services this is the id of an associated application registered at - * https://apps.twitter.com. - */ - 3: required string created_via (personalDataType = 'ClientType') - - 4: optional i64 in_reply_to_tweet_id (personalDataType = 'TweetId') - 5: optional TweetCreateGeo geo - 6: optional list media_upload_ids (personalDataType = 'MediaId') - 7: optional tweet.Narrowcast narrowcast - - /** - * Do not deliver this tweet to a user's followers. - * - * When true this tweet will not be fanned out, appear in the user's - * timeline, or appear in search results. It will be distributed via the - * firehose and available in the public API. - * - * This is primarily used to create tweets that can be used as ads without - * broadcasting them to an advertiser's followers. - * - */ - 8: bool nullcast = 0 - - /** - * The impression id of the ad from which this tweet was created. - * - * This is set when a user retweets or replies to a promoted tweet. It is - * used to attribute the "earned" exposure of an advertisement. 
- */ - 9: optional i64 tracking_id (personalDataType = 'ImpressionId') - - /** - * @deprecated. - * TOO clients don't actively use this input param, and the v2 API does not plan - * to expose this parameter. The value associated with this field that's - * stored with a tweet is obtained from the user's account preferences stored in - * `User.safety.nsfw_user`. (See go/user.thrift for more details on this field) - * - * Field indicates whether a individual tweet may contain objectionable content. - * - * If specified, tweet.core_data.nsfw_user will equal this value (otherwise, - * tweet.core_data.nsfw_user will be set to user.nsfw_user). - */ - 10: optional bool possibly_sensitive - - /** - * Do not save, index, fanout, or otherwise persist this tweet. - * - * When true, the tweet is validated, created, and returned but is not - * persisted. This can be used for dark testing or pre-validating a tweet - * scheduled for later creation. - */ - 11: bool dark = 0 - - /** - * IP address of the user making the request. - * - * This is used for logging certain kinds of actions, like attempting to - * tweet malware urls. - */ - 12: optional string remote_host (personalDataType = 'IpAddress') - - /** - * Additional fields to write with this tweet. - * - * This Tweet object should contain only additional fields to write with - * this tweet. Additional fields are tweet fields with id > 100. Set - * tweet.id to be 0; the id will be generated by Tweetypie. Any other non- - * additional fields set on this tweet will be considered an invalid - * request. - * - */ - 14: optional tweet.Tweet additional_fields - - 15: optional WritePathHydrationOptions hydration_options - - // OBSOLETE 16: optional bool bypass_rate_limit_for_xfactor - - /** - * ID to explicitly identify a creation request for the purpose of rejecting - * duplicates. - * - * If two requests are received with the same uniqueness_id, then they will - * be considered duplicates of each other. 
This only applies for tweets - * created within the same datacenter. This id should be a snowflake id so - * that it's globally unique. - */ - 17: optional i64 uniqueness_id (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') - - 18: optional feature_context.FeatureContext feature_context - - /** - * Passthrough data for Scarecrow that is used for safety checks. - */ - 19: optional safety_meta_data.SafetyMetaData safety_meta_data - - // OBSOLETE 20: bool community_narrowcast = 0 - - /** - * Toggle narrowcasting behavior for leading @mentions. - * - * If in_reply_to_tweet_id is not set: - * - When this flag is true and the tweet text starts with a leading mention then the tweet - * will be narrowcasted. - * - * If in_reply_to_tweet_id is set: - * - If auto_populate_reply_metadata is true - * - Setting this flag to true will use the default narrowcast determination logic where - * most replies will be narrowcast but some special-cases of self-replies will not. - * - Setting this flag to false will disable narrowcasting and the tweet will be fanned out - * to all the author's followers. Previously users prefixed their reply text with "." to - * achieve this effect. - * - If auto_populate_reply_metadata is false, this flag will control whether a leading - * mention in the tweet text will be narrowcast (true) or broadcast (false). - */ - 21: bool enable_tweet_to_narrowcasting = 1 - - /** - * Automatically populate replies with leading mentions from tweet text. - */ - 22: bool auto_populate_reply_metadata = 0 - - /** - * Metadata at the tweet-asset relationship level. - */ - 23: optional map media_metadata - - /** - * An optional URL that identifies a resource that is treated as an attachment of the - * the tweet, such as a quote-tweet permalink. - * - * When provided, it is appended to the end of the tweet text, but is not - * included in the visible_text_range. 
- */ - 24: optional string attachment_url (personalDataType = 'CardId, ShortUrl') - - /** - * Pass-through information to be published in `TweetCreateEvent`. - * - * This data is not persisted by Tweetypie. - * - * @deprecated prefer transient_context (see field 27) over this. - */ - 25: optional map additional_context - - /** - * Users to exclude from the automatic reply population behavior. - * - * When auto_populate_reply_metadata is true, screen names appearing in the - * mention prefix can be excluded by specifying a corresponding user id in - * exclude_reply_user_ids. Because the mention prefix must always include - * the leading mention to preserve directed-at addressing for the in-reply- - * to tweet author, attempting to exclude that user id will have no effect. - * Specifying a user id not in the prefix will be silently ignored. - */ - 26: optional list exclude_reply_user_ids (personalDataType = 'UserId') - - /** - * Used to pass structured data to Tweetypie and tweet_events eventbus - * stream consumers. This data is not persisted by Tweetypie. - * - * If adding a new passthrough field, prefer this over additional_context, - * as this is structured data, while additional_context is text data. - */ - 27: optional transient_context.TransientCreateContext transient_context - - /** - * Composer flow used to create this tweet. Unless using the News Camera (go/newscamera) - * flow, this should be `STANDARD`. - * - * When set to `CAMERA`, clients are expected to display the tweet with a different UI - * to emphasize attached media. 
- */ - 28: optional tweet.ComposerSource composer_source - - /** - * present if we want to restrict replies to this tweet (go/dont-at-me-api) - * - This gets converted to Tweet.conversation_control and changes type - * - This is only valid for conversation root tweets - * - This applies to all replies to this tweet - */ - 29: optional TweetCreateConversationControl conversation_control - - // OBSOLETE 30: optional jiminy.CreateNudgeOptions nudge_options - - /** - * Provided if the client wants to have the tweet create evaluated for a nudge (e.g. to notify - * the user that they are about to create a toxic tweet). Reference: go/docbird/jiminy - */ - 31: optional jiminy.CreateTweetNudgeOptions nudge_options - - /** - * Provided for correlating requests originating from REST endpoints and GraphQL endpoints. - * Its presence or absence does not affect Tweet mutation. It used for validation - * and debugging. The expected format is a 36 ASCII UUIDv4. - * Please see API specification at go/graphql-tweet-mutations for more information. - */ - 32: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') - - /** - * Options that determine the shape of an exclusive tweet's restrictions. - * The existence of this object indicates that the tweet is intended to be an exclusive tweet - * While this is an empty structure for now, it will have fields added to it later in later versions. - */ - 33: optional ExclusiveTweetControlOptions exclusiveTweetControlOptions - - 34: optional TrustedFriendsControlOptions trustedFriendsControlOptions - - /** - * Provided if tweet data is backed up by a creative container, that at tweet hydration - * time, tweetypie would delegate to creative container service. - * - * go/creatives-containers-tdd - * Please note that this id is never publically shared with clients, its only used for - * internal purposes. 
- */ - 35: optional i64 underlying_creatives_container_id (personalDataType = 'TweetId') - - /** - * Provided if tweet is a CollabTweet or a CollabInvitation, along with a list of Collaborators - * which includes the original author. - * - * go/collab-tweets - **/ - 36: optional CollabControlOptions collabControlOptions - - /** - * When supplied, this PostTweetRequest is an edit. See [[EditOptions]] for more details. - **/ - 37: optional EditOptions editOptions - - /** - * When supplied, the NoteTweet specified is associated with the created Tweet. - **/ - 38: optional NoteTweetOptions noteTweetOptions -} (hasPersonalData = 'true') - -struct SetAdditionalFieldsRequest { - 1: required tweet.Tweet additional_fields -} - -struct DeleteAdditionalFieldsRequest { - 1: required list tweet_ids (personalDataType = 'TweetId') - 2: required list field_ids -}(hasPersonalData = 'true') - -struct DeleteTweetsRequest { - 1: required list tweet_ids (personalDataType = 'TweetId') - // DEPRECATED and moved to tweetypie_internal.thrift's CascadedDeleteTweetsRequest - 2: optional i64 cascaded_from_tweet_id (personalDataType = 'TweetId') - 3: optional tweet_audit.AuditDeleteTweet audit_passthrough - - /** - * The id of the user initiating this request. - * - * It could be either the owner of the tweet or an admin. If not specified - * we will use TwitterContext.userId. - */ - 4: optional i64 by_user_id (personalDataType = 'UserId') - - - /** - * Where these tweets are being deleted as part of a user erasure, the process - * of deleting tweets belonging to deactivated accounts. - * - * This lets backends optimize processing of mass deletes of tweets from the - * same user. Talk to the Tweetypie team before setting this flag. - */ - 5: bool is_user_erasure = 0 - - /** - * Id to compare with the user id of the tweets being deleted. - * - * This provides extra protection against accidental deletion of tweets. - * This is required when is_user_erasure is true. 
If any of the tweets - * specified in tweet_ids do not match expected_user_id a - * EXPECTED_USER_ID_MISMATCH state will be returned. - */ - 6: optional i64 expected_user_id (personalDataType = 'UserId') - - /** - * A bounced tweet is a tweet that has been found to violate Twitter Rules. - * This is represented as a tweet with its bounce_label field set. - * - * When the Tweet owner deletes their offending bounced tweet in the Bounced workflow, Bouncer - * will submit a delete request with `is_bounce_delete` set to true. If the tweet(s) being deleted - * have a bounce_label set, this request results in the tweet transitioning into the - * BounceDeleted state which means the tweet is partially deleted. - * - * Most of the normal tweet deletion side-effects occur but the tweet remains in a - * few tflock graphs, tweet cache, and a Manhattan marker is added. Other than timelines services, - * bounce deleted tweets are considered deleted and will return a StatusState.BounceDelete. - * - * After a defined grace period, tweets in this state will be fully deleted. - * - * If the tweet(s) being deleted do not have the bounce_label set, they will be deleted as usual. - * - * Other than Bouncer, no service should use `is_bounce_delete` flag. - */ - 7: bool is_bounce_delete = 0 - - /** - * This is a unique identifier used in both the REST and GraphQL-dark - * requests that will be used to correlate the GraphQL mutation requests to the REST requests - * during a transition period when clients will be moving toward tweet creation via GraphQL. - * See also, the "Comparison Testing" section at go/tweet-create-on-graphql-tdd for additional - * context. - */ - 8: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') - - /** - * When an edited tweet is deleted via daemons, we take a different action - * than if it was deleted normally. If deleted normally, we delete the - * initial tweet in the chain. 
When deleted via daemons, we delete the actual tweet. - */ - 9: optional bool cascaded_edited_tweet_deletion -}(hasPersonalData = 'true') - -struct DeleteTweetResult { - 1: required i64 tweet_id (personalDataType = 'TweetId') - 2: required TweetDeleteState state -}(hasPersonalData = 'true') - -struct UnretweetResult { - /** - * Id of the retweet that was deleted if a retweet could be found. - */ - 1: optional i64 tweet_id (personalDataType = 'TweetId') - - 2: required TweetDeleteState state -}(hasPersonalData = 'true') - -struct PostTweetResult { - 1: required TweetCreateState state - - /** - * The created tweet when state is OK. - */ - 2: optional tweet.Tweet tweet - - /** - * The original tweet when state is OK and tweet is a retweet. - */ - 3: optional tweet.Tweet source_tweet - - /** - * The quoted tweet when state is OK and tweet is a quote tweet. - */ - 4: optional tweet.Tweet quoted_tweet - - /** - * The required user remediation from Scarecrow when state is BOUNCE. - */ - 5: optional bounce.Bounce bounce - - /** - * Additional information when TweetCreateState is not OK. - * - * Not all failures provide a reason. - */ - 6: optional string failure_reason - - // OBSOLETE 7: optional jiminy.Nudge nudge - - /** - * Returned when the state is NUDGE to indicate that the tweet has not been created, and that - * the client should instead display the nudge to the user. Reference: go/docbird/jiminy - */ - 8: optional jiminy.TweetNudge nudge -} (persisted = "true", hasPersonalData = "true") - -/** - * Specifies the cause of an AccessDenied error. - */ -enum AccessDeniedCause { - // obsolete: INVALID_CLIENT_ID = 0, - // obsolete: DEPRECATED = 1, - USER_DEACTIVATED = 2, - USER_SUSPENDED = 3, - - RESERVED_4 = 4, - RESERVED_5 = 5, - RESERVED_6 = 6 -} - -/** - * AccessDenied error is returned by delete_tweets endpoint when - * by_user_id is suspended or deactivated. 
- */ -exception AccessDenied { - 1: required string message - 2: optional AccessDeniedCause errorCause -} - -struct UndeleteTweetRequest { - 1: required i64 tweet_id (personalDataType = 'TweetId') - 2: optional WritePathHydrationOptions hydration_options - - /** - * Perform the side effects of undeletion even if the tweet is not deleted. - * - * This flag is useful if you know that the tweet is present in Manhattan - * but is not undeleted with respect to other services. - */ - 3: optional bool force -}(hasPersonalData = 'true') - -struct UndeleteTweetResponse { - 1: required UndeleteTweetState state - 2: optional tweet.Tweet tweet -} - -struct EraseUserTweetsRequest { - 1: required i64 user_id (personalDataType = 'UserId') -}(hasPersonalData = 'true') - -struct UnretweetRequest { - /** - * The id of the user who owns the retweet. - */ - 1: required i64 user_id (personalDataType = 'UserId') - - /** - * The source tweet that should be unretweeted. - */ - 2: required i64 source_tweet_id (personalDataType = 'TweetId') - - /** - * This is a unique identifier used in both the REST and GraphQL-dark - * requests that will be used to correlate the GraphQL mutation requests to the REST requests - * during a transition period when clients will be moving toward tweet creation via GraphQL. - * See also, the "Comparison Testing" section at go/tweet-create-on-graphql-tdd for additional - * context. - */ - 3: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') -}(hasPersonalData = 'true') - -struct GetDeletedTweetsRequest { - 1: required list tweetIds (personalDataType = 'TweetId') -}(hasPersonalData = 'true') - -struct GetDeletedTweetResult { - 1: required i64 tweetId (personalDataType = 'TweetId') - 2: required DeletedTweetState state - 4: optional deleted_tweet.DeletedTweet tweet -}(hasPersonalData = 'true') - -/** - * Flushes tweets and/or their counts from cache. 
- * - * Typically will be used manually for testing or when a particular problem is - * found that needs to be fixed by hand. Defaults to flushing both tweet - * struct and associated counts. - */ -struct FlushRequest { - 1: required list tweet_ids (personalDataType = 'TweetId') - 2: bool flushTweets = 1 - 3: bool flushCounts = 1 -}(hasPersonalData = 'true') - -/** - * A request to retrieve counts for one or more tweets. - */ -struct GetTweetCountsRequest { - 1: required list tweet_ids (personalDataType = 'TweetId') - 2: bool include_retweet_count = 0 - 3: bool include_reply_count = 0 - 4: bool include_favorite_count = 0 - 5: bool include_quote_count = 0 - 6: bool include_bookmark_count = 0 -}(hasPersonalData = 'true') - -/** - * A response optionally indicating one or more counts for a tweet. - */ -struct GetTweetCountsResult { - 1: required i64 tweet_id (personalDataType = 'TweetId') - 2: optional i64 retweet_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets') - 3: optional i64 reply_count (personalDataType = 'CountOfPrivateReplies, CountOfPublicReplies') - 4: optional i64 favorite_count (personalDataType = 'CountOfPrivateLikes, CountOfPublicLikes') - 5: optional i64 quote_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets') - 6: optional i64 bookmark_count (personalDataType = 'CountOfPrivateLikes') -}(hasPersonalData = 'true') - -/** - * A request to increment the cached favorites count for a tweet. - * - * Negative values decrement the count. This request is automatically - * replicated to other data centers. - */ -struct IncrTweetFavCountRequest { - 1: required i64 tweet_id (personalDataType = 'TweetId') - 2: required i32 delta (personalDataType = 'CountOfPrivateLikes, CountOfPublicLikes') -}(hasPersonalData = 'true') - -/** - * A request to increment the cached bookmarks count for a tweet. - * - * Negative values decrement the count. This request is automatically - * replicated to other data centers. 
- */ -struct IncrTweetBookmarkCountRequest { - 1: required i64 tweet_id (personalDataType = 'TweetId') - 2: required i32 delta (personalDataType = 'CountOfPrivateLikes') -}(hasPersonalData = 'true') - -/** - * Request to scrub geolocation from 1 or more tweets, and replicates to other - * data centers. - */ -struct GeoScrub { - 1: required list status_ids (personalDataType = 'TweetId') - // OBSOLETE 2: bool write_through = 1 - 3: bool hosebird_enqueue = 0 - 4: i64 user_id = 0 (personalDataType = 'UserId') // should always be set for hosebird enqueue -}(hasPersonalData = 'true') - -/** - * Contains different indicators of a tweets "nsfw" status. - */ -struct NsfwState { - 1: required bool nsfw_user - 2: required bool nsfw_admin - 3: optional safety_label.SafetyLabel nsfw_high_precision_label - 4: optional safety_label.SafetyLabel nsfw_high_recall_label -} - -/** - * Interface to Tweetypie - */ -service TweetService { - /** - * Performs a multi-get of tweets. This endpoint is geared towards fetching - * tweets for the API, with many fields returned by default. - * - * The response list is ordered the same as the requested ids list. - */ - list get_tweets(1: GetTweetsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs a multi-get of tweets. This endpoint is geared towards internal - * processing that needs only specific subsets of the data. - * - * The response list is ordered the same as the requested ids list. 
- */ - list get_tweet_fields(1: GetTweetFieldsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Execute a {@link GetTweetCountsRequest} and return one or more {@link GetTweetCountsResult} - */ - list get_tweet_counts(1: GetTweetCountsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Set/Update additional fields on an existing tweet - */ - void set_additional_fields(1: SetAdditionalFieldsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Delete additional fields on a tweet - */ - void delete_additional_fields(1: DeleteAdditionalFieldsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Creates and saves a tweet. - * - * URLs contained in the text will be shortened via Talon. Validations that are - * handled by this endpoint include: - * - * - tweet length not greater than 140 display characters, after URL shortening; - * - tweet is not a duplicate of a recently created tweet by the same user; - * - user is not suspended or deactivated; - * - text does not contain malware urls, as determined by talon; - * - * Checks that are not handled here that should be handled by the web API: - * - oauth authentication; - * - client application has narrowcasting/nullcasting privileges; - */ - PostTweetResult post_tweet(1: PostTweetRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Creates and saves a retweet. 
- * - * Validations that are handled by this endpoint include: - * - * - source tweet exists; - * - source-tweet user exists and is not suspended or deactivated; - * - source-tweet user is not blocking retweeter; - * - user has not already retweeted the source tweet; - * - * Checks that are not handled here that should be handled by the web API: - * - oauth authentication; - * - client application has narrowcasting/nullcasting privileges; - */ - PostTweetResult post_retweet(1: RetweetRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Remove tweets. It removes all associated fields of the tweets in - * cache and the persistent storage. - */ - list delete_tweets(1: DeleteTweetsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error, - 3: AccessDenied access_denied) - - /** - * Restore a deleted Tweet. - * - * Tweets exist in a soft-deleted state for N days during which they can be - * restored by support agents following the internal restoration guidelines. - * If the undelete succeeds, the Tweet is given similar treatment to a new - * tweet e.g inserted into cache, sent to the timeline service, reindexed by - * TFlock etc. - */ - UndeleteTweetResponse undelete_tweet(1: UndeleteTweetRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Add or remove takedown countries associated with a Tweet. - */ - void takedown(1: TakedownRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Set or unset the nsfw_admin and/or nsfw_user bit of tweet.core_data. - **/ - void update_possibly_sensitive_tweet(1: UpdatePossiblySensitiveTweetRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error - ) - - /** - * Delete all tweets for a given user. 
Currently only called by Test User Service, but we - * can also use it ad-hoc. - * - * Note: regular user erasure is handled by the EraseUserTweets daemon. - */ - void erase_user_tweets(1: EraseUserTweetsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Unretweet a given tweet. - * - * There are two ways to unretweet: - * - call deleteTweets() with the retweetId - * - call unretweet() with the retweeter userId and sourceTweetId - * - * This is useful if you want to be able to undo a retweet without having to - * keep track of a retweetId. - */ - UnretweetResult unretweet(1: UnretweetRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Get tweet content and deletion times for soft-deleted tweets. - * - * The response list is ordered the same as the requested ids list. - */ - list get_deleted_tweets(1: GetDeletedTweetsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Execute a {@link FlushRequest} - */ - void flush(1: FlushRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Execute an {@link IncrTweetFavCountRequest} - */ - void incr_tweet_fav_count(1: IncrTweetFavCountRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Execute an {@link IncrTweetBookmarkCountRequest} - */ - void incr_tweet_bookmark_count(1: IncrTweetBookmarkCountRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Delete location data from all of a user's tweets. - * - * This endpoint initiates the process of deleting the user's location data - * from all of their tweets, as well as clearing the has_geotagged_statuses - * flag of the user. 
This method returns as soon as the event is enqueued, - * but the location data won't be scrubbed until the event is processed. - * Usually the latency for the whole process to complete is small, but it - * could take up to a couple of minutes if the user has a very large number - * of tweets, or if the request gets backed up behind other requests that - * need to scrub a large number of tweets. - * - * The event is processed by the Tweetypie geoscrub daemon. - * - */ - void delete_location_data(1: DeleteLocationDataRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Execute a {@link GeoScrub} request. - * - */ - void scrub_geo(1: GeoScrub geo_scrub) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) -} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.docx new file mode 100644 index 000000000..8175adc28 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift deleted file mode 100644 index dd69a3299..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift +++ /dev/null @@ -1,32 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava.federated -#@namespace scala com.twitter.tweetypie.thriftscala.federated -#@namespace strato com.twitter.tweetypie.federated - -include "com/twitter/tweetypie/stored_tweet_info.thrift" - -typedef i16 FieldId - -struct GetStoredTweetsView { - 1: bool bypass_visibility_filtering = 0 - 2: optional i64 for_user_id - 3: list additional_field_ids = [] -} - -struct GetStoredTweetsResponse { - 1: stored_tweet_info.StoredTweetInfo stored_tweet -} 
- -struct GetStoredTweetsByUserView { - 1: bool bypass_visibility_filtering = 0 - 2: bool set_for_user_id = 0 - 3: optional i64 start_time_msec - 4: optional i64 end_time_msec - 5: optional i64 cursor - 6: bool start_from_oldest = 0 - 7: list additional_field_ids = [] -} - -struct GetStoredTweetsByUserResponse { - 1: required list stored_tweets - 2: optional i64 cursor -} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.docx new file mode 100644 index 000000000..a0edf449d Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift deleted file mode 100644 index 3aa0ada82..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift +++ /dev/null @@ -1,391 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava.graphql -#@namespace scala com.twitter.tweetypie.thriftscala.graphql -#@namespace strato com.twitter.tweetypie.graphql - -/** - * Reasons for defining "prefetch" structs: - * i) It enables GraphQL prefetch caching - * ii) All tweet mutation operations are defined to support prefetch caching for API consistency - * and future flexibility. (Populating the cache with VF results being a potential use case.) - */ -include "com/twitter/ads/callback/engagement_request.thrift" -include "com/twitter/strato/graphql/existsAndPrefetch.thrift" - -struct UnretweetRequest { - /** - * Tweet ID of the source tweet being referenced in the unretweet. - * Note: The retweet_id isn't being passed here as it will result in a - * successful response, but won't have any effect. This is due to - * how Tweetypie's unretweet endpoint works. 
- */ - 1: required i64 source_tweet_id ( - strato.json.numbers.type='string', - strato.description='The source tweet to be unretweeted.' - ) - 2: optional string comparison_id ( - strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints.' - ) -} (strato.graphql.typename='UnretweetRequest') - -struct UnretweetResponse { - /** - * The response contains the source tweet's ID being unretweeted. - * Reasons for this: - * i) The operation should return a non-void response to retain consistency - * with other tweet mutation APIs. - * ii) The response struct should define at least one field due to requirements - * of the GraphQL infrastructure. - * iii) This allows the caller to hydrate the source tweet if required and request - * updated counts on the source tweet if desired. (since this operation decrements - * the source tweet's retweet count) - */ - 1: optional i64 source_tweet_id ( - strato.space='Tweet', - strato.graphql.fieldname='source_tweet', - strato.description='The source tweet that was unretweeted.' - ) -} (strato.graphql.typename='UnretweetResponse') - -struct UnretweetResponseWithSubqueryPrefetchItems { - 1: optional UnretweetResponse data - 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems -} - - -struct CreateRetweetRequest { - 1: required i64 tweet_id (strato.json.numbers.type='string') - - // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.nullcast - 2: bool nullcast = 0 ( - strato.description='Do not deliver this retweet to a user\'s followers. http://go/nullcast' - ) - - // @see com.twitter.ads.callback.thriftscala.EngagementRequest - 3: optional engagement_request.EngagementRequest engagement_request ( - strato.description='The ad engagement from which this retweet was created.' 
- ) - - // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.PostTweetRequest.comparison_id - 4: optional string comparison_id ( - strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints. UUID v4 (random) 36 character string.' - ) -} (strato.graphql.typename='CreateRetweetRequest') - -struct CreateRetweetResponse { - 1: optional i64 retweet_id ( - strato.space='Tweet', - strato.graphql.fieldname='retweet', - strato.description='The created retweet.' - ) -} (strato.graphql.typename='CreateRetweetResponse') - -struct CreateRetweetResponseWithSubqueryPrefetchItems { - 1: optional CreateRetweetResponse data - 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems -} - -struct TweetReply { - //@see com.twitter.tweetypie.thriftscala.PostTweetRequest.in_reply_to_tweet_id - 1: i64 in_reply_to_tweet_id ( - strato.json.numbers.type='string', - strato.description='The id of the tweet that this tweet is replying to.' - ) - //@see com.twitter.tweetypie.thriftscala.PostTweetRequest.exclude_reply_user_ids - 2: list exclude_reply_user_ids = [] ( - strato.json.numbers.type='string', - strato.description='Screen names appearing in the mention prefix can be excluded. Because the mention prefix must always include the leading mention to preserve directed-at addressing for the in-reply-to tweet author, attempting to exclude that user id will have no effect. Specifying a user id not in the prefix will be silently ignored.' - ) -} (strato.graphql.typename='TweetReply') - -struct TweetMediaEntity { - // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.media_upload_ids - 1: i64 media_id ( - strato.json.numbers.type='string', - strato.description='Media id as obtained from the User Image Service when uploaded.' - ) - - // @see com.twitter.tweetypie.thriftscala.Tweet.media_tags - 2: list tagged_users = [] ( - strato.json.numbers.type='string', - strato.description='List of user_ids to tag in this media entity. 
Requires Client App Privelege MEDIA_TAGS. Contributors (http://go/teams) are not supported. Tags are silently dropped when unauthorized.' - ) -} (strato.graphql.typename='TweetMediaEntity') - -struct TweetMedia { - 1: list media_entities = [] ( - strato.description='You may include up to 4 photos or 1 animated GIF or 1 video in a Tweet.' - ) - - /** - * @deprecated @see com.twitter.tweetypie.thriftscala.PostTweetRequest.possibly_sensitive for - * more details on why this field is ignored. - */ - 2: bool possibly_sensitive = 0 ( - strato.description='Mark this tweet as possibly containing objectionable media.' - ) -} (strato.graphql.typename='TweetMedia') - -//This is similar to the APITweetAnnotation struct except that here all the id fields are required. -struct TweetAnnotation { - 1: i64 group_id (strato.json.numbers.type='string') - 2: i64 domain_id (strato.json.numbers.type='string') - 3: i64 entity_id (strato.json.numbers.type='string') -} (strato.graphql.typename='TweetAnnotation', strato.case.format='preserve') - -struct TweetGeoCoordinates { - 1: double latitude (strato.description='The latitude of the location this Tweet refers to. The valid range for latitude is -90.0 to +90.0 (North is positive) inclusive.') - 2: double longitude (strato.description='The longitude of the location this Tweet refers to. The valid range for longitude is -180.0 to +180.0 (East is positive) inclusive.') - 3: bool display_coordinates = 1 (strato.description='Whether or not make the coordinates public. When false, geo coordinates are persisted with the Tweet but are not shared publicly.') -} (strato.graphql.typename='TweetGeoCoordinates') - -struct TweetGeo { - 1: optional TweetGeoCoordinates coordinates ( - strato.description='The geo coordinates of the location this Tweet refers to.' - ) - 2: optional string place_id ( - strato.description='A place in the world. 
See also https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/geo#place' - ) - 3: optional string geo_search_request_id ( - strato.description='See https://confluence.twitter.biz/display/GEO/Passing+the+geo+search+request+ID' - ) -} ( - strato.graphql.typename='TweetGeo', - strato.description='Tweet geo location metadata. See https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/geo' -) - -enum BatchComposeMode { - BATCH_FIRST = 1 (strato.description='This is the first Tweet in a batch.') - BATCH_SUBSEQUENT = 2 (strato.description='This is any of the subsequent Tweets in a batch.') -}( - strato.graphql.typename='BatchComposeMode', - strato.description='Indicates whether a Tweet was created using a batch composer, and if so position of a Tweet within the batch. A value of None, indicates that the tweet was not created in a batch. More info: go/batchcompose.' -) - -/** - * Conversation Controls - * See also: - * tweet.thrift/Tweet.conversation_control - * tweet_service.thrift/TweetCreateConversationControl - * tweet_service.thrift/PostTweetRequest.conversation_control - * - * These types are isomorphic/equivalent to tweet_service.thrift/TweetCreateConversationControl* to - * avoid exposing internal service thrift types. - */ -enum ConversationControlMode { - BY_INVITATION = 1 (strato.description='Users that the conversation owner mentions by @screenname in the tweet text are invited.') - COMMUNITY = 2 (strato.description='The conversation owner, invited users, and users who the conversation owner follows can reply.') -} ( - strato.graphql.typename='ConversationControlMode' -) - -struct TweetConversationControl { - 1: ConversationControlMode mode -} ( - strato.graphql.typename='TweetConversationControl', - strato.description='Specifies limits on user participation in a conversation. See also http://go/dont-at-me. Up to one value may be provided. 
(Conceptually this is a union, however graphql doesn\'t support union types as inputs.)' -) - -// empty for now, but intended to be populated in later iterations of the super follows project. -struct ExclusiveTweetControlOptions {} ( - strato.description='Marks a tweet as exclusive. See go/superfollows.', - strato.graphql.typename='ExclusiveTweetControlOptions', -) - -struct EditOptions { - 1: optional i64 previous_tweet_id (strato.json.numbers.type='string', strato.description='previous Tweet id') -} ( - strato.description='Edit options for a Tweet.', - strato.graphql.typename='EditOptions', -) - -struct TweetPeriscopeContext { - 1: bool is_live = 0 ( - strato.description='Indicates if the tweet contains live streaming video. A value of false is equivalent to this struct being undefined in the CreateTweetRequest.' - ) - - // Note that the REST API also defines a context_periscope_creator_id param. The GraphQL - // API infers this value from the TwitterContext Viewer.userId since it should always be - // the same as the Tweet.coreData.userId which is also inferred from Viewer.userId. -} ( - strato.description='Specifies information about live video streaming. Note that the Periscope product was shut down in March 2021, however some live video streaming features remain in the Twitter app. This struct keeps the Periscope naming convention to retain parity and traceability to other areas of the codebase that also retain the Periscope name.', - strato.graphql.typename='TweetPeriscopeContext', -) - -struct TrustedFriendsControlOptions { - 1: required i64 trusted_friends_list_id ( - strato.json.numbers.type='string', - strato.description='The ID of the Trusted Friends List whose members can view this tweet.' - ) -} ( - strato.description='Specifies information for a Trusted Friends tweet. 
See go/trusted-friends', - strato.graphql.typename='TrustedFriendsControlOptions', -) - -enum CollabControlType { - COLLAB_INVITATION = 1 (strato.description='This represents a CollabInvitation.') - // Note that a CollabTweet cannot be created through external graphql request, - // rather a user can create a CollabInvitation (which is automatically nullcasted) and a - // public CollabTweet will be created when all Collaborators have accepted the CollabInvitation, - // triggering a strato column to instantiate the CollabTweet directly -}( - strato.graphql.typename='CollabControlType', -) - -struct CollabControlOptions { - 1: required CollabControlType collabControlType - 2: required list collaborator_user_ids ( - strato.json.numbers.type='string', - strato.description='A list of user ids representing all Collaborators on a CollabTweet or CollabInvitation') -}( - strato.graphql.typename='CollabControlOptions', - strato.description='Specifies information about a CollabTweet or CollabInvitation (a union is used to ensure CollabControl defines one or the other). See more at go/collab-tweets.' -) - -struct NoteTweetOptions { - 1: required i64 note_tweet_id ( - strato.json.numbers.type='string', - strato.description='The ID of the Note Tweet that has to be associated with the created Tweet.') - // Deprecated - 2: optional list mentioned_screen_names ( - strato.description = 'Screen names of the users mentioned in the NoteTweet. This is used to set conversation control on the Tweet.') - - 3: optional list mentioned_user_ids ( - strato.description = 'User ids of mentioned users in the NoteTweet. This is used to set conversation control on the Tweet, send mentioned user ids to TLS' - ) - 4: optional bool is_expandable ( - strato.description = 'Specifies if the Tweet can be expanded into the NoteTweet, or if they have the same text' - ) -} ( - strato.graphql.typename='NoteTweetOptions', - strato.description='Note Tweet options for a Tweet.' 
-) - -// NOTE: Some clients were using the dark_request directive in GraphQL to signal that a Tweet should not be persisted -// but this is not recommended, since the dark_request directive is not meant to be used for business logic. -struct UndoOptions { - 1: required bool is_undo ( - strato.description='Set to true if the Tweet is undo-able. Tweetypie will process the Tweet but will not persist it.' - ) -} ( - strato.graphql.typename='UndoOptions' -) - -struct CreateTweetRequest { - 1: string tweet_text = "" ( - strato.description='The user-supplied text of the tweet. Defaults to empty string. Leading & trailing whitespace are trimmed, remaining value may be empty if and only if one or more media entity ids are also provided.' - ) - - // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.nullcast - 2: bool nullcast = 0 ( - strato.description='Do not deliver this tweet to a user\'s followers. http://go/nullcast' - ) - - // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.PostTweetRequest.comparison_id - 3: optional string comparison_id ( - strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints. UUID v4 (random) 36 character string.' - ) - - // @see com.twitter.ads.callback.thriftscala.EngagementRequest - 4: optional engagement_request.EngagementRequest engagement_request ( - strato.description='The ad engagement from which this tweet was created.' - ) - - // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.attachment_url - 5: optional string attachment_url ( - strato.description='Tweet permalink (i.e. Quoted Tweet) or Direct Message deep link. This URL is not included in the visible_text_range.' - ) - - // @see com.twitter.tweetypie.thriftscala.Tweet.card_reference - 6: optional string card_uri ( - strato.description='Link to the card to associate with a tweet.' - ) - - 7: optional TweetReply reply ( - strato.description='Reply parameters.' 
- ) - - 8: optional TweetMedia media ( - strato.description='Media parameters.' - ) - - 9: optional list semantic_annotation_ids ( - strato.description='Escherbird Annotations.' - ) - - 10: optional TweetGeo geo ( - strato.description='Tweet geo location metadata. See https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/geo' - ) - - 11: optional BatchComposeMode batch_compose ( - strato.description='Batch Compose Mode. See go/batchcompose' - ) - - 12: optional ExclusiveTweetControlOptions exclusive_tweet_control_options ( - strato.description='When defined, this tweet will be marked as exclusive. Leave undefined to signify a regular, non-exclusive tweet. See go/superfollows.' - ) - - 13: optional TweetConversationControl conversation_control ( - strato.description='Restrict replies to this tweet. See http://go/dont-at-me-api. Only valid for conversation root tweets. Applies to all replies to this tweet.' - ) - - 14: optional TweetPeriscopeContext periscope ( - strato.description='Specifies information about live video streaming. Note that the Periscope product was shut down in March 2021, however some live video streaming features remain in the Twitter app. This struct keeps the Periscope naming convention to retain parity and traceability to other areas of the codebase that also retain the Periscope name. Note: A value of periscope.isLive=false is equivalent to this struct being left undefined.' - ) - - 15: optional TrustedFriendsControlOptions trusted_friends_control_options ( - strato.description='Trusted Friends parameters.' - ) - - 16: optional CollabControlOptions collab_control_options ( - strato.description='Collab Tweet & Collab Invitation parameters.' - ) - - 17: optional EditOptions edit_options ( - strato.description='when defined, this tweet will be marked as an edit of the tweet represented by previous_tweet_id in edit_options.' 
- ) - - 18: optional NoteTweetOptions note_tweet_options ( - strato.description='The Note Tweet that is to be associated with the created Tweet.', - strato.graphql.skip='true' - ) - - 19: optional UndoOptions undo_options ( - strato.description='If the user has Undo Tweets enabled, the Tweet is created so that it can be previewed by the client but is not persisted.', - ) -} (strato.graphql.typename='CreateTweetRequest') - -struct CreateTweetResponse { - 1: optional i64 tweet_id ( - strato.space='Tweet', - strato.graphql.fieldname='tweet', - strato.description='The created tweet.' - ) -} (strato.graphql.typename='CreateTweetResponse') - -struct CreateTweetResponseWithSubqueryPrefetchItems { - 1: optional CreateTweetResponse data - 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems -} - -// Request struct, ResponseStruct, ResponseWithPrefetchStruct -struct DeleteTweetRequest { - 1: required i64 tweet_id (strato.json.numbers.type='string') - - // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.PostTweetRequest.comparison_id - 2: optional string comparison_id ( - strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints. UUID v4 (random) 36 character string.' - ) -} (strato.graphql.typename='DeleteTweetRequest') - -struct DeleteTweetResponse { - 1: optional i64 tweet_id ( - strato.space='Tweet', - strato.graphql.fieldname='tweet', - strato.description='The deleted Tweet. Since the Tweet will always be not found after deletion, the TweetResult will always be empty.' 
- ) -} (strato.graphql.typename='DeleteTweetResponse') - -struct DeleteTweetResponseWithSubqueryPrefetchItems { - 1: optional DeleteTweetResponse data - 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems -} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.docx b/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.docx new file mode 100644 index 000000000..2e658e689 Binary files /dev/null and b/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.docx differ diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift deleted file mode 100644 index b01ac21d3..000000000 --- a/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift +++ /dev/null @@ -1,9 +0,0 @@ -namespace java com.twitter.tweetypie.unmentions.thriftjava -#@ namespace scala com.twitter.tweetypie.unmentions.thriftscala -#@ namespace strato com.twitter.tweetypie.unmentions -namespace py gen.twitter.tweetypie.unmentions - -struct UnmentionData { - 1: optional i64 conversationId (personalDataType = 'TweetConversationId') - 2: optional list mentionedUsers (personalDataType = 'UserId') -} (strato.space = "Unmentions", persisted='true', hasPersonalData = 'true') \ No newline at end of file diff --git a/tweetypie/server/BUILD b/tweetypie/server/BUILD deleted file mode 100644 index 2b17d8a49..000000000 --- a/tweetypie/server/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -target( - tags = ["bazel-compatible"], - dependencies = [ - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/config", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/handler", - 
"tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/service", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/store", - ], -) \ No newline at end of file diff --git a/tweetypie/server/BUILD.docx b/tweetypie/server/BUILD.docx new file mode 100644 index 000000000..f8aa5e22c Binary files /dev/null and b/tweetypie/server/BUILD.docx differ diff --git a/tweetypie/server/README.docx b/tweetypie/server/README.docx new file mode 100644 index 000000000..956006aa9 Binary files /dev/null and b/tweetypie/server/README.docx differ diff --git a/tweetypie/server/README.md b/tweetypie/server/README.md deleted file mode 100644 index 49d3c7ef7..000000000 --- a/tweetypie/server/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# Tweetypie - -## Overview - -Tweetypie is the core Tweet service that handles the reading and writing of Tweet data. It is called by the Twitter clients (through GraphQL), as well as various internal Twitter services, to fetch, create, delete, and edit Tweets. Tweetypie calls several backends to hydrate Tweet related data to return to callers. - -## How It Works - -The next sections describe the layers involved in the read and create paths for Tweets. - -### Read Path - -In the read path, Tweetypie fetches the Tweet data from [Manhattan](https://blog.twitter.com/engineering/en_us/a/2014/manhattan-our-real-time-multi-tenant-distributed-database-for-twitter-scale) or [Twemcache](https://blog.twitter.com/engineering/en_us/a/2012/caching-with-twemcache), and hydrates data about the Tweet from various other backend services. - -#### Relevant Packages - -- [backends](src/main/scala/com/twitter/tweetypie/backends/): A "backend" is a wrapper around a thrift service that Tweetypie calls. 
For example [Talon.scala](src/main/scala/com/twitter/tweetypie/backends/Talon.scala) is the backend for Talon, the URL shortener. -- [repository](src/main/scala/com/twitter/tweetypie/repository/): A "repository" wraps a backend and provides a structured interface for retrieving data from the backend. [UrlRepository.scala](src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala) is the repository for the Talon backend. -- [hydrator](src/main/scala/com/twitter/tweetypie/hydrator/): Tweetypie doesn't store all the data associated with Tweets. For example, it doesn't store User objects, but it stores screennames in the Tweet text (as mentions). It stores media IDs, but it doesn't store the media metadata. Hydrators take the raw Tweet data from Manhattan or Cache and return it with some additional information, along with hydration metadata that says whether the hydration took place. This information is usually fetched using a repository. For example, during the hydration process, the [UrlEntityHydrator](src/main/scala/com/twitter/tweetypie/hydrator/UrlEntityHydrator.scala) calls Talon using the [UrlRepository](src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala) and fetches the expanded URLs for the t.co links in the Tweet. -- [handler](src/main/scala/com/twitter/tweetypie/handler/): A handler is a function that handles requests to one of the Tweetypie endpoints. The [GetTweetsHandler](src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala) handles requests to `get_tweets`, one of the endpoints used to fetch Tweets. - -#### Through the Read Path - -At a high level, the path a `get_tweets` request takes is as follows. - -- The request is handled by [GetTweetsHandler](src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala). -- GetTweetsHandler uses the TweetResultRepository (defined in [LogicalRepositories.scala](src/main/scala/com/twitter/tweetypie/config/LogicalRepositories#L301)). 
The TweetResultRepository has at its core a [ManhattanTweetRespository](src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala) (that fetches the Tweet data from Manhattan), wrapped in a [CachingTweetRepository](src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala) (that applies caching using Twemcache). Finally, the caching repository is wrapped in a hydration layer (provided by [TweetHydration.hydrateRepo](src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala#L789)). Essentially, the TweetResultRepository fetches the Tweet data from cache or Manhattan, and passes it through the hydration pipeline. -- The hydration pipeline is described in [TweetHydration.scala](src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala), where all the hydrators are combined together. - -### Write Path - -The write path follows different patterns to the read path, but reuses some of the code. - -#### Relevant Packages - -- [store](src/main/scala/com/twitter/tweetypie/store/): The store package includes the code for updating backends on write, and the coordination code for describing which backends need to be updated for which endpoints. There are two types of file in this package: stores and store modules. Files that end in Store are stores and define the logic for updating a backend, for example [ManhattanTweetStore](src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala) writes Tweets to Manhattan. Most of the files that don't end in Store are store modules and define the logic for handling a write endpoint, and describe which stores are called, for example [InsertTweet](src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala) which handles the `post_tweet` endpoint. Modules define which stores they call, and stores define which modules they handle. - -#### Through the Write Path - -The path a `post_tweet` request takes is as follows. 
- -- The request is handled in [PostTweet.scala](src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala#L338). -- [TweetBuilder](src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala) creates a Tweet from the request, after performing text processing, validation, URL shortening, media processing, checking for duplicates etc. -- [WritePathHydration.hydrateInsertTweet](src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala#L54) passes the Tweet through the hydration pipeline to return the caller. -- The Tweet data is written to various stores as described in [InsertTweet.scala](src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala#L84). diff --git a/tweetypie/server/config/BUILD b/tweetypie/server/config/BUILD deleted file mode 100644 index 45a90181c..000000000 --- a/tweetypie/server/config/BUILD +++ /dev/null @@ -1,7 +0,0 @@ -resources( - sources = [ - "!**/*.pyc", - "!BUILD*", - "**/*", - ], -) diff --git a/tweetypie/server/config/BUILD.docx b/tweetypie/server/config/BUILD.docx new file mode 100644 index 000000000..25d80c950 Binary files /dev/null and b/tweetypie/server/config/BUILD.docx differ diff --git a/tweetypie/server/config/decider.docx b/tweetypie/server/config/decider.docx new file mode 100644 index 000000000..8761e3aed Binary files /dev/null and b/tweetypie/server/config/decider.docx differ diff --git a/tweetypie/server/config/decider.yml b/tweetypie/server/config/decider.yml deleted file mode 100644 index 6d3042fbc..000000000 --- a/tweetypie/server/config/decider.yml +++ /dev/null @@ -1,313 +0,0 @@ -stratofed_forward_dark_traffic: - comment: - Forward Federated Strato traffic to DarkTrafficProxy (DTP). - Note, this decider is not overrideable via Tweetypie tooling, - as it is only used by the StratoFedServer frameworkDecider instance. - Note, you cannot use this decider as a TweetypieDeciderGates. 
- default_availability: 0 - -tweetypie_enable_community_tweet_creates: - comment: When enable, it allows the creation of community tweets - default_availability: 10000 - -tweetypie_hydrate_scrub_engagements: - comment: Redact Tweet engagement related data (StatusCounts) from Interstital Public Interest (IPI) Tweets. - default_availability: 10000 - -tweetypie_check_spam_on_retweet: - comment: Enable Scarecrow spam check during retweet creation - default_availability: 10000 - -tweetypie_check_spam_on_tweet: - comment: Enable Scarecrow spam check during tweet creation - default_availability: 10000 - -tweetypie_conversation_control_use_feature_switch_results: - comment: - Controls whether Tweetypie uses feature switch results during conversation control parameter validation. This decider supports migration of feature switches from macaw-tweets to tweetypie. - default_availability: 0 - -tweetypie_conversation_control_tweet_create_enabled: - comment: - Controls whether we will enforce conversation control policy on tweet create. http://go/dont-at-me-backend-tdd - default_availability: 0 - -tweetypie_enable_exclusive_tweet_control_validation: - comment: - Controls whether we will restrict the exclusiveTweetControlOptions parameter to only be usable by creators. http://go/superfollows - default_availability: 0 - -tweetypie_delay_erase_user_tweets: - comment: sleep for a fixed number of seconds before deleting a page of tweets during user erasure. Used as a rate limiting mechanism. - default_availability: 5000 - -tweetypie_deny_non_tweet_permalinks: - comment: - Right now we would create a quote-tweet which would contain a non-working permalink - whereas with latest QT + media changes we're blocking the tweet creation. If tweet - from permalink is not found, we would throw an exception if this decider is on else - return a default pass-through response. 
- default_availability: 0 - -tweetypie_enable_trusted_friends_control_validation: - comment: Controls whether we will enforce trusted friends control policy on replies - default_availability: 0 - -tweetypie_enforce_rate_limited_clients: - comment: - Controls whether we will rate-limit low-priority clients based on per-instance requests per sec. - We enable clients for rate-limiting by setting the rate_limit to true in clients.yml - default_availability: 10000 - -tweetypie_fail_closed_in_vf: - comment: Propagate failure from backends such as Strato when running Visibility Filtering - default_availability: 0 - -tweetypie_fork_dark_traffic: - comment: Forks traffic to Darkhub - default_availability: 0 - -tweetypie_hydrate_conversation_muted: - comment: Hydrates the conversationMuted field if requested. This calls timelineservice. - default_availability: 10000 - -tweetypie_hydrate_counts: - comment: Hydrate status counts, if asked for. This calls TFlock. - default_availability: 10000 - -tweetypie_hydrate_previous_counts: - comment: Hydrate previous engagements on a tweet in an edit chain - default_availability: 0 - -tweetypie_hydrate_device_sources: - comment: Hydrate device sources. This reads from DBs. - default_availability: 10000 - -tweetypie_hydrate_escherbird_annotations: - comment: Hydrate the escherbirdEntityAnnotations additional field on tweet create. This calls the Escherbird Annotation Service. - default_availability: 10000 - -tweetypie_hydrate_gnip_profile_geo_enrichment: - comment: Hydrates each tweet with the profile geo enrichment. - default_availability: 10000 - -tweetypie_hydrate_has_media: - comment: Hydrate the hasMedia field based on whether the tweet has a media entity, a media card, or a URL that matches partner media regexes - default_availability: 10000 - -tweetypie_hydrate_media: - comment: Hydrate media entities. This calls MediaInfo Service. - default_availability: 10000 - -tweetypie_hydrate_media_refs: - comment: Hydrate MediaRefs. 
Calls Tweetypie for pasted media. - default_availability: 0 - -tweetypie_hydrate_media_tags: - comment: Hydrate media tags. This calls Gizmoduck for user view. - default_availability: 10000 - -tweetypie_hydrate_pasted_media: - comment: Copies media entities and media cards from tweets referenced by a media permalink url entity. - default_availability: 10000 - -tweetypie_hydrate_perspectives: - comment: Hydrate perspectival attributes, if asked for. This calls TLS, which may call TFlock if items are not in cache. - default_availability: 10000 - -tweetypie_hydrate_perspectives_edits_for_timelines: - comment: - Hydrated perspectival attributes across versions of tweet edit, - for timelines safety levels if asked for. - This results in more TLS calls (one for each version of tweet). - default_availability: 0 - -tweetypie_hydrate_perspectives_edits_for_tweet_details: - comment: - Hydrated perspectival attributes across versions of tweet edit, - for tweet detail safety levels if asked for. - This results in more TLS calls (one for each version of tweet). - default_availability: 0 - -tweetypie_hydrate_perspectives_edits_for_other_levels: - comment: - Hydrated perspectival attributes across versions of tweet edit, - for safety levels not mentioned in other deciders, if asked for. - This results in more TLS calls (one for each version of tweet). - default_availability: 0 - -tweetypie_hydrate_places: - comment: Hydrate place data, if asked for. This calls geoduck. - default_availability: 10000 - -tweetypie_jiminy_dark_requests: - comment: Enables dark requests to the Jiminy backend for the specified % of traffic - default_availability: 0 - -tweetypie_log_cache_exceptions: - comment: Enables logging of cache exceptions to loglens. - default_availability: 0 - -tweetypie_log_reads: - comment: Enables logging of reads. - default_availability: 50 - -tweetypie_log_tweet_cache_writes: - comment: Scribe a record for each cache write for tweets that pass this decider. 
- default_availability: 0 - -tweetypie_log_writes: - comment: Enables logging of status writes. - default_availability: 10000 - -tweetypie_log_young_tweet_cache_writes: - comment: - Scribe a record of cache writes for recently-created tweets that - pass this decider. - default_availability: 0 - -tweetypie_log_vf_dark_read_mismatches: - comment: Log mismatches from the tweetypie_dark_read_safety_labels_from_strato code path. - default_availability: 0 - -tweetypie_max_request_width_enabled: - comment: - Controls whether the max request width limit is enabled or not. - 0 means the limit is disabled, 10000 means it is turned on and - request widths > maxSize will be rejected. - default_availability: 0 - -tweetypie_media_refs_hydrator_include_pasted_media: - comment: - For debugging MediaRefsHydrator - determine if extra calls to pasted media are causing the GC issues. - default_availability: 0 - -tweetypie_prefer_forwarded_service_identifier_for_client_id: - comment: Effective client ID logic prefers forwarded service identifier to service identifier if available. - default_availability: 0 - -tweetypie_rate_limit_by_limiter_service: - comment: - Controls whether we will consult limiter service to see whether a - user is allowed to create more tweets. - default_availability: 10000 - -tweetypie_rate_limit_tweet_creation_failure: - comment: - Controls whether we rate limit tweet creation based on failed - attempts to create tweets via limiter service. This is separate - from the limit of created tweets. It is intended to prevent - unforeseen abuse by failing tweet creation attempts immediately if - the user has had too many recent tweet creation - failures. Disabling this decider will reduce traffic to limiter - service, but will remove the per-user abuse protection. - default_availability: 10000 - -tweetypie_replicate_reads_to_atla: - comment: Send reads to deferredrpc for replication to atla. We can use this to warm caches in atla. 
- default_availability: 0 - -tweetypie_replicate_reads_to_pdxa: - comment: Send reads to deferredrpc for replication to pdxa. We can use this to warm caches in pdxa. - default_availability: 0 - -tweetypie_disable_invite_via_mention: - comment: - Disables invite via mention field in the conversation control struct of - root tweets + reply tweets. - default_availability: 0 - -tweetypie_shed_read_traffic_voluntarily: - comment: - Preferred way to reject read requests during an incident from a subset of clients that have - volunteered to shed load. These clients have load_shed_envs set in clients.yml, often for - staging environments. Although this decider is available and clients have volunteered, should - still only be used in an emergency. - default_availability: 0 - -tweetypie_validate_card_ref_attachment_android: - comment: - When enabled tweet creates from Android consider CardReference for the TooManyAttachmentTypes error when creating tweets with more than one attachment type. - default_availability: 0 - -tweetypie_validate_card_ref_attachment_non_android: - comment: - When enabled tweet creates from non-Android consider CardReference for the TooManyAttachmentTypes error when creating tweets with more than one attachment type - default_availability: 0 - -# Additional Fields - -tweetypie_short_circuit_likely_partial_tweet_reads_ms: - comment: - Specifies a number of milliseconds before which, we short circuit likely - partial reads from MH and return NotFound tweet response state. - After experimenting decided to go with 1500 ms. - default_availability: 0 - -tweetypie_populate_quoted_tweet_results_as_contextual_tweet_ref: - comment: - CreateTweet and CreateReTweet column to return the 'quoted_tweet_results' PrefetchedItem as - ContextualTweetRef type instead of just tweetId. - This will be used during the quotedTweet.Tweet column migration (see http://go/qt-col-migration) - Post-migration, this will be removed. 
- default_availability: 0 -tweetypie_enable_unmentions_timeline_warmup: - comment: - When enabled, read-path calls execute an async call to the getUnmentionedUsersFromConverstion - strato column to warm the unmentioned NH/haplolite cache. - http://go/unmention-me-onepager - default_availability: 0 - -tweetypie_tweet_visibility_library_enable_parity_test: - comment: measure TVL parity against VF federated service, for a fraction of traffic - default_availability: 0 - -tweetypie_enable_vf_feature_hydration_in_quoted_tweet_visibility_library_shim: - comment: when enabled, all features are hydrated in QuotedTweetVisibilityLibrary shim - default_availability: 0 - -tweetypie_enable_remove_unmentioned_implicit_mentions: - comment: - When enabled, implicit mentions are filtered based on users that have unmentioned themselves - from the tweet's conversation. http://go/unmention-me-onepager - default_availability: 0 - -tweetypie_enable_stale_tweet_validation: - comment: Controls whether we will enforce stale tweet policy on replies and QT - default_availability: 0 - -tweetypie_disable_promoted_tweet_edit: - comment: - Controls whether we will disable edits on promoted tweets - default_availability: 0 - -tweetypie_should_materialize_containers: - comment: - When enabled, Creatives Container Service will be called to materialize container-backed tweets. - Otherwise, TP will not call CCS and return a StatusState of NotFound. - default_availability: 0 - -tweetypie_check_twitter_blue_subscription_for_edit: - comment: - Controls whether we check if the User is subscribed to Twitter Blue when editing a Tweet. 
- default_availability: 0 - -tweetypie_hydrate_bookmarks_count: - comment: - Controls whether we hydrate bookmarks count for a Tweet - default_availability: 0 - -tweetypie_hydrate_bookmarks_perspective: - comment: - Controls whether we request the Bookmarked perspective from TLS - default_availability: 0 - -tweetypie_set_edit_time_window_to_sixty_minutes: - comment: - Set time window in which Tweets are editable to 60 minutes - default_availability: 0 - -tweetypie_enable_federated_column_dark_traffic: - comment: - Enable dark traffic for federated column. - default_availability: 0 diff --git a/tweetypie/server/config/decider_staging.docx b/tweetypie/server/config/decider_staging.docx new file mode 100644 index 000000000..9846b6db4 Binary files /dev/null and b/tweetypie/server/config/decider_staging.docx differ diff --git a/tweetypie/server/config/decider_staging.yml b/tweetypie/server/config/decider_staging.yml deleted file mode 100644 index e69de29bb..000000000 diff --git a/tweetypie/server/config/logging/logback-all-include.docx b/tweetypie/server/config/logging/logback-all-include.docx new file mode 100644 index 000000000..3337f9bba Binary files /dev/null and b/tweetypie/server/config/logging/logback-all-include.docx differ diff --git a/tweetypie/server/config/logging/logback-all-include.xml b/tweetypie/server/config/logging/logback-all-include.xml deleted file mode 100644 index 269cb149f..000000000 --- a/tweetypie/server/config/logging/logback-all-include.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - tweetypie-all.log - true - - tweetypie-all-%i.log - 1 - 20 - - - 800MB - - - - %date [%thread] %-5level %logger{36} - %msg%n - - - - - - - - diff --git a/tweetypie/server/config/logging/logback-without-loglens.docx b/tweetypie/server/config/logging/logback-without-loglens.docx new file mode 100644 index 000000000..809024624 Binary files /dev/null and b/tweetypie/server/config/logging/logback-without-loglens.docx differ diff --git 
a/tweetypie/server/config/logging/logback-without-loglens.xml b/tweetypie/server/config/logging/logback-without-loglens.xml deleted file mode 100644 index b2e6c4d6a..000000000 --- a/tweetypie/server/config/logging/logback-without-loglens.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - true - - - - - - - - diff --git a/tweetypie/server/config/logging/logback.docx b/tweetypie/server/config/logging/logback.docx new file mode 100644 index 000000000..12ad98bd5 Binary files /dev/null and b/tweetypie/server/config/logging/logback.docx differ diff --git a/tweetypie/server/config/logging/logback.xml b/tweetypie/server/config/logging/logback.xml deleted file mode 100644 index 04d686c20..000000000 --- a/tweetypie/server/config/logging/logback.xml +++ /dev/null @@ -1,146 +0,0 @@ - - - - - true - - - - - - - - tweetypie-important.log - true - - tweetypie-important-%i.log - 1 - 20 - - - 800MB - - - - - %date [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - - true - ${log.lens.tag} - ${log.lens.index} - loglens - - %msg - - - - - - - - - alertable-exception.log - true - - alertable-exception-%i.log - 1 - 17 - - - 100MB - - - - - %date [%thread] %-5level %logger{36} - %msg%n - - - - - - - - - - true - ${log.lens.tag} - ${log.lens.index} - loglens - - ${ALERTABLE_MESSAGE_FORMAT} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tweetypie/server/config/partner_media.docx b/tweetypie/server/config/partner_media.docx new file mode 100644 index 000000000..6e0a9649a Binary files /dev/null and b/tweetypie/server/config/partner_media.docx differ diff --git a/tweetypie/server/config/partner_media.yml b/tweetypie/server/config/partner_media.yml deleted file mode 100644 index f737dd7c1..000000000 --- a/tweetypie/server/config/partner_media.yml +++ /dev/null @@ -1,30 +0,0 @@ -http_or_https: - - 'vine\.co/v/[a-zA-Z0-9]+' - - 'amp\.twimg\.com/' - - '(www\.)?dailymotion.com/video/[a-zA-Z0-9_\-/]+' - - '(www\.)?dai.ly/[a-zA-Z0-9_\-/]+' - - 
'(www\.)?youtu\.be/[a-zA-Z0-9_\-\?\&\=/]+' - - '(www\.)?youtube\.com/watch[a-zA-Z0-9_\-\?\&\=/]+' - - '(www\.)?ustream\.tv/recorded/\d+' - - '(www\.)?vevo\.com/watch/[\w-]+/[\w-]+/[a-zA-Z0-9_]+' - - '(www\.)?flickr\.com/photos/[\w\@\-]+/\d+/?' - - '(www\.)?flic\.kr/p/[A-Z0-9a-z\-]+' - - '([\w\-]+\.)deviantart\.com/(art|deviation|view)/[\w\@-]+' - - '(www\.)?vimeo\.com/\d+' - - '(www\.)?photozou\.(com|jp)/photo/show/\d+/\d+' - - '(www\.)?twitpic\.com/(?!(place|photos|events)/)([a-zA-Z0-9\?\=\-]+)' - - '(www\.)?mtv\.com/videos/([a-z0-9\-\_]+/)+[0-9]+/[a-z0-9\-\_]+\.jhtml(#[a-z0-9\=\&]+)?' - - '([\w\-\_]+\.)?washingtonpost\.com/wp-dyn/content/video/\d{4}/\d{2}/\d{2}/VI\d+\.html([a-zA-Z0-9_#\.\-\?\&\=/]+)?' - - '([\w\-\_]+\.)?msnbc\.msn\.com/id/\d{1,8}/vp/\d{1,8}([a-zA-Z0-9_#\.\-\?\&\=/]+)?' - - '((www|edition|us)\.)?cnn\.com/video/[\?|#]/[a-zA-Z0-9_#\.\-\?\&\=/]+' - - 'itunes\.apple\.com(/[a-z][a-z])?/(music-)?video/' - - '(www\.)?blip\.tv/((file/[\w-]+)|(([\w-]+/)?[\w-]+-\d+))/?' - - 'online\.wsj\.com/video/[A-Z0-9a-z\-]+/[A-Z0-9a-z\-]+\.html' - - '(www\.)?hulu\.com/w(atch)?/[a-zA-Z0-9]+' - - 'video\.([a-z]{4,11}\.)?nhl\.com/videocenter/console\?(((catid=-?\d+&)?id=\d+)|(hlg=\d{8},\d,\d{1,4}(&event=[A-Z0-9]{4,6})?)|(hlp=\d{5,10}(&event=[A-Z0-9]{4,6})?))' - - '([a-zA-Z0-9\-]+\.)*grabyo\.com/((g/v/[a-zA-Z0-9]{11})|((studio/)?studiotimeline\.jsp\?shareId=[a-zA-Z0-9]{11}))[a-zA-Z0-9_?&=#:%/\.\-]*' - -http_only: - - 'on\.nba\.com/[a-zA-Z0-9]+' - - 'on\.nfl\.com/[a-zA-Z0-9]+' - - 'snpy\.tv/[a-zA-Z0-9]+' diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD deleted file mode 100644 index 261fcb099..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - platform = "java8", - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - 
"mediaservices/commons/src/main/thrift:thrift-scala", - "tweetypie/servo/util", - "snowflake:id", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/gizmoduck:user-thrift-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "tweetypie/server/src/main/thrift:compiled-scala", - "tweetypie/common/src/scala/com/twitter/tweetypie/util", - "util/util-slf4j-api", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD.docx new file mode 100644 index 000000000..e50b216a7 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD deleted file mode 100644 index ab03f48f3..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - platform = "java8", - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/bijection:thrift", - "3rdparty/jvm/org/apache/thrift:libthrift", - "configbus/client/src/main/scala/com/twitter/configbus/client", - "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", - "finagle/finagle-core/src/main", - "finagle/finagle-thriftmux/src/main/scala", - # "finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/producers", - "finatra-internal/messaging/kafka/src/main/scala", - "finatra-internal/thrift/src/main/thrift:thrift-scala", - "flock-client/src/main/scala", - "flock-client/src/main/thrift:thrift-scala", - # "kafka/finagle-kafka/finatra-kafka/src/main/scala", - "limiter/thrift-only/src/main/thrift:thrift-scala", - 
"mediaservices/mediainfo-server/thrift/src/main/thrift:thrift-scala", - "tweetypie/servo/util", - "src/thrift/com/twitter/dataproducts:service-scala", - "src/thrift/com/twitter/escherbird:annotation-service-scala", - "src/thrift/com/twitter/escherbird:tweet-annotation-scala", - "src/thrift/com/twitter/escherbird/metadata:metadata-service-scala", - "src/thrift/com/twitter/expandodo:only-scala", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/gizmoduck:user-thrift-scala", - "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", - "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions-scala", - "src/thrift/com/twitter/service/talon/gen:thrift-scala", - "src/thrift/com/twitter/servo:servo-exception-scala", - "src/thrift/com/twitter/socialgraph:thrift-scala", - "src/thrift/com/twitter/timelineservice:thrift-scala", - "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "stitch/stitch-core", - "storage/clients/manhattan/client/src/main/scala", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", - "tweetypie/common/src/scala/com/twitter/tweetypie/storage", - "tweetypie/common/src/scala/com/twitter/tweetypie/util", - "user-image-service/thrift/src/main/thrift:thrift-scala", - "util/util-stats/src/main/scala", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD.docx new file mode 100644 index 000000000..34c8c60de Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.docx new file 
mode 100644 index 000000000..f405ac3e0 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala deleted file mode 100644 index 2daa79e87..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala +++ /dev/null @@ -1,172 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.finagle.context.Deadline -import com.twitter.finagle.service.RetryBudget -import com.twitter.finagle.service.RetryPolicy -import com.twitter.servo.util.FutureArrow -import com.twitter.servo.util.RetryHandler -import com.twitter.tweetypie.core.OverCapacity -import com.twitter.util.Timer -import com.twitter.util.TimeoutException - -object Backend { - val log: Logger = Logger(getClass) - - /** - * Common stuff that is needed as part of the configuration of all - * of the backends. - */ - case class Context(val timer: Timer, val stats: StatsReceiver) - - /** - * All backend operations are encapsulated in the FutureArrow type. The Builder type - * represents functions that can decorate the FutureArrow, typically by calling the various - * combinator methods on FutureArrow. - */ - type Builder[A, B] = FutureArrow[A, B] => FutureArrow[A, B] - - /** - * A Policy defines some behavior to apply to a FutureArrow that wraps an endpoint. - */ - trait Policy { - - /** - * Using an endpoint name and Context, returns a Builder that does the actual - * application of the policy to the FutureArrow. - */ - def apply[A, B](name: String, ctx: Context): Builder[A, B] - - /** - * Sequentially combines policies, first applying this policy and then applying - * the next policy. Order matters! 
For example, to retry on timeouts, the FailureRetryPolicy - * needs to be applied after the TimeoutPolicy: - * - * TimeoutPolicy(100.milliseconds) >>> FailureRetryPolicy(retryPolicy) - */ - def andThen(next: Policy): Policy = { - val first = this - new Policy { - def apply[A, B](name: String, ctx: Context): Builder[A, B] = - first(name, ctx).andThen(next(name, ctx)) - - override def toString = s"$first >>> $next" - } - } - - /** - * An alias for `andThen`. - */ - def >>>(next: Policy): Policy = andThen(next) - } - - /** - * Applies a timeout to the underlying FutureArrow. - */ - case class TimeoutPolicy(timeout: Duration) extends Policy { - def apply[A, B](name: String, ctx: Context): Builder[A, B] = { - val stats = ctx.stats.scope(name) - val ex = new TimeoutException(name + ": " + timeout) - (_: FutureArrow[A, B]).raiseWithin(ctx.timer, timeout, ex) - } - } - - /** - * Attaches a RetryHandler with the given RetryPolicy to retry failures. - */ - case class FailureRetryPolicy( - retryPolicy: RetryPolicy[Try[Nothing]], - retryBudget: RetryBudget = RetryBudget()) - extends Policy { - def apply[A, B](name: String, ctx: Context): Builder[A, B] = { - val stats = ctx.stats.scope(name) - (_: FutureArrow[A, B]) - .retry(RetryHandler.failuresOnly(retryPolicy, ctx.timer, stats, retryBudget)) - } - } - - /** - * This policy applies standardized endpoint metrics. This should be used with every endpoint. - */ - case object TrackPolicy extends Policy { - def apply[A, B](name: String, ctx: Context): Builder[A, B] = { - val stats = ctx.stats.scope(name) - (_: FutureArrow[A, B]) - .onFailure(countOverCapacityExceptions(stats)) - .trackOutcome(ctx.stats, (_: A) => name) - .trackLatency(ctx.stats, (_: A) => name) - } - } - - /** - * The default "policy" for timeouts, retries, exception counting, latency tracking, etc. to - * apply to each backend operation. 
This returns a Builder type (an endofunction on FutureArrow), - * which can be composed with other Builders via simple function composition. - */ - def defaultPolicy[A, B]( - name: String, - requestTimeout: Duration, - retryPolicy: RetryPolicy[Try[B]], - ctx: Context, - retryBudget: RetryBudget = RetryBudget(), - totalTimeout: Duration = Duration.Top, - exceptionCategorizer: Throwable => Option[String] = _ => None - ): Builder[A, B] = { - val scopedStats = ctx.stats.scope(name) - val requestTimeoutException = new TimeoutException( - s"$name: hit request timeout of $requestTimeout" - ) - val totalTimeoutException = new TimeoutException(s"$name: hit total timeout of $totalTimeout") - base => - base - .raiseWithin( - ctx.timer, - // We defer to a per-request deadline. When the deadline is missing or wasn't toggled, - // 'requestTimeout' is used instead. This mimics the behavior happening within a standard - // Finagle client stack and its 'TimeoutFilter'. - Deadline.currentToggled.fold(requestTimeout)(_.remaining), - requestTimeoutException - ) - .retry(RetryHandler(retryPolicy, ctx.timer, scopedStats, retryBudget)) - .raiseWithin(ctx.timer, totalTimeout, totalTimeoutException) - .onFailure(countOverCapacityExceptions(scopedStats)) - .trackOutcome(ctx.stats, (_: A) => name, exceptionCategorizer) - .trackLatency(ctx.stats, (_: A) => name) - } - - /** - * An onFailure FutureArrow callback that counts OverCapacity exceptions to a special counter. - * These will also be counted as failures and by exception class name, but having a special - * counter for this is easier to use in success rate computations where you want to factor out - * backpressure responses. 
- */ - def countOverCapacityExceptions[A](scopedStats: StatsReceiver): (A, Throwable) => Unit = { - val overCapacityCounter = scopedStats.counter("over_capacity") - - { - case (_, ex: OverCapacity) => overCapacityCounter.incr() - case _ => () - } - } - - /** - * Provides a simple mechanism for applying a Policy to an endpoint FutureArrow from - * an underlying service interface. - */ - class PolicyAdvocate[S](backendName: String, ctx: Backend.Context, svc: S) { - - /** - * Tacks on the TrackPolicy to the given base policy, and then applies the policy to - * a FutureArrow. This is more of a convenience method that every Backend can use to - * build the fully configured FutureArrow. - */ - def apply[A, B]( - endpointName: String, - policy: Policy, - endpoint: S => FutureArrow[A, B] - ): FutureArrow[A, B] = { - log.info(s"appling policy to $backendName.$endpointName: $policy") - policy.andThen(TrackPolicy)(endpointName, ctx)(endpoint(svc)) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.docx new file mode 100644 index 000000000..da77aa9dd Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala deleted file mode 100644 index f77ad3d77..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala +++ /dev/null @@ -1,50 +0,0 @@ -package com.twitter.tweetypie.backends - -import com.twitter.configbus.client.ConfigbusClientException -import com.twitter.configbus.client.file.PollingConfigSourceBuilder -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.logging.Logger -import com.twitter.util.Activity -import com.twitter.util.Activity._ -import 
com.twitter.conversions.DurationOps._ -import com.twitter.io.Buf - -trait ConfigBus { - def file(path: String): Activity[String] -} - -object ConfigBus { - private[this] val basePath = "appservices/tweetypie" - private[this] val log = Logger(getClass) - - def apply(stats: StatsReceiver, instanceId: Int, instanceCount: Int): ConfigBus = { - - val client = PollingConfigSourceBuilder() - .statsReceiver(stats) - .pollPeriod(30.seconds) - .instanceId(instanceId) - .numberOfInstances(instanceCount) - .build() - - val validBuffer = stats.counter("valid_buffer") - - def subscribe(path: String) = - client.subscribe(s"$basePath/$path").map(_.configs).map { - case Buf.Utf8(string) => - validBuffer.incr() - string - } - - new ConfigBus { - def file(path: String): Activity[String] = { - val changes = subscribe(path).run.changes.dedupWith { - case (Failed(e1: ConfigbusClientException), Failed(e2: ConfigbusClientException)) => - e1.getMessage == e2.getMessage - case other => - false - } - Activity(changes) - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.docx new file mode 100644 index 000000000..f76cfec67 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala deleted file mode 100644 index 781e2ad81..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala +++ /dev/null @@ -1,71 +0,0 @@ -package com.twitter.tweetypie.backends - -import com.twitter.container.{thriftscala => ccs} -import com.twitter.finagle.Backoff -import com.twitter.finagle.service.RetryPolicy -import 
com.twitter.finatra.thrift.thriftscala.ServerError -import com.twitter.finatra.thrift.thriftscala.ServerErrorCause -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.Duration -import com.twitter.tweetypie.Future -import com.twitter.tweetypie.Try -import com.twitter.tweetypie.util.RetryPolicyBuilder -import com.twitter.tweetypie.{thriftscala => tp} -import com.twitter.util.Throw - -object CreativesContainerService { - import Backend._ - - type MaterializeAsTweet = FutureArrow[ccs.MaterializeAsTweetRequests, Seq[tp.GetTweetResult]] - type MaterializeAsTweetFields = - FutureArrow[ccs.MaterializeAsTweetFieldsRequests, Seq[tp.GetTweetFieldsResult]] - - def fromClient( - client: ccs.CreativesContainerService.MethodPerEndpoint - ): CreativesContainerService = - new CreativesContainerService { - val materializeAsTweet: MaterializeAsTweet = FutureArrow(client.materializeAsTweets) - val materializeAsTweetFields: MaterializeAsTweetFields = FutureArrow( - client.materializeAsTweetFields) - - def ping(): Future[Unit] = client.materializeAsTweets(ccs.MaterializeAsTweetRequests()).unit - } - - case class Config( - requestTimeout: Duration, - timeoutBackoffs: Stream[Duration], - serverErrorBackoffs: Stream[Duration]) { - def apply(svc: CreativesContainerService, ctx: Backend.Context): CreativesContainerService = - new CreativesContainerService { - override val materializeAsTweet: MaterializeAsTweet = - policy("materializeAsTweets", ctx)(svc.materializeAsTweet) - - override val materializeAsTweetFields: MaterializeAsTweetFields = - policy("materializeAsTweetFields", ctx)(svc.materializeAsTweetFields) - - override def ping(): Future[Unit] = svc.ping() - } - - private[this] def policy[A, B](name: String, ctx: Context): Builder[A, B] = - defaultPolicy(name, requestTimeout, retryPolicy, ctx) - - private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = - RetryPolicy.combine[Try[B]]( - RetryPolicyBuilder.timeouts[B](timeoutBackoffs), - 
RetryPolicy.backoff(Backoff.fromStream(serverErrorBackoffs)) { - case Throw(ex: ServerError) if ex.errorCause != ServerErrorCause.NotImplemented => true - } - ) - - implicit val warmup: Warmup[CreativesContainerService] = - Warmup[CreativesContainerService]("creativesContainerService")(_.ping()) - } -} - -trait CreativesContainerService { - import CreativesContainerService._ - - val materializeAsTweet: MaterializeAsTweet - val materializeAsTweetFields: MaterializeAsTweetFields - def ping(): Future[Unit] -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.docx new file mode 100644 index 000000000..bb2e4c59e Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala deleted file mode 100644 index fc9e1acc3..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala +++ /dev/null @@ -1,43 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.escherbird.thriftscala.TweetEntityAnnotation -import com.twitter.escherbird.{thriftscala => escherbird} -import com.twitter.finagle.service.RetryPolicy -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.util.RetryPolicyBuilder - -object Escherbird { - import Backend._ - - type Annotate = FutureArrow[Tweet, Seq[TweetEntityAnnotation]] - - def fromClient(client: escherbird.TweetEntityAnnotationService.MethodPerEndpoint): Escherbird = - new Escherbird { - val annotate = FutureArrow(client.annotate) - } - - case class Config(requestTimeout: Duration, timeoutBackoffs: Stream[Duration]) { - - def apply(svc: Escherbird, ctx: Backend.Context): Escherbird = - new Escherbird { - val annotate: FutureArrow[Tweet, 
Seq[TweetEntityAnnotation]] = - policy("annotate", requestTimeout, ctx)(svc.annotate) - } - - private[this] def policy[A, B]( - name: String, - requestTimeout: Duration, - ctx: Context - ): Builder[A, B] = - defaultPolicy(name, requestTimeout, retryPolicy, ctx) - - private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = - RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) - } -} - -trait Escherbird { - import Escherbird._ - val annotate: Annotate -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.docx new file mode 100644 index 000000000..d68076bd8 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala deleted file mode 100644 index 10cdc28e1..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala +++ /dev/null @@ -1,83 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.expandodo.thriftscala.AttachmentEligibilityRequest -import com.twitter.expandodo.thriftscala.AttachmentEligibilityResponses -import com.twitter.expandodo.thriftscala.Card2Request -import com.twitter.expandodo.thriftscala.Card2RequestOptions -import com.twitter.expandodo.thriftscala.Card2Responses -import com.twitter.expandodo.thriftscala.CardsResponse -import com.twitter.expandodo.thriftscala.GetCardUsersRequests -import com.twitter.expandodo.thriftscala.GetCardUsersResponses -import com.twitter.expandodo.{thriftscala => expandodo} -import com.twitter.finagle.Backoff -import com.twitter.finagle.service.RetryPolicy -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.util.RetryPolicyBuilder - -object Expandodo { - import Backend._ - - type GetCards = 
FutureArrow[Set[String], collection.Map[String, expandodo.CardsResponse]] - type GetCards2 = FutureArrow[ - (Seq[expandodo.Card2Request], expandodo.Card2RequestOptions), - expandodo.Card2Responses - ] - type GetCardUsers = FutureArrow[expandodo.GetCardUsersRequests, expandodo.GetCardUsersResponses] - type CheckAttachmentEligibility = - FutureArrow[Seq[ - expandodo.AttachmentEligibilityRequest - ], expandodo.AttachmentEligibilityResponses] - - def fromClient(client: expandodo.CardsService.MethodPerEndpoint): Expandodo = - new Expandodo { - val getCards = FutureArrow(client.getCards _) - val getCards2 = FutureArrow((client.getCards2 _).tupled) - val getCardUsers = FutureArrow(client.getCardUsers _) - val checkAttachmentEligibility = FutureArrow(client.checkAttachmentEligibility _) - } - - case class Config( - requestTimeout: Duration, - timeoutBackoffs: Stream[Duration], - serverErrorBackoffs: Stream[Duration]) { - def apply(svc: Expandodo, ctx: Backend.Context): Expandodo = - new Expandodo { - val getCards: FutureArrow[Set[String], collection.Map[String, CardsResponse]] = - policy("getCards", ctx)(svc.getCards) - val getCards2: FutureArrow[(Seq[Card2Request], Card2RequestOptions), Card2Responses] = - policy("getCards2", ctx)(svc.getCards2) - val getCardUsers: FutureArrow[GetCardUsersRequests, GetCardUsersResponses] = - policy("getCardUsers", ctx)(svc.getCardUsers) - val checkAttachmentEligibility: FutureArrow[Seq[ - AttachmentEligibilityRequest - ], AttachmentEligibilityResponses] = - policy("checkAttachmentEligibility", ctx)(svc.checkAttachmentEligibility) - } - - private[this] def policy[A, B](name: String, ctx: Context): Builder[A, B] = - defaultPolicy(name, requestTimeout, retryPolicy, ctx) - - private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = - RetryPolicy.combine[Try[B]]( - RetryPolicyBuilder.timeouts[B](timeoutBackoffs), - RetryPolicy.backoff(Backoff.fromStream(serverErrorBackoffs)) { - case Throw(ex: expandodo.InternalServerError) => true - } - ) - } - 
- implicit val warmup: Warmup[Expandodo] = - Warmup[Expandodo]("expandodo")( - _.getCards2((Seq.empty, expandodo.Card2RequestOptions("iPhone-13"))) - ) -} - -trait Expandodo { - import Expandodo._ - - val getCards: GetCards - val getCards2: GetCards2 - val getCardUsers: GetCardUsers - val checkAttachmentEligibility: CheckAttachmentEligibility -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.docx new file mode 100644 index 000000000..a0a433067 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala deleted file mode 100644 index e05d9950e..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala +++ /dev/null @@ -1,84 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.servo.util.FutureArrow -import com.twitter.stitch.Stitch -import com.twitter.storage.client.manhattan.bijections.Bijections._ -import com.twitter.storage.client.manhattan.kv._ -import com.twitter.storage.client.manhattan.kv.impl._ -import com.twitter.util.Time - -/** - * Read and write the timestamp of the last delete_location_data request - * for a user. This is used as a safeguard to prevent leaking geo data - * with tweets that have not yet been scrubbed or were missed during the - * geo scrubbing process. 
- */ -object GeoScrubEventStore { - type GetGeoScrubTimestamp = UserId => Stitch[Option[Time]] - type SetGeoScrubTimestamp = FutureArrow[(UserId, Time), Unit] - - private[this] val KeyDesc = - KeyDescriptor( - Component(LongInjection), - Component(LongInjection, StringInjection) - ).withDataset("geo_scrub") - - private[this] val ValDesc = ValueDescriptor(LongInjection) - - // This modulus determines how user ids get assigned to PKeys, and - // thus to shards within the MH cluster. The origin of the specific - // value has been lost to time, but it's important that we don't - // change it, or else the existing data will be inaccessible. - private[this] val PKeyModulus: Long = 25000L - - private[this] def toKey(userId: Long) = - KeyDesc - .withPkey(userId % PKeyModulus) - .withLkey(userId, "_last_scrub") - - def apply(client: ManhattanKVClient, config: Config, ctx: Backend.Context): GeoScrubEventStore = { - new GeoScrubEventStore { - val getGeoScrubTimestamp: UserId => Stitch[Option[Time]] = { - val endpoint = config.read.endpoint(client) - - (userId: UserId) => { - endpoint - .get(toKey(userId), ValDesc) - .map(_.map(value => Time.fromMilliseconds(value.contents))) - } - } - - val setGeoScrubTimestamp: SetGeoScrubTimestamp = { - val endpoint = config.write.endpoint(client) - - FutureArrow { - case (userId, timestamp) => - val key = toKey(userId) - - // Use the geo scrub timestamp as the MH entry timestamp. This - // ensures that whatever timestamp is highest will win any - // update races. 
- val value = ValDesc.withValue(timestamp.inMilliseconds, timestamp) - Stitch.run(endpoint.insert(key, value)) - } - } - } - } - - case class EndpointConfig(requestTimeout: Duration, maxRetryCount: Int) { - def endpoint(client: ManhattanKVClient): ManhattanKVEndpoint = - ManhattanKVEndpointBuilder(client) - .defaultMaxTimeout(requestTimeout) - .maxRetryCount(maxRetryCount) - .build() - } - - case class Config(read: EndpointConfig, write: EndpointConfig) -} - -trait GeoScrubEventStore { - import GeoScrubEventStore._ - val getGeoScrubTimestamp: GetGeoScrubTimestamp - val setGeoScrubTimestamp: SetGeoScrubTimestamp -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.docx new file mode 100644 index 000000000..d7f166ea4 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala deleted file mode 100644 index 79f519250..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala +++ /dev/null @@ -1,93 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.finagle.Backoff -import com.twitter.finagle.service.RetryPolicy -import com.twitter.gizmoduck.thriftscala.CountsUpdateField -import com.twitter.gizmoduck.thriftscala.LookupContext -import com.twitter.gizmoduck.thriftscala.ModifiedUser -import com.twitter.gizmoduck.thriftscala.UserResult -import com.twitter.gizmoduck.{thriftscala => gd} -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.core.OverCapacity -import com.twitter.tweetypie.util.RetryPolicyBuilder - -object Gizmoduck { - import Backend._ - - type GetById = FutureArrow[(gd.LookupContext, Seq[UserId], Set[UserField]), Seq[gd.UserResult]] - type 
GetByScreenName = - FutureArrow[(gd.LookupContext, Seq[String], Set[UserField]), Seq[gd.UserResult]] - type IncrCount = FutureArrow[(UserId, gd.CountsUpdateField, Int), Unit] - type ModifyAndGet = FutureArrow[(gd.LookupContext, UserId, gd.ModifiedUser), gd.User] - - def fromClient(client: gd.UserService.MethodPerEndpoint): Gizmoduck = - new Gizmoduck { - val getById = FutureArrow((client.get _).tupled) - val getByScreenName = FutureArrow((client.getByScreenName _).tupled) - val incrCount = FutureArrow((client.incrCount _).tupled) - val modifyAndGet = FutureArrow((client.modifyAndGet _).tupled) - def ping(): Future[Unit] = client.get(gd.LookupContext(), Seq.empty, Set.empty).unit - } - - case class Config( - readTimeout: Duration, - writeTimeout: Duration, - modifyAndGetTimeout: Duration, - modifyAndGetTimeoutBackoffs: Stream[Duration], - defaultTimeoutBackoffs: Stream[Duration], - gizmoduckExceptionBackoffs: Stream[Duration]) { - - def apply(svc: Gizmoduck, ctx: Backend.Context): Gizmoduck = - new Gizmoduck { - val getById: FutureArrow[(LookupContext, Seq[UserId], Set[UserField]), Seq[UserResult]] = - policy("getById", readTimeout, ctx)(svc.getById) - val getByScreenName: FutureArrow[(LookupContext, Seq[String], Set[UserField]), Seq[ - UserResult - ]] = policy("getByScreenName", readTimeout, ctx)(svc.getByScreenName) - val incrCount: FutureArrow[(UserId, CountsUpdateField, Int), Unit] = - policy("incrCount", writeTimeout, ctx)(svc.incrCount) - val modifyAndGet: FutureArrow[(LookupContext, UserId, ModifiedUser), User] = policy( - "modifyAndGet", - modifyAndGetTimeout, - ctx, - timeoutBackoffs = modifyAndGetTimeoutBackoffs - )(svc.modifyAndGet) - def ping(): Future[Unit] = svc.ping() - } - - private[this] def policy[A, B]( - name: String, - requestTimeout: Duration, - ctx: Context, - timeoutBackoffs: Stream[Duration] = defaultTimeoutBackoffs - ): Builder[A, B] = - translateExceptions andThen - defaultPolicy(name, requestTimeout, retryPolicy(timeoutBackoffs), ctx) - - 
private[this] def translateExceptions[A, B]: Builder[A, B] = - _.translateExceptions { - case gd.OverCapacity(msg) => OverCapacity(s"gizmoduck: $msg") - } - - private[this] def retryPolicy[B](timeoutBackoffs: Stream[Duration]): RetryPolicy[Try[B]] = - RetryPolicy.combine[Try[B]]( - RetryPolicyBuilder.timeouts[B](timeoutBackoffs), - RetryPolicy.backoff(Backoff.fromStream(gizmoduckExceptionBackoffs)) { - case Throw(ex: gd.InternalServerError) => true - } - ) - } - - implicit val warmup: Warmup[Gizmoduck] = - Warmup[Gizmoduck]("gizmoduck")(_.ping()) -} - -trait Gizmoduck { - import Gizmoduck._ - val getById: GetById - val getByScreenName: GetByScreenName - val incrCount: IncrCount - val modifyAndGet: ModifyAndGet - def ping(): Future[Unit] -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.docx new file mode 100644 index 000000000..f2c217778 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala deleted file mode 100644 index 3b716c5b1..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala +++ /dev/null @@ -1,42 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.conversions.PercentOps._ -import com.twitter.conversions.DurationOps._ -import com.twitter.dataproducts.enrichments.thriftscala._ -import com.twitter.dataproducts.enrichments.thriftscala.Enricherator -import com.twitter.finagle.thriftmux.MethodBuilder -import com.twitter.servo.util.FutureArrow - -object GnipEnricherator { - - type HydrateProfileGeo = FutureArrow[ProfileGeoRequest, Seq[ProfileGeoResponse]] - - private def methodPerEndpoint(methodBuilder: 
MethodBuilder) = - Enricherator.MethodPerEndpoint( - methodBuilder - .servicePerEndpoint[Enricherator.ServicePerEndpoint] - .withHydrateProfileGeo( - methodBuilder - .withTimeoutTotal(300.milliseconds) - .withTimeoutPerRequest(100.milliseconds) - .idempotent(maxExtraLoad = 1.percent) - .servicePerEndpoint[Enricherator.ServicePerEndpoint](methodName = "hydrateProfileGeo") - .hydrateProfileGeo - ) - ) - - def fromMethod(methodBuilder: MethodBuilder): GnipEnricherator = { - val mpe = methodPerEndpoint(methodBuilder) - - new GnipEnricherator { - override val hydrateProfileGeo: HydrateProfileGeo = - FutureArrow(mpe.hydrateProfileGeo) - } - } -} - -trait GnipEnricherator { - import GnipEnricherator._ - val hydrateProfileGeo: HydrateProfileGeo -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.docx new file mode 100644 index 000000000..c00692d23 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala deleted file mode 100644 index 3bfe1a682..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala +++ /dev/null @@ -1,55 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.finagle.service.RetryPolicy -import com.twitter.limiter.thriftscala.FeatureRequest -import com.twitter.limiter.thriftscala.Usage -import com.twitter.limiter.{thriftscala => ls} -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.util.RetryPolicyBuilder - -object LimiterBackend { - import Backend._ - - type IncrementFeature = FutureArrow[(ls.FeatureRequest, Int), Unit] - type GetFeatureUsage = FutureArrow[ls.FeatureRequest, ls.Usage] - - def 
fromClient(client: ls.LimitService.MethodPerEndpoint): LimiterBackend = - new LimiterBackend { - val incrementFeature: IncrementFeature = - FutureArrow { - case (featureReq, amount) => client.incrementFeature(featureReq, amount).unit - } - - val getFeatureUsage: GetFeatureUsage = - FutureArrow(featureReq => client.getLimitUsage(None, Some(featureReq))) - } - - case class Config(requestTimeout: Duration, timeoutBackoffs: Stream[Duration]) { - - def apply(client: LimiterBackend, ctx: Backend.Context): LimiterBackend = - new LimiterBackend { - val incrementFeature: FutureArrow[(FeatureRequest, Int), Unit] = - policy("incrementFeature", requestTimeout, ctx)(client.incrementFeature) - val getFeatureUsage: FutureArrow[FeatureRequest, Usage] = - policy("getFeatureUsage", requestTimeout, ctx)(client.getFeatureUsage) - } - - private[this] def policy[A, B]( - name: String, - requestTimeout: Duration, - ctx: Context - ): Builder[A, B] = - defaultPolicy(name, requestTimeout, retryPolicy, ctx) - - private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = - RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) - } -} - -trait LimiterBackend { - import LimiterBackend._ - - val incrementFeature: IncrementFeature - val getFeatureUsage: GetFeatureUsage -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.docx new file mode 100644 index 000000000..44b543b32 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala deleted file mode 100644 index 289c92c0b..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala +++ /dev/null @@ -1,193 +0,0 @@ -package com.twitter.tweetypie -package 
backends - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.limiter.thriftscala.FeatureRequest -import com.twitter.tweetypie.backends.LimiterBackend.GetFeatureUsage -import com.twitter.tweetypie.backends.LimiterBackend.IncrementFeature -import com.twitter.tweetypie.backends.LimiterService.Feature - -/** - * Why does LimiterService exist? - * - * The underlying Limiter thrift service doesn't support batching. This trait and implementation - * basically exist to allow a batch like interface to the Limiter. This keeps us from having to - * spread batching throughout our code base. - * - * Why is LimiterService in the backends package? - * - * In some ways it is like a backend if the backend supports batching. There is a modest amount of - * business logic LimiterService, but that logic exists here to allow easier consumption throughout - * the tweetypie code base. We did look at moving LimiterService to another package, but all likely - * candidates (service, serverutil) caused circular dependencies. - * - * When I need to add functionality, should I add it to LimiterBackend or LimiterService? - * - * LimiterBackend is used as a simple wrapper around the Limiter thrift client. The LimiterBackend - * should be kept as dumb as possible. You will most likely want to add the functionality in - * LimiterService. 
- */ -object LimiterService { - type MinRemaining = (UserId, Option[UserId]) => Future[Int] - type HasRemaining = (UserId, Option[UserId]) => Future[Boolean] - type Increment = (UserId, Option[UserId], Int) => Future[Unit] - type IncrementByOne = (UserId, Option[UserId]) => Future[Unit] - - sealed abstract class Feature(val name: String, val hasPerApp: Boolean = false) { - def forUser(userId: UserId): FeatureRequest = FeatureRequest(name, userId = Some(userId)) - def forApp(appId: AppId): Option[FeatureRequest] = - if (hasPerApp) { - Some( - FeatureRequest( - s"${name}_per_app", - applicationId = Some(appId), - identifier = Some(appId.toString) - ) - ) - } else { - None - } - } - object Feature { - case object Updates extends Feature("updates", hasPerApp = true) - case object MediaTagCreate extends Feature("media_tag_create") - case object TweetCreateFailure extends Feature("tweet_creation_failure") - } - - def fromBackend( - incrementFeature: IncrementFeature, - getFeatureUsage: GetFeatureUsage, - getAppId: => Option[ - AppId - ], // the call-by-name here to invoke per request to get the current request's app id - stats: StatsReceiver = NullStatsReceiver - ): LimiterService = - new LimiterService { - def increment( - feature: Feature - )( - userId: UserId, - contributorUserId: Option[UserId], - amount: Int - ): Future[Unit] = { - Future.when(amount > 0) { - def increment(req: FeatureRequest): Future[Unit] = incrementFeature((req, amount)) - - val incrementUser: Option[Future[Unit]] = - Some(increment(feature.forUser(userId))) - - val incrementContributor: Option[Future[Unit]] = - for { - id <- contributorUserId - if id != userId - } yield increment(feature.forUser(id)) - - val incrementPerApp: Option[Future[Unit]] = - for { - appId <- getAppId - req <- feature.forApp(appId) - } yield increment(req) - - Future.collect(Seq(incrementUser, incrementContributor, incrementPerApp).flatten) - } - } - - def minRemaining( - feature: Feature - )( - userId: UserId, - 
contributorUserId: Option[UserId] - ): Future[Int] = { - def getRemaining(req: FeatureRequest): Future[Int] = getFeatureUsage(req).map(_.remaining) - - val getUserRemaining: Option[Future[Int]] = - Some(getRemaining(feature.forUser(userId))) - - val getContributorRemaining: Option[Future[Int]] = - contributorUserId.map(id => getRemaining(feature.forUser(id))) - - val getPerAppRemaining: Option[Future[Int]] = - for { - appId <- getAppId - req <- feature.forApp(appId) - } yield getRemaining(req) - - Future - .collect(Seq(getUserRemaining, getContributorRemaining, getPerAppRemaining).flatten) - .map(_.min) - } - } -} - -trait LimiterService { - - /** - * Increment the feature count for both the user and the contributor. If either increment fails, - * the resulting future will be the first exception encountered. - * - * @param feature The feature that is incremented - * @param userId The current user tied to the current request - * @param contributorUserId The contributor, if one exists, tied to the current request - * @param amount The amount that each feature should be incremented. - */ - def increment( - feature: Feature - )( - userId: UserId, - contributorUserId: Option[UserId], - amount: Int - ): Future[Unit] - - /** - * Increment the feature count, by one, for both the user and the contributor. If either - * increment fails, the resulting future will be the first exception encountered. - * - * @param feature The feature that is incremented - * @param userId The current user tied to the current request - * @param contributorUserId The contributor, if one exists, tied to the current request - * - * @see [[increment]] if you want to increment a feature by a specified amount - */ - def incrementByOne( - feature: Feature - )( - userId: UserId, - contributorUserId: Option[UserId] - ): Future[Unit] = - increment(feature)(userId, contributorUserId, 1) - - /** - * The minimum remaining limit between the user and contributor. 
If an exception occurs, then the - * resulting Future will be the first exception encountered. - * - * @param feature The feature that is queried - * @param userId The current user tied to the current request - * @param contributorUserId The contributor, if one exists, tied to the current request - * - * @return a `Future[Int]` with the minimum limit left between the user and contributor - */ - def minRemaining(feature: Feature)(userId: UserId, contributorUserId: Option[UserId]): Future[Int] - - /** - * Can the user and contributor increment the given feature. If the result cannot be determined - * because of an exception, then we assume they can increment. This will allow us to continue - * servicing requests even if the limiter service isn't responding. - * - * @param feature The feature that is queried - * @param userId The current user tied to the current request - * @param contributorUserId The contributor, if one exists, tied to the current request - * @return a `Future[Boolean]` with true if both the user and contributor have remaining limit - * cap. 
- * - * @see [[minRemaining]] if you would like to handle any exceptions that occur on your own - */ - def hasRemaining( - feature: Feature - )( - userId: UserId, - contributorUserId: Option[UserId] - ): Future[Boolean] = - minRemaining(feature)(userId, contributorUserId) - .map(_ > 0) - .handle { case _ => true } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.docx new file mode 100644 index 000000000..78430977e Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala deleted file mode 100644 index ce4e0838e..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.servo.exception.thriftscala -import com.twitter.servo.exception.thriftscala.ClientErrorCause -import com.twitter.stitch.Stitch -import com.twitter.storage.client.manhattan.kv.TimeoutManhattanException -import com.twitter.tweetypie.core.OverCapacity -import com.twitter.tweetypie.storage.TweetStorageClient.Ping -import com.twitter.tweetypie.storage.ClientError -import com.twitter.tweetypie.storage.RateLimited -import com.twitter.tweetypie.storage.TweetStorageClient -import com.twitter.tweetypie.util.StitchUtils -import com.twitter.util.TimeoutException - -object Manhattan { - def fromClient(underlying: TweetStorageClient): TweetStorageClient = - new TweetStorageClient { - val addTweet = translateExceptions(underlying.addTweet) - val deleteAdditionalFields = translateExceptions(underlying.deleteAdditionalFields) - val getDeletedTweets = translateExceptions(underlying.getDeletedTweets) - val getTweet = 
translateExceptions(underlying.getTweet) - val getStoredTweet = translateExceptions(underlying.getStoredTweet) - val scrub = translateExceptions(underlying.scrub) - val softDelete = translateExceptions(underlying.softDelete) - val undelete = translateExceptions(underlying.undelete) - val updateTweet = translateExceptions(underlying.updateTweet) - val hardDeleteTweet = translateExceptions(underlying.hardDeleteTweet) - val ping: Ping = underlying.ping - val bounceDelete = translateExceptions(underlying.bounceDelete) - } - - private[backends] object translateExceptions { - private[this] def pf: PartialFunction[Throwable, Throwable] = { - case e: RateLimited => OverCapacity(s"storage: ${e.getMessage}") - case e: TimeoutManhattanException => new TimeoutException(e.getMessage) - case e: ClientError => thriftscala.ClientError(ClientErrorCause.BadRequest, e.message) - } - - def apply[A, B](f: A => Stitch[B]): A => Stitch[B] = - a => StitchUtils.translateExceptions(f(a), pf) - - def apply[A, B, C](f: (A, B) => Stitch[C]): (A, B) => Stitch[C] = - (a, b) => StitchUtils.translateExceptions(f(a, b), pf) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.docx new file mode 100644 index 000000000..95386e961 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala deleted file mode 100644 index a355507cf..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala +++ /dev/null @@ -1,43 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.finagle.service.RetryPolicy -import 
com.twitter.mediainfo.server.thriftscala.GetTweetMediaInfoRequest -import com.twitter.mediainfo.server.thriftscala.GetTweetMediaInfoResponse -import com.twitter.mediainfo.server.{thriftscala => mis} -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.util.RetryPolicyBuilder - -object MediaInfoService { - import Backend._ - - type GetTweetMediaInfo = FutureArrow[mis.GetTweetMediaInfoRequest, mis.GetTweetMediaInfoResponse] - - def fromClient(client: mis.MediaInfoService.MethodPerEndpoint): MediaInfoService = - new MediaInfoService { - val getTweetMediaInfo = FutureArrow(client.getTweetMediaInfo) - } - - case class Config( - requestTimeout: Duration, - totalTimeout: Duration, - timeoutBackoffs: Stream[Duration]) { - - def apply(svc: MediaInfoService, ctx: Backend.Context): MediaInfoService = - new MediaInfoService { - val getTweetMediaInfo: FutureArrow[GetTweetMediaInfoRequest, GetTweetMediaInfoResponse] = - policy("getTweetMediaInfo", ctx)(svc.getTweetMediaInfo) - } - - private[this] def policy[A, B](name: String, ctx: Context): Builder[A, B] = - defaultPolicy(name, requestTimeout, retryPolicy, ctx, totalTimeout = totalTimeout) - - private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = - RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) - } -} - -trait MediaInfoService { - import MediaInfoService._ - val getTweetMediaInfo: GetTweetMediaInfo -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.docx new file mode 100644 index 000000000..15f8bb737 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala deleted file mode 100644 index d8df2beb5..000000000 --- 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala +++ /dev/null @@ -1,73 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.Backoff -import com.twitter.finagle.service.RetryPolicy -import com.twitter.service.gen.scarecrow.thriftscala.CheckTweetResponse -import com.twitter.service.gen.scarecrow.thriftscala.Retweet -import com.twitter.service.gen.scarecrow.thriftscala.TieredAction -import com.twitter.service.gen.scarecrow.thriftscala.TweetContext -import com.twitter.service.gen.scarecrow.thriftscala.TweetNew -import com.twitter.service.gen.scarecrow.{thriftscala => scarecrow} -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.util.RetryPolicyBuilder - -object Scarecrow { - import Backend._ - - type CheckTweet2 = - FutureArrow[(scarecrow.TweetNew, scarecrow.TweetContext), scarecrow.CheckTweetResponse] - type CheckRetweet = FutureArrow[scarecrow.Retweet, scarecrow.TieredAction] - - def fromClient(client: scarecrow.ScarecrowService.MethodPerEndpoint): Scarecrow = - new Scarecrow { - val checkTweet2 = FutureArrow((client.checkTweet2 _).tupled) - val checkRetweet = FutureArrow(client.checkRetweet _) - def ping(): Future[Unit] = client.ping() - } - - case class Config( - readTimeout: Duration, - writeTimeout: Duration, - timeoutBackoffs: Stream[Duration], - scarecrowExceptionBackoffs: Stream[Duration]) { - def apply(svc: Scarecrow, ctx: Backend.Context): Scarecrow = - new Scarecrow { - val checkTweet2: FutureArrow[(TweetNew, TweetContext), CheckTweetResponse] = - writePolicy("checkTweet2", ctx)(svc.checkTweet2) - val checkRetweet: FutureArrow[Retweet, TieredAction] = - writePolicy("checkRetweet", ctx)(svc.checkRetweet) - def ping(): Future[Unit] = svc.ping() - } - - private[this] def readPolicy[A, B](name: String, ctx: Context): Builder[A, B] = - defaultPolicy(name, readTimeout, readRetryPolicy, ctx) - - private[this] def writePolicy[A, 
B](name: String, ctx: Context): Builder[A, B] = - defaultPolicy(name, writeTimeout, nullRetryPolicy, ctx) - - private[this] def readRetryPolicy[B]: RetryPolicy[Try[B]] = - RetryPolicy.combine[Try[B]]( - RetryPolicyBuilder.timeouts[B](timeoutBackoffs), - RetryPolicy.backoff(Backoff.fromStream(scarecrowExceptionBackoffs)) { - case Throw(ex: scarecrow.InternalServerError) => true - } - ) - - private[this] def nullRetryPolicy[B]: RetryPolicy[Try[B]] = - // retry policy that runs once, and will not retry on any exception - RetryPolicy.backoff(Backoff.fromStream(Stream(0.milliseconds))) { - case Throw(_) => false - } - } - - implicit val warmup: Warmup[Scarecrow] = Warmup[Scarecrow]("scarecrow")(_.ping()) -} - -trait Scarecrow { - import Scarecrow._ - val checkTweet2: CheckTweet2 - val checkRetweet: CheckRetweet - def ping(): Future[Unit] -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.docx new file mode 100644 index 000000000..0b5954eed Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala deleted file mode 100644 index 37ac1243d..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala +++ /dev/null @@ -1,52 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.finagle.service.RetryPolicy -import com.twitter.servo.util.FutureArrow -import com.twitter.socialgraph.thriftscala.ExistsRequest -import com.twitter.socialgraph.thriftscala.ExistsResult -import com.twitter.socialgraph.thriftscala.RequestContext -import com.twitter.socialgraph.{thriftscala => sg} -import com.twitter.tweetypie.util.RetryPolicyBuilder - 
-object SocialGraphService { - import Backend._ - - type Exists = - FutureArrow[(Seq[sg.ExistsRequest], Option[sg.RequestContext]), Seq[sg.ExistsResult]] - - def fromClient(client: sg.SocialGraphService.MethodPerEndpoint): SocialGraphService = - new SocialGraphService { - val exists = FutureArrow((client.exists _).tupled) - def ping: Future[Unit] = client.ping().unit - } - - case class Config(socialGraphTimeout: Duration, timeoutBackoffs: Stream[Duration]) { - - def apply(svc: SocialGraphService, ctx: Backend.Context): SocialGraphService = - new SocialGraphService { - val exists: FutureArrow[(Seq[ExistsRequest], Option[RequestContext]), Seq[ExistsResult]] = - policy("exists", socialGraphTimeout, ctx)(svc.exists) - def ping(): Future[Unit] = svc.ping() - } - - private[this] def policy[A, B]( - name: String, - requestTimeout: Duration, - ctx: Context - ): Builder[A, B] = - defaultPolicy(name, requestTimeout, retryPolicy, ctx) - - private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = - RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) - } - - implicit val warmup: Warmup[SocialGraphService] = - Warmup[SocialGraphService]("socialgraphservice")(_.ping) -} - -trait SocialGraphService { - import SocialGraphService._ - val exists: Exists - def ping(): Future[Unit] -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.docx new file mode 100644 index 000000000..358e4ae86 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala deleted file mode 100644 index e056db8c9..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala +++ /dev/null @@ -1,98 +0,0 @@ -package com.twitter.tweetypie -package backends - -import 
com.twitter.finagle.Backoff -import com.twitter.finagle.service.RetryPolicy -import com.twitter.flockdb.client.{thriftscala => flockdb, _} -import com.twitter.servo -import com.twitter.servo.util.RetryHandler -import com.twitter.tweetypie.core.OverCapacity -import com.twitter.tweetypie.util.RetryPolicyBuilder -import com.twitter.util.Future -import com.twitter.util.TimeoutException - -object TFlock { - val log = Logger(this.getClass) - - case class Config( - requestTimeout: Duration, - timeoutBackoffs: Stream[Duration], - flockExceptionBackoffs: Stream[Duration], - overCapacityBackoffs: Stream[Duration], - defaultPageSize: Int = 1000) { - def apply(svc: flockdb.FlockDB.MethodPerEndpoint, ctx: Backend.Context): TFlockClient = { - val retryHandler = - RetryHandler[Any]( - retryPolicy(timeoutBackoffs, flockExceptionBackoffs, overCapacityBackoffs), - ctx.timer, - ctx.stats - ) - val rescueHandler = translateExceptions.andThen(Future.exception) - val exceptionCounter = new servo.util.ExceptionCounter(ctx.stats, "failures") - val timeoutException = new TimeoutException(s"tflock: $requestTimeout") - val wrapper = - new WrappingFunction { - def apply[T](f: => Future[T]): Future[T] = - retryHandler { - exceptionCounter { - f.raiseWithin(ctx.timer, requestTimeout, timeoutException) - .onFailure(logFlockExceptions) - .rescue(rescueHandler) - } - } - } - - val wrappedClient = new WrappingFlockClient(svc, wrapper, wrapper) - val statsClient = new StatsCollectingFlockService(wrappedClient, ctx.stats) - new TFlockClient(statsClient, defaultPageSize) - } - } - - def isOverCapacity(ex: flockdb.FlockException): Boolean = - ex.errorCode match { - case Some(flockdb.Constants.READ_OVERCAPACITY_ERROR) => true - case Some(flockdb.Constants.WRITE_OVERCAPACITY_ERROR) => true - case _ => false - } - - /** - * Builds a RetryPolicy for tflock operations that will retry timeouts with the specified - * timeout backoffs, and will retry non-overcapacity FlockExceptions with the - * specified 
flockExceptionBackoffs backoffs, and will retry over-capacity exceptions with - * the specified overCapacityBackoffs. - */ - def retryPolicy( - timeoutBackoffs: Stream[Duration], - flockExceptionBackoffs: Stream[Duration], - overCapacityBackoffs: Stream[Duration] - ): RetryPolicy[Try[Any]] = - RetryPolicy.combine[Try[Any]]( - RetryPolicyBuilder.timeouts[Any](timeoutBackoffs), - RetryPolicy.backoff(Backoff.fromStream(flockExceptionBackoffs)) { - case Throw(ex: flockdb.FlockException) if !isOverCapacity(ex) => true - case Throw(_: flockdb.FlockQuotaException) => false - }, - RetryPolicy.backoff(Backoff.fromStream(overCapacityBackoffs)) { - case Throw(ex: flockdb.FlockException) if isOverCapacity(ex) => true - case Throw(_: flockdb.FlockQuotaException) => true - case Throw(_: OverCapacity) => true - } - ) - - val logFlockExceptions: Throwable => Unit = { - case t: flockdb.FlockException => { - log.info("FlockException from TFlock", t) - } - case _ => - } - - /** - * Converts FlockExceptions with overcapacity codes into tweetypie's OverCapacity. 
- */ - val translateExceptions: PartialFunction[Throwable, Throwable] = { - case t: flockdb.FlockQuotaException => - OverCapacity(s"tflock: throttled ${t.description}") - case t: flockdb.FlockException if isOverCapacity(t) => - OverCapacity(s"tflock: ${t.description}") - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.docx new file mode 100644 index 000000000..3ef195a9a Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala deleted file mode 100644 index 95385b510..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala +++ /dev/null @@ -1,94 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.finagle.Backoff -import com.twitter.finagle.service.RetryPolicy -import com.twitter.service.talon.thriftscala.ExpandRequest -import com.twitter.service.talon.thriftscala.ExpandResponse -import com.twitter.service.talon.thriftscala.ResponseCode -import com.twitter.service.talon.thriftscala.ShortenRequest -import com.twitter.service.talon.thriftscala.ShortenResponse -import com.twitter.service.talon.{thriftscala => talon} -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.core.OverCapacity -import com.twitter.tweetypie.util.RetryPolicyBuilder - -object Talon { - import Backend._ - - type Expand = FutureArrow[talon.ExpandRequest, talon.ExpandResponse] - type Shorten = FutureArrow[talon.ShortenRequest, talon.ShortenResponse] - - case object TransientError extends Exception() - case object PermanentError extends Exception() - - def fromClient(client: talon.Talon.MethodPerEndpoint): Talon = - new Talon { - val shorten = FutureArrow(client.shorten _) - val expand = 
FutureArrow(client.expand _) - def ping(): Future[Unit] = client.serviceInfo().unit - } - - case class Config( - shortenTimeout: Duration, - expandTimeout: Duration, - timeoutBackoffs: Stream[Duration], - transientErrorBackoffs: Stream[Duration]) { - def apply(svc: Talon, ctx: Backend.Context): Talon = - new Talon { - val shorten: FutureArrow[ShortenRequest, ShortenResponse] = - policy("shorten", shortenTimeout, shortenResponseCode, ctx)(svc.shorten) - val expand: FutureArrow[ExpandRequest, ExpandResponse] = - policy("expand", expandTimeout, expandResponseCode, ctx)(svc.expand) - def ping(): Future[Unit] = svc.ping() - } - - private[this] def policy[A, B]( - name: String, - requestTimeout: Duration, - getResponseCode: B => talon.ResponseCode, - ctx: Context - ): Builder[A, B] = - handleResponseCodes(name, getResponseCode, ctx) andThen - defaultPolicy(name, requestTimeout, retryPolicy, ctx) - - private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = - RetryPolicy.combine[Try[B]]( - RetryPolicyBuilder.timeouts[B](timeoutBackoffs), - RetryPolicy.backoff(Backoff.fromStream(transientErrorBackoffs)) { - case Throw(TransientError) => true - } - ) - - private[this] def handleResponseCodes[A, B]( - name: String, - extract: B => talon.ResponseCode, - ctx: Context - ): Builder[A, B] = { - val scopedStats = ctx.stats.scope(name) - val responseCodeStats = scopedStats.scope("response_code") - _ andThen FutureArrow[B, B] { res => - val responseCode = extract(res) - responseCodeStats.counter(responseCode.toString).incr() - responseCode match { - case talon.ResponseCode.TransientError => Future.exception(TransientError) - case talon.ResponseCode.PermanentError => Future.exception(PermanentError) - case talon.ResponseCode.ServerOverloaded => Future.exception(OverCapacity("talon")) - case _ => Future.value(res) - } - } - } - } - - def shortenResponseCode(res: talon.ShortenResponse): ResponseCode = res.responseCode - def expandResponseCode(res: talon.ExpandResponse): ResponseCode = 
res.responseCode - - implicit val warmup: Warmup[Talon] = Warmup[Talon]("talon")(_.ping()) -} - -trait Talon { - import Talon._ - val shorten: Shorten - val expand: Expand - def ping(): Future[Unit] -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.docx new file mode 100644 index 000000000..7a79028b6 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala deleted file mode 100644 index a8c9b74db..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala +++ /dev/null @@ -1,84 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.finagle.Backoff -import com.twitter.finagle.service.RetryPolicy -import com.twitter.servo.util.FutureArrow -import com.twitter.timelineservice.thriftscala.Event -import com.twitter.timelineservice.thriftscala.PerspectiveQuery -import com.twitter.timelineservice.thriftscala.PerspectiveResult -import com.twitter.timelineservice.thriftscala.ProcessEventResult -import com.twitter.timelineservice.thriftscala.StatusTimelineResult -import com.twitter.timelineservice.thriftscala.TimelineQuery -import com.twitter.timelineservice.{thriftscala => tls} -import com.twitter.tweetypie.util.RetryPolicyBuilder - -object TimelineService { - import Backend._ - - type GetStatusTimeline = FutureArrow[Seq[tls.TimelineQuery], Seq[tls.StatusTimelineResult]] - type GetPerspectives = FutureArrow[Seq[tls.PerspectiveQuery], Seq[tls.PerspectiveResult]] - type ProcessEvent2 = FutureArrow[tls.Event, tls.ProcessEventResult] - - private val warmupQuery = - // we need a non-empty query, since tls treats empty queries as an error - 
tls.TimelineQuery( - timelineType = tls.TimelineType.User, - timelineId = 620530287L, // same user id that timelineservice-api uses for warmup - maxCount = 1 - ) - - def fromClient(client: tls.TimelineService.MethodPerEndpoint): TimelineService = - new TimelineService { - val processEvent2 = FutureArrow(client.processEvent2 _) - val getStatusTimeline = FutureArrow(client.getStatusTimeline _) - val getPerspectives = FutureArrow(client.getPerspectives _) - def ping(): Future[Unit] = - client.touchTimeline(Seq(warmupQuery)).handle { case _: tls.InternalServerError => } - } - - case class Config(writeRequestPolicy: Policy, readRequestPolicy: Policy) { - - def apply(svc: TimelineService, ctx: Backend.Context): TimelineService = { - val build = new PolicyAdvocate("TimelineService", ctx, svc) - new TimelineService { - val processEvent2: FutureArrow[Event, ProcessEventResult] = - build("processEvent2", writeRequestPolicy, _.processEvent2) - val getStatusTimeline: FutureArrow[Seq[TimelineQuery], Seq[StatusTimelineResult]] = - build("getStatusTimeline", readRequestPolicy, _.getStatusTimeline) - val getPerspectives: FutureArrow[Seq[PerspectiveQuery], Seq[PerspectiveResult]] = - build("getPerspectives", readRequestPolicy, _.getPerspectives) - def ping(): Future[Unit] = svc.ping() - } - } - } - - case class FailureBackoffsPolicy( - timeoutBackoffs: Stream[Duration] = Stream.empty, - tlsExceptionBackoffs: Stream[Duration] = Stream.empty) - extends Policy { - def toFailureRetryPolicy: FailureRetryPolicy = - FailureRetryPolicy( - RetryPolicy.combine( - RetryPolicyBuilder.timeouts(timeoutBackoffs), - RetryPolicy.backoff(Backoff.fromStream(tlsExceptionBackoffs)) { - case Throw(ex: tls.InternalServerError) => true - } - ) - ) - - def apply[A, B](name: String, ctx: Context): Builder[A, B] = - toFailureRetryPolicy(name, ctx) - } - - implicit val warmup: Warmup[TimelineService] = - Warmup[TimelineService]("timelineservice")(_.ping()) -} - -trait TimelineService { - import 
TimelineService._ - val processEvent2: ProcessEvent2 - val getStatusTimeline: GetStatusTimeline - val getPerspectives: GetPerspectives - def ping(): Future[Unit] -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.docx new file mode 100644 index 000000000..35a718818 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala deleted file mode 100644 index e756d5202..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala +++ /dev/null @@ -1,71 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.finagle.service.RetryPolicy -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.util.RetryPolicyBuilder -import com.twitter.user_image_service.thriftscala.ProcessTweetMediaRequest -import com.twitter.user_image_service.thriftscala.ProcessTweetMediaResponse -import com.twitter.user_image_service.thriftscala.UpdateProductMetadataRequest -import com.twitter.user_image_service.thriftscala.UpdateTweetMediaRequest -import com.twitter.user_image_service.thriftscala.UpdateTweetMediaResponse -import com.twitter.user_image_service.{thriftscala => uis} - -object UserImageService { - import Backend._ - - type ProcessTweetMedia = FutureArrow[uis.ProcessTweetMediaRequest, uis.ProcessTweetMediaResponse] - type UpdateProductMetadata = FutureArrow[uis.UpdateProductMetadataRequest, Unit] - type UpdateTweetMedia = FutureArrow[uis.UpdateTweetMediaRequest, uis.UpdateTweetMediaResponse] - - def fromClient(client: uis.UserImageService.MethodPerEndpoint): UserImageService = - new UserImageService { - val processTweetMedia = 
FutureArrow(client.processTweetMedia) - val updateProductMetadata: FutureArrow[UpdateProductMetadataRequest, Unit] = FutureArrow( - client.updateProductMetadata).unit - val updateTweetMedia = FutureArrow(client.updateTweetMedia) - } - - case class Config( - processTweetMediaTimeout: Duration, - updateTweetMediaTimeout: Duration, - timeoutBackoffs: Stream[Duration]) { - - def apply(svc: UserImageService, ctx: Backend.Context): UserImageService = - new UserImageService { - val processTweetMedia: FutureArrow[ProcessTweetMediaRequest, ProcessTweetMediaResponse] = - policy("processTweetMedia", processTweetMediaTimeout, ctx)(svc.processTweetMedia) - val updateProductMetadata: FutureArrow[UpdateProductMetadataRequest, Unit] = - policy("updateProductMetadata", processTweetMediaTimeout, ctx)(svc.updateProductMetadata) - val updateTweetMedia: FutureArrow[UpdateTweetMediaRequest, UpdateTweetMediaResponse] = - policy("updateTweetMedia", updateTweetMediaTimeout, ctx)(svc.updateTweetMedia) - } - - private[this] def policy[A, B]( - name: String, - requestTimeout: Duration, - ctx: Context - ): Builder[A, B] = - defaultPolicy( - name = name, - requestTimeout = requestTimeout, - retryPolicy = retryPolicy, - ctx = ctx, - exceptionCategorizer = { - case _: uis.BadRequest => Some("success") - case _ => None - } - ) - - private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = - RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) - } -} - -trait UserImageService { - import UserImageService._ - - val processTweetMedia: ProcessTweetMedia - val updateProductMetadata: UpdateProductMetadata - val updateTweetMedia: UpdateTweetMedia -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.docx new file mode 100644 index 000000000..7e3b76a57 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.docx differ diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala deleted file mode 100644 index 06c61934e..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala +++ /dev/null @@ -1,266 +0,0 @@ -package com.twitter.tweetypie -package backends - -import com.twitter.concurrent.AsyncSemaphore -import com.twitter.util.Timer -import com.twitter.util.Promise -import scala.util.control.NoStackTrace - -/** - * Tools for building warmup actions on backend clients. The basic - * idea is to make requests to backends repeatedly until they succeed. - */ -object Warmup { - - /** - * Signals that a warmup action was aborted because warmup is - * complete. - */ - object WarmupComplete extends Exception with NoStackTrace - - /** - * Configuration for warmup actions. - * - * @param maxOutstandingRequests: Limit on total number of outstanding warmup requests. - * @param maxWarmupDuration: Total amount of time warmup is allowed to take. - * @param requestTimeouts: Time limit for individual warmup actions. - * @param reliability: Criteria for how many times each warmup should be run. - */ - case class Settings( - maxOutstandingRequests: Int, - maxWarmupDuration: Duration, - requestTimeouts: Map[String, Duration], - reliability: Reliably) { - def toRunner(logger: Logger, timer: Timer): Runner = - new WithTimeouts(requestTimeouts, timer) - .within(new Logged(logger)) - .within(new LimitedConcurrency(maxOutstandingRequests)) - .within(reliability) - - def apply[A: Warmup](value: A, logger: Logger, timer: Timer): Future[Unit] = - toRunner(logger, timer) - .run(value) - .raiseWithin(maxWarmupDuration)(timer) - .handle { case _ => } - } - - /** - * Strategy for running Warmup actions. - */ - trait Runner { self => - - /** - * Run one single warmup action. 
- */ - def runOne(name: String, action: => Future[Unit]): Future[Unit] - - /** - * Compose these two Runners by calling this Runner's runOne - * inside of other's runOne. - */ - final def within(other: Runner): Runner = - new Runner { - override def runOne(name: String, action: => Future[Unit]): Future[Unit] = - other.runOne(name, self.runOne(name, action)) - } - - /** - * Execute all of the warmup actions for the given value using - * this runner. - */ - final def run[T](t: T)(implicit w: Warmup[T]): Future[Unit] = - Future.join(w.actions.toSeq.map { case (name, f) => runOne(name, f(t).unit) }) - } - - /** - * Set a ceiling on the amount of time each kind of warmup action is - * allowed to take. - */ - class WithTimeouts(timeouts: Map[String, Duration], timer: Timer) extends Runner { - override def runOne(name: String, action: => Future[Unit]): Future[Unit] = - timeouts.get(name).map(action.raiseWithin(_)(timer)).getOrElse(action) - } - - /** - * Execute each action until its reliability is estimated to be - * above the given threshold. The reliability is initially assumed - * to be zero. The reliability is estimated as an exponential moving - * average, with the new data point given the appropriate weight so - * that a single data point will no longer be able to push the - * average below the threshold. - * - * The warmup action is considered successful if it does not throw - * an exception. No timeouts are applied. - * - * The threshold must be in the interval [0, 1). - * - * The concurrency level determines how many outstanding requests - * to maintain until the threshold is reached. This allows warmup - * to happen more rapidly when individual requests have high - * latency. - * - * maxAttempts limits the total number of tries that we are allowed - * to try to reach the reliability threshold. This is a safety - * measure to prevent overloading whatever subsystem we are - * attempting to warm up. 
- */ - case class Reliably(reliabilityThreshold: Double, concurrency: Int, maxAttempts: Int) - extends Runner { - require(reliabilityThreshold < 1) - require(reliabilityThreshold >= 0) - require(concurrency > 0) - require(maxAttempts > 0) - - // Find the weight at which one failure will not push us under the - // reliabilityThreshold. - val weight: Double = 1 - math.pow( - 1 - reliabilityThreshold, - (1 - reliabilityThreshold) / reliabilityThreshold - ) - - // Make sure that rounding error did not cause weight to become zero. - require(weight > 0) - require(weight <= 1) - - // On each iteration, we discount the current reliability by this - // factor before adding in the new reliability data point. - val decay: Double = 1 - weight - - // Make sure that rounding error did not cause decay to be zero. - require(decay < 1) - - override def runOne(name: String, action: => Future[Unit]): Future[Unit] = { - def go(attempts: Int, reliability: Double, outstanding: Seq[Future[Unit]]): Future[Unit] = - if (reliability >= reliabilityThreshold || (attempts == 0 && outstanding.isEmpty)) { - // We hit the threshold or ran out of tries. Don't cancel any - // outstanding requests, just wait for them all to complete. - Future.join(outstanding.map(_.handle { case _ => })) - } else if (attempts > 0 && outstanding.length < concurrency) { - // We have not yet hit the reliability threshold, and we - // still have available concurrency, so make a new request. - go(attempts - 1, reliability, action +: outstanding) - } else { - val sel = Future.select(outstanding) - - // We need this promise wrapper because if the select is - // interrupted, it relays the interrupt to the outstanding - // requests but does not itself return with a - // failure. Wrapping in a promise lets us differentiate - // between an interrupt coming from above and the created - // Future failing for another reason. 
- val p = new Promise[(Try[Unit], Seq[Future[Unit]])] - p.setInterruptHandler { - case e => - // Interrupt the outstanding requests. - sel.raise(e) - // Halt the computation with a failure. - p.updateIfEmpty(Throw(e)) - } - - // When the select finishes, update the promise with the value. - sel.respond(p.updateIfEmpty) - p.flatMap { - case (tryRes, remaining) => - val delta = if (tryRes.isReturn) weight else 0 - go(attempts, reliability * decay + delta, remaining) - } - } - - go(maxAttempts, 0, Seq.empty) - } - } - - /** - * Write a log message recording each invocation of each warmup - * action. The log message is comma-separated, with the following - * fields: - * - * name: - * The supplied name. - * - * start time: - * The number of milliseconds since the start of the Unix - * epoch. - * - * duration: - * How long this warmup action took, in milliseconds. - * - * result: - * "passed" or "failed" depending on whether the Future - * returned an exception. - * - * exception type: - * If the result "failed", then this will be the name of - * the exception that casued the failure. If it "passed", - * it will be the empty string. - * - * These messages should be sufficient to get a picture of how - * warmup proceeded, since they allow the messages to be ordered - * and sorted by type. You can use this information to tune the - * warmup parameters. - */ - class Logged(logger: Logger) extends Runner { - override def runOne(name: String, action: => Future[Unit]): Future[Unit] = { - val start = Time.now - val startStr = start.sinceEpoch.inMilliseconds.toString - - action.respond { - case Throw(WarmupComplete) => - // Don't log anything for computations that we abandoned - // because warmup is complete. 
- - case r => - val duration = (Time.now - start).inMilliseconds - val result = r match { - case Throw(e) => "failed," + e.toString.takeWhile(_ != '\n') - case _ => "passed," - } - logger.info(s"$name,${startStr}ms,${duration}ms,$result") - } - } - } - - /** - * Ensure that no more than the specified number of invocations of a - * warmup action are happening at one time. - */ - class LimitedConcurrency(limit: Int) extends Runner { - private[this] val sem = new AsyncSemaphore(limit) - override def runOne(name: String, action: => Future[Unit]): Future[Unit] = - sem.acquireAndRun(action) - } - - /** - * Create a new Warmup that performs this single action. - */ - def apply[A](name: String)(f: A => Future[_]): Warmup[A] = new Warmup(Map(name -> f)) - - /** - * Create a Warmup that does nothing. This is useful in concert with - * warmField. - */ - def empty[A]: Warmup[A] = new Warmup[A](Map.empty) -} - -/** - * A set of independent warmup actions. Each action should be the - * minimum work that can be done in order to exercise a code - * path. Runners can be used to e.g. run the actions repeatedly or - * with timeouts. - */ -class Warmup[A](val actions: Map[String, A => Future[_]]) { - def ++(other: Warmup[A]) = new Warmup[A](actions ++ other.actions) - - /** - * The names of the individual warmup actions that this warmup is - * composed of. - */ - def names: Set[String] = actions.keySet - - /** - * Create a new Warmup that does all of the actions of this warmup - * and additionally does warmup on the value specified by `f`. 
- */ - def warmField[B](f: A => B)(implicit w: Warmup[B]): Warmup[A] = - new Warmup[A](actions ++ (w.actions.mapValues(f.andThen))) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD deleted file mode 100644 index 7dc0a6379..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD +++ /dev/null @@ -1,135 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/fasterxml/jackson/dataformat:jackson-dataformat-yaml", - "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala", - "3rdparty/jvm/io/netty:netty4-tcnative-boringssl-static", - "3rdparty/jvm/org/apache/kafka:kafka-clients", - "3rdparty/jvm/org/apache/thrift:libthrift", - "ads-common/loggingclient/src/main/scala", - "core-app-services/failed_task/src/scala/com/twitter/coreservices/failed_task/writer", - "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", - "decider", - "deferredrpc/client", - "deferredrpc/client/src/main/thrift:thrift-scala", - "eventbus/client", - "fanoutservice/thrift/src/main/thrift:thrift-scala", - "featureswitches/featureswitches-core:v2", - "featureswitches/featureswitches-core/src/main/scala", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization/server", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/client", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/server", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/transport", - "finagle/finagle-core/src/main", - "finagle/finagle-http/src/main/scala", - "finagle/finagle-memcached/src/main/scala", - 
"finagle/finagle-mux/src/main/scala", - "finagle/finagle-stats", - "finagle/finagle-thrift", - "finagle/finagle-thrift/src/main/java", - "finagle/finagle-thriftmux", - "finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/producers", - "finatra/inject/inject-slf4j/src/main/scala/com/twitter/inject", - "flock-client/src/main/scala", - "flock-client/src/main/thrift:thrift-scala", - "geoduck/service/src/main/scala/com/twitter/geoduck/service/common/clientmodules", - "geoduck/util/src/main/scala/com/twitter/geoduck/util/service", - "kafka/finagle-kafka/finatra-kafka/src/main/scala", - "limiter-client", - "limiter/thrift-only/src/main/thrift:thrift-scala", - "mediaservices/mediainfo-server/thrift/src/main/thrift:thrift-scala", - "passbird/thrift-only/src/main/thrift:thrift-scala", - "quill/capture", - "quill/core/src/main/thrift:thrift-scala", - "scrooge/scrooge-core", - "tweetypie/servo/repo/src/main/scala", - "tweetypie/servo/repo/src/main/thrift:thrift-scala", - "tweetypie/servo/request/src/main/scala", - "tweetypie/servo/util", - "snowflake:client", - "snowflake/src/main/scala/com/twitter/snowflake/id", - "snowflake/src/main/thrift:thrift-scala", - "src/scala/com/twitter/ads/internal/pcl/service", - "src/scala/com/twitter/search/blender/services/strato", - "src/thrift/com/twitter/ads/adserver:adserver_rpc-scala", - "src/thrift/com/twitter/context:twitter-context-scala", - "src/thrift/com/twitter/escherbird:annotation-service-scala", - "src/thrift/com/twitter/escherbird/metadata:metadata-service-scala", - "src/thrift/com/twitter/expandodo:cards-scala", - "src/thrift/com/twitter/expandodo:only-scala", - "src/thrift/com/twitter/geoduck:geoduck-scala", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/merlin:thrift-scala", - "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", - "src/thrift/com/twitter/service/talon/gen:thrift-scala", - "src/thrift/com/twitter/socialgraph:thrift-scala", - 
"src/thrift/com/twitter/spam/rtf:tweet-rtf-event-scala", - "src/thrift/com/twitter/timelineservice:thrift-scala", - "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:events-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_comparison_service-scala", - "stitch/stitch-core", - "stitch/stitch-repo/src/main/scala", - "stitch/stitch-timelineservice/src/main/scala", - "storage/clients/manhattan/client/src/main/scala", - "strato/src/main/scala/com/twitter/strato/catalog", - "strato/src/main/scala/com/twitter/strato/client", - "strato/src/main/scala/com/twitter/strato/fed/server", - "strato/src/main/scala/com/twitter/strato/rpc", - "strato/src/main/scala/com/twitter/strato/server", - "strato/src/main/scala/com/twitter/strato/util", - "stringcenter/client/src/main/scala", - "tweetypie/server/config", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/handler", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites", - 
"tweetypie/server/src/main/scala/com/twitter/tweetypie/service", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/store", - "tweetypie/server/src/main/thrift:compiled-scala", - "tweetypie/common/src/scala/com/twitter/tweetypie/caching", - "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", - "tweetypie/common/src/scala/com/twitter/tweetypie/context", - "tweetypie/common/src/scala/com/twitter/tweetypie/decider", - "tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie", - "tweetypie/common/src/scala/com/twitter/tweetypie/matching", - "tweetypie/common/src/scala/com/twitter/tweetypie/media", - "tweetypie/common/src/scala/com/twitter/tweetypie/storage", - "tweetypie/common/src/scala/com/twitter/tweetypie/tflock", - "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala", - "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities", - "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", - "tweetypie/common/src/scala/com/twitter/tweetypie/util", - "twitter-config/yaml", - "twitter-context", - "twitter-server-internal", - "twitter-server/server/src/main/scala", - "user-image-service/thrift/src/main/thrift:thrift-scala", - "util/util-app", - "util/util-hashing/src/main/scala", - "util/util-slf4j-api/src/main/scala", - "util/util-stats/src/main/scala", - "visibility/common/src/main/scala/com/twitter/visibility/common", - "visibility/common/src/main/scala/com/twitter/visibility/common/tflock", - "visibility/lib:tweets", - "visibility/lib/src/main/scala/com/twitter/visibility/util", - "visibility/writer/src/main/scala/com/twitter/visibility/writer", - "visibility/writer/src/main/scala/com/twitter/visibility/writer/interfaces/tweets", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD.docx new file mode 100644 index 000000000..67f62d095 Binary files /dev/null and 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.docx new file mode 100644 index 000000000..705c1ae69 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala deleted file mode 100644 index 044177438..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala +++ /dev/null @@ -1,796 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.ads.internal.pcl.service.CallbackPromotedContentLogger -import com.twitter.ads.loggingclient.AdsLoggingClient -import com.twitter.adserver.thriftscala.AdCallbackEvent -import com.twitter.conversions.DurationOps._ -import com.twitter.conversions.PercentOps._ -import com.twitter.container.{thriftscala => ccs} -import com.twitter.deferredrpc.client.DeferredThriftService -import com.twitter.deferredrpc.thrift.Datacenter -import com.twitter.deferredrpc.thrift.DeferredRPC -import com.twitter.deferredrpc.thrift.Target -import com.twitter.escherbird.thriftscala.TweetEntityAnnotationService$FinagleClient -import com.twitter.escherbird.thriftscala.{ - TweetEntityAnnotationService => TweetEntityAnnotationScroogeIface -} -import com.twitter.eventbus.client.EventBusPublisher -import com.twitter.eventbus.client.EventBusPublisherBuilder -import com.twitter.expandodo.thriftscala.CardsService$FinagleClient -import com.twitter.expandodo.thriftscala.{CardsService => CardsScroogeIface} -import com.twitter.finagle._ -import com.twitter.finagle.builder.ClientBuilder -import com.twitter.finagle.client.Transporter -import 
com.twitter.finagle.factory.TimeoutFactory -import com.twitter.finagle.liveness.FailureAccrualFactory -import com.twitter.finagle.loadbalancer.Balancers -import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier -import com.twitter.finagle.mtls.client.MtlsClientBuilder._ -import com.twitter.finagle.mtls.client.MtlsStackClient._ -import com.twitter.finagle.partitioning.param -import com.twitter.finagle.service.TimeoutFilter.PropagateDeadlines -import com.twitter.finagle.service._ -import com.twitter.finagle.ssl.OpportunisticTls -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.thrift.ThriftClientRequest -import com.twitter.finagle.thriftmux.MethodBuilder -import com.twitter.finagle.tracing.DefaultTracer -import com.twitter.flockdb.client.thriftscala.FlockDB -import com.twitter.flockdb.client.FlockResponse -import com.twitter.flockdb.client.TFlockClient -import com.twitter.flockdb.client.UserTimelineGraph -import com.twitter.geoduck.backend.hydration.thriftscala.{Hydration => GeoduckHydration} -import com.twitter.geoduck.backend.relevance.thriftscala.Relevance -import com.twitter.geoduck.backend.relevance.thriftscala.Relevance$FinagleClient -import com.twitter.geoduck.backend.relevance.thriftscala.RelevanceContext -import com.twitter.geoduck.service.common.clientmodules.GeoduckGeohashLocate -import com.twitter.geoduck.thriftscala.ReverseGeocoder -import com.twitter.geoduck.util.service.GeoduckLocate -import com.twitter.gizmoduck.thriftscala.UserService -import com.twitter.hashing.KeyHasher -import com.twitter.limiter.client.LimiterClientFactory -import com.twitter.mediainfo.server.thriftscala.MediaInfoService$FinagleClient -import com.twitter.mediainfo.server.thriftscala.{MediaInfoService => MediaInfoScroogeIface} -import com.twitter.merlin.thriftscala.UserRolesService -import com.twitter.passbird.thriftscala.PassbirdService -import com.twitter.passbird.thriftscala.PassbirdService$FinagleClient -import 
com.twitter.service.gen.scarecrow.thriftscala.ScarecrowService$FinagleClient -import com.twitter.service.gen.scarecrow.thriftscala.{ScarecrowService => ScarecrowScroogeIface} -import com.twitter.service.talon.thriftscala.Talon$FinagleClient -import com.twitter.service.talon.thriftscala.{Talon => TalonScroogeIface} -import com.twitter.snowflake.client.SnowflakeClient -import com.twitter.snowflake.thriftscala.Snowflake -import com.twitter.socialgraph.thriftscala.SocialGraphService$FinagleClient -import com.twitter.socialgraph.thriftscala.{SocialGraphService => SocialGraphScroogeIface} -import com.twitter.storage.client.manhattan.kv.Experiments -import com.twitter.storage.client.manhattan.kv.ManhattanKVClient -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import com.twitter.storage.client.manhattan.kv.NoMtlsParams -import com.twitter.strato.client.Strato -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.timelineservice.fanout.thriftscala.FanoutService -import com.twitter.timelineservice.fanout.thriftscala.FanoutService$FinagleClient -import com.twitter.timelineservice.{thriftscala => tls} -import com.twitter.tweetypie.backends._ -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.tweetypie.media.MediaClient -import com.twitter.tweetypie.service.ReplicatingTweetService.GatedReplicationClient -import com.twitter.tweetypie.storage.ManhattanTweetStorageClient -import com.twitter.tweetypie.storage.TweetStorageClient -import com.twitter.tweetypie.store._ -import com.twitter.tweetypie.thriftscala.DeleteLocationData -import com.twitter.tweetypie.thriftscala.RetweetArchivalEvent -import com.twitter.tweetypie.thriftscala.TweetEvent -import com.twitter.tweetypie.thriftscala.TweetServiceInternal$FinagleClient -import com.twitter.user_image_service.thriftscala.UserImageService$FinagleClient -import com.twitter.user_image_service.thriftscala.{UserImageService => UserImageScroogeIface} -import 
com.twitter.util.Throw -import com.twitter.util.Timer -import com.twitter.util.{TimeoutException => UtilTimeoutException} -import scala.util.Random - -trait BackendClients { - - /** returns all the finagle.Names created while building clients */ - def referencedNames: Seq[Name] - - val asyncRetryTweetService: ThriftTweetService - val asyncTweetDeletionService: ThriftTweetService - val asyncTweetService: ThriftTweetService - val configBus: ConfigBus - val creativesContainerService: CreativesContainerService - val darkTrafficClient: Service[Array[Byte], Array[Byte]] - val deleteLocationDataPublisher: EventBusPublisher[DeleteLocationData] - val escherbird: Escherbird - val expandodo: Expandodo - val fanoutServiceClient: FanoutService.MethodPerEndpoint - val geoHydrationLocate: GeoduckLocate - val geoRelevance: Relevance.MethodPerEndpoint - val geoScrubEventStore: GeoScrubEventStore - val geoduckGeohashLocate: GeoduckGeohashLocate - val gizmoduck: Gizmoduck - val gnipEnricherator: GnipEnricherator - val guano: Guano - val limiterService: LimiterService - val lowQoSReplicationClients: Seq[GatedReplicationClient] - val mediaClient: MediaClient - val mediaInfoService: MediaInfoService - val memcacheClient: memcached.Client - val merlin: UserRolesService.MethodPerEndpoint - val passbirdClient: PassbirdService.MethodPerEndpoint - val replicationClient: ThriftTweetService - val retweetArchivalEventPublisher: EventBusPublisher[RetweetArchivalEvent] - val scarecrow: Scarecrow - val snowflakeClient: SnowflakeClient.SnowflakeClient - val socialGraphService: SocialGraphService - val stratoserverClient: StratoClient - val talon: Talon - val tflockReadClient: TFlockClient - val tflockWriteClient: TFlockClient - val timelineService: TimelineService - val tweetEventsPublisher: EventBusPublisher[TweetEvent] - val tweetStorageClient: TweetStorageClient - val userImageService: UserImageService - val callbackPromotedContentLogger: CallbackPromotedContentLogger -} - -/** - * default 
implementation of BackendClients that connects to real, remote - * backend services. - */ -object BackendClients { - // for most services, tweetypie typically maintains only a single connection to - // each host in the cluster, and that is enough for normal steady-state work. - // to prevent ddos'ing backends during unusual traffic influxes, we set the host - // connection limit to be 2-3x the steady-state daily peak, giving plenty of head - // room but without allowing an excessive number of connections. - private val defaultHostConnectionLimit = 3 - - // 100ms is greater than most gc pauses; smaller values cause more timeouts - private val defaultConnectTimeout = 100.milliseconds - // tcpConnect timeout is less than half of defaultConnectTimeout, to allow at least - // two tries (except when there is a GC pause) - private val defaultTcpConnectTimeout = 20.milliseconds - - private val WriteExceptionsOnly: PartialFunction[Try[Nothing], Boolean] = - RetryPolicy.WriteExceptionsOnly - - private val ClosedExceptionsOnly: PartialFunction[Try[Nothing], Boolean] = { - case Throw(_: ChannelClosedException) => true - } - - private val TimeoutExceptionsOnly: PartialFunction[Try[Nothing], Boolean] = { - case Throw(_: TimeoutException) => true - case Throw(_: UtilTimeoutException) => true - } - - private val NoBackoff = Backoff.const(0.second) - - private def retry(writeExceptions: Int = 100, closedExceptions: Int = 2, timeouts: Int = 0) = - RetryPolicy.combine( - RetryPolicy.backoff(NoBackoff.take(writeExceptions))(WriteExceptionsOnly), - RetryPolicy.backoff(NoBackoff.take(closedExceptions))(ClosedExceptionsOnly), - RetryPolicy.backoff(NoBackoff.take(timeouts))(TimeoutExceptionsOnly) - ) - - implicit val warmup: Warmup[BackendClients] = { - // Use a random string so that the keys are likely to hash to - // different memcache instances. 
Request multiple keys at a time so - // that we don't consider the backend warm just because we can get a - // bunch of successful responses to one cache. - val cacheGet = (_: memcached.Client).get(Seq.fill(20)(Random.nextLong.toString)) - - Warmup - .empty[BackendClients] - .warmField(_.expandodo) - .warmField(_.gizmoduck) - .warmField(_.memcacheClient)(Warmup("memcache")(cacheGet)) - .warmField(_.talon) - .warmField(_.tweetStorageClient)(Warmup("tweetstorage")(_.ping())) - .warmField(_.tflockReadClient)(Warmup("tflock")(_.contains(UserTimelineGraph, 0, 0))) - .warmField(_.scarecrow) - .warmField(_.socialGraphService) - .warmField(_.timelineService) - .warmField(_.geoRelevance)(Warmup("geo_relevance")(_.placeSearch(RelevanceContext()))) - } - - def apply( - settings: TweetServiceSettings, - deciderGates: TweetypieDeciderGates, - statsReceiver: StatsReceiver, - hostStatsReceiver: StatsReceiver, - timer: Timer, - clientIdHelper: ClientIdHelper, - ): BackendClients = { - val thriftClientId = settings.thriftClientId - val tracer = DefaultTracer - - val env = settings.env.toString - val zone = settings.zone - val log = Logger(getClass) - val backendsScope = statsReceiver.scope("backends") - - /** a Seq builder of finagle.Names loaded via getName */ - val referencedNamesBuilder = Seq.newBuilder[Name] - - /** the default set of exceptions we believe are safe for Tweetypie to retry */ - val defaultResponseClassifier: ResponseClassifier = - ResponseClassifier.RetryOnChannelClosed.orElse(ResponseClassifier.RetryOnTimeout) - - /** - * Resolve a string into a Finagle Name and record it - * in referencedNames. - */ - def eval(address: String): Name = { - val name = Resolver.eval(address) - referencedNamesBuilder += name - name - } - - def backendContext(name: String) = - Backend.Context(timer, backendsScope.scope(name)) - - // by default, retries on most exceptions (see defaultRetryExceptions). if an rpc is not - // idempotent, it should use a different retry policy. 
- def clientBuilder(name: String) = { - ClientBuilder() - .name(name) - .reportTo(statsReceiver) - .reportHostStats(hostStatsReceiver) - .tracer(tracer) - .daemon(true) - .tcpConnectTimeout(defaultTcpConnectTimeout) - .connectTimeout(defaultConnectTimeout) - .retryPolicy(retry()) - } - - def thriftMuxClientBuilder(name: String, address: String, clazz: Class[_]) = { - clientBuilder(name) - .stack( - ThriftMux.client - .withClientId(thriftClientId) - .withOpportunisticTls(OpportunisticTls.Required) - .withServiceClass(clazz)) - .loadBalancer(balancer()) - .dest(eval(address)) - .mutualTls(settings.serviceIdentifier) - } - - // Our base ThriftMux.Client - // Prefer using thriftMuxMethodBuilder below but - // can be used to build custom clients (re: darkTrafficClient) - def thriftMuxClient(name: String, propagateDeadlines: Boolean = true): ThriftMux.Client = { - ThriftMux.client - .withClientId(thriftClientId) - .withLabel(name) - .withStatsReceiver(statsReceiver) - .withTracer(tracer) - .withTransport.connectTimeout(defaultTcpConnectTimeout) - .withSession.acquisitionTimeout(defaultConnectTimeout) - .withMutualTls(settings.serviceIdentifier) - .withOpportunisticTls(OpportunisticTls.Required) - .configured(PropagateDeadlines(enabled = propagateDeadlines)) - } - - // If an endpoint is non-idempotent you should add .nonidempotent and - // leave off any ResponseClassifiers (it will remove any placed before but not after) - // If it is unequivocally idempotent you should add .idempotent and - // leave off any ResponseClassifiers (it will retry on all Throws). 
This will also - // enable backup requests - def thriftMuxMethodBuilder( - name: String, - dest: String, - ): MethodBuilder = { - thriftMuxClient(name) - .withLoadBalancer(balancer(minAperture = 2)) - .methodBuilder(dest) - .withRetryForClassifier(defaultResponseClassifier) - .withTimeoutTotal(2.seconds) // total timeout including 1st attempt and up to 2 retries - } - - def balancer(minAperture: Int = 2) = Balancers.aperture(minAperture = minAperture) - - val eventBusPublisherBuilder = - EventBusPublisherBuilder() - .dest(eval("/s/eventbus/provisioning")) - .clientId(settings.thriftClientId) - // eventbus stats are further scoped by stream, so put all - // publishers under the same stats namespace - .statsReceiver(backendsScope.scope("event_bus")) - // This makes the underlying kps-client to be resolved over WilyNs vs DNS - .serviceIdentifier(settings.serviceIdentifier) - - new BackendClients { - def referencedNames: Seq[Name] = referencedNamesBuilder.result() - - val memcacheClient: memcached.Client = - Memcached.client - .withMutualTls(settings.serviceIdentifier) - .connectionsPerEndpoint(2) - .configured(param.KeyHasher(KeyHasher.FNV1_32)) - .configured(Transporter.ConnectTimeout(100.milliseconds)) - .configured(TimeoutFilter.Param(200.milliseconds)) - .configured(TimeoutFactory.Param(200.milliseconds)) - .configured(param.EjectFailedHost(false)) - .configured(FailureAccrualFactory.Param(numFailures = 20, markDeadFor = 30.second)) - .configured( - PendingRequestFilter.Param(limit = Some(settings.cacheClientPendingRequestLimit)) - ) - .filtered(new MemcacheExceptionLoggingFilter) - .newRichClient(dest = eval(settings.twemcacheDest), label = "memcache") - - /* clients */ - val tweetStorageClient: TweetStorageClient = - Manhattan.fromClient( - new ManhattanTweetStorageClient( - settings.tweetStorageConfig, - statsReceiver = backendsScope.scope("tweet_storage"), - clientIdHelper = clientIdHelper, - ) - ) - - val socialGraphService: SocialGraphService = { - val 
finagleClient = - new SocialGraphService$FinagleClient( - thriftMuxClientBuilder( - "socialgraph", - "/s/socialgraph/socialgraph", - classOf[SocialGraphScroogeIface.MethodPerEndpoint] - ).loadBalancer(Balancers.aperturePeakEwma(minAperture = 16)) - .build() - ) - - settings.socialGraphSeviceConfig( - SocialGraphService.fromClient(finagleClient), - backendContext("socialgraph") - ) - } - - val tflockClient = - new FlockDB.FinagledClient( - thriftMuxClientBuilder("tflock", "/s/tflock/tflock", classOf[FlockDB.MethodPerEndpoint]) - .loadBalancer(balancer(minAperture = 5)) - .responseClassifier(FlockResponse.classifier) - .build(), - serviceName = "tflock", - stats = statsReceiver - ) - - val tflockReadClient: TFlockClient = - settings.tflockReadConfig(tflockClient, backendContext("tflock")) - - val tflockWriteClient: TFlockClient = - settings.tflockWriteConfig(tflockClient, backendContext("tflock")) - - val gizmoduck: Gizmoduck = { - val clientBuilder = - thriftMuxClientBuilder( - "gizmoduck", - "/s/gizmoduck/gizmoduck", - classOf[UserService.MethodPerEndpoint]) - .loadBalancer(balancer(minAperture = 63)) - val mb = MethodBuilder - .from(clientBuilder) - .idempotent(maxExtraLoad = 1.percent) - .servicePerEndpoint[UserService.ServicePerEndpoint] - - val gizmoduckClient = ThriftMux.Client.methodPerEndpoint(mb) - settings.gizmoduckConfig(Gizmoduck.fromClient(gizmoduckClient), backendContext("gizmoduck")) - } - - val merlin: UserRolesService.MethodPerEndpoint = { - val thriftClient = thriftMuxMethodBuilder("merlin", "/s/merlin/merlin") - .withTimeoutPerRequest(100.milliseconds) - .withTimeoutTotal(400.milliseconds) - .idempotent(0.01) - .servicePerEndpoint[UserRolesService.ServicePerEndpoint] - - ThriftMux.Client.methodPerEndpoint(thriftClient) - } - - val talon: Talon = { - val talonClient = - new Talon$FinagleClient( - thriftMuxClientBuilder( - "talon", - "/s/talon/backend", - classOf[TalonScroogeIface.MethodPerEndpoint]) - .build() - ) - - 
settings.talonConfig(Talon.fromClient(talonClient), backendContext("talon")) - } - - val guano = Guano() - - val mediaInfoService: MediaInfoService = { - val finagleClient = - new MediaInfoService$FinagleClient( - thriftMuxClientBuilder( - "mediainfo", - "/s/photurkey/mediainfo", - classOf[MediaInfoScroogeIface.MethodPerEndpoint]) - .loadBalancer(balancer(minAperture = 75)) - .build() - ) - - settings.mediaInfoServiceConfig( - MediaInfoService.fromClient(finagleClient), - backendContext("mediainfo") - ) - } - - val userImageService: UserImageService = { - val finagleClient = - new UserImageService$FinagleClient( - thriftMuxClientBuilder( - "userImage", - "/s/user-image-service/uis", - classOf[UserImageScroogeIface.MethodPerEndpoint]) - .build() - ) - - settings.userImageServiceConfig( - UserImageService.fromClient(finagleClient), - backendContext("userImage") - ) - } - - val mediaClient: MediaClient = - MediaClient.fromBackends( - userImageService = userImageService, - mediaInfoService = mediaInfoService - ) - - val timelineService: TimelineService = { - val timelineServiceClient = - new tls.TimelineService$FinagleClient( - thriftMuxClientBuilder( - "timelineService", - "/s/timelineservice/timelineservice", - classOf[tls.TimelineService.MethodPerEndpoint]) - .loadBalancer(balancer(minAperture = 13)) - .build() - ) - - settings.timelineServiceConfig( - TimelineService.fromClient(timelineServiceClient), - backendContext("timelineService") - ) - } - - val expandodo: Expandodo = { - val cardsServiceClient = - new CardsService$FinagleClient( - thriftMuxClientBuilder( - "expandodo", - "/s/expandodo/server", - classOf[CardsScroogeIface.MethodPerEndpoint]) - .loadBalancer(balancer(minAperture = 6)) - .build() - ) - - settings.expandodoConfig( - Expandodo.fromClient(cardsServiceClient), - backendContext("expandodo") - ) - } - - val creativesContainerService: CreativesContainerService = { - val mb = thriftMuxMethodBuilder( - "creativesContainerService", - 
"/s/creatives-container/creatives-container", - ).withTimeoutTotal(300.milliseconds) - .idempotent(maxExtraLoad = 1.percent) - .servicePerEndpoint[ccs.CreativesContainerService.ServicePerEndpoint] - - settings.creativesContainerServiceConfig( - CreativesContainerService.fromClient(ccs.CreativesContainerService.MethodPerEndpoint(mb)), - backendContext("creativesContainerService") - ) - } - - val scarecrow: Scarecrow = { - val scarecrowClient = new ScarecrowService$FinagleClient( - thriftMuxClientBuilder( - "scarecrow", - "/s/abuse/scarecrow", - classOf[ScarecrowScroogeIface.MethodPerEndpoint]) - .loadBalancer(balancer(minAperture = 6)) - .build(), - serviceName = "scarecrow", - stats = statsReceiver - ) - - settings.scarecrowConfig(Scarecrow.fromClient(scarecrowClient), backendContext("scarecrow")) - } - - val snowflakeClient: Snowflake.MethodPerEndpoint = { - eval("/s/snowflake/snowflake") // eagerly resolve the serverset - val mb = thriftMuxMethodBuilder( - "snowflake", - "/s/snowflake/snowflake" - ).withTimeoutTotal(300.milliseconds) - .withTimeoutPerRequest(100.milliseconds) - .idempotent(maxExtraLoad = 1.percent) - - SnowflakeClient.snowflakeClient(mb) - } - - val deferredRpcClient = - new DeferredRPC.FinagledClient( - thriftMuxClientBuilder( - "deferredrpc", - "/s/kafka-shared/krpc-server-main", - classOf[DeferredRPC.MethodPerEndpoint]) - .requestTimeout(200.milliseconds) - .retryPolicy(retry(timeouts = 3)) - .build(), - serviceName = "deferredrpc", - stats = statsReceiver - ) - - def deferredTweetypie(target: Target): ThriftTweetService = { - // When deferring back to the local datacenter, preserve the finagle - // context and dtabs. This will ensure that developer dtabs are honored - // and that context is preserved in eventbus. (eventbus enqueues only - // happen in async requests within the same datacenter.) 
- // - // Effectively, this means we consider deferredrpc requests within the - // same datacenter to be part of the same request, but replicated - // requests are not. - val isLocal: Boolean = target.datacenter == Datacenter.Local - - val deferredThriftService: Service[ThriftClientRequest, Array[Byte]] = - new DeferredThriftService( - deferredRpcClient, - target, - serializeFinagleContexts = isLocal, - serializeFinagleDtabs = isLocal - ) - - new TweetServiceInternal$FinagleClient(deferredThriftService) - } - - val replicationClient: ThriftTweetService = - deferredTweetypie(Target(Datacenter.AllOthers, "tweetypie-replication")) - - // used for read endpoints replication - val lowQoSReplicationClients: Seq[GatedReplicationClient] = { - val rampUpGate = Gate.linearRampUp(Time.now, settings.forkingRampUp) - - // Gates to avoid sending replicated reads from a cluster to itself - val inATLA = if (settings.zone == "atla") Gate.True else Gate.False - val inPDXA = if (settings.zone == "pdxa") Gate.True else Gate.False - - Seq( - GatedReplicationClient( - client = deferredTweetypie(Target(Datacenter.Atla, "tweetypie-lowqos")), - gate = rampUpGate & deciderGates.replicateReadsToATLA & !inATLA - ), - GatedReplicationClient( - client = deferredTweetypie(Target(Datacenter.Pdxa, "tweetypie-lowqos")), - gate = rampUpGate & deciderGates.replicateReadsToPDXA & !inPDXA - ) - ) - } - - // used for async operations in the write path - val asyncTweetService: ThriftTweetService = - deferredTweetypie(Target(Datacenter.Local, "tweetypie")) - - // used to trigger asyncEraseUserTweetsRequest - val asyncTweetDeletionService: ThriftTweetService = - deferredTweetypie(Target(Datacenter.Local, "tweetypie-retweet-deletion")) - - // used for async retries - val asyncRetryTweetService: ThriftTweetService = - deferredTweetypie(Target(Datacenter.Local, "tweetypie-async-retry")) - - val darkTrafficClient: Service[Array[Byte], Array[Byte]] = { - val thriftService = - thriftMuxClient( - 
"tweetypie.dark", - propagateDeadlines = false - ).withRequestTimeout(100.milliseconds) - .newService("/s/tweetypie/proxy") - - val transformer = - new Filter[Array[Byte], Array[Byte], ThriftClientRequest, Array[Byte]] { - override def apply( - request: Array[Byte], - service: Service[ThriftClientRequest, Array[Byte]] - ): Future[Array[Byte]] = - service(new ThriftClientRequest(request, false)) - } - - transformer andThen thriftService - } - - val geoHydrationClient: GeoduckHydration.MethodPerEndpoint = { - val mb = thriftMuxMethodBuilder("geoduck_hydration", "/s/geo/hydration") - .withTimeoutPerRequest(100.millis) - .idempotent(maxExtraLoad = 1.percent) - ThriftMux.Client.methodPerEndpoint( - mb.servicePerEndpoint[GeoduckHydration.ServicePerEndpoint]) - } - - val geoHydrationLocate: GeoduckLocate = geoHydrationClient.locate - - val geoReverseGeocoderClient: ReverseGeocoder.MethodPerEndpoint = { - val mb = thriftMuxMethodBuilder("geoduck_reversegeocoder", "/s/geo/geoduck_reversegeocoder") - .withTimeoutPerRequest(100.millis) - .idempotent(maxExtraLoad = 1.percent) - ThriftMux.Client.methodPerEndpoint( - mb.servicePerEndpoint[ReverseGeocoder.ServicePerEndpoint]) - } - - val geoduckGeohashLocate: GeoduckGeohashLocate = { - new GeoduckGeohashLocate( - reverseGeocoderClient = geoReverseGeocoderClient, - hydrationClient = geoHydrationClient, - classScopedStatsReceiver = statsReceiver.scope("geo_geohash_locate")) - } - - val geoRelevance = - new Relevance$FinagleClient( - thriftMuxClientBuilder( - "geoduck_relevance", - "/s/geo/relevance", - classOf[Relevance.MethodPerEndpoint]) - .requestTimeout(100.milliseconds) - .retryPolicy(retry(timeouts = 1)) - .build(), - stats = statsReceiver - ) - - val fanoutServiceClient = - new FanoutService$FinagleClient( - new DeferredThriftService(deferredRpcClient, Target(Datacenter.Local, "fanoutservice")), - serviceName = "fanoutservice", - stats = statsReceiver - ) - - val limiterService: LimiterService = { - val limiterClient = - new 
LimiterClientFactory( - name = "limiter", - clientId = thriftClientId, - tracer = tracer, - statsReceiver = statsReceiver, - serviceIdentifier = settings.serviceIdentifier, - opportunisticTlsLevel = OpportunisticTls.Required, - daemonize = true - )(eval("/s/limiter/limiter")) - - val limiterBackend = settings.limiterBackendConfig( - LimiterBackend.fromClient(limiterClient), - backendContext("limiter") - ) - - LimiterService.fromBackend( - limiterBackend.incrementFeature, - limiterBackend.getFeatureUsage, - getAppId, - backendsScope.scope("limiter") - ) - } - - val passbirdClient = - new PassbirdService$FinagleClient( - thriftMuxClientBuilder( - "passbird", - "/s/passbird/passbird", - classOf[PassbirdService.MethodPerEndpoint]) - .requestTimeout(100.milliseconds) - .retryPolicy(retry(timeouts = 1)) - .build(), - serviceName = "passbird", - stats = statsReceiver - ) - - val escherbird: Escherbird = { - val escherbirdClient = - new TweetEntityAnnotationService$FinagleClient( - thriftMuxClientBuilder( - "escherbird", - "/s/escherbird/annotationservice", - classOf[TweetEntityAnnotationScroogeIface.MethodPerEndpoint]) - .build() - ) - settings.escherbirdConfig( - Escherbird.fromClient(escherbirdClient), - backendContext("escherbird") - ) - } - - val geoScrubEventStore: GeoScrubEventStore = { - val mhMtlsParams = - if (settings.serviceIdentifier == EmptyServiceIdentifier) NoMtlsParams - else - ManhattanKVClientMtlsParams( - serviceIdentifier = settings.serviceIdentifier, - opportunisticTls = OpportunisticTls.Required) - - val mhClient = - new ManhattanKVClient( - appId = "geoduck_scrub_datastore", - dest = "/s/manhattan/omega.native-thrift", - mtlsParams = mhMtlsParams, - label = "mh_omega", - Seq(Experiments.ApertureLoadBalancer) - ) - - GeoScrubEventStore( - mhClient, - settings.geoScrubEventStoreConfig, - backendContext("geoScrubEventStore") - ) - } - - val tweetEventsPublisher: EventBusPublisher[TweetEvent] = - eventBusPublisherBuilder - .streamName("tweet_events") - 
.thriftStruct(TweetEvent) - .publishTimeout(500.milliseconds) - .serializeFinagleDtabs(true) - .build() - - val deleteLocationDataPublisher: EventBusPublisher[DeleteLocationData] = - eventBusPublisherBuilder - .streamName("tweetypie_delete_location_data_prod") - .thriftStruct(DeleteLocationData) - // deleteLocationData is relatively rare, and publishing to - // eventbus is all that the endpoint does. This means that it - // is much more likely that we will have to make a connection, - // which has much greater latency, and also makes us more - // tolerant of slow requests, so we choose a long timeout. - .publishTimeout(2.seconds) - .build() - - val retweetArchivalEventPublisher: EventBusPublisher[RetweetArchivalEvent] = - eventBusPublisherBuilder - .streamName("retweet_archival_events") - .thriftStruct(RetweetArchivalEvent) - .publishTimeout(500.milliseconds) - .build() - - val gnipEnricherator: GnipEnricherator = { - val gnipEnricherator = - thriftMuxMethodBuilder( - "enricherator", - "/s/datadelivery-enrichments/enricherator" - ) - GnipEnricherator.fromMethod(gnipEnricherator) - } - - val stratoserverClient: StratoClient = Strato.client - .withMutualTls( - serviceIdentifier = settings.serviceIdentifier, - opportunisticLevel = OpportunisticTls.Required) - .withLabel("stratoserver") - .withRequestTimeout(100.milliseconds) - .build() - - val configBus: ConfigBus = - ConfigBus(backendsScope.scope("config_bus"), settings.instanceId, settings.instanceCount) - - val callbackPromotedContentLogger: CallbackPromotedContentLogger = { - val publisher = - eventBusPublisherBuilder - .streamName(settings.adsLoggingClientTopicName) - .thriftStruct(AdCallbackEvent) - .publishTimeout(500.milliseconds) - .serializeFinagleDtabs(true) - .maxQueuedEvents(1000) - .kafkaDest("/s/kafka/ads-callback:kafka-tls") - .build() - - val stats = backendsScope.scope("promoted_content") - val adsLoggingClient = AdsLoggingClient(publisher, stats, "Tweetypie") - new 
CallbackPromotedContentLogger(adsLoggingClient, stats) - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.docx new file mode 100644 index 000000000..a2bdb015d Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala deleted file mode 100644 index 4a670f4d2..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala +++ /dev/null @@ -1,281 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.Backoff -import com.twitter.finagle.memcached -import com.twitter.finagle.stats.Stat -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.servo.cache.{Serializer => CacheSerializer, _} -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.tweetypie.core._ -import com.twitter.tweetypie.handler.CacheBasedTweetCreationLock -import com.twitter.tweetypie.repository._ -import com.twitter.tweetypie.serverutil._ -import com.twitter.tweetypie.thriftscala._ -import com.twitter.tweetypie.util._ -import com.twitter.util.Timer - -/** - * Provides configured caches (most backed by memcached) wrapped with appropriate metrics and locks. 
- * - * All memcached-backed caches share: - * - one Finagle memcached client from backends.memcacheClient - * - one in memory caffeine cache - * - one Twemcache pool - * - * Each memcached-backed cache specialization provides its own: - * - key prefix or "namespace" - * - value serializer/deserializer - * - stats scope - * - log name - */ -trait Caches { - val memcachedClientWithInProcessCaching: memcached.Client - val tweetCache: LockingCache[TweetKey, Cached[CachedTweet]] - val tweetResultCache: LockingCache[TweetId, Cached[TweetResult]] - val tweetDataCache: LockingCache[TweetId, Cached[TweetData]] - val tweetCreateLockerCache: Cache[TweetCreationLock.Key, TweetCreationLock.State] - val tweetCountsCache: LockingCache[TweetCountKey, Cached[Count]] - val deviceSourceInProcessCache: LockingCache[String, Cached[DeviceSource]] - val geoScrubCache: LockingCache[UserId, Cached[Time]] -} - -object Caches { - object NoCache extends Caches { - override val memcachedClientWithInProcessCaching: memcached.Client = new NullMemcacheClient() - private val toLockingCache: LockingCacheFactory = NonLockingCacheFactory - val tweetCache: LockingCache[TweetKey, Cached[CachedTweet]] = - toLockingCache(new NullCache) - val tweetResultCache: LockingCache[TweetId, Cached[TweetResult]] = - toLockingCache(new NullCache) - val tweetDataCache: LockingCache[TweetId, Cached[TweetData]] = - toLockingCache(new NullCache) - val tweetCreateLockerCache: Cache[TweetCreationLock.Key, TweetCreationLock.State] = - new NullCache - val tweetCountsCache: LockingCache[TweetCountKey, Cached[Count]] = - toLockingCache(new NullCache) - val deviceSourceInProcessCache: LockingCache[String, Cached[DeviceSource]] = - toLockingCache(new NullCache) - val geoScrubCache: LockingCache[UserId, Cached[Time]] = - toLockingCache(new NullCache) - } - - def apply( - settings: TweetServiceSettings, - stats: StatsReceiver, - timer: Timer, - clients: BackendClients, - tweetKeyFactory: TweetKeyFactory, - deciderGates: 
TweetypieDeciderGates, - clientIdHelper: ClientIdHelper, - ): Caches = { - val cachesStats = stats.scope("caches") - val cachesInprocessStats = cachesStats.scope("inprocess") - val cachesMemcacheStats = cachesStats.scope("memcache") - val cachesMemcacheObserver = new StatsReceiverCacheObserver(cachesStats, 10000, "memcache") - val cachesMemcacheTweetStats = cachesMemcacheStats.scope("tweet") - val cachesInprocessDeviceSourceStats = cachesInprocessStats.scope("device_source") - val cachesMemcacheCountStats = cachesMemcacheStats.scope("count") - val cachesMemcacheTweetCreateStats = cachesMemcacheStats.scope("tweet_create") - val cachesMemcacheGeoScrubStats = cachesMemcacheStats.scope("geo_scrub") - val memcacheClient = clients.memcacheClient - - val caffieneMemcachedClient = settings.inProcessCacheConfigOpt match { - case Some(inProcessCacheConfig) => - new CaffeineMemcacheClient( - proxyClient = memcacheClient, - inProcessCacheConfig.maximumSize, - inProcessCacheConfig.ttl, - cachesMemcacheStats.scope("caffeine") - ) - case None => - memcacheClient - } - - val observedMemcacheWithCaffeineClient = - new ObservableMemcache( - new FinagleMemcache( - caffieneMemcachedClient - ), - cachesMemcacheObserver - ) - - def observeCache[K, V]( - cache: Cache[K, V], - stats: StatsReceiver, - logName: String, - windowSize: Int = 10000 - ) = - ObservableCache( - cache, - stats, - windowSize, - // Need to use an old-school c.t.logging.Logger because that's what servo needs - com.twitter.logging.Logger(s"com.twitter.tweetypie.cache.$logName") - ) - - def mkCache[K, V]( - ttl: Duration, - serializer: CacheSerializer[V], - perCacheStats: StatsReceiver, - logName: String, - windowSize: Int = 10000 - ): Cache[K, V] = { - observeCache( - new MemcacheCache[K, V]( - observedMemcacheWithCaffeineClient, - ttl, - serializer - ), - perCacheStats, - logName, - windowSize - ) - } - - def toLockingCache[K, V]( - cache: Cache[K, V], - stats: StatsReceiver, - backoffs: Stream[Duration] = 
settings.lockingCacheBackoffs - ): LockingCache[K, V] = - new OptimisticLockingCache( - underlyingCache = cache, - backoffs = Backoff.fromStream(backoffs), - observer = new OptimisticLockingCacheObserver(stats), - timer = timer - ) - - def mkLockingCache[K, V]( - ttl: Duration, - serializer: CacheSerializer[V], - stats: StatsReceiver, - logName: String, - windowSize: Int = 10000, - backoffs: Stream[Duration] = settings.lockingCacheBackoffs - ): LockingCache[K, V] = - toLockingCache( - mkCache(ttl, serializer, stats, logName, windowSize), - stats, - backoffs - ) - - def trackTimeInCache[K, V]( - cache: Cache[K, Cached[V]], - stats: StatsReceiver - ): Cache[K, Cached[V]] = - new CacheWrapper[K, Cached[V]] { - val ageStat: Stat = stats.stat("time_in_cache_ms") - val underlyingCache: Cache[K, Cached[V]] = cache - - override def get(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = - underlyingCache.get(keys).onSuccess(record) - - private def record(res: KeyValueResult[K, Cached[V]]): Unit = { - val now = Time.now - for (c <- res.found.values) { - ageStat.add(c.cachedAt.until(now).inMilliseconds) - } - } - } - - new Caches { - override val memcachedClientWithInProcessCaching: memcached.Client = caffieneMemcachedClient - - private val observingTweetCache: Cache[TweetKey, Cached[CachedTweet]] = - trackTimeInCache( - mkCache( - ttl = settings.tweetMemcacheTtl, - serializer = Serializer.CachedTweet.CachedCompact, - perCacheStats = cachesMemcacheTweetStats, - logName = "MemcacheTweetCache" - ), - cachesMemcacheTweetStats - ) - - // Wrap the tweet cache with a wrapper that will scribe the cache writes - // that happen to a fraction of tweets. This was added as part of the - // investigation into missing place ids and cache inconsistencies that - // were discovered by the additional fields hydrator. 
- private[this] val writeLoggingTweetCache = - new ScribeTweetCacheWrites( - underlyingCache = observingTweetCache, - logYoungTweetCacheWrites = deciderGates.logYoungTweetCacheWrites, - logTweetCacheWrites = deciderGates.logTweetCacheWrites - ) - - val tweetCache: LockingCache[TweetKey, Cached[CachedTweet]] = - toLockingCache( - cache = writeLoggingTweetCache, - stats = cachesMemcacheTweetStats - ) - - val tweetDataCache: LockingCache[TweetId, Cached[TweetData]] = - toLockingCache( - cache = TweetDataCache(tweetCache, tweetKeyFactory.fromId), - stats = cachesMemcacheTweetStats - ) - - val tweetResultCache: LockingCache[TweetId, Cached[TweetResult]] = - toLockingCache( - cache = TweetResultCache(tweetDataCache), - stats = cachesMemcacheTweetStats - ) - - val tweetCountsCache: LockingCache[TweetCountKey, Cached[Count]] = - mkLockingCache( - ttl = settings.tweetCountsMemcacheTtl, - serializer = Serializers.CachedLong.Compact, - stats = cachesMemcacheCountStats, - logName = "MemcacheTweetCountCache", - windowSize = 1000, - backoffs = Backoff.linear(0.millis, 2.millis).take(2).toStream - ) - - val tweetCreateLockerCache: Cache[TweetCreationLock.Key, TweetCreationLock.State] = - observeCache( - new TtlCacheToCache( - underlyingCache = new KeyValueTransformingTtlCache( - underlyingCache = observedMemcacheWithCaffeineClient, - transformer = TweetCreationLock.State.Serializer, - underlyingKey = (_: TweetCreationLock.Key).toString - ), - ttl = CacheBasedTweetCreationLock.ttlChooser( - shortTtl = settings.tweetCreateLockingMemcacheTtl, - longTtl = settings.tweetCreateLockingMemcacheLongTtl - ) - ), - stats = cachesMemcacheTweetCreateStats, - logName = "MemcacheTweetCreateLockingCache", - windowSize = 1000 - ) - - val deviceSourceInProcessCache: LockingCache[String, Cached[DeviceSource]] = - toLockingCache( - observeCache( - new ExpiringLruCache( - ttl = settings.deviceSourceInProcessTtl, - maximumSize = settings.deviceSourceInProcessCacheMaxSize - ), - stats = 
cachesInprocessDeviceSourceStats, - logName = "InprocessDeviceSourceCache" - ), - stats = cachesInprocessDeviceSourceStats - ) - - val geoScrubCache: LockingCache[UserId, Cached[Time]] = - toLockingCache[UserId, Cached[Time]]( - new KeyTransformingCache( - mkCache[GeoScrubTimestampKey, Cached[Time]]( - ttl = settings.geoScrubMemcacheTtl, - serializer = Serializer.toCached(CacheSerializer.Time), - perCacheStats = cachesMemcacheGeoScrubStats, - logName = "MemcacheGeoScrubCache" - ), - (userId: UserId) => GeoScrubTimestampKey(userId) - ), - cachesMemcacheGeoScrubStats - ) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.docx new file mode 100644 index 000000000..976442c68 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala deleted file mode 100644 index 5ebca25c2..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala +++ /dev/null @@ -1,126 +0,0 @@ -package com.twitter.tweetypie.config - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.dataformat.yaml.YAMLFactory -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.util.Try - -case object EmptyConfigException extends Exception - -case class ServiceIdentifierPattern( - role: Option[String], - service: Option[String], - environment: Option[String], -) { - // Service identifier matches if the fields of service identifier - // match all the defined fields of pattern. 
- def matches(id: ServiceIdentifier): Boolean = - Seq( - role.map(_ == id.role), - service.map(_ == id.service), - environment.map(_ == id.environment), - ) - .flatten - .forall(identity) - - // True if this is the kind of pattern that only specifies environment. - // This should be used in rare cases, for example letting all devel clients - // use permitted methods - like get_tweet_fields. - def onlyEnv: Boolean = - role.isEmpty && service.isEmpty && environment.isDefined -} - -case class Client( - clientId: String, - serviceIdentifiers: Seq[ServiceIdentifierPattern], - tpsLimit: Option[Int], - environments: Seq[String], - loadShedEnvs: Seq[String], - permittedMethods: Set[String], - accessAllMethods: Boolean, - bypassVisibilityFiltering: Boolean, - enforceRateLimit: Boolean) { - - // Client matches a service identifier if any of its patterns - // match. - def matches(id: ServiceIdentifier): Boolean = - serviceIdentifiers.exists(_.matches(id)) -} - -object ClientsParser { - - // Case classes for parsing yaml - should match the structure of clients.yml - private case class YamlServiceIdentifier( - role: Option[String], - service: Option[String], - environment: Option[String], - ) - private case class YamlClient( - client_id: String, - service_identifiers: Option[Seq[YamlServiceIdentifier]], - service_name: String, - tps_quota: String, - contact_email: String, - environments: Seq[String], - load_shed_envs: Option[ - Seq[String] - ], // list of environments we can rejects requests from if load shedding - comment: Option[String], - permitted_methods: Option[Seq[String]], - access_all_methods: Boolean, - bypass_visibility_filtering: Boolean, - bypass_visibility_filtering_reason: Option[String], - rate_limit: Boolean) { - def toClient: Client = { - - // we provision tps_quota for both DCs during white-listing, to account for full fail-over. 
- val tpsLimit: Option[Int] = Try(tps_quota.replaceAll("[^0-9]", "").toInt * 1000).toOption - - Client( - clientId = client_id, - serviceIdentifiers = service_identifiers.getOrElse(Nil).flatMap { id => - if (id.role.isDefined || id.service.isDefined || id.environment.isDefined) { - Seq(ServiceIdentifierPattern( - role = id.role, - service = id.service, - environment = id.environment, - )) - } else { - Seq() - } - }, - tpsLimit = tpsLimit, - environments = environments, - loadShedEnvs = load_shed_envs.getOrElse(Nil), - permittedMethods = permitted_methods.getOrElse(Nil).toSet, - accessAllMethods = access_all_methods, - bypassVisibilityFiltering = bypass_visibility_filtering, - enforceRateLimit = rate_limit - ) - } - } - - private val mapper: ObjectMapper = new ObjectMapper(new YAMLFactory()) - mapper.registerModule(DefaultScalaModule) - - private val yamlClientTypeFactory = - mapper - .getTypeFactory() - .constructCollectionLikeType( - classOf[Seq[YamlClient]], - classOf[YamlClient] - ) - - def apply(yamlString: String): Seq[Client] = { - val parsed = - mapper - .readValue[Seq[YamlClient]](yamlString, yamlClientTypeFactory) - .map(_.toClient) - - if (parsed.isEmpty) - throw EmptyConfigException - else - parsed - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.docx new file mode 100644 index 000000000..63413d850 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala deleted file mode 100644 index 8d29cdc72..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala +++ /dev/null @@ -1,100 +0,0 @@ -package com.twitter.tweetypie.config - -import 
com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.tweetypie.Gate -import com.twitter.tweetypie.backends.ConfigBus -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.util.Activity - -case class DynamicConfig( - // A map of fully-qualified client ID (including the environment suffix, e.g. tweetypie.prod) to Client case class - clientsByFullyQualifiedId: Option[Map[String, Client]], - // Clients by service identifier parts. - clientsByRole: Option[Map[String, Seq[Client]]] = None, - clientsByService: Option[Map[String, Seq[Client]]] = None, - onlyEnvClients: Option[Seq[Client]] = None, - // These endpoints do not need permissions to be accessed - unprotectedEndpoints: Set[String] = Set("get_tweet_counts", "get_tweet_fields", "get_tweets")) { - - /** - * Function that takes a fully qualified client id and says whether it is included in the allowList - */ - val isAllowListedClient: String => Boolean = - clientsByFullyQualifiedId.map(clients => clients.contains _).getOrElse(_ => true) - - def byServiceIdentifier(serviceIdentifier: ServiceIdentifier): Set[Client] = - Iterable.concat( - get(clientsByRole, serviceIdentifier.role), - get(clientsByService, serviceIdentifier.service), - onlyEnvClients.getOrElse(Seq()), - ) - .filter(_.matches(serviceIdentifier)) - .toSet - - private def get(clientsByKey: Option[Map[String, Seq[Client]]], key: String): Seq[Client] = - clientsByKey match { - case Some(map) => map.getOrElse(key, Seq()) - case None => Seq() - } - - /** - * Take a fully qualified client id and says if the client has offered to shed reads if tweetypie - * is in an emergency - */ - val loadShedEligible: Gate[String] = Gate { (clientId: String) => - val env = ClientIdHelper.getClientIdEnv(clientId) - clientsByFullyQualifiedId.flatMap(clients => clients.get(clientId)).exists { c => - c.loadShedEnvs.contains(env) - } - } -} - -/** - * DynamicConfig uses ConfigBus to 
update Tweetypie with configuration changes - * dynamically. Every time the config changes, the Activity[DynamicConfig] is - * updated, and anything relying on that config will be reinitialized. - */ -object DynamicConfig { - def fullyQualifiedClientIds(client: Client): Seq[String] = { - val clientId = client.clientId - client.environments match { - case Nil => Seq(clientId) - case envs => envs.map(env => s"$clientId.$env") - } - } - - // Make a Map of fully qualified client id to Client - def byClientId(clients: Seq[Client]): Map[String, Client] = - clients.flatMap { client => - fullyQualifiedClientIds(client).map { fullClientId => fullClientId -> client } - }.toMap - - def by(get: ServiceIdentifierPattern => Option[String])(clients: Seq[Client]): Map[String, Seq[Client]] = - clients.flatMap { c => - c.serviceIdentifiers.collect { - case s if get(s).isDefined => (get(s).get, c) - } - }.groupBy(_._1).mapValues(_.map(_._2)) - - private[this] val clientsPath = "config/clients.yml" - - def apply( - stats: StatsReceiver, - configBus: ConfigBus, - settings: TweetServiceSettings - ): Activity[DynamicConfig] = - DynamicConfigLoader(configBus.file) - .apply(clientsPath, stats.scope("client_allowlist"), ClientsParser.apply) - .map(fromClients) - - def fromClients(clients: Option[Seq[Client]]): DynamicConfig = - DynamicConfig( - clientsByFullyQualifiedId = clients.map(byClientId), - clientsByRole = clients.map(by(_.role)), - clientsByService = clients.map(by(_.service)), - onlyEnvClients = clients.map(_.filter { client => - client.serviceIdentifiers.exists(_.onlyEnv) - }), - ) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.docx new file mode 100644 index 000000000..5462deb81 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.docx differ diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala deleted file mode 100644 index 724f97644..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala +++ /dev/null @@ -1,69 +0,0 @@ -package com.twitter.tweetypie.config - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.servo.util.ExceptionCounter -import com.twitter.tweetypie.serverutil.ActivityUtil -import com.twitter.util.{Activity, Return, Try} -import com.twitter.util.logging.Logger - -trait DynamicConfigLoader { - def apply[T](path: String, stats: StatsReceiver, parse: String => T): Activity[Option[T]] -} - -object DynamicConfigLoader { - - def apply(read: String => Activity[String]): DynamicConfigLoader = - new DynamicConfigLoader { - val logger = Logger(getClass) - - private def snoopState[T](stats: StatsReceiver)(a: Activity[T]): Activity[T] = { - val pending = stats.counter("pending") - val failure = stats.counter("failure") - val success = stats.counter("success") - - a.mapState { - case s @ Activity.Ok(_) => - success.incr() - s - case Activity.Pending => - pending.incr() - Activity.Pending - case s @ Activity.Failed(_) => - failure.incr() - s - } - } - - def apply[T](path: String, stats: StatsReceiver, parse: String => T): Activity[Option[T]] = { - val exceptionCounter = new ExceptionCounter(stats) - - val rawActivity: Activity[T] = - snoopState(stats.scope("raw"))( - ActivityUtil - .strict(read(path)) - .map(parse) - .handle { - case e => - exceptionCounter(e) - logger.error(s"Invalid config in $path", e) - throw e - } - ) - - val stableActivity = - snoopState(stats.scope("stabilized"))(rawActivity.stabilize).mapState[Option[T]] { - case Activity.Ok(t) => Activity.Ok(Some(t)) - case _ => Activity.Ok(None) - } - - stats.provideGauge("config_state") { - Try(stableActivity.sample()) match { - case Return(Some(c)) => 
c.hashCode.abs - case _ => 0 - } - } - - stableActivity - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.docx new file mode 100644 index 000000000..981d6bf3f Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala deleted file mode 100644 index 0f8206ffa..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala +++ /dev/null @@ -1,182 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.flockdb.client.StatusGraph -import com.twitter.servo.util.FutureArrow -import com.twitter.stitch.timelineservice.TimelineService.GetPerspectives -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.tweetypie.repository.DeviceSourceRepository.Type -import com.twitter.tweetypie.repository._ -import com.twitter.tweetypie.serverutil._ -import com.twitter.visibility.common.tflock.UserIsInvitedToConversationRepository - -/** - * Tweetypie's read path composes results from many data sources. This - * trait is a collection of repositories for external data access. - * These repositories should not have (within-Tweetypie) caches, - * deciders, etc. applied to them, since that is done when the - * repositories are composed together. They should be the minimal - * wrapping of the external clients in order to expose an Arrow-based - * interface. 
- */ -trait ExternalRepositories { - def card2Repo: Card2Repository.Type - def cardRepo: CardRepository.Type - def cardUsersRepo: CardUsersRepository.Type - def conversationIdRepo: ConversationIdRepository.Type - def containerAsTweetRepo: CreativesContainerMaterializationRepository.GetTweetType - def containerAsTweetFieldsRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType - def deviceSourceRepo: DeviceSourceRepository.Type - def escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type - def stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type - def stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type - def stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type - def stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type - def stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type - def stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type - def stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type - def unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type - def geoScrubTimestampRepo: GeoScrubTimestampRepository.Type - def mediaMetadataRepo: MediaMetadataRepository.Type - def perspectiveRepo: PerspectiveRepository.Type - def placeRepo: PlaceRepository.Type - def profileGeoRepo: ProfileGeoRepository.Type - def quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type - def lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type - def relationshipRepo: RelationshipRepository.Type - def retweetSpamCheckRepo: RetweetSpamCheckRepository.Type - def tweetCountsRepo: TweetCountsRepository.Type - def tweetResultRepo: TweetResultRepository.Type - def tweetSpamCheckRepo: TweetSpamCheckRepository.Type - def urlRepo: UrlRepository.Type - def userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type - def userRepo: UserRepository.Type -} - -class ExternalServiceRepositories( - clients: BackendClients, - statsReceiver: 
StatsReceiver, - settings: TweetServiceSettings, - clientIdHelper: ClientIdHelper) - extends ExternalRepositories { - - lazy val card2Repo: Card2Repository.Type = - Card2Repository(clients.expandodo.getCards2, maxRequestSize = 5) - - lazy val cardRepo: CardRepository.Type = - CardRepository(clients.expandodo.getCards, maxRequestSize = 5) - - lazy val cardUsersRepo: CardUsersRepository.Type = - CardUsersRepository(clients.expandodo.getCardUsers) - - lazy val conversationIdRepo: ConversationIdRepository.Type = - ConversationIdRepository(clients.tflockReadClient.multiSelectOne) - - lazy val containerAsTweetRepo: CreativesContainerMaterializationRepository.GetTweetType = - CreativesContainerMaterializationRepository( - clients.creativesContainerService.materializeAsTweet) - - lazy val containerAsTweetFieldsRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType = - CreativesContainerMaterializationRepository.materializeAsTweetFields( - clients.creativesContainerService.materializeAsTweetFields) - - lazy val deviceSourceRepo: Type = { - DeviceSourceRepository( - DeviceSourceParser.parseAppId, - FutureArrow(clients.passbirdClient.getClientApplications(_)) - ) - } - - lazy val escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type = - EscherbirdAnnotationRepository(clients.escherbird.annotate) - - lazy val quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type = - QuoterHasAlreadyQuotedRepository(clients.tflockReadClient) - - lazy val lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type = - LastQuoteOfQuoterRepository(clients.tflockReadClient) - - lazy val stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type = - StratoSafetyLabelsRepository(clients.stratoserverClient) - - lazy val stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type = - StratoCommunityMembershipRepository(clients.stratoserverClient) - - lazy val stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type = - 
StratoCommunityAccessRepository(clients.stratoserverClient) - - lazy val stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type = - StratoSuperFollowEligibleRepository(clients.stratoserverClient) - - lazy val stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type = - StratoSuperFollowRelationsRepository(clients.stratoserverClient) - - lazy val stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type = - StratoPromotedTweetRepository(clients.stratoserverClient) - - lazy val stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type = - StratoSubscriptionVerificationRepository(clients.stratoserverClient) - - lazy val geoScrubTimestampRepo: GeoScrubTimestampRepository.Type = - GeoScrubTimestampRepository(clients.geoScrubEventStore.getGeoScrubTimestamp) - - lazy val mediaMetadataRepo: MediaMetadataRepository.Type = - MediaMetadataRepository(clients.mediaClient.getMediaMetadata) - - lazy val perspectiveRepo: GetPerspectives = - GetPerspectives(clients.timelineService.getPerspectives) - - lazy val placeRepo: PlaceRepository.Type = - GeoduckPlaceRepository(clients.geoHydrationLocate) - - lazy val profileGeoRepo: ProfileGeoRepository.Type = - ProfileGeoRepository(clients.gnipEnricherator.hydrateProfileGeo) - - lazy val relationshipRepo: RelationshipRepository.Type = - RelationshipRepository(clients.socialGraphService.exists, maxRequestSize = 6) - - lazy val retweetSpamCheckRepo: RetweetSpamCheckRepository.Type = - RetweetSpamCheckRepository(clients.scarecrow.checkRetweet) - - lazy val tweetCountsRepo: TweetCountsRepository.Type = - TweetCountsRepository( - clients.tflockReadClient, - maxRequestSize = settings.tweetCountsRepoChunkSize - ) - - lazy val tweetResultRepo: TweetResultRepository.Type = - ManhattanTweetRepository( - clients.tweetStorageClient.getTweet, - clients.tweetStorageClient.getStoredTweet, - settings.shortCircuitLikelyPartialTweetReads, - statsReceiver.scope("manhattan_tweet_repo"), - 
clientIdHelper, - ) - - lazy val tweetSpamCheckRepo: TweetSpamCheckRepository.Type = - TweetSpamCheckRepository(clients.scarecrow.checkTweet2) - - lazy val unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type = - UnmentionedEntitiesRepository(clients.stratoserverClient) - - lazy val urlRepo: UrlRepository.Type = - UrlRepository( - clients.talon.expand, - settings.thriftClientId.name, - statsReceiver.scope("talon_url_repo"), - clientIdHelper, - ) - - lazy val userRepo: UserRepository.Type = - GizmoduckUserRepository( - clients.gizmoduck.getById, - clients.gizmoduck.getByScreenName, - maxRequestSize = 100 - ) - - lazy val userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type = - UserIsInvitedToConversationRepository( - FutureArrow(clients.tflockReadClient.multiSelectOne(_)), - FutureArrow((clients.tflockReadClient.contains(_: StatusGraph, _: Long, _: Long)).tupled)) - -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.docx new file mode 100644 index 000000000..e9ab3e30b Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala deleted file mode 100644 index 2060e7bcc..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala +++ /dev/null @@ -1,807 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.abdecider.ABDeciderFactory -import com.twitter.config.yaml.YamlConfig -import com.twitter.decider.Decider -import com.twitter.featureswitches.v2.FeatureSwitches -import com.twitter.finagle.memcached -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.servo.cache._ -import 
com.twitter.servo.cache.{KeyValueResult => _} -import com.twitter.servo.repository._ -import com.twitter.stitch.NotFound -import com.twitter.stitch.Stitch -import com.twitter.stitch.repo.Repo -import com.twitter.stitch.timelineservice.TimelineService -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.stringcenter.client.ExternalStringRegistry -import com.twitter.stringcenter.client.MultiProjectStringCenter -import com.twitter.translation.Languages -import com.twitter.translation.YamlConfigLanguages -import com.twitter.tweetypie.caching.CacheOperations -import com.twitter.tweetypie.caching.Expiry -import com.twitter.tweetypie.caching.ServoCachedValueSerializer -import com.twitter.tweetypie.caching.StitchCaching -import com.twitter.tweetypie.caching.ValueSerializer -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.tweetypie.core.FilteredState -import com.twitter.tweetypie.core.TweetResult -import com.twitter.tweetypie.hydrator.TextRepairer -import com.twitter.tweetypie.hydrator.TweetHydration -import com.twitter.tweetypie.hydrator.TweetQueryOptionsExpander -import com.twitter.tweetypie.repository.TweetRepository -import com.twitter.tweetypie.repository.UserRepository -import com.twitter.tweetypie.repository._ -import com.twitter.tweetypie.serverutil.BoringStackTrace -import com.twitter.tweetypie.serverutil.ExceptionCounter -import com.twitter.tweetypie.thriftscala.DeviceSource -import com.twitter.tweetypie.thriftscala.Place -import com.twitter.tweetypie.thriftscala.entities.EntityExtractor -import com.twitter.tweetypie.util.StitchUtils -import com.twitter.util.Duration -import com.twitter.util.FuturePool -import com.twitter.util.Timer -import com.twitter.visibility.VisibilityLibrary -import com.twitter.visibility.common.KeywordMatcher -import com.twitter.visibility.common.LocalizationSource -import com.twitter.visibility.common.TweetMediaMetadataSource -import com.twitter.visibility.common.TweetPerspectiveSource 
-import com.twitter.visibility.common.UserRelationshipSource -import com.twitter.visibility.common.UserSource -import com.twitter.visibility.common.tflock.UserIsInvitedToConversationRepository -import com.twitter.visibility.configapi.configs.VisibilityDeciderGates -import com.twitter.visibility.generators.CountryNameGenerator -import com.twitter.visibility.generators.LocalizedInterstitialGenerator -import com.twitter.visibility.generators.TombstoneGenerator -import com.twitter.visibility.interfaces.tweets.DeletedTweetVisibilityLibrary -import com.twitter.visibility.interfaces.tweets.QuotedTweetVisibilityLibrary -import com.twitter.visibility.interfaces.tweets.TweetVisibilityLibrary -import com.twitter.visibility.interfaces.tweets.UserUnavailableStateVisibilityLibrary -import com.twitter.visibility.util.DeciderUtil -import com.twitter.visibility.util.FeatureSwitchUtil -import java.util.concurrent.Executors - -/** - * LogicalRepositories is a layer above ExternalRepositories. These repos may have additional - * logic layered in, such as memcache-caching, hot-key caching, etc. There may - * also be multiple logical repositories mapped to an single external repository. - * - * These repositories are used in tweet hydration and tweet creation. 
- */ -trait LogicalRepositories { - - def card2Repo: Card2Repository.Type - def cardRepo: CardRepository.Type - def cardUsersRepo: CardUsersRepository.Type - def conversationIdRepo: ConversationIdRepository.Type - def conversationControlRepo: ConversationControlRepository.Type - def conversationMutedRepo: ConversationMutedRepository.Type - def containerAsGetTweetResultRepo: CreativesContainerMaterializationRepository.GetTweetType - def containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType - def deviceSourceRepo: DeviceSourceRepository.Type - def escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type - def geoScrubTimestampRepo: GeoScrubTimestampRepository.Type - def languageRepo: LanguageRepository.Type - def mediaMetadataRepo: MediaMetadataRepository.Type - def pastedMediaRepo: PastedMediaRepository.Type - def perspectiveRepo: PerspectiveRepository.Type - def placeRepo: PlaceRepository.Type - def profileGeoRepo: ProfileGeoRepository.Type - def quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type - def lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type - def relationshipRepo: RelationshipRepository.Type - def stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type - def stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type - def stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type - def stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type - def stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type - def stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type - def stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type - def takedownRepo: UserTakedownRepository.Type - def tweetSpamCheckRepo: TweetSpamCheckRepository.Type - def retweetSpamCheckRepo: RetweetSpamCheckRepository.Type - def tweetCountsRepo: TweetCountsRepository.Type - def tweetVisibilityRepo: TweetVisibilityRepository.Type - def 
quotedTweetVisibilityRepo: QuotedTweetVisibilityRepository.Type - def deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type - def unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type - def urlRepo: UrlRepository.Type - def userRepo: UserRepository.Type - def optionalUserRepo: UserRepository.Optional - def userIdentityRepo: UserIdentityRepository.Type - def userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type - def userProtectionRepo: UserProtectionRepository.Type - def userViewRepo: UserViewRepository.Type - def userVisibilityRepo: UserVisibilityRepository.Type - - def tweetResultRepo: TweetResultRepository.Type - def tweetRepo: TweetRepository.Type - def optionalTweetRepo: TweetRepository.Optional - - /** - * Not actually repositories, but intimately intertwined. - */ - def tweetHydrators: TweetHydrators -} - -object LogicalRepositories { - - /** - * Middleware is a function that takes a stitch repo and returns a new stitch repo. - */ - type Middleware[K, V] = (K => Stitch[V]) => K => Stitch[V] - - // Middleware2 is a function that takes a two-arg stitch repo and returns a new two-arg stitch repo. - type Middleware2[K, C, V] = ((K, C) => Stitch[V]) => ((K, C) => Stitch[V]) - val exceptionLog: Logger = Logger(getClass) - - // Converts a Middleware2 to a Middleware for use with withMiddleware. 
- def tupledMiddleware[K, C, V](middleware2: Middleware2[K, C, V]): Middleware[(K, C), V] = - repo => middleware2(Function.untupled(repo)).tupled - - object ObserveStitch { - def apply[K, V]( - repo: K => Stitch[V], - repoName: String, - stats: StatsReceiver - ): K => Stitch[V] = { - val successCounter = stats.counter("success") - val notFoundCounter = stats.counter("not_found") - val latencyStat = stats.stat("latency_ms") - - val exceptionCounter = - ExceptionCounter( - stats, - // don't count FilteredState exceptions - FilteredState.ignoringCategorizer(ExceptionCounter.defaultCategorizer) - ) - - (key: K) => - StitchUtils.trackLatency(latencyStat, repo(key)).respond { - case Return(_) => successCounter.incr() - case Throw(NotFound) => notFoundCounter.incr() - case Throw(t) => - val message = s"$repoName: $key" - if (BoringStackTrace.isBoring(t)) { - exceptionLog.debug(message, t) - } else { - exceptionLog.warn(message, t) - } - - exceptionCounter(t) - } - } - } - - /** - * Add middleware to configure a repository. The stats receiver is - * scoped for the currently-configured repository. The `toRepo` field - * is the repository with some set of middleware applied. Each method - * adds a new middleware to the current repo, and returns it as a - * `RepoConfig`, allowing method chaining. - * - * Since each method call applies a new middleware, the final middleware is - * the outermost middleware, and thus the one that sees the arguments - * first. - */ - class RepoConfig[K, V]( - val toRepo: K => Stitch[V], - stats: StatsReceiver, - name: String, - memcachedClientWithInProcessCaching: memcached.Client) { - def withMiddleware(middleware: Middleware[K, V]): RepoConfig[K, V] = - new RepoConfig[K, V](middleware(toRepo), stats, name, memcachedClientWithInProcessCaching) - - /** - * Wraps a repo with success/failure/latency stats tracking and logs - * exceptions. This will be applied to every repository. 
- * - * @param repoName Used when logging exceptions thrown by the underlying repo. - */ - def observe(repoName: String = s"${name}_repo"): RepoConfig[K, V] = { - withMiddleware { repo => ObserveStitch[K, V](repo, repoName, stats) } - } - - /** - * Use the supplied cache to wrap the repository with a read-through - * caching layer. - */ - def caching( - cache: LockingCache[K, Cached[V]], - partialHandler: CachedResult.PartialHandler[K, V], - maxCacheRequestSize: Int = Int.MaxValue - ): RepoConfig[K, V] = { - val stitchLockingCache = StitchLockingCache( - underlying = cache, - picker = new PreferNewestCached[V], - maxRequestSize = maxCacheRequestSize - ) - - val handler: CachedResult.Handler[K, V] = - CachedResult.Handler( - CachedResult.PartialHandler.orElse( - partialHandler, - CachedResult.failuresAreDoNotCache - ) - ) - - withMiddleware { repo => - CacheStitch[K, K, V]( - repo = repo, - cache = stitchLockingCache, - identity, - handler = handler, - cacheable = CacheStitch.cacheFoundAndNotFound - ) - } - } - - def newCaching( - keySerializer: K => String, - valueSerializer: ValueSerializer[Try[V]] - ): RepoConfig[K, V] = - withMiddleware { repo => - val logger = Logger(s"com.twitter.tweetypie.config.LogicalRepositories.$name") - - val cacheOperations: CacheOperations[K, Try[V]] = - new CacheOperations( - keySerializer = keySerializer, - valueSerializer = valueSerializer, - memcachedClient = memcachedClientWithInProcessCaching, - statsReceiver = stats.scope("caching"), - logger = logger - ) - - val tryRepo: K => Stitch[Try[V]] = repo.andThen(_.liftToTry) - val cachingTryRepo: K => Stitch[Try[V]] = new StitchCaching(cacheOperations, tryRepo) - cachingTryRepo.andThen(_.lowerFromTry) - } - - def toRepo2[K1, C](implicit tupleToK: ((K1, C)) <:< K): (K1, C) => Stitch[V] = - (k1, c) => toRepo(tupleToK((k1, c))) - } - - def softTtlPartialHandler[K, V]( - softTtl: Option[V] => Duration, - softTtlPerturbationFactor: Float = 0.05f - ): CachedResult.PartialHandler[K, V] = - 
CachedResult - .softTtlExpiration[K, V](softTtl, CachedResult.randomExpiry(softTtlPerturbationFactor)) - - def apply( - settings: TweetServiceSettings, - stats: StatsReceiver, - timer: Timer, - deciderGates: TweetypieDeciderGates, - external: ExternalRepositories, - caches: Caches, - stratoClient: StratoClient, - hasMedia: Tweet => Boolean, - clientIdHelper: ClientIdHelper, - featureSwitchesWithoutExperiments: FeatureSwitches, - ): LogicalRepositories = { - val repoStats = stats.scope("repositories") - - def repoConfig[K, V](name: String, repo: K => Stitch[V]): RepoConfig[K, V] = - new RepoConfig[K, V]( - name = name, - toRepo = repo, - stats = repoStats.scope(name), - memcachedClientWithInProcessCaching = caches.memcachedClientWithInProcessCaching) - - def repo2Config[K, C, V](name: String, repo: (K, C) => Stitch[V]): RepoConfig[(K, C), V] = - repoConfig[(K, C), V](name, repo.tupled) - - new LogicalRepositories { - // the final tweetResultRepo has a circular dependency, where it depends on hydrators - // that in turn depend on the tweetResultRepo, so we create a `tweetResultRepo` function - // that proxies to `var finalTweetResultRepo`, which gets set at the end of this block. 
- var finalTweetResultRepo: TweetResultRepository.Type = null - val tweetResultRepo: TweetResultRepository.Type = - (tweetId, opts) => finalTweetResultRepo(tweetId, opts) - val tweetRepo: TweetRepository.Type = TweetRepository.fromTweetResult(tweetResultRepo) - - val optionalTweetRepo: TweetRepository.Optional = TweetRepository.optional(tweetRepo) - - val userRepo: UserRepository.Type = - repo2Config(repo = external.userRepo, name = "user") - .observe() - .toRepo2 - - val optionalUserRepo: UserRepository.Optional = UserRepository.optional(userRepo) - - private[this] val tweetVisibilityStatsReceiver: StatsReceiver = - repoStats.scope("tweet_visibility_library") - private[this] val userUnavailableVisibilityStatsReceiver: StatsReceiver = - repoStats.scope("user_unavailable_visibility_library") - private[this] val quotedTweetVisibilityStatsReceiver: StatsReceiver = - repoStats.scope("quoted_tweet_visibility_library") - private[this] val deletedTweetVisibilityStatsReceiver: StatsReceiver = - repoStats.scope("deleted_tweet_visibility_library") - // TweetVisibilityLibrary still uses the old c.t.logging.Logger - private[this] val tweetVisibilityLogger = - com.twitter.logging.Logger("com.twitter.tweetypie.TweetVisibility") - private[this] val visibilityDecider: Decider = DeciderUtil.mkDecider( - deciderOverlayPath = settings.vfDeciderOverlayFilename, - useLocalDeciderOverrides = true) - private[this] val visibilityDeciderGates = VisibilityDeciderGates(visibilityDecider) - - private[this] def visibilityLibrary(statsReceiver: StatsReceiver) = VisibilityLibrary - .Builder( - log = tweetVisibilityLogger, - statsReceiver = statsReceiver, - memoizeSafetyLevelParams = visibilityDeciderGates.enableMemoizeSafetyLevelParams - ) - .withDecider(visibilityDecider) - .withDefaultABDecider(isLocal = false) - .withCaptureDebugStats(Gate.True) - .withEnableComposableActions(Gate.True) - .withEnableFailClosed(Gate.True) - 
.withEnableShortCircuiting(visibilityDeciderGates.enableShortCircuitingTVL) - .withSpecialLogging(visibilityDeciderGates.enableSpecialLogging) - .build() - - def countryNameGenerator(statsReceiver: StatsReceiver) = { - // TweetVisibilityLibrary, DeletedTweetVisibilityLibrary, and - // UserUnavailableVisibilityLibrary do not evaluate any Rules - // that require the display of country names in copy - CountryNameGenerator.providesWithCustomMap(Map.empty, statsReceiver) - } - - def tombstoneGenerator( - countryNameGenerator: CountryNameGenerator, - statsReceiver: StatsReceiver - ) = - TombstoneGenerator( - visibilityLibrary(statsReceiver).visParams, - countryNameGenerator, - statsReceiver) - - private[this] val userUnavailableVisibilityLibrary = - UserUnavailableStateVisibilityLibrary( - visibilityLibrary(userUnavailableVisibilityStatsReceiver), - visibilityDecider, - tombstoneGenerator( - countryNameGenerator(userUnavailableVisibilityStatsReceiver), - userUnavailableVisibilityStatsReceiver - ), - LocalizedInterstitialGenerator(visibilityDecider, userUnavailableVisibilityStatsReceiver) - ) - - val userIdentityRepo: UserIdentityRepository.Type = - repoConfig(repo = UserIdentityRepository(userRepo), name = "user_identity") - .observe() - .toRepo - - val userProtectionRepo: UserProtectionRepository.Type = - repoConfig(repo = UserProtectionRepository(userRepo), name = "user_protection") - .observe() - .toRepo - - val userViewRepo: UserViewRepository.Type = - repoConfig(repo = UserViewRepository(userRepo), name = "user_view") - .observe() - .toRepo - - val userVisibilityRepo: UserVisibilityRepository.Type = - repoConfig( - repo = UserVisibilityRepository(userRepo, userUnavailableVisibilityLibrary), - name = "user_visibility" - ).observe().toRepo - - val urlRepo: UrlRepository.Type = - repoConfig(repo = external.urlRepo, name = "url") - .observe() - .toRepo - - val profileGeoRepo: ProfileGeoRepository.Type = - repoConfig(repo = external.profileGeoRepo, name = "profile_geo") 
- .observe() - .toRepo - - val quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type = - repo2Config(repo = external.quoterHasAlreadyQuotedRepo, name = "quoter_has_already_quoted") - .observe() - .toRepo2 - - val lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type = - repo2Config(repo = external.lastQuoteOfQuoterRepo, name = "last_quote_of_quoter") - .observe() - .toRepo2 - - val mediaMetadataRepo: MediaMetadataRepository.Type = - repoConfig(repo = external.mediaMetadataRepo, name = "media_metadata") - .observe() - .toRepo - - val perspectiveRepo: PerspectiveRepository.Type = - repoConfig(repo = external.perspectiveRepo, name = "perspective") - .observe() - .toRepo - - val conversationMutedRepo: ConversationMutedRepository.Type = - TimelineService.GetPerspectives.getConversationMuted(perspectiveRepo) - - // Because observe is applied before caching, only cache misses - // (i.e. calls to the underlying repo) are observed. - // Note that `newCaching` has stats around cache hit/miss but `caching` does not. - val deviceSourceRepo: DeviceSourceRepository.Type = - repoConfig(repo = external.deviceSourceRepo, name = "device_source") - .observe() - .newCaching( - keySerializer = appIdStr => DeviceSourceKey(appIdStr).toString, - valueSerializer = ServoCachedValueSerializer( - codec = DeviceSource, - expiry = Expiry.byAge(settings.deviceSourceMemcacheTtl), - softTtl = settings.deviceSourceMemcacheSoftTtl - ) - ) - .caching( - cache = caches.deviceSourceInProcessCache, - partialHandler = softTtlPartialHandler(_ => settings.deviceSourceInProcessSoftTtl) - ) - .toRepo - - // Because observe is applied before caching, only cache misses - // (i.e. calls to the underlying repo) are observed - // Note that `newCaching` has stats around cache hit/miss but `caching` does not. 
- val placeRepo: PlaceRepository.Type = - repoConfig(repo = external.placeRepo, name = "place") - .observe() - .newCaching( - keySerializer = placeKey => placeKey.toString, - valueSerializer = ServoCachedValueSerializer( - codec = Place, - expiry = Expiry.byAge(settings.placeMemcacheTtl), - softTtl = settings.placeMemcacheSoftTtl - ) - ) - .toRepo - - val cardRepo: CardRepository.Type = - repoConfig(repo = external.cardRepo, name = "cards") - .observe() - .toRepo - - val card2Repo: Card2Repository.Type = - repo2Config(repo = external.card2Repo, name = "card2") - .observe() - .toRepo2 - - val cardUsersRepo: CardUsersRepository.Type = - repo2Config(repo = external.cardUsersRepo, name = "card_users") - .observe() - .toRepo2 - - val relationshipRepo: RelationshipRepository.Type = - repoConfig(repo = external.relationshipRepo, name = "relationship") - .observe() - .toRepo - - val conversationIdRepo: ConversationIdRepository.Type = - repoConfig(repo = external.conversationIdRepo, name = "conversation_id") - .observe() - .toRepo - - val conversationControlRepo: ConversationControlRepository.Type = - repo2Config( - repo = ConversationControlRepository(tweetRepo, stats.scope("conversation_control")), - name = "conversation_control" - ).observe().toRepo2 - - val containerAsGetTweetResultRepo: CreativesContainerMaterializationRepository.GetTweetType = - repo2Config( - repo = external.containerAsTweetRepo, - name = "container_as_tweet" - ).observe().toRepo2 - - val containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType = - repo2Config( - repo = external.containerAsTweetFieldsRepo, - name = "container_as_tweet_fields" - ).observe().toRepo2 - - val languageRepo: LanguageRepository.Type = { - val pool = FuturePool(Executors.newFixedThreadPool(settings.numPenguinThreads)) - repoConfig(repo = PenguinLanguageRepository(pool), name = "language") - .observe() - .toRepo - } - - // Because observe is applied before caching, only cache misses 
- // (i.e. calls to the underlying repo) are observed - // Note that `newCaching` has stats around cache hit/miss but `caching` does not. - val tweetCountsRepo: TweetCountsRepository.Type = - repoConfig(repo = external.tweetCountsRepo, name = "counts") - .observe() - .caching( - cache = caches.tweetCountsCache, - partialHandler = softTtlPartialHandler { - case Some(0) => settings.tweetCountsMemcacheZeroSoftTtl - case _ => settings.tweetCountsMemcacheNonZeroSoftTtl - }, - maxCacheRequestSize = settings.tweetCountsCacheChunkSize - ) - .toRepo - - val pastedMediaRepo: PastedMediaRepository.Type = - repo2Config(repo = PastedMediaRepository(tweetRepo), name = "pasted_media") - .observe() - .toRepo2 - - val escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type = - repoConfig(repo = external.escherbirdAnnotationRepo, name = "escherbird_annotations") - .observe() - .toRepo - - val stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type = - repo2Config(repo = external.stratoSafetyLabelsRepo, name = "strato_safety_labels") - .observe() - .toRepo2 - - val stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type = - repoConfig( - repo = external.stratoCommunityMembershipRepo, - name = "strato_community_memberships") - .observe() - .toRepo - - val stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type = - repoConfig(repo = external.stratoCommunityAccessRepo, name = "strato_community_access") - .observe() - .toRepo - - val stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type = - repoConfig( - repo = external.stratoSuperFollowEligibleRepo, - name = "strato_super_follow_eligible") - .observe() - .toRepo - - val stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type = - repo2Config( - repo = external.stratoSuperFollowRelationsRepo, - name = "strato_super_follow_relations") - .observe() - .toRepo2 - - val stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type = - repoConfig(repo = 
external.stratoPromotedTweetRepo, name = "strato_promoted_tweet") - .observe() - .toRepo - - val stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type = - repo2Config( - repo = external.stratoSubscriptionVerificationRepo, - name = "strato_subscription_verification") - .observe() - .toRepo2 - - val unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type = - repo2Config(repo = external.unmentionedEntitiesRepo, name = "unmentioned_entities") - .observe() - .toRepo2 - - private[this] val userSource = - UserSource.fromRepo( - Repo { (k, _) => - val opts = UserQueryOptions(k.fields, UserVisibility.All) - userRepo(UserKey(k.id), opts) - } - ) - - private[this] val userRelationshipSource = - UserRelationshipSource.fromRepo( - Repo[UserRelationshipSource.Key, Unit, Boolean] { (key, _) => - relationshipRepo( - RelationshipKey(key.subjectId, key.objectId, key.relationship) - ) - } - ) - - private[this] val tweetPerspectiveSource = - TweetPerspectiveSource.fromGetPerspectives(perspectiveRepo) - private[this] val tweetMediaMetadataSource = - TweetMediaMetadataSource.fromFunction(mediaMetadataRepo) - - val userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type = - repo2Config( - repo = external.userIsInvitedToConversationRepo, - name = "user_is_invited_to_conversation") - .observe() - .toRepo2 - - private[this] val stringCenterClient: MultiProjectStringCenter = { - val stringCenterProjects = settings.flags.stringCenterProjects().toList - - val languages: Languages = new YamlConfigLanguages( - new YamlConfig(settings.flags.languagesConfig())) - - val loggingAbDecider = ABDeciderFactory("/usr/local/config/abdecider/abdecider.yml") - .withEnvironment("production") - .buildWithLogging() - - MultiProjectStringCenter( - projects = stringCenterProjects, - defaultBundlePath = MultiProjectStringCenter.StandardDefaultBundlePath, - refreshingBundlePath = MultiProjectStringCenter.StandardRefreshingBundlePath, - refreshingInterval = 
MultiProjectStringCenter.StandardRefreshingInterval, - requireDefaultBundleExists = true, - languages = languages, - statsReceiver = tweetVisibilityStatsReceiver, - loggingABDecider = loggingAbDecider - ) - } - private[this] val stringRegistry: ExternalStringRegistry = new ExternalStringRegistry() - private[this] val localizationSource: LocalizationSource = - LocalizationSource.fromMultiProjectStringCenterClient(stringCenterClient, stringRegistry) - - val tweetVisibilityRepo: TweetVisibilityRepository.Type = { - val tweetVisibilityLibrary: TweetVisibilityLibrary.Type = - TweetVisibilityLibrary( - visibilityLibrary(tweetVisibilityStatsReceiver), - userSource = userSource, - userRelationshipSource = userRelationshipSource, - keywordMatcher = KeywordMatcher.defaultMatcher(stats), - stratoClient = stratoClient, - localizationSource = localizationSource, - decider = visibilityDecider, - invitedToConversationRepo = userIsInvitedToConversationRepo, - tweetPerspectiveSource = tweetPerspectiveSource, - tweetMediaMetadataSource = tweetMediaMetadataSource, - tombstoneGenerator = tombstoneGenerator( - countryNameGenerator(tweetVisibilityStatsReceiver), - tweetVisibilityStatsReceiver - ), - interstitialGenerator = - LocalizedInterstitialGenerator(visibilityDecider, tweetVisibilityStatsReceiver), - limitedActionsFeatureSwitches = - FeatureSwitchUtil.mkLimitedActionsFeatureSwitches(tweetVisibilityStatsReceiver), - enableParityTest = deciderGates.tweetVisibilityLibraryEnableParityTest - ) - - val underlying = - TweetVisibilityRepository( - tweetVisibilityLibrary, - visibilityDeciderGates, - tweetVisibilityLogger, - repoStats.scope("tweet_visibility_repo") - ) - - repoConfig(repo = underlying, name = "tweet_visibility") - .observe() - .toRepo - } - - val quotedTweetVisibilityRepo: QuotedTweetVisibilityRepository.Type = { - val quotedTweetVisibilityLibrary: QuotedTweetVisibilityLibrary.Type = - QuotedTweetVisibilityLibrary( - visibilityLibrary(quotedTweetVisibilityStatsReceiver), - 
userSource = userSource, - userRelationshipSource = userRelationshipSource, - visibilityDecider, - userStateVisibilityLibrary = userUnavailableVisibilityLibrary, - enableVfFeatureHydration = deciderGates.enableVfFeatureHydrationInQuotedTweetVLShim - ) - - val underlying = - QuotedTweetVisibilityRepository(quotedTweetVisibilityLibrary, visibilityDeciderGates) - - repoConfig(repo = underlying, name = "quoted_tweet_visibility") - .observe() - .toRepo - } - - val deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type = { - val deletedTweetVisibilityLibrary: DeletedTweetVisibilityLibrary.Type = - DeletedTweetVisibilityLibrary( - visibilityLibrary(deletedTweetVisibilityStatsReceiver), - visibilityDecider, - tombstoneGenerator( - countryNameGenerator(deletedTweetVisibilityStatsReceiver), - deletedTweetVisibilityStatsReceiver - ) - ) - - val underlying = DeletedTweetVisibilityRepository.apply( - deletedTweetVisibilityLibrary - ) - - repoConfig(repo = underlying, name = "deleted_tweet_visibility") - .observe() - .toRepo - } - - val takedownRepo: UserTakedownRepository.Type = - repoConfig(repo = UserTakedownRepository(userRepo), name = "takedowns") - .observe() - .toRepo - - val tweetSpamCheckRepo: TweetSpamCheckRepository.Type = - repo2Config(repo = external.tweetSpamCheckRepo, name = "tweet_spam_check") - .observe() - .toRepo2 - - val retweetSpamCheckRepo: RetweetSpamCheckRepository.Type = - repoConfig(repo = external.retweetSpamCheckRepo, name = "retweet_spam_check") - .observe() - .toRepo - - // Because observe is applied before caching, only cache misses - // (i.e. calls to the underlying repo) are observed - // Note that `newCaching` has stats around cache hit/miss but `caching` does not. 
- val geoScrubTimestampRepo: GeoScrubTimestampRepository.Type = - repoConfig(repo = external.geoScrubTimestampRepo, name = "geo_scrub") - .observe() - .caching( - cache = caches.geoScrubCache, - partialHandler = (_ => None) - ) - .toRepo - - val tweetHydrators: TweetHydrators = - TweetHydrators( - stats = stats, - deciderGates = deciderGates, - repos = this, - tweetDataCache = caches.tweetDataCache, - hasMedia = hasMedia, - featureSwitchesWithoutExperiments = featureSwitchesWithoutExperiments, - clientIdHelper = clientIdHelper, - ) - - val queryOptionsExpander: TweetQueryOptionsExpander.Type = - TweetQueryOptionsExpander.threadLocalMemoize( - TweetQueryOptionsExpander.expandDependencies - ) - - // mutations to tweets that we only need to apply when reading from the external - // repository, and not when reading from cache - val tweetMutation: Mutation[Tweet] = - Mutation - .all( - Seq( - EntityExtractor.mutationAll, - TextRepairer.BlankLineCollapser, - TextRepairer.CoreTextBugPatcher - ) - ).onlyIf(_.coreData.isDefined) - - val cachingTweetRepo: TweetResultRepository.Type = - repo2Config(repo = external.tweetResultRepo, name = "saved_tweet") - .observe() - .withMiddleware { repo => - // applies tweetMutation to the results of TweetResultRepository - val mutateResult = TweetResult.mutate(tweetMutation) - repo.andThen(stitchResult => stitchResult.map(mutateResult)) - } - .withMiddleware( - tupledMiddleware( - CachingTweetRepository( - caches.tweetResultCache, - settings.tweetTombstoneTtl, - stats.scope("saved_tweet", "cache"), - clientIdHelper, - deciderGates.logCacheExceptions, - ) - ) - ) - .toRepo2 - - finalTweetResultRepo = repo2Config(repo = cachingTweetRepo, name = "tweet") - .withMiddleware( - tupledMiddleware( - TweetHydration.hydrateRepo( - tweetHydrators.hydrator, - tweetHydrators.cacheChangesEffect, - queryOptionsExpander - ) - ) - ) - .observe() - .withMiddleware(tupledMiddleware(TweetResultRepository.shortCircuitInvalidIds)) - .toRepo2 - } - } -} diff 
--git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.docx new file mode 100644 index 000000000..8f6153c23 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala deleted file mode 100644 index 22623039b..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala +++ /dev/null @@ -1,314 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.app.Flag -import com.twitter.app.Flaggable -import com.twitter.app.Flags -import com.twitter.finagle.http.HttpMuxer -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.mtls.authorization.server.MtlsServerSessionTrackerFilter -import com.twitter.finagle.mtls.server.MtlsStackServer._ -import com.twitter.finagle.param.Reporter -import com.twitter.finagle.ssl.OpportunisticTls -import com.twitter.finagle.util.NullReporterFactory -import com.twitter.finagle.Thrift -import com.twitter.finagle.ThriftMux -import com.twitter.flockdb.client.thriftscala.Priority -import com.twitter.inject.Injector -import com.twitter.inject.annotations.{Flags => InjectFlags} -import com.twitter.scrooge.ThriftEnum -import com.twitter.scrooge.ThriftEnumObject -import com.twitter.server.handler.IndexHandler -import com.twitter.strato.catalog.Catalog -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.fed.server.StratoFedServer -import com.twitter.strato.util.Ref -import com.twitter.strato.warmup.Warmer -import com.twitter.tweetypie.federated.StratoCatalogBuilder -import com.twitter.tweetypie.federated.warmups.StratoCatalogWarmups -import com.twitter.tweetypie.serverutil.ActivityService -import java.net.InetSocketAddress -import scala.reflect.ClassTag - 
-object Env extends Enumeration { - val dev: Env.Value = Value - val staging: Env.Value = Value - val prod: Env.Value = Value -} - -class TweetServiceFlags(flag: Flags, injector: => Injector) { - implicit object EnvFlaggable extends Flaggable[Env.Value] { - def parse(s: String): Env.Value = - s match { - // Handle Aurora env names that are different from tweetypie's names - case "devel" => Env.dev - case "test" => Env.staging - // Handle Tweetypie env names - case other => Env.withName(other) - } - } - - val zone: Flag[String] = - flag("zone", "localhost", "One of: atla, pdxa, localhost, etc.") - - val env: Flag[Env.Value] = - flag("env", Env.dev, "One of: testbox, dev, staging, prod") - - val twemcacheDest: Flag[String] = - flag( - "twemcacheDest", - "/s/cache/tweetypie:twemcaches", - "The Name for the tweetypie cache cluster." - ) - - val deciderOverrides: Flag[Map[String, Boolean]] = - flag( - "deciderOverrides", - Map.empty[String, Boolean], - "Set deciders to constant values, overriding decider configuration files." - )( - // Unfortunately, the implicit Flaggable[Boolean] has a default - // value and Flaggable.ofMap[K, V] requires that the implicit - // Flaggable[V] not have a default. Even less fortunately, it - // doesn't say why. We're stuck with this. - Flaggable.ofMap(implicitly, Flaggable.mandatory(_.toBoolean)) - ) - - // "/decider.yml" comes from the resources included at - // "tweetypie/server/config", so you should not normally need to - // override this value. This flag is defined as a step toward making - // our command-line usage more similar to the standard - // twitter-server-internal flags. - def deciderBase(): String = - injector.instance[String](InjectFlags.named("decider.base")) - - // Omitting a value for decider overlay flag causes the server to use - // only the static decider. 
- def deciderOverlay(): String = - injector.instance[String](InjectFlags.named("decider.overlay")) - - // Omitting a value for the VF decider overlay flag causes the server - // to use only the static decider. - val vfDeciderOverlay: Flag[String] = - flag( - "vf.decider.overlay", - "The location of the overlay decider configuration for Visibility Filtering") - - /** - * Warmup Requests happen as part of the initialization process, before any real requests are - * processed. This prevents real requests from ever being served from a competely cold state - */ - val enableWarmupRequests: Flag[Boolean] = - flag( - "enableWarmupRequests", - true, - """| warms up Tweetypie service by generating random requests - | to Tweetypie that are processed prior to the actual client requests """.stripMargin - ) - - val grayListRateLimit: Flag[Double] = - flag("graylistRateLimit", 5.0, "rate-limit for non-allowlisted clients") - - val servicePort: Flag[InetSocketAddress] = - flag("service.port", "port for tweet-service thrift interface") - - val clientId: Flag[String] = - flag("clientId", "tweetypie.staging", "clientId to send in requests") - - val allowlist: Flag[Boolean] = - flag("allowlist", true, "enforce client allowlist") - - val clientHostStats: Flag[Boolean] = - flag("clientHostStats", false, "enable per client host stats") - - val withCache: Flag[Boolean] = - flag("withCache", true, "if set to false, Tweetypie will launch without memcache") - - /** - * Make any [[ThriftEnum]] value parseable as a [[Flag]] value. This - * will parse case-insensitive values that match the unqualified - * names of the values of the enumeration, in the manner of - * [[ThriftEnum]]'s `valueOf` method. 
- * - * Consider a [[ThriftEnum]] generated from the following Thrift IDL snippet: - * - * {{{ - * enum Priority { - * Low = 1 - * Throttled = 2 - * High = 3 - * } - * }}} - * - * To enable defining flags that specify one of these enum values: - * - * {{{ - * implicit val flaggablePriority: Flaggable[Priority] = flaggableThriftEnum(Priority) - * }}} - * - * In this example, the enumeration value `Priority.Low` can be - * represented as the string "Low", "low", or "LOW". - */ - def flaggableThriftEnum[T <: ThriftEnum: ClassTag](enum: ThriftEnumObject[T]): Flaggable[T] = - Flaggable.mandatory[T] { stringValue: String => - enum - .valueOf(stringValue) - .getOrElse { - val validValues = enum.list.map(_.name).mkString(", ") - throw new IllegalArgumentException( - s"Invalid value ${stringValue}. Valid values include: ${validValues}" - ) - } - } - - implicit val flaggablePriority: Flaggable[Priority] = flaggableThriftEnum(Priority) - - val backgroundIndexingPriority: Flag[Priority] = - flag( - "backgroundIndexingPriority", - Priority.Low, - "specifies the queue to use for \"background\" tflock operations, such as removing edges " + - "for deleted Tweets. This exists for testing scenarios, when it is useful to see the " + - "effects of background indexing operations sooner. In production, this should always be " + - "set to \"low\" (the default)." 
- ) - - val tflockPageSize: Flag[Int] = - flag("tflockPageSize", 1000, "Number of items to return in each page when querying tflock") - - val enableInProcessCache: Flag[Boolean] = - flag( - "enableInProcessCache", - true, - "if set to false, Tweetypie will not use the in-process cache" - ) - - val inProcessCacheSize: Flag[Int] = - flag("inProcessCacheSize", 1700, "maximum items in in-process cache") - - val inProcessCacheTtlMs: Flag[Int] = - flag("inProcessCacheTtlMs", 10000, "milliseconds that hot keys are stored in memory") - - val memcachePendingRequestLimit: Flag[Int] = - flag( - "memcachePendingRequestLimit", - 100, - "Number of requests that can be queued on a single memcache connection (4 per cache server)" - ) - - val instanceId: Flag[Int] = - flag( - "configbus.instanceId", - -1, - "InstanceId of the tweetypie service instance for staged configuration distribution" - ) - - val instanceCount: Flag[Int] = - flag( - "configbus.instanceCount", - -1, - "Total number of tweetypie service instances for staged configuration distribution" - ) - - def serviceIdentifier(): ServiceIdentifier = - injector.instance[ServiceIdentifier] - - val enableReplication: Flag[Boolean] = - flag( - "enableReplication", - true, - "Enable replication of reads (configurable via tweetypie_replicate_reads decider) and writes (100%) via DRPC" - ) - - val simulateDeferredrpcCallbacks: Flag[Boolean] = - flag( - "simulateDeferredrpcCallbacks", - false, - """|For async write path, call back into current instance instead of via DRPC. - |This is used for test and devel instances so we can ensure the test traffic - |is going to the test instance.""".stripMargin - ) - - val shortCircuitLikelyPartialTweetReadsMs: Flag[Int] = - flag( - "shortCircuitLikelyPartialTweetReadsMs", - 1500, - """|Specifies a number of milliseconds before which we will short-circuit likely - |partial reads from MH and return a NotFound tweet response state. 
After - |experimenting we went with 1500 ms.""".stripMargin - ) - - val stringCenterProjects: Flag[Seq[String]] = - flag( - "stringcenter.projects", - Seq.empty[String], - "String Center project names, comma separated")(Flaggable.ofSeq(Flaggable.ofString)) - - val languagesConfig: Flag[String] = - flag("international.languages", "Supported languages config file") -} - -class TweetypieMain extends StratoFedServer { - override def dest: String = "/s/tweetypie/tweetypie:federated" - - val tweetServiceFlags: TweetServiceFlags = new TweetServiceFlags(flag, injector) - - // display all the registered HttpMuxer handlers - HttpMuxer.addHandler("", new IndexHandler) - - private[this] lazy val serverBuilder = { - val settings = new TweetServiceSettings(tweetServiceFlags) - val serverBuilder = new TweetServerBuilder(settings) - - val mtlsSessionTrackerFilter = - new MtlsServerSessionTrackerFilter[Array[Byte], Array[Byte]](statsReceiver) - - val mtlsTrackedService = mtlsSessionTrackerFilter.andThen(ActivityService(serverBuilder.build)) - - val thriftMuxServer = ThriftMux.server - // by default, finagle logs exceptions to chickadee, which is deprecated and - // basically unused. to avoid wasted overhead, we explicitly disable the reporter. 
- .configured(Reporter(NullReporterFactory)) - .withLabel("tweetypie") - .withMutualTls(tweetServiceFlags.serviceIdentifier()) - .withOpportunisticTls(OpportunisticTls.Required) - .configured(Thrift.param.ServiceClass(Some(classOf[ThriftTweetService]))) - .serve(tweetServiceFlags.servicePort(), mtlsTrackedService) - - closeOnExit(thriftMuxServer) - await(thriftMuxServer) - - serverBuilder - } - - override def configureRefCatalog( - catalog: Ref[Catalog[StratoFed.Column]] - ): Ref[Catalog[StratoFed.Column]] = - catalog - .join { - Ref( - serverBuilder.stratoTweetService.flatMap { tweetService => - StratoCatalogBuilder.catalog( - tweetService, - serverBuilder.backendClients.stratoserverClient, - serverBuilder.backendClients.gizmoduck.getById, - serverBuilder.backendClients.callbackPromotedContentLogger, - statsReceiver, - serverBuilder.deciderGates.enableCommunityTweetCreates, - ) - } - ) - } - .map { case (l, r) => l ++ r } - - override def configureWarmer(warmer: Warmer): Unit = { - new TweetServiceSettings(tweetServiceFlags).warmupRequestsSettings.foreach { warmupSettings => - warmer.add( - "tweetypie strato catalog", - () => StratoCatalogWarmups.warmup(warmupSettings, composedOps) - ) - } - } -} - -object Main extends TweetypieMain diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.docx new file mode 100644 index 000000000..e9505460d Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala deleted file mode 100644 index 04746792b..000000000 --- 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala +++ /dev/null @@ -1,62 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.io.Buf -import com.twitter.finagle.{Service, SimpleFilter} -import com.twitter.finagle.memcached.protocol._ - -class MemcacheExceptionLoggingFilter extends SimpleFilter[Command, Response] { - // Using a custom logger name so that we can target logging rules specifically - // for memcache excpetion logging. - val logger: Logger = Logger(getClass) - - def apply(command: Command, service: Service[Command, Response]): Future[Response] = { - service(command).respond { - case Return(Error(e)) => - log(command, e) - case Return(ValuesAndErrors(_, errors)) if errors.nonEmpty => - errors.foreach { - case (Buf.Utf8(keyStr), e) => - log(command.name, keyStr, e) - } - case Throw(e) => - log(command, e) - - case _ => - } - } - - private def log(command: Command, e: Throwable): Unit = { - log(command.name, getKey(command), e) - } - - private def log(commandName: String, keyStr: String, e: Throwable): Unit = { - logger.debug( - s"CACHE_EXCEPTION command: ${commandName} key: ${keyStr} exception: ${e.getClass.getName}", - e, - ) - } - - private def getKey(command: Command): String = command match { - case Get(keys) => toKeyStr(keys) - case Gets(keys) => toKeyStr(keys) - - case Set(Buf.Utf8(key), _, _, _) => key - case Add(Buf.Utf8(key), _, _, _) => key - case Cas(Buf.Utf8(key), _, _, _, _) => key - case Delete(Buf.Utf8(key)) => key - case Replace(Buf.Utf8(key), _, _, _) => key - case Append(Buf.Utf8(key), _, _, _) => key - case Prepend(Buf.Utf8(key), _, _, _) => key - - case Incr(Buf.Utf8(key), _) => key - case Decr(Buf.Utf8(key), _) => key - case Stats(keys) => toKeyStr(keys) - case Quit() => "quit" - case Upsert(Buf.Utf8(key), _, _, _, _) => key - case Getv(keys) => toKeyStr(keys) - } - - private def toKeyStr(keys: Seq[Buf]): String = - keys.map { case Buf.Utf8(key) => key 
}.mkString(",") -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.docx new file mode 100644 index 000000000..22acf1f49 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala deleted file mode 100644 index d1f4721dc..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.tweetypie.config - -import com.twitter.config.yaml.YamlMap -import com.twitter.tweetypie.serverutil.PartnerMedia -import scala.util.matching.Regex - -/** - * Helpers for loading resources bundled with Tweetypie. We load them - * through this API in order to be able to unit test the resource - * loading code. 
- */ -object Resources { - def loadPartnerMediaRegexes(): Seq[Regex] = - PartnerMedia.load(YamlMap.load("/partner_media.yml")) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.docx new file mode 100644 index 000000000..8f322bcad Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala deleted file mode 100644 index 523c9be1c..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala +++ /dev/null @@ -1,102 +0,0 @@ -package com.twitter.tweetypie.config - -import com.twitter.servo.cache.{Cache, Cached, CachedValue, CachedValueStatus} -import com.twitter.servo.util.Scribe -import com.twitter.tweetypie.TweetId -import com.twitter.tweetypie.repository.TweetKey -import com.twitter.tweetypie.serverutil.logcachewrites.WriteLoggingCache -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.tweetypie.thriftscala.{CachedTweet, ComposerSource, TweetCacheWrite} -import com.twitter.util.Time - -class ScribeTweetCacheWrites( - val underlyingCache: Cache[TweetKey, Cached[CachedTweet]], - logYoungTweetCacheWrites: TweetId => Boolean, - logTweetCacheWrites: TweetId => Boolean) - extends WriteLoggingCache[TweetKey, Cached[CachedTweet]] { - private[this] lazy val scribe = Scribe(TweetCacheWrite, "tweetypie_tweet_cache_writes") - - private[this] def mkTweetCacheWrite( - id: Long, - action: String, - cachedValue: CachedValue, - cachedTweet: Option[CachedTweet] = None - ): TweetCacheWrite = { - /* - * If the Tweet id is a Snowflake id, calculate the offset since Tweet creation. 
- * If it is not a Snowflake id, then the offset should be 0. See [[TweetCacheWrite]]'s Thrift - * documentation for more details. - */ - val timestampOffset = - if (SnowflakeId.isSnowflakeId(id)) { - SnowflakeId(id).unixTimeMillis.asLong - } else { - 0 - } - - TweetCacheWrite( - tweetId = id, - timestamp = Time.now.inMilliseconds - timestampOffset, - action = action, - cachedValue = cachedValue, - cachedTweet = cachedTweet - ) - } - - /** - * Scribe a TweetCacheWrite record to tweetypie_tweet_cache_writes. We scribe the - * messages instead of writing them to the regular log file because the - * primary use of this logging is to get a record over time of the cache - * actions that affected a tweet, so we need a durable log that we can - * aggregate. - */ - override def log(action: String, k: TweetKey, v: Option[Cached[CachedTweet]]): Unit = - v match { - case Some(cachedTweet) => { - val cachedValue = CachedValue( - status = cachedTweet.status, - cachedAtMsec = cachedTweet.cachedAt.inMilliseconds, - readThroughAtMsec = cachedTweet.readThroughAt.map(_.inMilliseconds), - writtenThroughAtMsec = cachedTweet.writtenThroughAt.map(_.inMilliseconds), - doNotCacheUntilMsec = cachedTweet.doNotCacheUntil.map(_.inMilliseconds), - ) - scribe(mkTweetCacheWrite(k.id, action, cachedValue, cachedTweet.value)) - } - // `v` is only None if the action is a "delete" so set CachedValue with a status `Deleted` - case None => { - val cachedValue = - CachedValue(status = CachedValueStatus.Deleted, cachedAtMsec = Time.now.inMilliseconds) - scribe(mkTweetCacheWrite(k.id, action, cachedValue)) - } - } - - private[this] val YoungTweetThresholdMs = 3600 * 1000 - - private[this] def isYoungTweet(tweetId: TweetId): Boolean = - (SnowflakeId.isSnowflakeId(tweetId) && - ((Time.now.inMilliseconds - SnowflakeId(tweetId).unixTimeMillis.asLong) <= - YoungTweetThresholdMs)) - - /** - * Select all tweets for which the log_tweet_cache_writes decider returns - * true and "young" tweets for which the 
log_young_tweet_cache_writes decider - * returns true. - */ - override def selectKey(k: TweetKey): Boolean = - // When the tweet is young, we log it if it passes either decider. This is - // because the deciders will (by design) select a different subset of - // tweets. We do this so that we have a full record for all tweets for which - // log_tweet_cache_writes is on, but also cast a wider net for tweets that - // are more likely to be affected by replication lag, race conditions - // between different writes, or other consistency issues - logTweetCacheWrites(k.id) || (isYoungTweet(k.id) && logYoungTweetCacheWrites(k.id)) - - /** - * Log newscamera tweets as well as any tweets for which selectKey returns - * true. Note that for newscamera tweets, we will possibly miss "delete" - * actions since those do not have access to the value, and so do not call - * this method. - */ - override def select(k: TweetKey, v: Cached[CachedTweet]): Boolean = - v.value.exists(_.tweet.composerSource.contains(ComposerSource.Camera)) || selectKey(k) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.docx new file mode 100644 index 000000000..17ed3c4fe Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala deleted file mode 100644 index eafd02eaa..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala +++ /dev/null @@ -1,300 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.featureswitches.v2.FeatureSwitches -import com.twitter.stitch.repo.Repo -import com.twitter.tweetypie.backends.LimiterService.Feature -import com.twitter.tweetypie.handler._ -import 
com.twitter.tweetypie.jiminy.tweetypie.NudgeBuilder -import com.twitter.tweetypie.repository.RelationshipKey -import com.twitter.tweetypie.store.TotalTweetStore -import com.twitter.tweetypie.thriftscala._ -import com.twitter.tweetypie.tweettext.TweetText -import com.twitter.visibility.common.TrustedFriendsSource -import com.twitter.visibility.common.UserRelationshipSource -import com.twitter.visibility.writer.interfaces.tweets.TweetWriteEnforcementLibrary - -trait TweetBuilders { - val retweetBuilder: RetweetBuilder.Type - val tweetBuilder: TweetBuilder.Type -} - -object TweetBuilders { - - def validateCardRefAttachmentByUserAgentGate( - android: Gate[Unit], - nonAndroid: Gate[Unit] - ): Gate[Option[String]] = - Gate[Option[String]] { (userAgent: Option[String]) => - if (userAgent.exists(_.startsWith("TwitterAndroid"))) { - android() - } else { - nonAndroid() - } - } - - def apply( - settings: TweetServiceSettings, - statsReceiver: StatsReceiver, - deciderGates: TweetypieDeciderGates, - featureSwitchesWithExperiments: FeatureSwitches, - clients: BackendClients, - caches: Caches, - repos: LogicalRepositories, - tweetStore: TotalTweetStore, - hasMedia: Tweet => Boolean, - unretweetEdits: TweetDeletePathHandler.UnretweetEdits, - ): TweetBuilders = { - val urlShortener = - UrlShortener.scribeMalware(clients.guano) { - UrlShortener.fromTalon(clients.talon.shorten) - } - - val urlEntityBuilder = UrlEntityBuilder.fromShortener(urlShortener) - - val geoBuilder = - GeoBuilder( - repos.placeRepo, - ReverseGeocoder.fromGeoduck(clients.geoduckGeohashLocate), - statsReceiver.scope("geo_builder") - ) - - val replyCardUsersFinder: CardUsersFinder.Type = CardUsersFinder(repos.cardUsersRepo) - - val selfThreadBuilder = SelfThreadBuilder(statsReceiver.scope("self_thread_builder")) - - val replyBuilder = - ReplyBuilder( - repos.userIdentityRepo, - repos.optionalTweetRepo, - replyCardUsersFinder, - selfThreadBuilder, - repos.relationshipRepo, - repos.unmentionedEntitiesRepo, - 
deciderGates.enableRemoveUnmentionedImplicitMentions, - statsReceiver.scope("reply_builder"), - TweetText.MaxMentions - ) - - val mediaBuilder = - MediaBuilder( - clients.mediaClient.processMedia, - CreateMediaTco(urlShortener), - statsReceiver.scope("media_builder") - ) - - val validateAttachments = - AttachmentBuilder.validateAttachments( - statsReceiver, - validateCardRefAttachmentByUserAgentGate( - android = deciderGates.validateCardRefAttachmentAndroid, - nonAndroid = deciderGates.validateCardRefAttachmentNonAndroid - ) - ) - - val attachmentBuilder = - AttachmentBuilder( - repos.optionalTweetRepo, - urlShortener, - validateAttachments, - statsReceiver.scope("attachment_builder"), - deciderGates.denyNonTweetPermalinks - ) - - val validatePostTweetRequest: FutureEffect[PostTweetRequest] = - TweetBuilder.validateAdditionalFields[PostTweetRequest] - - val validateRetweetRequest = - TweetBuilder.validateAdditionalFields[RetweetRequest] - - val tweetIdGenerator = - () => clients.snowflakeClient.get() - - val retweetSpamChecker = - Spam.gated(deciderGates.checkSpamOnRetweet) { - Spam.allowOnException( - ScarecrowRetweetSpamChecker( - statsReceiver.scope("retweet_builder").scope("spam"), - repos.retweetSpamCheckRepo - ) - ) - } - - val tweetSpamChecker = - Spam.gated(deciderGates.checkSpamOnTweet) { - Spam.allowOnException( - ScarecrowTweetSpamChecker.fromSpamCheckRepository( - statsReceiver.scope("tweet_builder").scope("spam"), - repos.tweetSpamCheckRepo - ) - ) - } - - val duplicateTweetFinder = - DuplicateTweetFinder( - settings = settings.duplicateTweetFinderSettings, - tweetSource = DuplicateTweetFinder.TweetSource.fromServices( - tweetRepo = repos.optionalTweetRepo, - getStatusTimeline = clients.timelineService.getStatusTimeline - ) - ) - - val validateUpdateRateLimit = - RateLimitChecker.validate( - clients.limiterService.hasRemaining(Feature.Updates), - statsReceiver.scope("rate_limits", Feature.Updates.name), - deciderGates.rateLimitByLimiterService - ) - - 
val tweetBuilderStats = statsReceiver.scope("tweet_builder") - - val updateUserCounts = - TweetBuilder.updateUserCounts(hasMedia) - - val filterInvalidData = - TweetBuilder.filterInvalidData( - validateTweetMediaTags = TweetBuilder.validateTweetMediaTags( - tweetBuilderStats.scope("media_tags_filter"), - RateLimitChecker.getMaxMediaTags( - clients.limiterService.minRemaining(Feature.MediaTagCreate), - TweetBuilder.MaxMediaTagCount - ), - repos.optionalUserRepo - ), - cardReferenceBuilder = TweetBuilder.cardReferenceBuilder( - CardReferenceValidationHandler(clients.expandodo.checkAttachmentEligibility), - urlShortener - ) - ) - - val rateLimitFailures = - PostTweet.RateLimitFailures( - validateLimit = RateLimitChecker.validate( - clients.limiterService.hasRemaining(Feature.TweetCreateFailure), - statsReceiver.scope("rate_limits", Feature.TweetCreateFailure.name), - deciderGates.rateLimitTweetCreationFailure - ), - clients.limiterService.incrementByOne(Feature.Updates), - clients.limiterService.incrementByOne(Feature.TweetCreateFailure) - ) - - val countFailures = - PostTweet.CountFailures[TweetBuilderResult](statsReceiver) - - val tweetBuilderFilter: PostTweet.Filter[TweetBuilderResult] = - rateLimitFailures.andThen(countFailures) - - val conversationControlBuilder = ConversationControlBuilder.fromUserIdentityRepo( - statsReceiver = statsReceiver.scope("conversation_control_builder"), - userIdentityRepo = repos.userIdentityRepo - ) - - val conversationControlValidator = ConversationControlBuilder.Validate( - useFeatureSwitchResults = deciderGates.useConversationControlFeatureSwitchResults, - statsReceiver = statsReceiver - ) - - val communitiesValidator: CommunitiesValidator.Type = CommunitiesValidator() - - val collabControlBuilder: CollabControlBuilder.Type = CollabControlBuilder() - - val userRelationshipSource = UserRelationshipSource.fromRepo( - Repo[UserRelationshipSource.Key, Unit, Boolean] { (key, _) => - repos.relationshipRepo( - 
RelationshipKey(key.subjectId, key.objectId, key.relationship) - ) - } - ) - - val trustedFriendsSource = - TrustedFriendsSource.fromStrato(clients.stratoserverClient, statsReceiver) - - val validateTweetWrite = TweetWriteValidator( - convoCtlRepo = repos.conversationControlRepo, - tweetWriteEnforcementLibrary = TweetWriteEnforcementLibrary( - userRelationshipSource, - trustedFriendsSource, - repos.userIsInvitedToConversationRepo, - repos.stratoSuperFollowEligibleRepo, - repos.tweetRepo, - statsReceiver.scope("tweet_write_enforcement_library") - ), - enableExclusiveTweetControlValidation = deciderGates.enableExclusiveTweetControlValidation, - enableTrustedFriendsControlValidation = deciderGates.enableTrustedFriendsControlValidation, - enableStaleTweetValidation = deciderGates.enableStaleTweetValidation - ) - - val nudgeBuilder = NudgeBuilder( - clients.stratoserverClient, - deciderGates.jiminyDarkRequests, - statsReceiver.scope("nudge_builder") - ) - - val editControlBuilder = EditControlBuilder( - tweetRepo = repos.tweetRepo, - card2Repo = repos.card2Repo, - promotedTweetRepo = repos.stratoPromotedTweetRepo, - subscriptionVerificationRepo = repos.stratoSubscriptionVerificationRepo, - disablePromotedTweetEdit = deciderGates.disablePromotedTweetEdit, - checkTwitterBlueSubscription = deciderGates.checkTwitterBlueSubscriptionForEdit, - setEditWindowToSixtyMinutes = deciderGates.setEditTimeWindowToSixtyMinutes, - stats = statsReceiver, - ) - - val validateEdit = EditValidator(repos.optionalTweetRepo) - - // TweetBuilders builds two distinct TweetBuilders (Tweet and Retweet builders). 
- new TweetBuilders { - val tweetBuilder: TweetBuilder.Type = - tweetBuilderFilter[PostTweetRequest]( - TweetBuilder( - stats = tweetBuilderStats, - validateRequest = validatePostTweetRequest, - validateEdit = validateEdit, - validateUpdateRateLimit = validateUpdateRateLimit, - tweetIdGenerator = tweetIdGenerator, - userRepo = repos.userRepo, - deviceSourceRepo = repos.deviceSourceRepo, - communityMembershipRepo = repos.stratoCommunityMembershipRepo, - communityAccessRepo = repos.stratoCommunityAccessRepo, - urlShortener = urlShortener, - urlEntityBuilder = urlEntityBuilder, - geoBuilder = geoBuilder, - replyBuilder = replyBuilder, - mediaBuilder = mediaBuilder, - attachmentBuilder = attachmentBuilder, - duplicateTweetFinder = duplicateTweetFinder, - spamChecker = tweetSpamChecker, - filterInvalidData = filterInvalidData, - updateUserCounts = updateUserCounts, - validateConversationControl = conversationControlValidator, - conversationControlBuilder = conversationControlBuilder, - validateTweetWrite = validateTweetWrite, - nudgeBuilder = nudgeBuilder, - communitiesValidator = communitiesValidator, - collabControlBuilder = collabControlBuilder, - editControlBuilder = editControlBuilder, - featureSwitches = featureSwitchesWithExperiments, - ) - ) - - val retweetBuilder: RetweetBuilder.Type = - tweetBuilderFilter[RetweetRequest]( - RetweetBuilder( - validateRequest = validateRetweetRequest, - tweetIdGenerator = tweetIdGenerator, - tweetRepo = repos.tweetRepo, - userRepo = repos.userRepo, - tflock = clients.tflockWriteClient, - deviceSourceRepo = repos.deviceSourceRepo, - validateUpdateRateLimit = validateUpdateRateLimit, - spamChecker = retweetSpamChecker, - updateUserCounts = updateUserCounts, - superFollowRelationsRepo = repos.stratoSuperFollowRelationsRepo, - unretweetEdits = unretweetEdits, - setEditWindowToSixtyMinutes = deciderGates.setEditTimeWindowToSixtyMinutes - ) - ) - } - } -} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.docx new file mode 100644 index 000000000..713851142 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala deleted file mode 100644 index af71bf89d..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala +++ /dev/null @@ -1,341 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.featureswitches.v2.FeatureSwitches -import com.twitter.servo.cache.Cached -import com.twitter.servo.cache.LockingCache -import com.twitter.servo.util.ExceptionCategorizer -import com.twitter.servo.util.ExceptionCounter -import com.twitter.servo.util.FutureEffect -import com.twitter.servo.util.Scribe -import com.twitter.stitch.NotFound -import com.twitter.tweetypie.core.FilteredState -import com.twitter.tweetypie.core.TweetData -import com.twitter.tweetypie.core.ValueState -import com.twitter.tweetypie.hydrator._ -import com.twitter.tweetypie.repository.TweetQuery -import com.twitter.tweetypie.serverutil.{ExceptionCounter => TpExceptionCounter} -import com.twitter.tweetypie.thriftscala._ -import com.twitter.tweetypie.client_id.ClientIdHelper - -trait TweetHydrators { - - /** - * Hydrator that has all the Tweet hydrators (entire "pipeline") configured - * and wired up. - * This hydrator is used both on the read and write path and is - * customized by different TweetQuery.Options. - * Modifications are not automatically written back to cache. - * `cacheChanges` must be used for that. - */ - def hydrator: TweetDataValueHydrator - - /** - * The `Effect` to use to write modified tweets back to cache. 
- */ - def cacheChangesEffect: Effect[ValueState[TweetData]] -} - -object TweetHydrators { - - /** - * Creates all the hydrators and calls TweetHydration to wire them up. - */ - def apply( - stats: StatsReceiver, - deciderGates: TweetypieDeciderGates, - repos: LogicalRepositories, - tweetDataCache: LockingCache[TweetId, Cached[TweetData]], - hasMedia: Tweet => Boolean, - featureSwitchesWithoutExperiments: FeatureSwitches, - clientIdHelper: ClientIdHelper - ): TweetHydrators = { - import repos._ - - val repairStats = stats.scope("repairs") - val hydratorStats = stats.scope("hydrators") - - def scoped[A](stats: StatsReceiver, name: String)(f: StatsReceiver => A): A = { - val scopedStats = stats.scope(name) - f(scopedStats) - } - - val isFailureException: Throwable => Boolean = { - case _: FilteredState => false - case NotFound => false - case _ => true - } - - def hydratorExceptionCategorizer(failureScope: String) = - ExceptionCategorizer.const("filtered").onlyIf(_.isInstanceOf[FilteredState]) ++ - ExceptionCategorizer.const("not_found").onlyIf(_ == NotFound) ++ - TpExceptionCounter.defaultCategorizer(failureScope).onlyIf(isFailureException) - - val hydratorExceptionCounter: (StatsReceiver, String) => ExceptionCounter = - (stats, scope) => TpExceptionCounter(stats, hydratorExceptionCategorizer(scope)) - - val tweetHydrator = - TweetHydration( - hydratorStats = hydratorStats, - hydrateFeatureSwitchResults = - FeatureSwitchResultsHydrator(featureSwitchesWithoutExperiments, clientIdHelper), - hydrateMentions = MentionEntitiesHydrator - .once(MentionEntityHydrator(userIdentityRepo)) - .observe(hydratorStats.scope("mentions"), hydratorExceptionCounter), - hydrateLanguage = LanguageHydrator(languageRepo) - .observe(hydratorStats.scope("language"), hydratorExceptionCounter), - hydrateUrls = scoped(hydratorStats, "url") { stats => - UrlEntitiesHydrator - .once(UrlEntityHydrator(urlRepo, stats)) - .observe(stats, hydratorExceptionCounter) - }, - hydrateQuotedTweetRef = 
QuotedTweetRefHydrator - .once( - QuotedTweetRefHydrator(tweetRepo) - ) - .observe(hydratorStats.scope("quoted_tweet_ref"), hydratorExceptionCounter), - hydrateQuotedTweetRefUrls = QuotedTweetRefUrlsHydrator(userIdentityRepo) - .observe(hydratorStats.scope("quoted_tweet_ref_urls"), hydratorExceptionCounter), - hydrateMediaCacheable = MediaEntitiesHydrator.Cacheable - .once( - MediaEntityHydrator.Cacheable( - hydrateMediaUrls = MediaUrlFieldsHydrator() - .observe(hydratorStats.scope("media_urls"), hydratorExceptionCounter), - hydrateMediaIsProtected = MediaIsProtectedHydrator(userProtectionRepo) - .observe(hydratorStats.scope("media_is_protected"), hydratorExceptionCounter) - ) - ) - .observe(hydratorStats.scope("media_cacheable"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydrateMedia), - hydrateReplyScreenName = ReplyScreenNameHydrator - .once(ReplyScreenNameHydrator(userIdentityRepo)) - .observe(hydratorStats.scope("in_reply_to_screen_name"), hydratorExceptionCounter), - hydrateConvoId = ConversationIdHydrator(conversationIdRepo) - .observe(hydratorStats.scope("conversation_id"), hydratorExceptionCounter), - hydratePerspective = // Don't cache with the tweet because it depends on the request - PerspectiveHydrator( - repo = perspectiveRepo, - shouldHydrateBookmarksPerspective = deciderGates.hydrateBookmarksPerspective, - stats = hydratorStats.scope("perspective_by_safety_label") - ).observe(hydratorStats.scope("perspective"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydratePerspectives), - hydrateEditPerspective = EditPerspectiveHydrator( - repo = perspectiveRepo, - timelinesGate = deciderGates.hydratePerspectivesEditsForTimelines, - tweetDetailsGate = deciderGates.hydratePerspectivesEditsForTweetDetail, - otherSafetyLevelsGate = deciderGates.hydratePerspectivesEditsForOtherSafetyLevels, - bookmarksGate = deciderGates.hydrateBookmarksPerspective, - stats = hydratorStats - ).observe(hydratorStats.scope("edit_perspective"), 
hydratorExceptionCounter), - hydrateConversationMuted = // Don't cache because it depends on the request. If - // possible, this hydrator should be in the same stage as - // PerspectiveHydrator, so that the calls can be batched - // together. - ConversationMutedHydrator(conversationMutedRepo) - .observe(hydratorStats.scope("conversation_muted"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydrateConversationMuted), - hydrateContributor = ContributorHydrator - .once(ContributorHydrator(userIdentityRepo)) - .observe(hydratorStats.scope("contributors"), hydratorExceptionCounter), - hydrateTakedowns = TakedownHydrator(takedownRepo) - .observe(hydratorStats.scope("takedowns"), hydratorExceptionCounter), - hydrateDirectedAt = scoped(hydratorStats, "directed_at") { stats => - DirectedAtHydrator - .once(DirectedAtHydrator(userIdentityRepo, stats)) - .observe(stats, hydratorExceptionCounter) - }, - hydrateGeoScrub = GeoScrubHydrator( - geoScrubTimestampRepo, - Scribe("test_tweetypie_read_time_geo_scrubs") - .contramap[TweetId](_.toString) - ).observe(hydratorStats.scope("geo_scrub"), hydratorExceptionCounter), - hydrateCacheableRepairs = ValueHydrator - .fromMutation[Tweet, TweetQuery.Options]( - RepairMutation( - repairStats.scope("on_read"), - "created_at" -> - new CreatedAtRepairer(Scribe("test_tweetypie_bad_created_at")), - "retweet_media" -> RetweetMediaRepairer, - "parent_status_id" -> RetweetParentStatusIdRepairer.tweetMutation, - "visible_text_range" -> NegativeVisibleTextRangeRepairer.tweetMutation - ) - ) - .lensed(TweetData.Lenses.tweet) - .onlyIf((td, opts) => opts.cause.reading(td.tweet.id)), - hydrateMediaUncacheable = MediaEntityHydrator - .Uncacheable( - hydrateMediaKey = MediaKeyHydrator() - .observe(hydratorStats.scope("media_key"), hydratorExceptionCounter), - hydrateMediaInfo = scoped(hydratorStats, "media_info") { stats => - MediaInfoHydrator(mediaMetadataRepo, stats) - .observe(stats, hydratorExceptionCounter) - } - ) - 
.observe(hydratorStats.scope("media_uncacheable"), hydratorExceptionCounter) - .liftSeq - .ifEnabled(deciderGates.hydrateMedia), - hydratePostCacheRepairs = - // clean-up partially hydrated entities before any of the hydrators that look at - // url and media entities run, so that they never see bad entities. - ValueHydrator.fromMutation[TweetData, TweetQuery.Options]( - RepairMutation( - repairStats.scope("on_read"), - "partial_entity_cleanup" -> PartialEntityCleaner(repairStats), - "strip_not_display_coords" -> StripHiddenGeoCoordinates - ).lensed(TweetData.Lenses.tweet) - ), - hydrateTweetLegacyFormat = scoped(hydratorStats, "tweet_legacy_formatter") { stats => - TweetLegacyFormatter(stats) - .observe(stats, hydratorExceptionCounter) - .onlyIf((td, opts) => opts.cause.reading(td.tweet.id)) - }, - hydrateQuoteTweetVisibility = QuoteTweetVisibilityHydrator(quotedTweetVisibilityRepo) - .observe(hydratorStats.scope("quote_tweet_visibility"), hydratorExceptionCounter), - hydrateQuotedTweet = QuotedTweetHydrator(tweetResultRepo) - .observe(hydratorStats.scope("quoted_tweet"), hydratorExceptionCounter), - hydratePastedMedia = - // Don't cache with the tweet because we want to automatically drop this media if - // the referenced tweet is deleted or becomes non-public. 
- PastedMediaHydrator(pastedMediaRepo) - .observe(hydratorStats.scope("pasted_media")) - .ifEnabled(deciderGates.hydratePastedMedia), - hydrateMediaRefs = MediaRefsHydrator( - optionalTweetRepo, - deciderGates.mediaRefsHydratorIncludePastedMedia - ).observe(hydratorStats.scope("media_refs")) - .ifEnabled(deciderGates.hydrateMediaRefs), - hydrateMediaTags = // depends on AdditionalFieldsHydrator - MediaTagsHydrator(userViewRepo) - .observe(hydratorStats.scope("media_tags"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydrateMediaTags), - hydrateClassicCards = CardHydrator(cardRepo) - .observe(hydratorStats.scope("cards"), hydratorExceptionCounter), - hydrateCard2 = Card2Hydrator(card2Repo) - .observe(hydratorStats.scope("card2")), - hydrateContributorVisibility = - // Filter out contributors field for all but the user who owns the tweet - ContributorVisibilityFilter() - .observe(hydratorStats.scope("contributor_visibility"), hydratorExceptionCounter), - hydrateHasMedia = - // Sets hasMedia. Comes after PastedMediaHydrator in order to include pasted - // pics as well as other media & urls. 
- HasMediaHydrator(hasMedia) - .observe(hydratorStats.scope("has_media"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydrateHasMedia), - hydrateTweetCounts = // Don't cache counts with the tweet because it has its own cache with - // a different TTL - TweetCountsHydrator(tweetCountsRepo, deciderGates.hydrateBookmarksCount) - .observe(hydratorStats.scope("tweet_counts"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydrateCounts), - hydratePreviousTweetCounts = // previous counts are not cached - scoped(hydratorStats, "previous_counts") { stats => - PreviousTweetCountsHydrator(tweetCountsRepo, deciderGates.hydrateBookmarksCount) - .observe(stats, hydratorExceptionCounter) - .ifEnabled(deciderGates.hydratePreviousCounts) - }, - hydratePlace = - // Don't cache with the tweet because Place has its own tweetypie cache keyspace - // with a different TTL, and it's more efficient to store separately. - // See com.twitter.tweetypie.repository.PlaceKey - PlaceHydrator(placeRepo) - .observe(hydratorStats.scope("place"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydratePlaces), - hydrateDeviceSource = // Don't cache with the tweet because it has its own cache, - // and it's more efficient to cache it separately - DeviceSourceHydrator(deviceSourceRepo) - .observe(hydratorStats.scope("device_source"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydrateDeviceSources), - hydrateProfileGeo = - // Don't cache gnip profile geo as read request volume is expected to be low - ProfileGeoHydrator(profileGeoRepo) - .observe(hydratorStats.scope("profile_geo"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydrateGnipProfileGeoEnrichment), - hydrateSourceTweet = scoped(hydratorStats, "source_tweet") { stats => - SourceTweetHydrator( - tweetResultRepo, - stats, - FutureEffect - .inParallel( - Scribe(DetachedRetweet, "tweetypie_detached_retweets"), - Scribe(DetachedRetweet, "test_tweetypie_detached_retweets"), - ) - ).observe(stats, 
hydratorExceptionCounter) - }, - hydrateIM1837State = IM1837FilterHydrator() - .observe(hydratorStats.scope("im1837_filter"), hydratorExceptionCounter) - .onlyIf { (_, ctx) => - ctx.opts.forExternalConsumption && ctx.opts.cause.reading(ctx.tweetId) - }, - hydrateIM2884State = scoped(hydratorStats, "im2884_filter") { stats => - IM2884FilterHydrator(stats) - .observe(stats, hydratorExceptionCounter) - .onlyIf { (_, ctx) => - ctx.opts.forExternalConsumption && ctx.opts.cause.reading(ctx.tweetId) - } - }, - hydrateIM3433State = scoped(hydratorStats, "im3433_filter") { stats => - IM3433FilterHydrator(stats) - .observe(stats, hydratorExceptionCounter) - .onlyIf { (_, ctx) => - ctx.opts.forExternalConsumption && ctx.opts.cause.reading(ctx.tweetId) - } - }, - hydrateTweetAuthorVisibility = TweetAuthorVisibilityHydrator(userVisibilityRepo) - .observe(hydratorStats.scope("tweet_author_visibility"), hydratorExceptionCounter) - .onlyIf((_, ctx) => ctx.opts.cause.reading(ctx.tweetId)), - hydrateReportedTweetVisibility = ReportedTweetFilter() - .observe(hydratorStats.scope("reported_tweet_filter"), hydratorExceptionCounter), - scrubSuperfluousUrlEntities = ValueHydrator - .fromMutation[Tweet, TweetQuery.Options](SuperfluousUrlEntityScrubber.mutation) - .lensed(TweetData.Lenses.tweet), - copyFromSourceTweet = CopyFromSourceTweet.hydrator - .observe(hydratorStats.scope("copy_from_source_tweet"), hydratorExceptionCounter), - hydrateTweetVisibility = scoped(hydratorStats, "tweet_visibility") { stats => - TweetVisibilityHydrator( - tweetVisibilityRepo, - deciderGates.failClosedInVF, - stats - ).observe(stats, hydratorExceptionCounter) - }, - hydrateEscherbirdAnnotations = EscherbirdAnnotationHydrator(escherbirdAnnotationRepo) - .observe(hydratorStats.scope("escherbird_annotations"), hydratorExceptionCounter) - .ifEnabled(deciderGates.hydrateEscherbirdAnnotations), - hydrateScrubEngagements = ScrubEngagementHydrator() - .observe(hydratorStats.scope("scrub_engagements"), 
hydratorExceptionCounter) - .ifEnabled(deciderGates.hydrateScrubEngagements), - hydrateConversationControl = scoped(hydratorStats, "tweet_conversation_control") { stats => - ConversationControlHydrator( - conversationControlRepo, - deciderGates.disableInviteViaMention, - stats - ).observe(stats, hydratorExceptionCounter) - }, - hydrateEditControl = scoped(hydratorStats, "tweet_edit_control") { stats => - EditControlHydrator( - tweetRepo, - deciderGates.setEditTimeWindowToSixtyMinutes, - stats - ).observe(stats, hydratorExceptionCounter) - }, - hydrateUnmentionData = UnmentionDataHydrator(), - hydrateNoteTweetSuffix = NoteTweetSuffixHydrator().observe(stats, hydratorExceptionCounter) - ) - - new TweetHydrators { - val hydrator: TweetDataValueHydrator = - tweetHydrator.onlyIf { (tweetData, opts) => - // When the caller requests fetchStoredTweets and Tweets are fetched from Manhattan - // irrespective of state, the stored data for some Tweets may be incomplete. - // We skip the hydration of those Tweets. 
- !opts.fetchStoredTweets || - tweetData.storedTweetResult.exists(_.canHydrate) - } - - val cacheChangesEffect: Effect[ValueState[TweetData]] = - TweetHydration.cacheChanges( - tweetDataCache, - hydratorStats.scope("tweet_caching") - ) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.docx new file mode 100644 index 000000000..06c87f08b Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala deleted file mode 100644 index 795e1b300..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala +++ /dev/null @@ -1,300 +0,0 @@ -package com.twitter.tweetypie.config - -import com.twitter.decider.Decider -import com.twitter.decider.DeciderFactory -import com.twitter.decider.LocalOverrides -import com.twitter.featureswitches.v2.builder.FeatureSwitchesBuilder -import com.twitter.finagle.filter.DarkTrafficFilter -import com.twitter.finagle.stats.DefaultStatsReceiver -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.thrift.Protocols -import com.twitter.finagle.util.DefaultTimer -import com.twitter.finagle.Filter -import com.twitter.finagle.Service -import com.twitter.finagle.SimpleFilter -import com.twitter.quill.capture._ -import com.twitter.servo.util.MemoizingStatsReceiver -import com.twitter.servo.util.WaitForServerSets -import com.twitter.tweetypie.ThriftTweetService -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.tweetypie.client_id.ConditionalServiceIdentifierStrategy -import 
com.twitter.tweetypie.client_id.PreferForwardedServiceIdentifierForStrato -import com.twitter.tweetypie.client_id.UseTransportServiceIdentifier -import com.twitter.tweetypie.context.TweetypieContext -import com.twitter.tweetypie.matching.Tokenizer -import com.twitter.tweetypie.service._ -import com.twitter.tweetypie.thriftscala.TweetServiceInternal$FinagleService -import com.twitter.util._ -import com.twitter.util.logging.Logger -import scala.util.control.NonFatal - -class TweetServerBuilder(settings: TweetServiceSettings) { - - /** - * A logger used by some of the built-in initializers. - */ - val log: Logger = Logger(getClass) - - /** - * The top-level stats receiver. Defaults to the default StatsReceiver - * embedded in Finagle. - */ - val statsReceiver: StatsReceiver = - new MemoizingStatsReceiver(DefaultStatsReceiver) - - val hostStatsReceiver: StatsReceiver = - if (settings.clientHostStats) - statsReceiver - else - NullStatsReceiver - - /** - * A timer for scheduling various things. - */ - val timer: Timer = DefaultTimer - - /** - * Creates a decider instance by looking up the decider configuration information - * from the settings object. - */ - val decider: Decider = { - val fileBased = DeciderFactory(settings.deciderBaseFilename, settings.deciderOverlayFilename)() - - // Use the tweetypie decider dashboard name for propagating decider overrides. - LocalOverrides.decider("tweetypie").orElse(fileBased) - } - - val deciderGates: TweetypieDeciderGates = { - val deciderGates = TweetypieDeciderGates(decider, settings.deciderOverrides) - - // Write out the configuration overrides to the log so that it's - // easy to confirm how this instance has been customized. 
- deciderGates.overrides.foreach { - case (overrideName, overrideValue) => - log.info("Decider feature " + overrideName + " overridden to " + overrideValue) - if (deciderGates.unusedOverrides.contains(overrideName)) { - log.error("Unused decider override flag: " + overrideName) - } - } - - val scopedReceiver = statsReceiver.scope("decider_values") - - deciderGates.availabilityMap.foreach { - case (feature, value) => - scopedReceiver.provideGauge(feature) { - // Default value of -1 indicates error state. - value.getOrElse(-1).toFloat - } - } - - deciderGates - } - - val featureSwitchesWithExperiments = FeatureSwitchesBuilder - .createWithExperiments("/features/tweetypie/main") - .build() - - val featureSwitchesWithoutExperiments = FeatureSwitchesBuilder - .createWithNoExperiments("/features/tweetypie/main", Some(statsReceiver)) - .build() - - // ********* initializer ********** - - private[this] def warmupTextTokenization(logger: Logger): Unit = { - logger.info("Warming up text tokenization") - val watch = Stopwatch.start() - Tokenizer.warmUp() - logger.info(s"Warmed up text tokenization in ${watch()}") - } - - private[this] def runWarmup(tweetService: Activity[ThriftTweetService]): Unit = { - val tokenizationLogger = Logger("com.twitter.tweetypie.TweetServerBuilder.TokenizationWarmup") - warmupTextTokenization(tokenizationLogger) - - val warmupLogger = Logger("com.twitter.tweetypie.TweetServerBuilder.BackendWarmup") - // #1 warmup backends - Await.ready(settings.backendWarmupSettings(backendClients, warmupLogger, timer)) - - // #2 warmup Tweet Service - Await.ready { - tweetService.values.toFuture.map(_.get).map { service => - settings.warmupRequestsSettings.foreach(new TweetServiceWarmer(_)(service)) - } - } - } - - private[this] def waitForServerSets(): Unit = { - val names = backendClients.referencedNames - val startTime = Time.now - log.info("will wait for serversets: " + names.mkString("\n", "\t\n", "")) - - try { - Await.result(WaitForServerSets.ready(names, 
settings.waitForServerSetsTimeout, timer)) - val duration = Time.now.since(startTime) - log.info("resolved all serversets in " + duration) - } catch { - case NonFatal(ex) => log.warn("failed to resolve all serversets", ex) - } - } - - private[this] def initialize(tweetService: Activity[ThriftTweetService]): Unit = { - waitForServerSets() - runWarmup(tweetService) - - // try to force a GC before starting to serve requests; this may or may not do anything - System.gc() - } - - // ********* builders ********** - - val clientIdHelper = new ClientIdHelper( - new ConditionalServiceIdentifierStrategy( - condition = deciderGates.preferForwardedServiceIdentifierForClientId, - ifTrue = PreferForwardedServiceIdentifierForStrato, - ifFalse = UseTransportServiceIdentifier, - ), - ) - - val backendClients: BackendClients = - BackendClients( - settings = settings, - deciderGates = deciderGates, - statsReceiver = statsReceiver, - hostStatsReceiver = hostStatsReceiver, - timer = timer, - clientIdHelper = clientIdHelper, - ) - - val tweetService: Activity[ThriftTweetService] = - TweetServiceBuilder( - settings = settings, - statsReceiver = statsReceiver, - timer = timer, - deciderGates = deciderGates, - featureSwitchesWithExperiments = featureSwitchesWithExperiments, - featureSwitchesWithoutExperiments = featureSwitchesWithoutExperiments, - backendClients = backendClients, - clientIdHelper = clientIdHelper, - ) - - // Strato columns should use this tweetService - def stratoTweetService: Activity[ThriftTweetService] = - tweetService.map { service => - // Add quill functionality to the strato tweet service only - val quillCapture = QuillCaptureBuilder(settings, deciderGates) - new QuillTweetService(quillCapture, service) - } - - def build: Activity[Service[Array[Byte], Array[Byte]]] = { - - val quillCapture = QuillCaptureBuilder(settings, deciderGates) - - val darkTrafficFilter: SimpleFilter[Array[Byte], Array[Byte]] = - if (!settings.trafficForkingEnabled) { - Filter.identity - } 
else { - new DarkTrafficFilter( - backendClients.darkTrafficClient, - _ => deciderGates.forkDarkTraffic(), - statsReceiver - ) - } - - val serviceFilter = - quillCapture - .getServerFilter(ThriftProto.server) - .andThen(TweetypieContext.Local.filter[Array[Byte], Array[Byte]]) - .andThen(darkTrafficFilter) - - initialize(tweetService) - - // tweetService is an Activity[ThriftTweetService], so this callback - // is called every time that Activity updates (on ConfigBus changes). - tweetService.map { service => - val finagleService = - new TweetServiceInternal$FinagleService( - service, - protocolFactory = Protocols.binaryFactory(), - stats = NullStatsReceiver, - maxThriftBufferSize = settings.maxThriftBufferSize - ) - - serviceFilter andThen finagleService - } - } -} - -object QuillCaptureBuilder { - val tweetServiceWriteMethods: Set[String] = - Set( - "async_delete", - "async_delete_additional_fields", - "async_erase_user_tweets", - "async_incr_fav_count", - "async_insert", - "async_set_additional_fields", - "async_set_retweet_visibility", - "async_takedown", - "async_undelete_tweet", - "async_update_possibly_sensitive_tweet", - "cascaded_delete_tweet", - "delete_additional_fields", - "delete_retweets", - "delete_tweets", - "erase_user_tweets", - "flush", - "incr_fav_count", - "insert", - "post_retweet", - "post_tweet", - "remove", - "replicated_delete_additional_fields", - "replicated_delete_tweet", - "replicated_delete_tweet2", - "replicated_incr_fav_count", - "replicated_insert_tweet2", - "replicated_scrub_geo", - "replicated_set_additional_fields", - "replicated_set_has_safety_labels", - "replicated_set_retweet_visibility", - "replicated_takedown", - "replicated_undelete_tweet2", - "replicated_update_possibly_sensitive_tweet", - "scrub_geo", - "scrub_geo_update_user_timestamp", - "set_additional_fields", - "set_has_safety_labels", - "set_retweet_visibility", - "set_tweet_user_takedown", - "takedown", - "undelete_tweet" - ) - - val tweetServiceReadMethods: 
Set[String] = - Set( - "get_tweet_counts", - "get_tweet_fields", - "get_tweets", - "replicated_get_tweet_counts", - "replicated_get_tweet_fields", - "replicated_get_tweets" - ) - - def apply(settings: TweetServiceSettings, deciderGates: TweetypieDeciderGates): QuillCapture = { - val writesStore = SimpleScribeMessageStore("tweetypie_writes") - .enabledBy(deciderGates.logWrites) - - val readsStore = SimpleScribeMessageStore("tweetypie_reads") - .enabledBy(deciderGates.logReads) - - val messageStore = - MessageStore.selected { - case msg if tweetServiceWriteMethods.contains(msg.name) => writesStore - case msg if tweetServiceReadMethods.contains(msg.name) => readsStore - case _ => writesStore - } - - new QuillCapture(Store.legacyStore(messageStore), Some(settings.thriftClientId.name)) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.docx new file mode 100644 index 000000000..6395fcccc Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala deleted file mode 100644 index 765a608a2..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala +++ /dev/null @@ -1,399 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.mtls.transport.S2STransport -import com.twitter.servo.gate.RateLimitingGate -import com.twitter.servo.request.ClientRequestAuthorizer.UnauthorizedException -import com.twitter.servo.request.{ClientRequestAuthorizer, ClientRequestObserver} -import com.twitter.tweetypie.client_id.ClientIdHelper -import 
com.twitter.tweetypie.client_id.PreferForwardedServiceIdentifierForStrato -import com.twitter.tweetypie.core.RateLimited -import com.twitter.tweetypie.service.MethodAuthorizer -import com.twitter.tweetypie.thriftscala._ -import com.twitter.util.Future - -/** - * Compose a ClientRequestAuthorizer for - * ClientHandlingTweetService - */ -object ClientHandlingTweetServiceAuthorizer { - private val RateLimitExceeded = - RateLimited("Your ClientId has exceeded the rate limit for non-allowListed clients.") - - def apply( - settings: TweetServiceSettings, - dynamicConfig: DynamicConfig, - statsReceiver: StatsReceiver, - getServiceIdentifier: () => ServiceIdentifier = S2STransport.peerServiceIdentifier _ - ): ClientRequestAuthorizer = { - val authorizer = - if (settings.allowlistingRequired) { - val limitingGate = RateLimitingGate.uniform(settings.nonAllowListedClientRateLimitPerSec) - allowListedOrRateLimitedAuthorizer(dynamicConfig, limitingGate) - .andThen(rejectNonAllowListedProdAuthorizer(dynamicConfig)) - .andThen(permittedMethodsAuthorizer(dynamicConfig)) - .andThen(allowProductionAuthorizer(settings.allowProductionClients)) - } else { - ClientRequestAuthorizer.withClientId - } - - val alternativeClientIdHelper = new ClientIdHelper(PreferForwardedServiceIdentifierForStrato) - // pass the authorizer into an observed authorizer for stats tracking. - // (observed authorizers can't be composed with andThen) - ClientRequestAuthorizer.observed( - authorizer, - new ClientRequestObserver(statsReceiver) { - override def apply( - methodName: String, - clientIdScopesOpt: Option[Seq[String]] - ): Future[Unit] = { - // Monitor for the migration taking into account forwarded service identifier - // as effective client ID for strato. 
- val alternativeClientIdScopes = alternativeClientIdHelper.effectiveClientId.map(Seq(_)) - if (clientIdScopesOpt != alternativeClientIdScopes) { - scopedReceiver.scope(methodName) - .scope("before_migration") - .scope(clientIdScopesOpt.getOrElse(Seq(ClientIdHelper.UnknownClientId)): _*) - .scope("after_migration") - .counter(alternativeClientIdScopes.getOrElse(Seq(ClientIdHelper.UnknownClientId)): _*) - .incr() - } else { - scopedReceiver.scope(methodName).counter("migration_indifferent").incr() - } - super.apply(methodName, clientIdScopesOpt) - } - - override def authorized(methodName: String, clientIdStr: String): Unit = { - // Monitor for the migration of using service identifier - // as identity instead of client ID. - val serviceIdentifier = getServiceIdentifier() - scopedReceiver.counter( - "authorized_request", - clientIdStr, - serviceIdentifier.role, - serviceIdentifier.service, - serviceIdentifier.environment - ).incr() - val status = dynamicConfig.byServiceIdentifier(serviceIdentifier).toSeq match { - case Seq() => "none" - case Seq(client) if client.clientId == clientIdStr => "equal" - case Seq(_) => "other" - case _ => "ambiguous" - } - scopedReceiver.counter( - "service_id_match_client_id", - clientIdStr, - serviceIdentifier.role, - serviceIdentifier.service, - serviceIdentifier.environment, - status - ).incr() - } - } - ) - } - - /** - * @return A ClientRequestAuthorizer that allows unlimited requests for allowlisted client ids and - * rate-limited requests for unknown clients. - */ - def allowListedOrRateLimitedAuthorizer( - dynamicConfig: DynamicConfig, - nonAllowListedLimiter: Gate[Unit] - ): ClientRequestAuthorizer = - ClientRequestAuthorizer.filtered( - { (_, clientId) => - dynamicConfig.isAllowListedClient(clientId) || nonAllowListedLimiter() - }, - RateLimitExceeded) - - /** - * @return A ClientRequestAuthorizer that rejects requests from non-allowListed prod clients. 
- */ - def rejectNonAllowListedProdAuthorizer(dynamicConfig: DynamicConfig): ClientRequestAuthorizer = { - object UnallowlistedException - extends UnauthorizedException( - "Traffic is only allowed from allow-listed *.prod clients." + - " Please create a ticket to register your clientId to enable production traffic using http://go/tp-new-client." - ) - - def isProdClient(clientId: String): Boolean = - clientId.endsWith(".prod") || clientId.endsWith(".production") - - ClientRequestAuthorizer.filtered( - { (_, clientId) => - !isProdClient(clientId) || dynamicConfig.isAllowListedClient(clientId) - }, - UnallowlistedException) - } - - /** - * @return A ClientRequestAuthorizer that checks if a given client's - * permittedMethods field includes the method they are calling - */ - def permittedMethodsAuthorizer(dynamicConfig: DynamicConfig): ClientRequestAuthorizer = - dynamicConfig.clientsByFullyQualifiedId match { - case Some(clientsById) => permittedMethodsAuthorizer(dynamicConfig, clientsById) - case None => ClientRequestAuthorizer.permissive - } - - private def permittedMethodsAuthorizer( - dynamicConfig: DynamicConfig, - clientsByFullyQualifiedId: Map[String, Client] - ): ClientRequestAuthorizer = { - ClientRequestAuthorizer.filtered { (methodName, clientId) => - dynamicConfig.unprotectedEndpoints(methodName) || - (clientsByFullyQualifiedId.get(clientId) match { - case Some(client) => - client.accessAllMethods || - client.permittedMethods.contains(methodName) - case None => - false // If client id is unknown, don't allow access - }) - } - } - - /** - * @return A ClientRequestAuthorizer that fails the - * request if it is coming from a production client - * and allowProductionClients is false - */ - def allowProductionAuthorizer(allowProductionClients: Boolean): ClientRequestAuthorizer = - ClientRequestAuthorizer.filtered { (_, clientId) => - allowProductionClients || !(clientId.endsWith(".prod") || clientId.endsWith(".production")) - } -} - -/** - * Compose a 
MethodAuthorizer for the `getTweets` endpoint. - */ -object GetTweetsAuthorizer { - import ProtectedTweetsAuthorizer.IncludeProtected - - def apply( - config: DynamicConfig, - maxRequestSize: Int, - instanceCount: Int, - enforceRateLimitedClients: Gate[Unit], - maxRequestWidthEnabled: Gate[Unit], - statsReceiver: StatsReceiver, - ): MethodAuthorizer[GetTweetsRequest] = - MethodAuthorizer.all( - Seq( - ProtectedTweetsAuthorizer(config.clientsByFullyQualifiedId) - .contramap[GetTweetsRequest] { r => - IncludeProtected(r.options.exists(_.bypassVisibilityFiltering)) - }, - RequestSizeAuthorizer(maxRequestSize, maxRequestWidthEnabled) - .contramap[GetTweetsRequest](_.tweetIds.size), - RateLimiterAuthorizer(config, instanceCount, enforceRateLimitedClients, statsReceiver) - .contramap[GetTweetsRequest](_.tweetIds.size) - ) - ) -} - -/** - * Compose a MethodAuthorizer for the `getTweetFields` endpoint. - */ -object GetTweetFieldsAuthorizer { - import ProtectedTweetsAuthorizer.IncludeProtected - - def apply( - config: DynamicConfig, - maxRequestSize: Int, - instanceCount: Int, - enforceRateLimitedClients: Gate[Unit], - maxRequestWidthEnabled: Gate[Unit], - statsReceiver: StatsReceiver - ): MethodAuthorizer[GetTweetFieldsRequest] = - MethodAuthorizer.all( - Seq( - ProtectedTweetsAuthorizer(config.clientsByFullyQualifiedId) - .contramap[GetTweetFieldsRequest](r => - IncludeProtected(r.options.visibilityPolicy == TweetVisibilityPolicy.NoFiltering)), - RequestSizeAuthorizer(maxRequestSize, maxRequestWidthEnabled) - .contramap[GetTweetFieldsRequest](_.tweetIds.size), - RateLimiterAuthorizer(config, instanceCount, enforceRateLimitedClients, statsReceiver) - .contramap[GetTweetFieldsRequest](_.tweetIds.size) - ) - ) -} - -object ProtectedTweetsAuthorizer { - case class IncludeProtected(include: Boolean) extends AnyVal - - class BypassVisibilityFilteringNotAuthorizedException(message: String) - extends UnauthorizedException(message) - - def apply(optClientsById: Option[Map[String, 
Client]]): MethodAuthorizer[IncludeProtected] = { - optClientsById match { - case Some(clientsByFullyQualifiedId) => - val clientsWithBypassVisibilityFiltering = clientsByFullyQualifiedId.filter { - case (_, client) => client.bypassVisibilityFiltering - } - apply(clientId => clientsWithBypassVisibilityFiltering.contains(clientId)) - - case None => - apply((_: String) => true) - } - } - - /** - * A MethodAuthorizer that fails the request if a client requests to bypass visibility - * filtering but doesn't have BypassVisibilityFiltering - */ - def apply(protectedTweetsAllowlist: String => Boolean): MethodAuthorizer[IncludeProtected] = - MethodAuthorizer { (includeProtected, clientId) => - // There is only one unauthorized case, a client requesting - // protected tweets when they are not in the allowlist - Future.when(includeProtected.include && !protectedTweetsAllowlist(clientId)) { - Future.exception( - new BypassVisibilityFilteringNotAuthorizedException( - s"$clientId is not authorized to bypass visibility filtering" - ) - ) - } - } -} - -/** - * A MethodAuthorizer[Int] that fails large requests. - */ -object RequestSizeAuthorizer { - class ExceededMaxWidthException(message: String) extends UnauthorizedException(message) - - def apply( - maxRequestSize: Int, - maxWidthLimitEnabled: Gate[Unit] = Gate.False - ): MethodAuthorizer[Int] = - MethodAuthorizer { (requestSize, clientId) => - Future.when(requestSize > maxRequestSize && maxWidthLimitEnabled()) { - Future.exception( - new ExceededMaxWidthException( - s"$requestSize exceeds bulk request size limit. $clientId can request at most $maxRequestSize items per request" - ) - ) - } - } -} - -object RateLimiterAuthorizer { - - type ClientId = String - - /** - * @return client ID to weighted RateLimitingGate map - * - * We want to rate-limit based on requests per sec for every instance. - * When we allowlist new clients to Tweetypie, we assign tweets per sec quota. 
- * That's why, we compute perInstanceQuota [1] and create a weighted rate-limiting gate [2] - * which returns true if acquiring requestSize number of permits is successful. [3] - * - * [1] tps quota during allowlisting is for both DCs and instanceCount is for one DC. - * Therefore, we are over-compensating perInstanceQuota for all low-priority clients. - * this will act a fudge-factor to account for cluster-wide traffic imbalances. - * - * val perInstanceQuota : Double = math.max(1.0, math.ceil(tpsLimit.toFloat / instanceCount)) - * - * We have some clients like deferredRPC with 0K tps quota and rate limiter expects > 0 permits. - * - * [2] if a client has multiple environments - staging, devel, prod. We provision the - * same rate-limits for all envs instead of distributing the tps quota across envs. - * - * Example: - * - * val c = Client(..., limit = 10k, ...) - * Map("foo.prod" -> c, "foo.staging" -> c, "foo.devel" -> c) - * - * Above client config turns into 3 separate RateLimitingGate.weighted(), each with 10k - * - * [3] RateLimitingGate will always give permit to the initial request that exceeds - * the limit. ex: starting with rate-limit of 1 tps per instance. first request with - * 100 batch size is allowed. - * - * RateLimitFudgeFactor is a multiplier for per-instance quota to account for: - * - * a) High likelihood of concurrent batches hitting the same tweetypie shard due to - * non-uniform load distribution (this can be alleviated by using Deterministic Aperture) - * b) Clients with no retry backoffs and custom batching/concurrency. - * - * We are adding default stitch batch size to per instance quota, to give more headroom for low-tps clients. 
- * https://cgit.twitter.biz/source/tree/stitch/stitch-tweetypie/src/main/scala/com/twitter/stitch/tweetypie/TweetyPie.scala#n47 - * - */ - case class RateLimiterConfig(limitingGate: Gate[Int], enforceRateLimit: Boolean) - - def perClientRateLimiters( - dynamicConfig: DynamicConfig, - instanceCount: Int - ): Map[ClientId, RateLimiterConfig] = { - val RateLimitFudgeFactor: Double = 1.5 - val DefaultStitchBatchSize: Double = 25.0 - dynamicConfig.clientsByFullyQualifiedId match { - case Some(clients) => - clients.collect { - case (clientId, client) if client.tpsLimit.isDefined => - val perInstanceQuota: Double = - math.max( - 1.0, - math.ceil( - client.tpsLimit.get.toFloat / instanceCount)) * RateLimitFudgeFactor + DefaultStitchBatchSize - clientId -> RateLimiterConfig( - RateLimitingGate.weighted(perInstanceQuota), - client.enforceRateLimit - ) - } - case None => Map.empty - } - } - - /* - enforce rate-limiting on get_tweets and get_tweet_fields requests - given enable_rate_limited_clients decider is true and rate limiting gate - is not giving any more permits. 
- */ - def apply( - config: DynamicConfig, - limiters: Map[ClientId, RateLimiterConfig], - instanceCount: Int, - enforceRateLimitedClients: Gate[Unit], - statsReceiver: StatsReceiver - ): MethodAuthorizer[Int] = { - - val tpsExceededScope = statsReceiver.scope("tps_exceeded") - val tpsRejectedScope = statsReceiver.scope("tps_rejected") - val qpsExceededScope = statsReceiver.scope("qps_exceeded") - val qpsRejectedScope = statsReceiver.scope("qps_rejected") - - MethodAuthorizer { (requestSize, clientId) => - val positiveRequestSize = math.max(1, requestSize) - val shouldRateLimit: Boolean = limiters.get(clientId).exists { config => - val exceededLimit = !config.limitingGate(positiveRequestSize) - if (exceededLimit) { - qpsExceededScope.counter(clientId).incr() - tpsExceededScope.counter(clientId).incr(positiveRequestSize) - } - exceededLimit && config.enforceRateLimit - } - - Future.when(shouldRateLimit && enforceRateLimitedClients()) { - qpsRejectedScope.counter(clientId).incr() - tpsRejectedScope.counter(clientId).incr(positiveRequestSize) - Future.exception( - RateLimited(s"Your client ID $clientId has exceeded its reserved tps quota.") - ) - } - } - } - - def apply( - config: DynamicConfig, - instanceCount: Int, - enforceRateLimitedClients: Gate[Unit], - statsReceiver: StatsReceiver - ): MethodAuthorizer[Int] = { - val limiters = perClientRateLimiters(config, instanceCount) - apply(config, limiters, instanceCount, enforceRateLimitedClients, statsReceiver) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.docx new file mode 100644 index 000000000..927035a08 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala deleted file mode 100644 index 518d0edcd..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala +++ /dev/null @@ -1,683 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.coreservices.failed_task.writer.FailedTaskWriter -import com.twitter.featureswitches.v2.FeatureSwitches -import com.twitter.flockdb.client._ -import com.twitter.servo.forked -import com.twitter.servo.util.FutureArrow -import com.twitter.servo.util.Scribe -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.tweetypie.handler._ -import com.twitter.tweetypie.repository._ -import com.twitter.tweetypie.service.ReplicatingTweetService -import com.twitter.tweetypie.service._ -import com.twitter.tweetypie.storage.TweetStorageClient -import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet -import com.twitter.tweetypie.store._ -import com.twitter.tweetypie.thriftscala._ -import com.twitter.util.Activity -import com.twitter.util.Timer - -/** - * Builds a fully configured ThriftTweetService instance. - * - * The core of the tweet service is a DispatchingTweetService, which is responsible - * for dispatching requests to underlying handlers and stores. - * The DispatchingTweetService instance is wrapped in: - * - ObservedTweetService (adds stats counting) - * - ClientHandlingTweetService (authentication, exception handling, etc) - * - ReplicatingTweetService (replicates some reads) - * - * TweetServiceBuilder returns an Activity[ThriftTweetService] which updates - * on config changes. See DynamicConfig.scala for more details. 
- */ -object TweetServiceBuilder { - def apply( - settings: TweetServiceSettings, - statsReceiver: StatsReceiver, - timer: Timer, - deciderGates: TweetypieDeciderGates, - featureSwitchesWithExperiments: FeatureSwitches, - featureSwitchesWithoutExperiments: FeatureSwitches, - backendClients: BackendClients, - clientIdHelper: ClientIdHelper, - ): Activity[ThriftTweetService] = { - // a forward reference, will be set to the DispatchingTweetService once created - val syncTweetService = new MutableTweetServiceProxy(null) - - val tweetServiceScope = statsReceiver.scope("tweet_service") - - val dispatchingTweetService = - DispatchingTweetServiceBuilder( - settings, - statsReceiver, - tweetServiceScope, - syncTweetService, - timer, - deciderGates, - featureSwitchesWithExperiments, - featureSwitchesWithoutExperiments, - backendClients, - clientIdHelper, - ) - - val failureLoggingTweetService = - // Add the failure writing inside of the authorization filter so - // that we don't write out the failures when authorization fails. - new FailureLoggingTweetService( - failedTaskWriter = FailedTaskWriter("tweetypie_service_failures", identity), - underlying = dispatchingTweetService - ) - - val observedTweetService = - new ObservedTweetService(failureLoggingTweetService, tweetServiceScope, clientIdHelper) - - // Every time config is updated, create a new tweet service. Only - // ClientHandlingTweetService and ReplicatingTweetService need to - // be recreated, as the underlying TweetServices above don't depend - // on the config. 
- DynamicConfig( - statsReceiver.scope("dynamic_config"), - backendClients.configBus, - settings - ).map { dynamicConfig => - val clientHandlingTweetService = - new ClientHandlingTweetService( - observedTweetService, - tweetServiceScope, - dynamicConfig.loadShedEligible, - deciderGates.shedReadTrafficVoluntarily, - ClientHandlingTweetServiceAuthorizer( - settings = settings, - dynamicConfig = dynamicConfig, - statsReceiver = statsReceiver - ), - GetTweetsAuthorizer( - config = dynamicConfig, - maxRequestSize = settings.maxGetTweetsRequestSize, - instanceCount = settings.instanceCount, - enforceRateLimitedClients = deciderGates.enforceRateLimitedClients, - maxRequestWidthEnabled = deciderGates.maxRequestWidthEnabled, - statsReceiver = tweetServiceScope.scope("get_tweets"), - ), - GetTweetFieldsAuthorizer( - config = dynamicConfig, - maxRequestSize = settings.maxGetTweetsRequestSize, - instanceCount = settings.instanceCount, - enforceRateLimitedClients = deciderGates.enforceRateLimitedClients, - maxRequestWidthEnabled = deciderGates.maxRequestWidthEnabled, - statsReceiver = tweetServiceScope.scope("get_tweet_fields"), - ), - RequestSizeAuthorizer(settings.maxRequestSize, deciderGates.maxRequestWidthEnabled), - clientIdHelper, - ) - - syncTweetService.underlying = clientHandlingTweetService - - val replicatingService = - if (!settings.enableReplication) - clientHandlingTweetService - else { - new ReplicatingTweetService( - underlying = clientHandlingTweetService, - replicationTargets = backendClients.lowQoSReplicationClients, - executor = new forked.QueueExecutor( - 100, - statsReceiver.scope("replicating_tweet_service") - ), - ) - } - - replicatingService - } - } -} - -object DispatchingTweetServiceBuilder { - val hasMedia: Tweet => Boolean = MediaIndexHelper(Resources.loadPartnerMediaRegexes()) - - def apply( - settings: TweetServiceSettings, - statsReceiver: StatsReceiver, - tweetServiceScope: StatsReceiver, - syncTweetService: ThriftTweetService, - timer: Timer, - 
deciderGates: TweetypieDeciderGates, - featureSwitchesWithExperiments: FeatureSwitches, - featureSwitchesWithoutExperiments: FeatureSwitches, - backendClients: BackendClients, - clientIdHelper: ClientIdHelper, - ): ThriftTweetService = { - val (syncInvocationBuilder, asyncInvocationBuilder) = { - val b = - new ServiceInvocationBuilder(syncTweetService, settings.simulateDeferredrpcCallbacks) - (b.withClientId(settings.thriftClientId), b.withClientId(settings.deferredrpcClientId)) - } - - val tweetKeyFactory = TweetKeyFactory(settings.tweetKeyCacheVersion) - - val caches = - if (!settings.withCache) - Caches.NoCache - else - Caches( - settings = settings, - stats = statsReceiver, - timer = timer, - clients = backendClients, - tweetKeyFactory = tweetKeyFactory, - deciderGates = deciderGates, - clientIdHelper = clientIdHelper, - ) - - val logicalRepos = - LogicalRepositories( - settings = settings, - stats = statsReceiver, - timer = timer, - deciderGates = deciderGates, - external = new ExternalServiceRepositories( - clients = backendClients, - statsReceiver = statsReceiver, - settings = settings, - clientIdHelper = clientIdHelper, - ), - caches = caches, - stratoClient = backendClients.stratoserverClient, - hasMedia = hasMedia, - clientIdHelper = clientIdHelper, - featureSwitchesWithoutExperiments = featureSwitchesWithoutExperiments, - ) - - val tweetCreationLock = - new CacheBasedTweetCreationLock( - cache = caches.tweetCreateLockerCache, - maxTries = 3, - stats = statsReceiver.scope("tweet_save").scope("locker"), - logUniquenessId = - if (settings.scribeUniquenessIds) CacheBasedTweetCreationLock.ScribeUniquenessId - else CacheBasedTweetCreationLock.LogUniquenessId - ) - - val tweetStores = - TweetStores( - settings = settings, - statsReceiver = statsReceiver, - timer = timer, - deciderGates = deciderGates, - tweetKeyFactory = tweetKeyFactory, - clients = backendClients, - caches = caches, - asyncBuilder = asyncInvocationBuilder, - hasMedia = hasMedia, - 
clientIdHelper = clientIdHelper, - ) - - val tweetDeletePathHandler = - new DefaultTweetDeletePathHandler( - tweetServiceScope, - logicalRepos.tweetResultRepo, - logicalRepos.optionalUserRepo, - logicalRepos.stratoSafetyLabelsRepo, - logicalRepos.lastQuoteOfQuoterRepo, - tweetStores, - getPerspectives = backendClients.timelineService.getPerspectives, - ) - - val tweetBuilders = - TweetBuilders( - settings = settings, - statsReceiver = statsReceiver, - deciderGates = deciderGates, - featureSwitchesWithExperiments = featureSwitchesWithExperiments, - clients = backendClients, - caches = caches, - repos = logicalRepos, - tweetStore = tweetStores, - hasMedia = hasMedia, - unretweetEdits = tweetDeletePathHandler.unretweetEdits, - ) - - val hydrateTweetForInsert = - WritePathHydration.hydrateTweet( - logicalRepos.tweetHydrators.hydrator, - statsReceiver.scope("insert_tweet") - ) - - val defaultTweetQueryOptions = TweetQuery.Options(include = GetTweetsHandler.BaseInclude) - - val parentUserIdRepo: ParentUserIdRepository.Type = - ParentUserIdRepository( - tweetRepo = logicalRepos.tweetRepo - ) - - val undeleteTweetHandler = - UndeleteTweetHandlerBuilder( - backendClients.tweetStorageClient, - logicalRepos, - tweetStores, - parentUserIdRepo, - statsReceiver - ) - - val eraseUserTweetsHandler = - EraseUserTweetsHandlerBuilder( - backendClients, - asyncInvocationBuilder, - deciderGates, - settings, - timer, - tweetDeletePathHandler, - tweetServiceScope - ) - - val setRetweetVisibilityHandler = - SetRetweetVisibilityHandler( - tweetGetter = - TweetRepository.tweetGetter(logicalRepos.optionalTweetRepo, defaultTweetQueryOptions), - tweetStores.setRetweetVisibility - ) - - val takedownHandler = - TakedownHandlerBuilder( - logicalRepos = logicalRepos, - tweetStores = tweetStores - ) - - val updatePossiblySensitiveTweetHandler = - UpdatePossiblySensitiveTweetHandler( - HandlerError.getRequired( - TweetRepository.tweetGetter(logicalRepos.optionalTweetRepo, defaultTweetQueryOptions), 
- HandlerError.tweetNotFoundException - ), - HandlerError.getRequired( - FutureArrow( - UserRepository - .userGetter( - logicalRepos.optionalUserRepo, - UserQueryOptions(Set(UserField.Safety), UserVisibility.All) - ) - .compose(UserKey.byId) - ), - HandlerError.userNotFoundException - ), - tweetStores.updatePossiblySensitiveTweet - ) - - val userTakedownHandler = - UserTakedownHandlerBuilder( - logicalRepos = logicalRepos, - tweetStores = tweetStores, - stats = tweetServiceScope - ) - - val getDeletedTweetsHandler = - GetDeletedTweetsHandler( - getDeletedTweets = backendClients.tweetStorageClient.getDeletedTweets, - tweetsExist = - GetDeletedTweetsHandler.tweetsExist(backendClients.tweetStorageClient.getTweet), - stats = tweetServiceScope.scope("get_deleted_tweets_handler") - ) - - val hydrateQuotedTweet = - WritePathHydration.hydrateQuotedTweet( - logicalRepos.optionalTweetRepo, - logicalRepos.optionalUserRepo, - logicalRepos.quoterHasAlreadyQuotedRepo - ) - - val deleteLocationDataHandler = - DeleteLocationDataHandler( - backendClients.geoScrubEventStore.getGeoScrubTimestamp, - Scribe(DeleteLocationData, "tweetypie_delete_location_data"), - backendClients.deleteLocationDataPublisher - ) - - val getStoredTweetsHandler = GetStoredTweetsHandler(logicalRepos.tweetResultRepo) - - val getStoredTweetsByUserHandler = GetStoredTweetsByUserHandler( - getStoredTweetsHandler = getStoredTweetsHandler, - getStoredTweet = backendClients.tweetStorageClient.getStoredTweet, - selectPage = FutureArrow { select => - backendClients.tflockReadClient - .selectPage(select, Some(settings.getStoredTweetsByUserPageSize)) - }, - maxPages = settings.getStoredTweetsByUserMaxPages - ) - - val getTweetsHandler = - GetTweetsHandler( - logicalRepos.tweetResultRepo, - logicalRepos.containerAsGetTweetResultRepo, - logicalRepos.deletedTweetVisibilityRepo, - statsReceiver.scope("read_path"), - deciderGates.shouldMaterializeContainers - ) - - val getTweetFieldsHandler = - GetTweetFieldsHandler( - 
logicalRepos.tweetResultRepo, - logicalRepos.deletedTweetVisibilityRepo, - logicalRepos.containerAsGetTweetFieldsResultRepo, - statsReceiver.scope("read_path"), - deciderGates.shouldMaterializeContainers - ) - - val unretweetHandler = - UnretweetHandler( - tweetDeletePathHandler.deleteTweets, - backendClients.timelineService.getPerspectives, - tweetDeletePathHandler.unretweetEdits, - logicalRepos.tweetRepo, - ) - - val hydrateInsertEvent = - WritePathHydration.hydrateInsertTweetEvent( - hydrateTweet = hydrateTweetForInsert, - hydrateQuotedTweet = hydrateQuotedTweet - ) - - val scrubGeoUpdateUserTimestampBuilder = - ScrubGeoEventBuilder.UpdateUserTimestamp( - stats = tweetServiceScope.scope("scrub_geo_update_user_timestamp"), - userRepo = logicalRepos.optionalUserRepo - ) - - val scrubGeoScrubTweetsBuilder = - ScrubGeoEventBuilder.ScrubTweets( - stats = tweetServiceScope.scope("scrub_geo"), - userRepo = logicalRepos.optionalUserRepo - ) - - val handlerFilter = - PostTweet - .DuplicateHandler( - tweetCreationLock = tweetCreationLock, - getTweets = getTweetsHandler, - stats = statsReceiver.scope("duplicate") - ) - .andThen(PostTweet.RescueTweetCreateFailure) - .andThen(PostTweet.LogFailures) - - val postTweetHandler = - handlerFilter[PostTweetRequest]( - PostTweet.Handler( - tweetBuilder = tweetBuilders.tweetBuilder, - hydrateInsertEvent = hydrateInsertEvent, - tweetStore = tweetStores, - ) - ) - - val postRetweetHandler = - handlerFilter[RetweetRequest]( - PostTweet.Handler( - tweetBuilder = tweetBuilders.retweetBuilder, - hydrateInsertEvent = hydrateInsertEvent, - tweetStore = tweetStores, - ) - ) - - val quotedTweetDeleteBuilder: QuotedTweetDeleteEventBuilder.Type = - QuotedTweetDeleteEventBuilder(logicalRepos.optionalTweetRepo) - - val quotedTweetTakedownBuilder: QuotedTweetTakedownEventBuilder.Type = - QuotedTweetTakedownEventBuilder(logicalRepos.optionalTweetRepo) - - val setAdditionalFieldsBuilder: SetAdditionalFieldsBuilder.Type = - SetAdditionalFieldsBuilder( 
- tweetRepo = logicalRepos.tweetRepo - ) - - val asyncSetAdditionalFieldsBuilder: AsyncSetAdditionalFieldsBuilder.Type = - AsyncSetAdditionalFieldsBuilder( - userRepo = logicalRepos.userRepo - ) - - val deleteAdditionalFieldsBuilder: DeleteAdditionalFieldsBuilder.Type = - DeleteAdditionalFieldsBuilder( - tweetRepo = logicalRepos.tweetRepo - ) - - val asyncDeleteAdditionalFieldsBuilder: AsyncDeleteAdditionalFieldsBuilder.Type = - AsyncDeleteAdditionalFieldsBuilder( - userRepo = logicalRepos.userRepo - ) - - new DispatchingTweetService( - asyncDeleteAdditionalFieldsBuilder = asyncDeleteAdditionalFieldsBuilder, - asyncSetAdditionalFieldsBuilder = asyncSetAdditionalFieldsBuilder, - deleteAdditionalFieldsBuilder = deleteAdditionalFieldsBuilder, - deleteLocationDataHandler = deleteLocationDataHandler, - deletePathHandler = tweetDeletePathHandler, - eraseUserTweetsHandler = eraseUserTweetsHandler, - getDeletedTweetsHandler = getDeletedTweetsHandler, - getStoredTweetsHandler = getStoredTweetsHandler, - getStoredTweetsByUserHandler = getStoredTweetsByUserHandler, - getTweetsHandler = getTweetsHandler, - getTweetFieldsHandler = getTweetFieldsHandler, - getTweetCountsHandler = GetTweetCountsHandler(logicalRepos.tweetCountsRepo), - postTweetHandler = postTweetHandler, - postRetweetHandler = postRetweetHandler, - quotedTweetDeleteBuilder = quotedTweetDeleteBuilder, - quotedTweetTakedownBuilder = quotedTweetTakedownBuilder, - scrubGeoUpdateUserTimestampBuilder = scrubGeoUpdateUserTimestampBuilder, - scrubGeoScrubTweetsBuilder = scrubGeoScrubTweetsBuilder, - setAdditionalFieldsBuilder = setAdditionalFieldsBuilder, - setRetweetVisibilityHandler = setRetweetVisibilityHandler, - statsReceiver = statsReceiver, - takedownHandler = takedownHandler, - tweetStore = tweetStores, - undeleteTweetHandler = undeleteTweetHandler, - unretweetHandler = unretweetHandler, - updatePossiblySensitiveTweetHandler = updatePossiblySensitiveTweetHandler, - userTakedownHandler = userTakedownHandler, - 
clientIdHelper = clientIdHelper, - ) - } -} - -object TakedownHandlerBuilder { - type Type = FutureArrow[TakedownRequest, Unit] - - def apply(logicalRepos: LogicalRepositories, tweetStores: TotalTweetStore) = - TakedownHandler( - getTweet = HandlerError.getRequired( - tweetGetter(logicalRepos), - HandlerError.tweetNotFoundException - ), - getUser = HandlerError.getRequired( - userGetter(logicalRepos), - HandlerError.userNotFoundException - ), - writeTakedown = tweetStores.takedown - ) - - def tweetGetter(logicalRepos: LogicalRepositories): FutureArrow[TweetId, Option[Tweet]] = - FutureArrow( - TweetRepository.tweetGetter( - logicalRepos.optionalTweetRepo, - TweetQuery.Options( - include = GetTweetsHandler.BaseInclude.also( - tweetFields = Set( - Tweet.TweetypieOnlyTakedownCountryCodesField.id, - Tweet.TweetypieOnlyTakedownReasonsField.id - ) - ) - ) - ) - ) - - def userGetter(logicalRepos: LogicalRepositories): FutureArrow[UserId, Option[User]] = - FutureArrow( - UserRepository - .userGetter( - logicalRepos.optionalUserRepo, - UserQueryOptions( - Set(UserField.Roles, UserField.Safety, UserField.Takedowns), - UserVisibility.All - ) - ) - .compose(UserKey.byId) - ) -} - -object UserTakedownHandlerBuilder { - def apply( - logicalRepos: LogicalRepositories, - tweetStores: TotalTweetStore, - stats: StatsReceiver - ): UserTakedownHandler.Type = - UserTakedownHandler( - getTweet = TakedownHandlerBuilder.tweetGetter(logicalRepos), - tweetTakedown = tweetStores.takedown, - ) -} - -object EraseUserTweetsHandlerBuilder { - def apply( - backendClients: BackendClients, - asyncInvocationBuilder: ServiceInvocationBuilder, - deciderGates: TweetypieDeciderGates, - settings: TweetServiceSettings, - timer: Timer, - tweetDeletePathHandler: DefaultTweetDeletePathHandler, - tweetServiceScope: StatsReceiver - ): EraseUserTweetsHandler = - EraseUserTweetsHandler( - selectPage(backendClients, settings), - deleteTweet(tweetDeletePathHandler), - eraseUserTweets(backendClients, 
asyncInvocationBuilder), - tweetServiceScope.scope("erase_user_tweets"), - sleep(deciderGates, settings, timer) - ) - - def selectPage( - backendClients: BackendClients, - settings: TweetServiceSettings - ): FutureArrow[Select[StatusGraph], PageResult[Long]] = - FutureArrow( - backendClients.tflockWriteClient.selectPage(_, Some(settings.eraseUserTweetsPageSize)) - ) - - def deleteTweet( - tweetDeletePathHandler: DefaultTweetDeletePathHandler - ): FutureEffect[(TweetId, UserId)] = - FutureEffect[(TweetId, UserId)] { - case (tweetId, expectedUserId) => - tweetDeletePathHandler - .internalDeleteTweets( - request = DeleteTweetsRequest( - Seq(tweetId), - isUserErasure = true, - expectedUserId = Some(expectedUserId) - ), - byUserId = None, - authenticatedUserId = None, - validate = tweetDeletePathHandler.validateTweetsForUserErasureDaemon - ) - .unit - } - - def eraseUserTweets( - backendClients: BackendClients, - asyncInvocationBuilder: ServiceInvocationBuilder - ): FutureArrow[AsyncEraseUserTweetsRequest, Unit] = - asyncInvocationBuilder - .asyncVia(backendClients.asyncTweetDeletionService) - .method(_.asyncEraseUserTweets) - - def sleep( - deciderGates: TweetypieDeciderGates, - settings: TweetServiceSettings, - timer: Timer - ): () => Future[Unit] = - () => - if (deciderGates.delayEraseUserTweets()) { - Future.sleep(settings.eraseUserTweetsDelay)(timer) - } else { - Future.Unit - } -} - -object UndeleteTweetHandlerBuilder { - def apply( - tweetStorage: TweetStorageClient, - logicalRepos: LogicalRepositories, - tweetStores: TotalTweetStore, - parentUserIdRepo: ParentUserIdRepository.Type, - statsReceiver: StatsReceiver - ): UndeleteTweetHandler.Type = - UndeleteTweetHandler( - undelete = tweetStorage.undelete, - tweetExists = tweetExists(tweetStorage), - getUser = FutureArrow( - UserRepository - .userGetter( - logicalRepos.optionalUserRepo, - UserQueryOptions( - // ExtendedProfile is needed to view a user's birthday to - // guarantee we are not undeleting tweets from 
when a user was < 13 - TweetBuilder.userFields ++ Set(UserField.ExtendedProfile), - UserVisibility.All, - filteredAsFailure = false - ) - ) - .compose(UserKey.byId) - ), - getDeletedTweets = tweetStorage.getDeletedTweets, - parentUserIdRepo = parentUserIdRepo, - save = save( - logicalRepos, - tweetStores, - statsReceiver - ) - ) - - private def tweetExists(tweetStorage: TweetStorageClient): FutureArrow[TweetId, Boolean] = - FutureArrow { id => - Stitch - .run(tweetStorage.getTweet(id)) - .map { - case _: GetTweet.Response.Found => true - case _ => false - } - } - - // 1. hydrates the undeleted tweet - // 2. hands a UndeleteTweetEvent to relevant stores. - // 3. return the hydrated tweet - def save( - logicalRepos: LogicalRepositories, - tweetStores: TotalTweetStore, - statsReceiver: StatsReceiver - ): FutureArrow[UndeleteTweet.Event, Tweet] = { - - val hydrateTweet = - WritePathHydration.hydrateTweet( - logicalRepos.tweetHydrators.hydrator, - statsReceiver.scope("undelete_tweet") - ) - - val hydrateQuotedTweet = - WritePathHydration.hydrateQuotedTweet( - logicalRepos.optionalTweetRepo, - logicalRepos.optionalUserRepo, - logicalRepos.quoterHasAlreadyQuotedRepo - ) - - val hydrateUndeleteEvent = - WritePathHydration.hydrateUndeleteTweetEvent( - hydrateTweet = hydrateTweet, - hydrateQuotedTweet = hydrateQuotedTweet - ) - - FutureArrow[UndeleteTweet.Event, Tweet] { event => - for { - hydratedEvent <- hydrateUndeleteEvent(event) - _ <- tweetStores.undeleteTweet(hydratedEvent) - } yield hydratedEvent.tweet - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.docx new file mode 100644 index 000000000..ec571fa06 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.docx differ diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala deleted file mode 100644 index d4d6e054f..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.tweetypie.config - -import com.twitter.finagle.thrift.ClientId -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie._ -import com.twitter.tweetypie.service.{ClientIdSettingTweetServiceProxy, TweetServiceProxy} - -/** - * This class builds deciderable ThriftTweetService and FutureArrows that respect the - * simulateDeferredrpcCallbacks decider. When simulateDeferredrpcCallbacks=true, invocations will - * be performed synchronously by the root ThriftTweetService. - */ -class ServiceInvocationBuilder( - val service: ThriftTweetService, - simulateDeferredrpcCallbacks: Boolean) { - - def withClientId(clientId: ClientId): ServiceInvocationBuilder = - new ServiceInvocationBuilder( - new ClientIdSettingTweetServiceProxy(clientId, service), - simulateDeferredrpcCallbacks - ) - - def asyncVia(asyncService: ThriftTweetService): ServiceInvocationBuilder = - new ServiceInvocationBuilder( - new TweetServiceProxy { - override def underlying: ThriftTweetService = - if (simulateDeferredrpcCallbacks) service else asyncService - }, - simulateDeferredrpcCallbacks - ) - - def method[A, B](op: ThriftTweetService => A => Future[B]): FutureArrow[A, B] = - FutureArrow(op(service)) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.docx new file mode 100644 index 000000000..053374b5b Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.docx differ diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala deleted file mode 100644 index 08592c16d..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala +++ /dev/null @@ -1,475 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.Backoff -import com.twitter.finagle.memcached.exp.localMemcachedPort -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.ssl.OpportunisticTls -import com.twitter.finagle.thrift.ClientId -import com.twitter.flockdb.client.thriftscala.Priority -import com.twitter.servo.repository.CachedResult -import com.twitter.servo.util.Availability -import com.twitter.tweetypie.backends._ -import com.twitter.tweetypie.caching.SoftTtl -import com.twitter.tweetypie.handler.DuplicateTweetFinder -import com.twitter.tweetypie.repository.TombstoneTtl -import com.twitter.tweetypie.service._ -import com.twitter.tweetypie.storage.ManhattanTweetStorageClient -import com.twitter.util.Duration - -case class InProcessCacheConfig(ttl: Duration, maximumSize: Int) - -class TweetServiceSettings(val flags: TweetServiceFlags) { - - /** - * Convert a Boolean to an Option - * > optional(true, "my value") - * res: Some(my value) - * - * > optional(false, "my value") - * res: None - */ - def optional[T](b: Boolean, a: => T): Option[T] = if (b) Some(a) else None - - /** atla, localhost, etc. 
*/ - val zone: String = flags.zone() - - /** dc is less specific than zone, zone=atla, dc=atl */ - val dc: String = zone.dropRight(1) - - /** one of: prod, staging, dev, testbox */ - val env: Env.Value = flags.env() - - /** instanceId of this aurora instance */ - lazy val instanceId: Int = flags.instanceId() - - /** total number of tweetypie aurora instances */ - val instanceCount: Int = flags.instanceCount() - - /** The Name to resolve to find the memcached cluster */ - val twemcacheDest: String = - // If twemcacheDest is explicitly set, always prefer that to - // localMemcachedPort. - flags.twemcacheDest.get - // Testbox uses this global flag to specify the location of the - // local memcached instance. - .orElse(localMemcachedPort().map("/$/inet/localhost/" + _)) - // If no explicit Name is specified, use the default. - .getOrElse(flags.twemcacheDest()) - - /** Read/write data through Cache */ - val withCache: Boolean = flags.withCache() - - /** - * The TFlock queue to use for background indexing operations. For - * production, this should always be the low priority queue, to - * allow foreground operations to be processed first. - */ - val backgroundIndexingPriority: Priority = flags.backgroundIndexingPriority() - - /** Set certain decider gates to this overridden value */ - val deciderOverrides: Map[String, Boolean] = - flags.deciderOverrides() - - /** use per host stats? 
*/ - val clientHostStats: Boolean = - flags.clientHostStats() - - val warmupRequestsSettings: Option[WarmupQueriesSettings] = - optional(flags.enableWarmupRequests(), WarmupQueriesSettings()) - - /** enables request authorization via a allowlist */ - val allowlistingRequired: Boolean = - flags.allowlist.get.getOrElse(env == Env.prod) - - /** read rate limit for unknown clients (when allowlistingRequired is enabled) */ - val nonAllowListedClientRateLimitPerSec: Double = - flags.grayListRateLimit() - - /** enables requests from production clients */ - val allowProductionClients: Boolean = - env == Env.prod - - /** enables replication via DRPC */ - val enableReplication: Boolean = flags.enableReplication() - - /** enables forking of some traffic to configured target */ - val trafficForkingEnabled: Boolean = - env == Env.prod - - val scribeUniquenessIds: Boolean = - env == Env.prod - - /** ClientId to send to backend services */ - val thriftClientId: ClientId = - flags.clientId.get.map(ClientId(_)).getOrElse { - env match { - case Env.dev | Env.staging => ClientId("tweetypie.staging") - case Env.prod => ClientId("tweetypie.prod") - } - } - - /** - * Instead of using DRPC for calling into the async code path, call back into the - * current instance. Used for development and test to ensure logic in the current - * instance is being tested. - */ - val simulateDeferredrpcCallbacks: Boolean = flags.simulateDeferredrpcCallbacks() - - /** - * ClientId to set in 'asynchronous' requests when simulateDeferredrpcCallbacks is - * true and Tweetypie ends up just calling itself synchronously. 
- */ - val deferredrpcClientId: ClientId = ClientId("deferredrpc.prod") - - /** - * ServiceIdentifier used to enable mTLS - */ - val serviceIdentifier: ServiceIdentifier = flags.serviceIdentifier() - - /** - * Decider settings - */ - val deciderBaseFilename: Option[String] = Option(flags.deciderBase()) - val deciderOverlayFilename: Option[String] = Option(flags.deciderOverlay()) - val vfDeciderOverlayFilename: Option[String] = flags.vfDeciderOverlay.get - - /** - * Used to determine whether we should fail requests for Tweets that are likely too young - * to return a non-partial response. We return NotFound for Tweets that are deemed too young. - * Used by [[com.twitter.tweetypie.repository.ManhattanTweetRepository]]. - */ - val shortCircuitLikelyPartialTweetReads: Gate[Duration] = { - // interpret the flag as a duration in milliseconds - val ageCeiling: Duration = flags.shortCircuitLikelyPartialTweetReadsMs().milliseconds - Gate(tweetAge => tweetAge < ageCeiling) - } - - // tweet-service internal settings - - val tweetKeyCacheVersion = 1 - - /** how often to flush aggregated count updates for tweet counts */ - val aggregatedTweetCountsFlushInterval: Duration = 5.seconds - - /** maximum number of keys for which aggregated cached count updates may be cached */ - val maxAggregatedCountsSize = 1000 - - /** ramp up period for decidering up forked traffic (if enabled) to the full decidered value */ - val forkingRampUp: Duration = 3.minutes - - /** how long to wait after startup for serversets to resolve before giving up and moving on */ - val waitForServerSetsTimeout: Duration = 120.seconds - - /** number of threads to use in thread pool for language identification */ - val numPenguinThreads = 4 - - /** maximum number of tweets that clients can request per getTweets RPC call */ - val maxGetTweetsRequestSize = 200 - - /** maximum batch size for any batched request (getTweets is exempt, it has its own limiting) */ - val maxRequestSize = 200 - - /** - * maximum size to 
allow the thrift response buffer to grow before resetting it. this is set to - * approximately the current value of `srv/thrift/response_payload_bytes.p999`, meaning roughly - * 1 out of 1000 requests will cause the buffer to be reset. - */ - val maxThriftBufferSize: Int = 200 * 1024 - - // ********* timeouts and backoffs ********** - - /** backoffs for OptimisticLockingCache lockAndSet operations */ - val lockingCacheBackoffs: Stream[Duration] = - Backoff.exponentialJittered(10.millisecond, 50.milliseconds).take(3).toStream - - /** retry once on timeout with no backoff */ - val defaultTimeoutBackoffs: Stream[Duration] = Stream(0.milliseconds).toStream - - /** backoffs when user view is missing */ - val gizmoduckMissingUserViewBackoffs: Stream[Duration] = Backoff.const(10.millis).take(3).toStream - - /** backoffs for retrying failed async-write actions after first retry failure */ - val asyncWriteRetryBackoffs: Stream[Duration] = - Backoff.exponential(10.milliseconds, 2).take(9).toStream.map(_ min 1.second) - - /** backoffs for retrying failed deferredrpc enqueues */ - val deferredrpcBackoffs: Stream[Duration] = - Backoff.exponential(10.milliseconds, 2).take(3).toStream - - /** backoffs for retrying failed cache updates for replicated events */ - val replicatedEventCacheBackoffs: Stream[Duration] = - Backoff.exponential(100.milliseconds, 2).take(10).toStream - - val escherbirdConfig: Escherbird.Config = - Escherbird.Config( - requestTimeout = 200.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs - ) - - val expandodoConfig: Expandodo.Config = - Expandodo.Config( - requestTimeout = 300.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs, - serverErrorBackoffs = Backoff.const(0.millis).take(3).toStream - ) - - val creativesContainerServiceConfig: CreativesContainerService.Config = - CreativesContainerService.Config( - requestTimeout = 300.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs, - serverErrorBackoffs = 
Backoff.const(0.millis).take(3).toStream - ) - - val geoScrubEventStoreConfig: GeoScrubEventStore.Config = - GeoScrubEventStore.Config( - read = GeoScrubEventStore.EndpointConfig( - requestTimeout = 200.milliseconds, - maxRetryCount = 1 - ), - write = GeoScrubEventStore.EndpointConfig( - requestTimeout = 1.second, - maxRetryCount = 1 - ) - ) - - val gizmoduckConfig: Gizmoduck.Config = - Gizmoduck.Config( - readTimeout = 300.milliseconds, - writeTimeout = 300.milliseconds, - // We bump the timeout value to 800ms because modifyAndGet is called only in async request path in GeoScrub daemon - // and we do not expect sync/realtime apps calling this thrift method - modifyAndGetTimeout = 800.milliseconds, - modifyAndGetTimeoutBackoffs = Backoff.const(0.millis).take(3).toStream, - defaultTimeoutBackoffs = defaultTimeoutBackoffs, - gizmoduckExceptionBackoffs = Backoff.const(0.millis).take(3).toStream - ) - - val limiterBackendConfig: LimiterBackend.Config = - LimiterBackend.Config( - requestTimeout = 300.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs - ) - - val mediaInfoServiceConfig: MediaInfoService.Config = - MediaInfoService.Config( - requestTimeout = 300.milliseconds, - totalTimeout = 500.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs - ) - - val scarecrowConfig: Scarecrow.Config = - Scarecrow.Config( - readTimeout = 100.milliseconds, - writeTimeout = 400.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs, - scarecrowExceptionBackoffs = Backoff.const(0.millis).take(3).toStream - ) - - val socialGraphSeviceConfig: SocialGraphService.Config = - SocialGraphService.Config( - socialGraphTimeout = 250.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs - ) - - val talonConfig: Talon.Config = - Talon.Config( - shortenTimeout = 500.milliseconds, - expandTimeout = 150.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs, - transientErrorBackoffs = Backoff.const(0.millis).take(3).toStream - ) - - /** - * page size when retrieving tflock pages 
for tweet deletion and undeletion - * tweet erasures have their own page size eraseUserTweetsPageSize - */ - val tflockPageSize: Int = flags.tflockPageSize() - - val tflockReadConfig: TFlock.Config = - TFlock.Config( - requestTimeout = 300.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs, - flockExceptionBackoffs = Backoff.const(0.millis).take(3).toStream, - overCapacityBackoffs = Stream.empty, - defaultPageSize = tflockPageSize - ) - - val tflockWriteConfig: TFlock.Config = - TFlock.Config( - requestTimeout = 400.milliseconds, - timeoutBackoffs = defaultTimeoutBackoffs, - flockExceptionBackoffs = Backoff.const(0.millis).take(3).toStream, - overCapacityBackoffs = Backoff.exponential(10.millis, 2).take(3).toStream - ) - - val timelineServiceConfig: TimelineService.Config = { - val tlsExceptionBackoffs = Backoff.const(0.millis).take(3).toStream - TimelineService.Config( - writeRequestPolicy = - Backend.TimeoutPolicy(4.seconds) >>> - TimelineService.FailureBackoffsPolicy( - timeoutBackoffs = defaultTimeoutBackoffs, - tlsExceptionBackoffs = tlsExceptionBackoffs - ), - readRequestPolicy = - Backend.TimeoutPolicy(400.milliseconds) >>> - TimelineService.FailureBackoffsPolicy( - timeoutBackoffs = defaultTimeoutBackoffs, - tlsExceptionBackoffs = tlsExceptionBackoffs - ) - ) - } - - val tweetStorageConfig: ManhattanTweetStorageClient.Config = { - val remoteZone = zone match { - case "atla" => "pdxa" - case "pdxa" => "atla" - case "atla" | "localhost" => "atla" - case _ => - throw new IllegalArgumentException(s"Cannot configure remote DC for unknown zone '$zone'") - } - ManhattanTweetStorageClient.Config( - applicationId = "tbird_mh", - localDestination = "/s/manhattan/cylon.native-thrift", - localTimeout = 290.milliseconds, - remoteDestination = s"/srv#/prod/$remoteZone/manhattan/cylon.native-thrift", - remoteTimeout = 1.second, - maxRequestsPerBatch = 25, - serviceIdentifier = serviceIdentifier, - opportunisticTlsLevel = OpportunisticTls.Required - ) - } - - val 
userImageServiceConfig: UserImageService.Config = - UserImageService.Config( - processTweetMediaTimeout = 5.seconds, - updateTweetMediaTimeout = 2.seconds, - timeoutBackoffs = defaultTimeoutBackoffs - ) - - val adsLoggingClientTopicName = env match { - case Env.prod => "ads_client_callback_prod" - case Env.dev | Env.staging => "ads_client_callback_staging" - } - - /** Delay between successive cascadedDeleteTweet calls when deleting retweets. Applied via decider. */ - val retweetDeletionDelay: Duration = 20.milliseconds - - /** - * Delay to sleep before each tweet deletion of an eraseUserTweets request. - * This is a simple rate limiting mechanism. The long term solution is - * to move async endpoints like user erasures and retweet deletions out - * of the the main tweetypie cluster and into an async cluster with first class - * rate limiting support - */ - val eraseUserTweetsDelay: Duration = 100.milliseconds - - val eraseUserTweetsPageSize = 100 - - val getStoredTweetsByUserPageSize = 20 - val getStoredTweetsByUserMaxPages = 30 - - // ********* ttls ********** - - // Unfortunately, this tombstone TTL applies equally to the case - // where the tweet was deleted and the case that the tweet does not - // exist or is unavailable. If we could differentiate between those - // cases, we'd cache deleted for a long time and not - // found/unavailable for a short time. We chose 100 - // milliseconds for the minimum TTL because there are known cases in - // which a not found result can be erroneously written to cache on - // tweet creation. This minimum TTL is a trade-off between a - // thundering herd of database requests from clients that just got - // the fanned-out tweet and the window for which these inconsistent - // results will be available. 
- val tweetTombstoneTtl: CachedResult.CachedNotFound[TweetId] => Duration = - TombstoneTtl.linear(min = 100.milliseconds, max = 1.day, from = 5.minutes, to = 5.hours) - - val tweetMemcacheTtl: Duration = 14.days - val urlMemcacheTtl: Duration = 1.hour - val urlMemcacheSoftTtl: Duration = 1.hour - val deviceSourceMemcacheTtl: Duration = 12.hours - val deviceSourceMemcacheSoftTtl: SoftTtl.ByAge[Nothing] = - SoftTtl.ByAge(softTtl = 1.hour, jitter = 1.minute) - val deviceSourceInProcessTtl: Duration = 8.hours - val deviceSourceInProcessSoftTtl: Duration = 30.minutes - val placeMemcacheTtl: Duration = 1.day - val placeMemcacheSoftTtl: SoftTtl.ByAge[Nothing] = - SoftTtl.ByAge(softTtl = 3.hours, jitter = 1.minute) - val cardMemcacheTtl: Duration = 20.minutes - val cardMemcacheSoftTtl: Duration = 30.seconds - val tweetCreateLockingMemcacheTtl: Duration = 10.seconds - val tweetCreateLockingMemcacheLongTtl: Duration = 12.hours - val geoScrubMemcacheTtl: Duration = 30.minutes - - val tweetCountsMemcacheTtl: Duration = 24.hours - val tweetCountsMemcacheNonZeroSoftTtl: Duration = 3.hours - val tweetCountsMemcacheZeroSoftTtl: Duration = 7.hours - - val cacheClientPendingRequestLimit: Int = flags.memcachePendingRequestLimit() - - val deviceSourceInProcessCacheMaxSize = 10000 - - val inProcessCacheConfigOpt: Option[InProcessCacheConfig] = - if (flags.enableInProcessCache()) { - Some( - InProcessCacheConfig( - ttl = flags.inProcessCacheTtlMs().milliseconds, - maximumSize = flags.inProcessCacheSize() - ) - ) - } else { - None - } - - // Begin returning OverCapacity for tweet repo when cache SR falls below 95%, - // Scale to rejecting 95% of requests when cache SR <= 80% - val tweetCacheAvailabilityFromSuccessRate: Double => Double = - Availability.linearlyScaled(0.95, 0.80, 0.05) - - // ******* repository chunking size ******** - - val tweetCountsRepoChunkSize = 6 - // n times `tweetCountsRepoChunkSize`, so chunking at higher level does not - // generate small batches at lower 
level. - val tweetCountsCacheChunkSize = 18 - - val duplicateTweetFinderSettings: DuplicateTweetFinder.Settings = - DuplicateTweetFinder.Settings(numTweetsToCheck = 10, maxDuplicateAge = 12.hours) - - val backendWarmupSettings: Warmup.Settings = - Warmup.Settings( - // Try for twenty seconds to warm up the backends before giving - // up. - maxWarmupDuration = 20.seconds, - // Only allow up to 50 outstanding warmup requests of any kind - // to be outstanding at a time. - maxOutstandingRequests = 50, - // These timeouts are just over the p999 latency observed in ATLA - // for requests to these backends. - requestTimeouts = Map( - "expandodo" -> 120.milliseconds, - "geo_relevance" -> 50.milliseconds, - "gizmoduck" -> 200.milliseconds, - "memcache" -> 50.milliseconds, - "scarecrow" -> 120.milliseconds, - "socialgraphservice" -> 180.milliseconds, - "talon" -> 70.milliseconds, - "tflock" -> 320.milliseconds, - "timelineservice" -> 200.milliseconds, - "tweetstorage" -> 50.milliseconds - ), - reliability = Warmup.Reliably( - // Consider a backend warmed up if 99% of requests are succeeding. - reliabilityThreshold = 0.99, - // When performing warmup, use a maximum of 10 concurrent - // requests to each backend. - concurrency = 10, - // Do not allow more than this many attempts to perform the - // warmup action before giving up. 
- maxAttempts = 1000 - ) - ) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.docx new file mode 100644 index 000000000..a71d348fe Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala deleted file mode 100644 index 49cc53fb5..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala +++ /dev/null @@ -1,577 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.servo.util.FutureArrow -import com.twitter.servo.util.RetryHandler -import com.twitter.servo.util.Scribe -import com.twitter.tweetypie.backends.LimiterService.Feature.MediaTagCreate -import com.twitter.tweetypie.backends.LimiterService.Feature.Updates -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.tweetypie.handler.TweetBuilder -import com.twitter.tweetypie.repository.TweetKeyFactory -import com.twitter.tweetypie.store._ -import com.twitter.tweetypie.tflock.TFlockIndexer -import com.twitter.tweetypie.thriftscala._ -import com.twitter.tweetypie.util.RetryPolicyBuilder -import com.twitter.util.Timer - -object TweetStores { - def apply( - settings: TweetServiceSettings, - statsReceiver: StatsReceiver, - timer: Timer, - deciderGates: TweetypieDeciderGates, - tweetKeyFactory: TweetKeyFactory, - clients: BackendClients, - caches: Caches, - asyncBuilder: ServiceInvocationBuilder, - hasMedia: Tweet => Boolean, - clientIdHelper: ClientIdHelper, - ): TotalTweetStore = { - - val deferredrpcRetryPolicy = - // retry all application exceptions for now. however, in the future, deferredrpc - // may throw a backpressure exception that should not be retried. 
- RetryPolicyBuilder.anyFailure(settings.deferredrpcBackoffs) - - val asyncWriteRetryPolicy = - // currently retries all failures with the same back-off times. might need - // to update to handle backpressure exceptions differently. - RetryPolicyBuilder.anyFailure(settings.asyncWriteRetryBackoffs) - - val replicatedEventRetryPolicy = - RetryPolicyBuilder.anyFailure(settings.replicatedEventCacheBackoffs) - - val logLensStore = - LogLensStore( - tweetCreationsLogger = Logger("com.twitter.tweetypie.store.TweetCreations"), - tweetDeletionsLogger = Logger("com.twitter.tweetypie.store.TweetDeletions"), - tweetUndeletionsLogger = Logger("com.twitter.tweetypie.store.TweetUndeletions"), - tweetUpdatesLogger = Logger("com.twitter.tweetypie.store.TweetUpdates"), - clientIdHelper = clientIdHelper, - ) - - val tweetStoreStats = statsReceiver.scope("tweet_store") - - val tweetStatsStore = TweetStatsStore(tweetStoreStats.scope("stats")) - - val asyncRetryConfig = - new TweetStore.AsyncRetry( - asyncWriteRetryPolicy, - deferredrpcRetryPolicy, - timer, - clients.asyncRetryTweetService, - Scribe(FailedAsyncWrite, "tweetypie_failed_async_writes") - )(_, _) - - val manhattanStore = { - val scopedStats = tweetStoreStats.scope("base") - ManhattanTweetStore(clients.tweetStorageClient) - .tracked(scopedStats) - .asyncRetry(asyncRetryConfig(scopedStats, ManhattanTweetStore.Action)) - } - - val cachingTweetStore = { - val cacheStats = tweetStoreStats.scope("caching") - CachingTweetStore( - tweetKeyFactory = tweetKeyFactory, - tweetCache = caches.tweetCache, - stats = cacheStats - ).tracked(cacheStats) - .asyncRetry(asyncRetryConfig(cacheStats, CachingTweetStore.Action)) - .replicatedRetry(RetryHandler.failuresOnly(replicatedEventRetryPolicy, timer, cacheStats)) - } - - val indexingStore = { - val indexingStats = tweetStoreStats.scope("indexing") - TweetIndexingStore( - new TFlockIndexer( - tflock = clients.tflockWriteClient, - hasMedia = hasMedia, - backgroundIndexingPriority = 
settings.backgroundIndexingPriority, - stats = indexingStats - ) - ).tracked(indexingStats) - .asyncRetry(asyncRetryConfig(indexingStats, TweetIndexingStore.Action)) - } - - val timelineUpdatingStore = { - val tlsScope = tweetStoreStats.scope("timeline_updating") - TlsTimelineUpdatingStore( - processEvent2 = clients.timelineService.processEvent2, - hasMedia = hasMedia, - stats = tlsScope - ).tracked(tlsScope) - .asyncRetry(asyncRetryConfig(tlsScope, TlsTimelineUpdatingStore.Action)) - } - - val guanoServiceStore = { - val guanoStats = tweetStoreStats.scope("guano") - GuanoServiceStore(clients.guano, guanoStats) - .tracked(guanoStats) - .asyncRetry(asyncRetryConfig(guanoStats, GuanoServiceStore.Action)) - } - - val mediaServiceStore = { - val mediaStats = tweetStoreStats.scope("media") - MediaServiceStore(clients.mediaClient.deleteMedia, clients.mediaClient.undeleteMedia) - .tracked(mediaStats) - .asyncRetry(asyncRetryConfig(mediaStats, MediaServiceStore.Action)) - } - - val userCountsUpdatingStore = { - val userCountsStats = tweetStoreStats.scope("user_counts") - GizmoduckUserCountsUpdatingStore(clients.gizmoduck.incrCount, hasMedia) - .tracked(userCountsStats) - .ignoreFailures - } - - val tweetCountsUpdatingStore = { - val cacheScope = statsReceiver.scope("tweet_counts_cache") - val tweetCountsStats = tweetStoreStats.scope("tweet_counts") - - val memcacheCountsStore = { - val lockingCacheCountsStore = - CachedCountsStore.fromLockingCache(caches.tweetCountsCache) - - new AggregatingCachedCountsStore( - lockingCacheCountsStore, - timer, - settings.aggregatedTweetCountsFlushInterval, - settings.maxAggregatedCountsSize, - cacheScope - ) - } - - TweetCountsCacheUpdatingStore(memcacheCountsStore) - .tracked(tweetCountsStats) - .ignoreFailures - } - - val replicatingStore = { - val replicateStats = tweetStoreStats.scope("replicate_out") - ReplicatingTweetStore( - clients.replicationClient - ).tracked(replicateStats) - 
.retry(RetryHandler.failuresOnly(deferredrpcRetryPolicy, timer, replicateStats)) - .asyncRetry(asyncRetryConfig(replicateStats, ReplicatingTweetStore.Action)) - .enabledBy(Gate.const(settings.enableReplication)) - } - - val scribeMediaTagStore = - ScribeMediaTagStore() - .tracked(tweetStoreStats.scope("scribe_media_tag_store")) - - val limiterStore = - LimiterStore( - clients.limiterService.incrementByOne(Updates), - clients.limiterService.increment(MediaTagCreate) - ).tracked(tweetStoreStats.scope("limiter_store")) - - val geoSearchRequestIDStore = { - val statsScope = tweetStoreStats.scope("geo_search_request_id") - GeoSearchRequestIDStore(FutureArrow(clients.geoRelevance.reportConversion _)) - .tracked(statsScope) - .asyncRetry(asyncRetryConfig(statsScope, GeoSearchRequestIDStore.Action)) - } - - val userGeotagUpdateStore = { - val geotagScope = tweetStoreStats.scope("gizmoduck_user_geotag_updating") - GizmoduckUserGeotagUpdateStore( - clients.gizmoduck.modifyAndGet, - geotagScope - ).tracked(geotagScope) - .asyncRetry(asyncRetryConfig(geotagScope, GizmoduckUserGeotagUpdateStore.Action)) - } - - val fanoutServiceStore = { - val fanoutStats = tweetStoreStats.scope("fanout_service_delivery") - FanoutServiceStore(clients.fanoutServiceClient, fanoutStats) - .tracked(fanoutStats) - .asyncRetry(asyncRetryConfig(fanoutStats, FanoutServiceStore.Action)) - } - - /** - * A store that converts Tweetypie TweetEvents to EventBus TweetEvents and sends each event to - * the underlying FutureEffect[eventbus.TweetEvent] - */ - val eventBusEnqueueStore = { - val enqueueStats = tweetStoreStats.scope("event_bus_enqueueing") - val enqueueEffect = FutureEffect[TweetEvent](clients.tweetEventsPublisher.publish) - - TweetEventBusStore( - enqueueEffect - ).tracked(enqueueStats) - .asyncRetry(asyncRetryConfig(enqueueStats, AsyncWriteAction.EventBusEnqueue)) - } - - val retweetArchivalEnqueueStore = { - val enqueueStats = tweetStoreStats.scope("retweet_archival_enqueueing") - val 
enqueueEffect = FutureEffect(clients.retweetArchivalEventPublisher.publish) - - RetweetArchivalEnqueueStore(enqueueEffect) - .tracked(enqueueStats) - .asyncRetry(asyncRetryConfig(enqueueStats, AsyncWriteAction.RetweetArchivalEnqueue)) - } - - val asyncEnqueueStore = { - val asyncEnqueueStats = tweetStoreStats.scope("async_enqueueing") - AsyncEnqueueStore( - asyncBuilder.asyncVia(clients.asyncTweetService).service, - TweetBuilder.scrubUserInAsyncInserts, - TweetBuilder.scrubSourceTweetInAsyncInserts, - TweetBuilder.scrubSourceUserInAsyncInserts - ).tracked(asyncEnqueueStats) - .retry(RetryHandler.failuresOnly(deferredrpcRetryPolicy, timer, asyncEnqueueStats)) - } - - val insertTweetStore = - InsertTweet.Store( - logLensStore = logLensStore, - manhattanStore = manhattanStore, - tweetStatsStore = tweetStatsStore, - cachingTweetStore = cachingTweetStore, - limiterStore = limiterStore, - asyncEnqueueStore = asyncEnqueueStore, - userCountsUpdatingStore = userCountsUpdatingStore, - tweetCountsUpdatingStore = tweetCountsUpdatingStore - ) - - val asyncInsertStore = - AsyncInsertTweet.Store( - replicatingStore = replicatingStore, - indexingStore = indexingStore, - tweetCountsUpdatingStore = tweetCountsUpdatingStore, - timelineUpdatingStore = timelineUpdatingStore, - eventBusEnqueueStore = eventBusEnqueueStore, - fanoutServiceStore = fanoutServiceStore, - scribeMediaTagStore = scribeMediaTagStore, - userGeotagUpdateStore = userGeotagUpdateStore, - geoSearchRequestIDStore = geoSearchRequestIDStore - ) - - val replicatedInsertTweetStore = - ReplicatedInsertTweet.Store( - cachingTweetStore = cachingTweetStore, - tweetCountsUpdatingStore = tweetCountsUpdatingStore - ) - - val deleteTweetStore = - DeleteTweet.Store( - cachingTweetStore = cachingTweetStore, - asyncEnqueueStore = asyncEnqueueStore, - userCountsUpdatingStore = userCountsUpdatingStore, - tweetCountsUpdatingStore = tweetCountsUpdatingStore, - logLensStore = logLensStore - ) - - val asyncDeleteTweetStore = - 
AsyncDeleteTweet.Store( - manhattanStore = manhattanStore, - cachingTweetStore = cachingTweetStore, - replicatingStore = replicatingStore, - indexingStore = indexingStore, - eventBusEnqueueStore = eventBusEnqueueStore, - timelineUpdatingStore = timelineUpdatingStore, - tweetCountsUpdatingStore = tweetCountsUpdatingStore, - guanoServiceStore = guanoServiceStore, - mediaServiceStore = mediaServiceStore - ) - - val replicatedDeleteTweetStore = - ReplicatedDeleteTweet.Store( - cachingTweetStore = cachingTweetStore, - tweetCountsUpdatingStore = tweetCountsUpdatingStore - ) - - val incrBookmarkCountStore = - IncrBookmarkCount.Store( - asyncEnqueueStore = asyncEnqueueStore, - replicatingStore = replicatingStore - ) - - val asyncIncrBookmarkCountStore = - AsyncIncrBookmarkCount.Store( - tweetCountsUpdatingStore = tweetCountsUpdatingStore - ) - - val replicatedIncrBookmarkCountStore = - ReplicatedIncrBookmarkCount.Store( - tweetCountsUpdatingStore = tweetCountsUpdatingStore - ) - - val incrFavCountStore = - IncrFavCount.Store( - asyncEnqueueStore = asyncEnqueueStore, - replicatingStore = replicatingStore - ) - - val asyncIncrFavCountStore = - AsyncIncrFavCount.Store( - tweetCountsUpdatingStore = tweetCountsUpdatingStore - ) - - val replicatedIncrFavCountStore = - ReplicatedIncrFavCount.Store( - tweetCountsUpdatingStore = tweetCountsUpdatingStore - ) - - val scrubGeoStore = - ScrubGeo.Store( - logLensStore = logLensStore, - manhattanStore = manhattanStore, - cachingTweetStore = cachingTweetStore, - eventBusEnqueueStore = eventBusEnqueueStore, - replicatingStore = replicatingStore - ) - - val replicatedScrubGeoStore = - ReplicatedScrubGeo.Store( - cachingTweetStore = cachingTweetStore - ) - - val takedownStore = - Takedown.Store( - logLensStore = logLensStore, - manhattanStore = manhattanStore, - cachingTweetStore = cachingTweetStore, - asyncEnqueueStore = asyncEnqueueStore - ) - - val asyncTakedownStore = - AsyncTakedown.Store( - replicatingStore = replicatingStore, - 
guanoStore = guanoServiceStore, - eventBusEnqueueStore = eventBusEnqueueStore - ) - - val replicatedTakedownStore = - ReplicatedTakedown.Store( - cachingTweetStore = cachingTweetStore - ) - - val updatePossiblySensitiveTweetStore = - UpdatePossiblySensitiveTweet.Store( - manhattanStore = manhattanStore, - cachingTweetStore = cachingTweetStore, - logLensStore = logLensStore, - asyncEnqueueStore = asyncEnqueueStore - ) - - val asyncUpdatePossiblySensitiveTweetStore = - AsyncUpdatePossiblySensitiveTweet.Store( - manhattanStore = manhattanStore, - cachingTweetStore = cachingTweetStore, - replicatingStore = replicatingStore, - guanoStore = guanoServiceStore, - eventBusStore = eventBusEnqueueStore - ) - - val replicatedUpdatePossiblySensitiveTweetStore = - ReplicatedUpdatePossiblySensitiveTweet.Store( - cachingTweetStore = cachingTweetStore - ) - - val setAdditionalFieldsStore = - SetAdditionalFields.Store( - manhattanStore = manhattanStore, - cachingTweetStore = cachingTweetStore, - asyncEnqueueStore = asyncEnqueueStore, - logLensStore = logLensStore - ) - - val asyncSetAdditionalFieldsStore = - AsyncSetAdditionalFields.Store( - replicatingStore = replicatingStore, - eventBusEnqueueStore = eventBusEnqueueStore - ) - - val replicatedSetAdditionalFieldsStore = - ReplicatedSetAdditionalFields.Store( - cachingTweetStore = cachingTweetStore - ) - - val setRetweetVisibilityStore = - SetRetweetVisibility.Store(asyncEnqueueStore = asyncEnqueueStore) - - val asyncSetRetweetVisibilityStore = - AsyncSetRetweetVisibility.Store( - tweetIndexingStore = indexingStore, - tweetCountsCacheUpdatingStore = tweetCountsUpdatingStore, - replicatingTweetStore = replicatingStore, - retweetArchivalEnqueueStore = retweetArchivalEnqueueStore - ) - - val replicatedSetRetweetVisibilityStore = - ReplicatedSetRetweetVisibility.Store( - tweetCountsCacheUpdatingStore = tweetCountsUpdatingStore - ) - - val deleteAdditionalFieldsStore = - DeleteAdditionalFields.Store( - cachingTweetStore = 
cachingTweetStore, - asyncEnqueueStore = asyncEnqueueStore, - logLensStore = logLensStore - ) - - val asyncDeleteAdditionalFieldsStore = - AsyncDeleteAdditionalFields.Store( - manhattanStore = manhattanStore, - cachingTweetStore = cachingTweetStore, - replicatingStore = replicatingStore, - eventBusEnqueueStore = eventBusEnqueueStore - ) - - val replicatedDeleteAdditionalFieldsStore = - ReplicatedDeleteAdditionalFields.Store( - cachingTweetStore = cachingTweetStore - ) - - /* - * This composed store handles all synchronous side effects of an undelete - * but does not execute the undeletion. - * - * This store is executed after the actual undelete request succeeds. - * The undeletion request is initiated by Undelete.apply() - */ - val undeleteTweetStore = - UndeleteTweet.Store( - logLensStore = logLensStore, - cachingTweetStore = cachingTweetStore, - tweetCountsUpdatingStore = tweetCountsUpdatingStore, - asyncEnqueueStore = asyncEnqueueStore - ) - - val asyncUndeleteTweetStore = - AsyncUndeleteTweet.Store( - cachingTweetStore = cachingTweetStore, - eventBusEnqueueStore = eventBusEnqueueStore, - indexingStore = indexingStore, - replicatingStore = replicatingStore, - mediaServiceStore = mediaServiceStore, - timelineUpdatingStore = timelineUpdatingStore - ) - - val replicatedUndeleteTweetStore = - ReplicatedUndeleteTweet.Store( - cachingTweetStore = cachingTweetStore, - tweetCountsUpdatingStore = tweetCountsUpdatingStore - ) - - val flushStore = - Flush.Store( - cachingTweetStore = cachingTweetStore, - tweetCountsUpdatingStore = tweetCountsUpdatingStore - ) - - val scrubGeoUpdateUserTimestampStore = - ScrubGeoUpdateUserTimestamp.Store( - cache = caches.geoScrubCache, - setInManhattan = clients.geoScrubEventStore.setGeoScrubTimestamp, - geotagUpdateStore = userGeotagUpdateStore, - tweetEventBusStore = eventBusEnqueueStore - ) - - val quotedTweetDeleteStore = - QuotedTweetDelete.Store( - eventBusEnqueueStore = eventBusEnqueueStore - ) - - val quotedTweetTakedownStore = - 
QuotedTweetTakedown.Store( - eventBusEnqueueStore = eventBusEnqueueStore - ) - - new TotalTweetStore { - val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = - asyncDeleteAdditionalFieldsStore.asyncDeleteAdditionalFields - val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - asyncDeleteTweetStore.asyncDeleteTweet - val asyncIncrBookmarkCount: FutureEffect[AsyncIncrBookmarkCount.Event] = - asyncIncrBookmarkCountStore.asyncIncrBookmarkCount - val asyncIncrFavCount: FutureEffect[AsyncIncrFavCount.Event] = - asyncIncrFavCountStore.asyncIncrFavCount - val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = asyncInsertStore.asyncInsertTweet - val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] = - asyncSetAdditionalFieldsStore.asyncSetAdditionalFields - val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = - asyncSetRetweetVisibilityStore.asyncSetRetweetVisibility - val asyncTakedown: FutureEffect[AsyncTakedown.Event] = asyncTakedownStore.asyncTakedown - val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = - asyncUndeleteTweetStore.asyncUndeleteTweet - val asyncUpdatePossiblySensitiveTweet: FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event] = - asyncUpdatePossiblySensitiveTweetStore.asyncUpdatePossiblySensitiveTweet - val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = - deleteAdditionalFieldsStore.deleteAdditionalFields - val deleteTweet: FutureEffect[DeleteTweet.Event] = deleteTweetStore.deleteTweet - val flush: FutureEffect[Flush.Event] = flushStore.flush - val incrBookmarkCount: FutureEffect[IncrBookmarkCount.Event] = - incrBookmarkCountStore.incrBookmarkCount - val incrFavCount: FutureEffect[IncrFavCount.Event] = incrFavCountStore.incrFavCount - val insertTweet: FutureEffect[InsertTweet.Event] = insertTweetStore.insertTweet - val quotedTweetDelete: FutureEffect[QuotedTweetDelete.Event] = - quotedTweetDeleteStore.quotedTweetDelete - val 
quotedTweetTakedown: FutureEffect[QuotedTweetTakedown.Event] = - quotedTweetTakedownStore.quotedTweetTakedown - val replicatedDeleteAdditionalFields: FutureEffect[ReplicatedDeleteAdditionalFields.Event] = - replicatedDeleteAdditionalFieldsStore.replicatedDeleteAdditionalFields - val replicatedDeleteTweet: FutureEffect[ReplicatedDeleteTweet.Event] = - replicatedDeleteTweetStore.replicatedDeleteTweet - val replicatedIncrBookmarkCount: FutureEffect[ReplicatedIncrBookmarkCount.Event] = - replicatedIncrBookmarkCountStore.replicatedIncrBookmarkCount - val replicatedIncrFavCount: FutureEffect[ReplicatedIncrFavCount.Event] = - replicatedIncrFavCountStore.replicatedIncrFavCount - val replicatedInsertTweet: FutureEffect[ReplicatedInsertTweet.Event] = - replicatedInsertTweetStore.replicatedInsertTweet - val replicatedScrubGeo: FutureEffect[ReplicatedScrubGeo.Event] = - replicatedScrubGeoStore.replicatedScrubGeo - val replicatedSetAdditionalFields: FutureEffect[ReplicatedSetAdditionalFields.Event] = - replicatedSetAdditionalFieldsStore.replicatedSetAdditionalFields - val replicatedSetRetweetVisibility: FutureEffect[ReplicatedSetRetweetVisibility.Event] = - replicatedSetRetweetVisibilityStore.replicatedSetRetweetVisibility - val replicatedTakedown: FutureEffect[ReplicatedTakedown.Event] = - replicatedTakedownStore.replicatedTakedown - val replicatedUndeleteTweet: FutureEffect[ReplicatedUndeleteTweet.Event] = - replicatedUndeleteTweetStore.replicatedUndeleteTweet - val replicatedUpdatePossiblySensitiveTweet: FutureEffect[ - ReplicatedUpdatePossiblySensitiveTweet.Event - ] = - replicatedUpdatePossiblySensitiveTweetStore.replicatedUpdatePossiblySensitiveTweet - val retryAsyncDeleteAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] - ] = - asyncDeleteAdditionalFieldsStore.retryAsyncDeleteAdditionalFields - val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[AsyncDeleteTweet.Event]] = - asyncDeleteTweetStore.retryAsyncDeleteTweet - val 
retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[AsyncInsertTweet.Event]] = - asyncInsertStore.retryAsyncInsertTweet - val retryAsyncSetAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncSetAdditionalFields.Event] - ] = - asyncSetAdditionalFieldsStore.retryAsyncSetAdditionalFields - val retryAsyncSetRetweetVisibility: FutureEffect[ - TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] - ] = - asyncSetRetweetVisibilityStore.retryAsyncSetRetweetVisibility - val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = - asyncTakedownStore.retryAsyncTakedown - val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[AsyncUndeleteTweet.Event]] = - asyncUndeleteTweetStore.retryAsyncUndeleteTweet - val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] - ] = - asyncUpdatePossiblySensitiveTweetStore.retryAsyncUpdatePossiblySensitiveTweet - val scrubGeo: FutureEffect[ScrubGeo.Event] = scrubGeoStore.scrubGeo - val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = - setAdditionalFieldsStore.setAdditionalFields - val setRetweetVisibility: FutureEffect[SetRetweetVisibility.Event] = - setRetweetVisibilityStore.setRetweetVisibility - val takedown: FutureEffect[Takedown.Event] = takedownStore.takedown - val undeleteTweet: FutureEffect[UndeleteTweet.Event] = undeleteTweetStore.undeleteTweet - val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = - updatePossiblySensitiveTweetStore.updatePossiblySensitiveTweet - val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] = - scrubGeoUpdateUserTimestampStore.scrubGeoUpdateUserTimestamp - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.docx new file mode 100644 index 000000000..eacd72f87 Binary files /dev/null and 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala deleted file mode 100644 index a20def18a..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala +++ /dev/null @@ -1,91 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.decider.Decider -import com.twitter.tweetypie.decider.DeciderGates - -object TweetypieDeciderGates { - def apply( - _decider: Decider, - _overrides: Map[String, Boolean] = Map.empty - ): TweetypieDeciderGates = - new TweetypieDeciderGates { - override def decider: Decider = _decider - override def overrides: Map[String, Boolean] = _overrides - override def prefix: String = "tweetypie" - } -} - -trait TweetypieDeciderGates extends DeciderGates { - val checkSpamOnRetweet: Gate[Unit] = linear("check_spam_on_retweet") - val checkSpamOnTweet: Gate[Unit] = linear("check_spam_on_tweet") - val delayEraseUserTweets: Gate[Unit] = linear("delay_erase_user_tweets") - val denyNonTweetPermalinks: Gate[Unit] = linear("deny_non_tweet_permalinks") - val enableCommunityTweetCreates: Gate[Unit] = linear("enable_community_tweet_creates") - val useConversationControlFeatureSwitchResults: Gate[Unit] = linear( - "conversation_control_use_feature_switch_results") - val enableExclusiveTweetControlValidation: Gate[Unit] = linear( - "enable_exclusive_tweet_control_validation") - val enableTrustedFriendsControlValidation: Gate[Unit] = linear( - "enable_trusted_friends_control_validation" - ) - val enableStaleTweetValidation: Gate[Unit] = linear( - "enable_stale_tweet_validation" - ) - val enforceRateLimitedClients: Gate[Unit] = linear("enforce_rate_limited_clients") - val failClosedInVF: Gate[Unit] = linear("fail_closed_in_vf") - val forkDarkTraffic: Gate[Unit] = 
linear("fork_dark_traffic") - val hydrateConversationMuted: Gate[Unit] = linear("hydrate_conversation_muted") - val hydrateCounts: Gate[Unit] = linear("hydrate_counts") - val hydratePreviousCounts: Gate[Unit] = linear("hydrate_previous_counts") - val hydrateDeviceSources: Gate[Unit] = linear("hydrate_device_sources") - val hydrateEscherbirdAnnotations: Gate[Unit] = linear("hydrate_escherbird_annotations") - val hydrateGnipProfileGeoEnrichment: Gate[Unit] = linear("hydrate_gnip_profile_geo_enrichment") - val hydrateHasMedia: Gate[Unit] = linear("hydrate_has_media") - val hydrateMedia: Gate[Unit] = linear("hydrate_media") - val hydrateMediaRefs: Gate[Unit] = linear("hydrate_media_refs") - val hydrateMediaTags: Gate[Unit] = linear("hydrate_media_tags") - val hydratePastedMedia: Gate[Unit] = linear("hydrate_pasted_media") - val hydratePerspectives: Gate[Unit] = linear("hydrate_perspectives") - val hydratePerspectivesEditsForTimelines: Gate[Unit] = linear( - "hydrate_perspectives_edits_for_timelines") - val hydratePerspectivesEditsForTweetDetail: Gate[Unit] = linear( - "hydrate_perspectives_edits_for_tweet_details") - val hydratePerspectivesEditsForOtherSafetyLevels: Gate[Unit] = - linear("hydrate_perspectives_edits_for_other_levels") - val hydratePlaces: Gate[Unit] = linear("hydrate_places") - val hydrateScrubEngagements: Gate[Unit] = linear("hydrate_scrub_engagements") - val jiminyDarkRequests: Gate[Unit] = linear("jiminy_dark_requests") - val logCacheExceptions: Gate[Unit] = linear("log_cache_exceptions") - val logReads: Gate[Unit] = linear("log_reads") - val logTweetCacheWrites: Gate[TweetId] = byId("log_tweet_cache_writes") - val logWrites: Gate[Unit] = linear("log_writes") - val logYoungTweetCacheWrites: Gate[TweetId] = byId("log_young_tweet_cache_writes") - val maxRequestWidthEnabled: Gate[Unit] = linear("max_request_width_enabled") - val mediaRefsHydratorIncludePastedMedia: Gate[Unit] = linear( - "media_refs_hydrator_include_pasted_media") - val 
rateLimitByLimiterService: Gate[Unit] = linear("rate_limit_by_limiter_service") - val rateLimitTweetCreationFailure: Gate[Unit] = linear("rate_limit_tweet_creation_failure") - val replicateReadsToATLA: Gate[Unit] = linear("replicate_reads_to_atla") - val replicateReadsToPDXA: Gate[Unit] = linear("replicate_reads_to_pdxa") - val disableInviteViaMention: Gate[Unit] = linear("disable_invite_via_mention") - val shedReadTrafficVoluntarily: Gate[Unit] = linear("shed_read_traffic_voluntarily") - val preferForwardedServiceIdentifierForClientId: Gate[Unit] = - linear("prefer_forwarded_service_identifier_for_client_id") - val enableRemoveUnmentionedImplicitMentions: Gate[Unit] = linear( - "enable_remove_unmentioned_implicit_mentions") - val validateCardRefAttachmentAndroid: Gate[Unit] = linear("validate_card_ref_attachment_android") - val validateCardRefAttachmentNonAndroid: Gate[Unit] = linear( - "validate_card_ref_attachment_non_android") - val tweetVisibilityLibraryEnableParityTest: Gate[Unit] = linear( - "tweet_visibility_library_enable_parity_test") - val enableVfFeatureHydrationInQuotedTweetVLShim: Gate[Unit] = linear( - "enable_vf_feature_hydration_in_quoted_tweet_visibility_library_shim") - val disablePromotedTweetEdit: Gate[Unit] = linear("disable_promoted_tweet_edit") - val shouldMaterializeContainers: Gate[Unit] = linear("should_materialize_containers") - val checkTwitterBlueSubscriptionForEdit: Gate[Unit] = linear( - "check_twitter_blue_subscription_for_edit") - val hydrateBookmarksCount: Gate[Long] = byId("hydrate_bookmarks_count") - val hydrateBookmarksPerspective: Gate[Long] = byId("hydrate_bookmarks_perspective") - val setEditTimeWindowToSixtyMinutes: Gate[Unit] = linear("set_edit_time_window_to_sixty_minutes") -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.docx new file mode 100644 index 000000000..d9fac5799 Binary files 
/dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala deleted file mode 100644 index 621bb8148..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala +++ /dev/null @@ -1,223 +0,0 @@ -package com.twitter.tweetypie -package config - -import com.twitter.servo.util.FutureArrow -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.core._ -import com.twitter.tweetypie.handler.TweetBuilder -import com.twitter.tweetypie.handler.WritePathQueryOptions -import com.twitter.tweetypie.hydrator.EscherbirdAnnotationHydrator -import com.twitter.tweetypie.hydrator.LanguageHydrator -import com.twitter.tweetypie.hydrator.PlaceHydrator -import com.twitter.tweetypie.hydrator.ProfileGeoHydrator -import com.twitter.tweetypie.hydrator.TweetDataValueHydrator -import com.twitter.tweetypie.repository._ -import com.twitter.tweetypie.store.InsertTweet -import com.twitter.tweetypie.store.UndeleteTweet -import com.twitter.tweetypie.thriftscala._ -import com.twitter.tweetypie.util.EditControlUtil - -object WritePathHydration { - type HydrateQuotedTweet = - FutureArrow[(User, QuotedTweet, WritePathHydrationOptions), Option[QuoteTweetMetadata]] - - case class QuoteTweetMetadata( - quotedTweet: Tweet, - quotedUser: User, - quoterHasAlreadyQuotedTweet: Boolean) - - private val log = Logger(getClass) - - val UserFieldsForInsert: Set[UserField] = - TweetBuilder.userFields - - val AllowedMissingFieldsOnWrite: Set[FieldByPath] = - Set( - EscherbirdAnnotationHydrator.hydratedField, - LanguageHydrator.hydratedField, - PlaceHydrator.HydratedField, - ProfileGeoHydrator.hydratedField - ) - - /** - * Builds a FutureArrow that performs the necessary hydration in the write-path for a - * a InsertTweet.Event. 
There are two separate hydration steps, pre-cache and post-cache. - * The pre-cache hydration step performs the hydration which is safe to cache, while the - * post-cache hydration step performs the hydration whose results we don't want to cache - * on the tweet. - * - * TweetInsertEvent contains two tweet fields, `tweet` and `internalTweet`. `tweet` is - * the input value used for hydration, and in the updated InsertTweet.Event returned by the - * FutureArrow, `tweet` contains the post-cache hydrated tweet while `internalTweet` contains - * the pre-cache hydrated tweet. - */ - def hydrateInsertTweetEvent( - hydrateTweet: FutureArrow[(TweetData, TweetQuery.Options), TweetData], - hydrateQuotedTweet: HydrateQuotedTweet - ): FutureArrow[InsertTweet.Event, InsertTweet.Event] = - FutureArrow { event => - val cause = TweetQuery.Cause.Insert(event.tweet.id) - val hydrationOpts = event.hydrateOptions - val isEditControlEdit = event.tweet.editControl.exists(EditControlUtil.isEditControlEdit) - val queryOpts: TweetQuery.Options = - WritePathQueryOptions.insert(cause, event.user, hydrationOpts, isEditControlEdit) - - val initTweetData = - TweetData( - tweet = event.tweet, - sourceTweetResult = event.sourceTweet.map(TweetResult(_)) - ) - - for { - tweetData <- hydrateTweet((initTweetData, queryOpts)) - hydratedTweet = tweetData.tweet - internalTweet = - tweetData.cacheableTweetResult - .map(_.value.toCachedTweet) - .getOrElse( - throw new IllegalStateException(s"expected cacheableTweetResult, e=${event}")) - - optQt = getQuotedTweet(hydratedTweet) - .orElse(event.sourceTweet.flatMap(getQuotedTweet)) - - hydratedQT <- optQt match { - case None => Future.value(None) - case Some(qt) => hydrateQuotedTweet((event.user, qt, hydrationOpts)) - } - } yield { - event.copy( - tweet = hydratedTweet, - _internalTweet = Some(internalTweet), - quotedTweet = hydratedQT.map { case QuoteTweetMetadata(t, _, _) => t }, - quotedUser = hydratedQT.map { case QuoteTweetMetadata(_, u, _) => u }, - 
quoterHasAlreadyQuotedTweet = hydratedQT.exists { case QuoteTweetMetadata(_, _, b) => b } - ) - } - } - - /** - * Builds a FutureArrow for retrieving a quoted tweet metadata - * QuotedTweet struct. If either the quoted tweet or the quoted user - * isn't visible to the tweeting user, the FutureArrow will return None. - */ - def hydrateQuotedTweet( - tweetRepo: TweetRepository.Optional, - userRepo: UserRepository.Optional, - quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type - ): HydrateQuotedTweet = { - FutureArrow { - case (tweetingUser, qt, hydrateOptions) => - val tweetQueryOpts = WritePathQueryOptions.quotedTweet(tweetingUser, hydrateOptions) - val userQueryOpts = - UserQueryOptions( - UserFieldsForInsert, - UserVisibility.Visible, - forUserId = Some(tweetingUser.id) - ) - - Stitch.run( - Stitch - .join( - tweetRepo(qt.tweetId, tweetQueryOpts), - userRepo(UserKey.byId(qt.userId), userQueryOpts), - // We're failing open here on tflock exceptions since this should not - // affect the ability to quote tweet if tflock goes down. (although if - // this call doesn't succeed, quote counts may be inaccurate for a brief - // period of time) - quoterHasAlreadyQuotedRepo(qt.tweetId, tweetingUser.id).liftToTry - ) - .map { - case (Some(tweet), Some(user), isAlreadyQuoted) => - Some(QuoteTweetMetadata(tweet, user, isAlreadyQuoted.getOrElse(false))) - case _ => None - } - ) - } - } - - /** - * Builds a FutureArrow that performs any additional hydration on an UndeleteTweet.Event before - * being passed to a TweetStore. 
- */ - def hydrateUndeleteTweetEvent( - hydrateTweet: FutureArrow[(TweetData, TweetQuery.Options), TweetData], - hydrateQuotedTweet: HydrateQuotedTweet - ): FutureArrow[UndeleteTweet.Event, UndeleteTweet.Event] = - FutureArrow { event => - val cause = TweetQuery.Cause.Undelete(event.tweet.id) - val hydrationOpts = event.hydrateOptions - val isEditControlEdit = event.tweet.editControl.exists(EditControlUtil.isEditControlEdit) - val queryOpts = WritePathQueryOptions.insert(cause, event.user, hydrationOpts, isEditControlEdit) - - // when undeleting a retweet, don't set sourceTweetResult to enable SourceTweetHydrator to - // hydrate it - val initTweetData = TweetData(tweet = event.tweet) - - for { - tweetData <- hydrateTweet((initTweetData, queryOpts)) - hydratedTweet = tweetData.tweet - internalTweet = - tweetData.cacheableTweetResult - .map(_.value.toCachedTweet) - .getOrElse( - throw new IllegalStateException(s"expected cacheableTweetResult, e=${event}")) - - optQt = getQuotedTweet(hydratedTweet) - .orElse(tweetData.sourceTweetResult.map(_.value.tweet).flatMap(getQuotedTweet)) - - hydratedQt <- optQt match { - case None => Future.value(None) - case Some(qt) => hydrateQuotedTweet((event.user, qt, hydrationOpts)) - } - } yield { - event.copy( - tweet = hydratedTweet, - _internalTweet = Some(internalTweet), - sourceTweet = tweetData.sourceTweetResult.map(_.value.tweet), - quotedTweet = hydratedQt.map { case QuoteTweetMetadata(t, _, _) => t }, - quotedUser = hydratedQt.map { case QuoteTweetMetadata(_, u, _) => u }, - quoterHasAlreadyQuotedTweet = hydratedQt.exists { case QuoteTweetMetadata(_, _, b) => b } - ) - } - } - - /** - * Converts a TweetDataValueHydrator into a FutureArrow that hydrates a tweet for the write-path. 
- */ - def hydrateTweet( - hydrator: TweetDataValueHydrator, - stats: StatsReceiver, - allowedMissingFields: Set[FieldByPath] = AllowedMissingFieldsOnWrite - ): FutureArrow[(TweetData, TweetQuery.Options), TweetData] = { - val hydrationStats = stats.scope("hydration") - val missingFieldsStats = hydrationStats.scope("missing_fields") - - FutureArrow[(TweetData, TweetQuery.Options), TweetData] { - case (td, opts) => - Stitch - .run(hydrator(td, opts)) - .rescue { - case ex => - log.warn("Hydration failed with exception", ex) - Future.exception( - TweetHydrationError("Hydration failed with exception: " + ex, Some(ex)) - ) - } - .flatMap { r => - // Record missing fields even if the request succeeds) - for (missingField <- r.state.failedFields) - missingFieldsStats.counter(missingField.fieldIdPath.mkString(".")).incr() - - if ((r.state.failedFields -- allowedMissingFields).nonEmpty) { - Future.exception( - TweetHydrationError( - "Failed to hydrate. Missing Fields: " + r.state.failedFields.mkString(",") - ) - ) - } else { - Future.value(r.value) - } - } - } - }.trackOutcome(stats, (_: Any) => "hydration") -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.docx new file mode 100644 index 000000000..f8e602bdb Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala deleted file mode 100644 index a452abbd8..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.tweetypie - -import com.twitter.context.thriftscala.Viewer - -package object config { - // Bring Tweetypie permitted TwitterContext into scope - private[config] val TwitterContext = - 
com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) - - def getAppId: Option[AppId] = TwitterContext().getOrElse(Viewer()).clientApplicationId -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD deleted file mode 100644 index a1e3cab2d..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD +++ /dev/null @@ -1,19 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - platform = "java8", - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/bijection:core", - "featureswitches/featureswitches-core/src/main/scala", - "scrooge/scrooge-serializer/src/main/scala", - "tweetypie/servo/repo", - "tweetypie/servo/util", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "stitch/stitch-core", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/thrift:compiled-scala", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD.docx new file mode 100644 index 000000000..0da1b1db1 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.docx new file mode 100644 index 000000000..905baaa82 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala deleted file mode 100644 index ca185eb4a..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.tweetypie -package core - -import com.twitter.tweetypie.thriftscala.CardReference -import java.net.URI - -sealed trait CardUri -object Tombstone extends CardUri -case class NonTombstone(uri: String) extends CardUri - -object CardReferenceUriExtractor { - - private def parseAsUri(cardRef: CardReference) = Try(new URI(cardRef.cardUri)).toOption - private def isTombstone(uri: URI) = uri.getScheme == "tombstone" - - /** - * Parses a CardReference to return Option[CardUri] to differentiate among: - * - Some(NonTombstone): hydrate card2 with provided uri - * - Some(Tombstone): don't hydrate card2 - * - None: fallback and attempt to use url entities uris - */ - def unapply(cardRef: CardReference): Option[CardUri] = - parseAsUri(cardRef) match { - case Some(uri) if !isTombstone(uri) => Some(NonTombstone(uri.toString)) - case Some(uri) => Some(Tombstone) - - // If a cardReference is set, but does not parse as a URI, it's likely a https? URL with - // incorrectly encoded query params. 
Since these occur frequently in the wild, we'll - // attempt a card2 hydration with it - case None => Some(NonTombstone(cardRef.cardUri)) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.docx new file mode 100644 index 000000000..4df655826 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala deleted file mode 100644 index 8766675cb..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.twitter.tweetypie.core - -import com.twitter.servo.data.Mutation - -/** - * An EditState is a function that changes a value and may generate - * some state about what was modified. For instance, it may record - * whether an item was changed, or whether there was an error. - * EditStates are useful because they are first-class values that can - * be composed. In particular, it is useful to concurrently access - * external data to build edits and then apply them. - * - * @tparam A The type of the value that is being edited (for instance, - * having fields hydrated with data from another service) - */ -final case class EditState[A](run: A => ValueState[A]) { - - /** - * Composes two EditStates in sequence - */ - def andThen(other: EditState[A]): EditState[A] = - EditState[A] { a0: A => - val ValueState(a1, s1) = run(a0) - val ValueState(a2, s2) = other.run(a1) - ValueState(a2, s1 ++ s2) - } -} - -object EditState { - - /** - * Creates a "passthrough" EditState: - * Leaves A unchanged and produces empty state S - */ - def unit[A]: EditState[A] = - EditState[A](ValueState.unit[A]) - - /** - * Creates an `EditState[A]` using a `Mutation[A]`. 
- */ - def fromMutation[A](mut: Mutation[A]): EditState[A] = - EditState[A] { a => - mut(a) match { - case None => ValueState.unmodified(a) - case Some(a2) => ValueState.modified(a2) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.docx new file mode 100644 index 000000000..7e1620ff9 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala deleted file mode 100644 index b4a9fe157..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala +++ /dev/null @@ -1,14 +0,0 @@ -package com.twitter.tweetypie -package core - -import scala.util.control.NoStackTrace - -case class InternalServerError(message: String) extends Exception(message) with NoStackTrace - -case class OverCapacity(message: String) extends Exception(message) with NoStackTrace - -case class RateLimited(message: String) extends Exception(message) with NoStackTrace - -case class TweetHydrationError(message: String, cause: Option[Throwable] = None) - extends Exception(message, cause.getOrElse(null)) - with NoStackTrace diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.docx new file mode 100644 index 000000000..4f118dea5 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala deleted file mode 100644 index 0685446f3..000000000 --- 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala +++ /dev/null @@ -1,96 +0,0 @@ -package com.twitter.tweetypie.core - -import com.twitter.servo.util.ExceptionCategorizer -import com.twitter.spam.rtf.thriftscala.FilteredReason -import scala.util.control.NoStackTrace - -sealed trait FilteredState - -object FilteredState { - - /** - * The tweet exists and the filtered state was due to business rules - * (e.g. safety label filtering, or protected accounts). Note that - * Suppress and Unavailable can both have a FilteredReason. - */ - sealed trait HasFilteredReason extends FilteredState { - def filteredReason: FilteredReason - } - - /** - * The only FilteredState that is not an exception. It indicates that - * the tweet should be returned along with a suppress reason. This is - * sometimes known as "soft filtering". Only used by VF. - */ - case class Suppress(filteredReason: FilteredReason) extends FilteredState with HasFilteredReason - - /** - * FilteredStates that cause the tweet to be unavailable are modeled - * as an [[Exception]]. (Suppressed filtered states cannot be used as - * exceptions because they should not prevent the tweet from being - * returned.) This is sometimes known as "hard filtering". 
- */ - sealed abstract class Unavailable extends Exception with FilteredState with NoStackTrace - - object Unavailable { - // Used for Tweets that should be dropped because of VF rules - case class Drop(filteredReason: FilteredReason) extends Unavailable with HasFilteredReason - - // Used for Tweets that should be dropped and replaced with their preview because of VF rules - case class Preview(filteredReason: FilteredReason) extends Unavailable with HasFilteredReason - - // Used for Tweets that should be dropped because of Tweetypie business logic - case object DropUnspecified extends Unavailable with HasFilteredReason { - val filteredReason: FilteredReason = FilteredReason.UnspecifiedReason(true) - } - - // Represents a Deleted tweet (NotFound is represented with stitch.NotFound) - case object TweetDeleted extends Unavailable - - // Represents a Deleted tweet that violated Twitter Rules (see go/bounced-tweet) - case object BounceDeleted extends Unavailable - - // Represents both Deleted and NotFound source tweets - case class SourceTweetNotFound(deleted: Boolean) extends Unavailable - - // Used by the [[ReportedTweetFilter]] to signal that a Tweet has a "reported" perspective from TLS - case object Reported extends Unavailable with HasFilteredReason { - val filteredReason: FilteredReason = FilteredReason.ReportedTweet(true) - } - - // The following objects are used by the [[UserRepository]] to signal problems with the Tweet author - object Author { - case object NotFound extends Unavailable - - case object Deactivated extends Unavailable with HasFilteredReason { - val filteredReason: FilteredReason = FilteredReason.AuthorIsDeactivated(true) - } - - case object Offboarded extends Unavailable with HasFilteredReason { - val filteredReason: FilteredReason = FilteredReason.AuthorAccountIsInactive(true) - } - - case object Suspended extends Unavailable with HasFilteredReason { - val filteredReason: FilteredReason = FilteredReason.AuthorIsSuspended(true) - } - - case 
object Protected extends Unavailable with HasFilteredReason { - val filteredReason: FilteredReason = FilteredReason.AuthorIsProtected(true) - } - - case object Unsafe extends Unavailable with HasFilteredReason { - val filteredReason: FilteredReason = FilteredReason.AuthorIsUnsafe(true) - } - } - } - - /** - * Creates a new ExceptionCategorizer which returns an empty category for any - * Unavailable value, and forwards to `underlying` for anything else. - */ - def ignoringCategorizer(underlying: ExceptionCategorizer): ExceptionCategorizer = - ExceptionCategorizer { - case _: Unavailable => Set.empty - case t => underlying(t) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.docx new file mode 100644 index 000000000..ccc0715d3 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala deleted file mode 100644 index fae6377dd..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala +++ /dev/null @@ -1,3 +0,0 @@ -package com.twitter.tweetypie.core - -case class GeoSearchRequestId(requestID: String) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.docx new file mode 100644 index 000000000..8c735c597 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala deleted file mode 100644 index 8c6a05a84..000000000 --- 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala +++ /dev/null @@ -1,122 +0,0 @@ -package com.twitter.tweetypie.core - -import com.twitter.tweetypie.thriftscala.FieldByPath -import com.twitter.tweetypie.thriftscala.HydrationType - -/** - * HydrationState is used to record whether a particular piece of data was modified as a result - * of hydration, and/or if there was a failure to hydrate the data. - */ -sealed trait HydrationState { - def isEmpty: Boolean - def modified: Boolean - def completedHydrations: Set[HydrationType] = Set.empty - def failedFields: Set[FieldByPath] = Set.empty - def cacheErrorEncountered: Boolean = false - def ++(that: HydrationState): HydrationState -} - -object HydrationState { - - /** - * Base `HydrationState`. It acts as an identity value when combined with any other - * `HydrationState`. - */ - case object Empty extends HydrationState { - def isEmpty = true - def modified = false - def ++(that: HydrationState): HydrationState = that - } - - /** - * A `HydrationState` with metadata indicating a non-fatal hydration operation. - */ - case class Success( - override val modified: Boolean = false, - override val completedHydrations: Set[HydrationType] = Set.empty, - override val failedFields: Set[FieldByPath] = Set.empty, - override val cacheErrorEncountered: Boolean = false) - extends HydrationState { - - def isEmpty: Boolean = !modified && failedFields.isEmpty && !cacheErrorEncountered - - def ++(that: HydrationState): HydrationState = - that match { - case Empty => this - case that: Success => - HydrationState( - modified || that.modified, - completedHydrations ++ that.completedHydrations, - failedFields ++ that.failedFields, - cacheErrorEncountered || that.cacheErrorEncountered - ) - } - - /** - * An implementation of `copy` that avoids unnecessary allocations, by - * using the constant `HydrationState.unmodified` and `HydrationState.modified` - * values when possible. 
- */ - def copy( - modified: Boolean = this.modified, - completedHydrations: Set[HydrationType] = this.completedHydrations, - failedFields: Set[FieldByPath] = this.failedFields, - cacheErrorEncountered: Boolean = this.cacheErrorEncountered - ): HydrationState = - HydrationState(modified, completedHydrations, failedFields, cacheErrorEncountered) - } - - val empty: HydrationState = Empty - val modified: HydrationState = Success(true) - - def modified(completedHydration: HydrationType): HydrationState = - modified(Set(completedHydration)) - - def modified(completedHydrations: Set[HydrationType]): HydrationState = - Success(modified = true, completedHydrations = completedHydrations) - - def partial(failedField: FieldByPath): HydrationState = - partial(Set(failedField)) - - def partial(failedFields: Set[FieldByPath]): HydrationState = - Success(modified = false, failedFields = failedFields) - - def apply( - modified: Boolean, - completedHydrations: Set[HydrationType] = Set.empty, - failedFields: Set[FieldByPath] = Set.empty, - cacheErrorEncountered: Boolean = false - ): HydrationState = - if (completedHydrations.nonEmpty || failedFields.nonEmpty || cacheErrorEncountered) { - Success(modified, completedHydrations, failedFields, cacheErrorEncountered) - } else if (modified) { - HydrationState.modified - } else { - HydrationState.empty - } - - /** - * Creates a new HydrationState with modified set to true if `next` and `prev` are different, - * or false if they are the same. - */ - def delta[A](prev: A, next: A): HydrationState = - if (next != prev) modified else empty - - /** - * Join a list of HydrationStates into a single HydrationState. - * - * Note: this could just be a reduce over the HydrationStates but that would allocate - * _N_ HydrationStates. This approach also allows for shortcircuiting over the boolean - * fields. 
- */ - def join(states: HydrationState*): HydrationState = { - val statesSet = states.toSet - - HydrationState( - modified = states.exists(_.modified), - completedHydrations = statesSet.flatMap(_.completedHydrations), - failedFields = statesSet.flatMap(_.failedFields), - cacheErrorEncountered = states.exists(_.cacheErrorEncountered) - ) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.docx new file mode 100644 index 000000000..0d1d58e64 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala deleted file mode 100644 index 28f38a807..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.tweetypie.core - -import com.twitter.spam.rtf.thriftscala.FilteredReason -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.util.Try - -/** - * The data about a quoted tweet that needs to be carried forward to - * Tweetypie clients. 
- */ -sealed trait QuotedTweetResult { - def filteredReason: Option[FilteredReason] - def toOption: Option[TweetResult] - def map(f: TweetResult => TweetResult): QuotedTweetResult -} - -object QuotedTweetResult { - case object NotFound extends QuotedTweetResult { - def filteredReason: None.type = None - def toOption: None.type = None - def map(f: TweetResult => TweetResult): NotFound.type = this - } - case class Filtered(state: FilteredState.Unavailable) extends QuotedTweetResult { - def filteredReason: Option[FilteredReason] = - state match { - case st: FilteredState.HasFilteredReason => Some(st.filteredReason) - case _ => None - } - def toOption: None.type = None - def map(f: TweetResult => TweetResult): Filtered = this - } - case class Found(result: TweetResult) extends QuotedTweetResult { - def filteredReason: Option[FilteredReason] = result.value.suppress.map(_.filteredReason) - def toOption: Option[TweetResult] = Some(result) - def map(f: TweetResult => TweetResult): QuotedTweetResult = Found(f(result)) - } - - def fromTry(tryResult: Try[TweetResult]): Try[QuotedTweetResult] = - tryResult match { - case Return(result) => Return(Found(result)) - case Throw(state: FilteredState.Unavailable) => Return(Filtered(state)) - case Throw(com.twitter.stitch.NotFound) => Return(NotFound) - case Throw(e) => Throw(e) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.docx new file mode 100644 index 000000000..bbb1bf8e1 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala deleted file mode 100644 index e367a8481..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala +++ /dev/null @@ -1,31 +0,0 @@ 
-package com.twitter.tweetypie.core - -import com.twitter.servo.cache -import com.twitter.servo.cache.CachedSerializer -import com.twitter.tweetypie.thriftscala -import com.twitter.tweetypie.thriftscala.CachedTweet -import com.twitter.tweetypie.thriftscala.Tweet -import org.apache.thrift.protocol.TCompactProtocol - -/** - * A container object for serializers. - * Creates a serializer for every object type cached by the tweetypie service - */ -object Serializer { - lazy val CompactProtocolFactory: TCompactProtocol.Factory = new TCompactProtocol.Factory - - def toCached[T](underlying: cache.Serializer[T]): cache.CachedSerializer[T] = - new cache.CachedSerializer(underlying, CompactProtocolFactory) - - object Tweet { - lazy val Compact: cache.ThriftSerializer[thriftscala.Tweet] = - new cache.ThriftSerializer(thriftscala.Tweet, CompactProtocolFactory) - lazy val CachedCompact: CachedSerializer[Tweet] = toCached(Compact) - } - - object CachedTweet { - lazy val Compact: cache.ThriftSerializer[thriftscala.CachedTweet] = - new cache.ThriftSerializer(thriftscala.CachedTweet, CompactProtocolFactory) - lazy val CachedCompact: CachedSerializer[CachedTweet] = toCached(Compact) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.docx new file mode 100644 index 000000000..86b5ad610 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala deleted file mode 100644 index ab81552e9..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala +++ /dev/null @@ -1,42 +0,0 @@ -package com.twitter.tweetypie.core - -sealed trait StoredTweetResult { - def canHydrate: Boolean -} - -object 
StoredTweetResult { - sealed trait Error - object Error { - case object Corrupt extends Error - case object ScrubbedFieldsPresent extends Error - case object FieldsMissingOrInvalid extends Error - case object ShouldBeHardDeleted extends Error - } - - case class Present(errors: Seq[Error], canHydrate: Boolean) extends StoredTweetResult - - case class HardDeleted(softDeletedAtMsec: Long, hardDeletedAtMsec: Long) - extends StoredTweetResult { - override def canHydrate: Boolean = false - } - - case class SoftDeleted(softDeletedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) - extends StoredTweetResult - - case class BounceDeleted(deletedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) - extends StoredTweetResult - - case class Undeleted(undeletedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) - extends StoredTweetResult - - case class ForceAdded(addedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) - extends StoredTweetResult - - case class Failed(errors: Seq[Error]) extends StoredTweetResult { - override def canHydrate: Boolean = false - } - - object NotFound extends StoredTweetResult { - override def canHydrate: Boolean = false - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.docx new file mode 100644 index 000000000..c2698a279 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala deleted file mode 100644 index bc4402fa2..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.tweetypie.core - -import com.twitter.bouncer.thriftscala.Bounce -import 
com.twitter.tweetypie.TweetId -import com.twitter.incentives.jiminy.thriftscala.TweetNudge -import com.twitter.tweetypie.thriftscala.PostTweetResult -import com.twitter.tweetypie.thriftscala.TweetCreateState - -sealed abstract class TweetCreateFailure extends Exception { - def toPostTweetResult: PostTweetResult -} - -object TweetCreateFailure { - case class Bounced(bounce: Bounce) extends TweetCreateFailure { - override def toPostTweetResult: PostTweetResult = - PostTweetResult(state = TweetCreateState.Bounce, bounce = Some(bounce)) - } - - case class AlreadyRetweeted(retweetId: TweetId) extends TweetCreateFailure { - override def toPostTweetResult: PostTweetResult = - PostTweetResult(state = TweetCreateState.AlreadyRetweeted) - } - - case class Nudged(nudge: TweetNudge) extends TweetCreateFailure { - override def toPostTweetResult: PostTweetResult = - PostTweetResult(state = TweetCreateState.Nudge, nudge = Some(nudge)) - } - - case class State(state: TweetCreateState, reason: Option[String] = None) - extends TweetCreateFailure { - require(state != TweetCreateState.Bounce) - require(state != TweetCreateState.Ok) - require(state != TweetCreateState.Nudge) - - override def toPostTweetResult: PostTweetResult = - PostTweetResult(state = state, failureReason = reason) - override def toString: String = s"TweetCreateFailure$$State($state, $reason)" - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.docx new file mode 100644 index 000000000..14c7e2e48 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala deleted file mode 100644 index 8e72f1e89..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala +++ 
/dev/null @@ -1,86 +0,0 @@ -package com.twitter.tweetypie -package core - -import com.twitter.featureswitches.v2.FeatureSwitchResults -import com.twitter.tweetypie.thriftscala._ - -object TweetData { - object Lenses { - val tweet: Lens[TweetData, Tweet] = Lens[TweetData, Tweet](_.tweet, _.copy(_)) - - val suppress: Lens[TweetData, Option[FilteredState.Suppress]] = - Lens[TweetData, Option[FilteredState.Suppress]]( - _.suppress, - (td, suppress) => td.copy(suppress = suppress) - ) - - val sourceTweetResult: Lens[TweetData, Option[TweetResult]] = - Lens[TweetData, Option[TweetResult]]( - _.sourceTweetResult, - (td, sourceTweetResult) => td.copy(sourceTweetResult = sourceTweetResult) - ) - - val quotedTweetResult: Lens[TweetData, Option[QuotedTweetResult]] = - Lens[TweetData, Option[QuotedTweetResult]]( - _.quotedTweetResult, - (td, quotedTweetResult) => td.copy(quotedTweetResult = quotedTweetResult) - ) - - val cacheableTweetResult: Lens[TweetData, Option[TweetResult]] = - Lens[TweetData, Option[TweetResult]]( - _.cacheableTweetResult, - (td, cacheableTweetResult) => td.copy(cacheableTweetResult = cacheableTweetResult) - ) - - val tweetCounts: Lens[TweetData, Option[StatusCounts]] = - Lens[TweetData, Option[StatusCounts]]( - _.tweet.counts, - (td, tweetCounts) => td.copy(tweet = td.tweet.copy(counts = tweetCounts)) - ) - } - - def fromCachedTweet(cachedTweet: CachedTweet, cachedAt: Time): TweetData = - TweetData( - tweet = cachedTweet.tweet, - completedHydrations = cachedTweet.completedHydrations.toSet, - cachedAt = Some(cachedAt), - isBounceDeleted = cachedTweet.isBounceDeleted.contains(true) - ) -} - -/** - * Encapsulates a tweet and some hydration metadata in the hydration pipeline. - * - * @param cachedAt if the tweet was read from cache, `cachedAt` contains the time at which - * the tweet was written to cache. 
- */ -case class TweetData( - tweet: Tweet, - suppress: Option[FilteredState.Suppress] = None, - completedHydrations: Set[HydrationType] = Set.empty, - cachedAt: Option[Time] = None, - sourceTweetResult: Option[TweetResult] = None, - quotedTweetResult: Option[QuotedTweetResult] = None, - cacheableTweetResult: Option[TweetResult] = None, - storedTweetResult: Option[StoredTweetResult] = None, - featureSwitchResults: Option[FeatureSwitchResults] = None, - // The isBounceDeleted flag is only used when reading from an underlying - // tweet repo and caching records for not-found tweets. It only exists - // as a flag on TweetData to marshal bounce-deleted through the layered - // transforming caches injected into CachingTweetRepository, ultimately - // storing this flag in thrift on CachedTweet. - // - // During tweet hydration, TweetData.isBounceDeleted is unused and - // should always be false. - isBounceDeleted: Boolean = false) { - - def addHydrated(fieldIds: Set[HydrationType]): TweetData = - copy(completedHydrations = completedHydrations ++ fieldIds) - - def toCachedTweet: CachedTweet = - CachedTweet( - tweet = tweet, - completedHydrations = completedHydrations, - isBounceDeleted = if (isBounceDeleted) Some(true) else None - ) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.docx new file mode 100644 index 000000000..75cf25b14 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala deleted file mode 100644 index 317309be2..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.tweetypie.core - -import com.twitter.servo.data.Lens -import 
com.twitter.tweetypie.Mutation -import com.twitter.tweetypie.thriftscala.Tweet - -/** - * Helper class for building instances of `TweetResult`, which is a type alias - * for `ValueState[TweetData]`. - */ -object TweetResult { - object Lenses { - val value: Lens[TweetResult, TweetData] = - Lens[TweetResult, TweetData](_.value, (r, value) => r.copy(value = value)) - val state: Lens[TweetResult, HydrationState] = - Lens[TweetResult, HydrationState](_.state, (r, state) => r.copy(state = state)) - val tweet: Lens[TweetResult, Tweet] = value.andThen(TweetData.Lenses.tweet) - } - - def apply(value: TweetData, state: HydrationState = HydrationState.empty): TweetResult = - ValueState(value, state) - - def apply(tweet: Tweet): TweetResult = - apply(TweetData(tweet = tweet)) - - /** - * Apply this mutation to the tweet contained in the result, updating the modified flag if the mutation modifies the tweet. - */ - def mutate(mutation: Mutation[Tweet]): TweetResult => TweetResult = - (result: TweetResult) => - mutation(result.value.tweet) match { - case None => result - case Some(updatedTweet) => - TweetResult( - result.value.copy(tweet = updatedTweet), - result.state ++ HydrationState.modified - ) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.docx new file mode 100644 index 000000000..473775320 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala deleted file mode 100644 index 3acc2b2d8..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.twitter.tweetypie.core - -import scala.util.control.NoStackTrace - -/** - * Parent 
exception class for failures while talking to upstream services. These will - * be counted and then converted to servo.ServerError.DependencyError - */ -sealed abstract class UpstreamFailure(msg: String) extends Exception(msg) with NoStackTrace - -object UpstreamFailure { - case class SnowflakeFailure(t: Throwable) extends UpstreamFailure(t.toString) - - case object UserProfileEmptyException extends UpstreamFailure("User.profile is empty") - - case object UserViewEmptyException extends UpstreamFailure("User.view is empty") - - case object UserSafetyEmptyException extends UpstreamFailure("User.safety is empty") - - case class TweetLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) - - case class UserLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) - - case class DeviceSourceLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) - - case class TFlockLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) - - case class UrlShorteningFailure(t: Throwable) extends UpstreamFailure(t.toString) - - case object MediaShortenUrlMalformedFailure - extends UpstreamFailure("Media shortened url is malformed") - - case object MediaExpandedUrlNotValidFailure - extends UpstreamFailure("Talon returns badInput on media expanded url") - - case class MediaServiceServerError(t: Throwable) extends UpstreamFailure(t.toString) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.docx new file mode 100644 index 000000000..eb1906e7c Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala deleted file mode 100644 index e1b9ec0a4..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala +++ 
/dev/null @@ -1,452 +0,0 @@ -package com.twitter.tweetypie.core - -import com.twitter.servo.data.Lens -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.thriftscala.FieldByPath -import com.twitter.tweetypie.thriftscala.HydrationType - -/** - * Encapsulates a value and associated HydrationState. This class is intended to be used - * with `ValueHydrator`, as the result type for hydrators that directly produce updated values, - * in contrast with EditHydrator which uses `EditState` as a result type to produce update functions. - * - * @tparam A The type of the enclosed value, which is the result of hydration. - */ -final case class ValueState[+A](value: A, state: HydrationState) { - - /** - * Applies a function to the enclosed value and produces a new `ValueState` instance. - */ - def map[B](f: A => B): ValueState[B] = - ValueState(f(value), state) - - /** - * Produces a new `ValueState` that contains the value generated by `f`, but with state that is - * the sum of the state from this `ValueState` and the one produced by `f`. - */ - def flatMap[B](f: A => ValueState[B]): ValueState[B] = { - val ValueState(value2, state2) = f(value) - ValueState(value2, state ++ state2) - } - - /** - * Applies a function to the enclosed state and produces a new `ValueState` instance. - */ - def mapState[T](f: HydrationState => HydrationState): ValueState[A] = - ValueState(value, f(state)) - - /** - * Converts a `ValueState[A]` to an `EditState[B]`, using a lens. The resulting `EditState` - * will overwrite the lensed field with the value from this `ValueState`. 
- */ - def edit[B, A2 >: A](lens: Lens[B, A2]): EditState[B] = - EditState[B](b => ValueState(lens.set(b, value), state)) -} - -object ValueState { - val UnmodifiedNone: ValueState[None.type] = unmodified(None) - val StitchUnmodifiedNone: Stitch[ValueState[None.type]] = Stitch.value(UnmodifiedNone) - - val UnmodifiedUnit: ValueState[Unit] = unmodified(()) - val StitchUnmodifiedUnit: Stitch[ValueState[Unit]] = Stitch.value(UnmodifiedUnit) - - val UnmodifiedNil: ValueState[Nil.type] = unmodified(Nil) - val StitchUnmodifiedNil: Stitch[ValueState[Nil.type]] = Stitch.value(UnmodifiedNil) - - /** - * Produces a ValueState instance with the given value and an empty state HydrationState. - */ - def unit[A](value: A): ValueState[A] = - ValueState[A](value, HydrationState.empty) - - def unmodified[A](value: A): ValueState[A] = - ValueState(value, HydrationState.empty) - - def modified[A](value: A): ValueState[A] = - ValueState(value, HydrationState.modified) - - def modified[A](value: A, hydrationType: HydrationType): ValueState[A] = - ValueState(value, HydrationState.modified(hydrationType)) - - def success[A](value: A, modified: Boolean): ValueState[A] = - ValueState(value, HydrationState(modified)) - - def delta[A](prev: A, next: A): ValueState[A] = - ValueState(next, HydrationState.delta(prev, next)) - - def partial[A](value: A, field: FieldByPath): ValueState[A] = - ValueState(value, HydrationState.partial(field)) - - def partial[A](value: A, fields: Set[FieldByPath]): ValueState[A] = - ValueState(value, HydrationState.partial(fields)) - - /** - * Converts a `Seq` of `ValueState[A]` to a `ValueState` of `Seq[A]`. 
- */ - def sequence[A](seq: Seq[ValueState[A]]): ValueState[Seq[A]] = { - ValueState( - value = seq.map(_.value), - state = HydrationState.join(seq.map(_.state): _*) - ) - } - - def join[A, B](va: ValueState[A], vb: ValueState[B]): ValueState[(A, B)] = { - val state = - HydrationState.join( - va.state, - vb.state - ) - - val value = ( - va.value, - vb.value - ) - - ValueState(value, state) - } - - def join[A, B, C]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C] - ): ValueState[(A, B, C)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state - ) - - val value = ( - va.value, - vb.value, - vc.value - ) - - ValueState(value, state) - } - - def join[A, B, C, D]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C], - vd: ValueState[D] - ): ValueState[(A, B, C, D)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state, - vd.state - ) - - val value = ( - va.value, - vb.value, - vc.value, - vd.value - ) - - ValueState(value, state) - } - - def join[A, B, C, D, E]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C], - vd: ValueState[D], - ve: ValueState[E] - ): ValueState[(A, B, C, D, E)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state, - vd.state, - ve.state - ) - - val value = ( - va.value, - vb.value, - vc.value, - vd.value, - ve.value - ) - - ValueState(value, state) - } - - def join[A, B, C, D, E, F]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C], - vd: ValueState[D], - ve: ValueState[E], - vf: ValueState[F] - ): ValueState[(A, B, C, D, E, F)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state, - vd.state, - ve.state, - vf.state - ) - - val value = ( - va.value, - vb.value, - vc.value, - vd.value, - ve.value, - vf.value - ) - - ValueState(value, state) - } - - def join[A, B, C, D, E, F, G]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C], - vd: ValueState[D], - ve: ValueState[E], - vf: ValueState[F], - vg: 
ValueState[G] - ): ValueState[(A, B, C, D, E, F, G)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state, - vd.state, - ve.state, - vf.state, - vg.state - ) - - val value = ( - va.value, - vb.value, - vc.value, - vd.value, - ve.value, - vf.value, - vg.value - ) - - ValueState(value, state) - } - - def join[A, B, C, D, E, F, G, H]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C], - vd: ValueState[D], - ve: ValueState[E], - vf: ValueState[F], - vg: ValueState[G], - vh: ValueState[H] - ): ValueState[(A, B, C, D, E, F, G, H)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state, - vd.state, - ve.state, - vf.state, - vg.state, - vh.state - ) - - val value = ( - va.value, - vb.value, - vc.value, - vd.value, - ve.value, - vf.value, - vg.value, - vh.value - ) - - ValueState(value, state) - } - - def join[A, B, C, D, E, F, G, H, I]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C], - vd: ValueState[D], - ve: ValueState[E], - vf: ValueState[F], - vg: ValueState[G], - vh: ValueState[H], - vi: ValueState[I] - ): ValueState[(A, B, C, D, E, F, G, H, I)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state, - vd.state, - ve.state, - vf.state, - vg.state, - vh.state, - vi.state - ) - - val value = ( - va.value, - vb.value, - vc.value, - vd.value, - ve.value, - vf.value, - vg.value, - vh.value, - vi.value - ) - - ValueState(value, state) - } - - def join[A, B, C, D, E, F, G, H, I, J]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C], - vd: ValueState[D], - ve: ValueState[E], - vf: ValueState[F], - vg: ValueState[G], - vh: ValueState[H], - vi: ValueState[I], - vj: ValueState[J] - ): ValueState[(A, B, C, D, E, F, G, H, I, J)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state, - vd.state, - ve.state, - vf.state, - vg.state, - vh.state, - vi.state, - vj.state - ) - - val value = ( - va.value, - vb.value, - vc.value, - vd.value, - ve.value, - vf.value, 
- vg.value, - vh.value, - vi.value, - vj.value - ) - - ValueState(value, state) - } - - def join[A, B, C, D, E, F, G, H, I, J, K]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C], - vd: ValueState[D], - ve: ValueState[E], - vf: ValueState[F], - vg: ValueState[G], - vh: ValueState[H], - vi: ValueState[I], - vj: ValueState[J], - vk: ValueState[K] - ): ValueState[(A, B, C, D, E, F, G, H, I, J, K)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state, - vd.state, - ve.state, - vf.state, - vg.state, - vh.state, - vi.state, - vj.state, - vk.state - ) - - val value = ( - va.value, - vb.value, - vc.value, - vd.value, - ve.value, - vf.value, - vg.value, - vh.value, - vi.value, - vj.value, - vk.value - ) - - ValueState(value, state) - } - - def join[A, B, C, D, E, F, G, H, I, J, K, L]( - va: ValueState[A], - vb: ValueState[B], - vc: ValueState[C], - vd: ValueState[D], - ve: ValueState[E], - vf: ValueState[F], - vg: ValueState[G], - vh: ValueState[H], - vi: ValueState[I], - vj: ValueState[J], - vk: ValueState[K], - vl: ValueState[L] - ): ValueState[(A, B, C, D, E, F, G, H, I, J, K, L)] = { - val state = - HydrationState.join( - va.state, - vb.state, - vc.state, - vd.state, - ve.state, - vf.state, - vg.state, - vh.state, - vi.state, - vj.state, - vk.state, - vl.state - ) - - val value = ( - va.value, - vb.value, - vc.value, - vd.value, - ve.value, - vf.value, - vg.value, - vh.value, - vi.value, - vj.value, - vk.value, - vl.value - ) - - ValueState(value, state) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.docx new file mode 100644 index 000000000..a475b6f05 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala 
deleted file mode 100644 index 50952e98a..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala +++ /dev/null @@ -1,5 +0,0 @@ -package com.twitter.tweetypie - -package object core { - type TweetResult = ValueState[TweetData] -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD deleted file mode 100644 index 967226ca4..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", - "merlin/util/src/main/scala", - "tweetypie/servo/util/src/main/scala", - "tweetypie/servo/util/src/main/scala:exception", - "src/scala/com/twitter/ads/internal/pcl/service", - "src/thrift/com/twitter/ads/adserver:adserver_rpc-scala", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/merlin:thrift-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", - "stitch/stitch-core/src/main/scala/com/twitter/stitch", - "strato/src/main/scala/com/twitter/strato/access/filters", - "strato/src/main/scala/com/twitter/strato/catalog", - "strato/src/main/scala/com/twitter/strato/client", - "strato/src/main/scala/com/twitter/strato/config", - "strato/src/main/scala/com/twitter/strato/fed", - "strato/src/main/scala/com/twitter/strato/server/context", - "strato/src/main/scala/com/twitter/strato/thrift", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns", - 
"tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", - "tweetypie/server/src/main/thrift:compiled-scala", - "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD.docx new file mode 100644 index 000000000..f73acf219 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.docx new file mode 100644 index 000000000..aa27d0f3a Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala deleted file mode 100644 index 42841d393..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala +++ /dev/null @@ -1,128 +0,0 @@ -package com.twitter.tweetypie.federated - -import com.twitter.ads.internal.pcl.service.CallbackPromotedContentLogger -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.scrooge.ThriftStructFieldInfo -import com.twitter.servo.util.Gate -import com.twitter.strato.catalog.Catalog -import com.twitter.strato.client.Client -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.thrift.ScroogeConv -import com.twitter.tweetypie.ThriftTweetService -import 
com.twitter.tweetypie.Tweet -import com.twitter.tweetypie.backends.Gizmoduck -import com.twitter.tweetypie.federated.columns._ -import com.twitter.tweetypie.federated.context.GetRequestContext -import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataRepositoryBuilder -import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger -import com.twitter.tweetypie.repository.UnmentionInfoRepository -import com.twitter.tweetypie.repository.VibeRepository -import com.twitter.util.Activity -import com.twitter.util.logging.Logger - -object StratoCatalogBuilder { - - def catalog( - thriftTweetService: ThriftTweetService, - stratoClient: Client, - getUserResultsById: Gizmoduck.GetById, - callbackPromotedContentLogger: CallbackPromotedContentLogger, - statsReceiver: StatsReceiver, - enableCommunityTweetCreatesDecider: Gate[Unit], - ): Activity[Catalog[StratoFed.Column]] = { - val log = Logger(getClass) - - val getRequestContext = new GetRequestContext() - val prefetchedDataRepository = - PrefetchedDataRepositoryBuilder(getUserResultsById, statsReceiver) - val unmentionInfoRepository = UnmentionInfoRepository(stratoClient) - val vibeRepository = VibeRepository(stratoClient) - - val tweetPromotedContentLogger = - TweetPromotedContentLogger(callbackPromotedContentLogger) - - // A stitch group builder to be used for Federated Field Column requests. 
The handler must be the same across - // all Federated Field Columns to ensure requests are batched across columns for different fields - val federatedFieldGroupBuilder: FederatedFieldGroupBuilder.Type = FederatedFieldGroupBuilder( - thriftTweetService.getTweetFields) - - val columns: Seq[StratoFed.Column] = Seq( - new UnretweetColumn( - thriftTweetService.unretweet, - getRequestContext, - ), - new CreateRetweetColumn( - thriftTweetService.postRetweet, - getRequestContext, - prefetchedDataRepository, - tweetPromotedContentLogger, - statsReceiver - ), - new CreateTweetColumn( - thriftTweetService.postTweet, - getRequestContext, - prefetchedDataRepository, - unmentionInfoRepository, - vibeRepository, - tweetPromotedContentLogger, - statsReceiver, - enableCommunityTweetCreatesDecider, - ), - new DeleteTweetColumn( - thriftTweetService.deleteTweets, - getRequestContext, - ), - new GetTweetFieldsColumn(thriftTweetService.getTweetFields, statsReceiver), - new GetStoredTweetsColumn(thriftTweetService.getStoredTweets), - new GetStoredTweetsByUserColumn(thriftTweetService.getStoredTweetsByUser) - ) - - // Gather tweet field ids that are eligible to be federated field columns - val federatedFieldInfos = - Tweet.fieldInfos - .filter((info: ThriftStructFieldInfo) => - FederatedFieldColumn.isFederatedField(info.tfield.id)) - - // Instantiate the federated field columns - val federatedFieldColumns: Seq[FederatedFieldColumn] = - federatedFieldInfos.map { fieldInfo: ThriftStructFieldInfo => - val path = FederatedFieldColumn.makeColumnPath(fieldInfo.tfield) - val stratoType = ScroogeConv.typeOfFieldInfo(fieldInfo) - log.info(f"creating federated column: $path") - new FederatedFieldColumn( - federatedFieldGroupBuilder, - thriftTweetService.setAdditionalFields, - stratoType, - fieldInfo.tfield, - ) - } - - // Instantiate the federated V1 field columns - val federatedV1FieldColumns: Seq[FederatedFieldColumn] = - federatedFieldInfos - .filter(f => 
FederatedFieldColumn.isMigrationFederatedField(f.tfield)) - .map { fieldInfo: ThriftStructFieldInfo => - val v1Path = FederatedFieldColumn.makeV1ColumnPath(fieldInfo.tfield) - val stratoType = ScroogeConv.typeOfFieldInfo(fieldInfo) - log.info(f"creating V1 federated column: $v1Path") - new FederatedFieldColumn( - federatedFieldGroupBuilder, - thriftTweetService.setAdditionalFields, - stratoType, - fieldInfo.tfield, - Some(v1Path) - ) - } - - // Combine the dynamic and hard coded federated columns - val allColumns: Seq[StratoFed.Column] = - columns ++ federatedFieldColumns ++ federatedV1FieldColumns - - Activity.value( - Catalog( - allColumns.map { column => - column.path -> column - }: _* - )) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.docx new file mode 100644 index 000000000..b005bf977 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala deleted file mode 100644 index af3ee9fd2..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.tweetypie.federated.columns - -import com.twitter.passbird.bitfield.clientprivileges.thriftscala.{Constants => ClientAppPrivileges} -import com.twitter.strato.access.Access.AuthenticatedTwitterUserNotSuspended -import com.twitter.strato.access.Access.ClientApplicationPrivilege -import com.twitter.strato.access.Access.TwitterUserNotSuspended -import com.twitter.strato.access.ClientApplicationPrivilegeVariant -import com.twitter.strato.config._ - -object AccessPolicy { - - /** - * All Tweet Mutation operations require 
all of: - * - Twitter user authentication - * - Twitter user is not suspended - * - Contributor user, if provided, is not suspended - * - "Teams Access": user is acting their own behalf, or is a - * contributor using a client with ClientAppPriviledges.CONTRIBUTORS - * - Write privileges - */ - val TweetMutationCommonAccessPolicies: Policy = - AllOf( - Seq( - AllowTwitterUserId, - Has( - TwitterUserNotSuspended - ), - Has( - AuthenticatedTwitterUserNotSuspended - ), - AnyOf( - Seq( - TwitterUserContributingAsSelf, - Has(principal = ClientApplicationPrivilege(ClientApplicationPrivilegeVariant - .byId(ClientAppPrivileges.CONTRIBUTORS.toShort).get)) - )), - AllowWritableAccessToken - ) - ) - -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.docx new file mode 100644 index 000000000..8133a7c18 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala deleted file mode 100644 index 82550e2c5..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala +++ /dev/null @@ -1,110 +0,0 @@ -package com.twitter.tweetypie.federated -package columns - -import com.twitter.bouncer.thriftscala.Bounce -import com.twitter.finagle.http.Status -import com.twitter.finatra.api11 -import com.twitter.finatra.api11.ApiError -import com.twitter.strato.response.Err - -object ApiErrors { - // Errs ported from StatusesRetweetController - val GenericAccessDeniedErr = toErr(ApiError.GenericAccessDenied) - val AlreadyRetweetedErr = toErr(ApiError.AlreadyRetweeted) - val DuplicateStatusErr = toErr(ApiError.DuplicateStatusError) - val InvalidRetweetForStatusErr = 
toErr(ApiError.InvalidRetweetForStatus) - val StatusNotFoundErr = toErr(ApiError.StatusNotFound) - val BlockedUserErr = - toErr(ApiError.BlockedUserError, "retweeting this user's tweets at their request") - val ClientNotPrivilegedErr = toErr(ApiError.ClientNotPrivileged) - val UserDeniedRetweetErr = toErr(ApiError.CurrentUserSuspended) - - // Errs ported from StatusesUpdateController - val RateLimitExceededErr = toErr(ApiError.OverStatusUpdateLimit, "User") - val TweetUrlSpamErr = toErr(ApiError.TieredActionTweetUrlSpam) - val TweetSpammerErr = toErr(ApiError.TieredActionTweetSpammer) - val CaptchaChallengeErr = toErr(ApiError.TieredActionChallengeCaptcha) - val SafetyRateLimitExceededErr = toErr(ApiError.UserActionRateLimitExceeded, "User") - val TweetCannotBeBlankErr = // was MissingRequiredParameterException - toErr(ApiError.ForbiddenMissingParameter, "tweet_text or media") - val TweetTextTooLongErr = toErr(ApiError.StatusTooLongError) - val MalwareTweetErr = toErr(ApiError.StatusMalwareError) - val DuplicateTweetErr = toErr(ApiError.DuplicateStatusError) - val CurrentUserSuspendedErr = toErr(ApiError.CurrentUserSuspended) - val MentionLimitExceededErr = toErr(ApiError.MentionLimitInTweetExceeded) - val UrlLimitExceededErr = toErr(ApiError.UrlLimitInTweetExceeded) - val HashtagLimitExceededErr = toErr(ApiError.HashtagLimitInTweetExceeded) - val CashtagLimitExceededErr = toErr(ApiError.CashtagLimitInTweetExceeded) - val HashtagLengthLimitExceededErr = toErr(ApiError.HashtagLengthLimitInTweetExceeded) - val TooManyAttachmentTypesErr = toErr(ApiError.AttachmentTypesLimitInTweetExceeded) - val InvalidAttachmentUrlErr = toErr(ApiError.InvalidParameter("attachment_url")) - val InReplyToTweetNotFoundErr = toErr(ApiError.InReplyToTweetNotFound) - val InvalidAdditionalFieldErr = toErr(ApiError.GenericBadRequest) - def invalidAdditionalFieldWithReasonErr(failureReason: String) = - toErr(ApiError.GenericBadRequest.copy(message = failureReason)) - val InvalidUrlErr = 
toErr(ApiError.InvalidUrl) - val InvalidCoordinatesErr = toErr(ApiError.InvalidCoordinates) - val InvalidGeoSearchRequestIdErr = - toErr(ApiError.InvalidParameter("geo_search_request_id")) - val ConversationControlNotAuthorizedErr = toErr(ApiError.ConversationControlNotAuthorized) - val ConversationControlInvalidErr = toErr(ApiError.ConversationControlInvalid) - val ConversationControlReplyRestricted = toErr(ApiError.ConversationControlReplyRestricted) - - // Errors ported from StatusesDestroyController - val DeletePermissionErr = toErr(ApiError.StatusActionPermissionError("delete")) - - // See StatusesUpdateController#GenericErrorException - val GenericTweetCreateErr = toErr(ApiError.UnknownInterpreterError, "Tweet creation failed") - val InvalidBatchModeParameterErr = toErr(ApiError.InvalidParameter("batch_mode")) - val CannotConvoControlAndCommunitiesErr = - toErr(ApiError.CommunityInvalidParams, "conversation_control") - val TooManyCommunitiesErr = toErr(ApiError.CommunityInvalidParams, "communities") - val CommunityReplyTweetNotAllowedErr = toErr(ApiError.CommunityReplyTweetNotAllowed) - val ConversationControlNotSupportedErr = toErr(ApiError.ConversationControlNotSupported) - val CommunityUserNotAuthorizedErr = toErr(ApiError.CommunityUserNotAuthorized) - val CommunityNotFoundErr = toErr(ApiError.CommunityNotFound) - val CommunityProtectedUserCannotTweetErr = toErr(ApiError.CommunityProtectedUserCannotTweet) - - val SuperFollowCreateNotAuthorizedErr = toErr(ApiError.SuperFollowsCreateNotAuthorized) - val SuperFollowInvalidParamsErr = toErr(ApiError.SuperFollowsInvalidParams) - val ExclusiveTweetEngagementNotAllowedErr = toErr(ApiError.ExclusiveTweetEngagementNotAllowed) - - val SafetyLevelMissingErr = toErr(ApiError.MissingParameter("safety_level")) - - def accessDeniedByBouncerErr(bounce: Bounce) = - toErr(ApiError.AccessDeniedByBouncer, bounce.errorMessage.getOrElse(Seq.empty)) - - def tweetEngagementLimitedErr(failureReason: String) = - 
toErr(ApiError.TweetEngagementsLimited(failureReason)) - - def invalidMediaErr(failureReason: Option[String]) = - toErr(ApiError.invalidMediaId(failureReason)) - - val TrustedFriendsInvalidParamsErr = toErr(ApiError.TrustedFriendsInvalidParams) - val TrustedFriendsRetweetNotAllowedErr = toErr(ApiError.TrustedFriendsRetweetNotAllowed) - val TrustedFriendsEngagementNotAllowedErr = toErr(ApiError.TrustedFriendsEngagementNotAllowed) - val TrustedFriendsCreateNotAllowedErr = toErr(ApiError.TrustedFriendsCreateNotAllowed) - val TrustedFriendsQuoteTweetNotAllowedErr = toErr(ApiError.TrustedFriendsQuoteTweetNotAllowed) - - val StaleTweetEngagementNotAllowedErr = toErr(ApiError.StaleTweetEngagementNotAllowed) - val StaleTweetQuoteTweetNotAllowedErr = toErr(ApiError.StaleTweetQuoteTweetNotAllowed) - val StaleTweetRetweetNotAllowedErr = toErr(ApiError.StaleTweetRetweetNotAllowed) - - val CollabTweetInvalidParamsErr = toErr(ApiError.CollabTweetInvalidParams) - - val FieldEditNotAllowedErr = toErr(ApiError.FieldEditNotAllowed) - val NotEligibleForEditErr = toErr(ApiError.NotEligibleForEdit) - - def toErr(apiError: api11.ApiError, args: Any*): Err = { - val errCode = apiError.status match { - case Status.Forbidden => Err.Authorization - case Status.Unauthorized => Err.Authentication - case Status.NotFound => Err.BadRequest - case Status.BadRequest => Err.BadRequest - case _ => Err.BadRequest - } - val errMessage = s"${apiError.message.format(args.mkString(","))} (${apiError.code})" - val errContext = Some(Err.Context.Api11Error(apiError.code)) - Err(errCode, errMessage, errContext) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD deleted file mode 100644 index 7148dfa4b..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD +++ /dev/null @@ -1,43 +0,0 @@ -scala_library( - sources = ["*.scala"], - 
compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "decider", - "finagle/finagle-base-http/src/main", - "finatra-internal/api11/src/main/scala/com/twitter/finatra/api11:errors", - "geo/model/src/main/scala/com/twitter/geo/model", - "passbird/bitfields-thrift/src/main/thrift:thrift-scala", - "tweetypie/servo/util/src/main/scala:exception", - "src/scala/com/twitter/accounts/util:safety-meta", - "src/thrift/com/twitter/ads/adserver:ad_engagement_details-scala", - "src/thrift/com/twitter/ads/adserver:preroll_metadata-scala", - "src/thrift/com/twitter/ads/callback:engagement_request-scala", - "src/thrift/com/twitter/bouncer:bounce-action-thrift-scala", - "src/thrift/com/twitter/consumer_privacy/mention_controls:thrift-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-service-federated-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", - "stitch/stitch-core/src/main/scala/com/twitter/stitch", - "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql-scala", - "strato/config/src/thrift/com/twitter/strato/graphql:graphql-scala", - "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql-scala", - "strato/src/main/scala/com/twitter/strato/client", - "strato/src/main/scala/com/twitter/strato/fed", - "strato/src/main/scala/com/twitter/strato/response", - "strato/src/main/scala/com/twitter/strato/thrift", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", - 
"tweetypie/server/src/main/thrift:compiled-scala", - "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", - "tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides", - "tweetypie/common/src/scala/com/twitter/tweetypie/util", - "vibes/src/main/thrift/com/twitter/vibes:vibes-scala", - "weaverbird/common/src/main/scala/com/twitter/weaverbird/common", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD.docx new file mode 100644 index 000000000..1f2fbbbd2 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.docx new file mode 100644 index 000000000..a1ef141e4 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala deleted file mode 100644 index 0acf695d2..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala +++ /dev/null @@ -1,184 +0,0 @@ -package com.twitter.tweetypie -package federated.columns - -import com.twitter.accounts.util.SafetyMetadataUtils -import com.twitter.ads.callback.thriftscala.EngagementRequest -import com.twitter.bouncer.thriftscala.{Bounce => BouncerBounce} -import com.twitter.stitch.Stitch -import com.twitter.strato.catalog.OpMetadata -import com.twitter.strato.config.AllOf -import com.twitter.strato.config.BouncerAccess -import com.twitter.strato.config.ContactInfo -import 
com.twitter.strato.config.Policy -import com.twitter.strato.data.Conv -import com.twitter.strato.data.Description.PlainText -import com.twitter.strato.data.Lifecycle.Production -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.opcontext.OpContext -import com.twitter.strato.response.Err -import com.twitter.strato.thrift.ScroogeConv -import com.twitter.tweetypie.federated.columns.ApiErrors._ -import com.twitter.tweetypie.federated.columns.CreateRetweetColumn.toCreateRetweetErr -import com.twitter.tweetypie.federated.context.GetRequestContext -import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataRequest -import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataResponse -import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger -import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger.RetweetEngagement -import com.twitter.tweetypie.thriftscala.TweetCreateState._ -import com.twitter.tweetypie.thriftscala.{graphql => gql} -import com.twitter.tweetypie.{thriftscala => thrift} -import com.twitter.weaverbird.common.{GetRequestContext => WGetRequestContext} - -class CreateRetweetColumn( - retweet: thrift.RetweetRequest => Future[thrift.PostTweetResult], - getRequestContext: GetRequestContext, - prefetchedDataRepository: PrefetchedDataRequest => Stitch[PrefetchedDataResponse], - logTweetPromotedContent: TweetPromotedContentLogger.Type, - statsReceiver: StatsReceiver, -) extends StratoFed.Column(CreateRetweetColumn.Path) - with StratoFed.Execute.StitchWithContext - with StratoFed.HandleDarkRequests { - - override val policy: Policy = AllOf( - Seq(AccessPolicy.TweetMutationCommonAccessPolicies, BouncerAccess())) - - // The underlying call to thriftTweetService.postRetweet is not idempotent - override val isIdempotent: Boolean = false - - override type Arg = gql.CreateRetweetRequest - override type Result = gql.CreateRetweetResponseWithSubqueryPrefetchItems - - override val argConv: 
Conv[Arg] = ScroogeConv.fromStruct - override val resultConv: Conv[Result] = ScroogeConv.fromStruct - - override val contactInfo: ContactInfo = TweetypieContactInfo - override val metadata: OpMetadata = OpMetadata( - Some(Production), - Some(PlainText("Creates a retweet by the calling Twitter user of the given source tweet."))) - - private val getWeaverbirdCtx = new WGetRequestContext() - - override def execute(request: Arg, opContext: OpContext): Stitch[Result] = { - val ctx = getRequestContext(opContext) - - // First, do any request parameter validation that can result in an error - // prior to calling into thriftTweetService.retweet. - val safetyLevel = ctx.safetyLevel.getOrElse(throw SafetyLevelMissingErr) - - // Macaw-tweets returns ApiError.ClientNotPrivileged if the caller provides - // an impression_id but lacks the PROMOTED_TWEETS_IN_TIMELINE privilege. - val trackingId = request.engagementRequest match { - case Some(engagementRequest: EngagementRequest) if ctx.hasPrivilegePromotedTweetsInTimeline => - TrackingId.parse(engagementRequest.impressionId, statsReceiver) - case Some(e: EngagementRequest) => - throw ClientNotPrivilegedErr - case None => - None - } - - // DeviceSource is an oauth string computed from the ClientApplicationId. - // Macaw-tweets allows non-oauth callers, but GraphQL does not. An undefined - // ClientApplicationId is similar to TweetCreateState.DeviceSourceNotFound, - // which Macaw-tweets handles via a catch-all that returns - // ApiError.GenericAccessDenied - val deviceSource = ctx.deviceSource.getOrElse(throw GenericAccessDeniedErr) - - // Macaw-tweets doesn't perform any parameter validation for the components - // used as input to makeSafetyMetaData. 
- val safetyMetadata = SafetyMetadataUtils.makeSafetyMetaData( - sessionHash = ctx.sessionHash, - knownDeviceToken = ctx.knownDeviceToken, - contributorId = ctx.contributorId - ) - - val thriftRetweetRequest = thrift.RetweetRequest( - sourceStatusId = request.tweetId, - userId = ctx.twitterUserId, - contributorUserId = None, // no longer supported, per tweet_service.thrift - createdVia = deviceSource, - nullcast = request.nullcast, - trackingId = trackingId, - dark = ctx.isDarkRequest, - hydrationOptions = Some(HydrationOptions.writePathHydrationOptions(ctx.cardsPlatformKey)), - safetyMetaData = Some(safetyMetadata), - ) - - val stitchRetweet = Stitch.callFuture(retweet(thriftRetweetRequest)) - - request.engagementRequest.foreach { engagement => - logTweetPromotedContent(engagement, RetweetEngagement, ctx.isDarkRequest) - } - - stitchRetweet.flatMap { result: thrift.PostTweetResult => - result.state match { - case thrift.TweetCreateState.Ok => - val r = PrefetchedDataRequest( - tweet = result.tweet.get, - sourceTweet = result.sourceTweet, - quotedTweet = result.quotedTweet, - safetyLevel = safetyLevel, - requestContext = getWeaverbirdCtx() - ) - - prefetchedDataRepository(r) - .liftToOption() - .map((prefetchedData: Option[PrefetchedDataResponse]) => { - gql.CreateRetweetResponseWithSubqueryPrefetchItems( - data = Some(gql.CreateRetweetResponse(result.tweet.map(_.id))), - subqueryPrefetchItems = prefetchedData.map(_.value) - ) - }) - case errState => - throw toCreateRetweetErr(errState, result.bounce, result.failureReason) - } - } - } -} - -object CreateRetweetColumn { - val Path = "tweetypie/createRetweet.Tweet" - - /** - * Ported from: - * StatusesRetweetController#retweetStatus rescue block - * TweetyPieStatusRepository.toRetweetException - */ - def toCreateRetweetErr( - errState: thrift.TweetCreateState, - bounce: Option[BouncerBounce], - failureReason: Option[String] - ): Err = errState match { - case CannotRetweetBlockingUser => - BlockedUserErr - case 
AlreadyRetweeted => - AlreadyRetweetedErr - case Duplicate => - DuplicateStatusErr - case CannotRetweetOwnTweet | CannotRetweetProtectedTweet | CannotRetweetSuspendedUser => - InvalidRetweetForStatusErr - case UserNotFound | SourceTweetNotFound | SourceUserNotFound | CannotRetweetDeactivatedUser => - StatusNotFoundErr - case UserDeactivated | UserSuspended => - UserDeniedRetweetErr - case RateLimitExceeded => - RateLimitExceededErr - case UrlSpam => - TweetUrlSpamErr - case Spam | UserReadonly => - TweetSpammerErr - case SafetyRateLimitExceeded => - SafetyRateLimitExceededErr - case Bounce if bounce.isDefined => - accessDeniedByBouncerErr(bounce.get) - case DisabledByIpiPolicy => - failureReason - .map(tweetEngagementLimitedErr) - .getOrElse(GenericAccessDeniedErr) - case TrustedFriendsRetweetNotAllowed => - TrustedFriendsRetweetNotAllowedErr - case StaleTweetRetweetNotAllowed => - StaleTweetRetweetNotAllowedErr - case _ => - GenericAccessDeniedErr - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.docx new file mode 100644 index 000000000..52d7ef93b Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala deleted file mode 100644 index 3530d68d8..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala +++ /dev/null @@ -1,546 +0,0 @@ -package com.twitter.tweetypie -package federated.columns - -import com.twitter.accounts.util.SafetyMetadataUtils -import com.twitter.ads.callback.thriftscala.EngagementRequest -import com.twitter.bouncer.thriftscala.{Bounce => BouncerBounce} -import 
com.twitter.escherbird.thriftscala.TweetEntityAnnotation -import com.twitter.geo.model.LatitudeLongitude -import com.twitter.stitch.Stitch -import com.twitter.strato.catalog.OpMetadata -import com.twitter.strato.config.AllOf -import com.twitter.strato.config.BouncerAccess -import com.twitter.strato.config.ContactInfo -import com.twitter.strato.config.Policy -import com.twitter.strato.data.Conv -import com.twitter.strato.data.Description.PlainText -import com.twitter.strato.data.Lifecycle.Production -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.opcontext.OpContext -import com.twitter.strato.response.Err -import com.twitter.strato.thrift.ScroogeConv -import com.twitter.tweetypie.decider.overrides.TweetyPieDeciderOverrides -import com.twitter.tweetypie.federated.columns.ApiErrors._ -import com.twitter.tweetypie.federated.columns.CreateTweetColumn.toCreateTweetErr -import com.twitter.tweetypie.federated.context.GetRequestContext -import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataRequest -import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataResponse -import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger -import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger._ -import com.twitter.tweetypie.repository.UnmentionInfoRepository -import com.twitter.tweetypie.repository.VibeRepository -import com.twitter.tweetypie.thriftscala.TransientCreateContext -import com.twitter.tweetypie.thriftscala.TweetCreateContextKey -import com.twitter.tweetypie.thriftscala.TweetCreateState._ -import com.twitter.tweetypie.thriftscala.{graphql => gql} -import com.twitter.tweetypie.util.CommunityAnnotation -import com.twitter.tweetypie.util.ConversationControls -import com.twitter.tweetypie.util.TransientContextUtil -import com.twitter.tweetypie.{thriftscala => thrift} -import com.twitter.util.Throwables -import com.twitter.weaverbird.common.{GetRequestContext => WGetRequestContext} - -class 
CreateTweetColumn( - postTweet: thrift.PostTweetRequest => Future[thrift.PostTweetResult], - getRequestContext: GetRequestContext, - prefetchedDataRepository: PrefetchedDataRequest => Stitch[PrefetchedDataResponse], - unmentionInfoRepository: UnmentionInfoRepository.Type, - vibeRepository: VibeRepository.Type, - logTweetPromotedContent: TweetPromotedContentLogger.Type, - statsReceiver: StatsReceiver, - enableCommunityTweetCreatesDecider: Gate[Unit], -) extends StratoFed.Column(CreateTweetColumn.Path) - with StratoFed.Execute.StitchWithContext - with StratoFed.HandleDarkRequests { - - override val policy: Policy = AllOf( - Seq(AccessPolicy.TweetMutationCommonAccessPolicies, BouncerAccess())) - - // The underlying call to thriftTweetService.postRetweet is not idempotent - override val isIdempotent: Boolean = false - - override type Arg = gql.CreateTweetRequest - override type Result = gql.CreateTweetResponseWithSubqueryPrefetchItems - - override val argConv: Conv[Arg] = ScroogeConv.fromStruct - override val resultConv: Conv[Result] = ScroogeConv.fromStruct - - override val contactInfo: ContactInfo = TweetypieContactInfo - override val metadata: OpMetadata = - OpMetadata( - Some(Production), - Some( - PlainText( - """ - Creates a tweet using the calling authenticated Twitter user as author. - NOTE, not all Tweet space fields are GraphQL queryable in the CreateTweet mutation response. - See http://go/missing-create-tweet-fields. - """)) - ) - - private val getWeaverbirdCtx = new WGetRequestContext() - - override def execute(request: Arg, opContext: OpContext): Stitch[Result] = { - - val ctx = getRequestContext(opContext) - - // First, do any request parameter validation that can result in an error - // prior to calling into thriftTweetService.postTweet. 
- val safetyLevel = ctx.safetyLevel.getOrElse(throw SafetyLevelMissingErr) - - val trackingId = request.engagementRequest match { - case Some(engagementRequest: EngagementRequest) if ctx.hasPrivilegePromotedTweetsInTimeline => - TrackingId.parse(engagementRequest.impressionId, statsReceiver) - case Some(e: EngagementRequest) => - throw ClientNotPrivilegedErr - case None => - None - } - - val deviceSource = ctx.deviceSource.getOrElse(throw GenericAccessDeniedErr) - - if (request.nullcast && !ctx.hasPrivilegeNullcastingAccess) { - throw GenericAccessDeniedErr - } - - val safetyMetadata = SafetyMetadataUtils.makeSafetyMetaData( - sessionHash = ctx.sessionHash, - knownDeviceToken = ctx.knownDeviceToken, - contributorId = ctx.contributorId - ) - - val cardReference: Option[thrift.CardReference] = - request.cardUri.filter(_.nonEmpty).map(thrift.CardReference(_)) - - val escherbirdEntityAnnotations: Option[thrift.EscherbirdEntityAnnotations] = - request.semanticAnnotationIds - .filter(_.nonEmpty) - .map((seq: Seq[gql.TweetAnnotation]) => seq.map(parseTweetEntityAnnotation)) - .map(thrift.EscherbirdEntityAnnotations(_)) - - val mediaEntities = request.media.map(_.mediaEntities) - val mediaUploadIds = mediaEntities.map(_.map(_.mediaId)).filter(_.nonEmpty) - - val mediaTags: Option[thrift.TweetMediaTags] = { - val mediaTagsAuthorized = !ctx.isContributorRequest - - val tagMap: Map[MediaId, Seq[thrift.MediaTag]] = - mediaEntities - .getOrElse(Nil) - .filter(_ => mediaTagsAuthorized) - .filter(_.taggedUsers.nonEmpty) - .map(mediaEntity => - mediaEntity.mediaId -> - mediaEntity.taggedUsers - .map(user_id => thrift.MediaTag(thrift.MediaTagType.User, Some(user_id)))) - .toMap - - Option(tagMap) - .filter(_.nonEmpty) - .map(thrift.TweetMediaTags(_)) - } - - // Can not have both conversation controls and communities defined for a tweet - // as they have conflicting permissions on who can reply to the tweet. 
- val communities = parseCommunityIds(escherbirdEntityAnnotations) - if (request.conversationControl.isDefined && communities.nonEmpty) { - throw CannotConvoControlAndCommunitiesErr - } - - // Currently we do not support posting to multiple communities. - if (communities.length > 1) { - throw TooManyCommunitiesErr - } - - // Kill switch for community tweets in case we need to disable them for app security. - if (communities.nonEmpty && !enableCommunityTweetCreatesDecider()) { - throw CommunityUserNotAuthorizedErr - } - - // additionalFields is used to marshal multiple input params and - // should only be defined if one or more of those params are defined. - val additionalFields: Option[Tweet] = - cardReference - .orElse(escherbirdEntityAnnotations) - .orElse(mediaTags) - .map(_ => - thrift.Tweet( - 0L, - cardReference = cardReference, - escherbirdEntityAnnotations = escherbirdEntityAnnotations, - mediaTags = mediaTags - )) - - val transientContext: Option[TransientCreateContext] = - parseTransientContext( - request.batchCompose, - request.periscope, - ctx.twitterUserId, - ) - - // PostTweetRequest.additionalContext is marked as deprecated in favor of .transientContext, - // but the REST API still supports it and it is still passed along through Tweetypie, and - // FanoutService and Notifications still depend on it. 
- val additionalContext: Option[Map[TweetCreateContextKey, String]] = - transientContext.map(TransientContextUtil.toAdditionalContext) - - val thriftPostTweetRequest = thrift.PostTweetRequest( - userId = ctx.twitterUserId, - text = request.tweetText, - createdVia = deviceSource, - inReplyToTweetId = request.reply.map(_.inReplyToTweetId), - geo = request.geo.flatMap(parseTweetCreateGeo), - autoPopulateReplyMetadata = request.reply.isDefined, - excludeReplyUserIds = request.reply.map(_.excludeReplyUserIds).filter(_.nonEmpty), - nullcast = request.nullcast, - // Send a dark request to Tweetypie if the dark_request directive is set or - // if the Tweet is undo-able. - dark = ctx.isDarkRequest || request.undoOptions.exists(_.isUndo), - hydrationOptions = Some(HydrationOptions.writePathHydrationOptions(ctx.cardsPlatformKey)), - remoteHost = ctx.remoteHost, - safetyMetaData = Some(safetyMetadata), - attachmentUrl = request.attachmentUrl, - mediaUploadIds = mediaUploadIds, - mediaMetadata = None, - transientContext = transientContext, - additionalContext = additionalContext, - conversationControl = request.conversationControl.map(parseTweetCreateConversationControl), - exclusiveTweetControlOptions = request.exclusiveTweetControlOptions.map { _ => - thrift.ExclusiveTweetControlOptions() - }, - trustedFriendsControlOptions = - request.trustedFriendsControlOptions.map(parseTrustedFriendsControlOptions), - editOptions = request.editOptions.flatMap(_.previousTweetId.map(thrift.EditOptions(_))), - collabControlOptions = request.collabControlOptions.map(parseCollabControlOptions), - additionalFields = additionalFields, - trackingId = trackingId, - noteTweetOptions = request.noteTweetOptions.map(options => - thrift.NoteTweetOptions( - options.noteTweetId, - options.mentionedScreenNames, - options.mentionedUserIds, - options.isExpandable)) - ) - - val stitchPostTweet = - Stitch.callFuture { - TweetyPieDeciderOverrides.ConversationControlUseFeatureSwitchResults.On { - 
postTweet(thriftPostTweetRequest) - } - } - - for { - engagement <- request.engagementRequest - if !request.reply.exists(_.inReplyToTweetId == 0) // no op per go/rb/845242 - engagementType = if (request.reply.isDefined) ReplyEngagement else TweetEngagement - } logTweetPromotedContent(engagement, engagementType, ctx.isDarkRequest) - - stitchPostTweet.flatMap { result: thrift.PostTweetResult => - result.state match { - - case thrift.TweetCreateState.Ok => - val unmentionSuccessCounter = statsReceiver.counter("unmention_info_success") - val unmentionFailuresCounter = statsReceiver.counter("unmention_info_failures") - val unmentionFailuresScope = statsReceiver.scope("unmention_info_failures") - - val unmentionInfoStitch = result.tweet match { - case Some(tweet) => - unmentionInfoRepository(tweet) - .onFailure { t => - unmentionFailuresCounter.incr() - unmentionFailuresScope.counter(Throwables.mkString(t): _*).incr() - } - .onSuccess { _ => - unmentionSuccessCounter.incr() - } - .rescue { - case _ => - Stitch.None - } - case _ => - Stitch.None - } - - val vibeSuccessCounter = statsReceiver.counter("vibe_success") - val vibeFailuresCounter = statsReceiver.counter("vibe_failures") - val vibeFailuresScope = statsReceiver.scope("vibe_failures") - - val vibeStitch = result.tweet match { - case Some(tweet) => - vibeRepository(tweet) - .onSuccess { _ => - vibeSuccessCounter.incr() - } - .onFailure { t => - vibeFailuresCounter.incr() - vibeFailuresScope.counter(Throwables.mkString(t): _*).incr() - } - .rescue { - case _ => - Stitch.None - } - case _ => - Stitch.None - } - - Stitch - .join(unmentionInfoStitch, vibeStitch) - .liftToOption() - .flatMap { prefetchFields => - val r = PrefetchedDataRequest( - tweet = result.tweet.get, - sourceTweet = result.sourceTweet, - quotedTweet = result.quotedTweet, - safetyLevel = safetyLevel, - unmentionInfo = prefetchFields.flatMap(params => params._1), - vibe = prefetchFields.flatMap(params => params._2), - requestContext = 
getWeaverbirdCtx() - ) - - prefetchedDataRepository(r) - .liftToOption() - .map((prefetchedData: Option[PrefetchedDataResponse]) => { - gql.CreateTweetResponseWithSubqueryPrefetchItems( - data = Some(gql.CreateTweetResponse(result.tweet.map(_.id))), - subqueryPrefetchItems = prefetchedData.map(_.value) - ) - }) - } - - case errState => - throw toCreateTweetErr(errState, result.bounce, result.failureReason) - } - } - } - - private[this] def parseTweetCreateGeo(gqlGeo: gql.TweetGeo): Option[thrift.TweetCreateGeo] = { - val coordinates: Option[thrift.GeoCoordinates] = - gqlGeo.coordinates.map { coords => - LatitudeLongitude.of(coords.latitude, coords.longitude) match { - case Return(latlon: LatitudeLongitude) => - thrift.GeoCoordinates( - latitude = latlon.latitudeDegrees, - longitude = latlon.longitudeDegrees, - geoPrecision = latlon.precision, - display = coords.displayCoordinates - ) - case Throw(_) => - throw InvalidCoordinatesErr - } - } - - val geoSearchRequestId = gqlGeo.geoSearchRequestId.map { id => - if (id.isEmpty) { - throw InvalidGeoSearchRequestIdErr - } - thrift.TweetGeoSearchRequestID(id) - } - - if (coordinates.isEmpty && gqlGeo.placeId.isEmpty) { - None - } else { - Some( - thrift.TweetCreateGeo( - coordinates = coordinates, - placeId = gqlGeo.placeId, - geoSearchRequestId = geoSearchRequestId - )) - } - } - - private[this] def parseTweetCreateConversationControl( - gqlCC: gql.TweetConversationControl - ): thrift.TweetCreateConversationControl = - gqlCC.mode match { - case gql.ConversationControlMode.ByInvitation => - ConversationControls.Create.byInvitation() - case gql.ConversationControlMode.Community => - ConversationControls.Create.community() - case gql.ConversationControlMode.EnumUnknownConversationControlMode(_) => - throw ConversationControlNotSupportedErr - } - - private[this] def parseTweetEntityAnnotation( - gqlTweetAnnotation: gql.TweetAnnotation - ): TweetEntityAnnotation = - TweetEntityAnnotation( - gqlTweetAnnotation.groupId, - 
gqlTweetAnnotation.domainId, - gqlTweetAnnotation.entityId - ) - - private[this] def parseCommunityIds( - escherbirdAnnotations: Option[thrift.EscherbirdEntityAnnotations] - ): Seq[Long] = - escherbirdAnnotations - .map(_.entityAnnotations).getOrElse(Nil) - .flatMap { - case CommunityAnnotation(id) => Seq(id) - case _ => Nil - } - - private[this] def parseBatchMode( - gqlBatchComposeMode: gql.BatchComposeMode - ): thrift.BatchComposeMode = { - - gqlBatchComposeMode match { - case gql.BatchComposeMode.BatchFirst => - thrift.BatchComposeMode.BatchFirst - case gql.BatchComposeMode.BatchSubsequent => - thrift.BatchComposeMode.BatchSubsequent - case gql.BatchComposeMode.EnumUnknownBatchComposeMode(_) => - throw InvalidBatchModeParameterErr - } - } - - private[this] def parseTransientContext( - gqlBatchComposeMode: Option[gql.BatchComposeMode], - gqlPeriscope: Option[gql.TweetPeriscopeContext], - twitterUserId: UserId, - ): Option[TransientCreateContext] = { - val batchComposeMode = gqlBatchComposeMode.map(parseBatchMode) - - // Per c.t.fanoutservice.model.Tweet#deviceFollowType, isLive=None and Some(false) are - // equivalent and the creatorId is discarded in both cases. 
- val periscopeIsLive = gqlPeriscope.map(_.isLive).filter(_ == true) - val periscopeCreatorId = if (periscopeIsLive.isDefined) Some(twitterUserId) else None - - if (batchComposeMode.isDefined || periscopeIsLive.isDefined) { - Some( - thrift.TransientCreateContext( - batchCompose = batchComposeMode, - periscopeIsLive = periscopeIsLive, - periscopeCreatorId = periscopeCreatorId - ) - ) - } else { - None - } - } - - private[this] def parseTrustedFriendsControlOptions( - gqlTrustedFriendsControlOptions: gql.TrustedFriendsControlOptions - ): thrift.TrustedFriendsControlOptions = { - thrift.TrustedFriendsControlOptions( - trustedFriendsListId = gqlTrustedFriendsControlOptions.trustedFriendsListId - ) - } - - private[this] def parseCollabControlOptions( - gqlCollabControlOptions: gql.CollabControlOptions - ): thrift.CollabControlOptions = { - gqlCollabControlOptions.collabControlType match { - case gql.CollabControlType.CollabInvitation => - thrift.CollabControlOptions.CollabInvitation( - thrift.CollabInvitationOptions( - collaboratorUserIds = gqlCollabControlOptions.collaboratorUserIds - ) - ) - case gql.CollabControlType.EnumUnknownCollabControlType(_) => - throw CollabTweetInvalidParamsErr - } - } -} - -object CreateTweetColumn { - val Path = "tweetypie/createTweet.Tweet" - - def toCreateTweetErr( - errState: thrift.TweetCreateState, - bounce: Option[BouncerBounce], - failureReason: Option[String] - ): Err = errState match { - case TextCannotBeBlank => - TweetCannotBeBlankErr - case TextTooLong => - TweetTextTooLongErr - case Duplicate => - DuplicateStatusErr - case MalwareUrl => - MalwareTweetErr - case UserDeactivated | UserSuspended => - // should not occur since this condition is caught by access policy filters - CurrentUserSuspendedErr - case RateLimitExceeded => - RateLimitExceededErr - case UrlSpam => - TweetUrlSpamErr - case Spam | UserReadonly => - TweetSpammerErr - case SpamCaptcha => - CaptchaChallengeErr - case SafetyRateLimitExceeded => - 
SafetyRateLimitExceededErr - case Bounce if bounce.isDefined => - accessDeniedByBouncerErr(bounce.get) - case MentionLimitExceeded => - MentionLimitExceededErr - case UrlLimitExceeded => - UrlLimitExceededErr - case HashtagLimitExceeded => - HashtagLimitExceededErr - case CashtagLimitExceeded => - CashtagLimitExceededErr - case HashtagLengthLimitExceeded => - HashtagLengthLimitExceededErr - case TooManyAttachmentTypes => - TooManyAttachmentTypesErr - case InvalidUrl => - InvalidUrlErr - case DisabledByIpiPolicy => - failureReason - .map(tweetEngagementLimitedErr) - .getOrElse(GenericTweetCreateErr) - case InvalidAdditionalField => - failureReason - .map(invalidAdditionalFieldWithReasonErr) - .getOrElse(InvalidAdditionalFieldErr) - // InvalidImage has been deprecated by tweetypie. Use InvalidMedia instead. - case InvalidMedia | InvalidImage | MediaNotFound => - invalidMediaErr(failureReason) - case InReplyToTweetNotFound => - InReplyToTweetNotFoundErr - case InvalidAttachmentUrl => - InvalidAttachmentUrlErr - case ConversationControlNotAllowed => - ConversationControlNotAuthorizedErr - case InvalidConversationControl => - ConversationControlInvalidErr - case ReplyTweetNotAllowed => - ConversationControlReplyRestricted - case ExclusiveTweetEngagementNotAllowed => - ExclusiveTweetEngagementNotAllowedErr - case CommunityReplyTweetNotAllowed => - CommunityReplyTweetNotAllowedErr - case CommunityUserNotAuthorized => - CommunityUserNotAuthorizedErr - case CommunityNotFound => - CommunityNotFoundErr - case SuperFollowsInvalidParams => - SuperFollowInvalidParamsErr - case SuperFollowsCreateNotAuthorized => - SuperFollowCreateNotAuthorizedErr - case CommunityProtectedUserCannotTweet => - CommunityProtectedUserCannotTweetErr - case TrustedFriendsInvalidParams => - TrustedFriendsInvalidParamsErr - case TrustedFriendsEngagementNotAllowed => - TrustedFriendsEngagementNotAllowedErr - case TrustedFriendsCreateNotAllowed => - TrustedFriendsCreateNotAllowedErr - case 
TrustedFriendsQuoteTweetNotAllowed => - TrustedFriendsQuoteTweetNotAllowedErr - case CollabTweetInvalidParams => - CollabTweetInvalidParamsErr - case StaleTweetEngagementNotAllowed => - StaleTweetEngagementNotAllowedErr - case StaleTweetQuoteTweetNotAllowed => - StaleTweetQuoteTweetNotAllowedErr - case FieldEditNotAllowed => - FieldEditNotAllowedErr - case NotEligibleForEdit => - NotEligibleForEditErr - case _ => - GenericTweetCreateErr - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.docx new file mode 100644 index 000000000..684f55bc6 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.scala deleted file mode 100644 index 48828d7da..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.scala +++ /dev/null @@ -1,81 +0,0 @@ -package com.twitter.tweetypie.federated.columns - -import com.twitter.stitch.Stitch -import com.twitter.strato.catalog.OpMetadata -import com.twitter.strato.config.ContactInfo -import com.twitter.strato.config.Policy -import com.twitter.strato.data.Conv -import com.twitter.strato.data.Description.PlainText -import com.twitter.strato.data.Lifecycle.Production -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.opcontext.OpContext -import com.twitter.strato.thrift.ScroogeConv -import com.twitter.tweetypie.federated.context.GetRequestContext -import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataResponse -import com.twitter.tweetypie.thriftscala.TweetDeleteState -import com.twitter.tweetypie.thriftscala.{graphql => gql} -import 
com.twitter.tweetypie.{thriftscala => thrift} -import com.twitter.util.Future - -class DeleteTweetColumn( - deleteTweet: thrift.DeleteTweetsRequest => Future[Seq[thrift.DeleteTweetResult]], - getRequestContext: GetRequestContext, -) extends StratoFed.Column(DeleteTweetColumn.Path) - with StratoFed.Execute.StitchWithContext - with StratoFed.HandleDarkRequests { - - override val policy: Policy = AccessPolicy.TweetMutationCommonAccessPolicies - - override val isIdempotent: Boolean = true - - override type Arg = gql.DeleteTweetRequest - override type Result = gql.DeleteTweetResponseWithSubqueryPrefetchItems - - override val argConv: Conv[Arg] = ScroogeConv.fromStruct - override val resultConv: Conv[Result] = ScroogeConv.fromStruct - - override val contactInfo: ContactInfo = TweetypieContactInfo - override val metadata: OpMetadata = - OpMetadata(Some(Production), Some(PlainText("Deletes a tweet by the calling Twitter user."))) - - override def execute(request: Arg, opContext: OpContext): Stitch[Result] = { - val ctx = getRequestContext(opContext) - - val thriftDeleteTweetRequest = thrift.DeleteTweetsRequest( - tweetIds = Seq(request.tweetId), - // byUserId is picked up by the context in tweetypie.deleteTweet, - // but we're passing it in here to be explicit - byUserId = Some(ctx.twitterUserId), - ) - - val stitchDeleteTweet = handleDarkRequest(opContext)( - light = { - Stitch.callFuture(deleteTweet(thriftDeleteTweetRequest)) - }, - // For dark requests, we don't want to send traffic to tweetypie. - // Since the response is the same regardless of the request, we take a no-op - // action instead. 
- dark = Stitch.value(Seq(thrift.DeleteTweetResult(request.tweetId, TweetDeleteState.Ok))) - ) - - stitchDeleteTweet.map { result: Seq[thrift.DeleteTweetResult] => - result.headOption match { - case Some(thrift.DeleteTweetResult(id, TweetDeleteState.Ok)) => - gql.DeleteTweetResponseWithSubqueryPrefetchItems( - data = Some(gql.DeleteTweetResponse(Some(id))), - // Prefetch data is always NotFound to prevent subqueries from hydrating via weaverbird - // and possibly returning inconsistent results, i.e. a Found tweet. - subqueryPrefetchItems = Some(PrefetchedDataResponse.notFound(id).value) - ) - case Some(thrift.DeleteTweetResult(_, TweetDeleteState.PermissionError)) => - throw ApiErrors.DeletePermissionErr - case _ => - throw ApiErrors.GenericAccessDeniedErr - } - } - } -} - -object DeleteTweetColumn { - val Path = "tweetypie/deleteTweet.Tweet" -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.docx new file mode 100644 index 000000000..b250529b1 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.scala deleted file mode 100644 index c6b3cf246..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.scala +++ /dev/null @@ -1,141 +0,0 @@ -package com.twitter.tweetypie -package federated.columns - -import com.twitter.io.Buf -import com.twitter.scrooge.TFieldBlob -import com.twitter.stitch.Stitch -import com.twitter.strato.access.Access -import com.twitter.strato.catalog.OpMetadata -import com.twitter.strato.config.AllowAll -import com.twitter.strato.config.ContactInfo -import 
com.twitter.strato.config.Policy -import com.twitter.strato.data.Conv -import com.twitter.strato.data.Description.PlainText -import com.twitter.strato.data.Lifecycle.Production -import com.twitter.strato.data.Type -import com.twitter.strato.data.Val -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.opcontext.OpContext -import com.twitter.strato.serialization.MVal -import com.twitter.strato.serialization.Thrift -import com.twitter.strato.util.Strings -import com.twitter.tweetypie.thriftscala.GetTweetFieldsResult -import com.twitter.tweetypie.thriftscala.SetAdditionalFieldsRequest -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.tweetypie.thriftscala.TweetFieldsResultState.Found -import com.twitter.util.Future -import org.apache.thrift.protocol.TField - -/** - * Federated strato column to return tweet fields - * @param federatedFieldsGroup Group to be used for Stitch batching. - * This is a function that takes a GroupOptions and returns a FederatedFieldGroup. - * Using a function that accepts a GroupOptions allows for Stitch to handle a new group for distinct GroupOptions. - * @param setAdditionalFields Handler to set additional fields on tweets. - * @param stratoValueType Type to be returned by the strato column. 
- * @param tfield Tweet thrift field to be stored - * @param pathName Path to be used in the strato catalog - */ -class FederatedFieldColumn( - federatedFieldsGroup: FederatedFieldGroupBuilder.Type, - setAdditionalFields: SetAdditionalFieldsRequest => Future[Unit], - stratoValueType: Type, - tfield: TField, - pathOverride: Option[String] = None) - extends StratoFed.Column(pathOverride.getOrElse(FederatedFieldColumn.makeColumnPath(tfield))) - with StratoFed.Fetch.StitchWithContext - with StratoFed.Put.Stitch { - - type Key = Long - type View = Unit - type Value = Val.T - - override val keyConv: Conv[Key] = Conv.ofType - override val viewConv: Conv[View] = Conv.ofType - override val valueConv: Conv[Value] = Conv(stratoValueType, identity, identity) - - override val policy: Policy = AllowAll - - /* - * A fetch that proxies GetTweetFieldsColumn.fetch but only requests and - * returns one specific field. - */ - override def fetch(tweetId: Key, view: View, opContext: OpContext): Stitch[Result[Value]] = { - - val twitterUserId: Option[UserId] = Access.getTwitterUserId match { - // Access.getTwitterUserId should return a value when request is made on behalf of a user - // and will not return a value otherwise - case Some(twitterUser) => Some(twitterUser.id) - case None => None - } - - val stitchGroup = federatedFieldsGroup(GroupOptions(twitterUserId)) - - Stitch - .call(FederatedFieldReq(tweetId, tfield.id), stitchGroup).map { - result: GetTweetFieldsResult => - result.tweetResult match { - case Found(f) => - f.tweet.getFieldBlob(tfield.id) match { - case Some(v: TFieldBlob) => - found(blobToVal(v)) - case None => missing - } - case _ => missing - } - } - - } - - /* - * A strato put interface for writing a single additional field to a tweet - */ - override def put(tweetId: Key, value: Val.T): Stitch[Unit] = { - val tweet: Tweet = Tweet(id = tweetId).setField(valToBlob(value)) - val request: SetAdditionalFieldsRequest = SetAdditionalFieldsRequest(tweet) - 
Stitch.callFuture(setAdditionalFields(request)) - } - - val mval: Thrift.Codec = MVal.codec(stratoValueType).thrift(4) - - def valToBlob(value: Val.T): TFieldBlob = - TFieldBlob(tfield, mval.write[Buf](value, Thrift.compactProto)) - - def blobToVal(thriftFieldBlob: TFieldBlob): Val.T = - mval.read(thriftFieldBlob.content, Thrift.compactProto) - - override val contactInfo: ContactInfo = TweetypieContactInfo - override val metadata: OpMetadata = OpMetadata( - lifecycle = Some(Production), - description = Some(PlainText(s"A federated column for the field tweet.$stratoValueType")) - ) -} - -object FederatedFieldColumn { - val idAllowlist: Seq[Short] = Seq( - Tweet.CoreDataField.id, - Tweet.LanguageField.id, - Tweet.ConversationMutedField.id - ) - val ID_START = 157 - val ID_END = 32000 - - private val MigrationFields: Seq[Short] = Seq(157) - - def isFederatedField(id: Short) = id >= ID_START && id < ID_END || idAllowlist.contains(id) - - def isMigrationFederatedField(tField: TField): Boolean = MigrationFields.contains(tField.id) - - /* federated field column strato configs must conform to this - * path name scheme for tweetypie to pick them up - */ - def makeColumnPath(tField: TField) = { - val columnName = Strings.toCamelCase(tField.name.stripSuffix("id")) - s"tweetypie/fields/${columnName}.Tweet" - } - - def makeV1ColumnPath(tField: TField): String = { - val columnName = Strings.toCamelCase(tField.name.stripSuffix("id")) - s"tweetypie/fields/$columnName-V1.Tweet" - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.docx new file mode 100644 index 000000000..40e363737 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.scala deleted file mode 100644 index 88b9db624..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.scala +++ /dev/null @@ -1,88 +0,0 @@ -package com.twitter.tweetypie.federated.columns - -import com.twitter.spam.rtf.thriftscala.SafetyLevel -import com.twitter.stitch.MapGroup -import com.twitter.tweetypie.UserId -import com.twitter.tweetypie.federated.columns.FederatedFieldGroupBuilder.allCountFields -import com.twitter.tweetypie.federated.columns.FederatedFieldGroupBuilder.countTweetFields -import com.twitter.tweetypie.thriftscala.GetTweetFieldsOptions -import com.twitter.tweetypie.thriftscala.GetTweetFieldsRequest -import com.twitter.tweetypie.thriftscala.GetTweetFieldsResult -import com.twitter.tweetypie.thriftscala.StatusCounts -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.tweetypie.thriftscala.TweetInclude -import com.twitter.util.Future -import com.twitter.util.Throw -import com.twitter.util.Try - -case class GroupOptions(twitterUserId: Option[UserId]) - -object FederatedFieldGroupBuilder { - type Type = GroupOptions => MapGroup[FederatedFieldReq, GetTweetFieldsResult] - - def apply( - getTweetFieldsHandler: GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]] - ): Type = { - FederatedFieldGroup(getTweetFieldsHandler, _) - } - - // The set of non-deprecated count field includes - val allCountFields: Set[TweetInclude] = Set( - TweetInclude.CountsFieldId(StatusCounts.RetweetCountField.id), - TweetInclude.CountsFieldId(StatusCounts.QuoteCountField.id), - TweetInclude.CountsFieldId(StatusCounts.FavoriteCountField.id), - TweetInclude.CountsFieldId(StatusCounts.ReplyCountField.id), - TweetInclude.CountsFieldId(StatusCounts.BookmarkCountField.id), - ) - - // Tweet field includes which contain counts. These are the only fields where count field includes are relevant. 
- val countTweetFields: Set[TweetInclude] = Set( - TweetInclude.TweetFieldId(Tweet.CountsField.id), - TweetInclude.TweetFieldId(Tweet.PreviousCountsField.id)) -} - -case class FederatedFieldGroup( - getTweetFieldsHandler: GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]], - options: GroupOptions) - extends MapGroup[FederatedFieldReq, GetTweetFieldsResult] { - override protected def run( - reqs: Seq[FederatedFieldReq] - ): Future[FederatedFieldReq => Try[GetTweetFieldsResult]] = { - - // requesting the field ids of the requested additional field ids in this group - val fieldIncludes: Set[TweetInclude] = reqs.map { req: FederatedFieldReq => - TweetInclude.TweetFieldId(req.fieldId) - }.toSet - - val allIncludes: Set[TweetInclude] = if (fieldIncludes.intersect(countTweetFields).nonEmpty) { - // if counts are being requested we include all count fields by default - // because there is no way to specify them individually with federated fields, - fieldIncludes ++ allCountFields - } else { - fieldIncludes - } - - val gtfOptions = GetTweetFieldsOptions( - tweetIncludes = allIncludes, - forUserId = options.twitterUserId, - // visibility filtering happens at the api layer / tweet top level - // and therefore is not required at individual field level - safetyLevel = Some(SafetyLevel.FilterNone) - ) - getTweetFieldsHandler( - GetTweetFieldsRequest( - tweetIds = reqs.map(_.tweetId).distinct, - options = gtfOptions - ) - ).map { - response => - { req => - response.find(_.tweetId == req.tweetId) match { - case Some(result) => Try(result) - case None => - Throw(new NoSuchElementException(s"response not found for tweet: ${req.tweetId}")) - } - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.docx new file mode 100644 index 000000000..0c1b58135 Binary files /dev/null and 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.scala deleted file mode 100644 index 594f46273..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.scala +++ /dev/null @@ -1,7 +0,0 @@ -package com.twitter.tweetypie.federated.columns - -import com.twitter.tweetypie.TweetId - -// Case class to be used for grouping Stitch requests -// for Federated Fields -case class FederatedFieldReq(tweetId: TweetId, fieldId: Short) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.docx new file mode 100644 index 000000000..a2cb64554 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.scala deleted file mode 100644 index f4aaa6e12..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.scala +++ /dev/null @@ -1,83 +0,0 @@ -package com.twitter.tweetypie.federated.columns - -import com.twitter.stitch.Stitch -import com.twitter.strato.access.Access.LdapGroup -import com.twitter.strato.catalog.Fetch -import com.twitter.strato.catalog.OpMetadata -import com.twitter.strato.config.AnyOf -import com.twitter.strato.config.ContactInfo -import com.twitter.strato.config.FromColumns -import com.twitter.strato.config.Has -import com.twitter.strato.config.Path 
-import com.twitter.strato.config.Policy -import com.twitter.strato.data.Conv -import com.twitter.strato.data.Description.PlainText -import com.twitter.strato.data.Lifecycle.Production -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.response.Err -import com.twitter.strato.thrift.ScroogeConv -import com.twitter.tweetypie.UserId -import com.twitter.tweetypie.thriftscala.federated.GetStoredTweetsByUserView -import com.twitter.tweetypie.thriftscala.federated.GetStoredTweetsByUserResponse -import com.twitter.tweetypie.{thriftscala => thrift} -import com.twitter.util.Future - -class GetStoredTweetsByUserColumn( - handler: thrift.GetStoredTweetsByUserRequest => Future[thrift.GetStoredTweetsByUserResult]) - extends StratoFed.Column(GetStoredTweetsByUserColumn.Path) - with StratoFed.Fetch.Stitch { - - override val contactInfo: ContactInfo = TweetypieContactInfo - override val metadata: OpMetadata = OpMetadata( - lifecycle = Some(Production), - description = - Some(PlainText("Fetches hydrated Tweets for a particular User regardless of Tweet state.")) - ) - override val policy: Policy = AnyOf( - Seq( - FromColumns(Set(Path("tweetypie/data-provider/storedTweets.User"))), - Has(LdapGroup("tweetypie-team")) - )) - - override type Key = UserId - override type View = GetStoredTweetsByUserView - override type Value = GetStoredTweetsByUserResponse - - override val keyConv: Conv[Key] = Conv.ofType - override val viewConv: Conv[View] = ScroogeConv.fromStruct[GetStoredTweetsByUserView] - override val valueConv: Conv[Value] = ScroogeConv.fromStruct[GetStoredTweetsByUserResponse] - - override def fetch(key: Key, view: View): Stitch[Result[Value]] = { - val request = thrift.GetStoredTweetsByUserRequest( - userId = key, - options = Some( - thrift.GetStoredTweetsByUserOptions( - bypassVisibilityFiltering = view.bypassVisibilityFiltering, - setForUserId = view.setForUserId, - startTimeMsec = view.startTimeMsec, - endTimeMsec = view.endTimeMsec, - cursor = view.cursor, - 
startFromOldest = view.startFromOldest, - additionalFieldIds = view.additionalFieldIds - )) - ) - - Stitch - .callFuture(handler(request)) - .map { result => - Fetch.Result.found( - GetStoredTweetsByUserResponse( - storedTweets = result.storedTweets, - cursor = result.cursor - )) - } - .rescue { - case _ => Stitch.exception(Err(Err.Internal)) - } - } - -} - -object GetStoredTweetsByUserColumn { - val Path = "tweetypie/internal/getStoredTweets.User" -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.docx new file mode 100644 index 000000000..4132cb52d Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.scala deleted file mode 100644 index 20afd87e1..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.scala +++ /dev/null @@ -1,99 +0,0 @@ -package com.twitter.tweetypie.federated.columns - -import com.twitter.stitch.MapGroup -import com.twitter.stitch.Stitch -import com.twitter.strato.access.Access.LdapGroup -import com.twitter.strato.catalog.Fetch -import com.twitter.strato.catalog.OpMetadata -import com.twitter.strato.config.AnyOf -import com.twitter.strato.config.ContactInfo -import com.twitter.strato.config.FromColumns -import com.twitter.strato.config.Has -import com.twitter.strato.config.Path -import com.twitter.strato.config.Policy -import com.twitter.strato.data.Conv -import com.twitter.strato.data.Description.PlainText -import com.twitter.strato.data.Lifecycle.Production -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.response.Err -import 
com.twitter.strato.thrift.ScroogeConv -import com.twitter.tweetypie.{thriftscala => thrift} -import com.twitter.tweetypie.TweetId -import com.twitter.tweetypie.thriftscala.federated.GetStoredTweetsView -import com.twitter.tweetypie.thriftscala.federated.GetStoredTweetsResponse -import com.twitter.util.Future -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.util.Try - -class GetStoredTweetsColumn( - getStoredTweets: thrift.GetStoredTweetsRequest => Future[Seq[thrift.GetStoredTweetsResult]]) - extends StratoFed.Column(GetStoredTweetsColumn.Path) - with StratoFed.Fetch.Stitch { - - override val contactInfo: ContactInfo = TweetypieContactInfo - override val metadata: OpMetadata = OpMetadata( - lifecycle = Some(Production), - description = Some(PlainText("Fetches hydrated Tweets regardless of Tweet state.")) - ) - override val policy: Policy = AnyOf( - Seq( - FromColumns( - Set( - Path("tweetypie/data-provider/storedTweets.User"), - Path("note_tweet/data-provider/noteTweetForZipbird.User"))), - Has(LdapGroup("tweetypie-team")) - )) - - override type Key = TweetId - override type View = GetStoredTweetsView - override type Value = GetStoredTweetsResponse - - override val keyConv: Conv[Key] = Conv.ofType - override val viewConv: Conv[View] = ScroogeConv.fromStruct[GetStoredTweetsView] - override val valueConv: Conv[Value] = ScroogeConv.fromStruct[GetStoredTweetsResponse] - - override def fetch(key: Key, view: View): Stitch[Result[Value]] = { - Stitch.call(key, Group(view)) - } - - private case class Group(view: GetStoredTweetsView) - extends MapGroup[TweetId, Fetch.Result[GetStoredTweetsResponse]] { - override protected def run( - keys: Seq[TweetId] - ): Future[TweetId => Try[Result[GetStoredTweetsResponse]]] = { - val options = thrift.GetStoredTweetsOptions( - bypassVisibilityFiltering = view.bypassVisibilityFiltering, - forUserId = view.forUserId, - additionalFieldIds = view.additionalFieldIds - ) - - 
getStoredTweets(thrift.GetStoredTweetsRequest(keys, Some(options))) - .map(transformAndGroupByTweetId) - .handle { - case _ => - _ => Throw[Result[GetStoredTweetsResponse]](Err(Err.Internal)) - } - } - - private def transformAndGroupByTweetId( - results: Seq[thrift.GetStoredTweetsResult] - ): Map[TweetId, Try[Fetch.Result[GetStoredTweetsResponse]]] = { - results - .map(result => GetStoredTweetsResponse(result.storedTweet)) - .groupBy(_.storedTweet.tweetId) - .map { - case (tweetId, Seq(result)) => (tweetId, Return(Fetch.Result.found(result))) - case (tweetId, multipleResults) => - ( - tweetId, - Throw(Err(Err.BadRequest, s"Got ${multipleResults.size} results for $tweetId"))) - } - } - - } -} - -object GetStoredTweetsColumn { - val Path = "tweetypie/internal/getStoredTweets.Tweet" -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.docx new file mode 100644 index 000000000..a8b212908 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.scala deleted file mode 100644 index 2daa9bdb4..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.scala +++ /dev/null @@ -1,172 +0,0 @@ -package com.twitter.tweetypie.federated.columns - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.stitch.MapGroup -import com.twitter.stitch.Stitch -import com.twitter.strato.catalog.Fetch -import com.twitter.strato.catalog.OpMetadata -import com.twitter.strato.config.AllowAll -import com.twitter.strato.config.ContactInfo -import com.twitter.strato.config.Policy -import 
com.twitter.strato.data.Conv -import com.twitter.strato.data.Description.PlainText -import com.twitter.strato.data.Lifecycle.Production -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.opcontext.OpContext -import com.twitter.strato.response.Err -import com.twitter.strato.thrift.ScroogeConv -import com.twitter.tweetypie.TweetId -import com.twitter.tweetypie.client_id.PreferForwardedServiceIdentifierForStrato -import com.twitter.tweetypie.thriftscala.GetTweetFieldsOptions -import com.twitter.tweetypie.thriftscala.GetTweetFieldsRequest -import com.twitter.tweetypie.thriftscala.GetTweetFieldsResult -import com.twitter.tweetypie.thriftscala.TweetVisibilityPolicy -import com.twitter.util.Future -import com.twitter.util.Try - -/** - * Strato federated column implementing GetTweetFields as a Fetch. - */ -class GetTweetFieldsColumn( - handler: GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]], - stats: StatsReceiver) - extends StratoFed.Column(GetTweetFieldsColumn.Path) - with StratoFed.Fetch.StitchWithContext { - - /** - * At this point, this fetch op will reject any requests that specify - * visibilityPolicy other than USER_VISIBLE, so no access control is needed. 
- */ - override val policy: Policy = AllowAll - - override type Key = TweetId - override type View = GetTweetFieldsOptions - override type Value = GetTweetFieldsResult - - override val keyConv: Conv[Key] = Conv.ofType - override val viewConv: Conv[View] = ScroogeConv.fromStruct[GetTweetFieldsOptions] - override val valueConv: Conv[Value] = ScroogeConv.fromStruct[GetTweetFieldsResult] - - override val contactInfo: ContactInfo = TweetypieContactInfo - override val metadata: OpMetadata = OpMetadata( - lifecycle = Some(Production), - description = - Some(PlainText("Get of tweets that allows fetching only specific subsets of the data.")), - ) - - val safetyOpContextOnlyCounter = stats.counter("safety_op_context_only") - val safetyOpContextOnlyValueScope = stats.scope("safety_op_context_only_value") - val safetyOpContextOnlyCallerScope = stats.scope("safety_op_context_only_caller") - - val safetyViewOnlyCounter = stats.counter("safety_view_only") - val safetyViewOnlyValueScope = stats.scope("safety_view_only_value") - val safetyViewOnlyCallerScope = stats.scope("safety_view_only_caller") - - val safetyLevelInconsistencyCounter = stats.counter("safety_level_inconsistency") - val safetyLevelInconsistencyValueScope = stats.scope("safety_level_inconsistency_value") - val safetyLevelInconsistencyCallerScope = stats.scope("safety_level_inconsistency_caller") - - override def fetch(key: Key, view: View, ctx: OpContext): Stitch[Result[Value]] = { - compareSafetyLevel(view, ctx) - checkVisibilityPolicyUserVisible(view).flatMap { _ => - Stitch.call(key, Group(view)) - } - } - - /** - * Only allow [[TweetVisibilityPolicy.UserVisible]] visibilityPolicy. - * - * This column requires access policy in order to serve requests with visibilityPolicy - * other than [[TweetVisibilityPolicy.UserVisible]]. Before we support access control, - * reject all requests that are not safe. 
- */ - private def checkVisibilityPolicyUserVisible(view: View): Stitch[Unit] = - view.visibilityPolicy match { - case TweetVisibilityPolicy.UserVisible => Stitch.value(Unit) - case otherValue => - Stitch.exception( - Err( - Err.BadRequest, - "GetTweetFields does not support access control on Strato yet. " - + s"Hence visibilityPolicy can only take the default ${TweetVisibilityPolicy.UserVisible} value, " - + s"got: ${otherValue}." - )) - } - - /** Compare the SafetyLevels in the View and OpContext */ - private def compareSafetyLevel(view: View, ctx: OpContext): Unit = - (view.safetyLevel, ctx.safetyLevel) match { - case (None, None) => - case (Some(viewSafety), None) => { - safetyViewOnlyCounter.incr() - safetyViewOnlyValueScope.counter(viewSafety.name).incr() - PreferForwardedServiceIdentifierForStrato.serviceIdentifier - .foreach(serviceId => safetyViewOnlyCallerScope.counter(serviceId.toString).incr()) - } - case (None, Some(ctxSafety)) => { - safetyOpContextOnlyCounter.incr() - safetyOpContextOnlyValueScope.counter(ctxSafety.name).incr() - PreferForwardedServiceIdentifierForStrato.serviceIdentifier - .foreach(serviceId => safetyOpContextOnlyCallerScope.counter(serviceId.toString).incr()) - } - case (Some(viewSafety), Some(ctxSafety)) => - def safeStringEquals(a: String, b: String) = - a.toLowerCase().trim().equals(b.toLowerCase().trim()) - if (!safeStringEquals(viewSafety.name, ctxSafety.name)) { - safetyLevelInconsistencyCounter.incr() - safetyLevelInconsistencyValueScope.counter(viewSafety.name + '-' + ctxSafety.name).incr() - PreferForwardedServiceIdentifierForStrato.serviceIdentifier - .foreach(serviceId => - safetyLevelInconsistencyCallerScope.counter(serviceId.toString).incr()) - } - } - - /** - * Means of batching of [[GetTweetFieldsColumn]] calls. 
- * - * Only calls issued against the same instance of [[GetTweetFieldsColumn]] - * are batched as Stitch clusters group objects based on equality, - * and nested case class implicitly captures [[GetTweetFieldsColumn]] reference. - */ - private case class Group(view: GetTweetFieldsOptions) - extends MapGroup[TweetId, Fetch.Result[GetTweetFieldsResult]] { - - /** - * Batches given [[TweetId]] lookups in a single [[GetTweetFieldsRequest]] - * and returns a result mapped by [[TweetId]]. - */ - override protected def run( - keys: Seq[TweetId] - ): Future[TweetId => Try[Fetch.Result[GetTweetFieldsResult]]] = - handler( - GetTweetFieldsRequest( - // Sorting the keys makes for simpler matchers in the tests - // as matching on a Seq needs to be in order. - tweetIds = keys.sorted, - options = view, - )).map(groupByTweetId) - - /** - * Groups given [[GetTweetFieldsResult]] objects by [[TweetId]] and returns the mapping. - */ - private def groupByTweetId( - allResults: Seq[GetTweetFieldsResult] - ): TweetId => Try[Fetch.Result[GetTweetFieldsResult]] = { - allResults - .groupBy(_.tweetId) - .mapValues { - case Seq(result) => Try(Fetch.Result.found(result)) - case manyResults => - Try { - throw Err( - Err.Dependency, - s"Expected one result per tweeet ID, got ${manyResults.length}") - } - } - } - } -} - -object GetTweetFieldsColumn { - val Path = "tweetypie/getTweetFields.Tweet" -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.docx new file mode 100644 index 000000000..a0b3064c3 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.scala deleted file mode 
100644 index d1e00821a..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.twitter.tweetypie.federated.columns - -import com.twitter.tweetypie.{thriftscala => thrift} - -object HydrationOptions { - - def writePathHydrationOptions( - cardsPlatformKey: Option[String] - ) = - thrift.WritePathHydrationOptions( - // The GraphQL API extracts or "lifts" the ApiTweet.card reference field from the - // ApiTweet.card.url returned by Tweetypie. Tweetypie's card hydration business logic - // selects the single correct Card URL by first making Expandodo.getCards2 requests for - // the Tweet's cardReference, or all of the Tweet's URL entities in cases where Tweet - // does not have a stored cardReference, and then selecting the last of the hydrated - // cards returned by Expandodo. - includeCards = true, - cardsPlatformKey = cardsPlatformKey, - // The GraphQL API only supports quoted tweet results formatted per go/simplequotedtweet. 
- simpleQuotedTweet = true, - ) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.docx new file mode 100644 index 000000000..caac044d0 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.scala deleted file mode 100644 index 0030bcd40..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.scala +++ /dev/null @@ -1,29 +0,0 @@ -package com.twitter.tweetypie.federated -package columns - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.tweetypie.StatsReceiver -import com.twitter.util.logging.Logger - -object TrackingId { - private[this] val log = Logger(getClass) - - def parse(s: String, statsReceiver: StatsReceiver = NullStatsReceiver): Option[Long] = { - val trackingStats = statsReceiver.scope("tracking_id_parser") - - val parsedCountCounter = trackingStats.scope("parsed").counter("count") - val parseFailedCounter = trackingStats.scope("parse_failed").counter("count") - Option(s).map(_.trim).filter(_.nonEmpty).flatMap { idStr => - try { - val id = java.lang.Long.parseLong(idStr, 16) - parsedCountCounter.incr() - Some(id) - } catch { - case _: NumberFormatException => - parseFailedCounter.incr() - log.warn(s"invalid tracking ID: '$s'") - None - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.docx new file mode 100644 index 000000000..183702473 Binary files /dev/null and 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.scala deleted file mode 100644 index 74bd0569d..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.tweetypie.federated.columns - -import com.twitter.strato.config.ContactInfo - -object TweetypieContactInfo - extends ContactInfo( - contactEmail = "", - ldapGroup = "", - jiraProject = "", - slackRoomId = "" - ) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.docx new file mode 100644 index 000000000..7b6ccb685 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.scala deleted file mode 100644 index 489285986..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.scala +++ /dev/null @@ -1,69 +0,0 @@ -package com.twitter.tweetypie -package federated.columns - -import com.twitter.stitch.Stitch -import com.twitter.strato.catalog.OpMetadata -import com.twitter.strato.config.ContactInfo -import com.twitter.strato.config.Policy -import com.twitter.strato.data.Conv -import com.twitter.strato.data.Description.PlainText -import com.twitter.strato.data.Lifecycle.Production -import com.twitter.strato.fed.StratoFed -import com.twitter.strato.opcontext.OpContext -import 
com.twitter.strato.thrift.ScroogeConv -import com.twitter.tweetypie.federated.context.GetRequestContext -import com.twitter.tweetypie.federated.context.RequestContext -import com.twitter.tweetypie.thriftscala.{graphql => gql} -import com.twitter.tweetypie.{thriftscala => thrift} - -class UnretweetColumn( - unretweet: thrift.UnretweetRequest => Future[thrift.UnretweetResult], - getRequestContext: GetRequestContext, -) extends StratoFed.Column("tweetypie/unretweet.Tweet") - with StratoFed.Execute.StitchWithContext - with StratoFed.HandleDarkRequests { - - override val policy: Policy = AccessPolicy.TweetMutationCommonAccessPolicies - - // It's acceptable to retry or reapply an unretweet operation, - // as multiple calls result in the same end state. - override val isIdempotent: Boolean = true - - override type Arg = gql.UnretweetRequest - override type Result = gql.UnretweetResponseWithSubqueryPrefetchItems - - override val argConv: Conv[Arg] = ScroogeConv.fromStruct - override val resultConv: Conv[Result] = ScroogeConv.fromStruct - - override val contactInfo: ContactInfo = TweetypieContactInfo - override val metadata: OpMetadata = - OpMetadata( - Some(Production), - Some(PlainText("Removes any retweets by the calling user of the given source tweet."))) - - override def execute(gqlRequest: Arg, opContext: OpContext): Stitch[Result] = { - val ctx: RequestContext = getRequestContext(opContext) - val req = thrift.UnretweetRequest( - ctx.twitterUserId, - gqlRequest.sourceTweetId, - ) - - val stitchUnretweet = handleDarkRequest(opContext)( - light = Stitch.callFuture(unretweet(req)), - // For dark requests, we don't want to send traffic to tweetypie. - // Since the response is the same regardless of the request, we take a no-op - // action instead. 
- dark = Stitch.value(thrift.UnretweetResult(state = thrift.TweetDeleteState.Ok)) - ) - - stitchUnretweet.map { _ => - gql.UnretweetResponseWithSubqueryPrefetchItems( - data = Some(gql.UnretweetResponse(Some(gqlRequest.sourceTweetId))) - ) - } - } -} - -object UnretweetColumn { - val Path = "tweetypie/unretweet.Tweet" -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD deleted file mode 100644 index 942c66697..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD +++ /dev/null @@ -1,27 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "finagle/finagle-core/src/main", - "finatra-internal/tfe", - "passbird/bitfields-thrift/src/main/thrift:thrift-scala", - "src/scala/com/twitter/common/ip_address_utils", - "src/thrift/com/twitter/context:feature-context-scala", - "src/thrift/com/twitter/context:twitter-context-scala", - "src/thrift/com/twitter/ip_address_utils:ip-address-utils-thrift-scala", - "src/thrift/com/twitter/spam/rtf:safety-level-scala", - "strato/src/main/scala/com/twitter/strato/access", - "strato/src/main/scala/com/twitter/strato/config", - "strato/src/main/scala/com/twitter/strato/context", - "strato/src/main/scala/com/twitter/strato/data", - "strato/src/main/scala/com/twitter/strato/opcontext", - "strato/src/main/scala/com/twitter/strato/response", - "strato/src/main/scala/com/twitter/strato/thrift", - "strato/src/main/thrift/com/twitter/strato/context:thrift-scala", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "twitter-context/src/main/scala", - "weaverbird/common/src/main/scala/com/twitter/weaverbird/common", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD.docx 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD.docx new file mode 100644 index 000000000..d638418c6 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.docx new file mode 100644 index 000000000..a2a2f1e1b Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.scala deleted file mode 100644 index 170ba3c5c..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.scala +++ /dev/null @@ -1,131 +0,0 @@ -package com.twitter.tweetypie -package federated.context - -import com.twitter.common.ip_address_utils.ClientIpAddressUtils -import com.twitter.context.thriftscala.Viewer -import com.twitter.context.TwitterContext -import com.twitter.finagle.core.util.InetAddressUtil -import com.twitter.passbird.bitfield.clientprivileges.thriftscala.{Constants => ClientAppPrivileges} -import com.twitter.finatra.tfe.HttpHeaderNames -import com.twitter.spam.rtf.thriftscala.SafetyLevel -import com.twitter.strato.access.Access.ClientApplicationPrivilege -import com.twitter.strato.access.Access -import com.twitter.strato.access.ClientApplicationPrivilegeVariant -import com.twitter.strato.context.StratoContext -import com.twitter.strato.opcontext.OpContext -import com.twitter.strato.response.Err -import com.twitter.weaverbird.common.GetPlatformKey - -/** - * [[RequestContext]] exists to avoid wiring the federated column - * implementations directly to the request data that is derived from the - 
* contextual environment. Columns should not directly reference - * TwitterContext, StratoContext, strato.access.Access, HTTP headers, etc. - * Each column operation operates on two input parameters: a request (i.e. - * a column operation's Arg) and a [[RequestContext]]. - */ -private[federated] case class RequestContext( - clientApplicationId: Option[AppId] = None, - deviceSource: Option[String] = None, - knownDeviceToken: Option[KnownDeviceToken] = None, - remoteHost: Option[String] = None, - twitterUserId: UserId, - contributorId: Option[UserId] = None, - isDarkRequest: Boolean = false, - hasPrivilegeNullcastingAccess: Boolean = false, - hasPrivilegePromotedTweetsInTimeline: Boolean = false, - sessionHash: Option[String] = None, - cardsPlatformKey: Option[String] = None, - safetyLevel: Option[SafetyLevel] = None, -) { - def isContributorRequest = contributorId.exists(_ != twitterUserId) -} - -/** - * Provides a single place to derive request data from the contextual - * environment. Defined as a sealed class (vs an object) to allow mocking - * in unit tests. - */ -private[federated] sealed class GetRequestContext() { - // Bring Tweetypie permitted TwitterContext into scope - private[this] val TwitterContext: TwitterContext = - com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) - - /** - * When TwitterUserIdNotDefined is thrown, it's likely that the column - * access control configuration lacks `AllowTwitterUserId` or other - * Policy that ensures the caller is authenticated. 
- */ - private[federated] val TwitterUserIdNotDefined = - Err(Err.Authentication, "User authentication is required for this operation.") - - private[this] val SessionHashHeaderName = "x-tfe-session-hash" - private[this] def hasClientApplicationPrivilege(id: Int): Boolean = - Access.getPrincipals.contains( - ClientApplicationPrivilege( - ClientApplicationPrivilegeVariant - .byId(id.toShort).get)) - - private[this] def getRequestHeader(headerName: String): Option[String] = - StratoContext - .current() - .propagatedHeaders - .flatMap(_.get(headerName)) - - def apply(opContext: OpContext): RequestContext = { - val twitterUserId = Access.getTwitterUserId match { - // Access.getTwitterUserId should return a value as long as the column - // policy includes AllowTwitterUserId, which guarantees the presence of - // the value. - case Some(twitterUser) => twitterUser.id - case None => throw TwitterUserIdNotDefined - } - - // contributorId should only be defined when the authenticated user differs - // from the "Twitter user" - val contributorId = - Access.getAuthenticatedTwitterUserId.map(_.id).filter(_ != twitterUserId) - - val twitterContext = TwitterContext().getOrElse(Viewer()) - - val deviceSource = twitterContext.clientApplicationId.map("oauth:" + _) - - // Ported from StatusesUpdateController#getBirdherdOptions and - // BirdherdOption.UserIp(request.clientHost) - val remoteHost: Option[String] = - getRequestHeader(HttpHeaderNames.X_TWITTER_AUDIT_IP_THRIFT.toLowerCase) // use the new header - .flatMap(ClientIpAddressUtils.decodeClientIpAddress(_)) - .flatMap(ClientIpAddressUtils.getString(_)) - .orElse( - getRequestHeader( - HttpHeaderNames.X_TWITTER_AUDIT_IP.toLowerCase - ) // fallback to old way before migration is completed - .map(h => InetAddressUtil.getByName(h.trim).getHostAddress) - ) - - val isDarkRequest = opContext.darkRequest.isDefined - - val sessionHash = getRequestHeader(SessionHashHeaderName) - - val cardsPlatformKey = 
twitterContext.clientApplicationId.map(GetPlatformKey(_)) - - val safetyLevel = opContext.safetyLevel - - RequestContext( - clientApplicationId = twitterContext.clientApplicationId, - deviceSource = deviceSource, - knownDeviceToken = twitterContext.knownDeviceToken, - remoteHost = remoteHost, - twitterUserId = twitterUserId, - contributorId = contributorId, - isDarkRequest = isDarkRequest, - hasPrivilegeNullcastingAccess = - hasClientApplicationPrivilege(ClientAppPrivileges.NULLCASTING_ACCESS), - hasPrivilegePromotedTweetsInTimeline = - hasClientApplicationPrivilege(ClientAppPrivileges.PROMOTED_TWEETS_IN_TIMELINE), - sessionHash = sessionHash, - cardsPlatformKey = cardsPlatformKey, - safetyLevel = safetyLevel, - ) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD deleted file mode 100644 index 06a2a8c10..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD +++ /dev/null @@ -1,32 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", - "finagle/finagle-core/src/main", - "tweetypie/servo/util/src/main/scala", - "src/thrift/com/twitter/consumer_privacy/mention_controls:thrift-scala", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/spam/rtf:safety-level-scala", - "src/thrift/com/twitter/spam/rtf:safety-result-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "stitch/stitch-compat/src/main/scala/com/twitter/stitch/compat", - "stitch/stitch-core/src/main/scala/com/twitter/stitch", - "stitch/stitch-gizmoduck", - "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql-scala", - 
"strato/config/src/thrift/com/twitter/strato/graphql:graphql-scala", - "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql-scala", - "strato/src/main/scala/com/twitter/strato/rpc", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", - "vibes/src/main/thrift/com/twitter/vibes:vibes-scala", - "weaverbird/common/src/main/scala/com/twitter/weaverbird/common", - "weaverbird/common/src/main/scala/com/twitter/weaverbird/converters/common", - "weaverbird/common/src/main/scala/com/twitter/weaverbird/converters/tweet", - "weaverbird/common/src/main/scala/com/twitter/weaverbird/hydrators", - "weaverbird/common/src/main/scala/com/twitter/weaverbird/mappers", - "weaverbird/common/src/main/scala/com/twitter/weaverbird/repositories", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD.docx new file mode 100644 index 000000000..757412912 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.docx new file mode 100644 index 000000000..70816802c Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala deleted file mode 100644 index d829955db..000000000 --- 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala +++ /dev/null @@ -1,166 +0,0 @@ -package com.twitter.tweetypie -package federated -package prefetcheddata - -import com.twitter.consumer_privacy.mention_controls.thriftscala.UnmentionInfo -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.gizmoduck.thriftscala.LookupContext -import com.twitter.gizmoduck.thriftscala.QueryFields -import com.twitter.gizmoduck.thriftscala.UserResult -import com.twitter.spam.rtf.thriftscala.SafetyLevel -import com.twitter.stitch.compat.LegacySeqGroup -import com.twitter.stitch.SeqGroup -import com.twitter.stitch.Stitch -import com.twitter.strato.graphql.thriftscala.CacheMissStrategy -import com.twitter.strato.graphql.thriftscala.PrefetchedData -import com.twitter.strato.graphql.thriftscala.TweetResult -import com.twitter.tweetypie.backends.Gizmoduck -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.util.Throwables -import com.twitter.vibes.thriftscala.VibeV2 -import com.twitter.weaverbird.common.GetRequestContext -import com.twitter.weaverbird.common.PerTOOAppCallerStats -import com.twitter.weaverbird.common.RequestContext -import com.twitter.weaverbird.converters.tweet.WeaverbirdEntitySetMutations -import com.twitter.weaverbird.converters.tweet.WeaverbirdTweetMutations -import com.twitter.weaverbird.hydrators._ -import com.twitter.weaverbird.mappers.ApiTweetPrefetchedMapper -import com.twitter.weaverbird.repositories.UserRepository -import com.twitter.weaverbird.converters.common.EntityRenderingOptions - -private[federated] final case class PrefetchedDataRequest( - tweet: Tweet, - sourceTweet: Option[Tweet], - quotedTweet: Option[Tweet], - unmentionInfo: Option[UnmentionInfo] = None, - vibe: Option[VibeV2] = None, - safetyLevel: SafetyLevel, - requestContext: RequestContext) - -private[federated] final case class PrefetchedDataResponse(value: PrefetchedData) - -private[federated] object 
PrefetchedDataResponse { - // For NotFound, there is no subsequent result or quoted_tweet_results field, so both - // settings are false here. These deciders will be removed post migration. - private[this] val prefetchedMapper = new ApiTweetPrefetchedMapper( - skipTweetResultPrefetchItem = () => false - ) - def notFound(tweetId: Long): PrefetchedDataResponse = - PrefetchedDataResponse( - value = prefetchedMapper.getPrefetchedData( - tweetId = tweetId, - apiTweet = None, - tweetResult = None - ) - ) -} - -private[federated] object PrefetchedDataRepository { - def apply( - thriftTweetToApiTweet: ThriftTweetToApiTweet, - prefetchedMapper: ApiTweetPrefetchedMapper, - statsReceiver: StatsReceiver, - ): PrefetchedDataRequest => Stitch[PrefetchedDataResponse] = - (request: PrefetchedDataRequest) => { - val thriftTweetToApiTweetRequest = ThriftTweetToApiTweetRequest( - tweet = request.tweet, - sourceTweet = request.sourceTweet, - quotedTweet = request.quotedTweet, - // For Tweet writes, filteredReason will always be None. 
- filteredReason = None, - safetyLevel = request.safetyLevel, - requestContext = request.requestContext, - entityRenderingOptions = EntityRenderingOptions() - ) - - val successCounter = statsReceiver.counter("success") - val failuresCounter = statsReceiver.counter("failures") - val failuresScope = statsReceiver.scope("failures") - - thriftTweetToApiTweet - .arrow(thriftTweetToApiTweetRequest) - .onSuccess(_ => successCounter.incr()) - .onFailure { t => - failuresCounter.incr() - failuresScope.counter(Throwables.mkString(t): _*).incr() - } - .map((resp: ThriftTweetToApiTweetResponse) => { - val prefetchedData: PrefetchedData = prefetchedMapper.getPrefetchedData( - tweetId = request.tweet.id, - apiTweet = Some(resp.apiTweet), - // since ApiTweet was hydrate, we can fabricate a TweetResult.Tweet - tweetResult = Some(TweetResult.Tweet(request.tweet.id)), - unmentionInfo = request.unmentionInfo, - editControl = request.tweet.editControl, - previousCounts = request.tweet.previousCounts, - vibe = request.vibe, - editPerspective = request.tweet.editPerspective, - noteTweet = request.tweet.noteTweet - ) - - // Notify GraphQL API to not attempt hydration for missing - // ApiTweet/TweetResult fields. This is only needed on the - // Tweet write path since the newly created Tweet may not - // be fully persisted yet in tbird Manhattan. 
- val shortCircuitedPrefetchedData = prefetchedData.copy( - onCacheMiss = CacheMissStrategy.ShortCircuitExisting - ) - - PrefetchedDataResponse(shortCircuitedPrefetchedData) - }) - } -} - -private[federated] object PrefetchedDataRepositoryBuilder { - def apply( - getUserResultsById: Gizmoduck.GetById, - statsReceiver: StatsReceiver - ): PrefetchedDataRequest => Stitch[PrefetchedDataResponse] = { - val repoStats = statsReceiver.scope("repositories") - - case class GetUserResultById( - queryFields: Set[QueryFields], - lookupContext: LookupContext, - ) extends SeqGroup[UserId, UserResult] { - override def run(keys: Seq[UserId]): Future[Seq[Try[UserResult]]] = - LegacySeqGroup.liftToSeqTry(getUserResultsById((lookupContext, keys, queryFields))) - - override def maxSize: Int = 100 - } - - val stitchGetUserResultById: UserRepository.GetUserResultById = - (userId: UserId, queryFields: Set[QueryFields], lookupContext: LookupContext) => - Stitch.call(userId, GetUserResultById(queryFields, lookupContext)) - - val userRepository = new UserRepository(stitchGetUserResultById, repoStats) - - // Note, this is weaverbird.common.GetRequestContext - val getRequestContext = new GetRequestContext() - - // TwiggyUserHydrator is needed to hydrate TwiggyUsers for CWC and misc. 
logic - val twiggyUserHydrator = new TwiggyUserHydrator(userRepository, getRequestContext) - - val weaverbirdMutations = new WeaverbirdTweetMutations( - new WeaverbirdEntitySetMutations( - new PerTOOAppCallerStats(statsReceiver, getRequestContext) - ) - ) - - val prefetchedMapper = new ApiTweetPrefetchedMapper( - // do not skip this in mutation path as we depends on it - skipTweetResultPrefetchItem = () => false - ) - - val thriftTweetToApiTweet: ThriftTweetToApiTweet = - new FoundThriftTweetToApiTweet( - statsReceiver, - twiggyUserHydrator, - weaverbirdMutations - ) - PrefetchedDataRepository( - thriftTweetToApiTweet, - prefetchedMapper, - repoStats.scope("prefetched_data_repo") - ) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD deleted file mode 100644 index f0ed3efd0..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "ads-common/loggingclient/src/main/scala", - "src/scala/com/twitter/ads/internal/pcl/service", - "src/scala/com/twitter/ads/internal/pcl/strato_adaptor", - "src/thrift/com/twitter/ads/adserver:ads_shared_types-scala", - "src/thrift/com/twitter/ads/callback:engagement_request-scala", - "src/thrift/com/twitter/ads/internal/pcl:promoted_content_input-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", - "strato/src/main/scala/com/twitter/strato/server/context", - "twitter-context/src/main/scala", - "util/util-stats/src/main/scala/com/twitter/finagle/stats", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD.docx 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD.docx new file mode 100644 index 000000000..d03b3b953 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.docx new file mode 100644 index 000000000..1cb196257 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala deleted file mode 100644 index f3a285d65..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.tweetypie -package federated -package promotedcontent - -import com.twitter.ads.callback.thriftscala.EngagementRequest -import com.twitter.ads.internal.pcl.service.CallbackPromotedContentLogger -import com.twitter.ads.internal.pcl.strato_adaptor.PromotedContentInputProvider -import com.twitter.ads.internal.pcl.thriftscala.PromotedContentInput -import com.twitter.adserver.thriftscala.EngagementType -import com.twitter.util.Future - -object TweetPromotedContentLogger { - sealed abstract class TweetEngagementType(val engagementType: EngagementType) - case object TweetEngagement extends TweetEngagementType(EngagementType.Send) - case object ReplyEngagement extends TweetEngagementType(EngagementType.Reply) - case object RetweetEngagement extends TweetEngagementType(EngagementType.Retweet) - - type Type = (EngagementRequest, 
TweetEngagementType, Boolean) => Future[Unit] - - private[this] val TwitterContext = - com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) - - def apply(callbackPromotedContentLogger: CallbackPromotedContentLogger): Type = - ( - engagementRequest: EngagementRequest, - tweetEngagementType: TweetEngagementType, - isDark: Boolean - ) => { - val pci: PromotedContentInput = - PromotedContentInputProvider(TwitterContext, engagementRequest) - - // The real logging is fire-and-forget, so we can create the Future and ignore returning it. - Future.when(!isDark) { - callbackPromotedContentLogger.logNonTrendEngagement( - pci, - tweetEngagementType.engagementType, - pci.impressionId) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD deleted file mode 100644 index 0bf98375c..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD +++ /dev/null @@ -1,43 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "decider", - "finagle/finagle-base-http/src/main", - "finatra-internal/api11/src/main/scala/com/twitter/finatra/api11:errors", - "geo/model/src/main/scala/com/twitter/geo/model", - "passbird/bitfields-thrift/src/main/thrift:thrift-scala", - "tweetypie/servo/util/src/main/scala", - "tweetypie/servo/util/src/main/scala:exception", - "src/scala/com/twitter/accounts/util:safety-meta", - "src/thrift/com/twitter/ads/adserver:ad_engagement_details-scala", - "src/thrift/com/twitter/ads/adserver:preroll_metadata-scala", - "src/thrift/com/twitter/ads/callback:engagement_request-scala", - "src/thrift/com/twitter/bouncer:bounce-action-thrift-scala", - "src/thrift/com/twitter/context:twitter-context-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - 
"tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", - "stitch/stitch-core/src/main/scala/com/twitter/stitch", - "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql-scala", - "strato/config/src/thrift/com/twitter/strato/graphql:graphql-scala", - "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql-scala", - "strato/src/main/scala/com/twitter/strato/client", - "strato/src/main/scala/com/twitter/strato/context", - "strato/src/main/scala/com/twitter/strato/fed", - "strato/src/main/scala/com/twitter/strato/response", - "strato/src/main/scala/com/twitter/strato/test/config/bouncer", - "strato/src/main/scala/com/twitter/strato/thrift", - "strato/src/main/thrift/com/twitter/strato/context:thrift-scala", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/service", - "tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides", - "tweetypie/common/src/scala/com/twitter/tweetypie/util", - "twitter-context/src/main/scala", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD.docx new file mode 100644 index 000000000..e66ab0c4a Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.docx new file mode 100644 index 000000000..96190667f 
Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala deleted file mode 100644 index a020bdd3e..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala +++ /dev/null @@ -1,140 +0,0 @@ -package com.twitter.tweetypie -package federated -package warmups - -import com.twitter.context.TwitterContext -import com.twitter.context.thriftscala.Viewer -import com.twitter.spam.rtf.thriftscala.SafetyLevel -import com.twitter.stitch.Stitch -import com.twitter.strato.access.Access -import com.twitter.strato.access.Access.AccessToken -import com.twitter.strato.access.Access.AuthenticatedTwitterUserId -import com.twitter.strato.access.Access.AuthenticatedTwitterUserNotSuspended -import com.twitter.strato.access.Access.TwitterUserId -import com.twitter.strato.access.Access.TwitterUserNotSuspended -import com.twitter.strato.catalog.Ops -import com.twitter.strato.client.StaticClient -import com.twitter.strato.context.StratoContext -import com.twitter.strato.opcontext.DarkRequest -import com.twitter.strato.opcontext.OpContext -import com.twitter.strato.test.config.bouncer.TestPrincipals -import com.twitter.strato.thrift.ScroogeConvImplicits._ -import com.twitter.tweetypie.federated.columns.CreateRetweetColumn -import com.twitter.tweetypie.federated.columns.CreateTweetColumn -import com.twitter.tweetypie.federated.columns.DeleteTweetColumn -import com.twitter.tweetypie.federated.columns.UnretweetColumn -import com.twitter.tweetypie.service.WarmupQueriesSettings -import com.twitter.tweetypie.thriftscala.graphql._ -import com.twitter.util.logging.Logger -import com.twitter.util.Future -import com.twitter.util.Stopwatch - -object 
StratoCatalogWarmups { - private[this] val log = Logger(getClass) - - // Performs warmup queries, failing after 30 seconds - def warmup( - warmupSettings: WarmupQueriesSettings, - catalog: PartialFunction[String, Ops] - ): Future[Unit] = { - val elapsed = Stopwatch.start() - // note: we need to supply bouncer principals here, because the - // columns are gated by a bouncer policy - Access - .withPrincipals(WarmupPrincipals) { - StratoContext.withOpContext(WarmupOpContext) { - TwitterContext.let(viewer = WarmupViewer) { - warmupSettings.clientId.asCurrent { - Stitch.run(executeDarkly(catalog)) - } - } - } - } - .onSuccess { _ => log.info("warmup completed in %s".format(elapsed())) } - .onFailure { t => log.error("could not complete warmup queries before startup.", t) } - } - - private val WarmupTwitterUserId = 0L - - private val WarmupPrincipals = Set( - TestPrincipals.normalStratoBouncerAccessPrincipal, - AuthenticatedTwitterUserId(WarmupTwitterUserId), - TwitterUserId(WarmupTwitterUserId), - TwitterUserNotSuspended, - AuthenticatedTwitterUserNotSuspended, - AccessToken(isWritable = true) - ) - - private[this] val RwebClientId = 0L - - private[this] val WarmupViewer = Viewer( - userId = Some(WarmupTwitterUserId), - authenticatedUserId = Some(WarmupTwitterUserId), - clientApplicationId = Some(RwebClientId), - ) - - private[this] val WarmupOpContext = - OpContext - .safetyLevel(SafetyLevel.TweetWritesApi.name) - .copy(darkRequest = Some(DarkRequest())) - .toThrift() - - private[this] val EllenOscarSelfie = 440322224407314432L - - private[this] val TwitterContext: TwitterContext = - com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) - - private[this] def executeDarkly(catalog: PartialFunction[String, Ops]): Stitch[Unit] = { - val stratoClient = new StaticClient(catalog) - val tweetCreator = - stratoClient.executer[CreateTweetRequest, CreateTweetResponseWithSubqueryPrefetchItems]( - CreateTweetColumn.Path) - - val tweetDeletor = - 
stratoClient - .executer[DeleteTweetRequest, DeleteTweetResponseWithSubqueryPrefetchItems]( - DeleteTweetColumn.Path) - - val retweetCreator = - stratoClient - .executer[CreateRetweetRequest, CreateRetweetResponseWithSubqueryPrefetchItems]( - CreateRetweetColumn.Path) - - val unretweetor = - stratoClient - .executer[UnretweetRequest, UnretweetResponseWithSubqueryPrefetchItems]( - UnretweetColumn.Path) - - val stitchCreateTweet = - tweetCreator - .execute(CreateTweetRequest("getting warmer")) - .onSuccess(_ => log.info(s"${CreateTweetColumn.Path} warmup success")) - .onFailure(e => log.info(s"${CreateTweetColumn.Path} warmup fail: $e")) - - val stitchDeleteTweet = - tweetDeletor - .execute(DeleteTweetRequest(-1L)) - .onSuccess(_ => log.info(s"${DeleteTweetColumn.Path} warmup success")) - .onFailure(e => log.info(s"${DeleteTweetColumn.Path} warmup fail: $e")) - - val stitchCreateRetweet = - retweetCreator - .execute(CreateRetweetRequest(EllenOscarSelfie)) - .onSuccess(_ => log.info(s"${CreateRetweetColumn.Path} warmup success")) - .onFailure(e => log.info(s"${CreateRetweetColumn.Path} warmup fail: $e")) - - val stitchUnretweet = - unretweetor - .execute(UnretweetRequest(EllenOscarSelfie)) - .onSuccess(_ => log.info(s"${UnretweetColumn.Path} warmup success")) - .onFailure(e => log.info(s"${UnretweetColumn.Path} warmup fail: $e")) - - Stitch - .join( - stitchCreateTweet, - stitchDeleteTweet, - stitchCreateRetweet, - stitchUnretweet, - ).unit - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.docx new file mode 100644 index 000000000..accb801db Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala deleted file mode 100644 index b9c3c8616..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala +++ /dev/null @@ -1,185 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.tweetutil.DmDeepLink -import com.twitter.tweetutil.TweetPermalink -import com.twitter.tweetypie.core.CardReferenceUriExtractor -import com.twitter.tweetypie.core.NonTombstone -import com.twitter.tweetypie.core.TweetCreateFailure -import com.twitter.tweetypie.repository.TweetQuery -import com.twitter.tweetypie.repository.TweetRepository -import com.twitter.tweetypie.thriftscala.CardReference -import com.twitter.tweetypie.thriftscala.DeviceSource -import com.twitter.tweetypie.thriftscala.QuotedTweet -import com.twitter.tweetypie.thriftscala.ShortenedUrl -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.tweetypie.thriftscala.TweetCreateState - -case class AttachmentBuilderRequest( - tweetId: TweetId, - user: User, - mediaUploadIds: Option[Seq[Long]], - cardReference: Option[CardReference], - attachmentUrl: Option[String], - remoteHost: Option[String], - darkTraffic: Boolean, - deviceSource: DeviceSource) { - val ctx: ValidationContext = ValidationContext( - user = user, - mediaUploadIds = mediaUploadIds, - cardReference = cardReference - ) - val passThroughResponse: AttachmentBuilderResult = - AttachmentBuilderResult(attachmentUrl = attachmentUrl, validationContext = ctx) -} - -case class ValidationContext( - user: User, - mediaUploadIds: Option[Seq[Long]], - cardReference: Option[CardReference]) - -case class AttachmentBuilderResult( - attachmentUrl: Option[String] = None, - quotedTweet: Option[QuotedTweet] = None, - extraChars: Int = 0, - validationContext: ValidationContext) - -object AttachmentBuilder { - - private[this] val log = Logger(getClass) - private[this] val 
attachmentCountLogger = Logger( - "com.twitter.tweetypie.handler.CreateAttachmentCount" - ) - - type Type = FutureArrow[AttachmentBuilderRequest, AttachmentBuilderResult] - type ValidationType = FutureEffect[AttachmentBuilderResult] - - def validateAttachmentUrl(attachmentUrl: Option[String]): Unit.type = - attachmentUrl match { - case None => Unit - case Some(TweetPermalink(_, _)) => Unit - case Some(DmDeepLink(_)) => Unit - case _ => throw TweetCreateFailure.State(TweetCreateState.InvalidAttachmentUrl) - } - - def validateAttachments( - stats: StatsReceiver, - validateCardRef: Gate[Option[String]] - ): AttachmentBuilder.ValidationType = - FutureEffect { result: AttachmentBuilderResult => - validateAttachmentUrl(result.attachmentUrl) - - val ctx = result.validationContext - - val cardRef = ctx.cardReference.filter { - case CardReferenceUriExtractor(NonTombstone(_)) => true - case _ => false - } - - if (result.quotedTweet.isDefined && cardRef.isEmpty) { - Future.Unit - } else { - val attachmentCount = - Seq( - ctx.mediaUploadIds, - result.attachmentUrl, - result.quotedTweet - ).count(_.nonEmpty) - - val userAgent = TwitterContext().flatMap(_.userAgent) - if (attachmentCount + cardRef.count(_ => true) > 1) { - attachmentCountLogger.warn( - s"Too many attachment types on tweet create from user: ${ctx.user.id}, " + - s"agent: '${userAgent}', media: ${ctx.mediaUploadIds}, " + - s"attachmentUrl: ${result.attachmentUrl}, cardRef: $cardRef" - ) - stats.counter("too_many_attachment_types_with_cardref").incr() - } - Future.when(attachmentCount + cardRef.count(_ => validateCardRef(userAgent)) > 1) { - Future.exception(TweetCreateFailure.State(TweetCreateState.TooManyAttachmentTypes)) - } - } - } - - private val queryInclude = TweetQuery.Include(Set(Tweet.CoreDataField.id)) - - private val queryOptions = TweetQuery.Options(include = queryInclude) - - def buildUrlShortenerCtx(request: AttachmentBuilderRequest): UrlShortener.Context = - UrlShortener.Context( - tweetId = 
request.tweetId, - userId = request.user.id, - createdAt = SnowflakeId(request.tweetId).time, - userProtected = request.user.safety.get.isProtected, - clientAppId = request.deviceSource.clientAppId, - remoteHost = request.remoteHost, - dark = request.darkTraffic - ) - - def asQuotedTweet(tweet: Tweet, shortenedUrl: ShortenedUrl): QuotedTweet = - getShare(tweet) match { - case None => QuotedTweet(tweet.id, getUserId(tweet), Some(shortenedUrl)) - case Some(share) => QuotedTweet(share.sourceStatusId, share.sourceUserId, Some(shortenedUrl)) - } - - def tweetPermalink(request: AttachmentBuilderRequest): Option[TweetPermalink] = - request.attachmentUrl.collectFirst { - // prevent tweet-quoting cycles - case TweetPermalink(screenName, quotedTweetId) if request.tweetId > quotedTweetId => - TweetPermalink(screenName, quotedTweetId) - } - - def apply( - tweetRepo: TweetRepository.Optional, - urlShortener: UrlShortener.Type, - validateAttachments: AttachmentBuilder.ValidationType, - stats: StatsReceiver, - denyNonTweetPermalinks: Gate[Unit] = Gate.False - ): Type = { - val tweetGetter = TweetRepository.tweetGetter(tweetRepo, queryOptions) - val attachmentNotPermalinkCounter = stats.counter("attachment_url_not_tweet_permalink") - val quotedTweetFoundCounter = stats.counter("quoted_tweet_found") - val quotedTweetNotFoundCounter = stats.counter("quoted_tweet_not_found") - - def buildAttachmentResult(request: AttachmentBuilderRequest) = - tweetPermalink(request) match { - case Some(qtPermalink) => - tweetGetter(qtPermalink.tweetId).flatMap { - case Some(tweet) => - quotedTweetFoundCounter.incr() - val ctx = buildUrlShortenerCtx(request) - urlShortener((qtPermalink.url, ctx)).map { shortenedUrl => - AttachmentBuilderResult( - quotedTweet = Some(asQuotedTweet(tweet, shortenedUrl)), - extraChars = shortenedUrl.shortUrl.length + 1, - validationContext = request.ctx - ) - } - case None => - quotedTweetNotFoundCounter.incr() - log.warn( - s"unable to extract quote tweet from attachment 
builder request: $request" - ) - if (denyNonTweetPermalinks()) { - throw TweetCreateFailure.State( - TweetCreateState.SourceTweetNotFound, - Some(s"quoted tweet is not found from given permalink: $qtPermalink") - ) - } else { - Future.value(request.passThroughResponse) - } - } - case _ => - attachmentNotPermalinkCounter.incr() - Future.value(request.passThroughResponse) - } - - FutureArrow { request => - for { - result <- buildAttachmentResult(request) - () <- validateAttachments(result) - } yield result - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD deleted file mode 100644 index 2475b5f1a..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD +++ /dev/null @@ -1,88 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/joda-time", - "3rdparty/jvm/org/apache/thrift:libthrift", - "3rdparty/jvm/org/geotools:gt-referencing", - "3rdparty/jvm/org/locationtech/spatial4j", - "compliance/user-consent/src/main/scala/com/twitter/userconsent/compliance/birthdate", - "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", - "diffshow", - "eventbus/client", - "featureswitches/featureswitches-core/src/main/scala", - "finatra/inject/inject-slf4j/src/main/scala/com/twitter/inject", - "flock-client", - "flock-client/src/main/thrift:thrift-scala", - "geoduck/service/src/main/scala/com/twitter/geoduck/service/common/clientmodules", - "geoduck/util/src/main/scala/com/twitter/geoduck/util/primitives", - "geoduck/util/src/main/scala/com/twitter/geoduck/util/service", - "gizmoduck/common/src/main/scala/com/twitter/gizmoduck/util:scala", - "mediaservices/commons/src/main/thrift:thrift-scala", - "scrooge-internal/scrooge-schema/src/main/scala/com/twitter/scrooge/schema", - 
"scrooge-internal/scrooge-schema/src/main/scala/com/twitter/scrooge/schema/scrooge/scala", - "scrooge-internal/scrooge-schema/src/main/scala/com/twitter/scrooge/schema/tree", - "scrooge-internal/src/main/scala/com/twitter/scrooge_internal/linter/known_annotations", - "scrooge/scrooge-core", - "tweetypie/servo/repo", - "tweetypie/servo/util", - "snowflake:id", - "src/scala/com/twitter/takedown/util", - "src/thrift/com/twitter/botmaker:botmaker-scala", - "src/thrift/com/twitter/bouncer:bounce-action-thrift-scala", - "src/thrift/com/twitter/context:testing-signals-scala", - "src/thrift/com/twitter/context:twitter-context-scala", - "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", - "src/thrift/com/twitter/expandodo:only-scala", - "src/thrift/com/twitter/geoduck:geoduck-scala", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/gizmoduck:user-thrift-scala", - "src/thrift/com/twitter/gizmoduck:user-type-thrift-scala", - "src/thrift/com/twitter/relevance/feature_store:feature_store-scala", - "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", - "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions-scala", - "src/thrift/com/twitter/service/talon/gen:thrift-scala", - "src/thrift/com/twitter/servo:servo-exception-scala", - "src/thrift/com/twitter/spam/features:safety-meta-data-scala", - "src/thrift/com/twitter/spam/rtf:safety-level-scala", - "src/thrift/com/twitter/spam/rtf:tweet-rtf-event-scala", - "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:audit-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - 
"stitch/stitch-core", - "tco-util", - "tweet-util/src/main/scala", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/store", - "tweetypie/server/src/main/thrift:compiled-scala", - "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", - "tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie", - "tweetypie/common/src/scala/com/twitter/tweetypie/media", - "tweetypie/common/src/scala/com/twitter/tweetypie/storage", - "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities", - "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", - "tweetypie/common/src/scala/com/twitter/tweetypie/util", - "twitter-context", - "twitter-text/lib/java/src/main/java/com/twitter/twittertext", - "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", - "util/util-stats", - "visibility/common/src/main/scala/com/twitter/visibility/common", - "visibility/lib/src/main/scala/com/twitter/visibility/builder", - "visibility/lib/src/main/scala/com/twitter/visibility/generators", - "visibility/lib/src/main/scala/com/twitter/visibility/models", - "visibility/writer/src/main/scala/com/twitter/visibility/writer", - "visibility/writer/src/main/scala/com/twitter/visibility/writer/interfaces/tweets", - "visibility/writer/src/main/scala/com/twitter/visibility/writer/models", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD.docx new file mode 100644 index 000000000..5b6b7e43a Binary files /dev/null and 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.docx new file mode 100644 index 000000000..17769554e Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala deleted file mode 100644 index 5a04c611f..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala +++ /dev/null @@ -1,74 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.expandodo.thriftscala.AttachmentEligibilityResponses -import com.twitter.expandodo.{thriftscala => expandodo} -import com.twitter.tweetypie.backends.Expandodo -import com.twitter.twittertext.Extractor -import scala.util.control.NoStackTrace -import scala.util.control.NonFatal -import java.net.URI - -object CardReferenceValidationFailedException extends Exception with NoStackTrace - -object CardReferenceValidationHandler { - type Type = FutureArrow[(UserId, CardUri), CardUri] - - def apply(checkEligibility: Expandodo.CheckAttachmentEligibility): Type = { - def validateAttachmentForUser(userId: UserId, cardUri: CardUri): Future[CardUri] = { - val request = Seq(expandodo.AttachmentEligibilityRequest(cardUri, userId)) - checkEligibility(request) - .flatMap(validatedCardUri) - .rescue { - case NonFatal(_) => Future.exception(CardReferenceValidationFailedException) - } - } - - FutureArrow { - case (userId, cardUri) => - if (shouldSkipValidation(cardUri)) { - Future.value(cardUri) - } else { - validateAttachmentForUser(userId, cardUri) - } - } 
- } - - private[this] def validatedCardUri(responses: AttachmentEligibilityResponses) = { - responses.results.headOption match { - case Some( - expandodo.AttachmentEligibilityResult - .Success(expandodo.ValidCardUri(validatedCardUri)) - ) => - Future.value(validatedCardUri) - case _ => - Future.exception(CardReferenceValidationFailedException) - } - } - - // We're not changing state between calls, so it's safe to share among threads - private[this] val extractor = { - val extractor = new Extractor - extractor.setExtractURLWithoutProtocol(false) - extractor - } - - // Card References with these URIs don't need validation since cards referenced by URIs in these - // schemes are public and hence not subject to restrictions. - private[handler] val isWhitelistedSchema = Set("http", "https", "tombstone") - - // NOTE: http://www.ietf.org/rfc/rfc2396.txt - private[this] def hasWhitelistedScheme(cardUri: CardUri) = - Try(new URI(cardUri)).toOption - .map(_.getScheme) - .exists(isWhitelistedSchema) - - // Even though URI spec is technically is a superset of http:// and https:// URLs, we have to - // resort to using a Regex based parser here as a fallback because many URLs found in the wild - // have unescaped components that would fail java.net.URI parsing, yet are still considered acceptable. 
- private[this] def isTwitterUrlEntity(cardUri: CardUri) = - extractor.extractURLs(cardUri).size == 1 - - private[this] def shouldSkipValidation(cardUri: CardUri) = - hasWhitelistedScheme(cardUri) || isTwitterUrlEntity(cardUri) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.docx new file mode 100644 index 000000000..8e1ebebde Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala deleted file mode 100644 index da483cef5..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala +++ /dev/null @@ -1,52 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.core.CardReferenceUriExtractor -import com.twitter.tweetypie.core.NonTombstone -import com.twitter.tweetypie.core.Tombstone -import com.twitter.tweetypie.repository.CardUsersRepository -import com.twitter.tweetypie.repository.CardUsersRepository.Context -import com.twitter.tweetypie.thriftscala.CardReference - -/** - * Finds a set of UserId that may be mentioned when replying to a tweet that has a card. - * - * Replies created without 'auto_populate_reply_metadata' include both 'site' and 'author' users to - * have a more exhaustive list of mentions to match against. This is needed because iOS and Android - * have had different implementations client-side for years. 
- */ -object CardUsersFinder { - - case class Request( - cardReference: Option[CardReference], - urls: Seq[String], - perspectiveUserId: UserId) { - val uris: Seq[String] = cardReference match { - case Some(CardReferenceUriExtractor(cardUri)) => - cardUri match { - case NonTombstone(uri) => Seq(uri) - case Tombstone => Nil - } - case _ => urls - } - - val context: CardUsersRepository.Context = Context(perspectiveUserId) - } - - type Type = Request => Stitch[Set[UserId]] - - /** - * From a card-related arguments in [[Request]] select the set of user ids associated with the - * card. - * - * Note that this uses the same "which card do I use?" logic from Card2Hydrator which - * prioritizes CardReferenceUri and then falls back to the last resolvable (non-None) url entity. - */ - def apply(cardUserRepo: CardUsersRepository.Type): Type = - request => - Stitch - .traverse(request.uris) { uri => cardUserRepo(uri, request.context) } - // select the last, non-None Set of users ids - .map(r => r.flatten.reverse.headOption.getOrElse(Set.empty)) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.docx new file mode 100644 index 000000000..18e79a8f3 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala deleted file mode 100644 index 058bcbce5..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala +++ /dev/null @@ -1,109 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.tweetypie.core.TweetCreateFailure -import com.twitter.tweetypie.thriftscala.CollabControl -import 
com.twitter.tweetypie.thriftscala.CollabControlOptions -import com.twitter.tweetypie.thriftscala.CollabInvitation -import com.twitter.tweetypie.thriftscala.CollabInvitationOptions -import com.twitter.tweetypie.thriftscala.CollabInvitationStatus -import com.twitter.tweetypie.thriftscala.CollabTweet -import com.twitter.tweetypie.thriftscala.CollabTweetOptions -import com.twitter.tweetypie.thriftscala.Communities -import com.twitter.tweetypie.thriftscala.ExclusiveTweetControl -import com.twitter.tweetypie.thriftscala.InvitedCollaborator -import com.twitter.tweetypie.thriftscala.TrustedFriendsControl -import com.twitter.tweetypie.thriftscala.TweetCreateConversationControl -import com.twitter.tweetypie.thriftscala.TweetCreateState.CollabTweetInvalidParams -import com.twitter.tweetypie.util.CommunityUtil - -object CollabControlBuilder { - type Type = Request => Future[Option[CollabControl]] - - case class Request( - collabControlOptions: Option[CollabControlOptions], - replyResult: Option[ReplyBuilder.Result], - communities: Option[Communities], - trustedFriendsControl: Option[TrustedFriendsControl], - conversationControl: Option[TweetCreateConversationControl], - exclusiveTweetControl: Option[ExclusiveTweetControl], - userId: UserId) - - def apply(): Type = { request => - val collabControl = convertToCollabControl(request.collabControlOptions, request.userId) - - validateCollabControlParams( - collabControl, - request.replyResult, - request.communities, - request.trustedFriendsControl, - request.conversationControl, - request.exclusiveTweetControl, - request.userId - ) map { _ => collabControl } - } - - def convertToCollabControl( - collabTweetOptions: Option[CollabControlOptions], - authorId: UserId - ): Option[CollabControl] = { - collabTweetOptions flatMap { - case CollabControlOptions.CollabInvitation( - collabInvitationOptions: CollabInvitationOptions) => - Some( - CollabControl.CollabInvitation( - CollabInvitation( - invitedCollaborators = 
collabInvitationOptions.collaboratorUserIds.map(userId => { - InvitedCollaborator( - collaboratorUserId = userId, - collabInvitationStatus = - if (userId == authorId) - CollabInvitationStatus.Accepted - else CollabInvitationStatus.Pending - ) - }) - ) - ) - ) - case CollabControlOptions.CollabTweet(collabTweetOptions: CollabTweetOptions) => - Some( - CollabControl.CollabTweet( - CollabTweet( - collaboratorUserIds = collabTweetOptions.collaboratorUserIds - ) - ) - ) - case _ => None - } - } - - def validateCollabControlParams( - collabControl: Option[CollabControl], - replyResult: Option[ReplyBuilder.Result], - communities: Option[Communities], - trustedFriendsControl: Option[TrustedFriendsControl], - conversationControl: Option[TweetCreateConversationControl], - exclusiveTweetControl: Option[ExclusiveTweetControl], - userId: UserId - ): Future[Unit] = { - val isInReplyToTweet = replyResult.exists(_.reply.inReplyToStatusId.isDefined) - - collabControl match { - case Some(_: CollabControl) - if (isInReplyToTweet || - CommunityUtil.hasCommunity(communities) || - exclusiveTweetControl.isDefined || - trustedFriendsControl.isDefined || - conversationControl.isDefined) => - Future.exception(TweetCreateFailure.State(CollabTweetInvalidParams)) - case Some(CollabControl.CollabInvitation(collab_invitation)) - if collab_invitation.invitedCollaborators.head.collaboratorUserId != userId => - Future.exception(TweetCreateFailure.State(CollabTweetInvalidParams)) - case Some(CollabControl.CollabTweet(collab_tweet)) - if collab_tweet.collaboratorUserIds.head != userId => - Future.exception(TweetCreateFailure.State(CollabTweetInvalidParams)) - case _ => - Future.Unit - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.docx new file mode 100644 index 000000000..329cbd318 Binary files /dev/null and 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala deleted file mode 100644 index 220a6e1dd..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.tweetypie.handler - -import com.twitter.featureswitches.v2.FeatureSwitchResults -import com.twitter.servo.util.Gate -import com.twitter.tweetypie.Future -import com.twitter.tweetypie.core.TweetCreateFailure -import com.twitter.tweetypie.thriftscala.Communities -import com.twitter.tweetypie.thriftscala.TweetCreateState.CommunityProtectedUserCannotTweet -import com.twitter.tweetypie.util.CommunityUtil - -object CommunitiesValidator { - case class Request( - matchedResults: Option[FeatureSwitchResults], - isProtected: Boolean, - community: Option[Communities]) - - type Type = Request => Future[Unit] - - val CommunityProtectedCanCreateTweet = "communities_protected_community_tweet_creation_enabled" - - val communityProtectedCanCreateTweetGate: Gate[Request] = Gate { request: Request => - request.matchedResults - .flatMap(_.getBoolean(CommunityProtectedCanCreateTweet, shouldLogImpression = true)) - .contains(false) - } - - def apply(): Type = - (request: Request) => { - // Order is important: the feature-switch gate is checked only when the - // request is both protected & community so that the FS experiment measurements - // are based only on data from requests that are subject to rejection by this validator. 
- if (request.isProtected && - CommunityUtil.hasCommunity(request.community) && - communityProtectedCanCreateTweetGate(request)) { - Future.exception(TweetCreateFailure.State(CommunityProtectedUserCannotTweet)) - } else { - Future.Unit - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.docx new file mode 100644 index 000000000..f65822eac Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala deleted file mode 100644 index 6eeea01f9..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala +++ /dev/null @@ -1,272 +0,0 @@ -package com.twitter.tweetypie.handler - -import com.twitter.featureswitches.v2.FeatureSwitchResults -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.UserId -import com.twitter.tweetypie._ -import com.twitter.tweetypie.core.TweetCreateFailure -import com.twitter.tweetypie.repository.UserIdentityRepository -import com.twitter.tweetypie.repository.UserKey -import com.twitter.tweetypie.thriftscala.ConversationControl -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.tweetypie.thriftscala.TweetCreateConversationControl -import com.twitter.tweetypie.thriftscala.TweetCreateState.ConversationControlNotAllowed -import com.twitter.tweetypie.thriftscala.TweetCreateState.InvalidConversationControl -import com.twitter.tweetypie.util.ConversationControls -import com.twitter.util.logging.Logging - -/** - * Process request parameters into a ConversationControl value. 
- */ -object ConversationControlBuilder extends Logging { - type Type = Request => Stitch[Option[ConversationControl]] - - type ScreenName = String - - /** - * The fields necessary to create a [[ConversationControl]]. - * - * This is a trait rather than a case class to avoid running the - * code to extract the mentions in the cases where handling the - * request doesn't need to use them (the common case where - * tweetCreateConversationControl is None). - */ - trait Request { - def tweetCreateConversationControl: Option[TweetCreateConversationControl] - def tweetAuthorId: UserId - def mentionedUserScreenNames: Set[String] - - def noteTweetMentionedUserIds: Option[Set[Long]] - } - - object Request { - - /** - * Extract the data necessary to create a [[ConversationControl]] - * for a new [[Tweet]]. This is intended for use when creating - * Tweets. It must be called after the Tweet has had its entities - * extracted. - */ - def fromTweet( - tweet: Tweet, - tweetCreateConversationControl: Option[TweetCreateConversationControl], - noteTweetMentionedUserIdsList: Option[Seq[Long]] - ): Request = { - val cctl = tweetCreateConversationControl - new Request { - def tweetCreateConversationControl: Option[TweetCreateConversationControl] = cctl - def mentionedUserScreenNames: Set[ScreenName] = - tweet.mentions - // Enforce that the Tweet's mentions have already been - // extracted from the text. (Mentions will be None if they - // have not yet been extracted.) - .getOrElse( - throw new RuntimeException( - "Mentions must be extracted before applying ConversationControls")) - .map(_.screenName) - .toSet - - def tweetAuthorId: UserId = tweet.coreData.get.userId - def noteTweetMentionedUserIds: Option[Set[Long]] = - noteTweetMentionedUserIdsList.map(_.toSet) - } - } - } - - /** - * Create a ConversationControlBuilder that looks up user ids for - * screen names using the specified UserIdentityRepository. 
- */ - def fromUserIdentityRepo( - statsReceiver: StatsReceiver, - userIdentityRepo: UserIdentityRepository.Type - ): Request => Stitch[Option[ConversationControl]] = - ConversationControlBuilder( - getUserId = screenName => userIdentityRepo(UserKey.byScreenName(screenName)).map(_.id), - statsReceiver = statsReceiver - ) - - /** - * Extract the inviteViaMention value which does not exist on the TweetCreateConversationControl - * itself but does exist on the structures it unions. - */ - def inviteViaMention(tccc: TweetCreateConversationControl): Boolean = - tccc match { - case TweetCreateConversationControl.ByInvitation(c) => c.inviteViaMention.contains(true) - case TweetCreateConversationControl.Community(c) => c.inviteViaMention.contains(true) - case TweetCreateConversationControl.Followers(c) => c.inviteViaMention.contains(true) - case _ => false - } - - /** - * Translates the TweetCreateConversationControl into - * ConversationControl using the context from the rest of the tweet - * creation. For the most part, this is just a direct translation, - * plus filling in the contextual user ids (mentioned users and tweet - * author). - */ - def apply( - statsReceiver: StatsReceiver, - getUserId: ScreenName => Stitch[UserId] - ): Request => Stitch[Option[ConversationControl]] = { - val userIdLookupsCounter = statsReceiver.counter("user_id_lookups") - val conversationControlPresentCounter = statsReceiver.counter("conversation_control_present") - val conversationControlInviteViaMentionPresentCounter = - statsReceiver.counter("conversation_control_invite_via_mention_present") - val failureCounter = statsReceiver.counter("failures") - - // Get the user ids for these screen names. Any users who do not - // exist will be silently dropped. 
- def getExistingUserIds( - screenNames: Set[ScreenName], - mentionedUserIds: Option[Set[Long]] - ): Stitch[Set[UserId]] = { - mentionedUserIds match { - case Some(userIds) => Stitch.value(userIds) - case _ => - Stitch - .traverse(screenNames.toSeq) { screenName => - getUserId(screenName).liftNotFoundToOption - .ensure(userIdLookupsCounter.incr()) - } - .map(userIdOptions => userIdOptions.flatten.toSet) - } - } - - // This is broken out just to make it syntactically nicer to add - // the stats handling - def process(request: Request): Stitch[Option[ConversationControl]] = - request.tweetCreateConversationControl match { - case None => Stitch.None - case Some(cctl) => - cctl match { - case TweetCreateConversationControl.ByInvitation(byInvitationControl) => - for { - invitedUserIds <- getExistingUserIds( - request.mentionedUserScreenNames, - request.noteTweetMentionedUserIds) - } yield Some( - ConversationControls.byInvitation( - invitedUserIds = invitedUserIds.toSeq.filterNot(_ == request.tweetAuthorId), - conversationTweetAuthorId = request.tweetAuthorId, - byInvitationControl.inviteViaMention - ) - ) - - case TweetCreateConversationControl.Community(communityControl) => - for { - invitedUserIds <- getExistingUserIds( - request.mentionedUserScreenNames, - request.noteTweetMentionedUserIds) - } yield Some( - ConversationControls.community( - invitedUserIds = invitedUserIds.toSeq.filterNot(_ == request.tweetAuthorId), - conversationTweetAuthorId = request.tweetAuthorId, - communityControl.inviteViaMention - ) - ) - case TweetCreateConversationControl.Followers(followersControl) => - for { - invitedUserIds <- getExistingUserIds( - request.mentionedUserScreenNames, - request.noteTweetMentionedUserIds) - } yield Some( - ConversationControls.followers( - invitedUserIds = invitedUserIds.toSeq.filterNot(_ == request.tweetAuthorId), - conversationTweetAuthorId = request.tweetAuthorId, - followersControl.inviteViaMention - ) - ) - // This should only ever happen if a new 
value is added to the - // union and we don't update this code. - case TweetCreateConversationControl.UnknownUnionField(fld) => - throw new RuntimeException(s"Unexpected TweetCreateConversationControl: $fld") - } - } - - (request: Request) => { - // Wrap in Stitch to encapsulate any exceptions that happen - // before making a Stitch call inside of process. - Stitch(process(request)).flatten.respond { response => - // If we count this before doing the work, and the stats are - // collected before the RPC completes, then any failures - // will get counted in a different minute than the request - // that caused it. - request.tweetCreateConversationControl.foreach { cc => - conversationControlPresentCounter.incr() - if (inviteViaMention(cc)) conversationControlInviteViaMentionPresentCounter.incr() - } - - response.onFailure { e => - error(message = "Failed to create conversation control", cause = e) - // Don't bother counting individual exceptions, because - // the cost of keeping those stats is probably not worth - // the convenience of not having to look in the logs. - failureCounter.incr() - } - } - } - } - - /** - * Validates if a conversation control request is allowed by feature switches - * and is only requested on a root tweet. 
- */ - object Validate { - case class Request( - matchedResults: Option[FeatureSwitchResults], - conversationControl: Option[TweetCreateConversationControl], - inReplyToTweetId: Option[TweetId]) - - type Type = FutureEffect[Request] - - val ExInvalidConversationControl = TweetCreateFailure.State(InvalidConversationControl) - val ExConversationControlNotAllowed = TweetCreateFailure.State(ConversationControlNotAllowed) - val ConversationControlStatusUpdateEnabledKey = "conversation_control_status_update_enabled" - val ConversationControlFollowersEnabledKey = "conversation_control_my_followers_enabled" - - def apply( - useFeatureSwitchResults: Gate[Unit], - statsReceiver: StatsReceiver - ): Type = request => { - def fsDenied(fsKey: String): Boolean = { - val featureEnabledOpt: Option[Boolean] = - // Do not log impressions, which would interfere with shared client experiment data. - request.matchedResults.flatMap(_.getBoolean(fsKey, shouldLogImpression = false)) - val fsEnabled = featureEnabledOpt.contains(true) - if (!fsEnabled) { - statsReceiver.counter(s"check_conversation_control/unauthorized/fs/$fsKey").incr() - } - !fsEnabled - } - - val isCcRequest: Boolean = request.conversationControl.isDefined - - val isCcInvalidParams = isCcRequest && { - val isRootTweet = request.inReplyToTweetId.isEmpty - if (!isRootTweet) { - statsReceiver.counter("check_conversation_control/invalid").incr() - } - !isRootTweet - } - - val isCcDeniedByFs = isCcRequest && { - val isFollower = request.conversationControl.exists { - case _: TweetCreateConversationControl.Followers => true - case _ => false - } - - fsDenied(ConversationControlStatusUpdateEnabledKey) || - (isFollower && fsDenied(ConversationControlFollowersEnabledKey)) - } - - if (isCcDeniedByFs && useFeatureSwitchResults()) { - Future.exception(ExConversationControlNotAllowed) - } else if (isCcInvalidParams) { - Future.exception(ExInvalidConversationControl) - } else { - Future.Unit - } - } - } -} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.docx new file mode 100644 index 000000000..1484d1da4 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala deleted file mode 100644 index c6b1fd0e9..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala +++ /dev/null @@ -1,66 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.stitch.NotFound -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.repository.TweetQuery -import com.twitter.tweetypie.repository.TweetRepository -import com.twitter.tweetypie.repository.UserKey -import com.twitter.tweetypie.repository.UserQueryOptions -import com.twitter.tweetypie.repository.UserRepository -import com.twitter.tweetypie.repository.UserVisibility -import com.twitter.tweetypie.store.AsyncDeleteAdditionalFields -import com.twitter.tweetypie.store.DeleteAdditionalFields -import com.twitter.tweetypie.store.TweetStoreEventOrRetry -import com.twitter.tweetypie.thriftscala.AsyncDeleteAdditionalFieldsRequest -import com.twitter.tweetypie.thriftscala.DeleteAdditionalFieldsRequest - -object DeleteAdditionalFieldsBuilder { - type Type = DeleteAdditionalFieldsRequest => Future[Seq[DeleteAdditionalFields.Event]] - - val tweetQueryOptions = TweetQuery.Options(include = GetTweetsHandler.BaseInclude) - - def apply(tweetRepo: TweetRepository.Type): Type = { - def getTweet(tweetId: TweetId) = - Stitch.run( - tweetRepo(tweetId, tweetQueryOptions) - .rescue(HandlerError.translateNotFoundToClientError(tweetId)) - ) - - request 
=> { - Future.collect( - request.tweetIds.map { tweetId => - getTweet(tweetId).map { tweet => - DeleteAdditionalFields.Event( - tweetId = tweetId, - fieldIds = request.fieldIds, - userId = getUserId(tweet), - timestamp = Time.now - ) - } - } - ) - } - } -} - -object AsyncDeleteAdditionalFieldsBuilder { - type Type = AsyncDeleteAdditionalFieldsRequest => Future[ - TweetStoreEventOrRetry[AsyncDeleteAdditionalFields.Event] - ] - - val userQueryOpts: UserQueryOptions = UserQueryOptions(Set(UserField.Safety), UserVisibility.All) - - def apply(userRepo: UserRepository.Type): Type = { - def getUser(userId: UserId): Future[User] = - Stitch.run( - userRepo(UserKey.byId(userId), userQueryOpts) - .rescue { case NotFound => Stitch.exception(HandlerError.userNotFound(userId)) } - ) - - request => - getUser(request.userId).map { user => - AsyncDeleteAdditionalFields.Event.fromAsyncRequest(request, user) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.docx new file mode 100644 index 000000000..eb55ee590 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala deleted file mode 100644 index 34b588a17..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala +++ /dev/null @@ -1,62 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.eventbus.client.EventBusPublisher -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.backends.GeoScrubEventStore.GetGeoScrubTimestamp -import com.twitter.tweetypie.thriftscala.DeleteLocationData -import 
com.twitter.tweetypie.thriftscala.DeleteLocationDataRequest - -/** - * Initiates the process of removing the geo information from a user's - * tweets. - */ -object DeleteLocationDataHandler { - type Type = DeleteLocationDataRequest => Future[Unit] - - def apply( - getLastScrubTime: GetGeoScrubTimestamp, - scribe: DeleteLocationData => Future[Unit], - eventbus: EventBusPublisher[DeleteLocationData] - ): Type = - request => { - // Attempt to bound the time range of the tweets that need to be - // scrubbed by finding the most recent scrub time on record. This - // is an optimization that prevents scrubbing already-scrubbed - // tweets, so it is OK if the value that we find is occasionally - // stale or if the lookup fails. Primarily, this is intended to - // protect against intentional abuse by enqueueing multiple - // delete_location_data events that have to traverse a very long - // timeline. - Stitch - .run(getLastScrubTime(request.userId)) - // If there is no timestamp or the lookup failed, continue with - // an unchanged request. - .handle { case _ => None } - .flatMap { lastScrubTime => - // Due to clock skew, it's possible for the last scrub - // timestamp to be larger than the timestamp from the request, - // but we ignore that so that we keep a faithful record of - // user requests. The execution of such events will end up a - // no-op. - val event = - DeleteLocationData( - userId = request.userId, - timestampMs = Time.now.inMilliseconds, - lastTimestampMs = lastScrubTime.map(_.inMilliseconds) - ) - - Future.join( - Seq( - // Scribe the event so that we can reprocess events if - // there is a bug or operational issue that causes some - // events to be lost. - scribe(event), - // The actual deletion process is handled by the TweetyPie - // geoscrub daemon. 
- eventbus.publish(event) - ) - ) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.docx new file mode 100644 index 000000000..0aa7912fd Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala deleted file mode 100644 index 168dde9c6..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala +++ /dev/null @@ -1,254 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.stitch.Stitch -import com.twitter.timelineservice.{thriftscala => tls} -import com.twitter.tweetypie.backends.TimelineService -import com.twitter.tweetypie.repository.TweetQuery -import com.twitter.tweetypie.repository.TweetRepository -import com.twitter.tweetypie.thriftscala.CardReference -import com.twitter.tweetypie.thriftscala.ConversationControl -import com.twitter.tweetypie.thriftscala.ConversationControlByInvitation -import com.twitter.tweetypie.thriftscala.ConversationControlCommunity -import com.twitter.tweetypie.thriftscala.ConversationControlFollowers -import com.twitter.tweetypie.thriftscala.EditControl -import com.twitter.tweetypie.thriftscala.EditOptions -import com.twitter.tweetypie.thriftscala.NoteTweetOptions -import com.twitter.tweetypie.thriftscala.PostTweetRequest -import com.twitter.tweetypie.thriftscala.TweetCreateConversationControl -import com.twitter.tweetypie.util.ConversationControls -import com.twitter.tweetypie.util.EditControlUtil -import com.twitter.util.Time - -/** - * Used at tweet creation time to determine whether the tweet creation - * request should be considered a duplicate of an existing 
tweet. - */ -object DuplicateTweetFinder { - - /** - * Return the ids of any tweets that are found to be duplicates of - * this request. - */ - type Type = RequestInfo => Future[Option[TweetId]] - - final case class Settings( - // The number of tweets that are loaded from the user's timeline - // for the heuristic duplicate check - numTweetsToCheck: Int, - // The oldest that a tweet can be to still be considered a - // duplicate by the heuristic duplicate check - maxDuplicateAge: Duration) - - // Takes a ConversationControl from a Tweet and converts to the equivalent - // TweetCreateConversationControl. Note: this is a lossy conversion because the - // ConversationControl contains additional data from the Tweet. - def toTweetCreateConversationControl( - conversationControl: ConversationControl - ): TweetCreateConversationControl = - conversationControl match { - case ConversationControl.ByInvitation( - ConversationControlByInvitation(_, _, inviteViaMention)) => - ConversationControls.Create.byInvitation(inviteViaMention) - case ConversationControl.Community(ConversationControlCommunity(_, _, inviteViaMention)) => - ConversationControls.Create.community(inviteViaMention) - case ConversationControl.Followers(ConversationControlFollowers(_, _, inviteViaMention)) => - ConversationControls.Create.followers(inviteViaMention) - case _ => throw new IllegalArgumentException - } - - /** - * The parts of the request that we need in order to perform - * duplicate detection. 
- */ - final case class RequestInfo( - userId: UserId, - isNarrowcast: Boolean, - isNullcast: Boolean, - text: String, - replyToTweetId: Option[TweetId], - mediaUploadIds: Seq[MediaId], - cardReference: Option[CardReference], - conversationControl: Option[TweetCreateConversationControl], - underlyingCreativesContainer: Option[CreativesContainerId], - editOptions: Option[EditOptions] = None, - noteTweetOptions: Option[NoteTweetOptions] = None) { - - def isDuplicateOf(tweet: Tweet, oldestAcceptableTimestamp: Time): Boolean = { - val createdAt = getTimestamp(tweet) - val isDuplicateText = text == getText(tweet) - val isDuplicateReplyToTweetId = replyToTweetId == getReply(tweet).flatMap(_.inReplyToStatusId) - val isDuplicateMedia = getMedia(tweet).map(_.mediaId) == mediaUploadIds - val isDuplicateCardReference = getCardReference(tweet) == cardReference - val isDuplicateConversationControl = - tweet.conversationControl.map(toTweetCreateConversationControl) == conversationControl - val isDuplicateConversationContainerId = { - tweet.underlyingCreativesContainerId == underlyingCreativesContainer - } - - val isDuplicateIfEditRequest = if (editOptions.isDefined) { - // We do not count an incoming edit request as creating a duplicate tweet if: - // 1) The tweet that is considered a duplicate is a previous version of this tweet OR - // 2) The tweet that is considered a duplicate is otherwise stale. 
- val tweetEditChain = tweet.editControl match { - case Some(EditControl.Initial(initial)) => - initial.editTweetIds - case Some(EditControl.Edit(edit)) => - edit.editControlInitial.map(_.editTweetIds).getOrElse(Nil) - case _ => Nil - } - val tweetIsAPreviousVersion = - editOptions.map(_.previousTweetId).exists(tweetEditChain.contains) - - val tweetIsStale = EditControlUtil.isLatestEdit(tweet.editControl, tweet.id) match { - case Return(false) => true - case _ => false - } - - !(tweetIsStale || tweetIsAPreviousVersion) - } else { - // If not an edit request, this condition is true as duplication checking is not blocked - true - } - - // Note that this does not prevent you from tweeting the same - // image twice with different text, or the same text twice with - // different images, because if you upload the same media twice, - // we will store two copies of it, each with a different media - // URL and thus different t.co URL, and since the text that - // we're checking here has that t.co URL added to it already, it - // is necessarily different. - // - // We shouldn't have to check the user id or whether it's a - // retweet, because we loaded the tweets from the user's - // (non-retweet) timelines, but it doesn't hurt and protects - // against possible future changes. - (oldestAcceptableTimestamp <= createdAt) && - getShare(tweet).isEmpty && - (getUserId(tweet) == userId) && - isDuplicateText && - isDuplicateReplyToTweetId && - isDuplicateMedia && - isDuplicateCardReference && - isDuplicateConversationControl && - isDuplicateConversationContainerId && - isDuplicateIfEditRequest && - noteTweetOptions.isEmpty // Skip duplicate checks for NoteTweets - } - } - - object RequestInfo { - - /** - * Extract the information relevant to the DuplicateTweetFinder - * from the PostTweetRequest. 
- */ - def fromPostTweetRequest(req: PostTweetRequest, processedText: String): RequestInfo = - RequestInfo( - userId = req.userId, - isNarrowcast = req.narrowcast.nonEmpty, - isNullcast = req.nullcast, - text = processedText, - replyToTweetId = req.inReplyToTweetId, - mediaUploadIds = req.mediaUploadIds.getOrElse[Seq[MediaId]](Seq.empty), - cardReference = req.additionalFields.flatMap(_.cardReference), - conversationControl = req.conversationControl, - underlyingCreativesContainer = req.underlyingCreativesContainerId, - editOptions = req.editOptions, - noteTweetOptions = req.noteTweetOptions - ) - } - - /** - * Encapsulates the external interactions that we need to do for - * duplicate checking. - */ - trait TweetSource { - def loadTweets(tweetIds: Seq[TweetId]): Future[Seq[Tweet]] - def loadUserTimelineIds(userId: UserId, maxCount: Int): Future[Seq[TweetId]] - def loadNarrowcastTimelineIds(userId: UserId, maxCount: Int): Future[Seq[TweetId]] - } - - object TweetSource { - - /** - * Use the provided services to access tweets. 
- */ - def fromServices( - tweetRepo: TweetRepository.Optional, - getStatusTimeline: TimelineService.GetStatusTimeline - ): TweetSource = - new TweetSource { - // only fields needed by RequestInfo.isDuplicateOf() - private[this] val tweetQueryOption = - TweetQuery.Options( - TweetQuery.Include( - tweetFields = Set( - Tweet.CoreDataField.id, - Tweet.MediaField.id, - Tweet.ConversationControlField.id, - Tweet.EditControlField.id - ), - pastedMedia = true - ) - ) - - private[this] def loadTimeline(query: tls.TimelineQuery): Future[Seq[Long]] = - getStatusTimeline(Seq(query)).map(_.head.entries.map(_.statusId)) - - override def loadUserTimelineIds(userId: UserId, maxCount: Int): Future[Seq[Long]] = - loadTimeline( - tls.TimelineQuery( - timelineType = tls.TimelineType.User, - timelineId = userId, - maxCount = maxCount.toShort - ) - ) - - override def loadNarrowcastTimelineIds(userId: UserId, maxCount: Int): Future[Seq[Long]] = - loadTimeline( - tls.TimelineQuery( - timelineType = tls.TimelineType.Narrowcasted, - timelineId = userId, - maxCount = maxCount.toShort - ) - ) - - override def loadTweets(tweetIds: Seq[TweetId]): Future[Seq[Tweet]] = - if (tweetIds.isEmpty) { - Future.value(Seq[Tweet]()) - } else { - Stitch - .run( - Stitch.traverse(tweetIds) { tweetId => tweetRepo(tweetId, tweetQueryOption) } - ) - .map(_.flatten) - } - } - } - - def apply(settings: Settings, tweetSource: TweetSource): Type = { reqInfo => - if (reqInfo.isNullcast) { - // iff nullcast, we bypass duplication logic all together - Future.None - } else { - val oldestAcceptableTimestamp = Time.now - settings.maxDuplicateAge - val userTweetIdsFut = - tweetSource.loadUserTimelineIds(reqInfo.userId, settings.numTweetsToCheck) - - // Check the narrowcast timeline iff this is a narrowcasted tweet - val narrowcastTweetIdsFut = - if (reqInfo.isNarrowcast) { - tweetSource.loadNarrowcastTimelineIds(reqInfo.userId, settings.numTweetsToCheck) - } else { - Future.value(Seq.empty) - } - - for { - userTweetIds 
<- userTweetIdsFut - narrowcastTweetIds <- narrowcastTweetIdsFut - candidateTweets <- tweetSource.loadTweets(userTweetIds ++ narrowcastTweetIds) - } yield candidateTweets.find(reqInfo.isDuplicateOf(_, oldestAcceptableTimestamp)).map(_.id) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.docx new file mode 100644 index 000000000..e9efb2f5d Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala deleted file mode 100644 index d3baa0ae0..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala +++ /dev/null @@ -1,361 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.expandodo.thriftscala.Card2RequestOptions -import com.twitter.featureswitches.v2.FeatureSwitchResults -import com.twitter.gizmoduck.util.UserUtil -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.core.TweetCreateFailure -import com.twitter.tweetypie.repository.Card2Repository -import com.twitter.tweetypie.repository.StratoPromotedTweetRepository -import com.twitter.tweetypie.repository.StratoSubscriptionVerificationRepository -import com.twitter.tweetypie.repository.TweetQuery -import com.twitter.tweetypie.repository.TweetRepository -import com.twitter.tweetypie.repository.UrlCard2Key -import com.twitter.tweetypie.thriftscala.EditControl -import com.twitter.tweetypie.thriftscala.EditOptions -import com.twitter.tweetypie.thriftscala.TweetCreateState -import com.twitter.tweetypie.util.EditControlUtil._ -import com.twitter.tweetypie.thriftscala.CardReference -import com.twitter.tweetypie.thriftscala.EditControlInitial -import 
com.twitter.tweetypie.thriftscala.PostTweetRequest -import com.twitter.tweetypie.util.CommunityAnnotation -import com.twitter.tweetypie.util.EditControlUtil -import com.twitter.util.Future - -object EditControlBuilder { - type Type = Request => Future[Option[EditControl]] - - val editTweetCountStat = "edit_tweet_count" - val editControlQueryOptions = TweetQuery.Options( - TweetQuery.Include(Set(Tweet.CoreDataField.id, Tweet.EditControlField.id)) - ) - val TweetEditCreationEnabledKey = "tweet_edit_creation_enabled" - val TweetEditCreationEnabledForTwitterBlueKey = "tweet_edit_creation_enabled_for_twitter_blue" - - val pollCardNames: Set[String] = Set( - "poll2choice_text_only", - "poll3choice_text_only", - "poll4choice_text_only", - "poll2choice_image", - "poll3choice_image", - "poll4choice_image", - "poll2choice_video", - "poll3choice_video", - "poll4choice_video", - ) - - /** Used just for checking card name for poll check in case cards platform key not provided. */ - val defaultCardsPlatformKey = "iPhone-13" - - /** - * Do we assume a Tweet has a poll (which makes it not editable) when it has a card - * that could be a poll, and it cannot be resolved at create. 
- */ - val isPollCardAssumption = true - - val tweetEditSubscriptionResource = "feature/tweet_edit" - - val log: Logger = Logger(getClass) - - case class Request( - postTweetRequest: PostTweetRequest, - tweet: Tweet, - matchedResults: Option[FeatureSwitchResults]) { - def editOptions: Option[EditOptions] = postTweetRequest.editOptions - - def authorId: UserId = postTweetRequest.userId - - def createdAt: Time = Time.fromMilliseconds(tweet.coreData.get.createdAtSecs * 1000L) - - def tweetId: TweetId = tweet.id - - def cardReference: Option[CardReference] = - postTweetRequest.additionalFields.flatMap(_.cardReference) - - def cardsPlatformKey: Option[String] = - postTweetRequest.hydrationOptions.flatMap(_.cardsPlatformKey) - } - - def apply( - tweetRepo: TweetRepository.Type, - card2Repo: Card2Repository.Type, - promotedTweetRepo: StratoPromotedTweetRepository.Type, - subscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type, - disablePromotedTweetEdit: Gate[Unit], - checkTwitterBlueSubscription: Gate[Unit], - setEditWindowToSixtyMinutes: Gate[Unit], - stats: StatsReceiver - ): Type = { - - // Nullcast tweets not allowed, except if the tweet has a community annotation - def isNullcastedButNotCommunityTweet(request: Request): Boolean = { - - val isNullcasted: Boolean = request.tweet.coreData.get.nullcast - - val communityIds: Option[Seq[CommunityId]] = - request.postTweetRequest.additionalFields - .flatMap(CommunityAnnotation.additionalFieldsToCommunityIDs) - - isNullcasted && !(communityIds.exists(_.nonEmpty)) - } - - def isSuperFollow(tweet: Tweet): Boolean = tweet.exclusiveTweetControl.isDefined - - def isCollabTweet(tweet: Tweet): Boolean = tweet.collabControl.isDefined - - def isReplyToTweet(tweet: Tweet): Boolean = - getReply(tweet).flatMap(_.inReplyToStatusId).isDefined - - // When card is tombstone, tweet is not considered a poll, and therefore can be edit eligible. 
- val cardReferenceUriIsTombstone = stats.counter("edit_control_builder_card_tombstoned") - // We check whether tweets are polls since these are not edit eligible. - // If we are not sure due to lookup failure, we take an `isPollCardAssumption`. - def isPoll( - card2Repo: Card2Repository.Type, - cardReference: CardReference, - cardsPlatformKey: String, - ): Stitch[Boolean] = { - if (cardReference.cardUri == "tombstone://card") { - cardReferenceUriIsTombstone.incr() - Stitch.value(false) - } else { - val key = UrlCard2Key(cardReference.cardUri) - // `allowNonTcoUrls = true` This allows us to check if non-tco urls (e.g. apple.com) have a card - // at this point in tweet builder urls can be in their original form and not tcoified. - val options = Card2RequestOptions( - platformKey = cardsPlatformKey, - allowNonTcoUrls = true - ) - card2Repo(key, options) - .map(card2 => pollCardNames.contains(card2.name)) - } - } - - def isFeatureSwitchEnabled(matchedResults: Option[FeatureSwitchResults], key: String): Boolean = - matchedResults.flatMap(_.getBoolean(key, shouldLogImpression = false)).contains(true) - - def wrapInitial(initial: EditControlInitial): Option[EditControl.Initial] = - Some(EditControl.Initial(initial = initial)) - - // Checks for validity of an edit are implemented as procedures - // that throw an error in case a check fails. This composes way better than - // returning a Try/Future/Stitch because: - // 1. We do not need to decide which of the aforementioned containers to use. - // 2. The checks as below compose with callbacks in all the aforementioned containers. 
- - val editRequestOutsideOfAllowlist = stats.counter("edit_control_builder_rejected", "allowlist") - - // This method uses two feature switches: - // - TweetEditCreationEnabledKey authorizes the user to edit tweets directly - // - TweetEditCreationEnabledForTwitterBlueKey authorizes the user to edit tweets if they have - // a Twitter Blue subscription - // - // Test users are always authorized to edit tweets. - def checkUserEligibility( - authorId: UserId, - matchedResults: Option[FeatureSwitchResults] - ): Stitch[Unit] = { - val isTestUser = UserUtil.isTestUserId(authorId) - val authorizedWithoutTwitterBlue = - isFeatureSwitchEnabled(matchedResults, TweetEditCreationEnabledKey) - - if (isTestUser || authorizedWithoutTwitterBlue) { - // If the editing user is a test user or is authorized by the non-Twitter Blue feature - // switch, allow editing. - Stitch.Done - } else { - // Otherwise, check if they're authorized by the Twitter Blue feature switch and if they're - // subscribed to Twitter Blue. 
- val authorizedWithTwitterBlue: Stitch[Boolean] = - if (checkTwitterBlueSubscription() && - isFeatureSwitchEnabled(matchedResults, TweetEditCreationEnabledForTwitterBlueKey)) { - subscriptionVerificationRepo(authorId, tweetEditSubscriptionResource) - } else Stitch.value(false) - - authorizedWithTwitterBlue.flatMap { authorized => - if (!authorized) { - log.error(s"User ${authorId} unauthorized to edit") - editRequestOutsideOfAllowlist.incr() - Stitch.exception(TweetCreateFailure.State(TweetCreateState.EditTweetUserNotAuthorized)) - } else Stitch.Done - } - } - } - - val editRequestByNonAuthor = stats.counter("edit_control_builder_rejected", "not_author") - def checkAuthor( - authorId: UserId, - previousTweetAuthorId: UserId - ): Unit = { - if (authorId != previousTweetAuthorId) { - editRequestByNonAuthor.incr() - throw TweetCreateFailure.State(TweetCreateState.EditTweetUserNotAuthor) - } - } - - val tweetEditForStaleTweet = stats.counter("edit_control_builder_rejected", "stale") - def checkLatestEdit( - previousTweetId: TweetId, - initial: EditControlInitial, - ): Unit = { - if (previousTweetId != initial.editTweetIds.last) { - tweetEditForStaleTweet.incr() - throw TweetCreateFailure.State(TweetCreateState.EditTweetNotLatestVersion) - } - } - - val tweetEditForLimitReached = stats.counter("edit_control_builder_rejected", "edits_limit") - def checkEditsRemaining(initial: EditControlInitial): Unit = { - initial.editsRemaining match { - case Some(number) if number > 0 => // OK - case _ => - tweetEditForLimitReached.incr() - throw TweetCreateFailure.State(TweetCreateState.EditCountLimitReached) - } - } - - val editTweetExpired = stats.counter("edit_control_builder_rejected", "expired") - val editTweetExpiredNoEditControl = - stats.counter("edit_control_builder_rejected", "expired", "no_edit_control") - def checkEditTimeWindow(initial: EditControlInitial): Unit = { - initial.editableUntilMsecs match { - case Some(millis) if Time.now < Time.fromMilliseconds(millis) => 
// OK - case Some(_) => - editTweetExpired.incr() - throw TweetCreateFailure.State(TweetCreateState.EditTimeLimitReached) - case editable => - editTweetExpired.incr() - if (editable.isEmpty) { - editTweetExpiredNoEditControl.incr() - } - throw TweetCreateFailure.State(TweetCreateState.EditTimeLimitReached) - } - } - - val tweetEditNotEligible = stats.counter("edit_control_builder_rejected", "not_eligible") - def checkIsEditEligible(initial: EditControlInitial): Unit = { - initial.isEditEligible match { - case Some(true) => // OK - case _ => - tweetEditNotEligible.incr() - throw TweetCreateFailure.State(TweetCreateState.NotEligibleForEdit) - } - } - - val editControlInitialMissing = - stats.counter("edit_control_builder_rejected", "initial_missing") - def findEditControlInitial(previousTweet: Tweet): EditControlInitial = { - previousTweet.editControl match { - case Some(EditControl.Initial(initial)) => initial - case Some(EditControl.Edit(edit)) => - edit.editControlInitial.getOrElse { - editControlInitialMissing.incr() - throw new IllegalStateException( - "Encountered edit tweet with missing editControlInitial.") - } - case _ => - throw TweetCreateFailure.State(TweetCreateState.EditTimeLimitReached) - } - } - - val editPromotedTweet = stats.counter("tweet_edit_for_promoted_tweet") - def checkPromotedTweet( - previousTweetId: TweetId, - promotedTweetRepo: StratoPromotedTweetRepository.Type, - disablePromotedTweetEdit: Gate[Unit] - ): Stitch[Unit] = { - if (disablePromotedTweetEdit()) { - promotedTweetRepo(previousTweetId).flatMap { - case false => - Stitch.Done - case true => - editPromotedTweet.incr() - Stitch.exception(TweetCreateFailure.State(TweetCreateState.EditTweetUserNotAuthorized)) - } - } else { - Stitch.Done - } - } - - // Each time edit is made, count how many versions a tweet already has. - // Value should be always between 1 and 4. 
- val editTweetCount = 0 - .to(EditControlUtil.maxTweetEditsAllowed) - .map(i => i -> stats.counter("edit_control_builder_edits_count", i.toString)) - .toMap - // Overall counter and failures of card resolution for poll lookups. Needed because polls are not editable. - val pollCardResolutionTotal = stats.counter("edit_control_builder_card_resolution", "total") - val pollCardResolutionFailure = - stats.counter("edit_control_builder_card_resolution", "failures") - // Edit of initial tweet requested, and all edit checks successful. - val initialEditTweet = stats.counter("edit_control_builder_initial_edit") - request => - Stitch.run { - request.editOptions match { - case None => - val editControl = - makeEditControlInitial( - tweetId = request.tweetId, - createdAt = request.createdAt, - setEditWindowToSixtyMinutes = setEditWindowToSixtyMinutes - ).initial.copy( - isEditEligible = Some( - !isNullcastedButNotCommunityTweet(request) - && !isSuperFollow(request.tweet) - && !isCollabTweet(request.tweet) - && !isReplyToTweet(request.tweet) - ), - ) - (editControl.isEditEligible, request.cardReference) match { - case (Some(true), Some(reference)) => - pollCardResolutionTotal.incr() - isPoll( - card2Repo = card2Repo, - cardReference = reference, - cardsPlatformKey = request.cardsPlatformKey.getOrElse(defaultCardsPlatformKey), - ).rescue { - // Revert to the assumed value if card cannot be resolved. 
- case _ => - pollCardResolutionFailure.incr() - Stitch.value(isPollCardAssumption) - } - .map { tweetIsAPoll => - wrapInitial(editControl.copy(isEditEligible = Some(!tweetIsAPoll))) - } - case _ => Stitch.value(wrapInitial(editControl)) - } - case Some(editOptions) => - for { - (previousTweet, _, _) <- Stitch.join( - tweetRepo(editOptions.previousTweetId, editControlQueryOptions), - checkPromotedTweet( - editOptions.previousTweetId, - promotedTweetRepo, - disablePromotedTweetEdit), - checkUserEligibility( - authorId = request.authorId, - matchedResults = request.matchedResults) - ) - } yield { - val initial = findEditControlInitial(previousTweet) - checkAuthor( - authorId = request.authorId, - previousTweetAuthorId = getUserId(previousTweet)) - editTweetCount - .get(initial.editTweetIds.size) - .orElse(editTweetCount.get(EditControlUtil.maxTweetEditsAllowed)) - .foreach(counter => counter.incr()) - checkLatestEdit(previousTweet.id, initial) - checkEditsRemaining(initial) - checkEditTimeWindow(initial) - checkIsEditEligible(initial) - if (initial.editTweetIds == Seq(previousTweet.id)) { - initialEditTweet.incr() - } - Some(editControlEdit(initialTweetId = initial.editTweetIds.head)) - } - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.docx new file mode 100644 index 000000000..31a5414a5 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala deleted file mode 100644 index 0177996ec..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala +++ /dev/null @@ -1,137 +0,0 @@ -package com.twitter.tweetypie -package handler - -import 
com.twitter.scrooge.schema.scrooge.scala.CompiledScroogeDefBuilder -import com.twitter.scrooge.schema.scrooge.scala.CompiledScroogeValueExtractor -import com.twitter.scrooge.schema.tree.DefinitionTraversal -import com.twitter.scrooge.schema.tree.FieldPath -import com.twitter.scrooge.schema.{ThriftDefinitions => DEF} -import com.twitter.scrooge_internal.linter.known_annotations.AllowedAnnotationKeys.TweetEditAllowed -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.core.TweetCreateFailure -import com.twitter.tweetypie.repository.TweetQuery.Options -import com.twitter.tweetypie.repository.TweetQuery -import com.twitter.tweetypie.repository.TweetRepository -import com.twitter.tweetypie.thriftscala.ConversationControl -import com.twitter.tweetypie.thriftscala.TweetCreateState.FieldEditNotAllowed -import com.twitter.tweetypie.thriftscala.TweetCreateState.InitialTweetNotFound -import com.twitter.tweetypie.thriftscala.EditOptions -import com.twitter.tweetypie.thriftscala.Tweet -import com.twitter.util.Future -import com.twitter.util.logging.Logger - -/** - * This class constructs a validator `Tweet => Future[Unit]` which - * takes a new edit tweet and performs some validations. Specifically, it - * - * 1) ensures that no uneditable fields were edited. Uneditable fields are marked - * on the tweet.thrift using the thrift annotation "tweetEditAllowed=false". - * By default, fields with no annotation are treated as editable. - * - * 2) ensures that the conversationControl field (which is editable) remains the - * same type, e.g. a ConversationControl.ByInvitation doesn't change to a - * ConversationControl.Community. - * - * If either of these validations fail, the validator fails with a `FieldEditNotAllowed` - * tweet create state. 
- */ -object EditValidator { - type Type = (Tweet, Option[EditOptions]) => Future[Unit] - - val log: Logger = Logger(getClass) - - // An object that describes the tweet thrift, used to walk a tweet object looking - // for annotated fields. - val TweetDef = CompiledScroogeDefBuilder.build[Tweet].asInstanceOf[DEF.StructDef] - - // Collect the `FieldPath` for any nested tweet field with a uneditable field annotation - // that is set to false. These are the fields that this validator ensures cannot be edited. - val uneditableFieldPaths: Seq[FieldPath] = { - DefinitionTraversal().collect(TweetDef) { - case (d: DEF.FieldDef, path) if (d.annotations.get(TweetEditAllowed).contains("false")) => - path - } - } - - // A tweet query options which includes - // - any top level tweet field which either is an uneditable field, or contains an uneditable - // subfield. - // - the conversationControl field - // These fields must be present on the initial tweet in order for us to compare them against the - // edit tweet. - val previousTweetQueryOptions = { - // A set of the top level field ids for each (potentially nested) uneditable field. - val topLevelUneditableTweetFields = uneditableFieldPaths.map(_.ids.head).toSet - Options( - TweetQuery.Include( - tweetFields = topLevelUneditableTweetFields + Tweet.ConversationControlField.id - )) - } - - def validateUneditableFields(previousTweet: Tweet, editTweet: Tweet): Unit = { - // Collect uneditable fields that were edited - val invalidEditedFields = uneditableFieldPaths.flatMap { fieldPath => - val previousValue = - FieldPath.lensGet(CompiledScroogeValueExtractor, previousTweet, fieldPath) - val editValue = FieldPath.lensGet(CompiledScroogeValueExtractor, editTweet, fieldPath) - - if (previousValue != editValue) { - Some(fieldPath.toString) - } else { - None - } - } - - if (invalidEditedFields.nonEmpty) { - // If any inequalities are found, log them and return an exception. 
- val msg = "uneditable fields were edited: " + invalidEditedFields.mkString(",") - log.error(msg) - throw TweetCreateFailure.State(FieldEditNotAllowed, Some(msg)) - } - } - - def validateConversationControl( - previous: Option[ConversationControl], - edit: Option[ConversationControl] - ): Unit = { - import ConversationControl.ByInvitation - import ConversationControl.Community - import ConversationControl.Followers - - (previous, edit) match { - case (None, None) => () - case (Some(ByInvitation(_)), Some(ByInvitation(_))) => () - case (Some(Community(_)), Some(Community(_))) => () - case (Some(Followers(_)), Some(Followers(_))) => () - case (_, _) => - val msg = "conversationControl type was edited" - log.error(msg) - throw TweetCreateFailure.State(FieldEditNotAllowed, Some(msg)) - } - } - - def apply(tweetRepo: TweetRepository.Optional): Type = { (tweet, editOptions) => - Stitch.run( - editOptions match { - case Some(EditOptions(previousTweetId)) => { - // Query for the previous tweet so that we can compare the - // fields between the two tweets. - tweetRepo(previousTweetId, previousTweetQueryOptions).map { - case Some(previousTweet) => - validateUneditableFields(previousTweet, tweet) - validateConversationControl( - previousTweet.conversationControl, - tweet.conversationControl) - case _ => - // If the previous tweet is not found we cannot perform validations that - // compare tweet fields and we have to fail tweet creation. - throw TweetCreateFailure.State(InitialTweetNotFound) - } - } - // This is the case where this isn't an edit tweet (since editOptions = None) - // Since this tweet is not an edit there are no fields to validate. 
- case _ => Stitch.Unit - } - ) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.docx new file mode 100644 index 000000000..295d00532 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala deleted file mode 100644 index 64441439b..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala +++ /dev/null @@ -1,102 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.finagle.stats.Stat -import com.twitter.flockdb.client._ -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.thriftscala._ - -trait EraseUserTweetsHandler { - - val eraseUserTweetsRequest: FutureArrow[EraseUserTweetsRequest, Unit] - - val asyncEraseUserTweetsRequest: FutureArrow[AsyncEraseUserTweetsRequest, Unit] -} - -/** - * This library allows you to erase all of a users's tweets. It's used to clean up - * tweets after a user deletes their account. - */ -object EraseUserTweetsHandler { - - /** - * Build a FutureEffect which, when called, deletes one page worth of tweets at the - * specified flock cursor. When the page of tweets has been deleted another asyncEraseUserTweets - * request is made with the updated cursor location so that the next page of tweets can be processed. 
- */ - def apply( - selectPage: FutureArrow[Select[StatusGraph], PageResult[Long]], - deleteTweet: FutureEffect[(TweetId, UserId)], - asyncEraseUserTweets: FutureArrow[AsyncEraseUserTweetsRequest, Unit], - stats: StatsReceiver, - sleep: () => Future[Unit] = () => Future.Unit - ): EraseUserTweetsHandler = - new EraseUserTweetsHandler { - val latencyStat: Stat = stats.stat("latency_ms") - val deletedTweetsStat: Stat = stats.stat("tweets_deleted_for_erased_user") - - val selectUserTweets: AsyncEraseUserTweetsRequest => Select[StatusGraph] = - (request: AsyncEraseUserTweetsRequest) => - UserTimelineGraph - .from(request.userId) - .withCursor(Cursor(request.flockCursor)) - - // For a provided list of tweetIds, delete each one sequentially, sleeping between each call - // This is a rate limiting mechanism to slow down deletions. - def deletePage(page: PageResult[Long], expectedUserId: UserId): Future[Unit] = - page.entries.foldLeft(Future.Unit) { (previousFuture, nextId) => - for { - _ <- previousFuture - _ <- sleep() - _ <- deleteTweet((nextId, expectedUserId)) - } yield () - } - - /** - * If we aren't on the last page, make another EraseUserTweets request to delete - * the next page of tweets - */ - val nextRequestOrEnd: (AsyncEraseUserTweetsRequest, PageResult[Long]) => Future[Unit] = - (request: AsyncEraseUserTweetsRequest, page: PageResult[Long]) => - if (page.nextCursor.isEnd) { - latencyStat.add(Time.fromMilliseconds(request.startTimestamp).untilNow.inMillis) - deletedTweetsStat.add(request.tweetCount + page.entries.size) - Future.Unit - } else { - asyncEraseUserTweets( - request.copy( - flockCursor = page.nextCursor.value, - tweetCount = request.tweetCount + page.entries.size - ) - ) - } - - override val eraseUserTweetsRequest: FutureArrow[EraseUserTweetsRequest, Unit] = - FutureArrow { request => - asyncEraseUserTweets( - AsyncEraseUserTweetsRequest( - userId = request.userId, - flockCursor = Cursor.start.value, - startTimestamp = Time.now.inMillis, - tweetCount 
= 0L - ) - ) - } - - override val asyncEraseUserTweetsRequest: FutureArrow[AsyncEraseUserTweetsRequest, Unit] = - FutureArrow { request => - for { - _ <- sleep() - - // get one page of tweets - page <- selectPage(selectUserTweets(request)) - - // delete tweets - _ <- deletePage(page, request.userId) - - // make call to delete the next page of tweets - _ <- nextRequestOrEnd(request, page) - } yield () - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.docx new file mode 100644 index 000000000..9b6ff073e Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala deleted file mode 100644 index 19cbbded0..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala +++ /dev/null @@ -1,137 +0,0 @@ -package com.twitter.tweetypie -package handler - -import com.twitter.finagle.stats.Counter -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.repository.PlaceKey -import com.twitter.tweetypie.repository.PlaceRepository -import com.twitter.tweetypie.serverutil.ExceptionCounter -import com.twitter.tweetypie.thriftscala._ - -object GeoStats { - val topTenCountryCodes: Set[PlaceLanguage] = - Set("US", "JP", "GB", "ID", "BR", "SA", "TR", "MX", "ES", "CA") - - def apply(stats: StatsReceiver): Effect[Option[Place]] = { - val totalCount = stats.counter("total") - val notFoundCount = stats.counter("not_found") - val countryStats: Map[String, Counter] = - topTenCountryCodes.map(cc => cc -> stats.scope("with_country_code").counter(cc)).toMap - - val placeTypeStats: Map[PlaceType, Counter] = - Map( - PlaceType.Admin -> stats.counter("admin"), - PlaceType.City -> stats.counter("city"), - 
PlaceType.Country -> stats.counter("country"), - PlaceType.Neighborhood -> stats.counter("neighborhood"), - PlaceType.Poi -> stats.counter("poi"), - PlaceType.Unknown -> stats.counter("unknown") - ) - - Effect.fromPartial { - case Some(place) => { - totalCount.incr() - placeTypeStats(place.`type`).incr() - place.countryCode.foreach(cc => countryStats.get(cc).foreach(_.incr())) - } - case None => notFoundCount.incr() - } - } -} - -object GeoBuilder { - case class Request(createGeo: TweetCreateGeo, userGeoEnabled: Boolean, language: String) - - case class Result(geoCoordinates: Option[GeoCoordinates], placeId: Option[PlaceId]) - - type Type = FutureArrow[Request, Result] - - def apply(placeRepo: PlaceRepository.Type, rgc: ReverseGeocoder, stats: StatsReceiver): Type = { - val exceptionCounters = ExceptionCounter(stats) - - def ignoreFailures[A](future: Future[Option[A]]): Future[Option[A]] = - exceptionCounters(future).handle { case _ => None } - - def isValidPlaceId(placeId: String) = PlaceIdRegex.pattern.matcher(placeId).matches - - def isValidLatLon(latitude: Double, longitude: Double): Boolean = - latitude >= -90.0 && latitude <= 90.0 && - longitude >= -180.0 && longitude <= 180.0 && - // some clients send (0.0, 0.0) for unknown reasons, but this is highly unlikely to be - // valid and should be treated as if no coordinates were sent. if a place Id is provided, - // that will still be used. - (latitude != 0.0 || longitude != 0.0) - - // Count the number of times we erase geo information based on user preferences. - val geoErasedCounter = stats.counter("geo_erased") - // Count the number of times we override a user's preferences and add geo anyway. 
- val geoOverriddenCounter = stats.counter("geo_overridden") - - val geoScope = stats.scope("create_geotagged_tweet") - - // Counter for geo tweets with neither lat lon nor place id data - val noGeoCounter = geoScope.counter("no_geo_info") - val invalidCoordinates = geoScope.counter("invalid_coordinates") - val inValidPlaceId = geoScope.counter("invalid_place_id") - val latlonStatsEffect = GeoStats(geoScope.scope("from_latlon")) - val placeIdStatsEffect = GeoStats(geoScope.scope("from_place_id")) - - def validateCoordinates(coords: GeoCoordinates): Option[GeoCoordinates] = - if (isValidLatLon(coords.latitude, coords.longitude)) Some(coords) - else { - invalidCoordinates.incr() - None - } - - def validatePlaceId(placeId: String): Option[String] = - if (isValidPlaceId(placeId)) Some(placeId) - else { - inValidPlaceId.incr() - None - } - - def getPlaceByRGC(coordinates: GeoCoordinates, language: String): Future[Option[Place]] = - ignoreFailures( - rgc((coordinates, language)).onSuccess(latlonStatsEffect) - ) - - def getPlaceById(placeId: String, language: String): Future[Option[Place]] = - ignoreFailures( - Stitch - .run(placeRepo(PlaceKey(placeId, language)).liftNotFoundToOption) - .onSuccess(placeIdStatsEffect) - ) - - FutureArrow[Request, Result] { request => - val createGeo = request.createGeo - val allowGeo = createGeo.overrideUserGeoSetting || request.userGeoEnabled - val overrideGeo = createGeo.overrideUserGeoSetting && !request.userGeoEnabled - - if (createGeo.placeId.isEmpty && createGeo.coordinates.isEmpty) { - noGeoCounter.incr() - Future.value(Result(None, None)) - } else if (!allowGeo) { - // Record that we had geo information but had to erase it based on user preferences. 
- geoErasedCounter.incr() - Future.value(Result(None, None)) - } else { - if (overrideGeo) geoOverriddenCounter.incr() - - // treat invalidate coordinates the same as no-coordinates - val validatedCoordinates = createGeo.coordinates.flatMap(validateCoordinates) - val validatedPlaceId = createGeo.placeId.flatMap(validatePlaceId) - - for { - place <- (createGeo.placeId, validatedPlaceId, validatedCoordinates) match { - // if the request contains an invalid place id, we want to return None for the - // place instead of reverse-geocoding the coordinates - case (Some(_), None, _) => Future.None - case (_, Some(placeId), _) => getPlaceById(placeId, request.language) - case (_, _, Some(coords)) => getPlaceByRGC(coords, request.language) - case _ => Future.None - } - } yield Result(validatedCoordinates, place.map(_.id)) - } - } - } -}