[docx] split commit for file 6000

Signed-off-by: Ari Archer <ari.web.xyz@gmail.com>
This commit is contained in:
Ari Archer 2024-01-23 19:20:29 +02:00
parent dedacccd1f
commit dbcd08179c
No known key found for this signature in database
GPG Key ID: A50D5B4B599AF8A2
400 changed files with 0 additions and 20528 deletions

View File

@ -1,119 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.InternalServerError
import com.twitter.tweetypie.core.OverCapacity
import com.twitter.tweetypie.storage.Response.TweetResponseCode
import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet
import com.twitter.tweetypie.storage.DeleteState
import com.twitter.tweetypie.storage.DeletedTweetResponse
import com.twitter.tweetypie.storage.RateLimited
import com.twitter.tweetypie.storage.TweetStorageClient
import com.twitter.tweetypie.thriftscala._
/**
* Allow access to raw, unhydrated deleted tweet fields from storage backends (currently Manhattan)
*/
object GetDeletedTweetsHandler {
type Type = FutureArrow[GetDeletedTweetsRequest, Seq[GetDeletedTweetResult]]
type TweetsExist = Seq[TweetId] => Stitch[Set[TweetId]]
/**
 * Interprets the outcome of a single storage read performed for the existence check.
 *
 * A found tweet is passed through; deleted, bounce-deleted and missing tweets yield no tweet.
 * A [[RateLimited]] failure is surfaced as [[OverCapacity]] for "manhattan"; any other failure
 * is propagated unchanged.
 */
def processTweetResponse(response: Try[GetTweet.Response]): Stitch[Option[Tweet]] = {
  import GetTweet.Response._
  response match {
    // The rate-limit case must stay ahead of the generic failure case.
    case Throw(_: RateLimited) => Stitch.exception(OverCapacity("manhattan"))
    case Throw(cause) => Stitch.exception(cause)
    case Return(Found(storedTweet)) => Stitch.value(Some(storedTweet))
    case Return(BounceDeleted(_) | Deleted | NotFound) => Stitch.None
  }
}
/**
 * Converts one storage-level deleted-tweet response into the thrift result.
 *
 * @param r         the storage response for a single tweet id
 * @param extantIds ids that the existence check reported as still present
 * @return a result in state `NotDeleted` when the tweet still exists (or storage says it is
 *         not deleted); otherwise the converted delete state together with the stored tweet
 * @throws OverCapacity        when storage reported over-capacity for this tweet
 * @throws InternalServerError for any other non-success storage response code
 */
def convertDeletedTweetResponse(
  r: DeletedTweetResponse,
  extantIds: Set[TweetId]
): GetDeletedTweetResult = {
  val tweetId = r.tweetId
  val stillExists = extantIds.contains(tweetId) || r.deleteState == DeleteState.NotDeleted

  if (stillExists) GetDeletedTweetResult(tweetId, DeletedTweetState.NotDeleted)
  else
    r.overallResponse match {
      case TweetResponseCode.Success =>
        GetDeletedTweetResult(tweetId, convertState(r.deleteState), r.tweet)
      case TweetResponseCode.OverCapacity =>
        throw OverCapacity("manhattan")
      case _ =>
        throw InternalServerError(
          s"Unhandled response ${r.overallResponse} from getDeletedTweets for tweet $tweetId"
        )
    }
}
/**
 * Maps the storage-layer [[DeleteState]] onto the thrift [[DeletedTweetState]].
 */
def convertState(d: DeleteState): DeletedTweetState =
  d match {
    case DeleteState.HardDeleted => DeletedTweetState.HardDeleted
    // Callers of this endpoint treat BounceDeleted tweets the same as SoftDeleted
    case DeleteState.SoftDeleted | DeleteState.BounceDeleted => DeletedTweetState.SoftDeleted
    case DeleteState.NotDeleted => DeletedTweetState.NotDeleted
    case DeleteState.NotFound => DeletedTweetState.NotFound
  }
/**
 * Converts [[TweetStorageClient.GetTweet]] into a [[TweetsExist]] function: given a list of
 * tweet ids, it returns (as a Stitch) the subset of those ids for which storage still holds a
 * tweet. This is used to check underlying storage against the deleted-tweet lookup, preferring
 * "still exists" when a tweet is found.
 *
 * Each id is read individually; per-id failures are interpreted by [[processTweetResponse]],
 * so a rate-limited or otherwise failed read fails the whole Stitch.
 */
def tweetsExist(getTweet: TweetStorageClient.GetTweet): TweetsExist =
  (tweetIds: Seq[TweetId]) => {
    // Build the membership set once instead of scanning the request Seq for every found id.
    val requestedIds = tweetIds.toSet
    for {
      response <- Stitch.traverse(tweetIds) { tweetId => getTweet(tweetId).liftToTry }
      tweets <- Stitch.collect(response.map(processTweetResponse))
    } yield tweets.flatten.map(_.id).toSet.filter(requestedIds)
  }
/**
 * Builds the `getDeletedTweets` handler.
 *
 * For each request the deleted-tweet lookup and the existence check run jointly; every storage
 * response is then converted with [[convertDeletedTweetResponse]]. The resulting states are
 * counted under `not_found`, `not_deleted`, `soft_deleted`, `hard_deleted` and `unknown`.
 *
 * @param getDeletedTweets storage lookup for deleted tweets; its responses must come back in
 *                         the same order as the requested ids (asserted below)
 * @param tweetsExist      returns the subset of requested ids that still exist
 * @param stats            receiver for the per-state counters
 */
def apply(
  getDeletedTweets: TweetStorageClient.GetDeletedTweets,
  tweetsExist: TweetsExist,
  stats: StatsReceiver
): Type = {
  val notFound = stats.counter("not_found")
  val notDeleted = stats.counter("not_deleted")
  val softDeleted = stats.counter("soft_deleted")
  val hardDeleted = stats.counter("hard_deleted")
  val unknown = stats.counter("unknown")

  def trackState(results: Seq[GetDeletedTweetResult]): Unit =
    results.foreach { r =>
      r.state match {
        case DeletedTweetState.NotFound => notFound.incr()
        case DeletedTweetState.NotDeleted => notDeleted.incr()
        case DeletedTweetState.SoftDeleted => softDeleted.incr()
        case DeletedTweetState.HardDeleted => hardDeleted.incr()
        case _ => unknown.incr()
      }
    }

  FutureArrow { request =>
    Stitch.run {
      Stitch
        .join(
          getDeletedTweets(request.tweetIds),
          tweetsExist(request.tweetIds)
        )
        .map {
          case (deletedTweetResponses, extantIds) =>
            val responseIds = deletedTweetResponses.map(_.tweetId)
            assert(
              responseIds == request.tweetIds,
              s"getDeletedTweets response does not match order of request: Request ids " +
                s"(${request.tweetIds.mkString(", ")}) != response ids (${responseIds
                  .mkString(", ")})"
            )
            val results =
              deletedTweetResponses.map { r => convertDeletedTweetResponse(r, extantIds) }
            // Previously `trackState` was defined but never invoked, so the counters above
            // were never incremented. Record the states once conversion has succeeded.
            trackState(results)
            results
        }
    }
  }
}
}

View File

@ -1,188 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.flockdb.client.Cursor
import com.twitter.flockdb.client.PageResult
import com.twitter.flockdb.client.Select
import com.twitter.flockdb.client.StatusGraph
import com.twitter.flockdb.client.UserTimelineGraph
import com.twitter.flockdb.client.thriftscala.EdgeState
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.storage.TweetStorageClient
import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet
import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserOptions
import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserRequest
import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserResult
import com.twitter.tweetypie.thriftscala.GetStoredTweetsOptions
import com.twitter.tweetypie.thriftscala.GetStoredTweetsRequest
object GetStoredTweetsByUserHandler {
type Type = FutureArrow[GetStoredTweetsByUserRequest, GetStoredTweetsByUserResult]
/**
 * Builds the `getStoredTweetsByUser` handler.
 *
 * The handler pages through the user's timeline edges to collect tweet ids inside the
 * requested time range, then delegates to `getStoredTweetsHandler` to load them. The returned
 * cursor is omitted once paging has reached its end.
 *
 * @param getStoredTweetsHandler handler used to load the stored tweets for the collected ids
 * @param getStoredTweet         storage read used to find timestamps of non-snowflake tweets
 * @param selectPage             fetches one page of timeline edges
 * @param maxPages               limit on additional pages fetched while a page yields no
 *                               in-range tweets
 */
def apply(
  getStoredTweetsHandler: GetStoredTweetsHandler.Type,
  getStoredTweet: TweetStorageClient.GetStoredTweet,
  selectPage: FutureArrow[Select[StatusGraph], PageResult[Long]],
  maxPages: Int
): Type =
  FutureArrow { request =>
    val options = request.options.getOrElse(GetStoredTweetsByUserOptions())
    // Defaults: from the epoch up to "now".
    val rangeStartMsec: Long = options.startTimeMsec.getOrElse(0L)
    val rangeEndMsec: Long = options.endTimeMsec.getOrElse(Time.now.inMillis)
    val initialCursor = options.cursor match {
      case Some(raw) => Cursor(raw)
      case None if options.startFromOldest => Cursor.lowest
      case None => Cursor.highest
    }

    val idsAndCursor = getNextTweetIdsInTimeRange(
      request.userId,
      rangeStartMsec,
      rangeEndMsec,
      initialCursor,
      selectPage,
      getStoredTweet,
      maxPages,
      numTries = 0
    )

    idsAndCursor.flatMap {
      case (tweetIds, nextCursor) =>
        val storedTweetsRequest = toGetStoredTweetsRequest(tweetIds, request.userId, options)
        getStoredTweetsHandler(storedTweetsRequest).map { results =>
          GetStoredTweetsByUserResult(
            storedTweets = results.map(_.storedTweet),
            cursor = if (nextCursor.isEnd) None else Some(nextCursor.value)
          )
        }
    }
  }
/**
 * Builds the `GetStoredTweetsRequest` for the given ids, carrying over the visibility and
 * additional-field options. `forUserId` is only populated when `setForUserId` is set.
 */
private def toGetStoredTweetsRequest(
  tweetIds: Seq[TweetId],
  userId: UserId,
  getStoredTweetsByUserOptions: GetStoredTweetsByUserOptions
): GetStoredTweetsRequest = {
  val viewer = if (getStoredTweetsByUserOptions.setForUserId) Some(userId) else None

  GetStoredTweetsRequest(
    tweetIds = tweetIds,
    options = Some(
      GetStoredTweetsOptions(
        bypassVisibilityFiltering = getStoredTweetsByUserOptions.bypassVisibilityFiltering,
        forUserId = viewer,
        additionalFieldIds = getStoredTweetsByUserOptions.additionalFieldIds
      )
    )
  )
}
/**
 * Fetches one page of the user's timeline edges starting at `cursor` and returns the tweet ids
 * on that page whose creation time lies in `[startTimeMsec, endTimeMsec]`, together with the
 * cursor to continue from. When a page contains no in-range tweets the next page is fetched
 * recursively, up to `maxPages` additional pages.
 *
 * All timestamps compared here are in milliseconds since the epoch.
 *
 * @param userId         owner of the timeline
 * @param startTimeMsec  inclusive lower bound of the time range, in milliseconds
 * @param endTimeMsec    inclusive upper bound of the time range, in milliseconds
 * @param cursor         position to read from; its direction decides which page cursor is
 *                       followed next
 * @param selectPage     fetches one page of edges
 * @param getStoredTweet storage read used to find the creation time of non-snowflake tweets
 * @param maxPages       limit on the number of extra pages to fetch
 * @param numTries       number of extra pages fetched so far
 * @return the in-range tweet ids of the last page examined and the cursor for the next call
 *         (`Cursor.end` when there is nothing more to read)
 */
private def getNextTweetIdsInTimeRange(
  userId: UserId,
  startTimeMsec: Long,
  endTimeMsec: Long,
  cursor: Cursor,
  selectPage: FutureArrow[Select[StatusGraph], PageResult[Long]],
  getStoredTweet: TweetStorageClient.GetStoredTweet,
  maxPages: Int,
  numTries: Int
): Future[(Seq[TweetId], Cursor)] = {
  val select = Select(
    sourceId = userId,
    graph = UserTimelineGraph,
    stateIds =
      Some(Seq(EdgeState.Archived.value, EdgeState.Positive.value, EdgeState.Removed.value))
  ).withCursor(cursor)

  def inTimeRange(timestampMsec: Long): Boolean =
    timestampMsec >= startTimeMsec && timestampMsec <= endTimeMsec

  // Only called with a non-empty list (guarded at the call site), so max/min are safe.
  def pastTimeRange(timestampsMsec: Seq[Long]): Boolean =
    if (cursor.isAscending) timestampsMsec.max > endTimeMsec
    else timestampsMsec.min < startTimeMsec

  val pageResultFuture: Future[PageResult[Long]] = selectPage(select)

  pageResultFuture.flatMap { pageResult =>
    val groupedIds = pageResult.entries.groupBy(SnowflakeId.isSnowflakeId)
    val nextCursor = if (cursor.isAscending) pageResult.previousCursor else pageResult.nextCursor

    // Timestamps for the creation of Tweets with snowflake IDs can be calculated from the IDs
    // themselves.
    val snowflakeIdsTimestamps: Seq[(Long, Long)] = groupedIds.getOrElse(true, Seq()).map { id =>
      (id, SnowflakeId.unixTimeMillisFromId(id))
    }

    // For non-snowflake Tweets, we need to fetch the Tweet data from Manhattan to see when the
    // Tweet was created.
    val nonSnowflakeIdsTimestamps: Future[Seq[(Long, Long)]] = Stitch.run(
      Stitch
        .traverse(groupedIds.getOrElse(false, Seq()))(getStoredTweet)
        .map {
          _.flatMap {
            case GetStoredTweet.Response.FoundAny(tweet, _, _, _, _) =>
              // `createdAtSecs` is in seconds, while the snowflake timestamps and the range
              // bounds are in milliseconds: convert so that all comparisons share one unit.
              tweet.coreData
                .map(_.createdAtSecs)
                .filter(_ > 0)
                .map(createdAtSecs => (tweet.id, createdAtSecs * 1000L))
            case _ => None
          }
        })

    nonSnowflakeIdsTimestamps.flatMap { nonSnowflakeList =>
      val allTweetIdsAndTimestamps = snowflakeIdsTimestamps ++ nonSnowflakeList
      val filteredTweetIds = allTweetIdsAndTimestamps
        .filter {
          case (_, ts) => inTimeRange(ts)
        }
        .map(_._1)

      if (nextCursor.isEnd) {
        // We've considered the last Tweet for this User. There are no more Tweets to return.
        Future.value((filteredTweetIds, Cursor.end))
      } else if (allTweetIdsAndTimestamps.nonEmpty &&
        pastTimeRange(allTweetIdsAndTimestamps.map(_._2))) {
        // At least one Tweet returned from Tflock has a timestamp past our time range, i.e.
        // greater than the end time (if we're fetching in an ascending order) or lower than the
        // start time (if we're fetching in a descending order). There is no point in looking at
        // any more Tweets from this User as they'll all be outside the time range.
        Future.value((filteredTweetIds, Cursor.end))
      } else if (filteredTweetIds.isEmpty) {
        // We're here because one of two things happened:
        // 1. allTweetIdsAndTimestamps is empty: Either Tflock has returned an empty page of Tweets
        //    or we weren't able to fetch timestamps for any of the Tweets Tflock returned. In this
        //    case, we fetch the next page of Tweets.
        // 2. allTweetIdsAndTimestamps is non-empty but filteredTweetIds is empty: The current page
        //    has no Tweets inside the requested time range. We fetch the next page of Tweets and
        //    try again.
        // If we hit the limit for the maximum number of pages from tflock to be requested, we
        // return an empty list of Tweets with the cursor for the caller to try again.
        // `>=` (rather than `==`) also stops paging when `maxPages` is zero or negative.
        if (numTries >= maxPages) {
          Future.value((filteredTweetIds, nextCursor))
        } else {
          getNextTweetIdsInTimeRange(
            userId = userId,
            startTimeMsec = startTimeMsec,
            endTimeMsec = endTimeMsec,
            cursor = nextCursor,
            selectPage = selectPage,
            getStoredTweet = getStoredTweet,
            maxPages = maxPages,
            numTries = numTries + 1
          )
        }
      } else {
        // filteredTweetIds is non-empty: There are some Tweets in this page that are within the
        // requested time range, and we aren't out of the time range yet. We return the Tweets we
        // have and set the cursor forward for the next request.
        Future.value((filteredTweetIds, nextCursor))
      }
    }
  }
}
}

View File

@ -1,161 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.StoredTweetResult._
import com.twitter.tweetypie.core.StoredTweetResult
import com.twitter.tweetypie.core.TweetResult
import com.twitter.tweetypie.FieldId
import com.twitter.tweetypie.FutureArrow
import com.twitter.tweetypie.repository.CacheControl
import com.twitter.tweetypie.repository.TweetQuery
import com.twitter.tweetypie.repository.TweetResultRepository
import com.twitter.tweetypie.thriftscala.{BounceDeleted => BounceDeletedState}
import com.twitter.tweetypie.thriftscala.{ForceAdded => ForceAddedState}
import com.twitter.tweetypie.thriftscala.GetStoredTweetsRequest
import com.twitter.tweetypie.thriftscala.GetStoredTweetsOptions
import com.twitter.tweetypie.thriftscala.GetStoredTweetsResult
import com.twitter.tweetypie.thriftscala.{HardDeleted => HardDeletedState}
import com.twitter.tweetypie.thriftscala.{NotFound => NotFoundState}
import com.twitter.tweetypie.thriftscala.{SoftDeleted => SoftDeletedState}
import com.twitter.tweetypie.thriftscala.StatusCounts
import com.twitter.tweetypie.thriftscala.StoredTweetError
import com.twitter.tweetypie.thriftscala.StoredTweetInfo
import com.twitter.tweetypie.thriftscala.StoredTweetState
import com.twitter.tweetypie.thriftscala.{Undeleted => UndeletedState}
object GetStoredTweetsHandler {
type Type = FutureArrow[GetStoredTweetsRequest, Seq[GetStoredTweetsResult]]
/**
 * Builds the `getStoredTweets` handler.
 *
 * Every requested id is looked up independently through `tweetRepo`. A failed lookup does not
 * fail the request: it is reported as a result carrying only the tweet id and the
 * `FailedFetch` error.
 */
def apply(tweetRepo: TweetResultRepository.Type): Type =
  FutureArrow[GetStoredTweetsRequest, Seq[GetStoredTweetsResult]] { request =>
    val queryOptions =
      toTweetQueryOptions(request.options.getOrElse(GetStoredTweetsOptions()))

    val lookups = Stitch.traverse(request.tweetIds) { tweetId =>
      tweetRepo(tweetId, queryOptions)
        .map(tweetResult => GetStoredTweetsResult(toStoredTweetInfo(tweetResult)))
        .handle {
          case _ =>
            val failedInfo =
              StoredTweetInfo(tweetId = tweetId, errors = Seq(StoredTweetError.FailedFetch))
            GetStoredTweetsResult(failedInfo)
        }
    }

    Stitch.run(lookups)
  }
/**
 * Translates the request options into the internal `TweetQuery.Options`: the base `getTweets`
 * includes plus counts and any additional fields, no caching, stored-tweet fetching enabled,
 * and visibility filtering enforced unless the request bypasses it.
 */
private def toTweetQueryOptions(options: GetStoredTweetsOptions): TweetQuery.Options = {
  val requestedCounts: Set[FieldId] = Set(
    StatusCounts.FavoriteCountField.id,
    StatusCounts.ReplyCountField.id,
    StatusCounts.RetweetCountField.id,
    StatusCounts.QuoteCountField.id
  )

  val include = GetTweetsHandler.BaseInclude.also(
    tweetFields = Set(Tweet.CountsField.id) ++ options.additionalFieldIds,
    countsFields = requestedCounts
  )

  TweetQuery.Options(
    include = include,
    cacheControl = CacheControl.NoCache,
    enforceVisibilityFiltering = !options.bypassVisibilityFiltering,
    forUserId = options.forUserId,
    requireSourceTweet = false,
    fetchStoredTweets = true
  )
}
/**
 * Converts a repository `TweetResult` into the thrift `StoredTweetInfo`.
 *
 * When the result carries a stored-tweet result, its variant decides whether the tweet is
 * returned, which stored state is reported and which errors are attached. Without one, the
 * tweet is returned as-is with no state or errors. Returned tweets always pass through
 * `sanitizeNullMediaFields`.
 */
private def toStoredTweetInfo(tweetResult: TweetResult): StoredTweetInfo = {
  def translateErrors(errors: Seq[StoredTweetResult.Error]): Seq[StoredTweetError] =
    errors.map {
      case StoredTweetResult.Error.Corrupt =>
        StoredTweetError.Corrupt
      case StoredTweetResult.Error.FieldsMissingOrInvalid =>
        StoredTweetError.FieldsMissingOrInvalid
      case StoredTweetResult.Error.ScrubbedFieldsPresent =>
        StoredTweetError.ScrubbedFieldsPresent
      case StoredTweetResult.Error.ShouldBeHardDeleted =>
        StoredTweetError.ShouldBeHardDeleted
    }

  val tweetData = tweetResult.value
  val tweetId = tweetData.tweet.id

  tweetData.storedTweetResult match {
    case None =>
      StoredTweetInfo(
        tweetId = tweetId,
        tweet = Some(sanitizeNullMediaFields(tweetData.tweet))
      )

    case Some(stored) =>
      // (tweet to return, stored state to report, translated errors)
      val (tweet, storedTweetState, translated) = stored match {
        case Present(errs, _) =>
          (Some(tweetData.tweet), None, translateErrors(errs))
        case HardDeleted(softDeletedAtMsec, hardDeletedAtMsec) =>
          val state =
            StoredTweetState.HardDeleted(HardDeletedState(softDeletedAtMsec, hardDeletedAtMsec))
          (Some(tweetData.tweet), Some(state), Seq())
        case SoftDeleted(softDeletedAtMsec, errs, _) =>
          val state = StoredTweetState.SoftDeleted(SoftDeletedState(softDeletedAtMsec))
          (Some(tweetData.tweet), Some(state), translateErrors(errs))
        case BounceDeleted(deletedAtMsec, errs, _) =>
          val state = StoredTweetState.BounceDeleted(BounceDeletedState(deletedAtMsec))
          (Some(tweetData.tweet), Some(state), translateErrors(errs))
        case Undeleted(undeletedAtMsec, errs, _) =>
          val state = StoredTweetState.Undeleted(UndeletedState(undeletedAtMsec))
          (Some(tweetData.tweet), Some(state), translateErrors(errs))
        case ForceAdded(addedAtMsec, errs, _) =>
          val state = StoredTweetState.ForceAdded(ForceAddedState(addedAtMsec))
          (Some(tweetData.tweet), Some(state), translateErrors(errs))
        case Failed(errs) =>
          (None, None, translateErrors(errs))
        case NotFound =>
          (None, Some(StoredTweetState.NotFound(NotFoundState())), Seq())
      }

      StoredTweetInfo(
        tweetId = tweetId,
        tweet = tweet.map(sanitizeNullMediaFields),
        storedTweetState = storedTweetState,
        errors = translated
      )
  }
}
/**
 * Replaces `null` URL fields on the tweet's media entities with empty strings.
 *
 * Some media fields are initialized as `null` at the storage layer. If the Tweet is meant to
 * be hard deleted, or is not hydrated for some other reason but the media entities still
 * exist, we sanitize these fields to allow serialization.
 */
private def sanitizeNullMediaFields(tweet: Tweet): Tweet = {
  def orEmpty(value: String): String = Option(value).getOrElse("")

  val sanitizedMedia = tweet.media.map { entities =>
    entities.map { entity =>
      entity.copy(
        url = orEmpty(entity.url),
        mediaUrl = orEmpty(entity.mediaUrl),
        mediaUrlHttps = orEmpty(entity.mediaUrlHttps),
        displayUrl = orEmpty(entity.displayUrl),
        expandedUrl = orEmpty(entity.expandedUrl)
      )
    }
  }

  tweet.copy(media = sanitizedMedia)
}
}

View File

@ -1,44 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.servo.util.FutureArrow
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala._
/**
 * Handler for the `getTweetCounts` endpoint.
 *
 * For every requested tweet id, each count kind is looked up only when the request asks for
 * it. A failed lookup is reported as an absent count rather than failing the request.
 */
object GetTweetCountsHandler {
  type Type = FutureArrow[GetTweetCountsRequest, Seq[GetTweetCountsResult]]

  def apply(repo: TweetCountsRepository.Type): Type = {
    // Runs `lookup` only when requested; .liftToOption() converts any failures to None result
    def countIf[A](requested: Boolean)(lookup: => Stitch[A]): Stitch[Option[A]] =
      if (requested) lookup.liftToOption() else Stitch.None

    def idToResult(id: TweetId, req: GetTweetCountsRequest): Stitch[GetTweetCountsResult] = {
      val retweets = countIf(req.includeRetweetCount)(repo(RetweetsKey(id)))
      val replies = countIf(req.includeReplyCount)(repo(RepliesKey(id)))
      val favorites = countIf(req.includeFavoriteCount)(repo(FavsKey(id)))
      val quotes = countIf(req.includeQuoteCount)(repo(QuotesKey(id)))
      val bookmarks = countIf(req.includeBookmarkCount)(repo(BookmarksKey(id)))

      Stitch.join(retweets, replies, favorites, quotes, bookmarks).map {
        case (retweetCount, replyCount, favoriteCount, quoteCount, bookmarkCount) =>
          GetTweetCountsResult(
            tweetId = id,
            retweetCount = retweetCount,
            replyCount = replyCount,
            favoriteCount = favoriteCount,
            quoteCount = quoteCount,
            bookmarkCount = bookmarkCount
          )
      }
    }

    FutureArrow[GetTweetCountsRequest, Seq[GetTweetCountsResult]] { request =>
      val perTweet = Stitch.traverse(request.tweetIds)(idToResult(_, request))
      Stitch.run(perTweet)
    }
  }
}

View File

@ -1,395 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.container.thriftscala.MaterializeAsTweetFieldsRequest
import com.twitter.context.TestingSignalsContext
import com.twitter.servo.util.FutureArrow
import com.twitter.spam.rtf.thriftscala.FilteredReason
import com.twitter.spam.rtf.thriftscala.SafetyLevel
import com.twitter.stitch.NotFound
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.FilteredState
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository.DeletedTweetVisibilityRepository
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala.TweetFieldsResultState
import com.twitter.tweetypie.thriftscala._
/**
* Handler for the `getTweetFields` endpoint.
*/
object GetTweetFieldsHandler {
type Type = GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]]
/**
 * Builds the `getTweetFields` handler.
 *
 * Each requested id is loaded through `tweetRepo`; the outcome (success or failure) is then
 * turned into a `GetTweetFieldsResult` by [[toGetTweetFieldsResult]].
 */
def apply(
  tweetRepo: TweetResultRepository.Type,
  deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type,
  containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType,
  stats: StatsReceiver,
  shouldMaterializeContainers: Gate[Unit]
): Type =
  FutureArrow[GetTweetFieldsRequest, Seq[GetTweetFieldsResult]] { request =>
    val requestOptions = request.options
    val queryOptions = toTweetQueryOptions(requestOptions)

    val perTweetResults = Stitch.traverse(request.tweetIds) { id =>
      // liftToTry so that failures are converted into result states instead of failing the batch
      val loaded = tweetRepo(id, queryOptions).liftToTry
      loaded.flatMap { outcome =>
        toGetTweetFieldsResult(
          id,
          outcome,
          requestOptions,
          deletedTweetVisibilityRepo,
          containerAsGetTweetFieldsResultRepo,
          stats,
          shouldMaterializeContainers
        )
      }
    }

    Stitch.run(perTweetResults)
  }
/**
 * Converts a `GetTweetFieldsOptions` into an internal `TweetQuery.Options`.
 *
 * The cache is skipped entirely when the testing-signals context simulates back pressure;
 * otherwise it is read-only when the request sets `doNotCache`, and read-write by default.
 */
def toTweetQueryOptions(options: GetTweetFieldsOptions): TweetQuery.Options = {
  val includes = options.tweetIncludes

  val shouldSkipCache = TestingSignalsContext().flatMap(_.simulateBackPressure).nonEmpty
  val cacheControl =
    if (shouldSkipCache) CacheControl.NoCache
    else if (options.doNotCache) CacheControl.ReadOnlyCache
    else CacheControl.ReadWriteCache

  // Asking for any counts or media sub-field implies the corresponding tweet-level field.
  val requestedTweetFields = includes.collect {
    case TweetInclude.TweetFieldId(id) => id
    case TweetInclude.CountsFieldId(_) => Tweet.CountsField.id
    case TweetInclude.MediaEntityFieldId(_) => Tweet.MediaField.id
  }.toSet
  val requestedCountsFields = includes.collect { case TweetInclude.CountsFieldId(id) => id }.toSet
  val requestedMediaFields =
    includes.collect { case TweetInclude.MediaEntityFieldId(id) => id }.toSet

  val include = TweetQuery
    .Include(
      tweetFields = requestedTweetFields,
      countsFields = requestedCountsFields,
      mediaFields = requestedMediaFields,
      quotedTweet = options.includeQuotedTweet,
      pastedMedia = true
    ).also(
      /**
       * Always fetching underlying creatives container id. see
       * [[hydrateCreativeContainerBackedTweet]] for more detail.
       */
      tweetFields = Seq(Tweet.UnderlyingCreativesContainerIdField.id)
    )

  TweetQuery.Options(
    include = include,
    cacheControl = cacheControl,
    enforceVisibilityFiltering = options.visibilityPolicy == TweetVisibilityPolicy.UserVisible,
    safetyLevel = options.safetyLevel.getOrElse(SafetyLevel.FilterNone),
    forUserId = options.forUserId,
    languageTag = options.languageTag.getOrElse("en"),
    cardsPlatformKey = options.cardsPlatformKey,
    extensionsArgs = options.extensionsArgs,
    forExternalConsumption = true,
    simpleQuotedTweet = options.simpleQuotedTweet
  )
}
/**
 * Turns the outcome of a single tweet load into a `GetTweetFieldsResult`.
 *
 *  - Stitch `NotFound`: recorded as a possible racy read and reported as not found.
 *  - Any other failure: mapped with `failureResultState`; when that yields a not-found state,
 *    the deleted-tweet visibility repository is consulted to attach a filtered reason.
 *  - Success: converted with `toTweetFieldsResult`, then passed through
 *    `hydrateCreativeContainerBackedTweet`.
 */
def toGetTweetFieldsResult(
  tweetId: TweetId,
  res: Try[TweetResult],
  options: GetTweetFieldsOptions,
  deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type,
  containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType,
  stats: StatsReceiver,
  shouldMaterializeContainers: Gate[Unit]
): Stitch[GetTweetFieldsResult] = {
  val measureRacyReads: TweetId => Unit = trackLossyReadsAfterWrite(
    stats.stat("racy_reads", "get_tweet_fields"),
    Duration.fromSeconds(3)
  )

  res match {
    case Throw(NotFound) =>
      measureRacyReads(tweetId)
      Stitch.value(GetTweetFieldsResult(tweetId, NotFoundResultState))

    case Throw(ex) =>
      val stateStitch = failureResultState(ex) match {
        case notFoundState @ TweetFieldsResultState.NotFound(_) =>
          val visibilityRequest = DeletedTweetVisibilityRepository.VisibilityRequest(
            ex,
            tweetId,
            options.safetyLevel,
            options.forUserId,
            isInnerQuotedTweet = false
          )
          deletedTweetVisibilityRepo(visibilityRequest)
            .map(withVisibilityFilteredReason(notFoundState, _))
        case otherState =>
          Stitch.value(otherState)
      }
      stateStitch.map(state => GetTweetFieldsResult(tweetId, state))

    case Return(r) =>
      val converted = toTweetFieldsResult(
        r,
        options,
        deletedTweetVisibilityRepo,
        containerAsGetTweetFieldsResultRepo,
        stats,
        shouldMaterializeContainers
      )
      converted.flatMap { getTweetFieldsResult =>
        hydrateCreativeContainerBackedTweet(
          r.value.tweet.underlyingCreativesContainerId,
          getTweetFieldsResult,
          options,
          containerAsGetTweetFieldsResultRepo,
          tweetId,
          stats,
          shouldMaterializeContainers
        )
      }
  }
}
/**
 * Maps a failure onto the `TweetFieldsResultState` reported to the caller.
 *
 * Deleted, bounce-deleted, missing-source-tweet and missing-author failures become not-found
 * states; failures carrying a filtered reason become a filtered state; `OverCapacity` becomes
 * a failed state flagged as over capacity with no message; anything else becomes a failed
 * state whose message is `ex.toString`.
 */
private def failureResultState(ex: Throwable): TweetFieldsResultState =
ex match {
// NOTE(review): the specific `Unavailable` cases are matched before the general
// `HasFilteredReason` case; if those states also carry a filtered reason the order
// matters — confirm before reordering.
case FilteredState.Unavailable.TweetDeleted => DeletedResultState
case FilteredState.Unavailable.BounceDeleted => BounceDeletedResultState
case FilteredState.Unavailable.SourceTweetNotFound(d) => notFoundResultState(deleted = d)
case FilteredState.Unavailable.Author.NotFound => NotFoundResultState
case fs: FilteredState.HasFilteredReason => toFilteredState(fs.filteredReason)
case OverCapacity(_) => toFailedState(overcapacity = true, None)
case _ => toFailedState(overcapacity = false, Some(ex.toString))
}
// Shared not-found result states: plain, deleted, and bounce-deleted.
private val NotFoundResultState: TweetFieldsResultState.NotFound =
  TweetFieldsResultState.NotFound(TweetFieldsResultNotFound())

private val DeletedResultState: TweetFieldsResultState.NotFound =
  TweetFieldsResultState.NotFound(TweetFieldsResultNotFound(deleted = true))

private val BounceDeletedResultState: TweetFieldsResultState.NotFound =
  TweetFieldsResultState.NotFound(TweetFieldsResultNotFound(deleted = true, bounceDeleted = true))

/**
 * Selects the shared not-found state: the deleted variant when `deleted` is set, the plain
 * one otherwise.
 */
def notFoundResultState(deleted: Boolean): TweetFieldsResultState.NotFound = {
  if (deleted) {
    DeletedResultState
  } else {
    NotFoundResultState
  }
}
/**
 * Wraps the over-capacity flag and optional message in a failed result state.
 */
private def toFailedState(
  overcapacity: Boolean,
  message: Option[String]
): TweetFieldsResultState = {
  val failure = TweetFieldsResultFailed(overcapacity, message)
  TweetFieldsResultState.Failed(failure)
}
/**
 * Wraps a filtered reason in a filtered result state.
 */
private def toFilteredState(reason: FilteredReason): TweetFieldsResultState = {
  val filtered = TweetFieldsResultFiltered(reason = reason)
  TweetFieldsResultState.Filtered(filtered)
}
/**
 * Converts a `TweetResult` into a `GetTweetFieldsResult`. For retweets, missing or filtered source
 * tweets cause the retweet to be treated as missing or filtered.
 *
 * The primary result state comes from `toTweetFieldsResultState`. A quoted-tweet result state
 * is only computed when the primary state is `Found` and the options request the quoted
 * tweet; otherwise the quoted-tweet result is absent.
 */
private def toTweetFieldsResult(
tweetResult: TweetResult,
options: GetTweetFieldsOptions,
deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type,
creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType,
stats: StatsReceiver,
shouldMaterializeContainers: Gate[Unit]
): Stitch[GetTweetFieldsResult] = {
val primaryResultState = toTweetFieldsResultState(tweetResult, options)
// Option of a Stitch: None when no quoted-tweet state applies, Some(stitch) otherwise.
val quotedResultStateStitch = primaryResultState match {
case TweetFieldsResultState.Found(_) if options.includeQuotedTweet =>
// The quoted tweet is read from the source tweet's data when one is present,
// otherwise from this tweet's own data.
val tweetData = tweetResult.value.sourceTweetResult
.getOrElse(tweetResult)
.value
tweetData.quotedTweetResult
.map {
case QuotedTweetResult.NotFound => Stitch.value(NotFoundResultState)
case QuotedTweetResult.Filtered(state) =>
val resultState = failureResultState(state)
(tweetData.tweet.quotedTweet, resultState) match {
//When QT exists => contribute VF filtered reason to result state
case (Some(qt), notFoundResultState @ TweetFieldsResultState.NotFound(_)) =>
deletedTweetVisibilityRepo(
DeletedTweetVisibilityRepository.VisibilityRequest(
state,
qt.tweetId,
options.safetyLevel,
options.forUserId,
isInnerQuotedTweet = true
)
).map(withVisibilityFilteredReason(notFoundResultState, _))
//When QT is absent => result state without filtered reason
case _ => Stitch.value(resultState)
}
case QuotedTweetResult.Found(res) =>
// A found quoted tweet goes through the same creatives-container hydration as a
// primary tweet; only the resulting state is kept.
Stitch
.value(toTweetFieldsResultState(res, options))
.flatMap { resultState =>
hydrateCreativeContainerBackedTweet(
creativesContainerId = res.value.tweet.underlyingCreativesContainerId,
originalGetTweetFieldsResult = GetTweetFieldsResult(
tweetId = res.value.tweet.id,
tweetResult = resultState,
),
getTweetFieldsRequestOptions = options,
creativesContainerRepo = creativesContainerRepo,
res.value.tweet.id,
stats,
shouldMaterializeContainers
)
}
.map(_.tweetResult)
}
//Quoted tweet result not requested
case _ => None
}
// Turn Option[Stitch[state]] into Stitch[Option[state]] before building the result.
quotedResultStateStitch
.map(qtStitch => qtStitch.map(Some(_)))
.getOrElse(Stitch.None)
.map(qtResult =>
GetTweetFieldsResult(
tweetId = tweetResult.value.tweet.id,
tweetResult = primaryResultState,
quotedTweetResult = qtResult
))
}
/**
 * Attaches a visibility filtered reason to a not-found state.
 *
 * @return a copy of `resultState` carrying the filtered reason when `filteredReasonOpt` is
 *         present; `resultState` itself otherwise
 */
private def withVisibilityFilteredReason(
  resultState: TweetFieldsResultState.NotFound,
  filteredReasonOpt: Option[FilteredReason]
): TweetFieldsResultState.NotFound =
  filteredReasonOpt
    .map { reason =>
      val annotated = resultState.notFound.copy(filteredReason = Some(reason))
      resultState.copy(notFound = annotated)
    }
    .getOrElse(resultState)
/**
 * Builds the result state for a loaded tweet.
 *
 * The tweet (and, for retweets, optionally its source tweet) is packaged as a found result.
 * If any field failed to load on the tweet or on its source tweet, the found result is
 * reported as the partial payload of a failed state listing the failed fields; otherwise it
 * is reported as found.
 */
private def toTweetFieldsResultState(
  tweetResult: TweetResult,
  options: GetTweetFieldsOptions
): TweetFieldsResultState = {
  val tweetData = tweetResult.value
  val sourceResult = tweetData.sourceTweetResult

  val ownSuppressReason = tweetData.suppress.map(_.filteredReason)
  val sourceSuppressReason = sourceResult.flatMap(_.value.suppress.map(_.filteredReason))
  val sourceTweet = sourceResult.map(_.value.tweet)

  val ownFailedFields = tweetResult.state.failedFields
  val sourceFailedFields = sourceResult.map(_.state.failedFields).getOrElse(Set())

  val found = sourceResult match {
    case Some(_) =>
      // if the source tweet result state is Found, merge that into the primary result
      TweetFieldsResultFound(
        tweet = tweetData.tweet,
        retweetedTweet = sourceTweet.filter(_ => options.includeRetweetedTweet),
        suppressReason = ownSuppressReason.orElse(sourceSuppressReason)
      )
    case None =>
      // if `sourceTweetResult` is empty, this isn't a retweet
      TweetFieldsResultFound(
        tweet = tweetData.tweet,
        suppressReason = ownSuppressReason
      )
  }

  val fullyLoaded = ownFailedFields.isEmpty && sourceFailedFields.isEmpty
  if (fullyLoaded) {
    TweetFieldsResultState.Found(found)
  } else {
    val partial = TweetFieldsPartial(
      found = found,
      missingFields = ownFailedFields,
      sourceTweetMissingFields = sourceFailedFields
    )
    TweetFieldsResultState.Failed(
      TweetFieldsResultFailed(
        overCapacity = false,
        message = Some(
          "Failed to load: " + (ownFailedFields ++ sourceFailedFields).mkString(", ")),
        partial = Some(partial)
      )
    )
  }
}
/**
 * if tweet data is backed by creatives container, it'll be hydrated from creatives
 * container service.
 *
 * The decision is made on four inputs, matched in order: the container id, the state of the
 * original result, the request's `disableTweetMaterialization` flag and the
 * `shouldMaterializeContainers` gate.
 *
 *  - no container id: the original result is returned untouched;
 *  - gate closed: a not-found result is returned and `decider_suppressed` is counted;
 *  - original state is `Found` and materialization is not disabled: the container repository
 *    is called; a failure there is converted with `failureResultState`;
 *  - materialization disabled: a not-found result is returned and `suppressed` is counted;
 *  - any other original state: the original result is returned and a counter named after
 *    the state's class is incremented.
 */
private def hydrateCreativeContainerBackedTweet(
creativesContainerId: Option[Long],
originalGetTweetFieldsResult: GetTweetFieldsResult,
getTweetFieldsRequestOptions: GetTweetFieldsOptions,
creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType,
tweetId: Long,
stats: StatsReceiver,
shouldMaterializeContainers: Gate[Unit]
): Stitch[GetTweetFieldsResult] = {
// creatives container backed tweet stats
val ccTweetMaterialized = stats.scope("creatives_container", "get_tweet_fields")
val ccTweetMaterializeRequests = ccTweetMaterialized.counter("requests")
val ccTweetMaterializeSuccess = ccTweetMaterialized.counter("success")
val ccTweetMaterializeFailed = ccTweetMaterialized.counter("failed")
val ccTweetMaterializeFiltered = ccTweetMaterialized.scope("filtered")
// The cases below overlap; their order determines which one applies.
(
creativesContainerId,
originalGetTweetFieldsResult.tweetResult,
getTweetFieldsRequestOptions.disableTweetMaterialization,
shouldMaterializeContainers()
) match {
// 1. creatives container backed tweet is determined by `underlyingCreativesContainerId` field presence.
// 2. if the frontend tweet is suppressed by any reason, respect that and not do this hydration.
// (this logic can be revisited and improved further)
case (None, _, _, _) =>
Stitch.value(originalGetTweetFieldsResult)
// Gate is closed: container-backed tweets are reported as not found.
case (Some(_), _, _, false) =>
ccTweetMaterializeFiltered.counter("decider_suppressed").incr()
Stitch.value {
GetTweetFieldsResult(
tweetId = tweetId,
tweetResult = TweetFieldsResultState.NotFound(TweetFieldsResultNotFound())
)
}
// Found and materialization allowed: ask the container repository for the result.
case (Some(containerId), TweetFieldsResultState.Found(_), false, _) =>
ccTweetMaterializeRequests.incr()
val materializationRequest =
MaterializeAsTweetFieldsRequest(containerId, tweetId, Some(originalGetTweetFieldsResult))
creativesContainerRepo(
materializationRequest,
getTweetFieldsRequestOptions
).onSuccess(_ => ccTweetMaterializeSuccess.incr())
.onFailure(_ => ccTweetMaterializeFailed.incr())
.handle {
case ex =>
GetTweetFieldsResult(
tweetId = tweetId,
tweetResult = failureResultState(ex)
)
}
// Materialization disabled by the request: reported as not found.
case (Some(_), _, true, _) =>
ccTweetMaterializeFiltered.counter("suppressed").incr()
Stitch.value(
GetTweetFieldsResult(
tweetId = tweetId,
tweetResult = TweetFieldsResultState.NotFound(TweetFieldsResultNotFound())
)
)
// Original result was not Found: keep it, counting by the state's class name.
case (Some(_), state, _, _) =>
ccTweetMaterializeFiltered.counter(state.getClass.getName).incr()
Stitch.value(originalGetTweetFieldsResult)
}
}
}

View File

@ -1,415 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.container.thriftscala.MaterializeAsTweetRequest
import com.twitter.context.TestingSignalsContext
import com.twitter.servo.exception.thriftscala.ClientError
import com.twitter.servo.exception.thriftscala.ClientErrorCause
import com.twitter.servo.util.FutureArrow
import com.twitter.spam.rtf.thriftscala.FilteredReason
import com.twitter.spam.rtf.thriftscala.SafetyLevel
import com.twitter.stitch.NotFound
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.additionalfields.AdditionalFields
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala._
/**
* Handler for the `getTweets` endpoint.
*/
object GetTweetsHandler {
type Type = FutureArrow[GetTweetsRequest, Seq[GetTweetResult]]
/**
 * The default base `TweetQuery.Include` for the `getTweets` endpoint: the tweet fields that
 * are always requested, with pasted media enabled.
 */
val BaseInclude: TweetQuery.Include = {
  val baseTweetFields = Set(
    Tweet.CoreDataField.id,
    Tweet.UrlsField.id,
    Tweet.MentionsField.id,
    Tweet.MediaField.id,
    Tweet.HashtagsField.id,
    Tweet.CashtagsField.id,
    Tweet.TakedownCountryCodesField.id,
    Tweet.TakedownReasonsField.id,
    Tweet.DeviceSourceField.id,
    Tweet.LanguageField.id,
    Tweet.ContributorField.id,
    Tweet.QuotedTweetField.id,
    Tweet.UnderlyingCreativesContainerIdField.id
  )

  TweetQuery.Include(tweetFields = baseTweetFields, pastedMedia = true)
}
/**
 * Builds the `getTweets` arrow.
 *
 * A request whose `additionalFieldIds` contains an id rejected by
 * `AdditionalFields.isAdditionalFieldId` fails with a `BadRequest` `ClientError`.
 * Otherwise each requested id is loaded through `tweetRepo`, every outcome (found,
 * not found, failed) is converted into a `GetTweetResult`, and that result is passed
 * through `hydrateCreativeContainerBackedTweet`.
 */
def apply(
  tweetRepo: TweetResultRepository.Type,
  creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetType,
  deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type,
  stats: StatsReceiver,
  shouldMaterializeContainers: Gate[Unit]
): Type =
  FutureArrow[GetTweetsRequest, Seq[GetTweetResult]] { request =>
    val requestOptions = request.options.getOrElse(GetTweetOptions())
    val rejectedFieldIds =
      requestOptions.additionalFieldIds.filterNot(AdditionalFields.isAdditionalFieldId(_))

    if (rejectedFieldIds.isEmpty) {
      val queryOptions = toTweetQueryOptions(requestOptions)
      val recordRacyRead: TweetId => Unit = trackLossyReadsAfterWrite(
        stats.stat("racy_reads", "get_tweets"),
        Duration.fromSeconds(3)
      )

      // Loads a single tweet; the Stitch always succeeds with a GetTweetResult because
      // the repository outcome is lifted to a Try before being inspected.
      def loadResult(id: TweetId): Stitch[GetTweetResult] =
        tweetRepo(id, queryOptions).liftToTry.flatMap {
          case Return(tweetResult) =>
            toGetTweetResult(
              deletedTweetVisibilityRepo,
              creativesContainerRepo,
              requestOptions,
              tweetResult = tweetResult,
              includeSourceTweet = requestOptions.includeSourceTweet,
              includeQuotedTweet = requestOptions.includeQuotedTweet,
              stats,
              shouldMaterializeContainers
            )
          case Throw(NotFound) =>
            recordRacyRead(id)
            Stitch.value(GetTweetResult(id, StatusState.NotFound))
          case Throw(ex) =>
            failureResult(deletedTweetVisibilityRepo, id, requestOptions, ex)
        }

      Stitch.run(
        Stitch.traverse(request.tweetIds) { id =>
          loadResult(id).flatMap { loaded =>
            // a tweet backed by a creatives container may need to be hydrated from the
            // creatives container service.
            hydrateCreativeContainerBackedTweet(
              loaded,
              requestOptions,
              creativesContainerRepo,
              stats,
              shouldMaterializeContainers
            )
          }
        }
      )
    } else {
      Future.exception(
        ClientError(
          ClientErrorCause.BadRequest,
          "Requested additional fields contain invalid field id " +
            s"${rejectedFieldIds.mkString(", ")}. Additional fields ids must be greater than 100."
        )
      )
    }
  }
/**
 * Translates the `GetTweetOptions` of a request into the `TweetQuery.Options` handed to
 * the tweet repository.
 *
 * Cache control is `NoCache` when the testing-signals context carries a
 * `simulateBackPressure` value, `ReadOnlyCache` when the caller set `doNotCache`, and
 * `ReadWriteCache` otherwise. The included fields are `BaseInclude` plus whatever the
 * request options ask for.
 */
def toTweetQueryOptions(options: GetTweetOptions): TweetQuery.Options = {
  val backPressureSimulated = TestingSignalsContext().flatMap(_.simulateBackPressure).nonEmpty

  val cacheControl = (backPressureSimulated, options.doNotCache) match {
    case (true, _) => CacheControl.NoCache
    case (false, true) => CacheControl.ReadOnlyCache
    case (false, false) => CacheControl.ReadWriteCache
  }

  val countsFields = toCountsFields(options)

  val include = BaseInclude.also(
    tweetFields = toTweetFields(options, countsFields),
    countsFields = countsFields,
    mediaFields = toMediaFields(options),
    quotedTweet = Some(options.includeQuotedTweet)
  )

  TweetQuery.Options(
    include = include,
    cacheControl = cacheControl,
    cardsPlatformKey = options.cardsPlatformKey,
    excludeReported = options.excludeReported,
    enforceVisibilityFiltering = !options.bypassVisibilityFiltering,
    safetyLevel = options.safetyLevel.getOrElse(SafetyLevel.FilterDefault),
    forUserId = options.forUserId,
    languageTag = options.languageTag,
    extensionsArgs = options.extensionsArgs,
    forExternalConsumption = true,
    simpleQuotedTweet = options.simpleQuotedTweet
  )
}
/**
 * Computes the request-specific tweet field ids: the caller's additional field ids plus
 * one field id per enabled option. Perspective and conversation-muted fields are only
 * added when `forUserId` is set; the cards field used depends on whether a
 * `cardsPlatformKey` was supplied; the counts field is added when any counts field was
 * requested.
 */
private def toTweetFields(opts: GetTweetOptions, countsFields: Set[FieldId]): Set[FieldId] = {
  val hasViewer = opts.forUserId.nonEmpty
  val hasCardsPlatformKey = opts.cardsPlatformKey.nonEmpty

  // (enabled, field id) pairs, in the order in which they are added to the result.
  val optionalFields: Seq[(Boolean, FieldId)] = Seq(
    (opts.includePlaces, Tweet.PlaceField.id),
    (hasViewer && opts.includePerspectivals, Tweet.PerspectiveField.id),
    (hasViewer && opts.includeConversationMuted, Tweet.ConversationMutedField.id),
    (opts.includeCards && !hasCardsPlatformKey, Tweet.CardsField.id),
    (opts.includeCards && hasCardsPlatformKey, Tweet.Card2Field.id),
    (opts.includeProfileGeoEnrichment, Tweet.ProfileGeoEnrichmentField.id),
    (countsFields.nonEmpty, Tweet.CountsField.id),
    (opts.includeCardUri, Tweet.CardReferenceField.id)
  )

  val fields = Set.newBuilder[FieldId]
  fields ++= opts.additionalFieldIds
  fields ++= optionalFields.collect { case (true, fieldId) => fieldId }
  fields.result()
}
/**
 * Returns the `StatusCounts` field ids for the counts (retweet, reply, favorite, quote)
 * enabled in the request options.
 */
private def toCountsFields(opts: GetTweetOptions): Set[FieldId] = {
  val candidates: Seq[(Boolean, FieldId)] = Seq(
    (opts.includeRetweetCount, StatusCounts.RetweetCountField.id),
    (opts.includeReplyCount, StatusCounts.ReplyCountField.id),
    (opts.includeFavoriteCount, StatusCounts.FavoriteCountField.id),
    (opts.includeQuoteCount, StatusCounts.QuoteCountField.id)
  )
  candidates.collect { case (true, fieldId) => fieldId }.toSet
}
/**
 * Returns the media field ids to hydrate: the additional-metadata field when the request
 * asks for it, otherwise nothing.
 */
private def toMediaFields(opts: GetTweetOptions): Set[FieldId] =
  if (!opts.includeMediaAdditionalMetadata) Set.empty[FieldId]
  else Set[FieldId](MediaEntity.AdditionalMetadataField.id)
/**
 * Converts a `TweetResult` into a `GetTweetResult`.
 *
 * The result always carries the tweet itself. The source tweet and the quoted tweet are
 * only attached when `includeSourceTweet` / `includeQuotedTweet` are set. The resulting
 * `tweetState` is `Suppress` when a suppress value is present on the tweet or its source
 * tweet, `Partial` when some fields failed to hydrate, and `Found` otherwise.
 *
 * @param deletedTweetVisibilityRepo consulted for a filtered reason when the quoted tweet
 *                                   result is `QuotedTweetResult.Filtered`
 * @param creativesContainerRepo     passed to `hydrateCreativeContainerBackedTweet` for a
 *                                   found quoted tweet
 * @param options                    the request options (safety level, viewer id, ...)
 * @param tweetResult                the repository result being converted
 * @param includeSourceTweet         whether to expose the source tweet in the result
 * @param includeQuotedTweet         whether to expose the quoted tweet in the result
 * @param stats                      stats receiver forwarded to the container hydration
 * @param shouldMaterializeContainers gate forwarded to the container hydration
 */
def toGetTweetResult(
deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type,
creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetType,
options: GetTweetOptions,
tweetResult: TweetResult,
includeSourceTweet: Boolean,
includeQuotedTweet: Boolean,
stats: StatsReceiver,
shouldMaterializeContainers: Gate[Unit]
): Stitch[GetTweetResult] = {
val tweetData = tweetResult.value
// an empty set of failed fields is reported as None rather than Some(Set.empty)
def asMissingFields(set: Set[FieldByPath]): Option[Set[FieldByPath]] =
if (set.isEmpty) None else Some(set)
val missingFields = asMissingFields(tweetResult.state.failedFields)
// the source tweet is only exposed to the caller when it was asked for
val sourceTweetResult =
tweetData.sourceTweetResult
.filter(_ => includeSourceTweet)
// quoted-tweet information is read from the source tweet when one is present (regardless
// of `includeSourceTweet`), and from the tweet itself otherwise
val sourceTweetData = tweetData.sourceTweetResult
.getOrElse(tweetResult)
.value
val quotedTweetResult: Option[QuotedTweetResult] = sourceTweetData.quotedTweetResult
.filter(_ => includeQuotedTweet)
// filtered reason for the quoted tweet: ask the deleted-tweet visibility repo when the
// quoted tweet result is `Filtered`, otherwise start from None
val qtFilteredReasonStitch =
((sourceTweetData.tweet.quotedTweet, quotedTweetResult) match {
case (Some(quotedTweet), Some(QuotedTweetResult.Filtered(filteredState))) =>
deletedTweetVisibilityRepo(
DeletedTweetVisibilityRepository.VisibilityRequest(
filteredState,
quotedTweet.tweetId,
options.safetyLevel,
options.forUserId,
isInnerQuotedTweet = true
)
)
case _ => Stitch.None
})
// use the filtered reason carried by quotedTweetResult when the visibility repo did not
// produce one
.map(fsOpt => fsOpt.orElse(quotedTweetResult.flatMap(_.filteredReason)))
// suppression on the tweet itself takes precedence over suppression on its source tweet
val suppress = tweetData.suppress.orElse(tweetData.sourceTweetResult.flatMap(_.value.suppress))
val quotedTweetStitch: Stitch[Option[Tweet]] =
quotedTweetResult match {
// check if quote tweet is backed by creatives container and needs to be hydrated from creatives
// container service. detail see go/creatives-containers-tdd
case Some(QuotedTweetResult.Found(tweetResult)) =>
hydrateCreativeContainerBackedTweet(
originalGetTweetResult = GetTweetResult(
tweetId = tweetResult.value.tweet.id,
tweetState = StatusState.Found,
tweet = Some(tweetResult.value.tweet)
),
getTweetRequestOptions = options,
creativesContainerRepo = creativesContainerRepo,
stats = stats,
shouldMaterializeContainers
).map(_.tweet)
// any other quoted tweet result: expose the tweet only if `toOption` yields one
case _ =>
Stitch.value(
quotedTweetResult
.flatMap(_.toOption)
.map(_.value.tweet)
)
}
// both Stitches are joined and the final result is assembled from their values
Stitch.join(qtFilteredReasonStitch, quotedTweetStitch).map {
case (qtFilteredReason, quotedTweet) =>
GetTweetResult(
tweetId = tweetData.tweet.id,
tweetState =
if (suppress.nonEmpty) StatusState.Suppress
else if (missingFields.nonEmpty) StatusState.Partial
else StatusState.Found,
tweet = Some(tweetData.tweet),
missingFields = missingFields,
filteredReason = suppress.map(_.filteredReason),
sourceTweet = sourceTweetResult.map(_.value.tweet),
sourceTweetMissingFields = sourceTweetResult
.map(_.state.failedFields)
.flatMap(asMissingFields),
quotedTweet = quotedTweet,
quotedTweetMissingFields = quotedTweetResult
.flatMap(_.toOption)
.map(_.state.failedFields)
.flatMap(asMissingFields),
quotedTweetFilteredReason = qtFilteredReason
)
}
}
// Filtered reason attached to the `Drop` result returned by `failureResult` when the
// author is `FilteredState.Unavailable.Author.Offboarded`.
private[this] val AuthorAccountIsInactive = FilteredReason.AuthorAccountIsInactive(true)
/**
 * Maps a failed tweet load to the `GetTweetResult` reported to the caller.
 *
 * Author-related and reported states map directly to a `StatusState`. Deleted states
 * (`TweetDeleted`, `BounceDeleted`, `SourceTweetNotFound`) additionally consult
 * `deletedTweetVisibilityRepo` for an optional filtered reason, and are only reported as
 * deleted when `options.enableDeletedState` is set; otherwise they are reported as
 * `NotFound`. Any other `FilteredState.HasFilteredReason` becomes `Drop` with its reason,
 * `OverCapacity` becomes `StatusState.OverCapacity`, and everything else `Failed`.
 *
 * @param deletedTweetVisibilityRepo repository queried for the filtered reason of
 *                                   deleted tweets
 * @param tweetId                    id of the tweet that failed to load
 * @param options                    the request options
 * @param ex                         the failure produced by the tweet repository
 * @return a Stitch of the result describing the failure
 */
def failureResult(
  deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type,
  tweetId: TweetId,
  options: GetTweetOptions,
  ex: Throwable
): Stitch[GetTweetResult] = {
  // The deleted-style state is only exposed when the tweet really is deleted and the
  // caller opted in; otherwise the tweet is reported as not found.
  def deletedState(deleted: Boolean, statusState: StatusState) =
    if (deleted && options.enableDeletedState) {
      statusState
    } else {
      StatusState.NotFound
    }

  // Shared by all deleted-style failures: looks up the optional filtered reason and
  // builds the result with the (possibly downgraded) deleted state.
  def deletedResult(deleted: Boolean, statusState: StatusState): Stitch[GetTweetResult] =
    deletedTweetVisibilityRepo(
      DeletedTweetVisibilityRepository.VisibilityRequest(
        ex,
        tweetId,
        options.safetyLevel,
        options.forUserId,
        isInnerQuotedTweet = false
      )
    ).map { filteredReasonOpt =>
      GetTweetResult(
        tweetId,
        deletedState(deleted, statusState),
        filteredReason = filteredReasonOpt
      )
    }

  ex match {
    case FilteredState.Unavailable.Author.Deactivated =>
      Stitch.value(GetTweetResult(tweetId, StatusState.DeactivatedUser))
    case FilteredState.Unavailable.Author.NotFound =>
      Stitch.value(GetTweetResult(tweetId, StatusState.NotFound))
    case FilteredState.Unavailable.Author.Offboarded =>
      Stitch.value(
        GetTweetResult(tweetId, StatusState.Drop, filteredReason = Some(AuthorAccountIsInactive)))
    case FilteredState.Unavailable.Author.Suspended =>
      Stitch.value(GetTweetResult(tweetId, StatusState.SuspendedUser))
    case FilteredState.Unavailable.Author.Protected =>
      Stitch.value(GetTweetResult(tweetId, StatusState.ProtectedUser))
    case FilteredState.Unavailable.Author.Unsafe =>
      Stitch.value(GetTweetResult(tweetId, StatusState.Drop))
    // Handle delete state with optional FilteredReason
    case FilteredState.Unavailable.TweetDeleted =>
      deletedResult(deleted = true, StatusState.Deleted)
    case FilteredState.Unavailable.BounceDeleted =>
      deletedResult(deleted = true, StatusState.BounceDeleted)
    case FilteredState.Unavailable.SourceTweetNotFound(d) =>
      deletedResult(d, StatusState.Deleted)
    case FilteredState.Unavailable.Reported =>
      Stitch.value(GetTweetResult(tweetId, StatusState.ReportedTweet))
    case fs: FilteredState.HasFilteredReason =>
      Stitch.value(
        GetTweetResult(tweetId, StatusState.Drop, filteredReason = Some(fs.filteredReason)))
    case OverCapacity(_) => Stitch.value(GetTweetResult(tweetId, StatusState.OverCapacity))
    case _ => Stitch.value(GetTweetResult(tweetId, StatusState.Failed))
  }
}
/**
 * Replaces the result of a creatives-container-backed tweet with the result materialized
 * by `creativesContainerRepo`, when applicable.
 *
 * A tweet counts as container-backed when its `underlyingCreativesContainerId` is set.
 * Results without that field are returned unchanged. For container-backed results the
 * cases below are tried in order: gate closed => `NotFound`; state `Found` with
 * materialization enabled => materialize (any failure becomes `Failed`);
 * materialization disabled by the request => `NotFound`; any other state => unchanged.
 *
 * @param originalGetTweetResult      the result produced from the tweet repository
 * @param getTweetRequestOptions      the request options, forwarded to the container repo
 * @param creativesContainerRepo      repository that materializes a container as a tweet
 * @param stats                       counters are recorded under "creatives_container"/"get_tweets"
 * @param shouldMaterializeContainers gate that must be open for container-backed tweets
 *                                    to be returned at all
 */
private def hydrateCreativeContainerBackedTweet(
originalGetTweetResult: GetTweetResult,
getTweetRequestOptions: GetTweetOptions,
creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetType,
stats: StatsReceiver,
shouldMaterializeContainers: Gate[Unit]
): Stitch[GetTweetResult] = {
// creatives container backed tweet stats
val ccTweetMaterialized = stats.scope("creatives_container", "get_tweets")
val ccTweetMaterializeFiltered = ccTweetMaterialized.scope("filtered")
val ccTweetMaterializeSuccess = ccTweetMaterialized.counter("success")
val ccTweetMaterializeFailed = ccTweetMaterialized.counter("failed")
val ccTweetMaterializeRequests = ccTweetMaterialized.counter("requests")
val tweetId = originalGetTweetResult.tweetId
val tweetState = originalGetTweetResult.tweetState
val underlyingCreativesContainerId =
originalGetTweetResult.tweet.flatMap(_.underlyingCreativesContainerId)
// note: the gate is evaluated for every result, including those without a container id
(
tweetState,
underlyingCreativesContainerId,
getTweetRequestOptions.disableTweetMaterialization,
shouldMaterializeContainers()
) match {
// 1. creatives container backed tweet is determined by `underlyingCreativesContainerId` field presence.
// 2. if the frontend tweet is suppressed by any reason, respect that and not do this hydration.
// (this logic can be revisited and improved further)
// not container-backed: nothing to do
case (_, None, _, _) =>
Stitch.value(originalGetTweetResult)
// container-backed but the gate is closed: hide the tweet
case (_, Some(_), _, false) =>
ccTweetMaterializeFiltered.counter("decider_suppressed").incr()
Stitch.value(GetTweetResult(tweetId, StatusState.NotFound))
// found, container-backed and materialization allowed by the request: materialize
case (StatusState.Found, Some(containerId), false, _) =>
ccTweetMaterializeRequests.incr()
val materializationRequest =
MaterializeAsTweetRequest(containerId, tweetId, Some(originalGetTweetResult))
creativesContainerRepo(
materializationRequest,
Some(getTweetRequestOptions)
).onSuccess(_ => ccTweetMaterializeSuccess.incr())
.onFailure(_ => ccTweetMaterializeFailed.incr())
.handle {
// every materialization failure is reported as a Failed result for this tweet
case _ => GetTweetResult(tweetId, StatusState.Failed)
}
// the request disabled materialization: hide the tweet, whatever its state
case (_, Some(_), true, _) =>
ccTweetMaterializeFiltered.counter("suppressed").incr()
Stitch.value(GetTweetResult(tweetId, StatusState.NotFound))
// container-backed but not in the Found state: keep the original result and count
// the state that prevented materialization
case (state, Some(_), _, _) =>
ccTweetMaterializeFiltered.counter(state.name).incr()
Stitch.value(originalGetTweetResult)
}
}
}

View File

@ -1,45 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.servo.exception.thriftscala.ClientError
import com.twitter.servo.exception.thriftscala.ClientErrorCause
import com.twitter.stitch.NotFound
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.FilteredState.Unavailable._
/**
 * Helpers for building the `ClientError`s that handlers return when a tweet or user
 * cannot be found.
 */
private[tweetypie] object HandlerError {

  /**
   * A partial function that turns the "tweet is missing" failures (`NotFound`,
   * `TweetDeleted`, `BounceDeleted`, `SourceTweetNotFound`) into a failed Stitch carrying
   * the corresponding `ClientError` for `tweetId`. Other failures are not handled.
   */
  def translateNotFoundToClientError[U](tweetId: TweetId): PartialFunction[Throwable, Stitch[U]] = {
    case NotFound =>
      Stitch.exception(tweetNotFound(tweetId))
    case TweetDeleted | BounceDeleted =>
      Stitch.exception(tweetNotFound(tweetId, deleted = true))
    case SourceTweetNotFound(wasDeleted) =>
      Stitch.exception(tweetNotFound(tweetId, wasDeleted))
  }

  /** `BadRequest` error saying that the tweet was deleted or was not found. */
  def tweetNotFound(tweetId: TweetId, deleted: Boolean = false): ClientError = {
    val reason = if (deleted) "deleted" else "not found"
    ClientError(ClientErrorCause.BadRequest, s"tweet $reason: $tweetId")
  }

  /** `BadRequest` error saying that the user was not found. */
  def userNotFound(userId: UserId): ClientError =
    ClientError(ClientErrorCause.BadRequest, s"user not found: $userId")

  /** A failed Future carrying `tweetNotFound(tweetId)`. */
  def tweetNotFoundException(tweetId: TweetId): Future[Nothing] =
    Future.exception(tweetNotFound(tweetId))

  /** A failed Future carrying `userNotFound(userId)`. */
  def userNotFoundException(userId: UserId): Future[Nothing] =
    Future.exception(userNotFound(userId))

  /**
   * Turns an arrow producing an `Option` into one producing the value itself; when the
   * option is empty the result is whatever `notFound` returns for the key.
   */
  def getRequired[A, B](
    optionFutureArrow: FutureArrow[A, Option[B]],
    notFound: A => Future[B]
  ): FutureArrow[A, B] =
    FutureArrow { key =>
      optionFutureArrow(key).flatMap { maybeValue =>
        maybeValue.fold(notFound(key))(found => Future.value(found))
      }
    }
}

View File

@ -1,176 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata
import com.twitter.mediaservices.commons.thriftscala.MediaKey
import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
import com.twitter.servo.util.FutureArrow
import com.twitter.tco_util.TcoSlug
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.media._
import com.twitter.tweetypie.serverutil.ExceptionCounter
import com.twitter.tweetypie.thriftscala._
import com.twitter.tweetypie.tweettext.Offset
/**
 * Creates the shortened media url for a tweet: the media permalink is built from the
 * request and shortened through the given `UrlShortener`.
 */
object CreateMediaTco {
  import UpstreamFailure._

  case class Request(
    tweetId: TweetId,
    userId: UserId,
    userScreenName: String,
    isProtected: Boolean,
    createdAt: Time,
    isVideo: Boolean,
    dark: Boolean)

  type Type = FutureArrow[Request, Media.MediaTco]

  def apply(urlShortener: UrlShortener.Type): Type =
    FutureArrow[Request, Media.MediaTco] { req =>
      val permalink = MediaUrl.Permalink(req.userScreenName, req.tweetId, req.isVideo)
      val context =
        UrlShortener.Context(
          userId = req.userId,
          userProtected = req.isProtected,
          tweetId = req.tweetId,
          createdAt = req.createdAt,
          dark = req.dark
        )

      val mediaTco = urlShortener((permalink, context)).flatMap { metadata =>
        val shortUrl = metadata.shortUrl
        shortUrl match {
          case TcoSlug(slug) =>
            Future.value(Media.MediaTco(permalink, shortUrl, MediaUrl.Display.fromTcoSlug(slug)))
          case _ =>
            // not expected: shortened urls from talon always start with "http://t.co/";
            // handled just in case.
            Future.exception(MediaShortenUrlMalformedFailure)
        }
      }

      mediaTco.rescue {
        case UrlShortener.InvalidUrlError =>
          // not expected: the media permalink should always be a valid input to talon.
          Future.exception(MediaExpandedUrlNotValidFailure)
      }
    }
}
/**
 * Processes the media uploaded for a tweet and produces the updated tweet text together
 * with the (not yet hydrated) media entities and media keys.
 */
object MediaBuilder {
  private val log = Logger(getClass)

  case class Request(
    mediaUploadIds: Seq[MediaId],
    text: String,
    tweetId: TweetId,
    userId: UserId,
    userScreenName: String,
    isProtected: Boolean,
    createdAt: Time,
    dark: Boolean = false,
    productMetadata: Option[Map[MediaId, UserDefinedProductMetadata]] = None)

  case class Result(updatedText: String, mediaEntities: Seq[MediaEntity], mediaKeys: Seq[MediaKey])

  type Type = FutureArrow[Request, Result]

  /**
   * Builds the arrow: media is processed first, then the media t.co url is created, and
   * the two are combined by `produceResult`. Exceptions are counted on `stats`, failures
   * are logged together with the request, and `MediaClientException`s are translated to
   * an `InvalidMedia` tweet-create failure.
   */
  def apply(
    processMedia: MediaClient.ProcessMedia,
    createMediaTco: CreateMediaTco.Type,
    stats: StatsReceiver
  ): Type = {
    val build = FutureArrow[Request, Result] { req =>
      val processRequest = ProcessMediaRequest(
        req.mediaUploadIds,
        req.userId,
        req.tweetId,
        req.isProtected,
        req.productMetadata
      )

      processMedia(processRequest).flatMap { mediaKeys =>
        val tcoRequest = CreateMediaTco.Request(
          tweetId = req.tweetId,
          userId = req.userId,
          userScreenName = req.userScreenName,
          isProtected = req.isProtected,
          createdAt = req.createdAt,
          isVideo = mediaKeys.exists(MediaKeyClassifier.isVideo(_)),
          dark = req.dark
        )
        createMediaTco(tcoRequest).map { mediaTco =>
          produceResult(req.text, mediaTco, req.isProtected, mediaKeys)
        }
      }
    }

    build
      .countExceptions(ExceptionCounter(stats))
      .onFailure[Request] { (req, ex) => log.info(req.toString, ex) }
      .translateExceptions {
        case e: MediaExceptions.MediaClientException =>
          TweetCreateFailure.State(TweetCreateState.InvalidMedia, Some(e.getMessage))
      }
  }

  /**
   * Appends the media url to the tweet text (separated by a space unless the text is
   * empty) and creates one media entity per media key. Every entity spans the appended
   * url, measured in code points; fields marked "to be hydrated" hold placeholders.
   */
  def produceResult(
    text: String,
    mediaTco: Media.MediaTco,
    userIsProtected: Boolean,
    mediaKeys: Seq[MediaKey]
  ): Result = {
    val tcoUrl = mediaTco.url
    val newText = text match {
      case "" => tcoUrl
      case _ => text + " " + tcoUrl
    }

    // the url is the last thing in the text, so its end offset is the text length
    val to = Offset.CodePoint.length(newText)
    val from = to - Offset.CodePoint.length(tcoUrl)
    val fromIndex = from.toShort
    val toIndex = to.toShort

    def entityFor(mediaKey: MediaKey): MediaEntity =
      MediaEntity(
        mediaKey = Some(mediaKey),
        fromIndex = fromIndex,
        toIndex = toIndex,
        url = tcoUrl,
        displayUrl = mediaTco.displayUrl,
        expandedUrl = mediaTco.expandedUrl,
        mediaId = mediaKey.mediaId,
        mediaPath = "", // to be hydrated
        mediaUrl = null, // to be hydrated
        mediaUrlHttps = null, // to be hydrated
        nsfw = false, // deprecated
        sizes = Set(
          MediaSize(
            sizeType = MediaSizeType.Orig,
            resizeMethod = MediaResizeMethod.Fit,
            deprecatedContentType = MediaKeyUtil.contentType(mediaKey),
            width = -1, // to be hydrated
            height = -1 // to be hydrated
          )
        )
      )

    Result(newText, mediaKeys.map(entityFor), mediaKeys)
  }
}

View File

@ -1,395 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.context.thriftscala.FeatureContext
import com.twitter.tweetypie.backends.LimiterService
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.serverutil.ExceptionCounter
import com.twitter.tweetypie.store.InsertTweet
import com.twitter.tweetypie.thriftscala._
import com.twitter.tweetypie.util.TweetCreationLock.{Key => TweetCreationLockKey}
object PostTweet {
type Type[R] = FutureArrow[R, PostTweetResult]
/**
 * A type-class to abstract over tweet creation requests.
 *
 * An instance exposes, for a request type `R`, the pieces of the request that the
 * filters and the base handler in `PostTweet` need, so that the same filters can
 * decorate arrows for different request types.
 */
trait RequestView[R] {
// whether the request is dark; dark events are not passed to the tweet store
def isDark(req: R): Boolean
// NOTE(review): both instances visible in this file return None — confirm intended use
def sourceTweetId(req: R): Option[TweetId]
// write-path hydration options of the request, if any
def options(req: R): Option[WritePathHydrationOptions]
// id of the user the request is made for
def userId(req: R): UserId
def uniquenessId(req: R): Option[Long]
def returnSuccessOnDuplicate(req: R): Boolean
// whether a detected duplicate should be returned as a successful result: true when the
// request asks for success on duplicate or carries a uniqueness id
def returnDuplicateTweet(req: R): Boolean =
returnSuccessOnDuplicate(req) || uniquenessId(req).nonEmpty
// key under which the tweet creation lock is taken for this request
def lockKey(req: R): TweetCreationLockKey
def geo(req: R): Option[TweetCreateGeo]
def featureContext(req: R): Option[FeatureContext]
def additionalContext(req: R): Option[collection.Map[TweetCreateContextKey, String]]
def transientContext(req: R): Option[TransientCreateContext]
def additionalFields(req: R): Option[Tweet]
// state reported when a duplicate is detected and the duplicate tweet is not returned
def duplicateState: TweetCreateState
// name used to scope stats for this request type
def scope: String
def isNullcast(req: R): Boolean
def creativesContainerId(req: R): Option[CreativesContainerId]
def noteTweetMentionedUserIds(req: R): Option[Seq[Long]]
}
/**
 * An implementation of `RequestView` for `PostTweetRequest`.
 *
 * Most members read the corresponding field of the request. A post-tweet request has no
 * source tweet id, never asks for success on duplicate, reports duplicates as
 * `TweetCreateState.Duplicate`, and uses the stats scope "tweet".
 */
implicit object PostTweetRequestView extends RequestView[PostTweetRequest] {
  def isDark(req: PostTweetRequest): Boolean = req.dark
  def sourceTweetId(req: PostTweetRequest): None.type = None
  def options(req: PostTweetRequest): Option[WritePathHydrationOptions] = req.hydrationOptions
  def userId(req: PostTweetRequest): UserId = req.userId
  def uniquenessId(req: PostTweetRequest): Option[Long] = req.uniquenessId
  def returnSuccessOnDuplicate(req: PostTweetRequest): Boolean = false
  def lockKey(req: PostTweetRequest): TweetCreationLockKey = TweetCreationLockKey.byRequest(req)
  def geo(req: PostTweetRequest): Option[TweetCreateGeo] = req.geo
  def featureContext(req: PostTweetRequest): Option[FeatureContext] = req.featureContext
  def additionalContext(
    req: PostTweetRequest
  ): Option[collection.Map[TweetCreateContextKey, String]] = req.additionalContext
  def transientContext(req: PostTweetRequest): Option[TransientCreateContext] =
    req.transientContext
  def additionalFields(req: PostTweetRequest): Option[Tweet] = req.additionalFields
  def duplicateState: TweetCreateState.Duplicate.type = TweetCreateState.Duplicate
  def scope: String = "tweet"
  def isNullcast(req: PostTweetRequest): Boolean = req.nullcast
  def creativesContainerId(req: PostTweetRequest): Option[CreativesContainerId] =
    req.underlyingCreativesContainerId
  // the mentioned user ids live inside the optional note-tweet options
  def noteTweetMentionedUserIds(req: PostTweetRequest): Option[Seq[Long]] =
    req.noteTweetOptions.flatMap(_.mentionedUserIds)
}
/**
 * An implementation of `RequestView` for `RetweetRequest`.
 *
 * A retweet request carries no geo, additional context, transient context, creatives
 * container id or note-tweet mentions. Duplicates are reported as
 * `TweetCreateState.AlreadyRetweeted` and stats are scoped under "retweet".
 */
implicit object RetweetRequestView extends RequestView[RetweetRequest] {
  def isDark(req: RetweetRequest): Boolean = req.dark

  // NOTE(review): returns None even though the request has a `sourceStatusId` (see
  // `lockKey`) — confirm this is intended.
  def sourceTweetId(req: RetweetRequest): None.type = None

  def options(req: RetweetRequest): Option[WritePathHydrationOptions] = req.hydrationOptions
  def userId(req: RetweetRequest): UserId = req.userId
  def uniquenessId(req: RetweetRequest): Option[Long] = req.uniquenessId
  def returnSuccessOnDuplicate(req: RetweetRequest): Boolean = req.returnSuccessOnDuplicate

  // Locks on the uniqueness id when the request has one, on the source tweet otherwise.
  def lockKey(req: RetweetRequest): TweetCreationLockKey =
    req.uniquenessId
      .map(id => TweetCreationLockKey.byUniquenessId(req.userId, id))
      .getOrElse(TweetCreationLockKey.bySourceTweetId(req.userId, req.sourceStatusId))

  def geo(req: RetweetRequest): None.type = None
  def featureContext(req: RetweetRequest): Option[FeatureContext] = req.featureContext
  def additionalContext(req: RetweetRequest): None.type = None
  def transientContext(req: RetweetRequest): None.type = None
  def additionalFields(req: RetweetRequest): Option[Tweet] = req.additionalFields
  def duplicateState: TweetCreateState.AlreadyRetweeted.type = TweetCreateState.AlreadyRetweeted
  def scope = "retweet"
  def isNullcast(req: RetweetRequest): Boolean = req.nullcast
  def creativesContainerId(req: RetweetRequest): Option[CreativesContainerId] = None
  def noteTweetMentionedUserIds(req: RetweetRequest): Option[Seq[Long]] = None
}
/**
 * A `Filter` decorates a `FutureArrow` whose result type `Res` is known and whose input
 * type has a `RequestView` type-class instance.
 */
trait Filter[Res] { self =>
  type T[Req] = FutureArrow[Req, Res]

  /**
   * Wraps a base arrow with additional behavior.
   */
  def apply[Req: RequestView](base: T[Req]): T[Req]

  /**
   * Composes two filters: the returned filter applies this filter to the base arrow
   * first and then applies `next` to the result.
   */
  def andThen(next: Filter[Res]): Filter[Res] = {
    val first = self
    new Filter[Res] {
      def apply[Req: RequestView](base: T[Req]): T[Req] = {
        val decoratedByFirst = first(base)
        next(decoratedByFirst)
      }
    }
  }
}
/**
 * This filter attempts to prevent some race-condition related duplicate tweet creations,
 * via use of a `TweetCreationLock`. When a duplicate is detected, this filter can synthesize
 * a successful `PostTweetResult` if applicable, or return the appropriate coded response.
 */
object DuplicateHandler {
/**
 * @param tweetCreationLock lock under which the base arrow is run, keyed by the request's `lockKey`
 * @param getTweets         used to load the tweet that a detected duplicate refers to
 * @param stats             receives "found" / "not_found" counters under the request view's scope
 */
def apply(
tweetCreationLock: TweetCreationLock,
getTweets: GetTweetsHandler.Type,
stats: StatsReceiver
): Filter[PostTweetResult] =
new Filter[PostTweetResult] {
def apply[R: RequestView](base: T[R]): T[R] = {
val view = implicitly[RequestView[R]]
val notFoundCount = stats.counter(view.scope, "not_found")
val foundCounter = stats.counter(view.scope, "found")
// recursive arrow: `self` is used to retry the whole request after releasing a lock
// that is assumed to be stale
FutureArrow.rec[R, PostTweetResult] { self => req =>
val duplicateKey = view.lockKey(req)
// attempts to find the duplicate tweet.
//
// if `returnDupTweet` is true and we find the tweet, then we return a
// successful `PostTweetResult` with that tweet. if we don't find the
// tweet, we throw an `InternalServerError`.
//
// if `returnDupTweet` is false and we find the tweet, then we return
// the appropriate duplicate state. if we don't find the tweet, then
// we unlock the duplicate key and try again.
def duplicate(tweetId: TweetId, returnDupTweet: Boolean) =
findDuplicate(tweetId, req).flatMap {
case Some(postTweetResult) =>
foundCounter.incr()
if (returnDupTweet) Future.value(postTweetResult)
else Future.value(PostTweetResult(state = view.duplicateState))
case None =>
notFoundCount.incr()
if (returnDupTweet) {
// If we failed to load the tweet, but we know that it
// should exist, then return an InternalServerError, so that
// the client treats it as a failed tweet creation req.
Future.exception(
InternalServerError("Failed to load duplicate existing tweet: " + tweetId)
)
} else {
// Assume the lock is stale if we can't load the tweet. It's
// possible that the lock is not stale, but the tweet is not
// yet available, which requires that it not be present in
// cache and not yet available from the backend. This means
// that the failure mode is to allow tweeting if we can't
// determine the state, but it should be rare that we can't
// determine it.
tweetCreationLock.unlock(duplicateKey).before(self(req))
}
}
tweetCreationLock(duplicateKey, view.isDark(req), view.isNullcast(req)) {
base(req)
}.rescue {
// NOTE(review): this branch reports `TweetCreateState.Duplicate` for every request
// type, whereas `duplicate` above reports `view.duplicateState` — confirm that the
// difference is intentional.
case TweetCreationInProgress =>
Future.value(PostTweetResult(state = TweetCreateState.Duplicate))
// if tweetCreationLock detected a duplicate, look up the duplicate
// and return the appropriate result
case DuplicateTweetCreation(tweetId) =>
duplicate(tweetId, view.returnDuplicateTweet(req))
// it's possible that tweetCreationLock didn't find a duplicate for a
// retweet attempt, but `RetweetBuilder` did.
case TweetCreateFailure.AlreadyRetweeted(tweetId) if view.returnDuplicateTweet(req) =>
duplicate(tweetId, true)
}
}
}
// Loads the tweet with the given id through `getTweets` and, when it is in the `Found`
// state, converts the read result into a successful `PostTweetResult`; yields None for
// any other state.
private def findDuplicate[R: RequestView](
tweetId: TweetId,
req: R
): Future[Option[PostTweetResult]] = {
val view = implicitly[RequestView[R]]
val readRequest =
GetTweetsRequest(
tweetIds = Seq(tweetId),
// Assume that the defaults are OK for all of the hydration
// options except the ones that are explicitly set in the
// req.
options = Some(
GetTweetOptions(
forUserId = Some(view.userId(req)),
includePerspectivals = true,
includeCards = view.options(req).exists(_.includeCards),
cardsPlatformKey = view.options(req).flatMap(_.cardsPlatformKey)
)
)
)
getTweets(readRequest).map {
// NOTE(review): only a single-element response is matched; any other shape would
// raise a MatchError here — this relies on getTweets returning exactly one result
// per requested id; confirm.
case Seq(result) =>
if (result.tweetState == StatusState.Found) {
// If the tweet was successfully found, then convert the
// read result into a successful write result.
Some(
PostTweetResult(
TweetCreateState.Ok,
result.tweet,
// if the retweet is really old, the retweet perspective might no longer
// be available, but we want to maintain the invariant that the `postRetweet`
// endpoint always returns a source tweet with the correct perspective.
result.sourceTweet.map { srcTweet =>
TweetLenses.perspective
.update(_.map(_.copy(retweeted = true, retweetId = Some(tweetId))))
.apply(srcTweet)
},
result.quotedTweet
)
)
} else {
None
}
}
}
}
}
/**
 * A `Filter` that applies rate limiting to failing requests.
 *
 * The limit is validated before the base arrow runs. Failures of the base arrow
 * increment the failure limit (unless the request is dark), and silent failures
 * increment the success limit.
 */
object RateLimitFailures {
  def apply(
    validateLimit: RateLimitChecker.Validate,
    incrementSuccess: LimiterService.IncrementByOne,
    incrementFailure: LimiterService.IncrementByOne
  ): Filter[TweetBuilderResult] =
    new Filter[TweetBuilderResult] {
      def apply[R: RequestView](base: T[R]): T[R] = {
        val requestView = implicitly[RequestView[R]]

        FutureArrow[R, TweetBuilderResult] { req =>
          val userId = requestView.userId(req)
          val isDark = requestView.isDark(req)
          val contributorUserId: Option[UserId] = getContributor(userId).map(_.userId)

          // Runs the base arrow and counts its failures. Because this only runs after
          // the limit was validated, a failure of the validation itself is not counted,
          // so the user can't get in a loop where tweet creation is never attempted.
          // Dark creations are not counted either: there is no way to perform a dark
          // tweet creation through the API, so it's most likely some kind of test
          // traffic like tap-compare.
          def attemptCreation(): Future[TweetBuilderResult] =
            base(req).onFailure { _ =>
              if (!isDark) incrementFailure(userId, contributorUserId)
            }

          validateLimit((userId, isDark))
            .before(attemptCreation())
            .onSuccess { result =>
              // A silent failure increments the rate limit as if the tweet was fully
              // created, because it should appear that way to the user whose creation
              // silently failed.
              if (result.isSilentFail) incrementSuccess(userId, contributorUserId)
            }
        }
      }
    }
}
/**
 * A `Filter` for counting non-`TweetCreateFailure` failures. Exceptions are counted
 * under a stats scope named after the request view's scope plus `scopeSuffix`.
 */
object CountFailures {
  def apply[Res](stats: StatsReceiver, scopeSuffix: String = "_builder"): Filter[Res] =
    new Filter[Res] {
      def apply[R: RequestView](base: T[R]): T[R] = {
        val scopeName = implicitly[RequestView[R]].scope + scopeSuffix
        val countException = ExceptionCounter(stats.scope(scopeName))

        base.onFailure {
          // tweet-create failures are deliberately not counted here
          case (_, _: TweetCreateFailure) => ()
          case (_, other) => countException(other)
        }
      }
    }
}
/**
 * A `Filter` for logging failures: every failure of the base arrow is logged, together
 * with the request, to the "com.twitter.tweetypie.FailedTweetCreations" logger.
 */
object LogFailures extends Filter[PostTweetResult] {
  private[this] val failedTweetCreationsLogger =
    Logger("com.twitter.tweetypie.FailedTweetCreations")

  def apply[R: RequestView](base: T[R]): T[R] =
    FutureArrow[R, PostTweetResult] { req =>
      val attempt = base(req)
      attempt.onFailure { failure =>
        failedTweetCreationsLogger.info(s"request: $req\nfailure: $failure")
      }
    }
}
/**
 * A `Filter` for converting a thrown `TweetCreateFailure` into a `PostTweetResult`.
 * Other failures are left untouched.
 */
object RescueTweetCreateFailure extends Filter[PostTweetResult] {
  def apply[R: RequestView](base: T[R]): T[R] = {
    val failureAsResult: PartialFunction[Throwable, Future[PostTweetResult]] = {
      case createFailure: TweetCreateFailure => Future.value(createFailure.toPostTweetResult)
    }
    FutureArrow[R, PostTweetResult](req => base(req).rescue(failureAsResult))
  }
}
/**
 * Builds a base handler for `PostTweetRequest` and `RetweetRequest`. The handler
 * calls an underlying tweet builder, creates an `InsertTweet.Event`, hydrates
 * that, passes it to `tweetStore` (unless the event is dark), and then converts it
 * to a `PostTweetResult`.
 */
object Handler {
  def apply[R: RequestView](
    tweetBuilder: FutureArrow[R, TweetBuilderResult],
    hydrateInsertEvent: FutureArrow[InsertTweet.Event, InsertTweet.Event],
    tweetStore: InsertTweet.Store,
  ): Type[R] =
    FutureArrow { req =>
      tweetBuilder(req)
        .flatMap(built => hydrateInsertEvent(toInsertTweetEvent(req, built)))
        .flatMap { event =>
          // dark events are hydrated but never written to the store
          Future
            .when(!event.dark)(tweetStore.insertTweet(event))
            .map(_ => toPostTweetResult(event))
        }
    }

  /**
   * Converts a request/`TweetBuilderResult` pair into an `InsertTweet.Event`. The event
   * is dark when the request is dark or the builder result is a silent failure.
   */
  def toInsertTweetEvent[R: RequestView](
    req: R,
    bldrRes: TweetBuilderResult
  ): InsertTweet.Event = {
    val view = implicitly[RequestView[R]]

    val geoSearchRequestId =
      view
        .geo(req)
        .flatMap(_.geoSearchRequestId)
        .map(searchRequestId => GeoSearchRequestId(requestID = searchRequestId.id))

    InsertTweet.Event(
      tweet = bldrRes.tweet,
      user = bldrRes.user,
      sourceTweet = bldrRes.sourceTweet,
      sourceUser = bldrRes.sourceUser,
      parentUserId = bldrRes.parentUserId,
      timestamp = bldrRes.createdAt,
      dark = view.isDark(req) || bldrRes.isSilentFail,
      hydrateOptions = view.options(req).getOrElse(WritePathHydrationOptions()),
      featureContext = view.featureContext(req),
      initialTweetUpdateRequest = bldrRes.initialTweetUpdateRequest,
      geoSearchRequestId = geoSearchRequestId,
      additionalContext = view.additionalContext(req),
      transientContext = view.transientContext(req),
      noteTweetMentionedUserIds = view.noteTweetMentionedUserIds(req)
    )
  }

  /**
   * Converts an `InsertTweet.Event` into a successful `PostTweetResult`.
   */
  def toPostTweetResult(event: InsertTweet.Event): PostTweetResult = {
    val createdTweet = Some(event.tweet)
    PostTweetResult(
      TweetCreateState.Ok,
      createdTweet,
      sourceTweet = event.sourceTweet,
      quotedTweet = event.quotedTweet
    )
  }
}
}

View File

@ -1,34 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.repository.TweetQuery
import com.twitter.tweetypie.repository.TweetRepository
import com.twitter.tweetypie.store.QuotedTweetDelete
import com.twitter.tweetypie.thriftscala.QuotedTweetDeleteRequest
/**
 * Create the appropriate QuotedTweetDelete.Event for a QuotedTweetDelete request.
 *
 * The quoting tweet is loaded from the repository; when it is not available the result
 * is `None`, otherwise an event stamped with the current time is produced.
 */
object QuotedTweetDeleteEventBuilder {
  type Type = QuotedTweetDeleteRequest => Future[Option[QuotedTweetDelete.Event]]

  val queryOptions: TweetQuery.Options =
    TweetQuery.Options(GetTweetsHandler.BaseInclude)

  def apply(tweetRepo: TweetRepository.Optional): Type = { request =>
    val quotingTweetLookup = tweetRepo(request.quotingTweetId, queryOptions)

    Stitch.run(
      quotingTweetLookup.map { maybeQuotingTweet =>
        for (quotingTweet <- maybeQuotingTweet)
          yield QuotedTweetDelete.Event(
            quotingTweetId = request.quotingTweetId,
            quotingUserId = getUserId(quotingTweet),
            quotedTweetId = request.quotedTweetId,
            quotedUserId = request.quotedUserId,
            timestamp = Time.now
          )
      }
    )
  }
}

View File

@ -1,36 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.repository.TweetQuery
import com.twitter.tweetypie.repository.TweetRepository
import com.twitter.tweetypie.store.QuotedTweetTakedown
import com.twitter.tweetypie.thriftscala.QuotedTweetTakedownRequest
/**
* Create the appropriate QuotedTweetTakedown.Event for a QuotedTweetTakedown request.
*/
object QuotedTweetTakedownEventBuilder {
  type Type = QuotedTweetTakedownRequest => Future[Option[QuotedTweetTakedown.Event]]

  // Options used when loading the quoting tweet.
  val queryOptions: TweetQuery.Options =
    TweetQuery.Options(GetTweetsHandler.BaseInclude)

  // Returns a function that loads the quoting tweet and, when the repository yields one,
  // builds the takedown event from it; when no quoting tweet is returned the result is None.
  def apply(tweetRepo: TweetRepository.Optional): Type = { request =>
    val eventLookup =
      tweetRepo(request.quotingTweetId, queryOptions).map { maybeQuotingTweet =>
        for (quotingTweet <- maybeQuotingTweet) yield {
          // Takedown details are copied from the request; the timestamp is taken at
          // the moment the event is built.
          QuotedTweetTakedown.Event(
            quotingTweetId = request.quotingTweetId,
            quotingUserId = getUserId(quotingTweet),
            quotedTweetId = request.quotedTweetId,
            quotedUserId = request.quotedUserId,
            takedownCountryCodes = request.takedownCountryCodes,
            takedownReasons = request.takedownReasons,
            timestamp = Time.now
          )
        }
      }
    Stitch.run(eventLookup)
  }
}

View File

@ -1,49 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.servo.util.FutureArrow
import com.twitter.tweetypie.backends.LimiterService
import com.twitter.tweetypie.core.TweetCreateFailure
import com.twitter.tweetypie.thriftscala.TweetCreateState.RateLimitExceeded
/**
 * Rate-limit helpers for tweet creation, keyed by a (user id, dark flag) pair.
 */
object RateLimitChecker {
  type Dark = Boolean
  type GetRemaining = FutureArrow[(UserId, Dark), Int]
  type Validate = FutureArrow[(UserId, Dark), Unit]

  /**
   * Builds an arrow that yields how many media tags may still be used.
   *
   * Dark requests always get `maxMediaTags`. Otherwise the limiter's remaining count is
   * capped at `maxMediaTags`, and any limiter failure also falls back to `maxMediaTags`.
   */
  def getMaxMediaTags(minRemaining: LimiterService.MinRemaining, maxMediaTags: Int): GetRemaining =
    FutureArrow {
      case (_, true) =>
        Future.value(maxMediaTags)
      case (userId, _) =>
        val contributorUserId = getContributor(userId).map(_.userId)
        minRemaining(userId, contributorUserId)
          .map(remaining => math.min(remaining, maxMediaTags))
          .handle { case _ => maxMediaTags }
    }

  /**
   * Builds an arrow that fails with `RateLimitExceeded` when the limiter reports nothing
   * remaining. Dark requests, and all requests while `rateLimitEnabled` is false, pass
   * without consulting the limiter.
   */
  def validate(
    hasRemaining: LimiterService.HasRemaining,
    featureStats: StatsReceiver,
    rateLimitEnabled: () => Boolean
  ): Validate = {
    val exceededCounter = featureStats.counter("exceeded")
    val checkedCounter = featureStats.counter("checked")

    FutureArrow {
      case (userId, dark) =>
        // The enabled flag is only consulted for non-dark requests.
        val shouldCheck = !dark && rateLimitEnabled()
        if (shouldCheck) {
          checkedCounter.incr()
          val contributorUserId = getContributor(userId).map(_.userId)
          hasRemaining(userId, contributorUserId).map { remaining =>
            if (!remaining) {
              exceededCounter.incr()
              throw TweetCreateFailure.State(RateLimitExceeded)
            }
          }
        } else {
          Future.Unit
        }
    }
  }
}

View File

@ -1,633 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.TweetCreateFailure
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.serverutil.ExceptionCounter
import com.twitter.tweetypie.thriftscala._
import com.twitter.tweetypie.tweettext.Offset
import com.twitter.twittertext.Extractor
import scala.annotation.tailrec
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.control.NoStackTrace
object ReplyBuilder {
private val extractor = new Extractor
private val InReplyToTweetNotFound =
TweetCreateFailure.State(TweetCreateState.InReplyToTweetNotFound)
/**
 * Input to the reply builder.
 *
 * @param authorId id of the user creating the tweet.
 * @param authorScreenName screen name of the user creating the tweet.
 * @param inReplyToTweetId tweet being replied to; only positive ids are treated as a
 *                         reply-to-tweet.
 * @param tweetText text of the tweet as submitted.
 * @param prependImplicitMentions when true, the builder generates the implicit mention prefix
 *                                itself and puts it in front of `tweetText`.
 * @param enableTweetToNarrowcasting when true, a directed-at user may be set, and a tweet that
 *                                   is not a reply-to-tweet may still become a reply-to-user.
 * @param excludeUserIds users to leave out of the generated implicit mention prefix.
 * @param spamResult spam verdict; `Spam.DisabledByIpiPolicy` causes the reply to be rejected.
 * @param batchMode batch compose mode; `BatchSubsequent` self-replies get an empty prefix.
 */
case class Request(
authorId: UserId,
authorScreenName: String,
inReplyToTweetId: Option[TweetId],
tweetText: String,
prependImplicitMentions: Boolean,
enableTweetToNarrowcasting: Boolean,
excludeUserIds: Seq[UserId],
spamResult: Spam.Result,
batchMode: Option[BatchComposeMode])
/**
* This case class contains the fields that are shared between legacy and simplified replies.
*/
case class BaseResult(
  reply: Reply,
  conversationId: Option[ConversationId],
  selfThreadMetadata: Option[SelfThreadMetadata],
  community: Option[Communities] = None,
  exclusiveTweetControl: Option[ExclusiveTweetControl] = None,
  trustedFriendsControl: Option[TrustedFriendsControl] = None,
  editControl: Option[EditControl] = None) {

  // Completes this base into a full Result by supplying the three fields that differ
  // between legacy and simplified replies; every shared field is copied over unchanged.
  def toResult(
    tweetText: String,
    directedAtMetadata: DirectedAtUserMetadata,
    visibleStart: Offset.CodePoint = Offset.CodePoint(0),
  ): Result =
    Result(
      reply = reply,
      tweetText = tweetText,
      directedAtMetadata = directedAtMetadata,
      conversationId = conversationId,
      selfThreadMetadata = selfThreadMetadata,
      visibleStart = visibleStart,
      community = community,
      exclusiveTweetControl = exclusiveTweetControl,
      trustedFriendsControl = trustedFriendsControl,
      editControl = editControl
    )
}
/**
* @param reply the Reply object to include in the tweet.
* @param tweetText updated tweet text which may include prepended at-mentions, trimmed
* @param directedAtMetadata see DirectedAtHydrator for usage.
* @param conversationId conversation id to assign to the tweet.
* @param selfThreadMetadata returns the result of `SelfThreadBuilder`
* @param visibleStart offset into `tweetText` separating hideable at-mentions from the
* visible text.
*/
case class Result(
  reply: Reply,
  tweetText: String,
  directedAtMetadata: DirectedAtUserMetadata,
  conversationId: Option[ConversationId] = None,
  selfThreadMetadata: Option[SelfThreadMetadata] = None,
  visibleStart: Offset.CodePoint = Offset.CodePoint(0),
  community: Option[Communities] = None,
  exclusiveTweetControl: Option[ExclusiveTweetControl] = None,
  trustedFriendsControl: Option[TrustedFriendsControl] = None,
  editControl: Option[EditControl] = None) {

  /**
   * Tells whether the reply carries no content beyond its hidden mention prefix.
   *
   * @param finalText tweet text after any server-side additions (e.g. media or
   *                  quoted-tweet URLs).
   * @return true iff the hidden prefix spanned all of `tweetText` and the final text has the
   *         same code-point length, i.e. nothing was added afterwards. Such a reply has no
   *         content and tweet creation should fail.
   */
  def replyTextIsEmpty(finalText: String): Boolean = {
    // Code-point length of the text this Result was built with, before server-side additions.
    val originalLength = Offset.CodePoint.length(tweetText)
    val hiddenPrefixCoversOriginal = originalLength == visibleStart
    hiddenPrefixCoversOriginal && originalLength == Offset.CodePoint.length(finalText)
  }
}
type Type = Request => Future[Option[Result]]
private object InvalidUserException extends NoStackTrace
/**
* A user ID and screen name used for building replies.
*/
private case class User(id: UserId, screenName: String)
/**
* Captures the in-reply-to tweet, its author, and if the user is attempting to reply to a
* retweet, then that retweet and its author.
*/
private case class ReplySource(
srcTweet: Tweet,
srcUser: User,
retweet: Option[Tweet] = None,
rtUser: Option[User] = None) {
// Users tagged in the source tweet's media, across all tagged media entries. Evaluated when
// the ReplySource is constructed; a tag without an id or screen name makes the conversion
// throw InvalidUserException.
private val photoTaggedUsers: Seq[User] = {
  val allTags = srcTweet.mediaTags.map(_.tagMap.values.flatten).getOrElse(Nil)
  allTags.map(tag => toUser(tag)).toSeq
}
// Converts a media tag into a User. Both the id and the screen name must be present;
// otherwise InvalidUserException is thrown.
private def toUser(mt: MediaTag): User =
mt match {
case MediaTag(_, Some(id), Some(screenName), _) => User(id, screenName)
case _ => throw InvalidUserException
}
// Converts a mention entity into a User. The mention must carry a user id;
// otherwise InvalidUserException is thrown.
private def toUser(e: MentionEntity): User =
e match {
case MentionEntity(_, _, screenName, Some(id), _, _) => User(id, screenName)
case _ => throw InvalidUserException
}
// Converts a directed-at user into a User; this conversion cannot fail.
private def toUser(d: DirectedAtUser) = User(d.userId, d.screenName)
// Looks up the ids of users associated with the source tweet's card, using the source
// tweet's card reference and URLs, from the perspective of the replying author.
def allCardUsers(authorUser: User, cardUsersFinder: CardUsersFinder.Type): Future[Set[UserId]] = {
  val finderRequest =
    CardUsersFinder.Request(
      cardReference = getCardReference(srcTweet),
      urls = getUrls(srcTweet).map(_.url),
      perspectiveUserId = authorUser.id
    )
  Stitch.run(cardUsersFinder(finderRequest))
}
// Users mentioned in the source tweet, recomputed on every call. Throws InvalidUserException
// if any mention lacks a user id.
def srcTweetMentionedUsers: Seq[User] = getMentions(srcTweet).map(toUser)
// Strategy for one kind of reply: who the reply is directed at, which mention must appear
// in the text, and how the implicit mention prefix is assembled.
private trait ReplyType {
  val allExcludedUserIds: Set[UserId]
  def directedAt: Option[User]
  def requiredTextMention: Option[User]
  def isExcluded(u: User): Boolean = allExcludedUserIds.contains(u.id)

  // Builds the space-separated "@screenName" prefix. The required mention (if any) comes
  // first and is never dropped, not even when it is excluded or `maxImplicits` is 0. The
  // remaining mentions keep their order, skipping excluded users and repeated ids.
  def buildPrefix(otherMentions: Seq[User], maxImplicits: Int): String = {
    val required = requiredTextMention
    val requiredIds = required.map(_.id).toSet

    // Excluded ids start out as "already seen", except for the required mention's id.
    val initiallySeen: Set[UserId] = allExcludedUserIds -- requiredIds

    val (_, uniqueUsers) =
      (required.toSeq ++ otherMentions).foldLeft((initiallySeen, Vector.empty[User])) {
        case ((seen, kept), user) =>
          if (seen.contains(user.id)) (seen, kept)
          else (seen + user.id, kept :+ user)
      }

    val limit = math.max(maxImplicits, required.size)
    uniqueUsers
      .take(limit)
      .map(user => "@" + user.screenName)
      .mkString(" ")
  }
}
// Reply to the author's own tweet: the directed-at user is inherited from the retweet author
// or from the source tweet's directed-at user, whichever comes first and is not excluded.
private case class SelfReply(
  allExcludedUserIds: Set[UserId],
  enableTweetToNarrowcasting: Boolean)
    extends ReplyType {
  private def srcTweetDirectedAt: Option[User] = getDirectedAtUser(srcTweet).map(toUser)

  override def directedAt: Option[User] =
    if (enableTweetToNarrowcasting) {
      val candidates = rtUser.toList ++ srcTweetDirectedAt.toList
      candidates.find(candidate => !isExcluded(candidate))
    } else {
      None
    }

  // The directed-at user must also appear in the text to avoid confusion.
  override def requiredTextMention: Option[User] = directedAt
}
// Reply type with no directed-at user, no required mention, and an always-empty prefix.
private case class BatchSubsequentReply(allExcludedUserIds: Set[UserId]) extends ReplyType {
override def directedAt: Option[User] = None
override def requiredTextMention: Option[User] = None
// The mentions and limit are ignored: no implicit mentions are ever generated.
override def buildPrefix(otherMentions: Seq[User], maxImplicits: Int): String = ""
}
// Reply to another user's tweet.
private case class RegularReply(
  allExcludedUserIds: Set[UserId],
  enableTweetToNarrowcasting: Boolean)
    extends ReplyType {
  // Directed at the source author only when narrowcasting is enabled and the source
  // author has not been excluded.
  override def directedAt: Option[User] =
    if (enableTweetToNarrowcasting && !isExcluded(srcUser)) Some(srcUser) else None

  // The source tweet's author is always mentioned in the reply, even when the reply is not
  // narrowcast to that user. All non-self-reply tweets require this mention.
  override def requiredTextMention: Option[User] = Some(srcUser)
}
/**
* Computes an implicit mention prefix to add to the tweet text as well as any directed-at user.
*
* The first implicit mention is the source-tweet's author unless the reply is a self-reply, in
* which case it inherits the DirectedAtUser from the source tweet, though the current author is
* never added. This mention, if it exists, is the only mention that may be used to direct-at a
* user and is the user that ends up in DirectedAtUserMetadata. If the user replied to a
* retweet and the reply doesn't explicitly mention the retweet author, then the retweet author
* will be next, followed by source tweet mentions and source tweet photo-tagged users.
*
* Users in `excludedUsers` originate from the PostTweetRequest and are filtered out of any
* non-leading mention.
*
* Note on maxImplicits:
* This method returns at most 'maxImplicits' mentions unless 'maxImplicits' is 0 and a
* directed-at mention is required, in which case it returns 1. If this happens the reply may
* fail downstream validation checks (e.g. TweetBuilder). With 280 visible character limit it's
* theoretically possible to explicitly mention 93 users (280 / 3) but this bug shouldn't really
* be an issue because:
* 1.) Most replies don't have 50 explicit mentions
* 2.) TOO-clients have switched to batchMode=Subsequent for self-replies, which disables
* the source tweet's directed-at user inheritance
* 3.) Requests rarely are rejected due to mention_limit_exceeded
* If this becomes a problem we could reopen the mention limit discussion, specifically if the
* backend should allow 51 while the explicit limit remains at 50.
*
* Note on batchMode:
* Implicit mention prefix will be empty string if batchMode is BatchSubsequent. This is to
* support batch composer.
*/
def implicitMentionPrefixAndDAU(
  maxImplicits: Int,
  excludedUsers: Seq[User],
  author: User,
  enableTweetToNarrowcasting: Boolean,
  batchMode: Option[BatchComposeMode]
): (String, Option[User]) = {
  // The author is always part of the exclusion set, alongside the requested exclusions.
  val excludedIds: Set[UserId] = (excludedUsers :+ author).map(_.id).toSet

  val isSelfReply = author.id == srcUser.id

  // Batch mode only matters for self-replies; replies to other users are always regular.
  val replyType: ReplyType =
    if (!isSelfReply) {
      RegularReply(excludedIds, enableTweetToNarrowcasting)
    } else if (batchMode.contains(BatchComposeMode.BatchSubsequent)) {
      BatchSubsequentReply(excludedIds)
    } else {
      SelfReply(excludedIds, enableTweetToNarrowcasting)
    }

  // Candidate order: retweet author, then source tweet mentions, then photo-tagged users.
  val candidateMentions = rtUser.toList ++ srcTweetMentionedUsers ++ photoTaggedUsers
  val mentionPrefix =
    replyType.buildPrefix(otherMentions = candidateMentions, maxImplicits = maxImplicits)

  (mentionPrefix, replyType.directedAt)
}
/**
* Finds the longest possible prefix of whitespace separated @-mentions, restricted to
* @-mentions that are derived from the reply chain.
*/
def hideablePrefix(
text: String,
cardUsers: Seq[User],
explicitMentions: Seq[Extractor.Entity]
): Offset.CodePoint = {
// Lower-cased screen names that may appear in the hideable prefix: source tweet mentions,
// the source author, the retweet author (if any), photo-tagged users and card users.
val allowedMentions =
(srcTweetMentionedUsers.toSet + srcUser ++ rtUser.toSet ++ photoTaggedUsers ++ cardUsers)
.map(_.screenName.toLowerCase)
// Scanning is done in code units (text.charAt indices); the result is converted to a
// code-point offset at the very end.
val len = Offset.CodeUnit.length(text)
// .isSpaceChar is needed in addition to .isWhitespace so that NO-BREAK SPACE (U+00A0)
// is accepted inside the prefix.
def isWhitespace(c: Char) = c.isWhitespace || c.isSpaceChar
// Advances past any run of whitespace starting at `offset`, stopping at the end of the text.
@tailrec
def skipWs(offset: Offset.CodeUnit): Offset.CodeUnit =
if (offset == len || !isWhitespace(text.charAt(offset.toInt))) offset
else skipWs(offset.incr)
// Walks the mentions in order; returns the offset at which the hideable prefix ends.
@tailrec
def go(offset: Offset.CodeUnit, mentions: Stream[Extractor.Entity]): Offset.CodeUnit =
if (offset == len) offset
else {
mentions match {
// if we are at the next mention, and it is allowed, skip past and recurse
case next #:: tail if next.getStart == offset.toInt =>
if (!allowedMentions.contains(next.getValue.toLowerCase)) offset
else go(skipWs(Offset.CodeUnit(next.getEnd)), tail)
// we found non-mention text
case _ => offset
}
}
go(Offset.CodeUnit(0), explicitMentions.toStream).toCodePoint(text)
}
}
// Builds the Reply struct pointing at `user`; the in-reply-to status id is optional and
// is absent by default.
private def replyToUser(user: User, inReplyToStatusId: Option[TweetId] = None): Reply = {
  val targetScreenName = Some(user.screenName)
  Reply(
    inReplyToUserId = user.id,
    inReplyToScreenName = targetScreenName,
    inReplyToStatusId = inReplyToStatusId
  )
}
/**
* A builder that generates reply from `inReplyToTweetId` or tweet text
*
* There are two kinds of "reply":
* 1. reply to tweet, which is generated from `inReplyToTweetId`.
*
* A valid reply-to-tweet satisfies the following conditions:
* 1). the tweet that is in-reply-to exists (and is visible to the user creating the tweet)
* 2). the author of the in-reply-to tweet is mentioned anywhere in the tweet, or
* this is a tweet that is in reply to the author's own tweet
*
* 2. reply to user, is generated when the tweet text starts with @user_name. This is only
* attempted if PostTweetRequest.enableTweetToNarrowcasting is true (default).
*/
def apply(
userIdentityRepo: UserIdentityRepository.Type,
tweetRepo: TweetRepository.Optional,
replyCardUsersFinder: CardUsersFinder.Type,
selfThreadBuilder: SelfThreadBuilder,
relationshipRepo: RelationshipRepository.Type,
unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type,
enableRemoveUnmentionedImplicits: Gate[Unit],
stats: StatsReceiver,
maxMentions: Int
): Type = {
val exceptionCounters = ExceptionCounter(stats)
val modeScope = stats.scope("mode")
val compatModeCounter = modeScope.counter("compat")
val simpleModeCounter = modeScope.counter("simple")
// Resolves a user key to a User; a not-found result from the repository becomes None.
def getUser(key: UserKey): Future[Option[User]] = {
  val userLookup =
    userIdentityRepo(key).map(identity => User(identity.id, identity.screenName))
  Stitch.run(userLookup.liftNotFoundToOption)
}
// Resolves a batch of user ids to Users. Ids that the repository reports as not found are
// silently dropped from the result.
def getUsers(userIds: Seq[UserId]): Future[Seq[ReplyBuilder.User]] = {
  val identityLookups =
    Stitch.traverse(userIds) { userId =>
      userIdentityRepo(UserKey(userId)).liftNotFoundToOption
    }
  Stitch.run(
    identityLookups.map { maybeIdentities =>
      maybeIdentities.flatten.map(identity => User(identity.id, identity.screenName))
    }
  )
}
// Tweet fields requested when loading the tweet being replied to: a fixed set of fields
// plus whatever the self-thread builder declares it needs from the reply source.
val tweetQueryIncludes =
TweetQuery.Include(
tweetFields = Set(
Tweet.CoreDataField.id,
Tweet.CardReferenceField.id,
Tweet.CommunitiesField.id,
Tweet.MediaTagsField.id,
Tweet.MentionsField.id,
Tweet.UrlsField.id,
Tweet.EditControlField.id
) ++ selfThreadBuilder.requiredReplySourceFields.map(_.id)
)
// Query options for loading the in-reply-to tweet on behalf of `forUserId`, with
// visibility filtering enforced.
def tweetQueryOptions(forUserId: UserId): TweetQuery.Options = {
  val viewer = Some(forUserId)
  TweetQuery.Options(
    tweetQueryIncludes,
    forUserId = viewer,
    enforceVisibilityFiltering = true
  )
}
// Loads a tweet as seen by `forUserId`, using the reply-source query options.
def getTweet(tweetId: TweetId, forUserId: UserId): Future[Option[Tweet]] = {
  val options = tweetQueryOptions(forUserId)
  Stitch.run(tweetRepo(tweetId, options))
}
// Fails with InReplyToTweetNotFound when the user being replied to blocks the author;
// otherwise completes successfully.
def checkBlockRelationship(authorId: UserId, result: Result): Future[Unit] = {
  val blockKey =
    RelationshipKey.blocks(
      sourceId = result.reply.inReplyToUserId,
      destinationId = authorId
    )
  Stitch.run(relationshipRepo(blockKey)).flatMap { authorIsBlocked =>
    if (authorIsBlocked) Future.exception(InReplyToTweetNotFound)
    else Future.Unit
  }
}
// Rejects the reply when the spam result says it is disabled by IPI policy; the failure
// carries the in-reply-to screen name. Any other spam result passes.
def checkIPIPolicy(request: Request, reply: Reply): Future[Unit] =
  if (request.spamResult != Spam.DisabledByIpiPolicy) Future.Unit
  else Future.exception(Spam.DisabledByIpiFailure(reply.inReplyToScreenName))
// Returns the ids, among the source tweet's mentioned and directed-at users, that the
// unmentioned-entities repository reports for the source tweet's conversation. Yields an
// empty result when the gate is off, when there is no conversation id, when there is
// nothing to check, or when the lookup fails or returns no value.
def getUnmentionedUsers(replySource: ReplySource): Future[Seq[UserId]] =
  if (!enableRemoveUnmentionedImplicits()) {
    Future.Nil
  } else {
    val sourceTweet = replySource.srcTweet
    val directedAtId = sourceTweet.directedAtUserMetadata.flatMap(_.userId)
    val mentionedIds = sourceTweet.mentions.getOrElse(Nil).flatMap(_.userId)
    val idsToCheck = mentionedIds ++ directedAtId

    sourceTweet.coreData.flatMap(_.conversationId) match {
      case Some(cid) if idsToCheck.nonEmpty =>
        stats.counter("unmentioned_implicits_check").incr()
        Stitch.run(unmentionedEntitiesRepo(cid, idsToCheck)).liftToTry.map { outcome =>
          outcome match {
            case Return(Some(unmentionedUserIds)) => unmentionedUserIds
            // Best effort: failures and empty responses exclude nobody.
            case _ => Seq[UserId]()
          }
        }
      case _ =>
        Future.Nil
    }
  }
/**
* Constructs a `ReplySource` for the given `tweetId`, which captures the source tweet to be
* replied to, its author, and if `tweetId` is for a retweet of the source tweet, then also
* that retweet and its author. If the source tweet (or a retweet of it), or a corresponding
* author, can't be found or isn't visible to the replier, then `InReplyToTweetNotFound` is
* thrown.
*/
def getReplySource(tweetId: TweetId, forUserId: UserId): Future[ReplySource] = {
  // Unwraps a lookup result, turning an absent value into InReplyToTweetNotFound.
  def required[A](lookup: Future[Option[A]]): Future[A] =
    lookup.flatMap {
      case Some(found) => Future.value(found)
      case None => Future.exception(InReplyToTweetNotFound)
    }

  required(getTweet(tweetId, forUserId)).flatMap { tweet =>
    required(getUser(UserKey(getUserId(tweet)))).flatMap { user =>
      getShare(tweet) match {
        case Some(share) =>
          // Replying to a retweet: resolve the retweet's source tweet, then record the
          // retweet and its author on the resulting ReplySource.
          getReplySource(share.sourceStatusId, forUserId)
            .map(_.copy(retweet = Some(tweet), rtUser = Some(user)))
        case None =>
          Future.value(ReplySource(tweet, user))
      }
    }
  }
}
/**
* Computes a `Result` for the reply-to-tweet case. If `inReplyToTweetId` is for a retweet,
* the reply will be computed against the source tweet. If `prependImplicitMentions` is true
* and source tweet can't be found or isn't visible to replier, then this method will return
* a `InReplyToTweetNotFound` failure. If `prependImplicitMentions` is false, then the reply
* text must either mention the source tweet user, or it must be a reply to self; if both of
* those conditions fail, then `None` is returned.
*/
def makeReplyToTweet(
inReplyToTweetId: TweetId,
text: String,
author: User,
prependImplicitMentions: Boolean,
enableTweetToNarrowcasting: Boolean,
excludeUserIds: Seq[UserId],
batchMode: Option[BatchComposeMode]
): Future[Option[Result]] = {
// Mentions the author typed into the text, together with their positions.
val explicitMentions: Seq[Extractor.Entity] =
extractor.extractMentionedScreennamesWithIndices(text).asScala.toSeq
// Lower-cased so that the screen-name check in isValidReplyTo is case-insensitive.
val mentionedScreenNames =
explicitMentions.map(_.getValue.toLowerCase).toSet
/**
* If `prependImplicitMentions` is true, or the reply author is the same as the in-reply-to
* author, then the reply text doesn't have to mention the in-reply-to author. Otherwise,
* check that the text contains a mention of the in-reply-to author.
*/
def isValidReplyTo(inReplyToUser: User): Boolean =
prependImplicitMentions ||
(inReplyToUser.id == author.id) ||
mentionedScreenNames.contains(inReplyToUser.screenName.toLowerCase)
getReplySource(inReplyToTweetId, author.id)
.flatMap { replySrc =>
// Fields shared by both reply modes; they are all derived from the source tweet
// (never from a retweet of it).
val baseResult = BaseResult(
reply = replyToUser(replySrc.srcUser, Some(replySrc.srcTweet.id)),
conversationId = getConversationId(replySrc.srcTweet),
selfThreadMetadata = selfThreadBuilder.build(author.id, replySrc.srcTweet),
community = replySrc.srcTweet.communities,
// Reply tweets retain the same exclusive
// tweet controls as the tweet being replied to.
exclusiveTweetControl = replySrc.srcTweet.exclusiveTweetControl,
trustedFriendsControl = replySrc.srcTweet.trustedFriendsControl,
editControl = replySrc.srcTweet.editControl
)
if (isValidReplyTo(replySrc.srcUser)) {
if (prependImplicitMentions) {
// Simplified Replies mode - prepend server-side generated prefix to passed in text
simpleModeCounter.incr()
// remove the in-reply-to tweet author from the excluded users, in-reply-to tweet author will always be a directedAtUser
val filteredExcludedIds =
excludeUserIds.filterNot(uid => uid == TweetLenses.userId(replySrc.srcTweet))
for {
unmentionedUserIds <- getUnmentionedUsers(replySrc)
excludedUsers <- getUsers(filteredExcludedIds ++ unmentionedUserIds)
// Explicit mentions in the text count against the overall mention budget.
(prefix, directedAtUser) = replySrc.implicitMentionPrefixAndDAU(
maxImplicits = math.max(0, maxMentions - explicitMentions.size),
excludedUsers = excludedUsers,
author = author,
enableTweetToNarrowcasting = enableTweetToNarrowcasting,
batchMode = batchMode
)
} yield {
// prefix or text (or both) can be empty strings. Add " " separator and adjust
// prefix length only when both prefix and text are non-empty.
val textChunks = Seq(prefix, text).map(_.trim).filter(_.nonEmpty)
val tweetText = textChunks.mkString(" ")
val visibleStart =
if (textChunks.size == 2) {
Offset.CodePoint.length(prefix + " ")
} else {
Offset.CodePoint.length(prefix)
}
Some(
baseResult.toResult(
tweetText = tweetText,
directedAtMetadata = DirectedAtUserMetadata(directedAtUser.map(_.id)),
visibleStart = visibleStart
)
)
}
} else {
// Backwards-compatibility mode - walk from beginning of text until find visibleStart
compatModeCounter.incr()
for {
cardUserIds <- replySrc.allCardUsers(author, replyCardUsersFinder)
cardUsers <- getUsers(cardUserIds.toSeq)
optUserIdentity <- extractReplyToUser(text)
// The leading @-mention only becomes the directed-at user when narrowcasting is on.
directedAtUserId = optUserIdentity.map(_.id).filter(_ => enableTweetToNarrowcasting)
} yield {
Some(
baseResult.toResult(
tweetText = text,
directedAtMetadata = DirectedAtUserMetadata(directedAtUserId),
visibleStart = replySrc.hideablePrefix(text, cardUsers, explicitMentions),
)
)
}
}
} else {
// The text neither mentions the in-reply-to author nor is this a self-reply.
Future.None
}
}
.handle {
// if `getReplySource` throws this exception, but we aren't computing implicit
// mentions, then we fall back to the reply-to-user case instead of reply-to-tweet
case InReplyToTweetNotFound if !prependImplicitMentions => None
}
}
// Builds a reply-to-user Result when the text yields a reply screen name that resolves to
// a user; that user becomes both the reply target and the directed-at user.
def makeReplyToUser(text: String): Future[Option[Result]] =
  for (maybeUser <- extractReplyToUser(text)) yield {
    maybeUser.map { user =>
      Result(replyToUser(user), text, DirectedAtUserMetadata(Some(user.id)))
    }
  }
// Resolves the reply screen name extracted from the text, if there is one. The extractor
// may return null, which is treated as "no reply screen name".
def extractReplyToUser(text: String): Future[Option[User]] = {
  val replyScreenName = Option(extractor.extractReplyScreenname(text))
  replyScreenName.fold[Future[Option[User]]](Future.None) { screenName =>
    getUser(UserKey(screenName))
  }
}
FutureArrow[Request, Option[Result]] { request =>
exceptionCounters {
(request.inReplyToTweetId.filter(_ > 0) match {
case None =>
Future.None
case Some(tweetId) =>
makeReplyToTweet(
tweetId,
request.tweetText,
User(request.authorId, request.authorScreenName),
request.prependImplicitMentions,
request.enableTweetToNarrowcasting,
request.excludeUserIds,
request.batchMode
)
}).flatMap {
case Some(r) =>
// Ensure that the author of this reply is not blocked by
// the user who they are replying to.
checkBlockRelationship(request.authorId, r)
.before(checkIPIPolicy(request, r.reply))
.before(Future.value(Some(r)))
case None if request.enableTweetToNarrowcasting =>
// We don't check the block relationship when the tweet is
// not part of a conversation (which is to say, we allow
// directed-at tweets from a blocked user.) These tweets
// will not cause notifications for the blocking user,
// despite the presence of the reply struct.
makeReplyToUser(request.tweetText)
case None =>
Future.None
}
}
}
}
}

View File

@ -1,352 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.flockdb.client._
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.additionalfields.AdditionalFields.setAdditionalFields
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala._
import com.twitter.tweetypie.thriftscala.entities.EntityExtractor
import com.twitter.tweetypie.tweettext.Truncator
import com.twitter.tweetypie.util.CommunityUtil
import com.twitter.tweetypie.util.EditControlUtil
/**
 * Parameters for loading the source tweet of a retweet.
 *
 * @param tweetId id of the tweet to load.
 * @param user user passed to the write-path query options for the lookup.
 * @param hydrateOptions hydration options passed to the write-path query options.
 */
case class SourceTweetRequest(
tweetId: TweetId,
user: User,
hydrateOptions: WritePathHydrationOptions)
object RetweetBuilder {
import TweetBuilder._
import UpstreamFailure._
type Type = FutureArrow[RetweetRequest, TweetBuilderResult]
val SGSTestRole = "socialgraph"
val log: Logger = Logger(getClass)
/**
* Retweets text gets RT and username prepended
*/
def composeRetweetText(text: String, sourceUser: User): String = {
  // Requires the source user's profile to be present; `.get` throws on an empty profile.
  val sourceScreenName = sourceUser.profile.get.screenName
  composeRetweetText(text, sourceScreenName)
}
/**
* Retweets text gets RT and username prepended
*/
def composeRetweetText(text: String, screenName: String): String = {
  // Prefix with "RT @<screenName>: " and let the truncator shorten the result if needed.
  val prefixedText = s"RT @$screenName: $text"
  Truncator.truncateForRetweet(prefixedText)
}
// We do not want to allow community tweets to be retweeted.
def validateNotCommunityTweet(sourceTweet: Tweet): Future[Unit] = {
  val isCommunityTweet = CommunityUtil.hasCommunity(sourceTweet.communities)
  // Fails with CommunityRetweetNotAllowed for community tweets; passes otherwise.
  if (!isCommunityTweet) Future.Unit
  else Future.exception(TweetCreateFailure.State(TweetCreateState.CommunityRetweetNotAllowed))
}
// We do not want to allow Trusted Friends tweets to be retweeted.
def validateNotTrustedFriendsTweet(sourceTweet: Tweet): Future[Unit] =
  // Only the presence of a Trusted Friends control matters here, so no value is bound.
  // Fails with TrustedFriendsRetweetNotAllowed when the control is set; passes otherwise.
  if (sourceTweet.trustedFriendsControl.isDefined) {
    Future.exception(TweetCreateFailure.State(TweetCreateState.TrustedFriendsRetweetNotAllowed))
  } else {
    Future.Unit
  }
// We do not want to allow retweet of a stale version of a tweet in an edit chain.
def validateStaleTweet(sourceTweet: Tweet): Future[Unit] = {
  // When `isLatestEdit` yields no value (per the original note: the source tweet has no
  // edit control), the tweet is treated as the latest version.
  val isLatestVersion =
    EditControlUtil.isLatestEdit(sourceTweet.editControl, sourceTweet.id).getOrElse(true)

  if (isLatestVersion) Future.Unit
  else Future.exception(TweetCreateFailure.State(TweetCreateState.StaleTweetRetweetNotAllowed))
}
/**
* Builds the RetweetBuilder
*/
def apply(
validateRequest: RetweetRequest => Future[Unit],
tweetIdGenerator: TweetIdGenerator,
tweetRepo: TweetRepository.Type,
userRepo: UserRepository.Type,
tflock: TFlockClient,
deviceSourceRepo: DeviceSourceRepository.Type,
validateUpdateRateLimit: RateLimitChecker.Validate,
spamChecker: Spam.Checker[RetweetSpamRequest] = Spam.DoNotCheckSpam,
updateUserCounts: (User, Tweet) => Future[User],
superFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type,
unretweetEdits: TweetDeletePathHandler.UnretweetEdits,
setEditWindowToSixtyMinutes: Gate[Unit]
): RetweetBuilder.Type = {
val entityExtactor = EntityExtractor.mutationAll.endo
val sourceTweetRepo: SourceTweetRequest => Stitch[Tweet] =
req => {
tweetRepo(
req.tweetId,
WritePathQueryOptions.retweetSourceTweet(req.user, req.hydrateOptions)
).rescue {
case _: FilteredState => Stitch.NotFound
}
.rescue {
convertRepoExceptions(TweetCreateState.SourceTweetNotFound, TweetLookupFailure(_))
}
}
val getUser = userLookup(userRepo)
val getSourceUser = sourceUserLookup(userRepo)
val getDeviceSource = deviceSourceLookup(deviceSourceRepo)
/**
* We exempt SGS test users from the check to get them through Block v2 testing.
*/
def isSGSTestRole(user: User): Boolean =
user.roles.exists { roles => roles.roles.contains(SGSTestRole) }
def validateCanRetweet(
user: User,
sourceUser: User,
sourceTweet: Tweet,
request: RetweetRequest
): Future[Unit] =
Future
.join(
validateNotCommunityTweet(sourceTweet),
validateNotTrustedFriendsTweet(sourceTweet),
validateSourceUserRetweetable(user, sourceUser),
validateStaleTweet(sourceTweet),
Future.when(!request.dark) {
if (request.returnSuccessOnDuplicate)
failWithRetweetIdIfAlreadyRetweeted(user, sourceTweet)
else
validateNotAlreadyRetweeted(user, sourceTweet)
}
)
.unit
def validateSourceUserRetweetable(user: User, sourceUser: User): Future[Unit] =
if (sourceUser.profile.isEmpty)
Future.exception(UserProfileEmptyException)
else if (sourceUser.safety.isEmpty)
Future.exception(UserSafetyEmptyException)
else if (sourceUser.view.isEmpty)
Future.exception(UserViewEmptyException)
else if (user.id != sourceUser.id && sourceUser.safety.get.isProtected)
Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetProtectedTweet))
else if (sourceUser.safety.get.deactivated)
Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetDeactivatedUser))
else if (sourceUser.safety.get.suspended)
Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetSuspendedUser))
else if (sourceUser.view.get.blockedBy && !isSGSTestRole(user))
Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetBlockingUser))
else if (sourceUser.profile.get.screenName.isEmpty)
Future.exception(
TweetCreateFailure.State(TweetCreateState.CannotRetweetUserWithoutScreenName)
)
else
Future.Unit
def tflockGraphContains(
graph: StatusGraph,
fromId: Long,
toId: Long,
dir: Direction
): Future[Boolean] =
tflock.contains(graph, fromId, toId, dir).rescue {
case ex: OverCapacity => Future.exception(ex)
case ex => Future.exception(TFlockLookupFailure(ex))
}
def getRetweetIdFromTflock(sourceTweetId: TweetId, userId: UserId): Future[Option[Long]] =
tflock
.selectAll(
Select(
sourceId = sourceTweetId,
graph = RetweetsGraph,
direction = Forward
).intersect(
Select(
sourceId = userId,
graph = UserTimelineGraph,
direction = Forward
)
)
)
.map(_.headOption)
def validateNotAlreadyRetweeted(user: User, sourceTweet: Tweet): Future[Unit] =
// use the perspective object from TLS if available, otherwise, check with tflock
(sourceTweet.perspective match {
case Some(perspective) =>
Future.value(perspective.retweeted)
case None =>
// we have to query the RetweetSourceGraph in the Reverse order because
// it is only defined in that direction, instead of bi-directionally
tflockGraphContains(RetweetSourceGraph, user.id, sourceTweet.id, Reverse)
}).flatMap {
case true =>
Future.exception(TweetCreateFailure.State(TweetCreateState.AlreadyRetweeted))
case false => Future.Unit
}
def failWithRetweetIdIfAlreadyRetweeted(user: User, sourceTweet: Tweet): Future[Unit] =
// use the perspective object from TLS if available, otherwise, check with tflock
(sourceTweet.perspective.flatMap(_.retweetId) match {
case Some(tweetId) => Future.value(Some(tweetId))
case None =>
getRetweetIdFromTflock(sourceTweet.id, user.id)
}).flatMap {
case None => Future.Unit
case Some(tweetId) =>
Future.exception(TweetCreateFailure.AlreadyRetweeted(tweetId))
}
def validateContributor(contributorIdOpt: Option[UserId]): Future[Unit] =
if (contributorIdOpt.isDefined)
Future.exception(TweetCreateFailure.State(TweetCreateState.ContributorNotSupported))
else
Future.Unit
case class RetweetSource(sourceTweet: Tweet, parentUserId: UserId)
/**
* Recursively follows a retweet chain to the root source tweet. Also returns user id from the
* first walked tweet as the 'parentUserId'.
* In practice, the depth of the chain should never be greater than 2 because
* share.sourceStatusId should always reference the root (unlike share.parentStatusId).
*/
def findRetweetSource(
tweetId: TweetId,
forUser: User,
hydrateOptions: WritePathHydrationOptions
): Future[RetweetSource] =
Stitch
.run(sourceTweetRepo(SourceTweetRequest(tweetId, forUser, hydrateOptions)))
.flatMap { tweet =>
getShare(tweet) match {
case None => Future.value(RetweetSource(tweet, getUserId(tweet)))
case Some(share) =>
findRetweetSource(share.sourceStatusId, forUser, hydrateOptions)
.map(_.copy(parentUserId = getUserId(tweet)))
}
}
// The builder arrow itself: validates the request, loads the user and the source tweet,
// runs the permission and spam checks, then assembles the retweet plus an updated copy of
// the source tweet into a TweetBuilderResult.
FutureArrow { request =>
for {
() <- validateRequest(request)
// Start the user lookup, id generation and device-source lookup before awaiting any
// of them; the results are only awaited by the three generators that follow.
userFuture = Stitch.run(getUser(request.userId))
tweetIdFuture = tweetIdGenerator()
devsrcFuture = Stitch.run(getDeviceSource(request.createdVia))
user <- userFuture
tweetId <- tweetIdFuture
devsrc <- devsrcFuture
// Resolve the tweet at the end of the retweet chain; hydration options default to
// simpleQuotedTweet = true when the request does not supply any.
rtSource <- findRetweetSource(
request.sourceStatusId,
user,
request.hydrationOptions.getOrElse(WritePathHydrationOptions(simpleQuotedTweet = true))
)
sourceTweet = rtSource.sourceTweet
sourceUser <- Stitch.run(getSourceUser(getUserId(sourceTweet), request.userId))
// We want to confirm that a user is actually allowed to
// retweet an Exclusive Tweet (only available to super followers)
() <- StratoSuperFollowRelationsRepository.Validate(
sourceTweet.exclusiveTweetControl,
user.id,
superFollowRelationsRepo)
() <- validateUser(user)
() <- validateUpdateRateLimit((user.id, request.dark))
() <- validateContributor(request.contributorUserId)
() <- validateCanRetweet(user, sourceUser, sourceTweet, request)
() <- unretweetEdits(sourceTweet.editControl, sourceTweet.id, user.id)
spamRequest = RetweetSpamRequest(
retweetId = tweetId,
sourceUserId = getUserId(sourceTweet),
sourceTweetId = sourceTweet.id,
sourceTweetText = getText(sourceTweet),
sourceUserName = sourceUser.profile.map(_.screenName),
safetyMetaData = request.safetyMetaData
)
spamResult <- spamChecker(spamRequest)
// NOTE(review): .get assumes user.safety is always populated at this point --
// presumably guaranteed by the user query / validateUser; confirm.
safety = user.safety.get
// The share points at the resolved source tweet and its author, while parentStatusId
// keeps the id that the request actually asked to retweet.
share = Share(
sourceStatusId = sourceTweet.id,
sourceUserId = sourceUser.id,
parentStatusId = request.sourceStatusId
)
retweetText = composeRetweetText(getText(sourceTweet), sourceUser)
// The creation time is derived from the generated tweet id.
createdAt = SnowflakeId(tweetId).time
coreData = TweetCoreData(
userId = request.userId,
text = retweetText,
createdAtSecs = createdAt.inSeconds,
createdVia = devsrc.internalName,
share = Some(share),
hasTakedown = safety.hasTakedown,
trackingId = request.trackingId,
nsfwUser = safety.nsfwUser,
nsfwAdmin = safety.nsfwAdmin,
narrowcast = request.narrowcast,
nullcast = request.nullcast
)
// The retweet gets an initial edit control that is explicitly marked as not edit eligible.
retweet = Tweet(
id = tweetId,
coreData = Some(coreData),
contributor = getContributor(request.userId),
editControl = Some(
EditControl.Initial(
EditControlUtil
.makeEditControlInitial(
tweetId = tweetId,
createdAt = createdAt,
setEditWindowToSixtyMinutes = setEditWindowToSixtyMinutes
)
.initial
.copy(isEditEligible = Some(false))
)
),
)
retweetWithEntities = entityExtactor(retweet)
retweetWithAdditionalFields = setAdditionalFields(
retweetWithEntities,
request.additionalFields
)
// update the perspective and counts fields of the source tweet to reflect the effects
// of the user performing a retweet, even though those effects haven't happened yet.
updatedSourceTweet = sourceTweet.copy(
perspective = sourceTweet.perspective.map {
_.copy(retweeted = true, retweetId = Some(retweet.id))
},
counts = sourceTweet.counts.map { c => c.copy(retweetCount = c.retweetCount.map(_ + 1)) }
)
// Rebinds `user` to the result of updateUserCounts; the result below uses this value.
user <- updateUserCounts(user, retweetWithAdditionalFields)
} yield {
TweetBuilderResult(
tweet = retweetWithAdditionalFields,
user = user,
createdAt = createdAt,
sourceTweet = Some(updatedSourceTweet),
sourceUser = Some(sourceUser),
parentUserId = Some(rtSource.parentUserId),
// A SilentFail verdict from the spam checker is surfaced as a flag, not as a failure.
isSilentFail = spamResult == Spam.SilentFail
)
}
}
}
}

View File

@ -1,78 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.geoduck.backend.hydration.thriftscala.HydrationContext
import com.twitter.geoduck.common.thriftscala.Constants
import com.twitter.geoduck.common.thriftscala.PlaceQuery
import com.twitter.geoduck.common.thriftscala.PlaceQueryFields
import com.twitter.geoduck.service.common.clientmodules.GeoduckGeohashLocate
import com.twitter.geoduck.service.thriftscala.LocationResponse
import com.twitter.geoduck.util.primitives.LatLon
import com.twitter.geoduck.util.primitives.{Geohash => GDGeohash}
import com.twitter.geoduck.util.primitives.{Place => GDPlace}
import com.twitter.servo.util.FutureArrow
import com.twitter.tweetypie.repository.GeoduckPlaceConverter
import com.twitter.tweetypie.{thriftscala => TP}
/**
 * Factory for reverse geocoders that resolve coordinates to a place through geoduck.
 */
object ReverseGeocoder {
  val log: Logger = Logger(getClass)

  /**
   * Wraps `rgc` so that coordinates rejected by `LatLon.isValid` resolve to
   * `Future.None` without the wrapped geocoder being called.
   */
  private def validatingRGC(rgc: ReverseGeocoder): ReverseGeocoder =
    FutureArrow {
      case (coords: TP.GeoCoordinates, language: PlaceLanguage) =>
        val coordinatesAreValid = LatLon.isValid(coords.latitude, coords.longitude)
        if (!coordinatesAreValid) Future.None
        else rgc((coords, language))
    }

  /**
   * create a Geo backed ReverseGeocoder
   *
   * The coordinates are turned into a single geohash which is located with geoduck,
   * requesting place names for consumer place types. Every failure along the way is
   * logged as a warning and produces None rather than a failed result, except for a
   * failure of the geoduck call itself, which is logged and left to propagate.
   */
  def fromGeoduck(geohashLocate: GeoduckGeohashLocate): ReverseGeocoder =
    validatingRGC(
      FutureArrow {
        case (geo: TP.GeoCoordinates, language: PlaceLanguage) =>
          if (log.isDebugEnabled) {
            log.debug("RGC'ing " + geo.toString() + " with geoduck")
          }

          val hydrationContext =
            HydrationContext(placeFields = Set[PlaceQueryFields](PlaceQueryFields.PlaceNames))
          val geohash = GDGeohash(LatLon(lat = geo.latitude, lon = geo.longitude))
          val placeQuery = PlaceQuery(placeTypes = Some(Constants.ConsumerPlaceTypes))

          geohashLocate
            .locateGeohashes(Seq(geohash.toThrift), placeQuery, hydrationContext)
            .onFailure { case ex => log.warn("failed to rgc " + geo.toString(), ex) }
            .map { (responses: Seq[Try[LocationResponse]]) =>
              // Only one geohash was requested, so only the first response is relevant.
              responses.headOption.flatMap {
                case Return(locationResponse) =>
                  GDPlace.tryLocationResponse(locationResponse) match {
                    case Return(tplaces) =>
                      GDPlace.pickConsumerLocation(tplaces).map { place: GDPlace =>
                        if (log.isDebugEnabled) {
                          log.debug("successfully rgc'd " + geo + " to " + place.id)
                        }
                        GeoduckPlaceConverter(language, place)
                      }
                    case Throw(ex) =>
                      log
                        .warn("rgc failed in response handling for coords: " + geo.toString(), ex)
                      None
                  }
                case Throw(ex) =>
                  log.warn("rgc failed for coords: " + geo.toString(), ex)
                  None
              }
            }
      }
    )
}

View File

@ -1,64 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.finagle.tracing.Trace
import com.twitter.service.gen.scarecrow.thriftscala.Retweet
import com.twitter.service.gen.scarecrow.thriftscala.TieredAction
import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult
import com.twitter.spam.features.thriftscala.SafetyMetaData
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.TweetCreateFailure
import com.twitter.tweetypie.repository.RetweetSpamCheckRepository
import com.twitter.tweetypie.thriftscala.TweetCreateState
/**
 * Input to the retweet spam check. All fields except `sourceUserName` are copied into the
 * Scarecrow request; `sourceUserName` is only used when building the failure for a
 * DenyByIpiPolicy verdict.
 */
case class RetweetSpamRequest(
retweetId: TweetId,
sourceUserId: UserId,
sourceTweetId: TweetId,
sourceTweetText: String,
sourceUserName: Option[String],
safetyMetaData: Option[SafetyMetaData])
/**
 * Spam checker for retweets that is backed by the Scarecrow service.
 */
object ScarecrowRetweetSpamChecker {
  val log: Logger = Logger(getClass)

  /** Translates our spam request into the Scarecrow thrift `Retweet`. */
  def requestToScarecrowRetweet(req: RetweetSpamRequest): Retweet =
    Retweet(
      id = req.retweetId,
      sourceUserId = req.sourceUserId,
      text = req.sourceTweetText,
      sourceTweetId = req.sourceTweetId,
      safetyMetaData = req.safetyMetaData
    )

  /**
   * Builds a checker that sends each retweet to `repo` and maps the tiered action that
   * comes back to an allow / silent-fail result or to a tweet-create failure.
   */
  def apply(
    stats: StatsReceiver,
    repo: RetweetSpamCheckRepository.Type
  ): Spam.Checker[RetweetSpamRequest] = {
    // Failed future carrying the given create state and optional custom deny message.
    def reject(state: TweetCreateState, denyMessage: Option[String]): Future[Spam.Result] =
      Future.exception(TweetCreateFailure.State(state, denyMessage))

    // Built per request because the DenyByIpiPolicy failure needs the source user name.
    def handler(request: RetweetSpamRequest): Spam.Checker[TieredAction] =
      Spam.handleScarecrowResult(stats) {
        case (TieredActionResult.NotSpam, _, _) =>
          Spam.AllowFuture
        case (TieredActionResult.SilentFail, _, _) =>
          Spam.SilentFailFuture
        case (TieredActionResult.UrlSpam, _, denyMessage) =>
          reject(TweetCreateState.UrlSpam, denyMessage)
        case (TieredActionResult.Deny, _, denyMessage) =>
          reject(TweetCreateState.Spam, denyMessage)
        case (TieredActionResult.DenyByIpiPolicy, _, denyMessage) =>
          Future.exception(Spam.DisabledByIpiFailure(request.sourceUserName, denyMessage))
        case (TieredActionResult.RateLimit, _, denyMessage) =>
          reject(TweetCreateState.SafetyRateLimitExceeded, denyMessage)
        case (TieredActionResult.Bounce, Some(b), _) =>
          Future.exception(TweetCreateFailure.Bounced(b))
      }

    spamRequest => {
      Trace.record(
        "com.twitter.tweetypie.ScarecrowRetweetSpamChecker.retweetId=" + spamRequest.retweetId)
      val scarecrowAction = Stitch.run(repo(requestToScarecrowRetweet(spamRequest)))
      scarecrowAction.flatMap(handler(spamRequest))
    }
  }
}

View File

@ -1,106 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.finagle.tracing.Trace
import com.twitter.relevance.feature_store.thriftscala.FeatureData
import com.twitter.relevance.feature_store.thriftscala.FeatureValue
import com.twitter.service.gen.scarecrow.thriftscala.TieredAction
import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult
import com.twitter.service.gen.scarecrow.thriftscala.TweetContext
import com.twitter.service.gen.scarecrow.thriftscala.TweetNew
import com.twitter.spam.features.thriftscala.SafetyMetaData
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.TweetCreateFailure
import com.twitter.tweetypie.handler.Spam.Checker
import com.twitter.tweetypie.repository.TweetSpamCheckRepository
import com.twitter.tweetypie.thriftscala.TweetCreateState
import com.twitter.tweetypie.thriftscala.TweetMediaTags
/**
 * Input to the tweet spam check. The tweet id, user id, text, safety metadata and
 * in-reply-to id are passed to Scarecrow directly; media tags and the quoted tweet / quoted
 * tweet author ids are passed as additional input features when present.
 */
case class TweetSpamRequest(
tweetId: TweetId,
userId: UserId,
text: String,
mediaTags: Option[TweetMediaTags],
safetyMetaData: Option[SafetyMetaData],
inReplyToTweetId: Option[TweetId],
quotedTweetId: Option[TweetId],
quotedTweetUserId: Option[UserId])
/**
 * Spam checker for tweets that is backed by the Scarecrow service.
 */
object ScarecrowTweetSpamChecker {
  val log: Logger = Logger(getClass)

  /**
   * Translates our spam request into the Scarecrow thrift `TweetNew`, attaching the
   * media-tagged user ids and quoted-tweet ids as additional input features. The feature
   * map is omitted entirely (None) when there are no features to send.
   */
  private def requestToScarecrowTweet(req: TweetSpamRequest): TweetNew = {
    // Ids of every user tagged in any of the tweet's media.
    val mediaTaggedUserIds =
      req.mediaTags
        .getOrElse(TweetMediaTags())
        .tagMap
        .values
        .flatten
        .flatMap(_.userId)
        .toSet

    // The tagged users are sent under two feature names.
    val mediaTaggedUserFeatures =
      if (mediaTaggedUserIds.isEmpty) {
        Seq.empty
      } else {
        Seq(
          "mediaTaggedUsers" -> FeatureData(Some(FeatureValue.LongSetValue(mediaTaggedUserIds))),
          "victimIds" -> FeatureData(Some(FeatureValue.LongSetValue(mediaTaggedUserIds)))
        )
      }

    val quotedTweetIdFeature =
      req.quotedTweetId.map(id => "quotedTweetId" -> FeatureData(Some(FeatureValue.LongValue(id))))

    val quotedTweetUserIdFeature =
      req.quotedTweetUserId.map(id =>
        "quotedTweetUserId" -> FeatureData(Some(FeatureValue.LongValue(id))))

    val featureMap =
      (mediaTaggedUserFeatures ++ quotedTweetIdFeature ++ quotedTweetUserIdFeature).toMap

    TweetNew(
      id = req.tweetId,
      userId = req.userId,
      text = req.text,
      additionalInputFeatures = Some(featureMap).filter(_.nonEmpty),
      safetyMetaData = req.safetyMetaData,
      inReplyToStatusId = req.inReplyToTweetId
    )
  }

  /** Maps a Scarecrow tiered action to a spam result or to a tweet-create failure. */
  private def tieredActionHandler(stats: StatsReceiver): Checker[TieredAction] = {
    // Failed future carrying the given create state and optional custom deny message.
    def reject(state: TweetCreateState, denyMessage: Option[String]): Future[Spam.Result] =
      Future.exception(TweetCreateFailure.State(state, denyMessage))

    Spam.handleScarecrowResult(stats) {
      case (TieredActionResult.NotSpam, _, _) =>
        Spam.AllowFuture
      case (TieredActionResult.SilentFail, _, _) =>
        Spam.SilentFailFuture
      case (TieredActionResult.DenyByIpiPolicy, _, _) =>
        Spam.DisabledByIpiPolicyFuture
      case (TieredActionResult.UrlSpam, _, denyMessage) =>
        reject(TweetCreateState.UrlSpam, denyMessage)
      case (TieredActionResult.Deny, _, denyMessage) =>
        reject(TweetCreateState.Spam, denyMessage)
      case (TieredActionResult.Captcha, _, denyMessage) =>
        reject(TweetCreateState.SpamCaptcha, denyMessage)
      case (TieredActionResult.RateLimit, _, denyMessage) =>
        reject(TweetCreateState.SafetyRateLimitExceeded, denyMessage)
      case (TieredActionResult.Bounce, Some(b), _) =>
        Future.exception(TweetCreateFailure.Bounced(b))
    }
  }

  /**
   * Builds a checker that submits each tweet to `repo` in the `Creation` context and
   * interprets the tiered action of the response.
   */
  def fromSpamCheckRepository(
    stats: StatsReceiver,
    repo: TweetSpamCheckRepository.Type
  ): Spam.Checker[TweetSpamRequest] = {
    val handler = tieredActionHandler(stats)

    spamRequest => {
      Trace.record("com.twitter.tweetypie.ScarecrowTweetSpamChecker.userId=" + spamRequest.userId)
      val scarecrowResponse =
        Stitch.run(repo(requestToScarecrowTweet(spamRequest), TweetContext.Creation))
      scarecrowResponse.flatMap(response => handler(response.tieredAction))
    }
  }
}

View File

@ -1,72 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.store.ScrubGeo
import com.twitter.tweetypie.store.ScrubGeoUpdateUserTimestamp
import com.twitter.tweetypie.thriftscala.DeleteLocationData
import com.twitter.tweetypie.thriftscala.GeoScrub
/**
 * Builders for the store events that correspond to geo-scrub requests.
 */
object ScrubGeoEventBuilder {
  val userQueryOptions: UserQueryOptions =
    UserQueryOptions(Set(UserField.Safety, UserField.Roles), UserVisibility.All)

  /**
   * Creates a user lookup that resolves to None for a missing user and bumps the
   * `user_not_found` counter whenever that happens.
   */
  private def userLoader(
    stats: StatsReceiver,
    userRepo: UserRepository.Optional
  ): UserId => Future[Option[User]] = {
    val userNotFoundCounter = stats.counter("user_not_found")

    userId => {
      val lookup =
        userRepo(UserKey(userId), userQueryOptions).onSuccess {
          case None => userNotFoundCounter.incr()
          case Some(_) => ()
        }
      Stitch.run(lookup)
    }
  }

  /** Builds the event that updates a user's geo-scrub timestamp. */
  object UpdateUserTimestamp {
    type Type = DeleteLocationData => Future[ScrubGeoUpdateUserTimestamp.Event]

    def apply(
      stats: StatsReceiver,
      userRepo: UserRepository.Optional
    ): Type = {
      val timestampDiffStat = stats.stat("now_delta_ms")
      val loadUser = userLoader(stats, userRepo)

      request: DeleteLocationData =>
        loadUser(request.userId).map { userOpt =>
          // delta between users requesting deletion and the time we publish to TweetEvents
          val publishDelayMs = Time.now.inMillis - request.timestampMs
          timestampDiffStat.add(publishDelayMs.toFloat)

          ScrubGeoUpdateUserTimestamp.Event(
            userId = request.userId,
            timestamp = Time.fromMilliseconds(request.timestampMs),
            optUser = userOpt
          )
        }
    }
  }

  /** Builds the event that scrubs geo data from a set of a user's tweets. */
  object ScrubTweets {
    type Type = GeoScrub => Future[ScrubGeo.Event]

    def apply(stats: StatsReceiver, userRepo: UserRepository.Optional): Type = {
      val loadUser = userLoader(stats, userRepo)

      geoScrub =>
        loadUser(geoScrub.userId).map { maybeUser =>
          ScrubGeo.Event(
            tweetIdSet = geoScrub.statusIds.toSet,
            userId = geoScrub.userId,
            enqueueMax = geoScrub.hosebirdEnqueue,
            optUser = maybeUser,
            timestamp = Time.now
          )
        }
    }
  }
}

View File

@ -1,119 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.tweetypie.thriftscala.Reply
import com.twitter.tweetypie.thriftscala.SelfThreadMetadata
import org.apache.thrift.protocol.TField
/**
 * Contract for computing the self-thread metadata of a reply from the tweet it replies to.
 */
trait SelfThreadBuilder {
// Tweet fields that `build` reads from the reply source tweet.
def requiredReplySourceFields: Set[TField] =
Set(
Tweet.CoreDataField, // for Reply and ConversationId
Tweet.SelfThreadMetadataField // for continuing existing self-threads
)
// Returns the self-thread metadata for a reply written by `authorUserId` to
// `replySourceTweet`, or None when the reply does not belong to a self-thread.
def build(authorUserId: UserId, replySourceTweet: Tweet): Option[SelfThreadMetadata]
}
/**
 * Builds the metadata that marks a tweet as part of a self-thread (tweetstorm).
 *
 * ReplyBuilder invokes this builder for tweets that pass an inReplyToStatusId and create a
 * Reply. It is called from there because ReplyBuilder has already loaded the "reply source
 * tweet", which carries everything needed to work out the self-thread metadata.
 *
 * The Tweet.SelfThreadMetadata schema can represent two kinds of self-thread:
 *   1. root self-thread : a thread that starts on its own rather than as a reply to another
 *                         tweet. Its self-thread ID equals the conversation ID.
 *   2. reply self-thread : a thread that starts as a reply to another user's tweet. Its
 *                          self-thread ID is the first tweet of the current self-reply
 *                          chain, which is not the conversation ID.
 *
 * Currently only type #1 "root self-thread" is handled.
 */
object SelfThreadBuilder {

  def apply(stats: StatsReceiver): SelfThreadBuilder = {
    // Scoped separately so that root self-threads (current functionality) can later be
    // told apart from reply self-threads (possible future functionality).
    val rootThreadStats = stats.scope("root_thread")

    // root_thread/start: a tweet only becomes the root of a self-thread once the first
    // self-reply is created. The counter is incremented on the write path of that
    // self-reply, when the root tweet is known not to have a SelfThreadMetadata yet. The
    // write path of the second tweet does not add the metadata to the first tweet; the
    // SelfThreadDaemon does that asynchronously.
    val rootThreadStartCounter = rootThreadStats.counter("start")

    // root_thread/continue: how often a tweet storm is continued from one of its leaf
    // tweets. Also incremented in the special case of a reply to the root tweet, which
    // does not yet have a SelfThreadMetadata(isLeaf=true).
    val rootThreadContinueCounter = rootThreadStats.counter("continue")

    // root_thread/branch: how often a self-thread is branched, i.e. the author replies to
    // a non-leaf tweet of an existing thread. The frequency of branching helps decide how
    // much priority to give to branching in the various tweet-delete use cases. Currently
    // the root tweet's SelfThreadMetadata is not fixed up when its replies are deleted.
    val rootThreadBranchCounter = rootThreadStats.counter("branch")

    def observeSelfThreadMetrics(replySourceSTM: Option[SelfThreadMetadata]): Unit =
      replySourceSTM match {
        case None => rootThreadStartCounter.incr()
        case Some(stm) if stm.isLeaf => rootThreadContinueCounter.incr()
        case Some(_) => rootThreadBranchCounter.incr()
      }

    new SelfThreadBuilder {
      override def build(
        authorUserId: UserId,
        replySourceTweet: Tweet
      ): Option[SelfThreadMetadata] =
        if (getUserId(replySourceTweet) != authorUserId) {
          // Replying to a different user currently never creates a self-thread, because
          // every self-thread must start at the root (and match the conversation ID).
          //
          // In the future a reply to a different user *might* be part of a self-thread,
          // but it would not be marked as such until the *next* tweet is created (at
          // which time the self_thread daemon goes back and marks the first tweet as in
          // the self-thread).
          None
        } else {
          // The author of the "reply source tweet" is the current author.
          val replySourceSTM = getSelfThreadMetadata(replySourceTweet)
          observeSelfThreadMetrics(replySourceSTM)

          // Work out whether replySourceTweet stands alone (is not a reply to a tweet).
          getReply(replySourceTweet) match {
            case None | Some(Reply(None, _, _)) =>
              // 'replySourceTweet' stands alone and therefore starts a new self-thread:
              // it either has no Reply at all, or a Reply without an inReplyToStatusId
              // (directed-at user).
              //
              // coreData and conversationId are required (see requiredReplySourceFields),
              // so a partial read would already have thrown in ReplyBuilder.
              val convoId = replySourceTweet.coreData.get.conversationId.get
              Some(SelfThreadMetadata(id = convoId, isLeaf = true))
            case _ =>
              // 'replySourceTweet' was itself a reply to a tweet: continue its self-thread
              // by inheriting whatever SelfThreadMetadata it has, always as a leaf.
              replySourceSTM.map(_.copy(isLeaf = true))
          }
        }
    }
  }
}

View File

@ -1,61 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.stitch.NotFound
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.repository.TweetQuery
import com.twitter.tweetypie.repository.TweetRepository
import com.twitter.tweetypie.repository.UserKey
import com.twitter.tweetypie.repository.UserQueryOptions
import com.twitter.tweetypie.repository.UserRepository
import com.twitter.tweetypie.repository.UserVisibility
import com.twitter.tweetypie.store.AsyncSetAdditionalFields
import com.twitter.tweetypie.store.SetAdditionalFields
import com.twitter.tweetypie.store.TweetStoreEventOrRetry
import com.twitter.tweetypie.thriftscala.AsyncSetAdditionalFieldsRequest
import com.twitter.tweetypie.thriftscala.SetAdditionalFieldsRequest
/**
 * Builds the [[SetAdditionalFields.Event]] for a request to set additional fields on an
 * existing tweet.
 */
object SetAdditionalFieldsBuilder {
  type Type = SetAdditionalFieldsRequest => Future[SetAdditionalFields.Event]

  val tweetOptions: TweetQuery.Options = TweetQuery.Options(include = GetTweetsHandler.BaseInclude)

  /**
   * The returned builder loads the tweet named by the request's additional fields in order
   * to attach its author id to the event. A not-found result from the repository is
   * translated by `HandlerError.translateNotFoundToClientError`.
   */
  def apply(tweetRepo: TweetRepository.Type): Type = { request =>
    val tweetId = request.additionalFields.id
    val lookup =
      tweetRepo(tweetId, tweetOptions)
        .rescue(HandlerError.translateNotFoundToClientError(tweetId))

    Stitch.run(lookup).map { tweet =>
      SetAdditionalFields.Event(
        additionalFields = request.additionalFields,
        userId = getUserId(tweet),
        timestamp = Time.now
      )
    }
  }
}
/**
 * Builds the async-path event for a set-additional-fields request.
 */
object AsyncSetAdditionalFieldsBuilder {
  type Type = AsyncSetAdditionalFieldsRequest => Future[
    TweetStoreEventOrRetry[AsyncSetAdditionalFields.Event]
  ]

  val userQueryOpts: UserQueryOptions = UserQueryOptions(Set(UserField.Safety), UserVisibility.All)

  /**
   * The returned builder loads the request's user and hands both to
   * `AsyncSetAdditionalFields.Event.fromAsyncRequest`. A missing user fails the build with
   * `HandlerError.userNotFound`.
   */
  def apply(userRepo: UserRepository.Type): Type = { request =>
    val userId = request.userId
    val lookup =
      userRepo(UserKey.byId(userId), userQueryOpts)
        .rescue { case NotFound => Stitch.exception(HandlerError.userNotFound(userId)) }

    Stitch.run(lookup).map(AsyncSetAdditionalFields.Event.fromAsyncRequest(request, _))
  }
}

View File

@ -1,45 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.tweetypie.store.SetRetweetVisibility
import com.twitter.tweetypie.thriftscala.SetRetweetVisibilityRequest
import com.twitter.tweetypie.thriftscala.Share
import com.twitter.tweetypie.thriftscala.Tweet
/**
 * Create a [[SetRetweetVisibility.Event]] from a [[SetRetweetVisibilityRequest]] and then
 * pipe the event to [[store.SetRetweetVisibility]]. The event contains the information
 * to determine if a retweet should be included in its source tweet's retweet count.
 *
 * Showing/hiding a retweet count is done by calling TFlock to modify an edge's state between
 * `Positive` <--> `Archived` in the RetweetsGraph(6) and modifying the count in cache directly.
 */
object SetRetweetVisibilityHandler {
  type Type = SetRetweetVisibilityRequest => Future[Unit]

  /**
   * @param tweetGetter loads the retweet named by the request, None if it cannot be found
   * @param setRetweetVisibilityStore performs the store write for the built event
   * @return a handler whose future completes when the store write has completed (or
   *         immediately when there is nothing to write) and fails when the lookup or the
   *         store write fails
   */
  def apply(
    tweetGetter: TweetId => Future[Option[Tweet]],
    setRetweetVisibilityStore: SetRetweetVisibility.Event => Future[Unit]
  ): Type =
    req =>
      // flatMap (rather than map) so that the store's Future becomes part of the result.
      // Mapping here would discard that Future: the handler would report success before
      // the write finished and would silently drop a failed write.
      tweetGetter(req.retweetId).flatMap { retweetOpt =>
        val eventOpt =
          retweetOpt.flatMap { retweet =>
            getShare(retweet).map { share: Share =>
              SetRetweetVisibility.Event(
                retweetId = req.retweetId,
                visible = req.visible,
                srcId = share.sourceStatusId,
                retweetUserId = getUserId(retweet),
                srcTweetUserId = share.sourceUserId,
                timestamp = Time.now
              )
            }
          }

        eventOpt match {
          case Some(event) =>
            setRetweetVisibilityStore(event)
          case None =>
            // No-op if either the retweet has been deleted or it has no source id.
            // If deleted, we do not want to accidentally undelete a legitimately deleted
            // retweet. If there is no source id, we do not know which source tweet's
            // count to modify.
            Future.Unit
        }
      }
}

View File

@ -1,99 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.botmaker.thriftscala.BotMakerResponse
import com.twitter.bouncer.thriftscala.Bounce
import com.twitter.finagle.tracing.Trace
import com.twitter.relevance.feature_store.thriftscala.FeatureData
import com.twitter.relevance.feature_store.thriftscala.FeatureValue.StrValue
import com.twitter.service.gen.scarecrow.thriftscala.TieredAction
import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult
import com.twitter.tweetypie.core.TweetCreateFailure
import com.twitter.tweetypie.thriftscala.TweetCreateState
/**
 * Shared vocabulary for spam checking on the write path: the possible results, the
 * `Checker` function type, and combinators for composing checkers.
 */
object Spam {
  sealed trait Result
  case object Allow extends Result
  case object SilentFail extends Result
  case object DisabledByIpiPolicy extends Result

  // Pre-built successful futures, one per result.
  val AllowFuture: Future[Allow.type] = Future.value(Allow)
  val SilentFailFuture: Future[SilentFail.type] = Future.value(SilentFail)
  val DisabledByIpiPolicyFuture: Future[DisabledByIpiPolicy.type] =
    Future.value(DisabledByIpiPolicy)

  /**
   * Builds the `DisabledByIpiPolicy` create failure. The message is the custom deny
   * message when there is one, otherwise a default text that mentions `userName` when it
   * is known.
   */
  def DisabledByIpiFailure(
    userName: Option[String],
    customDenyMessage: Option[String] = None
  ): TweetCreateFailure.State = {
    val errorMsg =
      customDenyMessage
        .orElse(userName.map { name =>
          s"Some actions on this ${name} Tweet have been disabled by Twitter."
        })
        .getOrElse("Some actions on this Tweet have been disabled by Twitter.")

    TweetCreateFailure.State(TweetCreateState.DisabledByIpiPolicy, Some(errorMsg))
  }

  type Checker[T] = T => Future[Result]

  /**
   * Dummy spam checker that always allows requests.
   */
  val DoNotCheckSpam: Checker[AnyRef] = _ => AllowFuture

  /** Runs `checker` only while `gate` is open; otherwise allows the request. */
  def gated[T](gate: Gate[Unit])(checker: Checker[T]): Checker[T] = { req =>
    if (!gate()) AllowFuture
    else checker(req)
  }

  /** Picks between two checkers per request, according to `gate`. */
  def selected[T](gate: Gate[Unit])(ifTrue: Checker[T], ifFalse: Checker[T]): Checker[T] = {
    req =>
      val chosen = gate.select(ifTrue, ifFalse)()
      chosen(req)
  }

  /** Runs `effect` on the input before delegating to `check`. */
  def withEffect[T](check: Checker[T], effect: T => Unit): T => Future[Result] =
    (input: T) => {
      effect(input)
      check(input)
    }

  /**
   * Wrapper that implicitly allows retweet or tweet creation when spam
   * checking fails. A [[TweetCreateFailure]] is a verdict rather than a checking failure,
   * so it is passed through unchanged.
   */
  def allowOnException[T](checker: Checker[T]): Checker[T] = { req =>
    checker(req).rescue {
      case verdict: TweetCreateFailure => Future.exception(verdict)
      case _ => AllowFuture
    }
  }

  /**
   * Handler for scarecrow result to be used by a Checker.
   *
   * Counts and traces the result code, extracts the optional custom deny message, and
   * passes (result code, bounce, deny message) to `handler`. A combination that `handler`
   * does not cover is counted as `unexpected_result` and allowed.
   */
  def handleScarecrowResult(
    stats: StatsReceiver
  )(
    handler: PartialFunction[(TieredActionResult, Option[Bounce], Option[String]), Future[Result]]
  ): Checker[TieredAction] = { result =>
    stats.scope("scarecrow_result").counter(result.resultCode.name).incr()
    Trace.record("com.twitter.tweetypie.Spam.scarecrow_result=" + result.resultCode.name)

    /*
     * A bot can return a custom DenyMessage
     *
     * If it does, we substitute this for the 'message' in the ValidationError.
     * Only a string-valued "DenyMessage" output feature counts.
     */
    val customDenyMessage: Option[String] =
      result.botMakerResponse
        .flatMap((response: BotMakerResponse) => response.outputFeatures)
        .flatMap(_.get("DenyMessage"))
        .flatMap((feature: FeatureData) => feature.featureValue)
        .collect { case text: StrValue => text.strValue }

    handler.applyOrElse(
      (result.resultCode, result.bounce, customDenyMessage),
      withEffect(DoNotCheckSpam, (_: AnyRef) => stats.counter("unexpected_result").incr())
    )
  }
}

View File

@ -1,76 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.servo.util.FutureArrow
import com.twitter.takedown.util.TakedownReasons._
import com.twitter.tweetypie.store.Takedown
import com.twitter.tweetypie.thriftscala.TakedownRequest
import com.twitter.tweetypie.thriftscala.Tweet
import com.twitter.tweetypie.util.Takedowns
/**
 * Handles the TakedownRequest objects sent to Tweetypie's takedown endpoint.
 *
 * A request names the takedown countries/reasons to add and to remove. It also carries
 * side effect flags for setting the tweet's has_takedown bit, scribing to Guano, and
 * enqueuing to EventBus. See the TakedownRequest documentation in the thrift definition
 * for more about the inputs of the endpoint.
 */
object TakedownHandler {
  type Type = FutureArrow[TakedownRequest, Unit]

  /**
   * @param getTweet loads the tweet the request refers to
   * @param getUser loads the author of that tweet
   * @param writeTakedown performs the takedown write; only invoked when the request
   *                      actually changes the tweet
   */
  def apply(
    getTweet: FutureArrow[TweetId, Tweet],
    getUser: FutureArrow[UserId, User],
    writeTakedown: FutureEffect[Takedown.Event]
  ): Type =
    FutureArrow { request =>
      getTweet(request.tweetId).flatMap { tweet =>
        getUser(getUserId(tweet)).flatMap { user =>
          val userHasTakedowns = user.takedowns.map(userTakedownsToReasons).exists(_.nonEmpty)
          val existingTweetReasons = Takedowns.fromTweet(tweet).reasons

          // Country codes and reasons are merged into one de-duplicated, sorted list each.
          val removals = (request.countriesToRemove.map(countryCodeToReason) ++
            request.reasonsToRemove.map(normalizeReason)).distinct.sortBy(_.toString)
          val additions = (request.countriesToAdd.map(countryCodeToReason) ++
            request.reasonsToAdd.map(normalizeReason)).distinct.sortBy(_.toString)

          // Removal is applied after addition, so a reason that is both added and removed
          // ends up removed.
          val updatedTweetTakedowns =
            (existingTweetReasons ++ additions)
              .filterNot(removals.contains)
              .toSeq
              .sortBy(_.toString)

          val (cs, rs) = Takedowns.partitionReasons(updatedTweetTakedowns)

          val updatedTweet = Lens.setAll(
            tweet,
            // these fields are cached on the Tweet in CachingTweetStore and written in
            // ManhattanTweetStore
            TweetLenses.hasTakedown -> (updatedTweetTakedowns.nonEmpty || userHasTakedowns),
            TweetLenses.tweetypieOnlyTakedownCountryCodes -> Some(cs).filter(_.nonEmpty),
            TweetLenses.tweetypieOnlyTakedownReasons -> Some(rs).filter(_.nonEmpty)
          )

          val written = writeTakedown.when(tweet != updatedTweet) {
            Takedown.Event(
              tweet = updatedTweet,
              timestamp = Time.now,
              user = Some(user),
              takedownReasons = updatedTweetTakedowns,
              reasonsToAdd = additions,
              reasonsToRemove = removals,
              auditNote = request.auditNote,
              host = request.host,
              byUserId = request.byUserId,
              eventbusEnqueue = request.eventbusEnqueue,
              scribeForAudit = request.scribeForAudit,
              updateCodesAndReasons = true
            )
          }

          written.map(_ => ())
        }
      }
    }
}

View File

@ -1,402 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.servo.cache.Cache
import com.twitter.servo.util.Scribe
import com.twitter.tweetypie.serverutil.ExceptionCounter
import com.twitter.tweetypie.thriftscala.PostTweetResult
import com.twitter.tweetypie.util.TweetCreationLock.Key
import com.twitter.tweetypie.util.TweetCreationLock.State
import com.twitter.util.Base64Long
import scala.util.Random
import scala.util.control.NoStackTrace
import scala.util.control.NonFatal
/**
 * Returned from TweetCreationLock when there is an in-progress cache entry for the key.
 *
 * The entry may also exist because an earlier attempt was not cleaned up properly; the two
 * situations cannot be told apart. This is resolved by returning TweetCreationInProgress
 * and giving the cache entry a (relatively) short TTL, so that the client and/or user may
 * retry.
 */
case object TweetCreationInProgress extends Exception with NoStackTrace
/**
 * Thrown when the TweetCreationLock discovers that there is already
 * a tweet with the specified uniqueness id.
 *
 * @param tweetId the id of the tweet that was already created
 */
case class DuplicateTweetCreation(tweetId: TweetId) extends Exception with NoStackTrace
/**
 * A lock around tweet creation, identified by a `Key`.
 */
trait TweetCreationLock {
// Wraps `insert`, which is passed by name and so is only evaluated if the implementation
// chooses to run it.
// NOTE(review): presumably `insert` runs only once the lock for `key` has been obtained;
// how `dark` and `nullcast` affect locking is not visible in this trait -- confirm
// against the implementation.
def apply(
key: Key,
dark: Boolean,
nullcast: Boolean
)(
insert: => Future[PostTweetResult]
): Future[PostTweetResult]
// Releases the lock held for `key`.
def unlock(key: Key): Future[Unit]
}
object CacheBasedTweetCreationLock {

  /**
   * Signals that writing the lock value failed because the cache state of the key was
   * changed in the meantime (by another process or by cache eviction).
   */
  case object UnexpectedCacheState extends Exception with NoStackTrace

  /**
   * Thrown when updating the lock cache has failed more often than allowed.
   *
   * @param failures the exceptions of the individual attempts
   */
  case class RetriesExhausted(failures: Seq[Exception]) extends Exception with NoStackTrace

  /**
   * Decides whether a failed lock attempt is worth repeating. The three definitive
   * outcomes (creation in progress, duplicate creation, retries exhausted) are not
   * retried; everything else is.
   */
  def shouldRetry(e: Exception): Boolean =
    e match {
      case TweetCreationInProgress | _: DuplicateTweetCreation | _: RetriesExhausted => false
      case _ => true
    }

  /**
   * Chooses the TTL of a cache entry from its state: `longTtl` for an already-created
   * tweet, `shortTtl` for every other state.
   */
  def ttlChooser(shortTtl: Duration, longTtl: Duration): (Key, State) => Duration = {
    (_, lockState) =>
      lockState match {
        case _: State.AlreadyCreated => longTtl
        case _ => shortTtl
      }
  }

  /**
   * The log format is tab-separated (base 64 tweet_id, base 64
   * uniqueness_id). It's logged this way in order to minimize the
   * storage requirement and to make it easy to analyze. Each log line
   * should be 24 bytes, including newline.
   */
  val formatUniquenessLogEntry: ((String, TweetId)) => String =
    entry => Base64Long.toBase64(entry._2) + "\t" + entry._1

  /**
   * Scribes each uniqueness id together with its tweet id. Multiple tweets created with
   * the same uniqueness id can then be detected, which gives the failure rate of the
   * uniqueness id check.
   *
   * A test category is used because the information only has to be kept for long enough
   * to find any duplicates.
   */
  val ScribeUniquenessId: FutureEffect[(String, TweetId)] =
    Scribe("test_tweetypie_uniqueness_id").contramap(formatUniquenessLogEntry)

  private[this] val UniquenessIdLog = Logger("com.twitter.tweetypie.handler.UniquenessId")

  /**
   * Writes the uniqueness ids to a standard logger instead (for use when it's not
   * production traffic).
   */
  val LogUniquenessId: FutureEffect[(String, TweetId)] =
    FutureEffect[(String, TweetId)] { entry =>
      val line = formatUniquenessLogEntry(entry)
      UniquenessIdLog.info(line)
      Future.Unit
    }

  private val log = Logger(getClass)
}
/**
* This class adds locking around Tweet creation, to prevent creating
* duplicate tweets when two identical requests arrive simultaneously.
* A lock is created in cache using the user id and a hash of the tweet text
* in the case of tweets, or the source_status_id in the case of retweets.
* If another process attempts to lock for the same user and hash, the request
* fails as a duplicate. The lock lasts for 10 seconds if it is not deleted.
* Given the hard timeout of 5 seconds on all requests, it should never take
* us longer than 5 seconds to create a request, but we've observed times of up
* to 10 seconds to create statuses for some of our more popular users.
*
* When a request with a uniqueness id is successful, the id of the
* created tweet will be stored in the cache so that subsequent
* requests can retrieve the originally-created tweet rather than
* duplicating creation or getting an exception.
*/
class CacheBasedTweetCreationLock(
cache: Cache[Key, State],
maxTries: Int,
stats: StatsReceiver,
logUniquenessId: FutureEffect[(String, TweetId)])
extends TweetCreationLock {
import CacheBasedTweetCreationLock._
// Every lock lifecycle event is counted under the "event" stats scope.
private[this] val eventCounters = stats.scope("event")

/**
 * Records one named lifecycle event for key `k`: emits a debug log
 * line of the form `name:key` and increments the counter `name`.
 */
private[this] def event(k: Key, name: String): Unit = {
  log.debug(s"$name:$k")
  val counter = eventCounters.counter(name)
  counter.incr()
}
/**
 * Runs `action`, re-running it each time it fails with an exception
 * accepted by `shouldRetry`, until `maxTries` failures have been
 * collected. At that point the result is a failed future carrying
 * `RetriesExhausted` with the failures in the order they occurred.
 * Failures that `shouldRetry` rejects are passed through unchanged.
 *
 * `action` is by-name, so it is re-evaluated for every attempt.
 */
private[this] def retryLoop[A](action: => Future[A]): Future[A] = {
  def attempt(priorFailures: List[Exception]): Future[A] =
    if (priorFailures.length < maxTries) {
      action.rescue {
        case e: Exception if shouldRetry(e) => attempt(e :: priorFailures)
      }
    } else {
      // Failures were prepended, so reverse to report oldest first.
      Future.exception(RetriesExhausted(priorFailures.reverse))
    }

  attempt(Nil)
}
// Fed with every exception that makes lock acquisition fail (see the
// `onFailure` in createWithLock). Presumably counts per exception type
// under `stats` — confirm against ExceptionCounter.
private[this] val lockerExceptions = ExceptionCounter(stats)
/**
 * Obtain the lock for creating a tweet. If the returned future
 * succeeds, then the lock value was successfully set in cache (or
 * was already present with our own token), which indicates a high
 * probability that this is the only process that is attempting to
 * create this tweet. (The uncertainty comes from the possibility of
 * lock entries missing from the cache.)
 *
 * The whole body runs inside `retryLoop`, so each retry re-reads the
 * cache entry and takes a fresh `lockTime`.
 *
 * @param k cache key identifying the tweet creation being guarded
 * @param token random value identifying this attempt, stored in the
 * InProgress entry so that we can recognize our own lock later
 *
 * @return the time recorded in the InProgress entry: `lockTime` when
 * this call wrote the entry, or the previously stored timestamp when
 * an entry with our own token was already present.
 *
 * The failures below are delivered as failed futures:
 *
 * @throws TweetCreationInProgress if there is another process
 * trying to create this tweet.
 *
 * @throws DuplicateTweetCreation if a tweet has already been
 * created for a duplicate request. The exception has the id of
 * the created tweet.
 *
 * @throws RetriesExhausted if obtaining the lock failed more than
 * the requisite number of times.
 */
private[this] def obtainLock(k: Key, token: Long): Future[Time] = retryLoop {
val lockTime = Time.now
// Get the current state for this key, together with the checksum
// needed for a later compare-and-set.
cache
.getWithChecksum(Seq(k))
.flatMap(initialStateKvr => Future.const(initialStateKvr(k)))
.flatMap {
case None =>
// Nothing in cache for this key, so try to create the entry.
cache
.add(k, State.InProgress(token, lockTime))
.flatMap {
case true => Future.value(lockTime)
// The add did not take effect, i.e. the entry appeared after our
// read. NOTE(review): presumably `shouldRetry` accepts
// UnexpectedCacheState so that retryLoop re-reads — confirm.
case false => Future.exception(UnexpectedCacheState)
}
case Some((Throw(e), _)) =>
// The cached value could not be read as a State; surface the error.
Future.exception(e)
case Some((Return(st), cs)) =>
st match {
case State.Unlocked =>
// There is an Unlocked entry for this key, which
// implies that a previous attempt was cleaned up.
// Replace it only if it is unchanged since our read.
cache
.checkAndSet(k, State.InProgress(token, lockTime), cs)
.flatMap {
case true => Future.value(lockTime)
case false => Future.exception(UnexpectedCacheState)
}
case State.InProgress(cachedToken, creationStartedTimestamp) =>
if (cachedToken == token) {
// There is an in-progress entry for *this process*. This
// can happen on a retry if the `add` actually succeeds
// but the future fails. The retry can return the result
// of the add that we previously tried.
Future.value(creationStartedTimestamp)
} else {
// There is an in-progress entry for *a different
// process*. This implies that there is another tweet
// creation in progress for *this tweet*.
val tweetCreationAge = Time.now - creationStartedTimestamp
// Only keys that carry a uniqueness id are logged.
k.uniquenessId.foreach { id =>
log.info(
"Found an in-progress tweet creation for uniqueness id %s %s ago"
.format(id, tweetCreationAge)
)
}
stats.stat("in_progress_age_ms").add(tweetCreationAge.inMilliseconds)
Future.exception(TweetCreationInProgress)
}
case State.AlreadyCreated(tweetId, creationStartedTimestamp) =>
// Another process successfully created a tweet for this
// key. Record how long ago that creation started and report
// the existing tweet id to the caller.
val tweetCreationAge = Time.now - creationStartedTimestamp
stats.stat("already_created_age_ms").add(tweetCreationAge.inMilliseconds)
Future.exception(DuplicateTweetCreation(tweetId))
}
}
}
/**
 * Best-effort attempt to remove this process' lock entry from the
 * cache. This is done by writing a short-lived tombstone
 * (`State.Unlocked`) with a compare-and-set, so that the entry is
 * only overwritten if it is still the entry written by this process
 * rather than another process' entry.
 *
 * The outcome is counted as `cleanup_attempt_succeeded` or
 * `cleanup_attempt_failed`. Non-fatal failures are then swallowed so
 * that a failed cleanup never replaces the result of the tweet
 * creation it follows; fatal errors are left to propagate, matching
 * the handling in `creationComplete`.
 *
 * @param k cache key of the lock entry
 * @param token the token this process stored when taking the lock
 */
private[this] def cleanupLoop(k: Key, token: Long): Future[Unit] =
  retryLoop {
    // Instead of deleting the value, we attempt to write Unlocked,
    // because we only want to remove the entry if it is the value
    // that we wrote ourselves, and there is no delete call that is
    // conditional on the extant value.
    cache
      .getWithChecksum(Seq(k))
      .flatMap(kvr => Future.const(kvr(k)))
      .flatMap {
        case None =>
          // Nothing in the cache for this tweet creation, so cleanup
          // is trivially successful.
          Future.Unit
        case Some((tryV, cs)) =>
          // If the value failed to deserialize, let the error bubble
          // up: there is no good recovery procedure, since we can't
          // tell whether the entry is ours.
          Future.const(tryV).flatMap {
            case State.InProgress(presentToken, _) if presentToken == token =>
              // This is *our* in-progress marker, so overwrite it with
              // the tombstone. A `false` result is fine: it means
              // someone else overwrote the value and there is nothing
              // left for us to clean up.
              cache.checkAndSet(k, State.Unlocked, cs).unit
            case State.InProgress(_, _) =>
              // Another request has overwritten our state before we
              // cleaned it up. This should only happen when our token
              // was cleared from cache and another process started a
              // duplicate create. It should be very infrequent; count
              // it just to be sure.
              event(k, "other_attempt_in_progress")
              Future.Unit
            case _ =>
              // Cleanup has succeeded, because we are not responsible
              // for the cache entry anymore.
              Future.Unit
          }
      }
  }.onSuccess { _ => event(k, "cleanup_attempt_succeeded") }
    .onFailure { _ => event(k, "cleanup_attempt_failed") }
    // Best effort: recover from ordinary failures only, never from
    // fatal errors.
    .handle { case NonFatal(_) => () }
/**
 * Records in the cache that the tweet for key `k` has been created.
 * Later calls to `apply` with the same key will then fail with a
 * DuplicateTweetCreation carrying `tweetId`.
 *
 * The returned future does not fail for non-fatal errors: the
 * outcome is only counted (`mark_created_succeeded` /
 * `mark_created_failed`).
 */
private[this] def creationComplete(k: Key, tweetId: TweetId, lockTime: Time): Future[Unit] = {
  // The write is unconditional: whatever the entry currently holds,
  // AlreadyCreated is the state we want for this key.
  val marked = retryLoop { cache.set(k, State.AlreadyCreated(tweetId, lockTime)) }

  marked
    .onSuccess { _ => event(k, "mark_created_succeeded") }
    .onFailure { _ => event(k, "mark_created_failed") }
    // Letting the request succeed is the lesser evil here: returning a
    // failure for a tweet that was in fact created is more harmful
    // than failing to honor the uniqueness id on a later request.
    .handle { case NonFatal(_) => () }
}
/**
 * Runs `create` under the creation lock for key `k`.
 *
 * A fresh random token identifies this attempt. If the lock cannot be
 * obtained because retries ran out, creation proceeds anyway; any
 * other lock failure is counted and fails the result without running
 * `create`. After `create` finishes, a result that contains a tweet
 * marks the key as created, and every other outcome triggers a
 * cleanup of the lock entry. In both cases the original outcome of
 * `create` is what the returned future carries.
 */
private[this] def createWithLock(
  k: Key,
  create: => Future[PostTweetResult]
): Future[PostTweetResult] = {
  val token = Random.nextLong
  event(k, "lock_attempted")

  val lockAcquired: Future[Time] =
    obtainLock(k, token)
      .onSuccess { _ => event(k, "lock_obtained") }
      .handle {
        // Out of retries while trying to get the lock: go ahead with
        // tweet creation regardless. Keep an eye on how often this
        // happens, because the only other symptom will be duplicate
        // tweet creations.
        case RetriesExhausted(_) =>
          event(k, "lock_failure_ignored")
          // Stand-in for the time at which the lock was obtained.
          Time.now
      }
      .onFailure { e => lockerExceptions(e) }

  lockAcquired.flatMap { lockTime =>
    create.transform {
      case created @ Return(PostTweetResult(_, Some(tweet), _, _, _, _, _)) =>
        event(k, "create_succeeded")
        k.uniquenessId.foreach { u => logUniquenessId((u, tweet.id)) }
        // Remember the id of the created tweet in the lock entry,
        // which also extends its TTL.
        creationComplete(k, tweet.id, lockTime).before(Future.const(created))

      case notCreated =>
        notCreated match {
          case Throw(e) =>
            log.debug(s"Tweet creation failed for key $k", e)
          case Return(r) =>
            log.debug(s"Tweet creation failed for key $k, so unlocking: $r")
        }
        event(k, "create_failed")
        // Release the lock after the failed create.
        cleanupLoop(k, token).before(Future.const(notCreated))
    }
  }
}
/**
 * Best-effort removal of the duplicate-detection cache entry for this
 * key, retried through `retryLoop`. A failure here is not
 * catastrophic: at worst the user has to wait for the short TTL to
 * elapse before tweeting succeeds.
 */
def unlock(k: Key): Future[Unit] = {
  val deleted = retryLoop { cache.delete(k).unit }
  deleted.onSuccess { _ => event(k, "deleted") }
}
/**
 * Prevent duplicate tweet creation.
 *
 * Guarantees that at most one creation for a given key is running at
 * a time. When `create` fails, the key is removed from the cache;
 * when it succeeds, the key is kept. Dark and nullcast creates skip
 * the lock completely and are only counted.
 *
 * @throws DuplicateTweetCreation if a tweet has already been
 * created by a previous request. The exception has the id of the
 * created tweet.
 *
 * @throws TweetCreationInProgress. See the documentation above.
 */
def apply(
  k: Key,
  isDark: Boolean,
  nullcast: Boolean
)(
  create: => Future[PostTweetResult]
): Future[PostTweetResult] = {
  // Name of the event to count when the lock is bypassed; a dark
  // request is counted as dark even if it is also nullcast.
  val bypassEvent: Option[String] =
    if (isDark) Some("dark_create")
    else if (nullcast) Some("nullcast_create")
    else None

  bypassEvent match {
    case Some(name) =>
      event(k, name)
      create

    case None =>
      createWithLock(k, create).onFailure {
        // Another process is creating this same tweet...
        case TweetCreationInProgress =>
          event(k, "tweet_creation_in_progress")
        // ...or has already created it.
        case _: DuplicateTweetCreation =>
          event(k, "tweet_already_created")
        case _ => ()
      }
  }
}
}

View File

@ -1,811 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.conversions.DurationOps.RichDuration
import com.twitter.servo.exception.thriftscala.ClientError
import com.twitter.servo.exception.thriftscala.ClientErrorCause
import com.twitter.servo.util.FutureArrow
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.stitch.Stitch
import com.twitter.stitch.NotFound
import com.twitter.timelineservice.thriftscala.PerspectiveResult
import com.twitter.timelineservice.{thriftscala => tls}
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.store._
import com.twitter.tweetypie.thriftscala._
import com.twitter.util.Time
import com.twitter.util.Try
import Try._
import com.twitter.spam.rtf.thriftscala.SafetyLabelType
import com.twitter.tweetypie.backends.TimelineService.GetPerspectives
import com.twitter.tweetypie.util.EditControlUtil
import scala.util.control.NoStackTrace
/**
 * Failure for a cascaded delete whose target tweet could not be loaded.
 * `cascadedDeleteTweet` fails with this when the tweet is NotFound but
 * was created recently enough for that to be treated as temporary.
 * Carries no stack trace.
 *
 * @param retweetId id of the tweet that could not be loaded
 */
case class CascadedDeleteNotAvailable(retweetId: TweetId) extends Exception with NoStackTrace {
// Two-line message; stripMargin removes the leading `|` markers.
override def getMessage: String =
s"""|Cascaded delete tweet failed because tweet $retweetId
|is not present in cache or manhattan.""".stripMargin
}
/**
 * Types and constants shared by the tweet delete path.
 */
object TweetDeletePathHandler {

  /**
   * Function shape of a delete-tweets call.
   * NOTE(review): the Boolean is presumably the `isUnretweetEdits`
   * flag of `deleteTweets` — confirm.
   */
  type DeleteTweets =
    (DeleteTweetsRequest, Boolean) => Future[Seq[DeleteTweetResult]]

  /** Function shape of `unretweetEdits`: (edit control, excluded tweet id, acting user id). */
  type UnretweetEdits = (Option[EditControl], TweetId, UserId) => Future[Unit]

  /** Everything about a single deleteTweet request that a deleteTweets validator may inspect. */
  case class DeleteTweetsContext(
    byUserId: Option[UserId],
    authenticatedUserId: Option[UserId],
    tweetAuthorId: UserId,
    users: Map[UserId, User],
    isUserErasure: Boolean,
    expectedErasureUserId: Option[UserId],
    tweetIsBounced: Boolean,
    isBounceDelete: Boolean)

  /** The reason a tweet deletion was allowed, plus the acting user when there is one. */
  sealed trait DeleteAuthorization { def byUserId: Option[UserId] }

  /** The acting user is the author of the tweet. */
  case class AuthorizedByTweetOwner(userId: UserId) extends DeleteAuthorization {
    def byUserId: Option[UserId] = Some(userId)
  }

  /** A contributor is acting on behalf of the tweet's author. */
  case class AuthorizedByTweetContributor(contributorUserId: UserId) extends DeleteAuthorization {
    def byUserId: Option[UserId] = Some(contributorUserId)
  }

  /** The acting user holds admin rights to delete other users' tweets. */
  case class AuthorizedByAdmin(adminUserId: UserId) extends DeleteAuthorization {
    def byUserId: Option[UserId] = Some(adminUserId)
  }

  /** Deletion as part of a user erasure; there is no acting user. */
  case object AuthorizedByErasure extends DeleteAuthorization {
    def byUserId: None.type = None
  }

  // A validator is handed the full context of a proposed internal tweet
  // deletion. It either fails the future to cancel the delete because of
  // a validation error, or yields the [[DeleteAuthorization]] stating why
  // the deletion is allowed.
  type ValidateDeleteTweets = FutureArrow[DeleteTweetsContext, DeleteAuthorization]

  /** User fields requested when users are loaded on the delete path. */
  val userFieldsForDelete: Set[UserField] =
    Set(UserField.Account, UserField.Profile, UserField.Roles, UserField.Safety)

  /** Query options for loading users on the delete path, with visibility `All`. */
  val userQueryOptions: UserQueryOptions =
    UserQueryOptions(userFieldsForDelete, UserVisibility.All)

  // The user_agent property is supplied by the client, so it is cut down
  // to a reasonable length.
  val MaxUserAgentLength = 1000

  // Tweets younger than this that are not found during
  // cascaded_delete_tweet are treated as a temporary condition (most
  // likely the tweet has not been replicated yet). For older tweets the
  // data is assumed to be *permanently* inconsistent: either spurious
  // edges in tflock or tweets that cannot be loaded from Manhattan.
  val MaxCascadedDeleteTweetTemporaryInconsistencyAge: Duration = 10.minutes
}
/**
 * Operations of the tweet delete path. See DefaultTweetDeletePathHandler
 * for the implementation.
 */
trait TweetDeletePathHandler {
import TweetDeletePathHandler.ValidateDeleteTweets
// Deletes a tweet whose dependency (the source tweet of a retweet, or the
// initial tweet of an edit) has already been deleted.
def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit]
// Public delete endpoint; yields one result per requested tweet id.
// `isUnretweetEdits` marks calls that originate from unretweetEdits itself.
def deleteTweets(
request: DeleteTweetsRequest,
isUnretweetEdits: Boolean = false,
): Future[Seq[DeleteTweetResult]]
// Core delete implementation. The caller supplies the acting user ids and
// the validator that decides whether each deletion is allowed.
def internalDeleteTweets(
request: DeleteTweetsRequest,
byUserId: Option[UserId],
authenticatedUserId: Option[UserId],
validate: ValidateDeleteTweets,
isUnretweetEdits: Boolean = false
): Future[Seq[DeleteTweetResult]]
// Unretweets, for `byUserId`, versions of the edit chain described by
// `optEditControl`. NOTE(review): `excludedTweetId` is presumably the one
// version that is left alone — confirm against the implementation.
def unretweetEdits(
optEditControl: Option[EditControl],
excludedTweetId: TweetId,
byUserId: UserId
): Future[Unit]
}
/**
* Implementation of TweetDeletePathHandler
*/
class DefaultTweetDeletePathHandler(
stats: StatsReceiver,
tweetResultRepo: TweetResultRepository.Type,
userRepo: UserRepository.Optional,
stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type,
lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type,
tweetStore: TotalTweetStore,
getPerspectives: GetPerspectives)
extends TweetDeletePathHandler {
import TweetDeletePathHandler._
// Tweet repository derived from the tweet-result repository passed to the constructor.
val tweetRepo: TweetRepository.Type = TweetRepository.fromTweetResult(tweetResultRepo)
// Raised when someone other than the tweet owner or an admin user attempts
// to delete a tweet. internalDeleteTweets maps it to
// TweetDeleteState.PermissionError.
object DeleteTweetsPermissionException extends Exception with NoStackTrace
// Raised when the expected user id on the request differs from the author
// of the tweet. internalDeleteTweets maps it to
// TweetDeleteState.ExpectedUserIdMismatch.
object ExpectedUserIdMismatchException extends Exception with NoStackTrace
// Logger for tab-separated warnings about inconsistent data met while deleting.
private[this] val log = Logger("com.twitter.tweetypie.store.TweetDeletions")
// --- cascade_edit_delete: deletion of the edits of a deleted initial tweet ---
private[this] val cascadeEditDelete = stats.scope("cascade_edit_delete")
// Incremented in internalDeleteTweets by the number of edits of the deleted initial tweet.
private[this] val cascadeEditDeletesEnqueued = cascadeEditDelete.counter("enqueued")
// Cascaded deletes whose cascadedFromTweetId is the tweet's initial edit id.
private[this] val cascadeEditDeleteTweets = cascadeEditDelete.counter("tweets")
// Store failures for those cascaded edit deletes.
private[this] val cascadeEditDeleteFailures = cascadeEditDelete.counter("failures")
// --- cascaded_delete_tweet: the cascadedDeleteTweet endpoint ---
private[this] val cascadedDeleteTweet = stats.scope("cascaded_delete_tweet")
// Any failed cascadedDeleteTweet call.
private[this] val cascadedDeleteTweetFailures = cascadedDeleteTweet.counter("failures")
// cascadedFromTweetId matched the tweet's source tweet or initial edit.
private[this] val cascadedDeleteTweetSourceMatch = cascadedDeleteTweet.counter("source_match")
// cascadedFromTweetId matched neither; the tweet is not deleted.
private[this] val cascadedDeleteTweetSourceMismatch =
cascadedDeleteTweet.counter("source_mismatch")
// The tweet to delete could not be found.
private[this] val cascadedDeleteTweetTweetNotFound =
cascadedDeleteTweet.counter("tweet_not_found")
// Age in milliseconds (derived from the snowflake id) of not-found tweets.
private[this] val cascadedDeleteTweetTweetNotFoundAge =
cascadedDeleteTweet.stat("tweet_not_found_age")
// The author of the tweet could not be loaded; the delete still proceeds.
private[this] val cascadedDeleteTweetUserNotFound = cascadedDeleteTweet.counter("user_not_found")
// --- delete_tweets: the deleteTweets / internalDeleteTweets path ---
private[this] val deleteTweets = stats.scope("delete_tweets")
// Per-tweet authorization outcomes, see userAuthorizedToDeleteTweet.
private[this] val deleteTweetsAuth = deleteTweets.scope("per_tweet_auth")
private[this] val deleteTweetsAuthAttempts = deleteTweetsAuth.counter("attempts")
private[this] val deleteTweetsAuthFailures = deleteTweetsAuth.counter("failures")
private[this] val deleteTweetsAuthSuccessAdmin = deleteTweetsAuth.counter("success_admin")
private[this] val deleteTweetsAuthSuccessByUser = deleteTweetsAuth.counter("success_by_user")
// Number of tweet ids received in delete requests.
private[this] val deleteTweetsTweets = deleteTweets.counter("tweets")
// Per-tweet failures of validation or of the delete itself.
private[this] val deleteTweetsFailures = deleteTweets.counter("failures")
// Requested tweets that were missing or already deleted.
private[this] val deleteTweetsTweetNotFound = deleteTweets.counter("tweet_not_found")
// Users that could not be loaded for a delete request.
private[this] val deleteTweetsUserNotFound = deleteTweets.counter("user_not_found")
// The expected user id on the request differed from the tweet's author.
private[this] val userIdMismatchInTweetDelete =
deleteTweets.counter("expected_actual_user_id_mismatch")
// A bounced tweet was targeted by a delete that is not a bounce delete.
private[this] val bounceDeleteFlagNotSet =
deleteTweets.counter("bounce_delete_flag_not_set")
/** Loads one user with the delete-path query options; `None` when the repository yields no user. */
private[this] def getUser(userId: UserId): Future[Option[User]] = {
  val lookup = userRepo(UserKey(userId), userQueryOptions)
  Stitch.run(lookup)
}
/**
 * Loads the given users in a single Stitch run and returns those that
 * were found, keyed by user id. Each user that cannot be found is
 * counted in `user_not_found` and left out of the map.
 */
private[this] def getUsersForDeleteTweets(userIds: Seq[UserId]): Future[Map[UserId, User]] = {
  val lookups =
    Stitch.traverse(userIds) { id =>
      userRepo(UserKey(id), userQueryOptions).map { maybeUser =>
        if (maybeUser.isEmpty) deleteTweetsUserNotFound.incr()
        maybeUser.map(user => id -> user)
      }
    }

  Stitch.run(lookups.map(_.flatten.toMap))
}
/** Loads a tweet for deletion using the query options that leave out edit control. */
private[this] def getTweet(tweetId: TweetId): Future[Tweet] = {
  val lookup = tweetRepo(tweetId, WritePathQueryOptions.deleteTweetsWithoutEditControl)
  Stitch.run(lookup)
}
/**
 * Loads the data of one tweet that is about to be deleted.
 *
 * @param id the tweet to load
 * @param isCascadedEditTweetDeletion when true, edit control hydration
 * is switched off; with edit control hydrated, a tweet that is part
 * of a cascade delete of edits would be considered already deleted.
 * @return `None` when the tweet was never found or is already in a
 * deleted state, otherwise the loaded tweet data.
 */
private[this] def getSingleDeletedTweet(
  id: TweetId,
  isCascadedEditTweetDeletion: Boolean = false
): Stitch[Option[TweetData]] = {
  val queryOptions =
    if (!isCascadedEditTweetDeletion) WritePathQueryOptions.deleteTweets
    else WritePathQueryOptions.deleteTweetsWithoutEditControl

  val loaded = tweetResultRepo(id, queryOptions).map(_.value)

  // A tweet that never existed and a tweet in one of the
  // already-deleted states are handled alike: both are filtered out,
  // so every tweet returned from here is one that should be deleted.
  // A bounce-deleted tweet must never be soft-deleted, and vice versa.
  loaded.liftToOption {
    case NotFound | FilteredState.Unavailable.TweetDeleted |
        FilteredState.Unavailable.BounceDeleted =>
      true
  }
}
/**
 * Loads the tweet data to delete for each requested id.
 *
 * The result is keyed by the *requested* id, while the value is the
 * data of the tweet that will actually be deleted (see below). Ids
 * whose tweet is missing or already deleted are absent from the map.
 */
private[this] def getTweetsForDeleteTweets(
  ids: Seq[TweetId],
  isCascadedEditTweetDeletion: Boolean
): Future[Map[TweetId, TweetData]] = {
  // Deleting a tweet that has been edited really means deleting the
  // initial version: the initial tweet is hydrated on every request, so
  // once it is gone the later revisions are hidden and get cleaned up
  // asynchronously by TP Daemons. No second lookup is needed when the
  // loaded tweet already is the initial one, or when this is a cascading
  // edit tweet delete (which removes the entire tweet history).
  def toInitialVersion(loaded: Option[TweetData]): Stitch[Option[TweetData]] =
    loaded match {
      case Some(tweetData)
          if EditControlUtil.isInitialTweet(tweetData.tweet) ||
            isCascadedEditTweetDeletion =>
        Stitch.value(Some(tweetData))
      case Some(tweetData) =>
        getSingleDeletedTweet(EditControlUtil.getInitialTweetId(tweetData.tweet))
      case None =>
        Stitch.value(None)
    }

  val lookups =
    Stitch.traverse(ids) { id =>
      getSingleDeletedTweet(id, isCascadedEditTweetDeletion)
        .flatMap(loaded => toInitialVersion(loaded))
        // Pair the input tweet id with the initial TweetData.
        .map(tweetData => (id, tweetData))
    }

  Stitch
    .run(lookups)
    .map(_.collect { case (tweetId, Some(tweetData)) => (tweetId, tweetData) }.toMap)
}
/**
 * Determines, for each tweet id, whether the tweet carries a Bounce
 * safety label.
 *
 * The label is not read (and every id maps to `false`) in two cases:
 *  - user erasure, where reading labels that are never used would only
 *    cause unnecessary traffic spikes to Strato;
 *  - cascaded edit tweet deletes, where a bounced tweet must always be
 *    deleted along with the rest of its edit chain.
 */
private[this] def getStratoBounceStatuses(
  ids: Seq[Long],
  isUserErasure: Boolean,
  isCascadedEditedTweetDeletion: Boolean
): Future[Map[TweetId, Boolean]] = {
  val skipLabelLookup = isUserErasure || isCascadedEditedTweetDeletion

  if (!skipLabelLookup) {
    val labelled =
      Stitch.traverse(ids) { id =>
        stratoSafetyLabelsRepo(id, SafetyLabelType.Bounce).map(label => id -> label.isDefined)
      }
    Stitch.run(labelled.map(_.toMap))
  } else {
    Future.value(ids.map(id => id -> false).toMap)
  }
}
/**
 * A suspended or deactivated user can't delete tweets.
 *
 * Yields the user unchanged when allowed. Otherwise yields
 * `AccessDenied` (deactivation is checked before suspension), or
 * `UserSafetyEmptyException` when the user has no safety information.
 */
private[this] def userNotSuspendedOrDeactivated(user: User): Try[User] =
  user.safety match {
    case Some(safety) =>
      if (safety.deactivated) {
        Throw(
          AccessDenied(
            s"User deactivated userId: ${user.id}",
            errorCause = Some(AccessDeniedCause.UserDeactivated)
          )
        )
      } else if (safety.suspended) {
        Throw(
          AccessDenied(
            s"User suspended userId: ${user.id}",
            errorCause = Some(AccessDeniedCause.UserSuspended)
          )
        )
      } else {
        Return(user)
      }
    case None =>
      Throw(UpstreamFailure.UserSafetyEmptyException)
  }
/**
 * Checks that `byUser` may delete a tweet written by `tweetAuthorId`,
 * either as its owner or as an admin user.
 *
 * @return the reason the deletion is allowed as a DeleteAuthorization,
 * or `DeleteTweetsPermissionException` when it is not allowed. Each
 * call is counted as an attempt plus exactly one outcome.
 */
private[this] def userAuthorizedToDeleteTweet(
  byUser: User,
  optAuthenticatedUserId: Option[UserId],
  tweetAuthorId: UserId
): Try[DeleteAuthorization] = {
  // Kept as a def so that roles are only inspected for non-owners.
  def hasAdminPrivilege =
    byUser.roles.exists(_.rights.contains("delete_user_tweets"))

  deleteTweetsAuthAttempts.incr()

  val isOwner = byUser.id == tweetAuthorId
  if (isOwner) {
    deleteTweetsAuthSuccessByUser.incr()
    // When an authenticated (contributor) id is present, the deletion
    // is attributed to that contributor instead of the owner.
    val authorization =
      optAuthenticatedUserId.fold[DeleteAuthorization](AuthorizedByTweetOwner(byUser.id)) { uid =>
        AuthorizedByTweetContributor(uid)
      }
    Return(authorization)
  } else if (optAuthenticatedUserId.isEmpty && hasAdminPrivilege) {
    // A contributor may not assume the admin role, hence the isEmpty check.
    deleteTweetsAuthSuccessAdmin.incr()
    Return(AuthorizedByAdmin(byUser.id))
  } else {
    deleteTweetsAuthFailures.incr()
    Throw(DeleteTweetsPermissionException)
  }
}
/**
 * Safety check against deleting the wrong user's tweets.
 *
 * The expected user id is the id given on the DeleteTweetsRequest,
 * naming the user who owns the tweets to be deleted. The actual user
 * id is the one found on the tweet that is about to be deleted. The
 * two must agree; this guards against accidental deletions caused by
 * user mistakes or by corrupted data (e.g. bad tflock edges).
 *
 * A mismatch is counted and yields `ExpectedUserIdMismatchException`.
 */
private[this] def expectedUserIdMatchesActualUserId(
  expectedUserId: UserId,
  actualUserId: UserId
): Try[Unit] =
  if (expectedUserId != actualUserId) {
    userIdMismatchInTweetDelete.incr()
    Throw(ExpectedUserIdMismatchException)
  } else {
    Return.Unit
  }
/**
 * Validation for the normal public tweet delete case, the user must be found and must
 * not be suspended or deactivated.
 *
 * The checks run in the order written below and stop at the first failure,
 * so that failure is the one reported through the returned future.
 * On success the result is the DeleteAuthorization produced by
 * userAuthorizedToDeleteTweet.
 */
val validateTweetsForPublicDelete: ValidateDeleteTweets = FutureArrow {
ctx: DeleteTweetsContext =>
// The for-comprehension works on Try values; Future.const lifts the
// final Try into a Future.
Future.const(
for {
// byUserId must be present
byUserId <- ctx.byUserId.orThrow(
ClientError(ClientErrorCause.BadRequest, "Missing byUserId")
)
// the byUser must be found
byUserOpt = ctx.users.get(byUserId)
byUser <- byUserOpt.orThrow(
ClientError(ClientErrorCause.BadRequest, s"User $byUserId not found")
)
// the byUser must not be suspended or deactivated
_ <- userNotSuspendedOrDeactivated(byUser)
// a bounced tweet may only be deleted by a bounce delete
_ <- validateBounceConditions(
ctx.tweetIsBounced,
ctx.isBounceDelete
)
// if there's a contributor, make sure the user is found and not suspended or deactivated
_ <-
ctx.authenticatedUserId
.map { uid =>
ctx.users.get(uid) match {
case None =>
Throw(ClientError(ClientErrorCause.BadRequest, s"Contributor $uid not found"))
case Some(authUser) =>
userNotSuspendedOrDeactivated(authUser)
}
}
.getOrElse(Return.Unit)
// if the expected user id is present, make sure it matches the user id on the tweet
_ <-
ctx.expectedErasureUserId
.map { expectedUserId =>
expectedUserIdMatchesActualUserId(expectedUserId, ctx.tweetAuthorId)
}
.getOrElse(Return.Unit)
// User must own the tweet or be an admin
deleteAuth <- userAuthorizedToDeleteTweet(
byUser,
ctx.authenticatedUserId,
ctx.tweetAuthorId
)
} yield deleteAuth
)
}
/**
 * A bounced tweet may only be removed by a bounce delete. A normal
 * delete of a bounced tweet is counted and rejected as a bad request;
 * every other combination passes.
 */
private def validateBounceConditions(
  tweetIsBounced: Boolean,
  isBounceDelete: Boolean
): Try[Unit] = {
  val normalDeleteOfBouncedTweet = tweetIsBounced && !isBounceDelete

  if (!normalDeleteOfBouncedTweet) {
    Return.Unit
  } else {
    bounceDeleteFlagNotSet.incr()
    Throw(ClientError(ClientErrorCause.BadRequest, "Cannot normal delete a Bounced Tweet"))
  }
}
/**
 * Validation for the user erasure case, in which the user may be
 * missing. The request must name the expected user id, and that id
 * must be the author of the tweet. On success the deletion is
 * authorized as `AuthorizedByErasure`.
 */
val validateTweetsForUserErasureDaemon: ValidateDeleteTweets = FutureArrow {
  ctx: DeleteTweetsContext =>
    val ownerCheck =
      ctx.expectedErasureUserId
        .orThrow(
          ClientError(
            ClientErrorCause.BadRequest,
            "expectedUserId is required for DeleteTweetRequests"
          )
        )
        .flatMap { expectedUserId =>
          // Always compare the userId on the tweet to delete with the
          // userId on the erasure request. This is critical: it keeps
          // us from accidentally deleting tweets that the erased user
          // does not own, even if tflock serves us bad data.
          expectedUserIdMatchesActualUserId(expectedUserId, ctx.tweetAuthorId)
        }

    Future.const(ownerCheck).map(_ => AuthorizedByErasure)
}
/**
 * Completes an AuditDeleteTweet with values taken from the current
 * TwitterContext. Only fields that are empty on `orig` are filled in.
 * The resulting user agent is truncated to `MaxUserAgentLength`
 * characters whichever of the two sources it came from.
 */
def enrichMissingFromTwitterContext(orig: AuditDeleteTweet): AuditDeleteTweet = {
  val twitterContext = TwitterContext()

  val host = orig.host.orElse(twitterContext.flatMap(_.auditIp))
  val clientApplicationId =
    orig.clientApplicationId.orElse(twitterContext.flatMap(_.clientApplicationId))
  val userAgent =
    orig.userAgent.orElse(twitterContext.flatMap(_.userAgent)).map(_.take(MaxUserAgentLength))

  orig.copy(
    host = host,
    clientApplicationId = clientApplicationId,
    userAgent = userAgent
  )
}
/**
 * core delete tweets implementation.
 *
 * The [[deleteTweets]] method wraps this method and provides validation required
 * for a public endpoint.
 *
 * Steps: load the tweets to delete, load the users involved, read the
 * bounce status of each loaded tweet, then validate and delete each
 * requested tweet.
 *
 * @param request the delete request; `request.tweetIds` drives the result
 * @param byUserId the user on whose behalf the delete is performed, if any
 * @param authenticatedUserId the authenticated (contributor) user, if any
 * @param validate decides for each tweet whether the deletion is allowed
 * @param isUnretweetEdits true when this call was triggered by
 * unretweetEdits, in which case edits of a retweet's source tweet are not
 * unretweeted again
 * @return one DeleteTweetResult per entry of `request.tweetIds`. Tweets that
 * are missing or already deleted are reported as Ok. Expected-user-id
 * mismatches and permission errors are reported in the result's state.
 * Other failures are not handled per tweet. NOTE(review): presumably they
 * fail the whole returned future via Future.collect — confirm.
 */
override def internalDeleteTweets(
request: DeleteTweetsRequest,
byUserId: Option[UserId],
authenticatedUserId: Option[UserId],
validate: ValidateDeleteTweets,
isUnretweetEdits: Boolean = false
): Future[Seq[DeleteTweetResult]] = {
// Audit information from the request, completed from the TwitterContext.
val auditDeleteTweet =
enrichMissingFromTwitterContext(request.auditPassthrough.getOrElse(AuditDeleteTweet()))
deleteTweetsTweets.incr(request.tweetIds.size)
for {
// Keyed by requested tweet id; ids that are missing or already deleted are absent.
tweetDataMap <- getTweetsForDeleteTweets(
request.tweetIds,
request.cascadedEditedTweetDeletion.getOrElse(false)
)
// Authors of all loaded tweets plus the acting users, without duplicates.
userIds: Seq[UserId] = (tweetDataMap.values.map { td =>
getUserId(td.tweet)
} ++ byUserId ++ authenticatedUserId).toSeq.distinct
users <- getUsersForDeleteTweets(userIds)
// Has an entry for every key of tweetDataMap, so the lookup by tweetId below is safe.
stratoBounceStatuses <- getStratoBounceStatuses(
tweetDataMap.keys.toSeq,
request.isUserErasure,
request.cascadedEditedTweetDeletion.getOrElse(false))
results <- Future.collect {
request.tweetIds.map { tweetId =>
tweetDataMap.get(tweetId) match {
// already deleted, so nothing to do
case None =>
deleteTweetsTweetNotFound.incr()
Future.value(DeleteTweetResult(tweetId, TweetDeleteState.Ok))
case Some(tweetData) =>
val tweet: Tweet = tweetData.tweet
val tweetIsBounced = stratoBounceStatuses(tweetId)
val optSourceTweet: Option[Tweet] = tweetData.sourceTweetResult.map(_.value.tweet)
// Runs sequentially: last-quote lookup, then authorization, then (for
// retweets) the unretweet of the source tweet's edits.
val validation: Future[(Boolean, DeleteAuthorization)] = for {
isLastQuoteOfQuoter <- isFinalQuoteOfQuoter(tweet)
deleteAuth <- validate(
DeleteTweetsContext(
byUserId = byUserId,
authenticatedUserId = authenticatedUserId,
tweetAuthorId = getUserId(tweet),
users = users,
isUserErasure = request.isUserErasure,
expectedErasureUserId = request.expectedUserId,
tweetIsBounced = tweetIsBounced,
isBounceDelete = request.isBounceDelete
)
)
_ <- optSourceTweet match {
case Some(sourceTweet) if !isUnretweetEdits =>
// If this is a retweet and this deletion was not triggered by
// unretweetEdits, unretweet edits of the source Tweet
// before deleting the retweet.
//
// deleteAuth will always contain a byUserId except for erasure deletion,
// in which case the retweets will be deleted individually.
deleteAuth.byUserId match {
case Some(userId) =>
unretweetEdits(sourceTweet.editControl, sourceTweet.id, userId)
case None => Future.Unit
}
case _ => Future.Unit
}
} yield {
(isLastQuoteOfQuoter, deleteAuth)
}
validation
.flatMap {
case (isLastQuoteOfQuoter: Boolean, deleteAuth: DeleteAuthorization) =>
val isAdminDelete = deleteAuth match {
case AuthorizedByAdmin(_) => true
case _ => false
}
val event =
DeleteTweet.Event(
tweet = tweet,
timestamp = Time.now,
user = users.get(getUserId(tweet)),
byUserId = deleteAuth.byUserId,
auditPassthrough = Some(auditDeleteTweet),
isUserErasure = request.isUserErasure,
// Only a bounce delete of a tweet that really is bounced counts as one.
isBounceDelete = request.isBounceDelete && tweetIsBounced,
isLastQuoteOfQuoter = isLastQuoteOfQuoter,
isAdminDelete = isAdminDelete
)
// For an initial tweet, the number of other versions in its edit
// chain; zero for every other tweet.
val numberOfEdits: Int = tweet.editControl
.collect {
case EditControl.Initial(initial) =>
initial.editTweetIds.count(_ != tweet.id)
}
.getOrElse(0)
// Counted before the store call, i.e. whether or not the delete succeeds.
cascadeEditDeletesEnqueued.incr(numberOfEdits)
tweetStore
.deleteTweet(event)
.map(_ => DeleteTweetResult(tweetId, TweetDeleteState.Ok))
}
.onFailure { _ =>
deleteTweetsFailures.incr()
}
// Only these two validation failures become per-tweet results.
.handle {
case ExpectedUserIdMismatchException =>
DeleteTweetResult(tweetId, TweetDeleteState.ExpectedUserIdMismatch)
case DeleteTweetsPermissionException =>
DeleteTweetResult(tweetId, TweetDeleteState.PermissionError)
}
}
}
}
} yield results
}
/**
 * Asks the last-quote-of-quoter repository about the tweet quoted by
 * `tweet`, on behalf of the author of `tweet`. Yields `false` when
 * `tweet` quotes nothing, and also when the repository lookup fails.
 */
private def isFinalQuoteOfQuoter(tweet: Tweet): Future[Boolean] =
  tweet.quotedTweet.fold(Future.value(false)) { quoted =>
    val lookup =
      lastQuoteOfQuoterRepo
        .apply(quoted.tweetId, getUserId(tweet))
        .liftToTry
        // A failed lookup is treated as "not the last quote".
        .map(_.getOrElse(false))
    Stitch.run(lookup)
  }
/**
 * Validator used by the public deleteTweets endpoint.
 *
 * For a regular delete it
 *  - ensures that the byUserId user can be found and is in the
 *    correct user state, and
 *  - ensures that the tweet is deleted by its owner or by an admin.
 * A validation error is reported as a failed future.
 *
 * When the request is part of a user erasure the validations are
 * relaxed: the User is allowed to be missing.
 */
val deleteTweetsValidator: ValidateDeleteTweets =
  FutureArrow { context =>
    val validator =
      if (context.isUserErasure) validateTweetsForUserErasureDaemon
      else validateTweetsForPublicDelete
    validator(context)
  }
/**
 * Public delete endpoint: runs `internalDeleteTweets` with
 * `deleteTweetsValidator`. The acting user is the request's byUserId,
 * falling back to the user id of the current TwitterContext; the
 * authenticated user id always comes from the TwitterContext.
 */
override def deleteTweets(
  request: DeleteTweetsRequest,
  isUnretweetEdits: Boolean = false,
): Future[Seq[DeleteTweetResult]] = {
  val context = TwitterContext()
  val actingUserId = request.byUserId.orElse(context.flatMap(_.userId))
  val contextAuthenticatedUserId = context.flatMap(_.authenticatedUserId)

  internalDeleteTweets(
    request = request,
    byUserId = actingUserId,
    authenticatedUserId = contextAuthenticatedUserId,
    validate = deleteTweetsValidator,
    isUnretweetEdits = isUnretweetEdits
  )
}
// Cascade delete tweet is the logic for removing tweets that are detached
// from their dependency which has been deleted. They are already filtered
// out from serving, so this operation reconciles storage with the view
// presented by Tweetypie.
// This RPC call is delegated from daemons or batch jobs. Currently there
// are two use-cases when this call is issued:
// * Deleting detached retweets after the source tweet was deleted.
// This is done through RetweetsDeletion daemon and the
// CleanupDetachedRetweets job.
// * Deleting edits of an initial tweet that has been deleted.
// This is done by CascadedEditedTweetDelete daemon.
// Note that, when serving the original delete request for an edit,
// the initial tweet is only deleted, which makes all edits hidden.
//
// Outcomes:
// * tweet already deleted: success, nothing to do
// * tweet not found and recently created: fails with CascadedDeleteNotAvailable
// * tweet not found and old (or not a snowflake id): logged, success
// * tweet found and cascadedFromTweetId matches: the tweet is deleted
// * tweet found but cascadedFromTweetId does not match: logged, success
// Every failed call is counted in cascaded_delete_tweet/failures.
override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] = {
val contextViewer = TwitterContext()
getTweet(request.tweetId)
.transform {
case Throw(
FilteredState.Unavailable.TweetDeleted | FilteredState.Unavailable.BounceDeleted) =>
// The retweet or edit was already deleted via some other mechanism
Future.Unit
case Throw(NotFound) =>
cascadedDeleteTweetTweetNotFound.incr()
// The age can only be derived from snowflake ids; any other id is
// treated as not recently created.
val recentlyCreated =
if (SnowflakeId.isSnowflakeId(request.tweetId)) {
val age = Time.now - SnowflakeId(request.tweetId).time
cascadedDeleteTweetTweetNotFoundAge.add(age.inMilliseconds)
age < MaxCascadedDeleteTweetTemporaryInconsistencyAge
} else {
false
}
if (recentlyCreated) {
// Treat the NotFound as a temporary condition, most
// likely due to replication lag.
Future.exception(CascadedDeleteNotAvailable(request.tweetId))
} else {
// Treat the NotFound as a permanent inconsistency, either
// spurious edges in tflock or invalid data in Manhattan. This
// was happening a few times an hour during the time that we
// were not treating it specially. For now, we will just log that
// it happened, but in the longer term, it would be good
// to collect this data and repair the corruption.
log.warn(
Seq(
"cascaded_delete_tweet_old_not_found",
request.tweetId,
request.cascadedFromTweetId
).mkString("\t")
)
Future.Done
}
// Any other FilteredStates should not be thrown because of
// the options that we used to load the tweet, so we will just
// let them bubble up as an internal server error
case Throw(other) =>
Future.exception(other)
case Return(tweet) =>
// Load the last-quote flag and the tweet's author together.
Future
.join(
isFinalQuoteOfQuoter(tweet),
getUser(getUserId(tweet))
)
.flatMap {
case (isLastQuoteOfQuoter, user) =>
// A missing author is only counted; the delete still goes ahead.
if (user.isEmpty) {
cascadedDeleteTweetUserNotFound.incr()
}
// Source tweet id when the tweet is a retweet.
val tweetSourceId = getShare(tweet).map(_.sourceStatusId)
// Initial tweet id when the tweet is an edit.
val initialEditId = tweet.editControl.collect {
case EditControl.Edit(edit) => edit.initialTweetId
}
if (initialEditId.contains(request.cascadedFromTweetId)) {
cascadeEditDeleteTweets.incr()
}
// Only delete when the tweet really depends on the tweet the cascade
// started from, either as its retweet or as its edit.
if (tweetSourceId.contains(request.cascadedFromTweetId)
|| initialEditId.contains(request.cascadedFromTweetId)) {
cascadedDeleteTweetSourceMatch.incr()
val deleteEvent =
DeleteTweet.Event(
tweet = tweet,
timestamp = Time.now,
user = user,
byUserId = contextViewer.flatMap(_.userId),
cascadedFromTweetId = Some(request.cascadedFromTweetId),
auditPassthrough = request.auditPassthrough,
isUserErasure = false,
// cascaded deletes of retweets or edits have not been through a bouncer flow,
// so are not considered to be "bounce deleted".
isBounceDelete = false,
isLastQuoteOfQuoter = isLastQuoteOfQuoter,
isAdminDelete = false
)
tweetStore
.deleteTweet(deleteEvent)
.onFailure { _ =>
// Failures are counted separately for cascaded edit deletes.
if (initialEditId.contains(request.cascadedFromTweetId)) {
cascadeEditDeleteFailures.incr()
}
}
} else {
cascadedDeleteTweetSourceMismatch.incr()
// Tab-separated: marker, tweet id, cascadedFromTweetId, and the
// tweet's actual source / initial tweet id ("-" when it has neither).
log.warn(
Seq(
"cascaded_from_tweet_id_source_mismatch",
request.tweetId,
request.cascadedFromTweetId,
tweetSourceId.orElse(initialEditId).getOrElse("-")
).mkString("\t")
)
Future.Done
}
}
}
.onFailure(_ => cascadedDeleteTweetFailures.incr())
}
/**
 * Finds the retweets that `byUserId` has made of the given edit Tweets.
 *
 * A single perspective query covering all of `editTweetIds` is issued; only the first
 * result of the response is inspected, and every perspective in it that carries a
 * `retweetId` contributes that id to the output.
 *
 * @param editTweetIds ids of the edit Tweets to look up; when empty no query is issued
 * @param byUserId the user whose retweets are being looked up
 * @return the ids of the user's retweets of those Tweets (possibly empty)
 */
private def editTweetIdRetweetsFromUser(
  editTweetIds: Seq[TweetId],
  byUserId: UserId
): Future[Seq[TweetId]] =
  if (editTweetIds.nonEmpty) {
    val query = tls.PerspectiveQuery(byUserId, editTweetIds)
    getPerspectives(Seq(query)).map { results: Seq[PerspectiveResult] =>
      for {
        firstResult <- results.headOption.toSeq
        perspective <- firstResult.perspectives
        retweetId <- perspective.retweetId
      } yield retweetId
    }
  } else {
    // Nothing to look up, so skip the perspective query entirely.
    Future.value(Seq())
  }
/* Unretweets, on behalf of `byUserId`, every revision of an edit chain except
 * `excludedTweetId`.
 *
 * This function is called from three places -
 * 1. When Tweetypie gets a request to retweet the latest version of an edit chain, all the
 *    previous revisions should be unretweeted.
 *    i.e. On retweet of the latest tweet - unretweets all the previous revisions for this user.
 *      - create A
 *      - retweet A' (retweet of A)
 *      - create edit B (edit of A)
 *      - retweet B' => Deletes A'
 *
 * 2. When Tweetypie gets an unretweet request for a source tweet that is an edit tweet, all
 *    the versions of the edit chain are unretweeted.
 *    i.e. On unretweet of any version in the edit chain - unretweets all the revisions for
 *    this user.
 *      - create A
 *      - retweet A'
 *      - create B
 *      - unretweet B => Deletes A' (& also any B' if it existed)
 *
 * 3. When Tweetypie gets a delete request for a retweet, say A1, and A happens to be the
 *    source tweet for A1, and A is an edit tweet, then the entire edit chain should be
 *    unretweeted and not just A.
 *    i.e. On delete of a retweet - unretweets all the revisions for this user.
 *      - create A
 *      - retweet A'
 *      - create B
 *      - delete A' => Deletes A' (& also any B' if it existed)
 *
 * There are two failure scenarios -
 *   i.  fetching the perspectives of the edit tweets fails.
 *   ii. deleting any of the retweets of these edits fails.
 *
 * In either scenario the entire request fails and the error bubbles up to the top.
 * Note: the unretweet of edits only happens for the current user.
 *
 * In normal circumstances, a maximum of one Tweet in the edit chain will have been retweeted,
 * but we don't know which one it was. Additionally, there may be circumstances where
 * unretweet failed, and we end up with multiple versions retweeted. For these reasons,
 * we always unretweet all the revisions (except for `excludedTweetId`).
 * This is a no-op if none of these versions have been retweeted.
 */
override def unretweetEdits(
  optEditControl: Option[EditControl],
  excludedTweetId: TweetId,
  byUserId: UserId
): Future[Unit] = {
  // NOTE(review): `.get()` runs before any Future is built, so a failure here would
  // presumably surface as a thrown exception rather than a failed Future - confirm
  // what getEditTweetIds returns.
  val otherRevisionIds: Seq[TweetId] =
    EditControlUtil.getEditTweetIds(optEditControl).get().filterNot(_ == excludedTweetId)

  val retweetsOfOtherRevisions = editTweetIdRetweetsFromUser(otherRevisionIds, byUserId)

  retweetsOfOtherRevisions.flatMap { retweetIds =>
    if (retweetIds.isEmpty) {
      Future.Nil
    } else {
      val deleteRequest =
        DeleteTweetsRequest(tweetIds = retweetIds, byUserId = Some(byUserId))
      deleteTweets(deleteRequest, isUnretweetEdits = true)
    }
  }.unit
}
}

View File

@ -1,118 +0,0 @@
package com.twitter.tweetypie.handler
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.TweetCreateFailure
import com.twitter.tweetypie.repository.ConversationControlRepository
import com.twitter.tweetypie.repository.TweetQuery
import com.twitter.tweetypie.thriftscala.ExclusiveTweetControl
import com.twitter.tweetypie.thriftscala.ExclusiveTweetControlOptions
import com.twitter.tweetypie.thriftscala.QuotedTweet
import com.twitter.tweetypie.thriftscala.TrustedFriendsControl
import com.twitter.tweetypie.thriftscala.TrustedFriendsControlOptions
import com.twitter.tweetypie.thriftscala.TweetCreateState
import com.twitter.tweetypie.FutureEffect
import com.twitter.tweetypie.Gate
import com.twitter.tweetypie.TweetId
import com.twitter.tweetypie.UserId
import com.twitter.tweetypie.thriftscala.EditControl
import com.twitter.tweetypie.thriftscala.EditOptions
import com.twitter.visibility.writer.interfaces.tweets.TweetWriteEnforcementLibrary
import com.twitter.visibility.writer.interfaces.tweets.TweetWriteEnforcementRequest
import com.twitter.visibility.writer.models.ActorContext
import com.twitter.visibility.writer.Allow
import com.twitter.visibility.writer.Deny
import com.twitter.visibility.writer.DenyExclusiveTweetReply
import com.twitter.visibility.writer.DenyStaleTweetQuoteTweet
import com.twitter.visibility.writer.DenyStaleTweetReply
import com.twitter.visibility.writer.DenySuperFollowsCreate
import com.twitter.visibility.writer.DenyTrustedFriendsCreate
import com.twitter.visibility.writer.DenyTrustedFriendsQuoteTweet
import com.twitter.visibility.writer.DenyTrustedFriendsReply
/**
 * Validates a tweet write by delegating to the [[TweetWriteEnforcementLibrary]] and turning
 * every non-`Allow` verdict into a failed effect carrying the matching `TweetCreateState`.
 */
object TweetWriteValidator {

  /** The inputs forwarded to the enforcement library for a single tweet write. */
  case class Request(
    conversationId: Option[TweetId],
    userId: UserId,
    exclusiveTweetControlOptions: Option[ExclusiveTweetControlOptions],
    replyToExclusiveTweetControl: Option[ExclusiveTweetControl],
    trustedFriendsControlOptions: Option[TrustedFriendsControlOptions],
    inReplyToTrustedFriendsControl: Option[TrustedFriendsControl],
    quotedTweetOpt: Option[QuotedTweet],
    inReplyToTweetId: Option[TweetId],
    inReplyToEditControl: Option[EditControl],
    editOptions: Option[EditOptions])

  type Type = FutureEffect[Request]

  /**
   * Builds the validating effect.
   *
   * @param convoCtlRepo used to load the conversation control of the root conversation
   * @param tweetWriteEnforcementLibrary produces the allow/deny verdict
   * @param enableExclusiveTweetControlValidation gate passed through to the library
   * @param enableTrustedFriendsControlValidation gate passed through to the library
   * @param enableStaleTweetValidation gate passed through to the library
   * @return an effect that succeeds on `Allow` and fails with a
   *         `TweetCreateFailure.State` for every deny verdict
   */
  def apply(
    convoCtlRepo: ConversationControlRepository.Type,
    tweetWriteEnforcementLibrary: TweetWriteEnforcementLibrary,
    enableExclusiveTweetControlValidation: Gate[Unit],
    enableTrustedFriendsControlValidation: Gate[Unit],
    enableStaleTweetValidation: Gate[Unit]
  ): FutureEffect[Request] =
    FutureEffect[Request] { request =>
      // An otherwise empty TweetQuery.Options is built only to pick up the default
      // CacheControl value, so that it is not hard coded here.
      val defaultCacheControl = TweetQuery.Options(TweetQuery.Include()).cacheControl

      // Every deny verdict becomes a failed Stitch carrying the corresponding create state.
      def deny(state: TweetCreateState) =
        Stitch.exception(TweetCreateFailure.State(state))

      Stitch.run {
        val rootConversationControl = request.conversationId match {
          case None => Stitch.value(None)
          case Some(convoId) => convoCtlRepo(convoId, defaultCacheControl)
        }

        rootConversationControl
          .flatMap { convoCtl =>
            val enforcementRequest = TweetWriteEnforcementRequest(
              rootConversationControl = convoCtl,
              convoId = request.conversationId,
              exclusiveTweetControlOptions = request.exclusiveTweetControlOptions,
              replyToExclusiveTweetControl = request.replyToExclusiveTweetControl,
              trustedFriendsControlOptions = request.trustedFriendsControlOptions,
              inReplyToTrustedFriendsControl = request.inReplyToTrustedFriendsControl,
              quotedTweetOpt = request.quotedTweetOpt,
              actorContext = ActorContext(request.userId),
              inReplyToTweetId = request.inReplyToTweetId,
              inReplyToEditControl = request.inReplyToEditControl,
              editOptions = request.editOptions
            )
            tweetWriteEnforcementLibrary(
              enforcementRequest,
              enableExclusiveTweetControlValidation = enableExclusiveTweetControlValidation,
              enableTrustedFriendsControlValidation = enableTrustedFriendsControlValidation,
              enableStaleTweetValidation = enableStaleTweetValidation
            )
          }
          .flatMap {
            case Allow =>
              Stitch.Done
            case Deny =>
              deny(TweetCreateState.ReplyTweetNotAllowed)
            case DenyExclusiveTweetReply =>
              deny(TweetCreateState.ExclusiveTweetEngagementNotAllowed)
            case DenySuperFollowsCreate =>
              deny(TweetCreateState.SuperFollowsCreateNotAuthorized)
            case DenyTrustedFriendsReply =>
              deny(TweetCreateState.TrustedFriendsEngagementNotAllowed)
            case DenyTrustedFriendsCreate =>
              deny(TweetCreateState.TrustedFriendsCreateNotAllowed)
            case DenyTrustedFriendsQuoteTweet =>
              deny(TweetCreateState.TrustedFriendsQuoteTweetNotAllowed)
            case DenyStaleTweetReply =>
              deny(TweetCreateState.StaleTweetEngagementNotAllowed)
            case DenyStaleTweetQuoteTweet =>
              deny(TweetCreateState.StaleTweetQuoteTweetNotAllowed)
          }
      }
    }
}

View File

@ -1,21 +0,0 @@
package com.twitter.tweetypie.handler
import com.twitter.compliance.userconsent.compliance.birthdate.GlobalBirthdateUtil
import com.twitter.gizmoduck.thriftscala.User
import com.twitter.tweetypie.thriftscala.DeletedTweet
import org.joda.time.DateTime
/*
 * As part of GDPR U13 work, tweets that were created while the user was younger
 * than 13 must not be restored.
 */
private[handler] object U13ValidationUtil {

  /**
   * Reports whether `deletedTweet` was created while `user` was under 13, as decided by
   * `GlobalBirthdateUtil.isUnderSomeAge`.
   *
   * @param user the author of the deleted tweet
   * @param deletedTweet the tweet being considered; `createdAtSecs` is converted from
   *                     seconds to milliseconds before the check
   * @throws NoCreatedAtTimeException if the deleted tweet has no `createdAtSecs`
   */
  def wasTweetCreatedBeforeUserTurned13(user: User, deletedTweet: DeletedTweet): Boolean = {
    val createdAtSecs = deletedTweet.createdAtSecs.getOrElse(throw NoCreatedAtTimeException)
    val createdAt = new DateTime(createdAtSecs * 1000L)
    GlobalBirthdateUtil.isUnderSomeAge(13, createdAt, user)
  }
}

View File

@ -1,215 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.servo.util.FutureArrow
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.FilteredState
import com.twitter.tweetypie.core.TweetHydrationError
import com.twitter.tweetypie.repository.ParentUserIdRepository
import com.twitter.tweetypie.storage.TweetStorageClient.Undelete
import com.twitter.tweetypie.storage.DeleteState
import com.twitter.tweetypie.storage.DeletedTweetResponse
import com.twitter.tweetypie.storage.TweetStorageClient
import com.twitter.tweetypie.store.UndeleteTweet
import com.twitter.tweetypie.thriftscala.UndeleteTweetState.{Success => TweetypieSuccess, _}
import com.twitter.tweetypie.thriftscala._
import com.twitter.tweetypie.thriftscala.entities.EntityExtractor
import scala.util.control.NoStackTrace
/** Marker trait for the stack-trace-free exceptions raised while undeleting a tweet. */
trait UndeleteException extends Exception with NoStackTrace
/**
 * Exceptions we return to the user, things that we don't expect to ever happen unless there is a
 * problem with the underlying data in Manhattan or a bug in [[com.twitter.tweetypie.storage.TweetStorageClient]]
 */
// Raised when a successful undelete response has no archivedAtMillis although one is required.
object NoDeletedAtTimeException extends UndeleteException
// Raised when a deleted tweet has no createdAtSecs.
object NoCreatedAtTimeException extends UndeleteException
// Raised when the undelete response code is Success but no tweet is attached.
object NoStatusWithSuccessException extends UndeleteException
object NoUserIdWithTweetException extends UndeleteException
// Raised when the deleted-tweet lookup succeeds but carries no tweet payload.
object NoDeletedTweetException extends UndeleteException
// Raised when the soft-deleted tweet has no user id.
object SoftDeleteUserIdNotFoundException extends UndeleteException
/**
 * Represents a problem that we choose to return to the user as a response state
 * rather than as an exception.
 *
 * @param state the state reported in the [[UndeleteTweetResponse]] built by `toResponse`
 */
case class ResponseException(state: UndeleteTweetState) extends Exception with NoStackTrace {
def toResponse: UndeleteTweetResponse = UndeleteTweetResponse(state = state)
}
// One singleton ResponseException per UndeleteTweetState that the handler reports to callers.
private[this] object SoftDeleteExpiredException extends ResponseException(SoftDeleteExpired)
private[this] object BounceDeleteException extends ResponseException(TweetIsBounceDeleted)
private[this] object SourceTweetNotFoundException extends ResponseException(SourceTweetNotFound)
private[this] object SourceUserNotFoundException extends ResponseException(SourceUserNotFound)
private[this] object TweetExistsException extends ResponseException(TweetAlreadyExists)
private[this] object TweetNotFoundException extends ResponseException(TweetNotFound)
private[this] object U13TweetException extends ResponseException(TweetIsU13Tweet)
private[this] object UserNotFoundException extends ResponseException(UserNotFound)
/**
 * Undelete Notes:
 *
 * If request.force is set to true, then the undelete will take place even if the undeleted tweet
 * is already present in Manhattan. This is useful if a tweet was recently restored to the backend,
 * but the async actions portion of the undelete failed and you want to retry them.
 *
 * Before undeleting the tweet we check if it's a retweet, in which case we require that the sourceTweet
 * and sourceUser exist.
 *
 * Tweets can only be undeleted for N days where N is the number of days before tweets marked with
 * the soft_delete_state flag are deleted permanently by the cleanup job
 *
 */
object UndeleteTweetHandler {
type Type = FutureArrow[UndeleteTweetRequest, UndeleteTweetResponse]
/** Extract an optional value inside a future or throw if it's missing. */
def required[T](option: Future[Option[T]], ex: => Exception): Future[T] =
option.flatMap {
case None => Future.exception(ex)
case Some(i) => Future.value(i)
}
/**
 * Builds the undelete handler from its collaborators.
 *
 * @param undelete storage operation that restores the tweet
 * @param tweetExists not referenced in the body below
 * @param getUser loads a user by id; `None` is treated as "user not found"
 * @param getDeletedTweets storage operation that reads soft-deleted tweet records
 * @param parentUserIdRepo resolves the parent user id for a tweet
 * @param save runs the UndeleteTweet event and yields the hydrated tweet
 */
def apply(
undelete: TweetStorageClient.Undelete,
tweetExists: FutureArrow[TweetId, Boolean],
getUser: FutureArrow[UserId, Option[User]],
getDeletedTweets: TweetStorageClient.GetDeletedTweets,
parentUserIdRepo: ParentUserIdRepository.Type,
save: FutureArrow[UndeleteTweet.Event, Tweet]
): Type = {
// Resolves the parent user id; a ParentTweetNotFound failure is mapped to None.
def getParentUserId(tweet: Tweet): Future[Option[UserId]] =
Stitch.run {
parentUserIdRepo(tweet)
.handle {
case ParentUserIdRepository.ParentTweetNotFound(id) => None
}
}
val entityExtractor = EntityExtractor.mutationAll.endo
// Fetches the deleted-tweet record for one id. Uses `.head`, so this assumes storage
// returns at least one response for the single requested id - TODO confirm.
val getDeletedTweet: Long => Future[DeletedTweetResponse] =
id => Stitch.run(getDeletedTweets(Seq(id)).map(_.head))
// Loads the tweet's author; fails if the deleted tweet has no user id or the user is missing.
def getRequiredUser(userId: Option[UserId]): Future[User] =
userId match {
case None => Future.exception(SoftDeleteUserIdNotFoundException)
case Some(id) => required(getUser(id), UserNotFoundException)
}
/**
 * Loads the deleted tweet and checks that its delete state allows an undelete.
 * Only SoftDeleted (and NotDeleted when `allowNotDeleted` is set) yield a tweet; every
 * other state fails with the corresponding ResponseException. A missing tweet payload
 * fails with NoDeletedTweetException.
 */
def getValidatedDeletedTweet(
tweetId: TweetId,
allowNotDeleted: Boolean
): Future[DeletedTweet] = {
import DeleteState._
val deletedTweet = getDeletedTweet(tweetId).map { response =>
response.deleteState match {
case SoftDeleted => response.tweet
// BounceDeleted tweets violated Twitter Rules and may not be undeleted
case BounceDeleted => throw BounceDeleteException
case HardDeleted => throw SoftDeleteExpiredException
case NotDeleted => if (allowNotDeleted) response.tweet else throw TweetExistsException
case NotFound => throw TweetNotFoundException
}
}
required(deletedTweet, NoDeletedTweetException)
}
/**
 * Fetch the source tweet's user for a deleted share
 */
def getSourceUser(share: Option[DeletedTweetShare]): Future[Option[User]] =
share match {
case None => Future.value(None)
case Some(s) => required(getUser(s.sourceUserId), SourceUserNotFoundException).map(Some(_))
}
/**
 * Ensure that the undelete response contains all the required information to continue with
 * the tweetypie undelete.
 */
def validateUndeleteResponse(response: Undelete.Response, force: Boolean): Future[Tweet] =
Future {
(response.code, response.tweet) match {
case (Undelete.UndeleteResponseCode.NotCreated, _) => throw TweetNotFoundException
case (Undelete.UndeleteResponseCode.BackupNotFound, _) => throw SoftDeleteExpiredException
case (Undelete.UndeleteResponseCode.Success, None) => throw NoStatusWithSuccessException
case (Undelete.UndeleteResponseCode.Success, Some(tweet)) =>
// archivedAtMillis is required on the response unless force is present
// or the tweet is a retweet. retweets have no favs or retweets to clean up
// of their own so the original deleted at time is not needed
if (response.archivedAtMillis.isEmpty && !force && !isRetweet(tweet))
throw NoDeletedAtTimeException
else
tweet
case (code, _) => throw new Exception(s"Unknown UndeleteResponseCode $code")
}
}
// Fails with U13TweetException when the tweet was created before the user turned 13.
def enforceU13Compliance(user: User, deletedTweet: DeletedTweet): Future[Unit] =
Future.when(U13ValidationUtil.wasTweetCreatedBeforeUserTurned13(user, deletedTweet)) {
throw U13TweetException
}
/**
 * Fetch required data and perform before/after validations for undelete.
 * If everything looks good with the undelete, kick off the tweetypie undelete
 * event.
 */
FutureArrow { request =>
val hydrationOptions = request.hydrationOptions.getOrElse(WritePathHydrationOptions())
val force = request.force.getOrElse(false)
val tweetId = request.tweetId
(for {
// we must be able to query the tweet from the soft delete table
deletedTweet <- getValidatedDeletedTweet(tweetId, allowNotDeleted = force)
// we always require the user
user <- getRequiredUser(deletedTweet.userId)
// Make sure we're not restoring any u13 tweets.
() <- enforceU13Compliance(user, deletedTweet)
// if a retweet, then sourceUser is required; sourceTweet will be hydrated in save()
sourceUser <- getSourceUser(deletedTweet.share)
// validations passed, perform the undelete.
undeleteResponse <- Stitch.run(undelete(tweetId))
// validate the response
tweet <- validateUndeleteResponse(undeleteResponse, force)
// Extract entities from tweet text
tweetWithEntities = entityExtractor(tweet)
// If a retweet, get user id of parent retweet
parentUserId <- getParentUserId(tweet)
// undeletion was successful, hydrate the tweet and
// kick off tweetypie async undelete actions
hydratedTweet <- save(
UndeleteTweet.Event(
tweet = tweetWithEntities,
user = user,
timestamp = Time.now,
hydrateOptions = hydrationOptions,
deletedAt = undeleteResponse.archivedAtMillis.map(Time.fromMilliseconds),
sourceUser = sourceUser,
parentUserId = parentUserId
)
)
} yield {
UndeleteTweetResponse(TweetypieSuccess, Some(hydratedTweet))
}).handle {
// Failures that map onto a response state are returned as responses; any other
// failure is left in the returned Future.
case TweetHydrationError(_, Some(FilteredState.Unavailable.SourceTweetNotFound(_))) =>
SourceTweetNotFoundException.toResponse
case ex: ResponseException =>
ex.toResponse
}
}
}
}

View File

@ -1,65 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.Future
import com.twitter.tweetypie.core.FilteredState
import com.twitter.tweetypie.repository.TweetQuery
import com.twitter.tweetypie.repository.TweetRepository
import com.twitter.tweetypie.thriftscala._
import com.twitter.timelineservice.{thriftscala => tls}
import com.twitter.tweetypie.backends.TimelineService.GetPerspectives
/**
 * Handles unretweet requests: first unretweets the other versions of the source tweet's
 * edit chain (when the source tweet can be loaded), then deletes the requesting user's
 * retweet of the source tweet itself.
 */
object UnretweetHandler {

  type Type = UnretweetRequest => Future[UnretweetResult]

  /**
   * Builds the handler.
   *
   * @param deleteTweets deletes the given tweets on behalf of a user
   * @param getPerspectives looks up the user's perspective (including retweet id) on tweets
   * @param unretweetEdits unretweets every other version of an edit chain for the user
   * @param tweetRepo used to load the source tweet's edit control
   */
  def apply(
    deleteTweets: TweetDeletePathHandler.DeleteTweets,
    getPerspectives: GetPerspectives,
    unretweetEdits: TweetDeletePathHandler.UnretweetEdits,
    tweetRepo: TweetRepository.Type,
  ): Type = { request: UnretweetRequest =>
    val handleEdits = getSourceTweet(request.sourceTweetId, tweetRepo).liftToTry.flatMap {
      case Return(sourceTweet) =>
        // If we're able to fetch the source Tweet, unretweet all its other versions
        unretweetEdits(sourceTweet.editControl, request.sourceTweetId, request.userId)
      // Failing to load the source tweet only skips the edit-chain cleanup; the
      // unretweet of the source tweet itself still proceeds.
      case Throw(_) => Future.Done
    }

    handleEdits.flatMap(_ => unretweetSourceTweet(request, deleteTweets, getPerspectives))
  }

  /**
   * Deletes the user's retweet of `request.sourceTweetId`, if there is one.
   *
   * @return the deleted retweet's id and delete state, or `UnretweetResult(None, Ok)` when
   *         the perspective lookup reports no retweet (including an empty response)
   */
  def unretweetSourceTweet(
    request: UnretweetRequest,
    deleteTweets: TweetDeletePathHandler.DeleteTweets,
    getPerspectives: GetPerspectives,
  ): Future[UnretweetResult] =
    getPerspectives(
      Seq(tls.PerspectiveQuery(request.userId, Seq(request.sourceTweetId)))
    ).map { results =>
        // headOption rather than head: an empty perspectives response is treated as
        // "no retweet found" instead of failing with NoSuchElementException.
        results.headOption
          .flatMap(_.perspectives.headOption)
          .flatMap(_.retweetId)
      }
      .flatMap {
        case Some(id) =>
          deleteTweets(
            DeleteTweetsRequest(tweetIds = Seq(id), byUserId = Some(request.userId)),
            false
          ).map(_.head).map { deleteTweetResult =>
            UnretweetResult(Some(deleteTweetResult.tweetId), deleteTweetResult.state)
          }
        case None => Future.value(UnretweetResult(None, TweetDeleteState.Ok))
      }

  /**
   * Loads the source tweet with only its edit-control field requested. Any
   * `FilteredState` from the repository is converted to `Stitch.NotFound`.
   */
  def getSourceTweet(
    sourceTweetId: TweetId,
    tweetRepo: TweetRepository.Type
  ): Future[Tweet] = {
    val options: TweetQuery.Options = TweetQuery
      .Options(include = TweetQuery.Include(tweetFields = Set(Tweet.EditControlField.id)))

    Stitch.run {
      tweetRepo(sourceTweetId, options).rescue {
        case _: FilteredState => Stitch.NotFound
      }
    }
  }
}

View File

@ -1,46 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.tweetypie.store.UpdatePossiblySensitiveTweet
import com.twitter.tweetypie.thriftscala.UpdatePossiblySensitiveTweetRequest
import com.twitter.tweetypie.util.TweetLenses
/**
 * Applies the requested nsfwAdmin / nsfwUser values to a tweet and, when that actually
 * changes the tweet, sends an [[UpdatePossiblySensitiveTweet.Event]] to the store.
 */
object UpdatePossiblySensitiveTweetHandler {

  type Type = FutureArrow[UpdatePossiblySensitiveTweetRequest, Unit]

  /**
   * @param tweetGetter loads the tweet being updated
   * @param userGetter loads the tweet's author for the event
   * @param updatePossiblySensitiveTweetStore receives the event when the tweet changed
   */
  def apply(
    tweetGetter: FutureArrow[TweetId, Tweet],
    userGetter: FutureArrow[UserId, User],
    updatePossiblySensitiveTweetStore: FutureEffect[UpdatePossiblySensitiveTweet.Event]
  ): Type =
    FutureArrow { request =>
      val adminFlagMutation = Mutation[Boolean](_ => request.nsfwAdmin).checkEq
      val userFlagMutation = Mutation[Boolean](_ => request.nsfwUser).checkEq

      val applyRequestedFlags =
        TweetLenses.nsfwAdmin
          .mutation(adminFlagMutation)
          .also(TweetLenses.nsfwUser.mutation(userFlagMutation))

      tweetGetter(request.tweetId).flatMap { originalTweet =>
        applyRequestedFlags(originalTweet) match {
          case Some(mutatedTweet) =>
            for {
              user <- userGetter(getUserId(originalTweet))
              event = UpdatePossiblySensitiveTweet.Event(
                tweet = mutatedTweet,
                user = user,
                timestamp = Time.now,
                byUserId = request.byUserId,
                nsfwAdminChange = adminFlagMutation(TweetLenses.nsfwAdmin.get(originalTweet)),
                nsfwUserChange = userFlagMutation(TweetLenses.nsfwUser.get(originalTweet)),
                note = request.note,
                host = request.host
              )
              _ <- updatePossiblySensitiveTweetStore(event)
            } yield ()

          // The mutation produced no change, so there is nothing to store.
          case None =>
            Future.Unit
        }
      }
    }
}

View File

@ -1,102 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.tco_util.TcoUrl
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.thriftscala.entities.EntityExtractor
import com.twitter.tweetypie.thriftscala._
import com.twitter.tweetypie.tweettext.IndexConverter
import com.twitter.tweetypie.tweettext.Offset
import com.twitter.tweetypie.tweettext.Preprocessor._
/** Shortens the URLs found in tweet text and rebuilds the text and its `UrlEntity`s. */
object UrlEntityBuilder {
import UpstreamFailure.UrlShorteningFailure
import UrlShortener.Context
/**
 * Extracts URLs from the given tweet text, shortens them, and returns an updated tweet
 * text that contains the shortened URLs, along with the generated `UrlEntity`s.
 */
type Type = FutureArrow[(String, Context), (String, Seq[UrlEntity])]
/**
 * Builds a [[Type]] on top of the given shortener: every URL extracted from the text is
 * shortened, and the text is then rebuilt with `replaceInvisiblesWithWhitespace` applied
 * to the parts outside the URLs.
 */
def fromShortener(shortener: UrlShortener.Type): Type =
FutureArrow {
case (text, ctx) =>
Future
.collect(EntityExtractor.extractAllUrls(text).map(shortenEntity(shortener, _, ctx)))
.map(_.flatMap(_.toSeq))
.map(updateTextAndUrls(text, _)(replaceInvisiblesWithWhitespace))
}
/**
 * Update a url entity with tco-ed url
 *
 * @param shortener the shortener used to obtain the tco-ed url
 * @param entity an url entity with long url in the `url` field
 * @param ctx additional data needed to build the shortener request
 * @return an updated url entity with tco-ed url in the `url` field,
 * long url in the `expanded` field, and display text in the `display` field
 */
private def shortenEntity(
shortener: UrlShortener.Type,
entity: UrlEntity,
ctx: Context
): Future[Option[UrlEntity]] =
shortener((TcoUrl.normalizeProtocol(entity.url), ctx))
.map { urlData =>
Some(
entity.copy(
url = urlData.shortUrl,
expanded = Some(urlData.longUrl),
display = Some(urlData.displayText)
)
)
}
.rescue {
// fail tweets with invalid urls
case UrlShortener.InvalidUrlError =>
Future.exception(TweetCreateFailure.State(TweetCreateState.InvalidUrl))
// fail tweets with malware urls
case UrlShortener.MalwareUrlError =>
Future.exception(TweetCreateFailure.State(TweetCreateState.MalwareUrl))
// propagate OverCapacity
case e @ OverCapacity(_) => Future.exception(e)
// convert any other failure into UrlShorteningFailure
case e => Future.exception(UrlShorteningFailure(e))
}
/**
 * Applies a text-modification function to all parts of the text not found within a UrlEntity,
 * and then updates all the UrlEntity indices as necessary.
 *
 * NOTE(review): the entities are walked once, front to back, so this presumably expects
 * `urlEntities` to be ordered by `fromIndex` and non-overlapping - confirm with callers.
 *
 * @param text the original text that the entity indices refer to (in code points)
 * @param urlEntities entities whose `url` replaces the text between `fromIndex` and `toIndex`
 * @param textMod applied to each stretch of text outside the entities
 * @return the rebuilt text and the entities re-indexed against it
 */
def updateTextAndUrls(
text: String,
urlEntities: Seq[UrlEntity]
)(
textMod: String => String
): (String, Seq[UrlEntity]) = {
// Code-point cursors: position consumed so far in `text`, and position reached in the output.
var offsetInText = Offset.CodePoint(0)
var offsetInNewText = Offset.CodePoint(0)
val newText = new StringBuilder
val newUrlEntities = Seq.newBuilder[UrlEntity]
val indexConverter = new IndexConverter(text)
urlEntities.foreach { e =>
// Emit the (modified) text preceding this entity, then the entity's url.
val nonUrl = textMod(indexConverter.substringByCodePoints(offsetInText.toInt, e.fromIndex))
newText.append(nonUrl)
newText.append(e.url)
offsetInText = Offset.CodePoint(e.toIndex.toInt)
// The entity's new span starts after the text just emitted and covers exactly its url.
val urlFrom = offsetInNewText + Offset.CodePoint.length(nonUrl)
val urlTo = urlFrom + Offset.CodePoint.length(e.url)
val newEntity =
e.copy(fromIndex = urlFrom.toShort, toIndex = urlTo.toShort)
newUrlEntities += newEntity
offsetInNewText = urlTo
}
// Emit whatever text remains after the last entity.
newText.append(textMod(indexConverter.substringByCodePoints(offsetInText.toInt)))
(newText.toString, newUrlEntities.result())
}
}

View File

@ -1,106 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.service.talon.thriftscala._
import com.twitter.servo.util.FutureArrow
import com.twitter.tco_util.DisplayUrl
import com.twitter.tco_util.TcoUrl
import com.twitter.tweetypie.backends.Talon
import com.twitter.tweetypie.core.OverCapacity
import com.twitter.tweetypie.store.Guano
import com.twitter.tweetypie.thriftscala.ShortenedUrl
import scala.util.control.NoStackTrace
/** URL shortening on top of Talon, plus a wrapper that scribes blocked malware URLs. */
object UrlShortener {

  type Type = FutureArrow[(String, Context), ShortenedUrl]

  /** Per-tweet data that accompanies a long URL when it is shortened. */
  case class Context(
    tweetId: TweetId,
    userId: UserId,
    createdAt: Time,
    userProtected: Boolean,
    clientAppId: Option[Long] = None,
    remoteHost: Option[String] = None,
    dark: Boolean = false)

  object MalwareUrlError extends Exception with NoStackTrace
  object InvalidUrlError extends Exception with NoStackTrace

  /**
   * Wraps `underlying` so that a MalwareUrlError failure on a non-dark request is scribed
   * to guano. The result of the underlying shortener is returned unchanged.
   */
  def scribeMalware(guano: Guano)(underlying: Type): Type =
    FutureArrow {
      case (longUrl, ctx) =>
        val shortened = underlying((longUrl, ctx))
        shortened.onFailure {
          case MalwareUrlError if !ctx.dark =>
            val attempt =
              Guano.MalwareAttempt(longUrl, ctx.userId, ctx.clientAppId, ctx.remoteHost)
            guano.scribeMalwareAttempt(attempt)
          case _ =>
        }
    }

  /** Builds the Talon request for one long URL; dark requests are marked as test traffic. */
  private def talonRequest(longUrl: String, ctx: Context): ShortenRequest = {
    val trafficType =
      if (ctx.dark) ShortenTrafficType.Testing
      else ShortenTrafficType.Production

    ShortenRequest(
      userId = ctx.userId,
      longUrl = longUrl,
      auditMsg = "tweetypie",
      directMessage = Some(false),
      protectedAccount = Some(ctx.userProtected),
      maxShortUrlLength = None,
      tweetData = Some(TweetData(ctx.tweetId, ctx.createdAt.inMilliseconds)),
      trafficType = trafficType
    )
  }

  /**
   * A shortener backed by Talon.
   *
   * Fails with MalwareUrlError when Talon reports the URL as blocked, with InvalidUrlError
   * on a BadInput response, and with OverCapacity("talon") on any other response code.
   */
  def fromTalon(talonShorten: Talon.Shorten): Type = {
    val log = Logger(getClass)

    FutureArrow {
      case (longUrl, ctx) =>
        talonShorten(talonRequest(longUrl, ctx)).flatMap { res =>
          res.responseCode match {
            case ResponseCode.Ok if res.malwareStatus == MalwareStatus.UrlBlocked =>
              Future.exception(MalwareUrlError)

            case ResponseCode.Ok =>
              val shortUrl = res.fullShortUrl match {
                case Some(fullShortUrl) =>
                  fullShortUrl
                case None =>
                  // fall back to fromSlug if talon response does not have the full short url
                  // Could be replaced with an exception once the initial integration on
                  // production is done
                  TcoUrl.fromSlug(res.shortUrl, TcoUrl.isHttps(res.longUrl))
              }
              val shortened = ShortenedUrl(
                shortUrl = shortUrl,
                longUrl = res.longUrl,
                displayText = DisplayUrl(shortUrl, Some(res.longUrl), true)
              )
              Future.value(shortened)

            case ResponseCode.BadInput =>
              log.warn(s"Talon rejected URL that Extractor thought was fine: $longUrl")
              Future.exception(InvalidUrlError)

            // we shouldn't see other ResponseCodes, because Talon.Shorten translates them to
            // exceptions, but we have this catch-all just in case.
            case resCode =>
              log.warn(s"Unexpected response code $resCode for '$longUrl'")
              Future.exception(OverCapacity("talon"))
          }
        }
    }
  }
}

View File

@ -1,79 +0,0 @@
package com.twitter.tweetypie
package handler
import com.twitter.servo.util.FutureArrow
import com.twitter.tweetypie.store.Takedown
import com.twitter.tweetypie.thriftscala.DataError
import com.twitter.tweetypie.thriftscala.DataErrorCause
import com.twitter.tweetypie.thriftscala.SetTweetUserTakedownRequest
/** Exposes the arrow that handles SetTweetUserTakedownRequest. */
trait UserTakedownHandler {
val setTweetUserTakedownRequest: FutureArrow[SetTweetUserTakedownRequest, Unit]
}
/**
 * This handler processes SetTweetUserTakedownRequest objects sent to Tweetypie's
 * setTweetUserTakedown endpoint. These requests originate from tweetypie daemon and the
 * request object specifies the user ID of the user who is being modified, and a boolean value
 * to indicate whether takedown is being added or removed.
 *
 * If takedown is being added, the hasTakedown bit is set on all of the user's tweets.
 * If takedown is being removed, we can't automatically unset the hasTakedown bit on all tweets
 * since some of the tweets might have tweet-specific takedowns, in which case the hasTakedown bit
 * needs to remain set. Instead, we flush the user's tweets from cache, and let the repairer
 * unset the bit when hydrating tweets where the bit is set but no user or tweet
 * takedown country codes are present.
 */
object UserTakedownHandler {

  type Type = FutureArrow[SetTweetUserTakedownRequest, Unit]

  /**
   * Computes the takedown event, if any, needed to bring a tweet's hasTakedown bit in line
   * with the user's takedown state and the tweet's own takedown codes / reasons.
   *
   * @return a function yielding `None` when the bit already has the desired value
   */
  def takedownEvent(userHasTakedown: Boolean): Tweet => Option[Takedown.Event] = { tweet =>
    val tweetHasTakedown =
      TweetLenses.tweetypieOnlyTakedownCountryCodes(tweet).exists(_.nonEmpty) ||
        TweetLenses.tweetypieOnlyTakedownReasons(tweet).exists(_.nonEmpty)
    val updatedHasTakedown = userHasTakedown || tweetHasTakedown

    if (updatedHasTakedown != TweetLenses.hasTakedown(tweet)) {
      val updatedTweet = TweetLenses.hasTakedown.set(tweet, updatedHasTakedown)
      Some(
        Takedown.Event(
          tweet = updatedTweet,
          timestamp = Time.now,
          eventbusEnqueue = false,
          scribeForAudit = false,
          updateCodesAndReasons = false
        )
      )
    } else {
      None
    }
  }

  /** Lifts `tweetTakedown` to a sequence of tweets, skipping tweets that need no event. */
  def setHasTakedown(
    tweetTakedown: FutureEffect[Takedown.Event],
    userHasTakedown: Boolean
  ): FutureEffect[Seq[Tweet]] = {
    val eventFor = takedownEvent(userHasTakedown)
    tweetTakedown.contramapOption(eventFor).liftSeq
  }

  /**
   * Checks that the tweet belongs to the expected user, when one is given.
   *
   * @throws DataError with cause UserTweetRelationship when `expectedUserId` is defined and
   *                   differs from the tweet's user id
   */
  def verifyTweetUserId(expectedUserId: Option[UserId], tweet: Tweet): Unit = {
    val tweetUserId: UserId = getUserId(tweet)
    val tweetId: Long = tweet.id

    expectedUserId match {
      case Some(u) if u != tweetUserId =>
        throw DataError(
          message =
            s"SetTweetUserTakedownRequest userId $u does not match userId $tweetUserId for Tweet: $tweetId",
          errorCause = Some(DataErrorCause.UserTweetRelationship),
        )
      case _ =>
        ()
    }
  }

  /**
   * Builds the handler: loads the tweet, verifies its owner when the tweet exists, then
   * applies the takedown effect to it. A missing tweet results in no event.
   */
  def apply(
    getTweet: FutureArrow[TweetId, Option[Tweet]],
    tweetTakedown: FutureEffect[Takedown.Event],
  ): Type =
    FutureArrow { request =>
      getTweet(request.tweetId).flatMap { tweet =>
        tweet.foreach(verifyTweetUserId(request.userId, _))
        setHasTakedown(tweetTakedown, request.hasTakedown)(tweet.toSeq)
      }
    }
}

View File

@ -1,153 +0,0 @@
package com.twitter.tweetypie.handler
import com.twitter.gizmoduck.thriftscala.User
import com.twitter.spam.rtf.thriftscala.SafetyLevel
import com.twitter.tweetypie.repository.CacheControl
import com.twitter.tweetypie.repository.TweetQuery
import com.twitter.tweetypie.thriftscala.MediaEntity
import com.twitter.tweetypie.thriftscala.StatusCounts
import com.twitter.tweetypie.thriftscala.Tweet
import com.twitter.tweetypie.thriftscala.WritePathHydrationOptions
object WritePathQueryOptions {
/**
 * Base TweetQuery.Include for all hydration options: GetTweetsHandler.BaseInclude
 * extended with the tweet fields listed below.
 */
val BaseInclude: TweetQuery.Include =
GetTweetsHandler.BaseInclude.also(
tweetFields = Set(
Tweet.CardReferenceField.id,
Tweet.MediaTagsField.id,
Tweet.SelfPermalinkField.id,
Tweet.ExtendedTweetMetadataField.id,
Tweet.VisibleTextRangeField.id,
Tweet.NsfaHighRecallLabelField.id,
Tweet.CommunitiesField.id,
Tweet.ExclusiveTweetControlField.id,
Tweet.TrustedFriendsControlField.id,
Tweet.CollabControlField.id,
Tweet.EditControlField.id,
Tweet.EditPerspectiveField.id,
Tweet.NoteTweetField.id
)
)
/**
 * Base TweetQuery.Include for all creation-related hydrations: BaseInclude plus place,
 * profile-geo and self-thread fields, additional media metadata, quoted tweets and
 * pasted media.
 */
val BaseCreateInclude: TweetQuery.Include =
BaseInclude
.also(
tweetFields = Set(
Tweet.PlaceField.id,
Tweet.ProfileGeoEnrichmentField.id,
Tweet.SelfThreadMetadataField.id
),
mediaFields = Set(MediaEntity.AdditionalMetadataField.id),
quotedTweet = Some(true),
pastedMedia = Some(true)
)
/**
 * Base TweetQuery.Include for all deletion-related hydrations: BaseInclude plus the
 * bounce-label, conversation-control and edit-control fields.
 *
 * NOTE(review): Tweet.EditControlField.id is already part of BaseInclude, so listing it
 * here looks redundant - confirm that `also` merges the field sets.
 */
val BaseDeleteInclude: TweetQuery.Include = BaseInclude
.also(tweetFields =
Set(Tweet.BounceLabelField.id, Tweet.ConversationControlField.id, Tweet.EditControlField.id))
/** The thrift field ids of every field declared on StatusCounts. */
val AllCounts: Set[Short] = StatusCounts.fieldInfos.map(_.tfield.id).toSet
/**
 * Query options for hydrating a tweet that is being created.
 *
 * @param cause the query cause to record on the options
 * @param user the user the tweet is hydrated for
 * @param options write-path hydration options from the request
 * @param isEditControlEdit whether the tweet is an edit; counts are included only for edits
 */
def insert(
cause: TweetQuery.Cause,
user: User,
options: WritePathHydrationOptions,
isEditControlEdit: Boolean
): TweetQuery.Options =
createOptions(
writePathHydrationOptions = options,
includePerspective = false,
// include counts if tweet edit, otherwise false
includeCounts = isEditControlEdit,
cause = cause,
forUser = user,
// Do not perform any filtering when we are hydrating the tweet we are creating
safetyLevel = SafetyLevel.FilterNone
)
/**
 * Query options for hydrating the source tweet of a retweet: perspective and counts are
 * included, the cause is Read, and the TweetWritesApi safety level is applied.
 */
def retweetSourceTweet(user: User, options: WritePathHydrationOptions): TweetQuery.Options =
createOptions(
writePathHydrationOptions = options,
includePerspective = true,
includeCounts = true,
cause = TweetQuery.Cause.Read,
forUser = user,
// If Scarecrow is down, we may proceed with creating a RT. The safetyLevel is necessary
// so that the inner tweet's counts are not sent in the TweetCreateEvent we send
// to EventBus. If they were emitted, live pipeline would publish counts to the clients.
safetyLevel = SafetyLevel.TweetWritesApi
)
/**
 * Query options for reading the tweet quoted by the tweet being created, on behalf of
 * `user`, with perspective and counts included.
 */
def quotedTweet(user: User, options: WritePathHydrationOptions): TweetQuery.Options = {
  // This safety level is passed so that the inner tweet's counts are excluded from the
  // TweetCreateEvent we publish to EventBus; if they were emitted, the live pipeline would
  // publish counts to the clients.
  val quotedTweetSafetyLevel = SafetyLevel.TweetWritesApi

  createOptions(
    writePathHydrationOptions = options,
    includePerspective = true,
    includeCounts = true,
    cause = TweetQuery.Cause.Read,
    forUser = user,
    safetyLevel = quotedTweetSafetyLevel
  )
}
// Returns a singleton set holding `item` when `cond` is true, and the empty set otherwise.
private def condSet[A](cond: Boolean, item: A): Set[A] =
  cond match {
    case true => Set(item)
    case false => Set.empty[A]
  }
/**
 * Builds the TweetQuery.Options shared by the write-path queries in this object.
 *
 * The include set starts from BaseCreateInclude and adds the perspective, cards (v1 or v2)
 * and counts fields according to the flags. ConversationControl is always requested.
 */
private def createOptions(
  writePathHydrationOptions: WritePathHydrationOptions,
  includePerspective: Boolean,
  includeCounts: Boolean,
  cause: TweetQuery.Cause,
  forUser: User,
  safetyLevel: SafetyLevel
): TweetQuery.Options = {
  val platformKey = writePathHydrationOptions.cardsPlatformKey
  val wantsCards: Boolean = writePathHydrationOptions.includeCards
  // Cards v2 is selected when a platform key was supplied; cards v1 otherwise.
  val wantsCardsV2: Boolean = wantsCards && platformKey.nonEmpty
  val wantsCardsV1: Boolean = wantsCards && !wantsCardsV2

  val conditionalTweetFields =
    condSet(includePerspective, Tweet.PerspectiveField.id) ++
      condSet(wantsCardsV1, Tweet.CardsField.id) ++
      condSet(wantsCardsV2, Tweet.Card2Field.id) ++
      condSet(includeCounts, Tweet.CountsField.id) ++
      // PreviousCountsField follows the includeCounts flag on the write path
      condSet(includeCounts, Tweet.PreviousCountsField.id)

  // ConversationControl is hydrated on reply tweet creations so that clients can consume it
  val requestedTweetFields = conditionalTweetFields + Tweet.ConversationControlField.id

  val requestedCounts: Set[Short] = if (!includeCounts) Set.empty else AllCounts

  // Fall back to English when the user has no account data.
  val userLanguage = forUser.account.fold("en")(_.language)

  TweetQuery.Options(
    include = BaseCreateInclude.also(
      tweetFields = requestedTweetFields,
      countsFields = requestedCounts
    ),
    cause = cause,
    forUserId = Some(forUser.id),
    cardsPlatformKey = platformKey,
    languageTag = userLanguage,
    extensionsArgs = writePathHydrationOptions.extensionsArgs,
    safetyLevel = safetyLevel,
    simpleQuotedTweet = writePathHydrationOptions.simpleQuotedTweet
  )
}
/**
 * Query options for loading tweets that are about to be deleted: the deletion include set,
 * a read-only cache policy, no extension args, and no source-tweet requirement.
 */
def deleteTweets: TweetQuery.Options = {
  // A retweet should be deletable even if its source tweet is missing.
  val sourceTweetRequired = false

  TweetQuery.Options(
    include = BaseDeleteInclude,
    cacheControl = CacheControl.ReadOnlyCache,
    extensionsArgs = None,
    requireSourceTweet = sourceTweetRequired
  )
}
/**
 * Same as `deleteTweets`, but with edit-control hydration switched off.
 */
def deleteTweetsWithoutEditControl: TweetQuery.Options = {
  val deleteOptions = deleteTweets
  deleteOptions.copy(enableEditControlHydration = false)
}
}

View File

@ -1,42 +0,0 @@
package com.twitter.tweetypie
import com.twitter.context.thriftscala.Viewer
import com.twitter.tweetypie.thriftscala._
import scala.util.matching.Regex
import com.twitter.context.TwitterContext
import com.twitter.finagle.stats.Stat
import com.twitter.snowflake.id.SnowflakeId
package object handler {
  type PlaceLanguage = String
  type TweetIdGenerator = () => Future[TweetId]
  type NarrowcastValidator = FutureArrow[Narrowcast, Narrowcast]
  type ReverseGeocoder = FutureArrow[(GeoCoordinates, PlaceLanguage), Option[Place]]
  type CardUri = String

  // A narrowcast location is either a PlaceId or a US metro code.
  type NarrowcastLocation = String

  // Sixteen hexadecimal digits, matched case-insensitively.
  val PlaceIdRegex: Regex = """(?i)\A[0-9a-fA-F]{16}\Z""".r

  // Brings the Tweetypie-permitted TwitterContext into scope.
  val TwitterContext: TwitterContext =
    com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit)

  /**
   * Returns a Contributor for the authenticated user of the current TwitterContext viewer,
   * unless there is no authenticated user or it is `userId` itself.
   */
  def getContributor(userId: UserId): Option[Contributor] = {
    val authenticatedUserId = TwitterContext().getOrElse(Viewer()).authenticatedUserId
    authenticatedUserId.collect {
      case id if id != userId => Contributor(id)
    }
  }

  /**
   * Records the age (in milliseconds) of `tweetId` on `stat` when that age is at most
   * `windowLength`. Nothing is recorded when no timestamp can be derived from the id.
   *
   * Per the original note this is meant for requests where the tweet was NotFound, to
   * understand how lossy reads are directly after tweet creation; that check is not made
   * here, so it is presumably the caller's responsibility.
   */
  def trackLossyReadsAfterWrite(stat: Stat, windowLength: Duration)(tweetId: TweetId): Unit =
    SnowflakeId.timeFromIdOpt(tweetId).foreach { createdAt =>
      val ageMillis = Time.now.since(createdAt).inMillis
      if (ageMillis <= windowLength.inMillis) stat.add(ageMillis)
    }
}

View File

@ -1,58 +0,0 @@
# Build target for the Scala sources in this directory.
# All *.scala files are compiled with the "fatal_warnings" compiler option set and
# strict_deps enabled, so every dependency used directly by the sources must be
# listed explicitly below.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"core-app-services/lib:coreservices",
"featureswitches/featureswitches-core:v2",
"featureswitches/featureswitches-core/src/main/scala",
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
"mediaservices/commons/src/main/thrift:thrift-scala",
"mediaservices/media-util",
"scrooge/scrooge-core",
"tweetypie/servo/repo",
"tweetypie/servo/repo/src/main/thrift:thrift-scala",
"tweetypie/servo/util",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/scala/com/twitter/takedown/util",
"src/thrift/com/twitter/context:twitter-context-scala",
"src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala",
"src/thrift/com/twitter/escherbird:media-annotation-structs-scala",
"src/thrift/com/twitter/escherbird:tweet-annotation-scala",
"src/thrift/com/twitter/escherbird/common:common-scala",
"src/thrift/com/twitter/expandodo:cards-scala",
"src/thrift/com/twitter/expandodo:only-scala",
"src/thrift/com/twitter/gizmoduck:thrift-scala",
"src/thrift/com/twitter/gizmoduck:user-thrift-scala",
"src/thrift/com/twitter/spam/rtf:safety-label-scala",
"src/thrift/com/twitter/spam/rtf:safety-level-scala",
"src/thrift/com/twitter/timelineservice/server/internal:thrift-scala",
"tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala",
"tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala",
"tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala",
"stitch/stitch-core",
"stitch/stitch-timelineservice/src/main/scala",
"strato/src/main/scala/com/twitter/strato/access",
"strato/src/main/scala/com/twitter/strato/callcontext",
"tco-util",
"tweet-util",
"tweetypie/server/src/main/scala/com/twitter/tweetypie",
"tweetypie/server/src/main/scala/com/twitter/tweetypie/core",
"tweetypie/server/src/main/scala/com/twitter/tweetypie/media",
"tweetypie/server/src/main/scala/com/twitter/tweetypie/repository",
"tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil",
"tweetypie/server/src/main/thrift:compiled-scala",
"tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields",
"tweetypie/common/src/scala/com/twitter/tweetypie/client_id",
"tweetypie/common/src/scala/com/twitter/tweetypie/media",
"tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities",
"tweetypie/common/src/scala/com/twitter/tweetypie/tweettext",
"tweetypie/common/src/scala/com/twitter/tweetypie/util",
"twitter-context",
"util/util-slf4j-api/src/main/scala/com/twitter/util/logging",
"util/util-stats/src/main/scala",
"visibility/common/src/main/thrift/com/twitter/visibility:action-scala",
"visibility/results/src/main/scala/com/twitter/visibility/results/counts",
],
)

View File

@ -1,76 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.expandodo.thriftscala.Card2
import com.twitter.expandodo.thriftscala.Card2RequestOptions
import com.twitter.featureswitches.v2.FeatureSwitchResults
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.CardReferenceUriExtractor
import com.twitter.tweetypie.core.NonTombstone
import com.twitter.tweetypie.core.ValueState
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala._
/**
 * Hydrates the card2 field of a tweet from the Card2 repository, using either the tweet's
 * card reference or, failing that, its url entities.
 */
object Card2Hydrator {
  type Type = ValueHydrator[Option[Card2], Ctx]

  case class Ctx(
    urlEntities: Seq[UrlEntity],
    mediaEntities: Seq[MediaEntity],
    cardReference: Option[CardReference],
    underlyingTweetCtx: TweetCtx,
    featureSwitchResults: Option[FeatureSwitchResults])
      extends TweetCtx.Proxy

  val hydratedField: FieldByPath = fieldByPath(Tweet.Card2Field)

  // Feature switch holding the list of URLs for which no card should be hydrated.
  val hydrationUrlBlockListKey = "card_hydration_blocklist"

  def apply(repo: Card2Repository.Type): ValueHydrator[Option[Card2], Ctx] =
    ValueHydrator[Option[Card2], Ctx] { (_, ctx) =>
      val options = requestOptions(ctx)
      val blockedUrls =
        ctx.featureSwitchResults
          .flatMap(_.getStringArray(hydrationUrlBlockListKey, false))
          .getOrElse(Seq())

      // A card reference, when present and extractable, takes precedence over url entities.
      val cardKeys =
        ctx.cardReference match {
          case Some(CardReferenceUriExtractor(cardUri)) =>
            cardUri match {
              case NonTombstone(uri) if !blockedUrls.contains(uri) => Seq(UrlCard2Key(uri))
              case _ => Nil
            }
          case _ =>
            ctx.urlEntities
              .filterNot(entity => entity.expanded.exists(blockedUrls.contains))
              .map(entity => UrlCard2Key(entity.url))
        }

      Stitch
        .traverse(cardKeys) { key => repo(key, options).liftNotFoundToOption }
        .liftToTry
        .map {
          case Return(results) =>
            // When several lookups succeed, the last card found is the one kept.
            val lastCard = results.flatten.lastOption
            if (lastCard.isEmpty) ValueState.UnmodifiedNone
            else ValueState.modified(lastCard)
          case Throw(_) =>
            ValueState.partial(None, hydratedField)
        }
    }.onlyIf { (curr, ctx) =>
      curr.isEmpty &&
      ctx.tweetFieldRequested(Tweet.Card2Field) &&
      ctx.opts.cardsPlatformKey.nonEmpty &&
      !ctx.isRetweet &&
      ctx.mediaEntities.isEmpty &&
      (ctx.cardReference.nonEmpty || ctx.urlEntities.nonEmpty)
    }

  // Only called from the hydrator body, which runs only when cardsPlatformKey is non-empty
  // (see onlyIf above), so the `.get` below is guarded.
  private[this] def requestOptions(ctx: Ctx) = {
    val hasCardReference = ctx.cardReference.nonEmpty
    Card2RequestOptions(
      platformKey = ctx.opts.cardsPlatformKey.get,
      perspectiveUserId = ctx.opts.forUserId,
      allowNonTcoUrls = hasCardReference,
      languageTag = Some(ctx.opts.languageTag)
    )
  }
}

View File

@ -1,47 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.expandodo.thriftscala.Card
import com.twitter.stitch.NotFound
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala._
/**
 * Hydrates the cards field of a tweet by looking up each url entity in the card repository.
 */
object CardHydrator {
  type Type = ValueHydrator[Option[Seq[Card]], Ctx]

  case class Ctx(
    urlEntities: Seq[UrlEntity],
    mediaEntities: Seq[MediaEntity],
    underlyingTweetCtx: TweetCtx)
      extends TweetCtx.Proxy

  val hydratedField: FieldByPath = fieldByPath(Tweet.CardsField)

  private[this] val partialResult = ValueState.partial(None, hydratedField)

  def apply(repo: CardRepository.Type): Type = {
    // A url with no cards (NotFound) simply contributes nothing.
    def cardsFor(url: String): Stitch[Seq[Card]] =
      repo(url).handle { case NotFound => Nil }

    val hydrateCards =
      ValueHydrator[Option[Seq[Card]], Ctx] { (_, ctx) =>
        val urls = ctx.urlEntities.map(entity => entity.url)
        Stitch.traverse(urls)(cardsFor _).liftToTry.map {
          case Return(cardsPerUrl) =>
            // Although the hydrated type is Option[Seq[Card]], at most one card is ever
            // returned, and it is always the last one found.
            cardsPerUrl.flatten.lastOption match {
              case Some(card) => ValueState.modified(Some(Seq(card)))
              case None => ValueState.UnmodifiedNone
            }
          case _ => partialResult
        }
      }

    hydrateCards.onlyIf { (curr, ctx) =>
      curr.isEmpty &&
      ctx.tweetFieldRequested(Tweet.CardsField) &&
      !ctx.isRetweet &&
      ctx.mediaEntities.isEmpty
    }
  }
}

View File

@ -1,36 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.stitch.NotFound
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala._
/**
 * Fills in the screen name of a tweet's contributor from the user identity repository.
 */
object ContributorHydrator {
  type Type = ValueHydrator[Option[Contributor], TweetCtx]

  val hydratedField: FieldByPath = fieldByPath(Tweet.ContributorField, Contributor.ScreenNameField)

  def once(h: Type): Type =
    TweetHydration.completeOnlyOnce(
      hydrator = h,
      hydrationType = HydrationType.Contributor
    )

  def apply(repo: UserIdentityRepository.Type): Type = {
    val fillScreenName =
      ValueHydrator[Contributor, TweetCtx] { (contributor, _) =>
        repo(UserKey(contributor.userId)).liftToTry.map {
          case Return(identity) =>
            ValueState.delta(contributor, withScreenName(contributor, identity))
          // An unknown user leaves the contributor untouched rather than marking it partial.
          case Throw(NotFound) => ValueState.unmodified(contributor)
          case Throw(_) => ValueState.partial(contributor, hydratedField)
        }
      }

    // Only contributors that still lack a screen name need the lookup.
    fillScreenName
      .onlyIf((contributor, _) => contributor.screenName.isEmpty)
      .liftOption
  }

  /**
   * Returns `contributor` with its screen name taken from `identity`.
   */
  private def withScreenName(contributor: Contributor, identity: UserIdentity): Contributor = {
    val screenName = Some(identity.screenName)
    contributor.copy(screenName = screenName)
  }
}

View File

@ -1,42 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.thriftscala._
/**
* Remove contributor data from tweet if it should not be available to the
* caller. The contributor field is populated in the cached
* [[ContributorHydrator]].
*
* Contributor data is always available on the write path. It is available on
* the read path for the tweet author (or user authenticated as the tweet
* author in the case of contributors/teams), or if the caller has disabled
* visibility filtering.
*
* The condition for running this filtering hydrator (onlyIf) has been a
* source of confusion. Keep in mind that the condition expresses when to
* *remove* data, not when to return it.
*
* In short, keep data when:
* !reading || requested by author || !(enforce visibility filtering)
*
* Remove data when none of these conditions apply:
* reading && !(requested by author) && enforce visibility filtering
*
*/
object ContributorVisibilityFilter {
  type Type = ValueHydrator[Option[Contributor], TweetCtx]

  def apply(): Type = {
    // Clears the contributor; a modification is reported only when one was present.
    val clearContributor =
      ValueHydrator.map[Option[Contributor], TweetCtx] { (curr, _) =>
        if (curr.isEmpty) ValueState.unmodified(None)
        else ValueState.modified(None)
      }

    // The condition describes when to REMOVE the data: on a read, by someone other than the
    // tweet author, with visibility filtering enforced.
    clearContributor.onlyIf { (_, ctx) =>
      ctx.opts.cause.reading(ctx.tweetId) &&
      !ctx.opts.forUserId.contains(ctx.userId) &&
      ctx.opts.enforceVisibilityFiltering
    }
  }
}

View File

@ -1,108 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.ValueState
import com.twitter.tweetypie.repository.ConversationControlRepository
import com.twitter.tweetypie.serverutil.ExceptionCounter
import com.twitter.tweetypie.thriftscala.ConversationControl
/**
 * Hydrates the conversationControl field of a reply tweet by loading it from the
 * conversation control repository, keyed by the tweet's conversation id. Failures are
 * counted and otherwise ignored (fail open).
 */
private object ReplyTweetConversationControlHydrator {
  type Type = ConversationControlHydrator.Type
  type Ctx = ConversationControlHydrator.Ctx

  // The conversation control thrift field was added Feb 17th, 2020, so no conversation
  // before then has a conversation control field to hydrate. We explicitly short circuit
  // to save resources from querying for tweets we know do not have the field set.
  //
  // NOTE(review): the literal below is 2019-04-01T00:00:00Z, not 2020-02-17. The earlier
  // cutoff is conservative (it only costs extra lookups for tweets created in between), so
  // the value is left as-is; confirm which date is intended before changing it.
  val FirstValidDate: Time = Time.fromMilliseconds(1554076800000L) // 2019-04-01 UTC

  def apply(
    repo: ConversationControlRepository.Type,
    stats: StatsReceiver
  ): Type = {
    val exceptionCounter = ExceptionCounter(stats)

    ValueHydrator[Option[ConversationControl], Ctx] { (curr, ctx) =>
      ctx.conversationId match {
        case Some(conversationId) =>
          repo(conversationId, ctx.opts.cacheControl).liftToTry.map {
            case Return(conversationControl) =>
              ValueState.delta(curr, conversationControl)
            case Throw(exception) =>
              // In the case where we get an exception, we want to count the
              // exception but fail open.
              exceptionCounter(exception)

              // Reply tweet ConversationControlField hydration should fail open.
              // Ideally we would return ValueState.partial here to tell the caller that the
              // requested Tweet.ConversationControlField was not hydrated. We cannot do so
              // because GetTweetFields returns TweetFieldsResultFailed for partial results,
              // which would fail closed.
              ValueState.unmodified(curr)
          }
        case None =>
          // Not reachable through the onlyIf guard below, which requires a conversation id;
          // handled explicitly so that no unchecked Option.get is needed.
          Stitch.value(ValueState.unmodified(curr))
      }
    }.onlyIf { (_, ctx) =>
      // This hydrator is specifically for replies so only run when Tweet is a reply
      ctx.inReplyToTweetId.isDefined &&
      // See comment for FirstValidDate
      ctx.createdAt > FirstValidDate &&
      // We need conversation id to get ConversationControl
      ctx.conversationId.isDefined &&
      // Only run if the ConversationControl was requested
      ctx.tweetFieldRequested(Tweet.ConversationControlField)
    }
  }
}
/**
 * ConversationControlHydrator is used to hydrate the conversationControl field.
 * For root Tweets, this hydrator just passes through the existing conversationControl
 * (optionally scrubbed of inviteViaMention).
 * For reply Tweets, it loads the conversationControl from the root Tweet of the conversation.
 * Only root Tweets in a conversation (i.e. the Tweet pointed to by conversationId) have
 * a persisted conversationControl, so we have to hydrate that field for all replies in order
 * to know if a Tweet in a conversation can be replied to.
 */
object ConversationControlHydrator {
  type Type = ValueHydrator[Option[ConversationControl], Ctx]

  case class Ctx(conversationId: Option[ConversationId], underlyingTweetCtx: TweetCtx)
      extends TweetCtx.Proxy

  /**
   * Clears `inviteViaMention` on the known ConversationControl variants.
   *
   * Any other variant (for example the unknown-field case scrooge generates for thrift
   * unions) is passed through unchanged. Previously this used `collect`, which turned such
   * a value into None and thereby dropped the conversation control altogether.
   * NOTE(review): confirm that preserving unrecognised variants is the desired behaviour.
   */
  private def scrubInviteViaMention(
    ccOpt: Option[ConversationControl]
  ): Option[ConversationControl] =
    ccOpt.map {
      case ConversationControl.ByInvitation(byInvitation) =>
        ConversationControl.ByInvitation(byInvitation.copy(inviteViaMention = None))
      case ConversationControl.Community(community) =>
        ConversationControl.Community(community.copy(inviteViaMention = None))
      case ConversationControl.Followers(followers) =>
        ConversationControl.Followers(followers.copy(inviteViaMention = None))
      case other =>
        other
    }

  /**
   * @param repo source of the conversation control for reply tweets
   * @param disableInviteViaMention when the gate is open, inviteViaMention is scrubbed from
   *                                the current value before it is passed on
   * @param stats receiver used to count repository exceptions for reply tweets
   */
  def apply(
    repo: ConversationControlRepository.Type,
    disableInviteViaMention: Gate[Unit],
    stats: StatsReceiver
  ): Type = {
    val replyTweetConversationControlHydrator = ReplyTweetConversationControlHydrator(
      repo,
      stats
    )

    ValueHydrator[Option[ConversationControl], Ctx] { (curr, ctx) =>
      val ccUpdated =
        if (disableInviteViaMention()) scrubInviteViaMention(curr)
        else curr

      if (ctx.inReplyToTweetId.isEmpty) {
        // For non-reply tweets, pass through the existing conversation control
        Stitch.value(ValueState.delta(curr, ccUpdated))
      } else {
        replyTweetConversationControlHydrator(ccUpdated, ctx)
      }
    }
  }
}

View File

@ -1,33 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala._
/**
* Hydrates the conversationId field for any tweet that is a reply to another tweet.
* It uses that other tweet's conversationId.
*/
object ConversationIdHydrator {
  type Type = ValueHydrator[Option[ConversationId], TweetCtx]

  val hydratedField: FieldByPath =
    fieldByPath(Tweet.CoreDataField, TweetCoreData.ConversationIdField)

  def apply(repo: ConversationIdRepository.Type): Type = {
    val resolveConversationId =
      ValueHydrator[Option[ConversationId], TweetCtx] { (_, ctx) =>
        ctx.inReplyToTweetId match {
          case Some(parentId) =>
            // A reply takes its conversation id from the tweet it replies to.
            val key = ConversationIdKey(ctx.tweetId, parentId)
            repo(key).liftToTry.map {
              case Return(rootId) => ValueState.modified(Some(rootId))
              case Throw(_) => ValueState.partial(None, hydratedField)
            }
          case None =>
            // Not a reply: the tweet is the root of its own conversation.
            Stitch.value(ValueState.modified(Some(ctx.tweetId)))
        }
      }

    // Nothing to do when a conversation id is already present.
    resolveConversationId.onlyIf((curr, _) => curr.isEmpty)
  }
}

View File

@ -1,54 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala.FieldByPath
/**
* Hydrates the `conversationMuted` field of Tweet. `conversationMuted`
* will be true if the conversation that this tweet is part of has been
* muted by the user. This field is perspectival, so the result of this
* hydrator should never be cached.
*/
object ConversationMutedHydrator {
  type Type = ValueHydrator[Option[Boolean], Ctx]

  case class Ctx(conversationId: Option[TweetId], underlyingTweetCtx: TweetCtx)
      extends TweetCtx.Proxy

  val hydratedField: FieldByPath = fieldByPath(Tweet.ConversationMutedField)

  // Pre-built results, shared across invocations.
  private[this] val partialResult = ValueState.partial(None, hydratedField)
  private[this] val modifiedTrue = ValueState.modified(Some(true))
  private[this] val modifiedFalse = ValueState.modified(Some(false))

  def apply(repo: ConversationMutedRepository.Type): Type = {
    val hydrateMuted =
      ValueHydrator[Option[Boolean], Ctx] { (_, ctx) =>
        // Both the requesting user and the conversation id are needed for the lookup.
        val lookupKey =
          for {
            viewerId <- ctx.opts.forUserId
            convoId <- ctx.conversationId
          } yield (viewerId, convoId)

        lookupKey match {
          case Some((viewerId, convoId)) =>
            repo(viewerId, convoId).liftToTry.map {
              case Return(muted) => if (muted) modifiedTrue else modifiedFalse
              case Throw(_) => partialResult
            }
          case None =>
            ValueState.StitchUnmodifiedNone
        }
      }

    hydrateMuted.onlyIf { (curr, ctx) =>
      // The field is unlikely to be set already, but if it is, skip the repeated lookup.
      curr.isEmpty &&
      // Hydrated only on explicit request. At the time the original code was written the
      // field was only used to display UI for toggling the muted state of the conversation.
      ctx.tweetFieldRequested(Tweet.ConversationMutedField) &&
      // Retweets are not part of a conversation, so should not be muted.
      !ctx.isRetweet
    }
  }
}

View File

@ -1,229 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository.TweetQuery
import com.twitter.tweetypie.tweettext.TweetText
import com.twitter.tweetypie.thriftscala._
object CopyFromSourceTweet {
/**
 * A `ValueHydrator` over `TweetData` that, when a source tweet result is present, replaces
 * the retweet with the result of [[copy]] applied to the source tweet and the retweet.
 * Tweet data without a source tweet result is left unmodified.
 */
def hydrator: ValueHydrator[TweetData, TweetQuery.Options] =
  ValueHydrator.map { (tweetData, _) =>
    val sourceTweet = tweetData.sourceTweetResult.map(result => result.value.tweet)
    sourceTweet match {
      case Some(src) =>
        val merged = copy(src, tweetData.tweet)
        ValueState.modified(tweetData.copy(tweet = merged))
      case None =>
        ValueState.unmodified(tweetData)
    }
  }
/**
 * Returns `dst` (the retweet) updated with fields taken from `src` (the source tweet).
 * This is more involved than it looks, because:
 *
 * - the retweet has an extra mention entity due to the "RT @user" prefix;
 * - the retweet text may be truncated at the end and need not contain all of the text of
 *   the source tweet; truncation may happen in the middle of an entity;
 * - the retweet text may use a different unicode normalization, which affects code point
 *   indices, so entities are not shifted by a fixed amount equal to the RT prefix;
 * - url entities, when hydrated, may be converted to media entities; url entities may not
 *   be hydrated in the retweet, so the source tweet may have a media entity corresponding
 *   to an unhydrated url entity in the retweet;
 * - several media entities may map to a single url entity, because the tweet may have
 *   multiple photos.
 *
 * Both tweets must have core data; `Option.get` is called on it.
 */
def copy(src: Tweet, dst: Tweet): Tweet = {
  val srcCoreData = src.coreData.get
  val dstCoreData = dst.coreData.get

  // code point index of the end of the retweet text
  val retweetText = getText(dst)
  val max = retweetText.codePointCount(0, retweetText.length).toShort

  // all entities of the source tweet, as one list sorted by fromIndex
  val sourceEntities = getWrappedEntities(src)

  // same for the retweet, minus its first entity (expected to be the "RT @user" mention),
  // which is added back below
  val retweetEntities = getWrappedEntities(dst).drop(1)

  // take indices from the retweet entities, then restore the original per-type ordering;
  // for media entities, order matters to clients
  val merged = merge(sourceEntities, retweetEntities, max).sortBy(_.position)

  // split the merged list back out by entity type
  val mergedMentions = merged.collect { case WrappedMentionEntity(entity, _) => entity }
  val mergedHashtags = merged.collect { case WrappedHashtagEntity(entity, _) => entity }
  val mergedCashtags = merged.collect { case WrappedCashtagEntity(entity, _) => entity }
  val mergedUrls = merged.collect { case WrappedUrlEntity(entity, _) => entity }
  val mergedMedia = merged.collect { case WrappedMediaEntity(entity, _) => entity }

  val mergedCoreData =
    dstCoreData.copy(
      hasMedia = srcCoreData.hasMedia,
      hasTakedown = dstCoreData.hasTakedown || srcCoreData.hasTakedown
    )
  val rtPrefixMention = getMentions(dst).take(1)
  val stampSource = updateSourceStatusId(src.id, getUserId(src))
  val takedownCodes =
    mergeTakedowns(
      TweetLenses.takedownCountryCodes.get(src),
      TweetLenses.takedownCountryCodes.get(dst)
    )

  // write everything back into the retweet, re-adding the RT @mention in front
  dst.copy(
    coreData = Some(mergedCoreData),
    mentions = Some(rtPrefixMention ++ mergedMentions),
    hashtags = Some(mergedHashtags),
    cashtags = Some(mergedCashtags),
    urls = Some(mergedUrls),
    media = Some(mergedMedia.map(stampSource)),
    quotedTweet = src.quotedTweet,
    card2 = src.card2,
    cards = src.cards,
    language = src.language,
    mediaTags = src.mediaTags,
    spamLabel = src.spamLabel,
    takedownCountryCodes = takedownCodes,
    conversationControl = src.conversationControl,
    exclusiveTweetControl = src.exclusiveTweetControl
  )
}
/**
 * Combines any number of optional takedown lists into one sorted list without duplicates.
 * Returns None when none of the given lists is defined.
 */
private def mergeTakedowns(takedowns: Option[Seq[CountryCode]]*): Option[Seq[CountryCode]] = {
  val definedLists = takedowns.flatten
  if (definedLists.isEmpty) None
  else Some(definedLists.flatten.distinct.sorted)
}
/**
 * Builds a function that makes sure a media entity carries a source status id and a source
 * user id, since a retweet should never have media without them.
 */
private def updateSourceStatusId(
  srcTweetId: TweetId,
  srcUserId: UserId
): MediaEntity => MediaEntity = {
  // A sourceStatusId that is already set indicates "pasted media": the values are already
  // correct, because retweeting does not change sourceStatusId / sourceUserId.
  case pasted if pasted.sourceStatusId.nonEmpty =>
    pasted
  // Otherwise attribute the media to the source tweet, keeping any existing source user.
  case unattributed =>
    unattributed.copy(
      sourceStatusId = Some(srcTweetId),
      sourceUserId = unattributed.sourceUserId.orElse(Some(srcUserId))
    )
}
/**
 * Attempts to match up entities from the source tweet with entities from the retweet,
 * and to use the source tweet entities shifted to the retweet entity indices.
 *
 * Matching proceeds from the front of both lists and stops at the first pair of heads whose
 * normalized text differs, or as soon as either list is exhausted; everything from that
 * point on is dropped.
 *
 * NOTE(review): only exact matches on normalized text are accepted. An earlier version of
 * this code computed (but never used) a "possibly truncated" flag for a retweet entity that
 * ends at `maxIndex - 1`; no prefix matching for truncated entities is performed.
 *
 * @param srcEntities source tweet entities, sorted by fromIndex
 * @param rtEntities retweet entities, sorted by fromIndex
 * @param maxIndex code point length of the retweet text; passed through the recursion and
 *                 otherwise unused
 */
private def merge(
  srcEntities: List[WrappedEntity],
  rtEntities: List[WrappedEntity],
  maxIndex: Short
): List[WrappedEntity] =
  (srcEntities, rtEntities) match {
    case (srcHead :: _, rtHead :: _) if srcHead.normalizedText == rtHead.normalizedText =>
      // There could be multiple media entities for the same t.co url, so we work on
      // contiguous groups of entities that share the same fromIndex.
      val remainingRt = rtEntities.dropWhile(_.fromIndex == rtHead.fromIndex)
      val srcGroup =
        srcEntities
          .takeWhile(_.fromIndex == srcHead.fromIndex)
          .map(_.shift(rtHead.fromIndex, rtHead.toIndex))
      val remainingSrc = srcEntities.drop(srcGroup.size)

      srcGroup ++ merge(remainingSrc, remainingRt, maxIndex)

    case _ =>
      // One of:
      // - both lists are empty: all entities were matched successfully;
      // - only retweet entities remain: this can happen if a text truncation turns something
      //   invalid like #tag1#tag2 or @mention1@mention2 into a valid entity, so the
      //   remaining retweet entities are dropped;
      // - only source entities remain: they were truncated out of the retweet;
      // - the heads do not match: most likely because of truncation, so we stop here.
      Nil
  }
/**
 * Wraps every url, media, mention, hashtag and cashtag entity of `tweet` in the matching
 * WrappedEntity subclass (remembering its position within its own entity list), and
 * returns them as a single list sorted by fromIndex.
 */
private def getWrappedEntities(tweet: Tweet): List[WrappedEntity] = {
  val urls =
    getUrls(tweet).zipWithIndex.map { case (entity, pos) => WrappedUrlEntity(entity, pos) }
  val media =
    getMedia(tweet).zipWithIndex.map { case (entity, pos) => WrappedMediaEntity(entity, pos) }
  val mentions =
    getMentions(tweet).zipWithIndex.map {
      case (entity, pos) => WrappedMentionEntity(entity, pos)
    }
  val hashtags =
    getHashtags(tweet).zipWithIndex.map {
      case (entity, pos) => WrappedHashtagEntity(entity, pos)
    }
  val cashtags =
    getCashtags(tweet).zipWithIndex.map {
      case (entity, pos) => WrappedCashtagEntity(entity, pos)
    }

  // The concatenation order decides how entities with equal fromIndex are ordered,
  // because sortBy is stable.
  val all = urls ++ media ++ mentions ++ hashtags ++ cashtags
  all.sortBy(_.fromIndex).toList
}
/**
 * The thrift entity classes have no common parent class, so each entity is wrapped in a
 * subclass of this type. That lets entities of different kinds live in one list and gives
 * a uniform way of shifting their indices.
 */
private sealed abstract class WrappedEntity(
  val fromIndex: Short,
  val toIndex: Short,
  val rawText: String) {

  /** the original position of the entity within its own entity list */
  val position: Int

  /** NFC-normalized, lower-cased form of the raw text, used to compare entities */
  val normalizedText: String = TweetText.nfcNormalize(rawText).toLowerCase

  /** returns a copy whose underlying entity has the given indices */
  def shift(fromIndex: Short, toIndex: Short): WrappedEntity
}

private case class WrappedUrlEntity(entity: UrlEntity, position: Int)
    extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.url) {
  override def shift(fromIndex: Short, toIndex: Short): WrappedUrlEntity = {
    val shifted = entity.copy(fromIndex = fromIndex, toIndex = toIndex)
    copy(entity = shifted)
  }
}

private case class WrappedMediaEntity(entity: MediaEntity, position: Int)
    extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.url) {
  override def shift(fromIndex: Short, toIndex: Short): WrappedMediaEntity = {
    val shifted = entity.copy(fromIndex = fromIndex, toIndex = toIndex)
    copy(entity = shifted)
  }
}

private case class WrappedMentionEntity(entity: MentionEntity, position: Int)
    extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.screenName) {
  override def shift(fromIndex: Short, toIndex: Short): WrappedMentionEntity = {
    val shifted = entity.copy(fromIndex = fromIndex, toIndex = toIndex)
    copy(entity = shifted)
  }
}

private case class WrappedHashtagEntity(entity: HashtagEntity, position: Int)
    extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.text) {
  override def shift(fromIndex: Short, toIndex: Short): WrappedHashtagEntity = {
    val shifted = entity.copy(fromIndex = fromIndex, toIndex = toIndex)
    copy(entity = shifted)
  }
}

private case class WrappedCashtagEntity(entity: CashtagEntity, position: Int)
    extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.text) {
  override def shift(fromIndex: Short, toIndex: Short): WrappedCashtagEntity = {
    val shifted = entity.copy(fromIndex = fromIndex, toIndex = toIndex)
    copy(entity = shifted)
  }
}
}

View File

@ -1,49 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.conversions.DurationOps._
import com.twitter.snowflake.id.SnowflakeId
/**
 * Thresholds used by [[CreatedAtRepairer]]. All values are in milliseconds.
 */
object CreatedAtRepairer {
  // no createdAt value should be less than this (2006-01-01T00:00:00Z)
  val jan_01_2006: Long = 1136073600000L

  // no non-snowflake createdAt value should be greater than this (2011-01-01T00:00:00Z)
  val jan_01_2011: Long = 1293840000000L

  // allow createdAt timestamp to be up to this amount off from the snowflake id
  // before applying the correction.
  // This is a duration in milliseconds, so it is typed as Long; it was previously annotated
  // with the unrelated MediaId alias (presumably an alias of Long — TODO confirm).
  val varianceThreshold: Long = 10.minutes.inMilliseconds
}
/**
 * Detects tweets with a bad createdAt timestamp and, where possible, repairs it from the
 * snowflake id. Tweets with pre-snowflake ids are never modified, only reported.
 *
 * Every suspicious tweet is reported to `scribe` as "<tweet id>\t<createdAt millis>".
 * Returns Some(repaired tweet) when a correction was applied, None otherwise.
 */
class CreatedAtRepairer(scribe: FutureEffect[String]) extends Mutation[Tweet] {
  import CreatedAtRepairer._

  def apply(tweet: Tweet): Option[Tweet] = {
    assert(tweet.coreData.nonEmpty, "tweet core data is missing")

    // getCreatedAt is multiplied by 1000 to compare against millisecond values.
    val createdAtMillis = getCreatedAt(tweet) * 1000

    if (!SnowflakeId.isSnowflakeId(tweet.id)) {
      // not a snowflake id, hard to repair, so just log it
      val outOfRange = createdAtMillis < jan_01_2006 || createdAtMillis > jan_01_2011
      if (outOfRange) {
        scribe(s"${tweet.id}\t$createdAtMillis")
      }
      None
    } else {
      val snowflakeMillis = SnowflakeId(tweet.id).unixTimeMillis.asLong
      val drift = math.abs(snowflakeMillis - createdAtMillis)

      if (drift < varianceThreshold) {
        None
      } else {
        // too far from the snowflake time: report it and take the time from the id
        scribe(s"${tweet.id}\t$createdAtMillis")
        Some(TweetLenses.createdAt.set(tweet, snowflakeMillis / 1000))
      }
    }
  }
}

View File

@ -1,33 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.stitch.NotFound
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.serverutil.DeviceSourceParser
import com.twitter.tweetypie.thriftscala.DeviceSource
import com.twitter.tweetypie.thriftscala.FieldByPath
/**
 * Hydrates the deviceSource field of a tweet by looking up its created_via value in the
 * device source repository.
 */
object DeviceSourceHydrator {
  type Type = ValueHydrator[Option[DeviceSource], TweetCtx]

  // WebOauthId is the created_via value for Macaw-Swift through Woodstar.
  // It is special-cased to return the same device_source as "web", since multiple
  // created_via strings cannot be mapped to one device_source.
  val WebOauthId: String = s"oauth:${DeviceSourceParser.Web}"

  val hydratedField: FieldByPath = fieldByPath(Tweet.DeviceSourceField)

  // Maps the web OAuth created_via value to "web"; every other value is returned as-is.
  private def convertForWeb(createdVia: String) =
    createdVia match {
      case DeviceSourceHydrator.WebOauthId => "web"
      case other => other
    }

  def apply(repo: DeviceSourceRepository.Type): Type = {
    val lookupDeviceSource =
      ValueHydrator[Option[DeviceSource], TweetCtx] { (_, ctx) =>
        val createdVia = convertForWeb(ctx.createdVia)
        repo(createdVia).liftToTry.map {
          case Return(deviceSource) => ValueState.modified(Some(deviceSource))
          // An unknown created_via is not an error; the field simply stays empty.
          case Throw(NotFound) => ValueState.UnmodifiedNone
          case Throw(_) => ValueState.partial(None, hydratedField)
        }
      }

    lookupDeviceSource.onlyIf { (curr, ctx) =>
      curr.isEmpty && ctx.tweetFieldRequested(Tweet.DeviceSourceField)
    }
  }
}

View File

@ -1,92 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.stitch.NotFound
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core._
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.thriftscala._
/**
 * Hydrates the "directedAtUser" field on the tweet. This hydrator takes one of two paths,
 * depending on whether DirectedAtUserMetadata is present:
 *
 * 1. If DirectedAtUserMetadata exists, we use metadata.userId.
 * 2. If DirectedAtUserMetadata does not exist, we use the User screenName from the mention
 *    starting at index 0 if the tweet also has a reply. Creation of a "reply to user" for
 *    leading @mentions is controlled by PostTweetRequest.enableTweetToNarrowcasting
 */
object DirectedAtHydrator {
  type Type = ValueHydrator[Option[DirectedAtUser], Ctx]

  /**
   * Hydration context: the tweet's mentions, its optional directed-at metadata, and the
   * underlying tweet context that this context proxies.
   */
  case class Ctx(
    mentions: Seq[MentionEntity],
    metadata: Option[DirectedAtUserMetadata],
    underlyingTweetCtx: TweetCtx)
      extends TweetCtx.Proxy {

    // Screen name of the first mention, but only when that mention starts at index 0.
    val directedAtScreenName: Option[String] =
      mentions.headOption.collect {
        case mention if mention.fromIndex == 0 => mention.screenName
      }
  }

  val hydratedField: FieldByPath =
    fieldByPath(Tweet.CoreDataField, TweetCoreData.DirectedAtUserField)

  /** Wraps `h` so that it is completed only once for the DirectedAt hydration type. */
  def once(h: Type): Type =
    TweetHydration.completeOnlyOnce(
      hydrationType = HydrationType.DirectedAt,
      hydrator = h
    )

  // Shared result for lookups that failed with something other than NotFound.
  private val partial = ValueState.partial(None, hydratedField)

  /**
   * Builds the hydrator around the user identity repository. The hydrator only runs when the
   * current value is empty.
   */
  def apply(repo: UserIdentityRepository.Type, stats: StatsReceiver = NullStatsReceiver): Type = {
    val withMetadata = stats.counter("with_metadata")
    val noScreenName = stats.counter("no_screen_name")
    val withoutMetadata = stats.counter("without_metadata")

    val hydrator = ValueHydrator[Option[DirectedAtUser], Ctx] { (_, ctx) =>
      ctx.metadata match {
        // 1a. directed-at metadata exists and carries a user id: rely on it exclusively
        case Some(DirectedAtUserMetadata(Some(uid))) =>
          withMetadata.incr()
          repo(UserKey.byId(uid)).liftToTry.map {
            case Return(user) =>
              ValueState.modified(Some(DirectedAtUser(user.id, user.screenName)))

            case Throw(NotFound) =>
              // User lookup came back empty, so fall back to the leading mention's screen name
              ctx.directedAtScreenName match {
                case Some(screenName) =>
                  ValueState.modified(Some(DirectedAtUser(uid, screenName)))
                case None =>
                  // This should never happen, but a counter makes sure of that
                  noScreenName.incr()
                  ValueState.UnmodifiedNone
              }

            case Throw(_) => partial
          }

        // 1b. directed-at metadata exists but has no user id: leave the value unmodified
        case Some(DirectedAtUserMetadata(None)) =>
          withMetadata.incr()
          ValueState.StitchUnmodifiedNone

        // 2. no metadata at all: use the leading mention, but only when the tweet is a reply
        case None =>
          withoutMetadata.incr()
          val lookup = ctx.directedAtScreenName match {
            case Some(screenName) if ctx.isReply => repo(UserKey.byScreenName(screenName))
            case _ => Stitch.NotFound
          }
          lookup.liftToTry.map {
            case Return(user) =>
              ValueState.modified(Some(DirectedAtUser(user.id, user.screenName)))
            case Throw(NotFound) => ValueState.UnmodifiedNone
            case Throw(_) => partial
          }
      }
    }

    hydrator.onlyIf((current, _) => current.isEmpty)
  }
}

View File

@ -1,132 +0,0 @@
package com.twitter.tweetypie.hydrator
import com.twitter.servo.util.Gate
import com.twitter.spam.rtf.thriftscala.SafetyLevel
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.StatsReceiver
import com.twitter.tweetypie.Tweet
import com.twitter.tweetypie.core.ValueState
import com.twitter.tweetypie.repository.TweetQuery
import com.twitter.tweetypie.repository.TweetRepository
import com.twitter.tweetypie.util.EditControlUtil
import com.twitter.tweetypie.serverutil.ExceptionCounter
import com.twitter.tweetypie.thriftscala.EditControl
import com.twitter.tweetypie.thriftscala.EditControlInitial
import com.twitter.tweetypie.thriftscala.FieldByPath
import com.twitter.tweetypie.util.TweetEditFailure.TweetEditGetInitialEditControlException
import com.twitter.tweetypie.util.TweetEditFailure.TweetEditInvalidEditControlException
/**
 * EditControlHydrator hydrates the EditControlEdit arm of the editControl field.
 *
 * Initial Tweets (EditControlInitial already set) are passed through unchanged. Tweets with
 * no edit control at all get an EditControlInitial synthesized at read time.
 *
 * For edit Tweets, the initial Tweet's edit control is fetched, set as a field on the edit
 * control of the edit Tweet, and the updated edit control is returned.
 */
object EditControlHydrator {
  type Type = ValueHydrator[Option[EditControl], TweetCtx]

  val hydratedField: FieldByPath = fieldByPath(Tweet.EditControlField)

  /**
   * Builds the hydrator. It only runs when `ctx.opts.enableEditControlHydration` is set.
   *
   * @param repo repository used to load the initial tweet of an edit
   * @param setEditTimeWindowToSixtyMinutes gate forwarded to
   *        `EditControlUtil.makeEditControlInitial` when synthesizing an initial edit control
   * @param stats receiver for the hydration counters
   */
  def apply(
    repo: TweetRepository.Type,
    setEditTimeWindowToSixtyMinutes: Gate[Unit],
    stats: StatsReceiver
  ): Type = {
    val exceptionCounter = ExceptionCounter(stats)

    // Tweets written before edit control initial was being written.
    val noEditControlHydration = stats.counter("noEditControlHydration")
    // Edit tweets whose edit control is being hydrated.
    val editControlEditHydration = stats.counter("editControlEditHydration")
    // Edit tweet hydrations that completed without failure.
    val editControlEditHydrationSuccessful = stats.counter("editControlEditHydration", "success")
    // Initial tweets being hydrated.
    val editControlInitialHydration = stats.counter("editControlInitialHydration")
    // Edits loaded whose id is absent from the initial tweet's editTweetIds.
    val editTweetIdsMissingAnEdit = stats.counter("editTweetIdsMissingAnEdit")
    // Edit control is set, but is neither the initial nor the edit variant.
    val unknownUnionVariant = stats.counter("unknownEditControlUnionVariant")

    val hydrator = ValueHydrator[Option[EditControl], TweetCtx] { (curr, ctx) =>
      curr match {
        // Tweet predates edit control being written: synthesize the value at read time.
        case None =>
          noEditControlHydration.incr()
          val synthesized = EditControlUtil.makeEditControlInitial(
            ctx.tweetId,
            ctx.createdAt,
            setEditTimeWindowToSixtyMinutes)
          Stitch.value(ValueState.delta(curr, Some(synthesized)))

        // Tweet is an initial tweet: nothing to hydrate.
        case Some(EditControl.Initial(_)) =>
          editControlInitialHydration.incr()
          Stitch.value(ValueState.unmodified(curr))

        // Tweet is an edited version: pull in the initial tweet's edit control.
        case Some(EditControl.Edit(edit)) =>
          editControlEditHydration.incr()
          val initialTweet = getInitialTweet(repo, edit.initialTweetId, ctx)
          val initialControl = initialTweet.flatMap(getEditControlInitial(ctx))
          initialControl
            .map { maybeInitial =>
              // NOTE(review): this is also incremented when `maybeInitial` is None, which
              // getEditControlInitial produces on the fetchStoredTweets path.
              editControlEditHydrationSuccessful.incr()
              // The initial tweet was fetched, but this edit tweet is not listed in the
              // initial's editTweetIds.
              if (maybeInitial.exists(init => !init.editTweetIds.contains(ctx.tweetId))) {
                editTweetIdsMissingAnEdit.incr()
              }
              val hydratedEdit = edit.copy(editControlInitial = maybeInitial)
              ValueState.delta(curr, Some(EditControl.Edit(hydratedEdit)))
            }
            .onFailure(exceptionCounter(_))

        // Unknown union variant
        case Some(_) =>
          unknownUnionVariant.incr()
          Stitch.exception(TweetEditInvalidEditControlException)
      }
    }

    hydrator.onlyIf { (_, ctx) => ctx.opts.enableEditControlHydration }
  }

  /**
   * Loads the initial tweet of an edit chain, requesting only the edit control field.
   * Visibility filtering is disabled and the safety level is FilterNone; cache control and
   * fetchStoredTweets are taken from the calling context's options.
   */
  def getInitialTweet(
    repo: TweetRepository.Type,
    initialTweetId: Long,
    ctx: TweetCtx
  ): Stitch[Tweet] = {
    val requestedFields = TweetQuery.Include(Set(Tweet.EditControlField.id))
    val queryOptions = TweetQuery.Options(
      include = requestedFields,
      cacheControl = ctx.opts.cacheControl,
      enforceVisibilityFiltering = false,
      safetyLevel = SafetyLevel.FilterNone,
      fetchStoredTweets = ctx.opts.fetchStoredTweets
    )
    repo(initialTweetId, queryOptions)
  }

  /**
   * Returns a function that extracts the EditControlInitial from an initial tweet.
   *
   * When the tweet carries no EditControlInitial, the function yields None if
   * `ctx.opts.fetchStoredTweets` is set and otherwise fails with
   * TweetEditGetInitialEditControlException.
   */
  def getEditControlInitial(ctx: TweetCtx): Tweet => Stitch[Option[EditControlInitial]] = {
    initialTweet =>
      initialTweet.editControl match {
        case Some(EditControl.Initial(initial)) =>
          // On the write path the edit control initial is hydrated as if the initial
          // tweet had already been updated with this edit.
          val onWritePath = ctx.opts.cause.writing(ctx.tweetId)
          val effective =
            if (onWritePath) EditControlUtil.plusEdit(initial, ctx.tweetId) else initial
          Stitch.value(Some(effective))

        case _ =>
          if (ctx.opts.fetchStoredTweets) {
            // fetchStoredTweets means tweets are fetched and hydrated regardless of state.
            // If the initial tweet's edit control is unavailable, return None so the
            // current edit tweet is still hydrated and returned.
            Stitch.None
          } else {
            Stitch.exception(TweetEditGetInitialEditControlException)
          }
      }
  }
}

View File

@ -1,63 +0,0 @@
package com.twitter.tweetypie
package hydrator
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.EditState
/**
 * An EditHydrator hydrates a value of type `A` using a hydration context of type `C`.
 * It wraps a function from a value and a context to a `Stitch[EditState[A]]`
 * (an EditState encapsulates a function that takes a value and returns a new ValueState).
 *
 * Several EditHydrators of the same type may be run in parallel via
 * `EditHydrator.inParallel`.
 */
class EditHydrator[A, C] private (val run: (A, C) => Stitch[EditState[A]]) {

  /**
   * Runs this hydrator against a value and context, yielding an EditState.
   */
  def apply(a: A, ctx: C): Stitch[EditState[A]] = run(a, ctx)

  /**
   * The equivalent ValueHydrator: runs this hydrator, then applies the resulting
   * EditState to the same input value.
   */
  def toValueHydrator: ValueHydrator[A, C] =
    ValueHydrator[A, C] { (value, ctx) =>
      run(value, ctx).map(_.run(value))
    }

  /**
   * Runs this hydrator and `next` in parallel on the same input, combining the two
   * resulting EditStates with `andThen` (this hydrator's state first).
   */
  def inParallelWith(next: EditHydrator[A, C]): EditHydrator[A, C] =
    EditHydrator[A, C] { (value, ctx) =>
      val mine = run(value, ctx)
      val theirs = next.run(value, ctx)
      Stitch.joinMap(mine, theirs) {
        case (first, second) => first.andThen(second)
      }
    }
}
object EditHydrator {

  /**
   * Create an EditHydrator from a function that returns Stitch[EditState[A]].
   */
  def apply[A, C](f: (A, C) => Stitch[EditState[A]]): EditHydrator[A, C] =
    new EditHydrator[A, C](f)

  /**
   * Creates a "passthrough" Edit:
   * Leaves A unchanged and produces empty HydrationState.
   */
  def unit[A, C]: EditHydrator[A, C] =
    EditHydrator { (_, _) => Stitch.value(EditState.unit[A]) }

  /**
   * Runs several EditHydrators in parallel.
   *
   * Hydrators are combined left to right with `inParallelWith`; a single hydrator is
   * returned as-is. When no hydrators are supplied, the passthrough `unit` hydrator is
   * returned instead of failing on the reduction of an empty sequence.
   */
  def inParallel[A, C](bs: EditHydrator[A, C]*): EditHydrator[A, C] =
    bs.reduceLeftOption[EditHydrator[A, C]](_.inParallelWith(_)).getOrElse(unit[A, C])
}

Some files were not shown because too many files have changed in this diff Show More