// the-algorithm/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReplyBuilder.scala
//
// 634 lines
// 24 KiB
// Scala

package com.twitter.tweetypie
package handler
import com.twitter.stitch.Stitch
import com.twitter.tweetypie.core.TweetCreateFailure
import com.twitter.tweetypie.repository._
import com.twitter.tweetypie.serverutil.ExceptionCounter
import com.twitter.tweetypie.thriftscala._
import com.twitter.tweetypie.tweettext.Offset
import com.twitter.twittertext.Extractor
import scala.annotation.tailrec
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.control.NoStackTrace
object ReplyBuilder {
// Shared twitter-text extractor, used below to find @-mentions (with indices) and the leading
// reply screen name in tweet text.
private val extractor = new Extractor
// Failure used when the in-reply-to tweet or its author cannot be loaded, and also when the
// in-reply-to user blocks the reply author.
private val InReplyToTweetNotFound =
TweetCreateFailure.State(TweetCreateState.InReplyToTweetNotFound)
/**
 * Input to the reply builder.
 *
 * @param authorId user id of the author of the tweet being created.
 * @param authorScreenName screen name of the author of the tweet being created.
 * @param inReplyToTweetId id of the tweet being replied to, if any; only ids greater than 0 are
 *   treated as a reply-to-tweet.
 * @param tweetText tweet text as passed in by the caller.
 * @param prependImplicitMentions when true, a server-generated mention prefix is prepended to
 *   the text; when false, the text is used as-is and the hideable prefix is derived from it.
 * @param enableTweetToNarrowcasting when false, no directed-at user is computed and the
 *   reply-to-user fallback is skipped.
 * @param excludeUserIds ids of users to leave out of the generated mention prefix.
 * @param spamResult spam result for the request; `Spam.DisabledByIpiPolicy` makes a
 *   reply-to-tweet fail with `Spam.DisabledByIpiFailure`.
 * @param batchMode batch compose mode of the request, if any; `BatchSubsequent` results in an
 *   empty mention prefix for self-replies.
 */
case class Request(
authorId: UserId,
authorScreenName: String,
inReplyToTweetId: Option[TweetId],
tweetText: String,
prependImplicitMentions: Boolean,
enableTweetToNarrowcasting: Boolean,
excludeUserIds: Seq[UserId],
spamResult: Spam.Result,
batchMode: Option[BatchComposeMode])
/**
 * Holds the reply fields that are computed the same way for legacy and simplified replies.
 */
case class BaseResult(
  reply: Reply,
  conversationId: Option[ConversationId],
  selfThreadMetadata: Option[SelfThreadMetadata],
  community: Option[Communities] = None,
  exclusiveTweetControl: Option[ExclusiveTweetControl] = None,
  trustedFriendsControl: Option[TrustedFriendsControl] = None,
  editControl: Option[EditControl] = None) {

  /**
   * Completes this base into a `Result` by supplying the fields that differ between legacy and
   * simplified replies.
   *
   * @param tweetText tweet text to carry in the result.
   * @param directedAtMetadata directed-at metadata to carry in the result.
   * @param visibleStart offset separating the hideable mention prefix from the visible text;
   *   defaults to 0.
   */
  def toResult(
    tweetText: String,
    directedAtMetadata: DirectedAtUserMetadata,
    visibleStart: Offset.CodePoint = Offset.CodePoint(0),
  ): Result =
    Result(
      reply = reply,
      tweetText = tweetText,
      directedAtMetadata = directedAtMetadata,
      conversationId = conversationId,
      selfThreadMetadata = selfThreadMetadata,
      visibleStart = visibleStart,
      community = community,
      exclusiveTweetControl = exclusiveTweetControl,
      trustedFriendsControl = trustedFriendsControl,
      editControl = editControl
    )
}
/**
 * Outcome of building a reply.
 *
 * @param reply the Reply object to include in the tweet.
 * @param tweetText updated tweet text which may include prepended at-mentions, trimmed
 * @param directedAtMetadata see DirectedAtHydrator for usage.
 * @param conversationId conversation id to assign to the tweet.
 * @param selfThreadMetadata returns the result of `SelfThreadBuilder`
 * @param visibleStart offset into `tweetText` separating hideable at-mentions from the
 *   visible text.
 */
case class Result(
  reply: Reply,
  tweetText: String,
  directedAtMetadata: DirectedAtUserMetadata,
  conversationId: Option[ConversationId] = None,
  selfThreadMetadata: Option[SelfThreadMetadata] = None,
  visibleStart: Offset.CodePoint = Offset.CodePoint(0),
  community: Option[Communities] = None,
  exclusiveTweetControl: Option[ExclusiveTweetControl] = None,
  trustedFriendsControl: Option[TrustedFriendsControl] = None,
  editControl: Option[EditControl] = None) {

  /**
   * Tells whether the reply has no content of its own.
   *
   * @param finalText final tweet text after any server-side additions.
   * @return true iff the final tweet text consists exclusively of a hidden reply mention
   *   prefix. When this happens there's no content to the reply and thus the tweet creation
   *   should fail.
   */
  def replyTextIsEmpty(finalText: String): Boolean = {
    // Code point length of the text as produced by ReplyBuilder, i.e. before server-side
    // additions such as media or quoted-tweet URLs.
    val builtLength = Offset.CodePoint.length(tweetText)
    // Code point length once those additions have been applied.
    val finalLength = Offset.CodePoint.length(finalText)

    val hiddenPrefixIsWholeText = builtLength == visibleStart
    val nothingWasAdded = builtLength == finalLength
    hiddenPrefixIsWholeText && nothingWasAdded
  }
}
// Function type of a reply builder: maps a Request to an optional Result, asynchronously.
type Type = Request => Future[Option[Result]]
// Thrown when a media tag or mention entity lacks the user id / screen name needed to build a
// `User`. Mixes in NoStackTrace.
private object InvalidUserException extends NoStackTrace
/**
* A user ID and screen name used for building replies.
*/
private case class User(id: UserId, screenName: String)
/**
* Captures the in-reply-to tweet, its author, and if the user is attempting to reply to a
* retweet, then that retweet and its author.
*/
private case class ReplySource(
srcTweet: Tweet,
srcUser: User,
retweet: Option[Tweet] = None,
rtUser: Option[User] = None) {
// Users tagged in the source tweet's media. This is evaluated when the ReplySource is
// constructed, so a tag that cannot be converted to a User makes construction throw
// InvalidUserException.
private val photoTaggedUsers: Seq[User] = {
  val mediaTags: Iterable[MediaTag] =
    srcTweet.mediaTags match {
      case Some(tags) => tags.tagMap.values.flatten
      case None => Nil
    }
  mediaTags.map(tag => toUser(tag)).toSeq
}
// Builds a User from a media tag. Only tags whose second and third fields (bound here as the
// user id and screen name) are both defined can be converted; any other tag is rejected with
// InvalidUserException.
private def toUser(mt: MediaTag): User =
mt match {
case MediaTag(_, Some(id), Some(screenName), _) => User(id, screenName)
case _ => throw InvalidUserException
}
// Builds a User from a mention entity. Mentions whose user id is not defined are rejected with
// InvalidUserException.
private def toUser(e: MentionEntity): User =
e match {
case MentionEntity(_, _, screenName, Some(id), _, _) => User(id, screenName)
case _ => throw InvalidUserException
}
// Builds a User from a directed-at user; both fields are read directly, so this never rejects.
private def toUser(d: DirectedAtUser) = User(d.userId, d.screenName)
/**
 * Runs `cardUsersFinder` for the source tweet's card reference and URLs, from the perspective
 * of `authorUser`, and returns the user ids it produces.
 */
def allCardUsers(authorUser: User, cardUsersFinder: CardUsersFinder.Type): Future[Set[UserId]] = {
  val finderRequest =
    CardUsersFinder.Request(
      cardReference = getCardReference(srcTweet),
      urls = getUrls(srcTweet).map(_.url),
      perspectiveUserId = authorUser.id
    )
  Stitch.run(cardUsersFinder(finderRequest))
}
// Users mentioned in the source tweet, in mention order. Throws InvalidUserException if a
// mention entity has no user id.
def srcTweetMentionedUsers: Seq[User] =
  getMentions(srcTweet).map(mention => toUser(mention))
/**
 * Strategy for a kind of reply: decides who the reply is directed at, which mention must always
 * appear in the text, and how the implicit mention prefix is rendered.
 */
private trait ReplyType {
  // Ids of users that must not be implicitly mentioned.
  val allExcludedUserIds: Set[UserId]

  // The user the reply is directed at, if any.
  def directedAt: Option[User]

  // The user that must be mentioned in the text even if excluded or over the limit, if any.
  def requiredTextMention: Option[User]

  def isExcluded(u: User): Boolean = allExcludedUserIds.contains(u.id)

  /**
   * Renders the implicit mention prefix as space-separated "@screenName" tokens.
   *
   * The required mention (if any) comes first, followed by `otherMentions` in order. Excluded
   * users and repeated user ids are dropped, and at most `maxImplicits` mentions are kept,
   * except that the required mention is always kept.
   */
  def buildPrefix(otherMentions: Seq[User], maxImplicits: Int): String = {
    // Ids that may no longer be emitted: starts as the exclusion list and grows as users are
    // emitted, which is what removes duplicates.
    val blockedIds = mutable.HashSet.empty[UserId]
    blockedIds ++= allExcludedUserIds
    // Never exclude the required mention
    requiredTextMention.foreach(required => blockedIds -= required.id)

    val limit = math.max(maxImplicits, requiredTextMention.size)
    val candidates = requiredTextMention.toSeq ++ otherMentions
    val accepted = candidates.filter(candidate => blockedIds.add(candidate.id))
    accepted.take(limit).map("@" + _.screenName).mkString(" ")
  }
}
// Reply type used when the author replies to their own tweet (outside BatchSubsequent mode).
private case class SelfReply(
  allExcludedUserIds: Set[UserId],
  enableTweetToNarrowcasting: Boolean)
    extends ReplyType {

  // The directed-at user recorded on the source tweet, if any.
  private def srcTweetDirectedAt: Option[User] =
    getDirectedAtUser(srcTweet).map(d => toUser(d))

  // With narrowcasting enabled: the retweet author if present and not excluded, otherwise the
  // source tweet's directed-at user if present and not excluded.
  override def directedAt: Option[User] =
    if (enableTweetToNarrowcasting) {
      rtUser
        .filterNot(u => isExcluded(u))
        .orElse(srcTweetDirectedAt.filterNot(u => isExcluded(u)))
    } else {
      None
    }

  // Make sure the directedAt user is in the text to avoid confusion
  override def requiredTextMention: Option[User] = directedAt
}
// Reply type used for self-replies when batchMode is BatchSubsequent: there is no directed-at
// user, no required mention, and the mention prefix is always the empty string.
private case class BatchSubsequentReply(allExcludedUserIds: Set[UserId]) extends ReplyType {
override def directedAt: Option[User] = None
override def requiredTextMention: Option[User] = None
override def buildPrefix(otherMentions: Seq[User], maxImplicits: Int): String = ""
}
// Reply type used when the author replies to somebody else's tweet.
private case class RegularReply(
  allExcludedUserIds: Set[UserId],
  enableTweetToNarrowcasting: Boolean)
    extends ReplyType {

  // The source tweet's author, provided narrowcasting is enabled and the author is not excluded.
  override def directedAt: Option[User] =
    if (enableTweetToNarrowcasting && !isExcluded(srcUser)) Some(srcUser) else None

  // Include the source tweet's author as a mention in the reply, even if the reply is not
  // narrowcasted to that user. All non-self-reply tweets require this mention.
  override def requiredTextMention: Option[User] = Some(srcUser)
}
/**
 * Computes an implicit mention prefix to add to the tweet text as well as any directed-at user.
 *
 * The first implicit mention is the source-tweet's author unless the reply is a self-reply, in
 * which case it inherits the DirectedAtUser from the source tweet, though the current author is
 * never added. This mention, if it exists, is the only mention that may be used to direct-at a
 * user and is the user that ends up in DirectedAtUserMetadata. If the user replied to a
 * retweet and the reply doesn't explicitly mention the retweet author, then the retweet author
 * will be next, followed by source tweet mentions and source tweet photo-tagged users.
 *
 * Users in `excludedUsers` originate from the PostTweetRequest and are filtered out of any
 * non-leading mention.
 *
 * Note on maxImplicits:
 * This method returns at most 'maxImplicits' mentions unless 'maxImplicits' is 0 and a
 * directed-at mention is required, in which case it returns 1. If this happens the reply may
 * fail downstream validation checks (e.g. TweetBuilder). With 280 visible character limit it's
 * theoretically possible to explicitly mention 93 users (280 / 3) but this bug shouldn't really
 * be an issue because:
 * 1.) Most replies don't have 50 explicit mentions
 * 2.) TOO-clients have switched to batchMode=Subsequent for self-replies which disable
 *     source tweet's directed-at user inheritance
 * 3.) Requests rarely are rejected due to mention_limit_exceeded
 * If this becomes a problem we could reopen the mention limit discussion, specifically if the
 * backend should allow 51 while the explicit limit remains at 50.
 *
 * Note on batchMode:
 * Implicit mention prefix will be empty string if batchMode is BatchSubsequent. This is to
 * support batch composer.
 *
 * @return the mention prefix and the directed-at user, if any.
 */
def implicitMentionPrefixAndDAU(
  maxImplicits: Int,
  excludedUsers: Seq[User],
  author: User,
  enableTweetToNarrowcasting: Boolean,
  batchMode: Option[BatchComposeMode]
): (String, Option[User]) = {
  // The author is always part of the exclusion list.
  val excludedIds: Set[UserId] = (excludedUsers :+ author).map(_.id).toSet
  val isSelfReply = author.id == srcUser.id
  val isBatchSubsequent = batchMode.contains(BatchComposeMode.BatchSubsequent)

  val replyType: ReplyType =
    if (!isSelfReply) RegularReply(excludedIds, enableTweetToNarrowcasting)
    else if (isBatchSubsequent) BatchSubsequentReply(excludedIds)
    else SelfReply(excludedIds, enableTweetToNarrowcasting)

  // Candidate order: retweet author, then source tweet mentions, then photo-tagged users.
  val candidates = List.concat(rtUser, srcTweetMentionedUsers, photoTaggedUsers)
  val mentionPrefix = replyType.buildPrefix(otherMentions = candidates, maxImplicits = maxImplicits)
  (mentionPrefix, replyType.directedAt)
}
/**
 * Finds the longest possible prefix of whitespace separated @-mentions, restricted to
 * @-mentions that are derived from the reply chain.
 *
 * A mention is allowed in the prefix when its screen name matches, ignoring case, that of the
 * source tweet author, the retweet author, a user mentioned or photo-tagged in the source
 * tweet, or one of `cardUsers`.
 *
 * @param text the reply text to scan from its first character.
 * @param cardUsers additional users whose mentions may be hidden.
 * @param explicitMentions mention entities extracted from `text`; their start/end indices are
 *   compared against code unit offsets into `text`.
 * @return the code point offset at which the hideable prefix ends.
 */
def hideablePrefix(
  text: String,
  cardUsers: Seq[User],
  explicitMentions: Seq[Extractor.Entity]
): Offset.CodePoint = {
  // Fold case with a fixed locale: the no-argument `toLowerCase` uses the JVM default locale,
  // under which e.g. Turkish maps 'I' to dotless 'ı' and would break screen name matching.
  def foldCase(screenName: String): String = screenName.toLowerCase(java.util.Locale.ROOT)

  val allowedMentions =
    (srcTweetMentionedUsers.toSet + srcUser ++ rtUser.toSet ++ photoTaggedUsers ++ cardUsers)
      .map(u => foldCase(u.screenName))
  val len = Offset.CodeUnit.length(text)

  // `isSpaceChar` is needed in addition to `isWhitespace` so that NO-BREAK SPACE (U+00A0) is
  // allowed in the prefix.
  def isWhitespace(c: Char) = c.isWhitespace || c.isSpaceChar

  // Advances past any run of whitespace starting at `offset`.
  @tailrec
  def skipWs(offset: Offset.CodeUnit): Offset.CodeUnit =
    if (offset == len || !isWhitespace(text.charAt(offset.toInt))) offset
    else skipWs(offset.incr)

  // Walks the mentions in order for as long as each one starts exactly at the current offset
  // and is allowed; returns the offset at which that stops being true.
  @tailrec
  def go(offset: Offset.CodeUnit, mentions: Stream[Extractor.Entity]): Offset.CodeUnit =
    if (offset == len) offset
    else {
      mentions match {
        // if we are at the next mention, and it is allowed, skip past and recurse
        case next #:: tail if next.getStart == offset.toInt =>
          if (!allowedMentions.contains(foldCase(next.getValue))) offset
          else go(skipWs(Offset.CodeUnit(next.getEnd)), tail)
        // we found non-mention text
        case _ => offset
      }
    }

  go(Offset.CodeUnit(0), explicitMentions.toStream).toCodePoint(text)
}
}
// Builds the Reply struct pointing at `user`, optionally also pointing at the tweet being
// replied to. `inReplyToStatusId` is left empty when not supplied.
private def replyToUser(user: User, inReplyToStatusId: Option[TweetId] = None): Reply =
Reply(
inReplyToUserId = user.id,
inReplyToScreenName = Some(user.screenName),
inReplyToStatusId = inReplyToStatusId
)
/**
* A builder that generates reply from `inReplyToTweetId` or tweet text
*
* There are two kinds of "reply":
* 1. reply to tweet, which is generated from `inReplyToTweetId`.
*
* A valid reply-to-tweet satisfies the following conditions:
* 1). the tweet that is in-reply-to exists (and is visible to the user creating the tweet)
* 2). the author of the in-reply-to tweet is mentioned anywhere in the tweet, or
* this is a tweet that is in reply to the author's own tweet
*
* 2. reply to user, is generated when the tweet text starts with @user_name. This is only
* attempted if PostTweetRequest.enableTweetToNarrowcasting is true (default).
*/
def apply(
userIdentityRepo: UserIdentityRepository.Type,
tweetRepo: TweetRepository.Optional,
replyCardUsersFinder: CardUsersFinder.Type,
selfThreadBuilder: SelfThreadBuilder,
relationshipRepo: RelationshipRepository.Type,
unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type,
enableRemoveUnmentionedImplicits: Gate[Unit],
stats: StatsReceiver,
maxMentions: Int
): Type = {
// Wraps the request handling at the bottom of this method; presumably records failures under
// `stats` (see ExceptionCounter).
val exceptionCounters = ExceptionCounter(stats)
// Counters under the "mode" scope recording which path built a reply-to-tweet: "compat" when
// the passed-in text is used as-is, "simple" when the server prepends implicit mentions.
val modeScope = stats.scope("mode")
val compatModeCounter = modeScope.counter("compat")
val simpleModeCounter = modeScope.counter("simple")
// Looks up the identity for `key` and converts it to a User; a not-found result from the
// repository becomes None.
def getUser(key: UserKey): Future[Option[User]] = {
  val identity = userIdentityRepo(key)
  val user = identity.map(ident => User(ident.id, ident.screenName))
  Stitch.run(user.liftNotFoundToOption)
}
// Looks up the identities for `userIds` and converts the ones that were found to Users, in
// input order; ids that are not found are silently dropped.
def getUsers(userIds: Seq[UserId]): Future[Seq[ReplyBuilder.User]] = {
  val lookups =
    Stitch.traverse(userIds) { id =>
      userIdentityRepo(UserKey(id)).liftNotFoundToOption
    }
  Stitch.run(
    lookups.map(_.collect { case Some(ident) => User(ident.id, ident.screenName) })
  )
}
// Tweet fields requested when loading the in-reply-to tweet: the fixed set below plus whatever
// fields `selfThreadBuilder` declares as required for a reply source.
val tweetQueryIncludes =
TweetQuery.Include(
tweetFields = Set(
Tweet.CoreDataField.id,
Tweet.CardReferenceField.id,
Tweet.CommunitiesField.id,
Tweet.MediaTagsField.id,
Tweet.MentionsField.id,
Tweet.UrlsField.id,
Tweet.EditControlField.id
) ++ selfThreadBuilder.requiredReplySourceFields.map(_.id)
)
// Query options for loading a tweet on behalf of `forUserId`, with visibility filtering
// enforced and the fields in `tweetQueryIncludes` requested.
def tweetQueryOptions(forUserId: UserId) =
TweetQuery.Options(
tweetQueryIncludes,
forUserId = Some(forUserId),
enforceVisibilityFiltering = true
)
// Loads `tweetId` through the optional tweet repository using the options above; the result is
// None when the repository yields no tweet.
def getTweet(tweetId: TweetId, forUserId: UserId): Future[Option[Tweet]] =
Stitch.run(tweetRepo(tweetId, tweetQueryOptions(forUserId)))
// Fails with InReplyToTweetNotFound when the user being replied to blocks `authorId`;
// otherwise completes successfully.
def checkBlockRelationship(authorId: UserId, result: Result): Future[Unit] = {
  val blockedByInReplyToUser =
    RelationshipKey.blocks(
      sourceId = result.reply.inReplyToUserId,
      destinationId = authorId
    )
  Stitch.run(relationshipRepo(blockedByInReplyToUser)).flatMap { isBlocked =>
    if (isBlocked) Future.exception(InReplyToTweetNotFound)
    else Future.Unit
  }
}
// Fails with Spam.DisabledByIpiFailure (carrying the in-reply-to screen name) when the
// request's spam result is Spam.DisabledByIpiPolicy; otherwise completes successfully.
def checkIPIPolicy(request: Request, reply: Reply): Future[Unit] =
  if (request.spamResult != Spam.DisabledByIpiPolicy) Future.Unit
  else Future.exception(Spam.DisabledByIpiFailure(reply.inReplyToScreenName))
// Returns the ids, among the source tweet's mentioned users and directed-at user, that the
// unmentioned-entities repository reports for the source tweet's conversation.
// Yields an empty list when the gate is off, when the source tweet has no conversation id,
// when there are no ids to check, or when the lookup fails or returns nothing (best effort).
def getUnmentionedUsers(replySource: ReplySource): Future[Seq[UserId]] =
  if (!enableRemoveUnmentionedImplicits()) {
    Future.Nil
  } else {
    val srcTweet = replySource.srcTweet
    val mentionedIds = srcTweet.mentions.getOrElse(Nil).flatMap(_.userId)
    val directedAtId = srcTweet.directedAtUserMetadata.flatMap(_.userId)
    val candidateIds = mentionedIds ++ directedAtId

    srcTweet.coreData.flatMap(_.conversationId) match {
      case Some(conversationId) if candidateIds.nonEmpty =>
        stats.counter("unmentioned_implicits_check").incr()
        Stitch
          .run(unmentionedEntitiesRepo(conversationId, candidateIds))
          .liftToTry
          .map {
            case Return(Some(unmentioned)) => unmentioned
            case _ => Seq.empty[UserId]
          }
      case _ =>
        Future.Nil
    }
  }
/**
 * Constructs a `ReplySource` for the given `tweetId`, which captures the source tweet to be
 * replied to, its author, and if `tweetId` is for a retweet of the source tweet, then also
 * that retweet and its author. If the source tweet (or a retweet of it), or a corresponding
 * author, can't be found or isn't visible to the replier, then `InReplyToTweetNotFound` is
 * thrown.
 */
def getReplySource(tweetId: TweetId, forUserId: UserId): Future[ReplySource] = {
  // Turns a missing value into the InReplyToTweetNotFound failure.
  def orNotFound[A](found: Option[A]): Future[A] =
    found match {
      case Some(value) => Future.value(value)
      case None => Future.exception(InReplyToTweetNotFound)
    }

  getTweet(tweetId, forUserId)
    .flatMap(maybeTweet => orNotFound(maybeTweet))
    .flatMap { tweet =>
      getUser(UserKey(getUserId(tweet)))
        .flatMap(maybeUser => orNotFound(maybeUser))
        .flatMap { user =>
          getShare(tweet) match {
            case Some(share) =>
              // if the user is replying to a retweet, find the retweet source tweet,
              // then update with the retweet and author.
              getReplySource(share.sourceStatusId, forUserId)
                .map(_.copy(retweet = Some(tweet), rtUser = Some(user)))
            case None =>
              Future.value(ReplySource(tweet, user))
          }
        }
    }
}
/**
 * Computes a `Result` for the reply-to-tweet case. If `inReplyToTweetId` is for a retweet,
 * the reply will be computed against the source tweet. If `prependImplicitMentions` is true
 * and source tweet can't be found or isn't visible to replier, then this method will return
 * a `InReplyToTweetNotFound` failure. If `prependImplicitMentions` is false, then the reply
 * text must either mention the source tweet user, or it must be a reply to self; if both of
 * those conditions fail, then `None` is returned. `None` is also returned when
 * `prependImplicitMentions` is false and the source tweet can't be found, so that the caller
 * can fall back to the reply-to-user case.
 */
def makeReplyToTweet(
  inReplyToTweetId: TweetId,
  text: String,
  author: User,
  prependImplicitMentions: Boolean,
  enableTweetToNarrowcasting: Boolean,
  excludeUserIds: Seq[UserId],
  batchMode: Option[BatchComposeMode]
): Future[Option[Result]] = {
  // Screen names are compared ignoring case. Fold with a fixed locale: the no-argument
  // `toLowerCase` uses the JVM default locale, under which e.g. Turkish maps 'I' to dotless
  // 'ı' and would make "@IAN" fail to match the screen name "ian".
  def foldCase(screenName: String): String = screenName.toLowerCase(java.util.Locale.ROOT)

  val explicitMentions: Seq[Extractor.Entity] =
    extractor.extractMentionedScreennamesWithIndices(text).asScala.toSeq
  val mentionedScreenNames =
    explicitMentions.map(mention => foldCase(mention.getValue)).toSet

  /**
   * If `prependImplicitMentions` is true, or the reply author is the same as the in-reply-to
   * author, then the reply text doesn't have to mention the in-reply-to author. Otherwise,
   * check that the text contains a mention of the in-reply-to author.
   */
  def isValidReplyTo(inReplyToUser: User): Boolean =
    prependImplicitMentions ||
      (inReplyToUser.id == author.id) ||
      mentionedScreenNames.contains(foldCase(inReplyToUser.screenName))

  getReplySource(inReplyToTweetId, author.id)
    .flatMap { replySrc =>
      val baseResult = BaseResult(
        reply = replyToUser(replySrc.srcUser, Some(replySrc.srcTweet.id)),
        conversationId = getConversationId(replySrc.srcTweet),
        selfThreadMetadata = selfThreadBuilder.build(author.id, replySrc.srcTweet),
        community = replySrc.srcTweet.communities,
        // Reply tweets retain the same exclusive
        // tweet controls as the tweet being replied to.
        exclusiveTweetControl = replySrc.srcTweet.exclusiveTweetControl,
        trustedFriendsControl = replySrc.srcTweet.trustedFriendsControl,
        editControl = replySrc.srcTweet.editControl
      )
      if (isValidReplyTo(replySrc.srcUser)) {
        if (prependImplicitMentions) {
          // Simplified Replies mode - prepend server-side generated prefix to passed in text
          simpleModeCounter.incr()
          // remove the in-reply-to tweet author from the excluded users, in-reply-to tweet
          // author will always be a directedAtUser
          val filteredExcludedIds =
            excludeUserIds.filterNot(uid => uid == TweetLenses.userId(replySrc.srcTweet))
          for {
            unmentionedUserIds <- getUnmentionedUsers(replySrc)
            excludedUsers <- getUsers(filteredExcludedIds ++ unmentionedUserIds)
            (prefix, directedAtUser) = replySrc.implicitMentionPrefixAndDAU(
              maxImplicits = math.max(0, maxMentions - explicitMentions.size),
              excludedUsers = excludedUsers,
              author = author,
              enableTweetToNarrowcasting = enableTweetToNarrowcasting,
              batchMode = batchMode
            )
          } yield {
            // prefix or text (or both) can be empty strings. Add " " separator and adjust
            // prefix length only when both prefix and text are non-empty.
            val textChunks = Seq(prefix, text).map(_.trim).filter(_.nonEmpty)
            val tweetText = textChunks.mkString(" ")
            val visibleStart =
              if (textChunks.size == 2) {
                Offset.CodePoint.length(prefix + " ")
              } else {
                Offset.CodePoint.length(prefix)
              }
            Some(
              baseResult.toResult(
                tweetText = tweetText,
                directedAtMetadata = DirectedAtUserMetadata(directedAtUser.map(_.id)),
                visibleStart = visibleStart
              )
            )
          }
        } else {
          // Backwards-compatibility mode - walk from beginning of text until find visibleStart
          compatModeCounter.incr()
          for {
            cardUserIds <- replySrc.allCardUsers(author, replyCardUsersFinder)
            cardUsers <- getUsers(cardUserIds.toSeq)
            optUserIdentity <- extractReplyToUser(text)
            directedAtUserId = optUserIdentity.map(_.id).filter(_ => enableTweetToNarrowcasting)
          } yield {
            Some(
              baseResult.toResult(
                tweetText = text,
                directedAtMetadata = DirectedAtUserMetadata(directedAtUserId),
                visibleStart = replySrc.hideablePrefix(text, cardUsers, explicitMentions),
              )
            )
          }
        }
      } else {
        Future.None
      }
    }
    .handle {
      // if `getReplySource` throws this exception, but we aren't computing implicit
      // mentions, then we fall back to the reply-to-user case instead of reply-to-tweet
      case InReplyToTweetNotFound if !prependImplicitMentions => None
    }
}
// Builds a reply-to-user Result when the text starts with a mention of a resolvable user:
// the reply points at that user (no in-reply-to tweet), the text is kept unchanged, and the
// tweet is directed at that user. Yields None when no such user is found.
def makeReplyToUser(text: String): Future[Option[Result]] =
  for (maybeUser <- extractReplyToUser(text)) yield {
    maybeUser.map { user =>
      Result(
        reply = replyToUser(user),
        tweetText = text,
        directedAtMetadata = DirectedAtUserMetadata(Some(user.id))
      )
    }
  }
// Extracts the reply screen name from `text` and resolves it to a User. Yields None when the
// extractor finds no reply screen name (a null result is mapped to None) or the user lookup
// finds nobody.
def extractReplyToUser(text: String): Future[Option[User]] = {
  val replyScreenName = Option(extractor.extractReplyScreenname(text))
  replyScreenName
    .map(screenName => getUser(UserKey(screenName)))
    .getOrElse(Future.None)
}
// Entry point: first tries the reply-to-tweet case (only for a positive in-reply-to tweet id),
// then falls back to the reply-to-user case when narrowcasting is enabled.
FutureArrow[Request, Option[Result]] { request =>
  exceptionCounters {
    val replyToTweet: Future[Option[Result]] =
      request.inReplyToTweetId.filter(_ > 0) match {
        case Some(tweetId) =>
          makeReplyToTweet(
            inReplyToTweetId = tweetId,
            text = request.tweetText,
            author = User(request.authorId, request.authorScreenName),
            prependImplicitMentions = request.prependImplicitMentions,
            enableTweetToNarrowcasting = request.enableTweetToNarrowcasting,
            excludeUserIds = request.excludeUserIds,
            batchMode = request.batchMode
          )
        case None =>
          Future.None
      }

    replyToTweet.flatMap {
      case Some(r) =>
        // Ensure that the author of this reply is not blocked by
        // the user who they are replying to.
        checkBlockRelationship(request.authorId, r)
          .before(checkIPIPolicy(request, r.reply))
          .before(Future.value(Some(r)))
      case None =>
        if (request.enableTweetToNarrowcasting) {
          // We don't check the block relationship when the tweet is
          // not part of a conversation (which is to say, we allow
          // directed-at tweets from a blocked user.) These tweets
          // will not cause notifications for the blocking user,
          // despite the presence of the reply struct.
          makeReplyToUser(request.tweetText)
        } else {
          Future.None
        }
    }
  }
}
}
}