the-algorithm/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala

168 lines
6.7 KiB
Scala

package com.twitter.tweetypie.storage
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.stats.Counter
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.logging.Logger
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.stitch.Stitch
import com.twitter.stitch.StitchSeqGroup
import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
import com.twitter.storage.client.manhattan.kv.ManhattanException
import com.twitter.tweetypie.storage.TweetStateRecord.BounceDeleted
import com.twitter.tweetypie.storage.TweetStateRecord.HardDeleted
import com.twitter.tweetypie.storage.TweetStateRecord.SoftDeleted
import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet
import com.twitter.tweetypie.storage.TweetUtils._
import com.twitter.util.Duration
import com.twitter.util.Return
import com.twitter.util.Throw
import com.twitter.util.Time
object GetTweetHandler {
// Logger named after this object's class; logRacyRead writes its "racy_tweet_read" lines to it.
private[this] val logger = Logger(getClass)
//////////////////////////////////////////////////
// Logging racy reads for later validation.
// Upper bound (inclusive) on the age of a tweet, derived from its snowflake id, for which
// logRacyRead logs the records that were read.
val RacyTweetWindow: Duration = 10.seconds
/**
 * Logs what was read from Manhattan when a tweet is read very soon after its creation.
 *
 * A read this close to creation would usually be expected to be served from cache, so it
 * marks the tweet as prone to consistency issues; the record keys (and their timestamps)
 * present at read time are logged for later analysis.
 *
 * Nothing is logged when `tweetId` is not a snowflake id, or when the age computed from
 * the snowflake timestamp is greater than `RacyTweetWindow`.
 *
 * The emitted line is tab-separated: the literal `racy_tweet_read`, the tweet id, the
 * tweet age in milliseconds, then one entry per record of the form `lkey` or
 * `lkey:timestampMillis`.
 *
 * @param tweetId id of the tweet that was read
 * @param records the records that were read for that tweet
 */
private[this] def logRacyRead(tweetId: TweetId, records: Seq[TweetManhattanRecord]): Unit =
  if (SnowflakeId.isSnowflakeId(tweetId)) {
    val age = Time.now.since(SnowflakeId(tweetId).time)
    if (age <= RacyTweetWindow) {
      val line = new StringBuilder
      line.append("racy_tweet_read\t")
      line.append(tweetId)
      line.append('\t')
      // The age is part of the line so that it is available to the later analysis.
      line.append(age.inMilliseconds)
      for (record <- records) {
        line.append('\t')
        line.append(record.lkey)
        // The timestamp lets the analysis tell whether a value should have been present:
        // keys written in a single write are expected to share a timestamp, while keys
        // written in separate writes will generally differ. Manhattan treats the timestamp
        // as optional, although a value is expected to always be there.
        for (writtenAt <- record.value.timestamp) {
          line.append(':')
          line.append(writtenAt.inMilliseconds)
        }
      }
      logger.info(line.toString)
    }
  }
/**
 * Turns the records read from Manhattan for one tweet into a GetTweet.Response.
 *
 * Outcomes:
 *  - no records at all: `NotFound`
 *  - most recent state record is `SoftDeleted` or `HardDeleted`: `Deleted`, without
 *    attempting to construct a Tweet
 *  - most recent state record is `BounceDeleted`: `BounceDeleted(tweet)` when a Tweet could
 *    be constructed, `Deleted` otherwise
 *  - anything else: `Found(tweet)` when a Tweet could be constructed, `NotFound` otherwise
 *
 * @param tweetId id of the tweet the records belong to
 * @param mhRecords records read from Manhattan for that tweet
 * @param statsReceiver receives the `valid`, `invalid` and `expected_fields_not_present`
 *                      counters; defaults to `NullStatsReceiver`
 * @return the response corresponding to the records
 */
def tweetResponseFromRecords(
  tweetId: TweetId,
  mhRecords: Seq[TweetManhattanRecord],
  statsReceiver: StatsReceiver = NullStatsReceiver
): GetTweet.Response = {
  // Attempts to construct a Tweet from the records. A tweet for which none of the fields in
  // `expectedFields` is present, or which fails `isValid`, is considered not returnable
  // (even if some additional fields are present).
  def buildTweet() = {
    val stored = buildStoredTweet(tweetId, mhRecords)
    if (stored.getFieldBlobs(expectedFields).isEmpty) {
      // The Tweet contained none of the fields defined in `expectedFields`
      log.info(s"Expected Fields Not Present Tweet Id: $tweetId")
      statsReceiver.counter("expected_fields_not_present").incr()
      None
    } else if (isValid(stored)) {
      statsReceiver.counter("valid").incr()
      Some(StorageConversions.fromStoredTweet(stored))
    } else {
      log.info(s"Invalid Tweet Id: $tweetId")
      statsReceiver.counter("invalid").incr()
      None
    }
  }

  if (mhRecords.isEmpty) {
    GetTweet.Response.NotFound
  } else {
    TweetStateRecord.mostRecent(mhRecords) match {
      // Soft and hard deletes do not require an attempt to construct a Tweet.
      case Some(_: SoftDeleted | _: HardDeleted) =>
        GetTweet.Response.Deleted
      // Every other state requires an attempt to construct a Tweet, which may fail.
      case state =>
        logRacyRead(tweetId, mhRecords)
        val bounceDeleted = state match {
          case Some(_: BounceDeleted) => true
          case _ => false
        }
        buildTweet() match {
          // BounceDeleted carries the Tweet data so that callers can access data on the
          // tweet (e.g. the hard delete daemon requires conversationId and userId). There
          // are no plans for Tweetypie server to make use of the returned tweet at this time.
          case Some(tweet) if bounceDeleted => GetTweet.Response.BounceDeleted(tweet)
          case Some(tweet) => GetTweet.Response.Found(tweet)
          case None if bounceDeleted => GetTweet.Response.Deleted
          case None => GetTweet.Response.NotFound
        }
    }
  }
}
/**
 * Builds the GetTweet operation on top of a Manhattan read.
 *
 * The returned function:
 *  - answers `Stitch.NotFound` for ids that are not strictly positive, without reading
 *  - otherwise reads the records through `mhGroup` and converts them with
 *    `tweetResponseFromRecords`
 *  - rethrows a `DeniedManhattanException` as `RateLimited` (counted as `mh_denied`)
 *  - passes any other failure through unchanged, counting it as `mh_exception` when it is
 *    a `ManhattanException` and as `non_fatal_exception` (plus a warning log) otherwise
 *  - counts `NotFound` responses as `not_found`
 *
 * @param read the Manhattan read used to fetch the records of a single tweet
 * @param statsReceiver receiver under which the `getTweet` scope is created
 * @return the GetTweet operation
 */
def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetTweet = {
  // Everything below is reported under the "getTweet" scope.
  object stats {
    val scoped = statsReceiver.scope("getTweet")
    val denied: Counter = scoped.counter("mh_denied")
    val mhFailure: Counter = scoped.counter("mh_exception")
    val otherFailure: Counter = scoped.counter("non_fatal_exception")
    val notFound: Counter = scoped.counter("not_found")
  }

  // Group handed to Stitch.call: records how many tweet ids it was run with, then issues
  // one read per id.
  object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] {
    override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = {
      Stats.addWidthStat("getTweet", "tweetIds", tweetIds.size, statsReceiver)
      Stitch.traverse(tweetIds)(id => read(id))
    }
  }

  tweetId =>
    if (tweetId > 0) {
      Stitch
        .call(tweetId, mhGroup)
        .map(records => tweetResponseFromRecords(tweetId, records, stats.scoped))
        .liftToTry
        .map {
          case missing @ Return(GetTweet.Response.NotFound) =>
            stats.notFound.incr()
            missing
          case success @ Return(_) => success
          // Manhattan denied the request: surface it as rate limiting.
          case Throw(denied: DeniedManhattanException) =>
            stats.denied.incr()
            Throw(RateLimited("", denied))
          // Encountered some other Manhattan error
          case failure @ Throw(_: ManhattanException) =>
            stats.mhFailure.incr()
            failure
          // Something else happened
          case failure @ Throw(cause) =>
            stats.otherFailure.incr()
            TweetUtils.log
              .warning(cause, s"Unhandled exception in GetTweetHandler for tweetId: $tweetId")
            failure
        }
        .lowerFromTry
    } else {
      Stitch.NotFound
    }
}
}