168 lines
6.7 KiB
Scala
168 lines
6.7 KiB
Scala
package com.twitter.tweetypie.storage
|
|
|
|
import com.twitter.conversions.DurationOps._
|
|
import com.twitter.finagle.stats.Counter
|
|
import com.twitter.finagle.stats.NullStatsReceiver
|
|
import com.twitter.finagle.stats.StatsReceiver
|
|
import com.twitter.logging.Logger
|
|
import com.twitter.snowflake.id.SnowflakeId
|
|
import com.twitter.stitch.Stitch
|
|
import com.twitter.stitch.StitchSeqGroup
|
|
import com.twitter.storage.client.manhattan.kv.DeniedManhattanException
|
|
import com.twitter.storage.client.manhattan.kv.ManhattanException
|
|
import com.twitter.tweetypie.storage.TweetStateRecord.BounceDeleted
|
|
import com.twitter.tweetypie.storage.TweetStateRecord.HardDeleted
|
|
import com.twitter.tweetypie.storage.TweetStateRecord.SoftDeleted
|
|
import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet
|
|
import com.twitter.tweetypie.storage.TweetUtils._
|
|
import com.twitter.util.Duration
|
|
import com.twitter.util.Return
|
|
import com.twitter.util.Throw
|
|
import com.twitter.util.Time
|
|
|
|
object GetTweetHandler {
  private[this] val logger = Logger(getClass)

  //////////////////////////////////////////////////
  // Racy-read logging, kept for later validation.

  /** Largest tweet age for which a read is still treated as racy and logged. */
  val RacyTweetWindow: Duration = 10.seconds

  /**
   * Logs what Manhattan returned when a tweet is read very shortly after it
   * was created.
   *
   * Such a read would normally be expected to come from cache, so reaching
   * storage this early suggests the tweet is prone to consistency issues. The
   * logged line is tab separated:
   *
   *   racy_tweet_read, tweet id, tweet age in millis, then one entry per record
   *
   * where each record entry is its lkey, followed by `:` and the record
   * timestamp in millis when a timestamp is present.
   *
   * Nothing is logged for ids that are not Snowflake ids, or when the tweet is
   * older than [[RacyTweetWindow]].
   *
   * @param tweetId id of the tweet being read
   * @param records the Manhattan records returned for that tweet
   */
  private[this] def logRacyRead(tweetId: TweetId, records: Seq[TweetManhattanRecord]): Unit = {
    // Renders the full log line for a read that happened `ageMillis` after creation.
    def racyLine(ageMillis: Long): String = {
      val line = new StringBuilder("racy_tweet_read\t")
      line.append(tweetId).append('\t').append(ageMillis) // the age is logged for analysis
      for (record <- records) {
        line.append('\t').append(record.lkey)
        // The timestamp is optional in Manhattan, although it is expected to
        // always be there. Keys written together are expected to share a
        // timestamp and keys from separate writes generally differ, so logging
        // it lets later analysis tell whether a value should have been present.
        for (writtenAt <- record.value.timestamp) {
          line.append(':').append(writtenAt.inMilliseconds)
        }
      }
      line.toString
    }

    if (SnowflakeId.isSnowflakeId(tweetId)) {
      val age = Time.now.since(SnowflakeId(tweetId).time)
      if (age <= RacyTweetWindow) {
        logger.info(racyLine(age.inMilliseconds))
      }
    }
  }

  /**
   * Turns the Manhattan records read for one tweet into a GetTweet.Response.
   *
   * Outcomes:
   *  - no records at all: `NotFound`
   *  - most recent state record is SoftDeleted or HardDeleted: `Deleted`
   *    (no attempt is made to build the tweet)
   *  - most recent state record is BounceDeleted: `BounceDeleted(tweet)` when
   *    the tweet can be built, `Deleted` otherwise
   *  - anything else: `Found(tweet)` when the tweet can be built, `NotFound`
   *    otherwise
   *
   * @param tweetId       id of the tweet the records belong to
   * @param mhRecords     records read from Manhattan for that tweet
   * @param statsReceiver receives the `valid`, `invalid` and
   *                      `expected_fields_not_present` counters
   * @return the response describing the state of the tweet
   */
  def tweetResponseFromRecords(
    tweetId: TweetId,
    mhRecords: Seq[TweetManhattanRecord],
    statsReceiver: StatsReceiver = NullStatsReceiver
  ): GetTweet.Response = {

    // Attempts to build the tweet from the records. A stored tweet that has
    // none of the `expectedFields`, or that fails validation, is considered not
    // returnable, even if some additional fields are present.
    def buildTweet() = {
      val stored = buildStoredTweet(tweetId, mhRecords)
      val hasExpectedFields = stored.getFieldBlobs(expectedFields).nonEmpty

      if (!hasExpectedFields) {
        // None of the fields listed in `expectedFields` were stored.
        log.info(s"Expected Fields Not Present Tweet Id: $tweetId")
        statsReceiver.counter("expected_fields_not_present").incr()
        None
      } else if (!isValid(stored)) {
        log.info(s"Invalid Tweet Id: $tweetId")
        statsReceiver.counter("invalid").incr()
        None
      } else {
        statsReceiver.counter("valid").incr()
        Some(StorageConversions.fromStoredTweet(stored))
      }
    }

    if (mhRecords.isEmpty) {
      GetTweet.Response.NotFound
    } else {
      TweetStateRecord.mostRecent(mhRecords) match {
        // Soft and hard deletes are answered without trying to build a tweet.
        case Some(_: SoftDeleted) | Some(_: HardDeleted) =>
          GetTweet.Response.Deleted

        // Every other state needs an attempt to build the tweet, which can fail.
        case latestState =>
          logRacyRead(tweetId, mhRecords)
          val maybeTweet = buildTweet()

          latestState match {
            // BounceDeleted carries the tweet so that callers can still get at
            // its data (e.g. the hard delete daemon requires conversationId and
            // userId). There are no plans for Tweetypie server to make use of
            // the returned tweet at this time.
            case Some(_: BounceDeleted) =>
              maybeTweet match {
                case Some(tweet) => GetTweet.Response.BounceDeleted(tweet)
                case None => GetTweet.Response.Deleted
              }

            case _ =>
              maybeTweet match {
                case Some(tweet) => GetTweet.Response.Found(tweet)
                case None => GetTweet.Response.NotFound
              }
          }
      }
    }
  }

  /**
   * Builds the GetTweet operation on top of the given Manhattan read.
   *
   * Non-positive tweet ids are answered with `Stitch.NotFound` and never reach
   * Manhattan. For all other ids the records are read through a Stitch group,
   * converted with [[tweetResponseFromRecords]], and the outcome is counted
   * under the `getTweet` scope:
   *  - `mh_denied`: DeniedManhattanException, rethrown as RateLimited
   *  - `mh_exception`: any other ManhattanException, rethrown unchanged
   *  - `non_fatal_exception`: any other failure, logged and rethrown unchanged
   *  - `not_found`: a successful `NotFound` response
   *
   * @param read          reads the Manhattan records of one tweet
   * @param statsReceiver receiver that the `getTweet` scope is created from
   */
  def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetTweet = {

    // Scope and counters used to report the outcome of each read.
    object stats {
      val getTweetScope = statsReceiver.scope("getTweet")
      val deniedCounter: Counter = getTweetScope.counter("mh_denied")
      val mhExceptionCounter: Counter = getTweetScope.counter("mh_exception")
      val nonFatalExceptionCounter: Counter = getTweetScope.counter("non_fatal_exception")
      val notFoundCounter: Counter = getTweetScope.counter("not_found")
    }

    // Group that the per-tweet reads are issued through; each run records how
    // many tweet ids it was handed before reading them.
    object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] {
      override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = {
        Stats.addWidthStat("getTweet", "tweetIds", tweetIds.size, statsReceiver)
        Stitch.traverse(tweetIds)(id => read(id))
      }
    }

    tweetId =>
      if (tweetId > 0) {
        Stitch
          .call(tweetId, mhGroup)
          .map(records => tweetResponseFromRecords(tweetId, records, stats.getTweetScope))
          .liftToTry
          .map {
            case notFound @ Return(GetTweet.Response.NotFound) =>
              stats.notFoundCounter.incr()
              notFound

            case found @ Return(_) => found

            case failure @ Throw(cause) =>
              cause match {
                // Manhattan refused the request: surface it as rate limiting.
                case denied: DeniedManhattanException =>
                  stats.deniedCounter.incr()
                  Throw(RateLimited("", denied))

                // Some other Manhattan error.
                case _: ManhattanException =>
                  stats.mhExceptionCounter.incr()
                  failure

                // Anything else.
                case other =>
                  stats.nonFatalExceptionCounter.incr()
                  TweetUtils.log
                    .warning(other, s"Unhandled exception in GetTweetHandler for tweetId: $tweetId")
                  failure
              }
          }
          .lowerFromTry
      } else {
        Stitch.NotFound
      }
  }
}
|