diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD deleted file mode 100644 index 542f5ee81..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD +++ /dev/null @@ -1,60 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = [ - "bazel-compatible", - "bazel-incompatible-scaladoc", # see http://go/bazel-incompatible-scaladoc - ], - dependencies = [ - "3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind", - "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala", - "3rdparty/jvm/com/twitter/bijection:core", - "3rdparty/jvm/com/twitter/bijection:thrift", - "3rdparty/jvm/org/apache/thrift:libthrift", - "diffshow", - "fanoutservice/thrift/src/main/thrift:thrift-scala", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", - "finagle/finagle-core/src/main", - "flock-client/src/main/scala", - "mediaservices/commons/src/main/thrift:thrift-scala", - "scrooge/scrooge-core", - "tweetypie/servo/repo", - "tweetypie/servo/repo/src/main/thrift:thrift-scala", - "tweetypie/servo/util", - "snowflake/src/main/scala/com/twitter/snowflake/id", - "src/scala/com/twitter/takedown/util", - "src/thrift/com/twitter/context:feature-context-scala", - "src/thrift/com/twitter/context:twitter-context-scala", - "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", - "src/thrift/com/twitter/expandodo:cards-scala", - "src/thrift/com/twitter/geoduck:geoduck-scala", - "src/thrift/com/twitter/gizmoduck:thrift-scala", - "src/thrift/com/twitter/gizmoduck:user-thrift-scala", - "src/thrift/com/twitter/guano:guano-scala", - "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:audit-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:events-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", - "stitch/stitch-core", - "stitch/stitch-core/src/main/scala/com/twitter/stitch", - "tweetypie/server/src/main/scala/com/twitter/tweetypie", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", - "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", - "tweetypie/server/src/main/thrift:compiled-scala", - "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", - "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", - "tweetypie/common/src/scala/com/twitter/tweetypie/media", - "tweetypie/common/src/scala/com/twitter/tweetypie/storage", - "tweetypie/common/src/scala/com/twitter/tweetypie/tflock", - "tweetypie/common/src/scala/com/twitter/tweetypie/util", - "twitter-context", - "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", - "util/util-stats/src/main/scala", - ], -) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD.docx new file mode 100644 index 000000000..aa2065288 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD.docx differ diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.docx new file mode 100644 index 000000000..d16d2cc61 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala deleted file mode 100644 index 2f4dd6387..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala +++ /dev/null @@ -1,420 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.twitter.scrooge.TFieldBlob -import com.twitter.servo.cache.LockingCache._ -import com.twitter.servo.cache._ -import com.twitter.tweetypie.additionalfields.AdditionalFields -import com.twitter.tweetypie.repository.CachedBounceDeleted.isBounceDeleted -import com.twitter.tweetypie.repository.CachedBounceDeleted.toBounceDeletedCachedTweet -import com.twitter.tweetypie.repository._ -import com.twitter.tweetypie.store.TweetUpdate._ -import com.twitter.tweetypie.thriftscala._ -import com.twitter.util.Time -import diffshow.DiffShow - -trait CachingTweetStore - extends TweetStoreBase[CachingTweetStore] - with InsertTweet.Store - with ReplicatedInsertTweet.Store - with DeleteTweet.Store - with AsyncDeleteTweet.Store - with ReplicatedDeleteTweet.Store - with UndeleteTweet.Store - with AsyncUndeleteTweet.Store - with ReplicatedUndeleteTweet.Store - with SetAdditionalFields.Store - with ReplicatedSetAdditionalFields.Store - with DeleteAdditionalFields.Store - with AsyncDeleteAdditionalFields.Store - with ReplicatedDeleteAdditionalFields.Store - with ScrubGeo.Store - with ReplicatedScrubGeo.Store - with Takedown.Store - with ReplicatedTakedown.Store - with Flush.Store - with UpdatePossiblySensitiveTweet.Store - with AsyncUpdatePossiblySensitiveTweet.Store - with ReplicatedUpdatePossiblySensitiveTweet.Store { - def wrap(w: TweetStore.Wrap): CachingTweetStore = - new TweetStoreWrapper(w, this) - with CachingTweetStore - with InsertTweet.StoreWrapper - with ReplicatedInsertTweet.StoreWrapper - with DeleteTweet.StoreWrapper - with AsyncDeleteTweet.StoreWrapper - with ReplicatedDeleteTweet.StoreWrapper - with UndeleteTweet.StoreWrapper - with AsyncUndeleteTweet.StoreWrapper - with ReplicatedUndeleteTweet.StoreWrapper - with SetAdditionalFields.StoreWrapper - with ReplicatedSetAdditionalFields.StoreWrapper - with DeleteAdditionalFields.StoreWrapper - with AsyncDeleteAdditionalFields.StoreWrapper - with ReplicatedDeleteAdditionalFields.StoreWrapper - with ScrubGeo.StoreWrapper - with ReplicatedScrubGeo.StoreWrapper - with Takedown.StoreWrapper - with ReplicatedTakedown.StoreWrapper - with Flush.StoreWrapper - with UpdatePossiblySensitiveTweet.StoreWrapper - with AsyncUpdatePossiblySensitiveTweet.StoreWrapper - with ReplicatedUpdatePossiblySensitiveTweet.StoreWrapper -} - -object CachingTweetStore { - val Action: AsyncWriteAction.CacheUpdate.type = AsyncWriteAction.CacheUpdate - - def apply( - tweetCache: LockingCache[TweetKey, Cached[CachedTweet]], - tweetKeyFactory: TweetKeyFactory, - stats: StatsReceiver - ): CachingTweetStore = { - val ops = - new CachingTweetStoreOps( - tweetCache, - tweetKeyFactory, - stats - ) - - new CachingTweetStore { - override val 
insertTweet: FutureEffect[InsertTweet.Event] = { - FutureEffect[InsertTweet.Event](e => - ops.insertTweet(e.internalTweet, e.initialTweetUpdateRequest)) - } - - override val replicatedInsertTweet: FutureEffect[ReplicatedInsertTweet.Event] = - FutureEffect[ReplicatedInsertTweet.Event](e => - ops.insertTweet(e.cachedTweet, e.initialTweetUpdateRequest)) - - override val deleteTweet: FutureEffect[DeleteTweet.Event] = - FutureEffect[DeleteTweet.Event](e => - ops.deleteTweet(e.tweet.id, updateOnly = true, isBounceDelete = e.isBounceDelete)) - - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - FutureEffect[AsyncDeleteTweet.Event](e => - ops.deleteTweet(e.tweet.id, updateOnly = true, isBounceDelete = e.isBounceDelete)) - - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncDeleteTweet) - - override val replicatedDeleteTweet: FutureEffect[ReplicatedDeleteTweet.Event] = - FutureEffect[ReplicatedDeleteTweet.Event](e => - ops.deleteTweet( - tweetId = e.tweet.id, - updateOnly = e.isErasure, - isBounceDelete = e.isBounceDelete - )) - - override val undeleteTweet: FutureEffect[UndeleteTweet.Event] = - FutureEffect[UndeleteTweet.Event](e => ops.undeleteTweet(e.internalTweet)) - - override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = - FutureEffect[AsyncUndeleteTweet.Event](e => ops.undeleteTweet(e.cachedTweet)) - - override val retryAsyncUndeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUndeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncUndeleteTweet) - - override val replicatedUndeleteTweet: FutureEffect[ReplicatedUndeleteTweet.Event] = - FutureEffect[ReplicatedUndeleteTweet.Event](e => ops.undeleteTweet(e.cachedTweet)) - - override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = - FutureEffect[SetAdditionalFields.Event](e => ops.setAdditionalFields(e.additionalFields)) - - override val replicatedSetAdditionalFields: FutureEffect[ - ReplicatedSetAdditionalFields.Event - ] = - FutureEffect[ReplicatedSetAdditionalFields.Event](e => - ops.setAdditionalFields(e.additionalFields)) - - override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = - FutureEffect[DeleteAdditionalFields.Event](e => - ops.deleteAdditionalFields(e.tweetId, e.fieldIds)) - - override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = - FutureEffect[AsyncDeleteAdditionalFields.Event](e => - ops.deleteAdditionalFields(e.tweetId, e.fieldIds)) - - override val retryAsyncDeleteAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] - ] = - TweetStore.retry(Action, asyncDeleteAdditionalFields) - - override val replicatedDeleteAdditionalFields: FutureEffect[ - ReplicatedDeleteAdditionalFields.Event - ] = - FutureEffect[ReplicatedDeleteAdditionalFields.Event](e => - ops.deleteAdditionalFields(e.tweetId, e.fieldIds)) - - override val scrubGeo: FutureEffect[ScrubGeo.Event] = - FutureEffect[ScrubGeo.Event](e => ops.scrubGeo(e.tweetIds)) - - override val replicatedScrubGeo: FutureEffect[ReplicatedScrubGeo.Event] = - FutureEffect[ReplicatedScrubGeo.Event](e => ops.scrubGeo(e.tweetIds)) - - override val takedown: FutureEffect[Takedown.Event] = - FutureEffect[Takedown.Event](e => ops.takedown(e.tweet)) - - override val replicatedTakedown: FutureEffect[ReplicatedTakedown.Event] = - FutureEffect[ReplicatedTakedown.Event](e => ops.takedown(e.tweet)) - - override val flush: FutureEffect[Flush.Event] = - 
FutureEffect[Flush.Event](e => ops.flushTweets(e.tweetIds, logExisting = e.logExisting)) - .onlyIf(_.flushTweets) - - override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = - FutureEffect[UpdatePossiblySensitiveTweet.Event](e => ops.updatePossiblySensitive(e.tweet)) - - override val replicatedUpdatePossiblySensitiveTweet: FutureEffect[ - ReplicatedUpdatePossiblySensitiveTweet.Event - ] = - FutureEffect[ReplicatedUpdatePossiblySensitiveTweet.Event](e => - ops.updatePossiblySensitive(e.tweet)) - - override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ - AsyncUpdatePossiblySensitiveTweet.Event - ] = - FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event](e => - ops.updatePossiblySensitive(e.tweet)) - - override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] - ] = - TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) - } - } -} - -private class CachingTweetStoreOps( - tweetCache: LockingCache[TweetKey, Cached[CachedTweet]], - tweetKeyFactory: TweetKeyFactory, - stats: StatsReceiver, - evictionRetries: Int = 3) { - type CachedTweetHandler = Handler[Cached[CachedTweet]] - - private val preferNewestPicker = new PreferNewestCached[CachedTweet] - - private val evictionFailedCounter = stats.counter("eviction_failures") - - private val cacheFlushesLog = Logger("com.twitter.tweetypie.store.CacheFlushesLog") - - private[this] val mapper = new ObjectMapper().registerModule(DefaultScalaModule) - - /** - * Inserts a tweet into cache, recording all compiled additional fields and all - * included passthrough fields. Additionally, if the insertion event contains - * an `InitialTweetUpdateRequest`, we will update the cache entry for this tweet's - * initialTweet. - */ - def insertTweet( - ct: CachedTweet, - initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] - ): Future[Unit] = - lockAndSet( - ct.tweet.id, - insertTweetHandler(ct) - ).flatMap { _ => - initialTweetUpdateRequest match { - case Some(request) => - lockAndSet( - request.initialTweetId, - updateTweetHandler(tweet => InitialTweetUpdate.updateTweet(tweet, request)) - ) - case None => - Future.Unit - } - } - - /** - * Writes a `deleted` tombstone to cache. If `updateOnly` is true, then we only - * write the tombstone if the tweet is already in cache. If `isBounceDelete`, we - * write a special bounce-deleted CachedTweet record to cache. - */ - def deleteTweet(tweetId: TweetId, updateOnly: Boolean, isBounceDelete: Boolean): Future[Unit] = { - // We only need to store a CachedTweet value when the tweet is bounce-deleted to support rendering - // timeline tombstones for tweets that violated the Twitter Rules.
see go/bounced-tweet - val cachedValue = if (isBounceDelete) { - found(toBounceDeletedCachedTweet(tweetId)) - } else { - writeThroughCached[CachedTweet](None, CachedValueStatus.Deleted) - } - - val pickerHandler = - if (updateOnly) { - deleteTweetUpdateOnlyHandler(cachedValue) - } else { - deleteTweetHandler(cachedValue) - } - - lockAndSet(tweetId, pickerHandler) - } - - def undeleteTweet(ct: CachedTweet): Future[Unit] = - lockAndSet( - ct.tweet.id, - insertTweetHandler(ct) - ) - - def setAdditionalFields(tweet: Tweet): Future[Unit] = - lockAndSet(tweet.id, setFieldsHandler(AdditionalFields.additionalFields(tweet))) - - def deleteAdditionalFields(tweetId: TweetId, fieldIds: Seq[FieldId]): Future[Unit] = - lockAndSet(tweetId, deleteFieldsHandler(fieldIds)) - - def scrubGeo(tweetIds: Seq[TweetId]): Future[Unit] = - Future.join { - tweetIds.map { id => - // First, attempt to modify any tweets that are in cache to - // avoid having to reload the cached tweet from storage. - lockAndSet(id, scrubGeoHandler).unit.rescue { - case _: OptimisticLockingCache.LockAndSetFailure => - // If the modification fails, then remove whatever is in - // cache. This is much more likely to succeed because it - // does not require multiple successful requests to cache. - // This will force the tweet to be loaded from storage the - // next time it is requested, and the stored tweet will have - // the geo information removed. - // - // This eviction path was added due to frequent failures of - // the in-place modification code path, causing geoscrub - // daemon tasks to fail. - evictOne(tweetKeyFactory.fromId(id), evictionRetries) - } - } - } - - def takedown(tweet: Tweet): Future[Unit] = - lockAndSet(tweet.id, updateCachedTweetHandler(copyTakedownFieldsForUpdate(tweet))) - - def updatePossiblySensitive(tweet: Tweet): Future[Unit] = - lockAndSet(tweet.id, updateTweetHandler(copyNsfwFieldsForUpdate(tweet))) - - def flushTweets(tweetIds: Seq[TweetId], logExisting: Boolean = false): Future[Unit] = { - val tweetKeys = tweetIds.map(tweetKeyFactory.fromId) - - Future.when(logExisting) { logExistingValues(tweetKeys) }.ensure { - evictAll(tweetKeys) - } - } - - /** - * A LockingCache.Handler that inserts a tweet into cache. - */ - private def insertTweetHandler(newValue: CachedTweet): Handler[Cached[CachedTweet]] = - AlwaysSetHandler(Some(writeThroughCached(Some(newValue), CachedValueStatus.Found))) - - private def foundAndNotBounced(c: Cached[CachedTweet]) = - c.status == CachedValueStatus.Found && !isBounceDeleted(c) - - /** - * A LockingCache.Handler that updates an existing CachedTweet in cache. - */ - private def updateTweetHandler(update: Tweet => Tweet): CachedTweetHandler = - inCache => - for { - cached <- inCache.filter(foundAndNotBounced) - cachedTweet <- cached.value - updatedTweet = update(cachedTweet.tweet) - } yield found(cachedTweet.copy(tweet = updatedTweet)) - - /** - * A LockingCache.Handler that updates an existing CachedTweet in cache. 
- */ - private def updateCachedTweetHandler(update: CachedTweet => CachedTweet): CachedTweetHandler = - inCache => - for { - cached <- inCache.filter(foundAndNotBounced) - cachedTweet <- cached.value - updatedCachedTweet = update(cachedTweet) - } yield found(updatedCachedTweet) - - private def deleteTweetHandler(value: Cached[CachedTweet]): CachedTweetHandler = - PickingHandler(value, preferNewestPicker) - - private def deleteTweetUpdateOnlyHandler(value: Cached[CachedTweet]): CachedTweetHandler = - UpdateOnlyPickingHandler(value, preferNewestPicker) - - private def setFieldsHandler(additional: Seq[TFieldBlob]): CachedTweetHandler = - inCache => - for { - cached <- inCache.filter(foundAndNotBounced) - cachedTweet <- cached.value - updatedTweet = AdditionalFields.setAdditionalFields(cachedTweet.tweet, additional) - updatedCachedTweet = CachedTweet(updatedTweet) - } yield found(updatedCachedTweet) - - private def deleteFieldsHandler(fieldIds: Seq[FieldId]): CachedTweetHandler = - inCache => - for { - cached <- inCache.filter(foundAndNotBounced) - cachedTweet <- cached.value - updatedTweet = AdditionalFields.unsetFields(cachedTweet.tweet, fieldIds) - scrubbedCachedTweet = cachedTweet.copy(tweet = updatedTweet) - } yield found(scrubbedCachedTweet) - - private val scrubGeoHandler: CachedTweetHandler = - inCache => - for { - cached <- inCache.filter(foundAndNotBounced) - cachedTweet <- cached.value - tweet = cachedTweet.tweet - coreData <- tweet.coreData if hasGeo(tweet) - scrubbedCoreData = coreData.copy(coordinates = None, placeId = None) - scrubbedTweet = tweet.copy(coreData = Some(scrubbedCoreData), place = None) - scrubbedCachedTweet = cachedTweet.copy(tweet = scrubbedTweet) - } yield found(scrubbedCachedTweet) - - private def evictOne(key: TweetKey, tries: Int): Future[Int] = - tweetCache.delete(key).transform { - case Throw(_) if tries > 1 => evictOne(key, tries - 1) - case Throw(_) => Future.value(1) - case Return(_) => Future.value(0) - } - - private def evictAll(keys: Seq[TweetKey]): Future[Unit] = - Future - .collect { - keys.map(evictOne(_, evictionRetries)) - } - .onSuccess { (failures: Seq[Int]) => evictionFailedCounter.incr(failures.sum) } - .unit - - private def logExistingValues(keys: Seq[TweetKey]): Future[Unit] = - tweetCache - .get(keys) - .map { existing => - for { - (key, cached) <- existing.found - cachedTweet <- cached.value - tweet = cachedTweet.tweet - } yield { - cacheFlushesLog.info( - mapper.writeValueAsString( - Map( - "key" -> key, - "tweet_id" -> tweet.id, - "tweet" -> DiffShow.show(tweet) - ) - ) - ) - } - } - .unit - - private def found(value: CachedTweet): Cached[CachedTweet] = - writeThroughCached(Some(value), CachedValueStatus.Found) - - private def writeThroughCached[V](value: Option[V], status: CachedValueStatus): Cached[V] = { - val now = Time.now - Cached(value, status, now, None, Some(now)) - } - - private def lockAndSet(tweetId: TweetId, handler: LockingCache.Handler[Cached[CachedTweet]]) = - tweetCache.lockAndSet(tweetKeyFactory.fromId(tweetId), handler).unit -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.docx new file mode 100644 index 000000000..c522425b0 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.scala 
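Every private handler in CachingTweetStoreOps above follows servo's LockingCache.Handler contract: a function from the current cache entry (None if absent) to an optional replacement, where returning None leaves the cache untouched. The following is a minimal, self-contained sketch of that contract, using invented stand-in types (Handler and Entry here are toy analogues of the real servo classes and of Cached[CachedTweet]):

// Sketch only: simplified stand-ins for servo's LockingCache types.
object HandlerSketch {
  // A handler inspects the current cache entry (None if absent) and returns
  // Some(replacement) to write, or None to leave the entry untouched.
  type Handler[V] = Option[V] => Option[V]

  // Toy analogue of Cached[CachedTweet]: a payload plus a tombstone flag.
  final case class Entry[V](value: Option[V], deleted: Boolean)

  // Mirrors the shape of updateTweetHandler above: only rewrite entries
  // that are present and are not tombstones.
  def updateHandler[V](update: V => V): Handler[Entry[V]] =
    inCache =>
      for {
        entry <- inCache if !entry.deleted
        v <- entry.value
      } yield entry.copy(value = Some(update(v)))

  def main(args: Array[String]): Unit = {
    val h = updateHandler[String](_.toUpperCase)
    println(h(Some(Entry(Some("tweet"), deleted = false)))) // rewrites in place
    println(h(Some(Entry(None, deleted = true))))           // None: tombstone untouched
    println(h(None))                                        // None: no entry, no write
  }
}

As the scrubGeo comments above note, even a correct handler can fail under contention (OptimisticLockingCache.LockAndSetFailure), which is why that path falls back to evicting the entry outright.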
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.scala deleted file mode 100644 index 726745b7e..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.scala +++ /dev/null @@ -1,172 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.thriftscala._ - -object DeleteAdditionalFields extends TweetStore.SyncModule { - - case class Event(tweetId: TweetId, fieldIds: Seq[FieldId], userId: UserId, timestamp: Time) - extends SyncTweetStoreEvent("delete_additional_fields") { - - def toAsyncRequest: AsyncDeleteAdditionalFieldsRequest = - AsyncDeleteAdditionalFieldsRequest( - tweetId = tweetId, - fieldIds = fieldIds, - userId = userId, - timestamp = timestamp.inMillis - ) - } - - trait Store { - val deleteAdditionalFields: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val deleteAdditionalFields: FutureEffect[Event] = wrap( - underlying.deleteAdditionalFields) - } - - object Store { - def apply( - cachingTweetStore: CachingTweetStore, - asyncEnqueueStore: AsyncEnqueueStore, - logLensStore: LogLensStore - ): Store = - new Store { - override val deleteAdditionalFields: FutureEffect[Event] = - FutureEffect.inParallel( - // ignore failures deleting from cache, will be retried in async-path - cachingTweetStore.ignoreFailures.deleteAdditionalFields, - asyncEnqueueStore.deleteAdditionalFields, - logLensStore.deleteAdditionalFields - ) - } - } -} - -object AsyncDeleteAdditionalFields extends TweetStore.AsyncModule { - - object Event { - def fromAsyncRequest( - request: AsyncDeleteAdditionalFieldsRequest, - user: User - ): TweetStoreEventOrRetry[Event] = - TweetStoreEventOrRetry( - Event( - tweetId = request.tweetId, - fieldIds = request.fieldIds, - userId = request.userId, - optUser = Some(user), - timestamp = Time.fromMilliseconds(request.timestamp) - ), - request.retryAction, - RetryEvent - ) - } - - case class Event( - tweetId: TweetId, - fieldIds: Seq[FieldId], - userId: UserId, - optUser: Option[User], - timestamp: Time) - extends AsyncTweetStoreEvent("async_delete_additional_fields") - with TweetStoreTweetEvent { - - def toAsyncRequest( - action: Option[AsyncWriteAction] = None - ): AsyncDeleteAdditionalFieldsRequest = - AsyncDeleteAdditionalFieldsRequest( - tweetId = tweetId, - fieldIds = fieldIds, - userId = userId, - timestamp = timestamp.inMillis, - retryAction = action - ) - - override def toTweetEventData: Seq[TweetEventData] = - Seq( - TweetEventData.AdditionalFieldDeleteEvent( - AdditionalFieldDeleteEvent( - deletedFields = Map(tweetId -> fieldIds), - userId = optUser.map(_.id) - ) - ) - ) - - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - service.asyncDeleteAdditionalFields(toAsyncRequest(Some(action))) - } - - case class RetryEvent(action: AsyncWriteAction, event: Event) - extends TweetStoreRetryEvent[Event] { - - override val eventType: AsyncWriteEventType.DeleteAdditionalFields.type = - AsyncWriteEventType.DeleteAdditionalFields - override val scribedTweetOnFailure: None.type = None - } - - trait Store { - val asyncDeleteAdditionalFields: FutureEffect[Event] - val retryAsyncDeleteAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val asyncDeleteAdditionalFields: FutureEffect[Event] = wrap( - underlying.asyncDeleteAdditionalFields) - override val 
retryAsyncDeleteAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( - underlying.retryAsyncDeleteAdditionalFields - ) - } - - object Store { - def apply( - manhattanStore: ManhattanTweetStore, - cachingTweetStore: CachingTweetStore, - replicatingStore: ReplicatingTweetStore, - eventBusEnqueueStore: TweetEventBusStore - ): Store = { - val stores: Seq[Store] = - Seq( - manhattanStore, - cachingTweetStore, - replicatingStore, - eventBusEnqueueStore - ) - - def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = - FutureEffect.inParallel[E](stores.map(extract): _*) - - new Store { - override val asyncDeleteAdditionalFields: FutureEffect[Event] = build( - _.asyncDeleteAdditionalFields) - override val retryAsyncDeleteAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = - build(_.retryAsyncDeleteAdditionalFields) - } - } - } -} - -object ReplicatedDeleteAdditionalFields extends TweetStore.ReplicatedModule { - - case class Event(tweetId: TweetId, fieldIds: Seq[FieldId]) - extends ReplicatedTweetStoreEvent("replicated_delete_additional_fields") - - trait Store { - val replicatedDeleteAdditionalFields: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedDeleteAdditionalFields: FutureEffect[Event] = - wrap(underlying.replicatedDeleteAdditionalFields) - } - - object Store { - def apply(cachingTweetStore: CachingTweetStore): Store = { - new Store { - override val replicatedDeleteAdditionalFields: FutureEffect[Event] = - cachingTweetStore.replicatedDeleteAdditionalFields - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.docx new file mode 100644 index 000000000..022251015 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala deleted file mode 100644 index c2b315d27..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala +++ /dev/null @@ -1,221 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.store.TweetEventDataScrubber.scrub -import com.twitter.tweetypie.thriftscala._ - -object DeleteTweet extends TweetStore.SyncModule { - case class Event( - tweet: Tweet, - timestamp: Time, - user: Option[User] = None, - byUserId: Option[UserId] = None, - auditPassthrough: Option[AuditDeleteTweet] = None, - cascadedFromTweetId: Option[TweetId] = None, - isUserErasure: Boolean = false, - isBounceDelete: Boolean = false, - isLastQuoteOfQuoter: Boolean = false, - isAdminDelete: Boolean) - extends SyncTweetStoreEvent("delete_tweet") { - - def toAsyncRequest: AsyncDeleteRequest = - AsyncDeleteRequest( - tweet = tweet, - user = user, - byUserId = byUserId, - timestamp = timestamp.inMillis, - auditPassthrough = auditPassthrough, - cascadedFromTweetId = cascadedFromTweetId, - isUserErasure = isUserErasure, - isBounceDelete = isBounceDelete, - isLastQuoteOfQuoter = Some(isLastQuoteOfQuoter), - isAdminDelete = Some(isAdminDelete) - ) - } - - trait Store { - val deleteTweet: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val deleteTweet: FutureEffect[Event] = wrap(underlying.deleteTweet) - } - - object Store { - def 
apply( - cachingTweetStore: CachingTweetStore, - asyncEnqueueStore: AsyncEnqueueStore, - userCountsUpdatingStore: GizmoduckUserCountsUpdatingStore, - tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, - logLensStore: LogLensStore - ): Store = - new Store { - override val deleteTweet: FutureEffect[Event] = - FutureEffect.inParallel( - cachingTweetStore.ignoreFailures.deleteTweet, - asyncEnqueueStore.deleteTweet, - userCountsUpdatingStore.deleteTweet, - tweetCountsUpdatingStore.deleteTweet, - logLensStore.deleteTweet - ) - } - } -} - -object AsyncDeleteTweet extends TweetStore.AsyncModule { - - object Event { - def fromAsyncRequest(request: AsyncDeleteRequest): TweetStoreEventOrRetry[Event] = - TweetStoreEventOrRetry( - AsyncDeleteTweet.Event( - tweet = request.tweet, - timestamp = Time.fromMilliseconds(request.timestamp), - optUser = request.user, - byUserId = request.byUserId, - auditPassthrough = request.auditPassthrough, - cascadedFromTweetId = request.cascadedFromTweetId, - isUserErasure = request.isUserErasure, - isBounceDelete = request.isBounceDelete, - isLastQuoteOfQuoter = request.isLastQuoteOfQuoter.getOrElse(false), - isAdminDelete = request.isAdminDelete.getOrElse(false) - ), - request.retryAction, - RetryEvent - ) - } - - case class Event( - tweet: Tweet, - timestamp: Time, - optUser: Option[User] = None, - byUserId: Option[UserId] = None, - auditPassthrough: Option[AuditDeleteTweet] = None, - cascadedFromTweetId: Option[TweetId] = None, - isUserErasure: Boolean = false, - isBounceDelete: Boolean, - isLastQuoteOfQuoter: Boolean = false, - isAdminDelete: Boolean) - extends AsyncTweetStoreEvent("async_delete_tweet") - with TweetStoreTweetEvent { - val tweetEventTweetId: TweetId = tweet.id - - def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncDeleteRequest = - AsyncDeleteRequest( - tweet = tweet, - user = optUser, - byUserId = byUserId, - timestamp = timestamp.inMillis, - auditPassthrough = auditPassthrough, - cascadedFromTweetId = cascadedFromTweetId, - retryAction = action, - isUserErasure = isUserErasure, - isBounceDelete = isBounceDelete, - isLastQuoteOfQuoter = Some(isLastQuoteOfQuoter), - isAdminDelete = Some(isAdminDelete) - ) - - override def toTweetEventData: Seq[TweetEventData] = - Seq( - TweetEventData.TweetDeleteEvent( - TweetDeleteEvent( - tweet = scrub(tweet), - user = optUser, - isUserErasure = Some(isUserErasure), - audit = auditPassthrough, - byUserId = byUserId, - isAdminDelete = Some(isAdminDelete) - ) - ) - ) - - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - service.asyncDelete(toAsyncRequest(Some(action))) - } - - case class RetryEvent(action: AsyncWriteAction, event: Event) - extends TweetStoreRetryEvent[Event] { - - override val eventType: AsyncWriteEventType.Delete.type = AsyncWriteEventType.Delete - override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) - } - - trait Store { - val asyncDeleteTweet: FutureEffect[Event] - val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val asyncDeleteTweet: FutureEffect[Event] = wrap(underlying.asyncDeleteTweet) - override val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( - underlying.retryAsyncDeleteTweet) - } - - object Store { - def apply( - manhattanStore: ManhattanTweetStore, - cachingTweetStore: CachingTweetStore, - replicatingStore: ReplicatingTweetStore, - indexingStore: TweetIndexingStore, - 
eventBusEnqueueStore: TweetEventBusStore, - timelineUpdatingStore: TlsTimelineUpdatingStore, - tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, - guanoServiceStore: GuanoServiceStore, - mediaServiceStore: MediaServiceStore - ): Store = { - val stores: Seq[Store] = - Seq( - manhattanStore, - cachingTweetStore, - replicatingStore, - indexingStore, - eventBusEnqueueStore, - timelineUpdatingStore, - tweetCountsUpdatingStore, - guanoServiceStore, - mediaServiceStore - ) - - def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = - FutureEffect.inParallel[E](stores.map(extract): _*) - - new Store { - override val asyncDeleteTweet: FutureEffect[Event] = build(_.asyncDeleteTweet) - override val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = build( - _.retryAsyncDeleteTweet) - } - } - } -} - -object ReplicatedDeleteTweet extends TweetStore.ReplicatedModule { - - case class Event( - tweet: Tweet, - isErasure: Boolean, - isBounceDelete: Boolean, - isLastQuoteOfQuoter: Boolean = false) - extends ReplicatedTweetStoreEvent("replicated_delete_tweet") - - trait Store { - val replicatedDeleteTweet: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedDeleteTweet: FutureEffect[Event] = wrap(underlying.replicatedDeleteTweet) - } - - object Store { - def apply( - cachingTweetStore: CachingTweetStore, - tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore - ): Store = { - new Store { - override val replicatedDeleteTweet: FutureEffect[Event] = - FutureEffect.inParallel( - cachingTweetStore.replicatedDeleteTweet, - tweetCountsUpdatingStore.replicatedDeleteTweet.ignoreFailures - ) - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.docx new file mode 100644 index 000000000..069607bd7 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala deleted file mode 100644 index ad0104acd..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala +++ /dev/null @@ -1,38 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.timelineservice.fanout.thriftscala.FanoutService -import com.twitter.tweetypie.thriftscala._ - -trait FanoutServiceStore extends TweetStoreBase[FanoutServiceStore] with AsyncInsertTweet.Store { - def wrap(w: TweetStore.Wrap): FanoutServiceStore = - new TweetStoreWrapper(w, this) with FanoutServiceStore with AsyncInsertTweet.StoreWrapper -} - -object FanoutServiceStore { - val Action: AsyncWriteAction.FanoutDelivery.type = AsyncWriteAction.FanoutDelivery - - def apply( - fanoutClient: FanoutService.MethodPerEndpoint, - stats: StatsReceiver - ): FanoutServiceStore = - new FanoutServiceStore { - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - FutureEffect[AsyncInsertTweet.Event] { event => - fanoutClient.tweetCreateEvent2( - TweetCreateEvent( - tweet = event.tweet, - user = event.user, - sourceTweet = event.sourceTweet, - sourceUser = event.sourceUser, - additionalContext = event.additionalContext, - transientContext = event.transientContext - ) - ) - } - - override val retryAsyncInsertTweet: FutureEffect[ - 
TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = TweetStore.retry(Action, asyncInsertTweet) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.docx new file mode 100644 index 000000000..39e1f1252 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala deleted file mode 100644 index 83fbc12af..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.tweetypie -package store - -object Flush extends TweetStore.SyncModule { - - case class Event( - tweetIds: Seq[TweetId], - flushTweets: Boolean = true, - flushCounts: Boolean = true, - logExisting: Boolean = true) - extends SyncTweetStoreEvent("flush") - - trait Store { - val flush: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val flush: FutureEffect[Event] = wrap(underlying.flush) - } - - object Store { - def apply( - cachingTweetStore: CachingTweetStore, - tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore - ): Store = - new Store { - override val flush: FutureEffect[Event] = - FutureEffect.inParallel( - cachingTweetStore.flush, - tweetCountsUpdatingStore.flush - ) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.docx new file mode 100644 index 000000000..3996e0600 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala deleted file mode 100644 index be29aba1e..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala +++ /dev/null @@ -1,72 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.geoduck.backend.relevance.thriftscala.ReportFailure -import com.twitter.geoduck.backend.relevance.thriftscala.ReportResult -import com.twitter.geoduck.backend.relevance.thriftscala.ConversionReport -import com.twitter.geoduck.backend.searchrequestid.thriftscala.SearchRequestID -import com.twitter.geoduck.backend.tweetid.thriftscala.TweetID -import com.twitter.geoduck.common.thriftscala.GeoduckException -import com.twitter.geoduck.service.identifier.thriftscala.PlaceIdentifier -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.thriftscala._ - -trait GeoSearchRequestIDStore - extends TweetStoreBase[GeoSearchRequestIDStore] - with AsyncInsertTweet.Store { - def wrap(w: TweetStore.Wrap): GeoSearchRequestIDStore = - new TweetStoreWrapper[GeoSearchRequestIDStore](w, this) - with GeoSearchRequestIDStore - with AsyncInsertTweet.StoreWrapper -} - -object GeoSearchRequestIDStore { - type ConversionReporter = FutureArrow[ConversionReport, ReportResult] - - val Action: AsyncWriteAction.GeoSearchRequestId.type = AsyncWriteAction.GeoSearchRequestId - private val log = Logger(getClass) - - object FailureHandler { - def translateException(failure: ReportResult.Failure): GeoduckException = { - failure.failure 
match { - case ReportFailure.Failure(exception) => exception - case _ => GeoduckException("Unknown failure: " + failure.toString) - } - } - } - - def apply(conversionReporter: ConversionReporter): GeoSearchRequestIDStore = - new GeoSearchRequestIDStore { - - val conversionEffect: FutureEffect[ConversionReport] = - FutureEffect - .fromPartial[ReportResult] { - case unionFailure: ReportResult.Failure => - Future.exception(FailureHandler.translateException(unionFailure)) - } - .contramapFuture(conversionReporter) - - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - conversionEffect.contramapOption[AsyncInsertTweet.Event] { event => - for { - isUserProtected <- event.user.safety.map(_.isProtected) - geoSearchRequestID <- event.geoSearchRequestId - placeType <- event.tweet.place.map(_.`type`) - placeId <- event.tweet.coreData.flatMap(_.placeId) - placeIdLong <- Try(java.lang.Long.parseUnsignedLong(placeId, 16)).toOption - if placeType == PlaceType.Poi && isUserProtected == false - } yield { - ConversionReport( - requestID = SearchRequestID(requestID = geoSearchRequestID), - tweetID = TweetID(event.tweet.id), - placeID = PlaceIdentifier(placeIdLong) - ) - } - } - - override val retryAsyncInsertTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = - TweetStore.retry(Action, asyncInsertTweet) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.docx new file mode 100644 index 000000000..cd55e7038 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala deleted file mode 100644 index 4ddc40dc2..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.gizmoduck.thriftscala.{CountsUpdateField => Field} -import com.twitter.tweetypie.backends.Gizmoduck - -trait GizmoduckUserCountsUpdatingStore - extends TweetStoreBase[GizmoduckUserCountsUpdatingStore] - with InsertTweet.Store - with DeleteTweet.Store { - def wrap(w: TweetStore.Wrap): GizmoduckUserCountsUpdatingStore = - new TweetStoreWrapper(w, this) - with GizmoduckUserCountsUpdatingStore - with InsertTweet.StoreWrapper - with DeleteTweet.StoreWrapper -} - -/** - * A TweetStore implementation that sends user-specific count updates to Gizmoduck. 
- */ -object GizmoduckUserCountsUpdatingStore { - def isUserTweet(tweet: Tweet): Boolean = - !TweetLenses.nullcast.get(tweet) && TweetLenses.narrowcast.get(tweet).isEmpty - - def apply( - incr: Gizmoduck.IncrCount, - hasMedia: Tweet => Boolean - ): GizmoduckUserCountsUpdatingStore = { - def incrField(field: Field, amt: Int): FutureEffect[Tweet] = - FutureEffect[Tweet](tweet => incr((getUserId(tweet), field, amt))) - - def incrAll(amt: Int): FutureEffect[Tweet] = - FutureEffect.inParallel( - incrField(Field.Tweets, amt).onlyIf(isUserTweet), - incrField(Field.MediaTweets, amt).onlyIf(t => isUserTweet(t) && hasMedia(t)) - ) - - new GizmoduckUserCountsUpdatingStore { - override val insertTweet: FutureEffect[InsertTweet.Event] = - incrAll(1).contramap[InsertTweet.Event](_.tweet) - - override val deleteTweet: FutureEffect[DeleteTweet.Event] = - incrAll(-1) - .contramap[DeleteTweet.Event](_.tweet) - .onlyIf(!_.isUserErasure) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.docx new file mode 100644 index 000000000..fd4728319 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala deleted file mode 100644 index fb6c50c4c..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala +++ /dev/null @@ -1,68 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.gizmoduck.thriftscala.LookupContext -import com.twitter.gizmoduck.thriftscala.ModifiedAccount -import com.twitter.gizmoduck.thriftscala.ModifiedUser -import com.twitter.tweetypie.backends.Gizmoduck -import com.twitter.tweetypie.thriftscala._ - -trait GizmoduckUserGeotagUpdateStore - extends TweetStoreBase[GizmoduckUserGeotagUpdateStore] - with AsyncInsertTweet.Store - with ScrubGeoUpdateUserTimestamp.Store { - def wrap(w: TweetStore.Wrap): GizmoduckUserGeotagUpdateStore = - new TweetStoreWrapper(w, this) - with GizmoduckUserGeotagUpdateStore - with AsyncInsertTweet.StoreWrapper - with ScrubGeoUpdateUserTimestamp.StoreWrapper -} - -/** - * A TweetStore implementation that updates a Gizmoduck user's user_has_geotagged_status flag. - * If a tweet is geotagged and the user's flag is not set, call out to Gizmoduck to update it. - */ -object GizmoduckUserGeotagUpdateStore { - val Action: AsyncWriteAction.UserGeotagUpdate.type = AsyncWriteAction.UserGeotagUpdate - - def apply( - modifyAndGet: Gizmoduck.ModifyAndGet, - stats: StatsReceiver - ): GizmoduckUserGeotagUpdateStore = { - // Counts the number of times that the scrubGeo actually cleared the - // hasGeotaggedStatuses bit for a user. - val clearedCounter = stats.counter("has_geotag_cleared") - - // Counts the number of times that asyncInsertTweet actually set the - // hasGeotaggedStatuses bit for a user. 
- val setCounter = stats.counter("has_geotag_set") - - def setHasGeotaggedStatuses(value: Boolean): FutureEffect[UserId] = { - val modifiedAccount = ModifiedAccount(hasGeotaggedStatuses = Some(value)) - val modifiedUser = ModifiedUser(account = Some(modifiedAccount)) - FutureEffect(userId => modifyAndGet((LookupContext(), userId, modifiedUser)).unit) - } - - new GizmoduckUserGeotagUpdateStore { - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - setHasGeotaggedStatuses(true) - .contramap[AsyncInsertTweet.Event](_.user.id) - .onSuccess(_ => setCounter.incr()) - .onlyIf { e => - // only with geo info and an account that doesn't yet have geotagged statuses flag set - hasGeo(e.tweet) && (e.user.account.exists(!_.hasGeotaggedStatuses)) - } - - override val retryAsyncInsertTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = - TweetStore.retry(Action, asyncInsertTweet) - - override val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] = - setHasGeotaggedStatuses(false) - .contramap[ScrubGeoUpdateUserTimestamp.Event](_.userId) - .onlyIf(_.mightHaveGeotaggedStatuses) - .onSuccess(_ => clearedCounter.incr()) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.docx new file mode 100644 index 000000000..6e874e9f5 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala deleted file mode 100644 index d40e6f657..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala +++ /dev/null @@ -1,144 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.guano.{thriftscala => guano} -import com.twitter.servo.util.Scribe -import com.twitter.takedown.util.TakedownReasons -import com.twitter.tseng.withholding.thriftscala.TakedownReason -import com.twitter.tweetypie.thriftscala.AuditDeleteTweet - -object Guano { - case class MalwareAttempt( - url: String, - userId: UserId, - clientAppId: Option[Long], - remoteHost: Option[String]) { - def toScribeMessage: guano.ScribeMessage = - guano.ScribeMessage( - `type` = guano.ScribeType.MalwareAttempt, - malwareAttempt = Some( - guano.MalwareAttempt( - timestamp = Time.now.inSeconds, - host = remoteHost, - userId = userId, - url = url, - `type` = guano.MalwareAttemptType.Status, - clientAppId = clientAppId.map(_.toInt) // yikes! 
- ) - ) - ) - } - - case class DestroyTweet( - tweet: Tweet, - userId: UserId, - byUserId: UserId, - passthrough: Option[AuditDeleteTweet]) { - def toScribeMessage: guano.ScribeMessage = - guano.ScribeMessage( - `type` = guano.ScribeType.DestroyStatus, - destroyStatus = Some( - guano.DestroyStatus( - `type` = Some(guano.DestroyStatusType.Status), - timestamp = Time.now.inSeconds, - userId = userId, - byUserId = byUserId, - statusId = tweet.id, - text = "", - reason = passthrough - .flatMap(_.reason) - .flatMap { r => guano.UserActionReason.valueOf(r.name) } - .orElse(Some(guano.UserActionReason.Other)), - done = passthrough.flatMap(_.done).orElse(Some(true)), - host = passthrough.flatMap(_.host), - bulkId = passthrough.flatMap(_.bulkId), - note = passthrough.flatMap(_.note), - runId = passthrough.flatMap(_.runId), - clientApplicationId = passthrough.flatMap(_.clientApplicationId), - userAgent = passthrough.flatMap(_.userAgent) - ) - ) - ) - } - - case class Takedown( - tweetId: TweetId, - userId: UserId, - reason: TakedownReason, - takendown: Boolean, - note: Option[String], - host: Option[String], - byUserId: Option[UserId]) { - def toScribeMessage: guano.ScribeMessage = - guano.ScribeMessage( - `type` = guano.ScribeType.PctdAction, - pctdAction = Some( - guano.PctdAction( - `type` = guano.PctdActionType.Status, - timestamp = Time.now.inSeconds, - tweetId = Some(tweetId), - userId = userId, - countryCode = - TakedownReasons.reasonToCountryCode.applyOrElse(reason, (_: TakedownReason) => ""), - takendown = takendown, - note = note, - host = host, - byUserId = byUserId.getOrElse(-1L), - reason = Some(reason) - ) - ) - ) - } - - case class UpdatePossiblySensitiveTweet( - tweetId: TweetId, - userId: UserId, - byUserId: UserId, - action: guano.NsfwTweetActionAction, - enabled: Boolean, - host: Option[String], - note: Option[String]) { - def toScribeMessage: guano.ScribeMessage = - guano.ScribeMessage( - `type` = guano.ScribeType.NsfwTweetAction, - nsfwTweetAction = Some( - guano.NsfwTweetAction( - timestamp = Time.now.inSeconds, - host = host, - userId = userId, - byUserId = byUserId, - action = action, - enabled = enabled, - note = note, - tweetId = tweetId - ) - ) - ) - } - - def apply( - scribe: FutureEffect[guano.ScribeMessage] = Scribe(guano.ScribeMessage, - Scribe("trust_eng_audit")) - ): Guano = { - new Guano { - override val scribeMalwareAttempt: FutureEffect[MalwareAttempt] = - scribe.contramap[MalwareAttempt](_.toScribeMessage) - - override val scribeDestroyTweet: FutureEffect[DestroyTweet] = - scribe.contramap[DestroyTweet](_.toScribeMessage) - - override val scribeTakedown: FutureEffect[Takedown] = - scribe.contramap[Takedown](_.toScribeMessage) - - override val scribeUpdatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet] = - scribe.contramap[UpdatePossiblySensitiveTweet](_.toScribeMessage) - } - } -} - -trait Guano { - val scribeMalwareAttempt: FutureEffect[Guano.MalwareAttempt] - val scribeDestroyTweet: FutureEffect[Guano.DestroyTweet] - val scribeTakedown: FutureEffect[Guano.Takedown] - val scribeUpdatePossiblySensitiveTweet: FutureEffect[Guano.UpdatePossiblySensitiveTweet] -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.docx new file mode 100644 index 000000000..0a438b20e Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.docx differ diff --git 
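The wiring in Guano.apply above adapts one underlying scribe effect (a FutureEffect[guano.ScribeMessage]) to four different event types via contramap, with each case class supplying its own toScribeMessage conversion. Below is a minimal sketch of that adaptation pattern, using a stripped-down, invented Effect type in place of servo's FutureEffect:

import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

// Sketch only: a stripped-down stand-in for servo's FutureEffect.
final case class Effect[A](run: A => Future[Unit]) {
  // Adapt an effect that consumes A into one that consumes B,
  // given a conversion B => A (the dual of map).
  def contramap[B](f: B => A): Effect[B] = Effect(b => run(f(b)))
}

object ContramapSketch {
  final case class ScribeMessage(payload: String)
  final case class MalwareAttempt(url: String) {
    def toScribeMessage: ScribeMessage = ScribeMessage("malware_attempt: " + url)
  }

  def main(args: Array[String]): Unit = {
    // One underlying sink...
    val scribe: Effect[ScribeMessage] =
      Effect(m => Future(println("scribed: " + m.payload)))
    // ...adapted to a specific event type, as in Guano.apply.
    val scribeMalware: Effect[MalwareAttempt] =
      scribe.contramap(_.toScribeMessage)
    Await.result(scribeMalware.run(MalwareAttempt("https://example.com/bad")), 1.second)
  }
}

The same shape recurs throughout these stores, e.g. incrAll(1).contramap[InsertTweet.Event](_.tweet) in GizmoduckUserCountsUpdatingStore.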
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala deleted file mode 100644 index a2a284b8f..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala +++ /dev/null @@ -1,120 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.guano.thriftscala.NsfwTweetActionAction -import com.twitter.tseng.withholding.thriftscala.TakedownReason -import com.twitter.tweetypie.thriftscala._ - -trait GuanoServiceStore - extends TweetStoreBase[GuanoServiceStore] - with AsyncDeleteTweet.Store - with AsyncTakedown.Store - with AsyncUpdatePossiblySensitiveTweet.Store { - def wrap(w: TweetStore.Wrap): GuanoServiceStore = - new TweetStoreWrapper(w, this) - with GuanoServiceStore - with AsyncDeleteTweet.StoreWrapper - with AsyncTakedown.StoreWrapper - with AsyncUpdatePossiblySensitiveTweet.StoreWrapper -} - -object GuanoServiceStore { - val Action: AsyncWriteAction.GuanoScribe.type = AsyncWriteAction.GuanoScribe - - val toGuanoTakedown: (AsyncTakedown.Event, TakedownReason, Boolean) => Guano.Takedown = - (event: AsyncTakedown.Event, reason: TakedownReason, takendown: Boolean) => - Guano.Takedown( - tweetId = event.tweet.id, - userId = getUserId(event.tweet), - reason = reason, - takendown = takendown, - note = event.auditNote, - host = event.host, - byUserId = event.byUserId - ) - - val toGuanoUpdatePossiblySensitiveTweet: ( - AsyncUpdatePossiblySensitiveTweet.Event, - Boolean, - NsfwTweetActionAction - ) => Guano.UpdatePossiblySensitiveTweet = - ( - event: AsyncUpdatePossiblySensitiveTweet.Event, - updatedValue: Boolean, - action: NsfwTweetActionAction - ) => - Guano.UpdatePossiblySensitiveTweet( - tweetId = event.tweet.id, - host = event.host.orElse(Some("unknown")), - userId = event.user.id, - byUserId = event.byUserId, - action = action, - enabled = updatedValue, - note = event.note - ) - - def apply(guano: Guano, stats: StatsReceiver): GuanoServiceStore = { - val deleteByUserIdCounter = stats.counter("deletes_with_by_user_id") - val deleteScribeCounter = stats.counter("deletes_resulting_in_scribe") - - new GuanoServiceStore { - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - FutureEffect[AsyncDeleteTweet.Event] { event => - val tweet = event.tweet - - event.byUserId.foreach(_ => deleteByUserIdCounter.incr()) - - // Guano the tweet deletion action not initiated from the RetweetsDeletionStore - event.byUserId match { - case Some(byUserId) => - deleteScribeCounter.incr() - guano.scribeDestroyTweet( - Guano.DestroyTweet( - tweet = tweet, - userId = getUserId(tweet), - byUserId = byUserId, - passthrough = event.auditPassthrough - ) - ) - case _ => - Future.Unit - } - }.onlyIf(_.cascadedFromTweetId.isEmpty) - - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncDeleteTweet) - - override val asyncTakedown: FutureEffect[AsyncTakedown.Event] = - FutureEffect[AsyncTakedown.Event] { event => - val messages = - event.reasonsToAdd.map(toGuanoTakedown(event, _, true)) ++ - event.reasonsToRemove.map(toGuanoTakedown(event, _, false)) - Future.join(messages.map(guano.scribeTakedown)) - }.onlyIf(_.scribeForAudit) - - override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = - TweetStore.retry(Action, asyncTakedown) - - override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ - 
AsyncUpdatePossiblySensitiveTweet.Event - ] = - FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event] { event => - val messages = - event.nsfwAdminChange.map( - toGuanoUpdatePossiblySensitiveTweet(event, _, NsfwTweetActionAction.NsfwAdmin) - ) ++ - event.nsfwUserChange.map( - toGuanoUpdatePossiblySensitiveTweet(event, _, NsfwTweetActionAction.NsfwUser) - ) - Future.join(messages.toSeq.map(guano.scribeUpdatePossiblySensitiveTweet)) - } - - override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] - ] = - TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.docx new file mode 100644 index 000000000..a674a46f9 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala deleted file mode 100644 index 5f1f2920a..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala +++ /dev/null @@ -1,92 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.store.TweetStoreEvent.NoRetry -import com.twitter.tweetypie.store.TweetStoreEvent.RetryStrategy -import com.twitter.tweetypie.thriftscala.AsyncIncrBookmarkCountRequest -import com.twitter.tweetypie.thriftscala.AsyncWriteAction - -object IncrBookmarkCount extends TweetStore.SyncModule { - case class Event(tweetId: TweetId, delta: Int, timestamp: Time) - extends SyncTweetStoreEvent("incr_bookmark_count") { - val toAsyncRequest: AsyncIncrBookmarkCountRequest = - AsyncIncrBookmarkCountRequest(tweetId = tweetId, delta = delta) - } - - trait Store { - val incrBookmarkCount: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val incrBookmarkCount: FutureEffect[Event] = wrap(underlying.incrBookmarkCount) - } - - object Store { - def apply( - asyncEnqueueStore: AsyncEnqueueStore, - replicatingStore: ReplicatingTweetStore - ): Store = { - new Store { - override val incrBookmarkCount: FutureEffect[Event] = - FutureEffect.inParallel( - asyncEnqueueStore.incrBookmarkCount, - replicatingStore.incrBookmarkCount - ) - } - } - } -} - -object AsyncIncrBookmarkCount extends TweetStore.AsyncModule { - case class Event(tweetId: TweetId, delta: Int, timestamp: Time) - extends AsyncTweetStoreEvent("async_incr_bookmark_event") { - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - Future.Unit - - override def retryStrategy: RetryStrategy = NoRetry - } - - trait Store { - def asyncIncrBookmarkCount: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val asyncIncrBookmarkCount: FutureEffect[Event] = wrap( - underlying.asyncIncrBookmarkCount) - } - - object Store { - def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { - new Store { - override def asyncIncrBookmarkCount: FutureEffect[AsyncIncrBookmarkCount.Event] = - tweetCountsUpdatingStore.asyncIncrBookmarkCount - } - } - } -} - -object ReplicatedIncrBookmarkCount extends TweetStore.ReplicatedModule { - case class Event(tweetId: TweetId, delta: Int) - extends 
ReplicatedTweetStoreEvent("replicated_incr_bookmark_count") { - override def retryStrategy: RetryStrategy = NoRetry - } - - trait Store { - val replicatedIncrBookmarkCount: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedIncrBookmarkCount: FutureEffect[Event] = wrap( - underlying.replicatedIncrBookmarkCount) - } - - object Store { - def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { - new Store { - override val replicatedIncrBookmarkCount: FutureEffect[Event] = { - tweetCountsUpdatingStore.replicatedIncrBookmarkCount - } - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.docx new file mode 100644 index 000000000..a53900dac Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala deleted file mode 100644 index b6e1aabcb..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala +++ /dev/null @@ -1,90 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.store.TweetStoreEvent.NoRetry -import com.twitter.tweetypie.thriftscala._ - -object IncrFavCount extends TweetStore.SyncModule { - - case class Event(tweetId: TweetId, delta: Int, timestamp: Time) - extends SyncTweetStoreEvent("incr_fav_count") { - val toAsyncRequest: AsyncIncrFavCountRequest = AsyncIncrFavCountRequest(tweetId, delta) - } - - trait Store { - val incrFavCount: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val incrFavCount: FutureEffect[Event] = wrap(underlying.incrFavCount) - } - - object Store { - def apply( - asyncEnqueueStore: AsyncEnqueueStore, - replicatingStore: ReplicatingTweetStore - ): Store = - new Store { - override val incrFavCount: FutureEffect[Event] = - FutureEffect.inParallel( - asyncEnqueueStore.incrFavCount, - replicatingStore.incrFavCount - ) - } - } -} - -object AsyncIncrFavCount extends TweetStore.AsyncModule { - - case class Event(tweetId: TweetId, delta: Int, timestamp: Time) - extends AsyncTweetStoreEvent("async_incr_fav_count") { - - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - Future.Unit // We need to define this method for TweetStoreEvent.Async but we don't use it - - override def retryStrategy: TweetStoreEvent.RetryStrategy = NoRetry - } - - trait Store { - val asyncIncrFavCount: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val asyncIncrFavCount: FutureEffect[Event] = wrap(underlying.asyncIncrFavCount) - } - - object Store { - def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { - new Store { - override val asyncIncrFavCount: FutureEffect[Event] = - tweetCountsUpdatingStore.asyncIncrFavCount - } - } - } -} - -object ReplicatedIncrFavCount extends TweetStore.ReplicatedModule { - - case class Event(tweetId: TweetId, delta: Int) - extends ReplicatedTweetStoreEvent("replicated_incr_fav_count") { - override def retryStrategy: TweetStoreEvent.NoRetry.type = NoRetry - } - - trait Store { - val replicatedIncrFavCount: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: 
TweetStoreWrapper[Store] => - override val replicatedIncrFavCount: FutureEffect[Event] = wrap( - underlying.replicatedIncrFavCount) - } - - object Store { - def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { - new Store { - override val replicatedIncrFavCount: FutureEffect[Event] = - tweetCountsUpdatingStore.replicatedIncrFavCount.ignoreFailures - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.docx new file mode 100644 index 000000000..3da80534e Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala deleted file mode 100644 index 3e796d3d8..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.tweetypie.store - -import com.twitter.tweetypie.Tweet -import com.twitter.tweetypie.serverutil.ExtendedTweetMetadataBuilder -import com.twitter.tweetypie.thriftscala.EditControl -import com.twitter.tweetypie.thriftscala.InitialTweetUpdateRequest -import com.twitter.tweetypie.util.EditControlUtil - -/* Logic to update the initial tweet with new information when that tweet is edited */ -object InitialTweetUpdate { - - /* Given the initial tweet and update request, copy updated edit - * related fields onto it. - */ - def updateTweet(initialTweet: Tweet, request: InitialTweetUpdateRequest): Tweet = { - - // compute a new edit control initial with updated list of edit tweet ids - val editControl: EditControl.Initial = - EditControlUtil.editControlForInitialTweet(initialTweet, request.editTweetId).get() - - // compute the correct extended metadata for a permalink - val extendedTweetMetadata = - request.selfPermalink.map(link => ExtendedTweetMetadataBuilder(initialTweet, link)) - - initialTweet.copy( - selfPermalink = initialTweet.selfPermalink.orElse(request.selfPermalink), - editControl = Some(editControl), - extendedTweetMetadata = initialTweet.extendedTweetMetadata.orElse(extendedTweetMetadata) - ) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.docx new file mode 100644 index 000000000..e43a02856 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala deleted file mode 100644 index 969cc2b5a..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala +++ /dev/null @@ -1,284 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.context.thriftscala.FeatureContext -import com.twitter.tweetypie.core.GeoSearchRequestId -import com.twitter.tweetypie.store.TweetEventDataScrubber.scrub -import com.twitter.tweetypie.thriftscala._ - -object InsertTweet extends TweetStore.SyncModule { - - case class Event( - tweet: Tweet, - user: User, - timestamp: Time, - _internalTweet: Option[CachedTweet] = None, - sourceTweet: Option[Tweet] = None, - sourceUser: Option[User] = None, - quotedTweet: Option[Tweet] = 
None, - quotedUser: Option[User] = None, - parentUserId: Option[UserId] = None, - initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None, - dark: Boolean = false, - hydrateOptions: WritePathHydrationOptions = WritePathHydrationOptions(), - featureContext: Option[FeatureContext] = None, - geoSearchRequestId: Option[GeoSearchRequestId] = None, - additionalContext: Option[collection.Map[TweetCreateContextKey, String]] = None, - transientContext: Option[TransientCreateContext] = None, - quoterHasAlreadyQuotedTweet: Boolean = false, - noteTweetMentionedUserIds: Option[Seq[Long]] = None) - extends SyncTweetStoreEvent("insert_tweet") - with QuotedTweetOps { - def internalTweet: CachedTweet = - _internalTweet.getOrElse( - throw new IllegalStateException( - s"internalTweet should have been set in WritePathHydration, ${this}" - ) - ) - - def toAsyncRequest( - scrubUser: User => User, - scrubSourceTweet: Tweet => Tweet, - scrubSourceUser: User => User - ): AsyncInsertRequest = - AsyncInsertRequest( - tweet = tweet, - cachedTweet = internalTweet, - user = scrubUser(user), - sourceTweet = sourceTweet.map(scrubSourceTweet), - sourceUser = sourceUser.map(scrubSourceUser), - quotedTweet = quotedTweet.map(scrubSourceTweet), - quotedUser = quotedUser.map(scrubSourceUser), - parentUserId = parentUserId, - featureContext = featureContext, - timestamp = timestamp.inMillis, - geoSearchRequestId = geoSearchRequestId.map(_.requestID), - additionalContext = additionalContext, - transientContext = transientContext, - quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet), - initialTweetUpdateRequest = initialTweetUpdateRequest, - noteTweetMentionedUserIds = noteTweetMentionedUserIds - ) - } - - trait Store { - val insertTweet: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val insertTweet: FutureEffect[Event] = wrap(underlying.insertTweet) - } - - object Store { - def apply( - logLensStore: LogLensStore, - manhattanStore: ManhattanTweetStore, - tweetStatsStore: TweetStatsStore, - cachingTweetStore: CachingTweetStore, - limiterStore: LimiterStore, - asyncEnqueueStore: AsyncEnqueueStore, - userCountsUpdatingStore: GizmoduckUserCountsUpdatingStore, - tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore - ): Store = - new Store { - override val insertTweet: FutureEffect[Event] = - FutureEffect.sequentially( - logLensStore.insertTweet, - manhattanStore.insertTweet, - tweetStatsStore.insertTweet, - FutureEffect.inParallel( - // allow write-through caching to fail without failing entire insert - cachingTweetStore.ignoreFailures.insertTweet, - limiterStore.ignoreFailures.insertTweet, - asyncEnqueueStore.insertTweet, - userCountsUpdatingStore.insertTweet, - tweetCountsUpdatingStore.insertTweet - ) - ) - } - } -} - -object AsyncInsertTweet extends TweetStore.AsyncModule { - - private val log = Logger(getClass) - - object Event { - def fromAsyncRequest(request: AsyncInsertRequest): TweetStoreEventOrRetry[Event] = - TweetStoreEventOrRetry( - Event( - tweet = request.tweet, - cachedTweet = request.cachedTweet, - user = request.user, - optUser = Some(request.user), - timestamp = Time.fromMilliseconds(request.timestamp), - sourceTweet = request.sourceTweet, - sourceUser = request.sourceUser, - parentUserId = request.parentUserId, - featureContext = request.featureContext, - quotedTweet = request.quotedTweet, - quotedUser = request.quotedUser, - geoSearchRequestId = request.geoSearchRequestId, - additionalContext = request.additionalContext, - 
transientContext = request.transientContext, - quoterHasAlreadyQuotedTweet = request.quoterHasAlreadyQuotedTweet.getOrElse(false), - initialTweetUpdateRequest = request.initialTweetUpdateRequest, - noteTweetMentionedUserIds = request.noteTweetMentionedUserIds - ), - request.retryAction, - RetryEvent - ) - } - - case class Event( - tweet: Tweet, - cachedTweet: CachedTweet, - user: User, - optUser: Option[User], - timestamp: Time, - sourceTweet: Option[Tweet] = None, - sourceUser: Option[User] = None, - parentUserId: Option[UserId] = None, - featureContext: Option[FeatureContext] = None, - quotedTweet: Option[Tweet] = None, - quotedUser: Option[User] = None, - geoSearchRequestId: Option[String] = None, - additionalContext: Option[collection.Map[TweetCreateContextKey, String]] = None, - transientContext: Option[TransientCreateContext] = None, - quoterHasAlreadyQuotedTweet: Boolean = false, - initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None, - noteTweetMentionedUserIds: Option[Seq[Long]] = None) - extends AsyncTweetStoreEvent("async_insert_tweet") - with QuotedTweetOps - with TweetStoreTweetEvent { - - def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncInsertRequest = - AsyncInsertRequest( - tweet = tweet, - cachedTweet = cachedTweet, - user = user, - sourceTweet = sourceTweet, - sourceUser = sourceUser, - parentUserId = parentUserId, - retryAction = action, - featureContext = featureContext, - timestamp = timestamp.inMillis, - quotedTweet = quotedTweet, - quotedUser = quotedUser, - geoSearchRequestId = geoSearchRequestId, - additionalContext = additionalContext, - transientContext = transientContext, - quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet), - initialTweetUpdateRequest = initialTweetUpdateRequest, - noteTweetMentionedUserIds = noteTweetMentionedUserIds - ) - - override def toTweetEventData: Seq[TweetEventData] = - Seq( - TweetEventData.TweetCreateEvent( - TweetCreateEvent( - tweet = scrub(tweet), - user = user, - sourceUser = sourceUser, - sourceTweet = sourceTweet.map(scrub), - retweetParentUserId = parentUserId, - quotedTweet = publicQuotedTweet.map(scrub), - quotedUser = publicQuotedUser, - additionalContext = additionalContext, - transientContext = transientContext, - quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet) - ) - ) - ) - - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - service.asyncInsert(toAsyncRequest(Some(action))) - } - - case class RetryEvent(action: AsyncWriteAction, event: Event) - extends TweetStoreRetryEvent[Event] { - - override val eventType: AsyncWriteEventType.Insert.type = AsyncWriteEventType.Insert - override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) - } - - trait Store { - val asyncInsertTweet: FutureEffect[Event] - val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[Event]] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val asyncInsertTweet: FutureEffect[Event] = wrap(underlying.asyncInsertTweet) - override val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( - underlying.retryAsyncInsertTweet) - } - - object Store { - def apply( - replicatingStore: ReplicatingTweetStore, - indexingStore: TweetIndexingStore, - tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, - timelineUpdatingStore: TlsTimelineUpdatingStore, - eventBusEnqueueStore: TweetEventBusStore, - fanoutServiceStore: FanoutServiceStore, - scribeMediaTagStore: ScribeMediaTagStore, - 
userGeotagUpdateStore: GizmoduckUserGeotagUpdateStore, - geoSearchRequestIDStore: GeoSearchRequestIDStore - ): Store = { - val stores: Seq[Store] = - Seq( - replicatingStore, - indexingStore, - timelineUpdatingStore, - eventBusEnqueueStore, - fanoutServiceStore, - userGeotagUpdateStore, - tweetCountsUpdatingStore, - scribeMediaTagStore, - geoSearchRequestIDStore - ) - - def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = - FutureEffect.inParallel[E](stores.map(extract): _*) - - new Store { - override val asyncInsertTweet: FutureEffect[Event] = build(_.asyncInsertTweet) - override val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[Event]] = build( - _.retryAsyncInsertTweet) - } - } - } -} - -object ReplicatedInsertTweet extends TweetStore.ReplicatedModule { - - case class Event( - tweet: Tweet, - cachedTweet: CachedTweet, - quoterHasAlreadyQuotedTweet: Boolean = false, - initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None) - extends ReplicatedTweetStoreEvent("replicated_insert_tweet") - - trait Store { - val replicatedInsertTweet: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedInsertTweet: FutureEffect[Event] = wrap(underlying.replicatedInsertTweet) - } - - object Store { - def apply( - cachingTweetStore: CachingTweetStore, - tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore - ): Store = { - new Store { - override val replicatedInsertTweet: FutureEffect[Event] = - FutureEffect.inParallel( - cachingTweetStore.replicatedInsertTweet, - tweetCountsUpdatingStore.replicatedInsertTweet.ignoreFailures - ) - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.docx new file mode 100644 index 000000000..7149e0a93 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala deleted file mode 100644 index fa71a7967..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.backends.LimiterService -import com.twitter.tweetypie.thriftscala._ - -trait LimiterStore extends TweetStoreBase[LimiterStore] with InsertTweet.Store { - def wrap(w: TweetStore.Wrap): LimiterStore = - new TweetStoreWrapper(w, this) with LimiterStore with InsertTweet.StoreWrapper -} - -object LimiterStore { - def apply( - incrementCreateSuccess: LimiterService.IncrementByOne, - incrementMediaTags: LimiterService.Increment - ): LimiterStore = - new LimiterStore { - override val insertTweet: FutureEffect[InsertTweet.Event] = - FutureEffect[InsertTweet.Event] { event => - Future.when(!event.dark) { - val userId = event.user.id - val contributorUserId: Option[UserId] = event.tweet.contributor.map(_.userId) - - val mediaTags = getMediaTagMap(event.tweet) - val mediaTagCount = countDistinctUserMediaTags(mediaTags) - Future - .join( - incrementCreateSuccess(userId, contributorUserId), - incrementMediaTags(userId, contributorUserId, mediaTagCount) - ) - .unit - } - } - } - - def countDistinctUserMediaTags(mediaTags: Map[MediaId, Seq[MediaTag]]): Int = - mediaTags.values.flatten.toSeq - .collect { case 
MediaTag(MediaTagType.User, Some(userId), _, _) => userId } - .distinct - .size -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.docx new file mode 100644 index 000000000..ce1570e0c Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala deleted file mode 100644 index 67b69691e..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala +++ /dev/null @@ -1,169 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.twitter.finagle.tracing.Trace -import com.twitter.tweetypie.additionalfields.AdditionalFields -import com.twitter.tweetypie.client_id.ClientIdHelper -import com.twitter.tweetypie.media.Media.ownMedia - -trait LogLensStore - extends TweetStoreBase[LogLensStore] - with InsertTweet.Store - with DeleteTweet.Store - with UndeleteTweet.Store - with SetAdditionalFields.Store - with DeleteAdditionalFields.Store - with ScrubGeo.Store - with Takedown.Store - with UpdatePossiblySensitiveTweet.Store { - def wrap(w: TweetStore.Wrap): LogLensStore = - new TweetStoreWrapper(w, this) - with LogLensStore - with InsertTweet.StoreWrapper - with DeleteTweet.StoreWrapper - with UndeleteTweet.StoreWrapper - with SetAdditionalFields.StoreWrapper - with DeleteAdditionalFields.StoreWrapper - with ScrubGeo.StoreWrapper - with Takedown.StoreWrapper - with UpdatePossiblySensitiveTweet.StoreWrapper -} - -object LogLensStore { - def apply( - tweetCreationsLogger: Logger, - tweetDeletionsLogger: Logger, - tweetUndeletionsLogger: Logger, - tweetUpdatesLogger: Logger, - clientIdHelper: ClientIdHelper, - ): LogLensStore = - new LogLensStore { - private[this] val mapper = new ObjectMapper().registerModule(DefaultScalaModule) - - private def logMessage(logger: Logger, data: (String, Any)*): Future[Unit] = - Future { - val allData = data ++ defaultData - val msg = mapper.writeValueAsString(Map(allData: _*)) - logger.info(msg) - } - - // Note: Longs are logged as strings to avoid JSON 53-bit numeric truncation - private def defaultData: Seq[(String, Any)] = { - val viewer = TwitterContext() - Seq( - "client_id" -> getOpt(clientIdHelper.effectiveClientId), - "service_id" -> getOpt(clientIdHelper.effectiveServiceIdentifier), - "trace_id" -> Trace.id.traceId.toString, - "audit_ip" -> getOpt(viewer.flatMap(_.auditIp)), - "application_id" -> getOpt(viewer.flatMap(_.clientApplicationId).map(_.toString)), - "user_agent" -> getOpt(viewer.flatMap(_.userAgent)), - "authenticated_user_id" -> getOpt(viewer.flatMap(_.authenticatedUserId).map(_.toString)) - ) - } - - private def getOpt[A](opt: Option[A]): Any = - opt.getOrElse(null) - - override val insertTweet: FutureEffect[InsertTweet.Event] = - FutureEffect[InsertTweet.Event] { event => - logMessage( - tweetCreationsLogger, - "type" -> "create_tweet", - "tweet_id" -> event.tweet.id.toString, - "user_id" -> event.user.id.toString, - "source_tweet_id" -> getOpt(event.sourceTweet.map(_.id.toString)), - "source_user_id" -> getOpt(event.sourceUser.map(_.id.toString)), - "directed_at_user_id" -> getOpt(getDirectedAtUser(event.tweet).map(_.userId.toString)), - "reply_to_tweet_id" 
-> getOpt( - getReply(event.tweet).flatMap(_.inReplyToStatusId).map(_.toString)), - "reply_to_user_id" -> getOpt(getReply(event.tweet).map(_.inReplyToUserId.toString)), - "media_ids" -> ownMedia(event.tweet).map(_.mediaId.toString) - ) - } - - override val deleteTweet: FutureEffect[DeleteTweet.Event] = - FutureEffect[DeleteTweet.Event] { event => - logMessage( - tweetDeletionsLogger, - "type" -> "delete_tweet", - "tweet_id" -> event.tweet.id.toString, - "user_id" -> getOpt(event.user.map(_.id.toString)), - "source_tweet_id" -> getOpt(getShare(event.tweet).map(_.sourceStatusId.toString)), - "by_user_id" -> getOpt(event.byUserId.map(_.toString)), - "passthrough_audit_ip" -> getOpt(event.auditPassthrough.flatMap(_.host)), - "media_ids" -> ownMedia(event.tweet).map(_.mediaId.toString), - "cascaded_from_tweet_id" -> getOpt(event.cascadedFromTweetId.map(_.toString)) - ) - } - - override val undeleteTweet: FutureEffect[UndeleteTweet.Event] = - FutureEffect[UndeleteTweet.Event] { event => - logMessage( - tweetUndeletionsLogger, - "type" -> "undelete_tweet", - "tweet_id" -> event.tweet.id.toString, - "user_id" -> event.user.id.toString, - "source_tweet_id" -> getOpt(getShare(event.tweet).map(_.sourceStatusId.toString)), - "media_ids" -> ownMedia(event.tweet).map(_.mediaId.toString) - ) - } - - override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = - FutureEffect[SetAdditionalFields.Event] { event => - logMessage( - tweetUpdatesLogger, - "type" -> "set_additional_fields", - "tweet_id" -> event.additionalFields.id.toString, - "field_ids" -> AdditionalFields.nonEmptyAdditionalFieldIds(event.additionalFields) - ) - } - - override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = - FutureEffect[DeleteAdditionalFields.Event] { event => - logMessage( - tweetUpdatesLogger, - "type" -> "delete_additional_fields", - "tweet_id" -> event.tweetId.toString, - "field_ids" -> event.fieldIds - ) - } - - override val scrubGeo: FutureEffect[ScrubGeo.Event] = - FutureEffect[ScrubGeo.Event] { event => - Future.join( - event.tweetIds.map { tweetId => - logMessage( - tweetUpdatesLogger, - "type" -> "scrub_geo", - "tweet_id" -> tweetId.toString, - "user_id" -> event.userId.toString - ) - } - ) - } - - override val takedown: FutureEffect[Takedown.Event] = - FutureEffect[Takedown.Event] { event => - logMessage( - tweetUpdatesLogger, - "type" -> "takedown", - "tweet_id" -> event.tweet.id.toString, - "user_id" -> getUserId(event.tweet).toString, - "reasons" -> event.takedownReasons - ) - } - - override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = - FutureEffect[UpdatePossiblySensitiveTweet.Event] { event => - logMessage( - tweetUpdatesLogger, - "type" -> "update_possibly_sensitive_tweet", - "tweet_id" -> event.tweet.id.toString, - "nsfw_admin" -> TweetLenses.nsfwAdmin(event.tweet), - "nsfw_user" -> TweetLenses.nsfwUser(event.tweet) - ) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.docx new file mode 100644 index 000000000..e8109162c Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala deleted file mode 100644 index 6eaa65eee..000000000 --- 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala +++ /dev/null @@ -1,231 +0,0 @@ -/** Copyright 2010 Twitter, Inc. */ -package com.twitter.tweetypie -package store - -import com.twitter.stitch.Stitch -import com.twitter.tweetypie.additionalfields.AdditionalFields -import com.twitter.tweetypie.storage.Field -import com.twitter.tweetypie.storage.Response.TweetResponse -import com.twitter.tweetypie.storage.Response.TweetResponseCode -import com.twitter.tweetypie.storage.TweetStorageClient -import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet -import com.twitter.tweetypie.storage.TweetStorageException -import com.twitter.tweetypie.thriftscala._ -import com.twitter.util.Future - -case class UpdateTweetNotFoundException(tweetId: TweetId) extends Exception - -trait ManhattanTweetStore - extends TweetStoreBase[ManhattanTweetStore] - with InsertTweet.Store - with AsyncDeleteTweet.Store - with ScrubGeo.Store - with SetAdditionalFields.Store - with DeleteAdditionalFields.Store - with AsyncDeleteAdditionalFields.Store - with Takedown.Store - with UpdatePossiblySensitiveTweet.Store - with AsyncUpdatePossiblySensitiveTweet.Store { - def wrap(w: TweetStore.Wrap): ManhattanTweetStore = - new TweetStoreWrapper(w, this) - with ManhattanTweetStore - with InsertTweet.StoreWrapper - with AsyncDeleteTweet.StoreWrapper - with ScrubGeo.StoreWrapper - with SetAdditionalFields.StoreWrapper - with DeleteAdditionalFields.StoreWrapper - with AsyncDeleteAdditionalFields.StoreWrapper - with Takedown.StoreWrapper - with UpdatePossiblySensitiveTweet.StoreWrapper - with AsyncUpdatePossiblySensitiveTweet.StoreWrapper -} - -/** - * A TweetStore implementation that writes to Manhattan. - */ -object ManhattanTweetStore { - val Action: AsyncWriteAction.TbirdUpdate.type = AsyncWriteAction.TbirdUpdate - - private val log = Logger(getClass) - private val successResponses = Set(TweetResponseCode.Success, TweetResponseCode.Deleted) - - case class AnnotationFailure(message: String) extends Exception(message) - - def apply(tweetStorageClient: TweetStorageClient): ManhattanTweetStore = { - - def handleStorageResponses( - responsesStitch: Stitch[Seq[TweetResponse]], - action: String - ): Future[Unit] = - Stitch - .run(responsesStitch) - .onFailure { - case ex: TweetStorageException => log.warn("failed on: " + action, ex) - case _ => - } - .flatMap { responses => - Future.when(responses.exists(resp => !successResponses(resp.overallResponse))) { - Future.exception(AnnotationFailure(s"$action gets failure response $responses")) - } - } - - def updateTweetMediaIds(mutation: Mutation[MediaEntity]): Tweet => Tweet = - tweet => tweet.copy(media = tweet.media.map(entities => entities.map(mutation.endo))) - - /** - * Does a get and set, and only sets fields that are allowed to be - * changed. This also prevents incoming tweets containing incomplete - * fields from being saved to Manhattan. - */ - def updateOneTweetByIdAction(tweetId: TweetId, copyFields: Tweet => Tweet): Future[Unit] = { - Stitch.run { - tweetStorageClient.getTweet(tweetId).flatMap { - case GetTweet.Response.Found(tweet) => - val updatedTweet = copyFields(tweet) - - if (updatedTweet != tweet) { - tweetStorageClient.addTweet(updatedTweet) - } else { - Stitch.Unit - } - case _ => Stitch.exception(UpdateTweetNotFoundException(tweetId)) - } - } - } - - // This should NOT be used in parallel with other write operations. - // A race condition can occur after changes to the storage library to - // return all additional fields. 
The resulting behavior can cause - // fields that were modified by other writes to revert to their old value. - def updateOneTweetAction(update: Tweet, copyFields: Tweet => Tweet => Tweet): Future[Unit] = - updateOneTweetByIdAction(update.id, copyFields(update)) - - def tweetStoreUpdateTweet(tweet: Tweet): Future[Unit] = { - val setFields = AdditionalFields.nonEmptyAdditionalFieldIds(tweet).map(Field.additionalField) - handleStorageResponses( - tweetStorageClient.updateTweet(tweet, setFields).map(Seq(_)), - s"updateTweet($tweet, $setFields)" - ) - } - - // This is an edit so update the initial Tweet's control - def updateInitialTweet(event: InsertTweet.Event): Future[Unit] = { - event.initialTweetUpdateRequest match { - case Some(request) => - updateOneTweetByIdAction( - request.initialTweetId, - tweet => InitialTweetUpdate.updateTweet(tweet, request) - ) - case None => Future.Unit - } - } - - new ManhattanTweetStore { - override val insertTweet: FutureEffect[InsertTweet.Event] = - FutureEffect[InsertTweet.Event] { event => - Stitch - .run( - tweetStorageClient.addTweet(event.internalTweet.tweet) - ).flatMap(_ => updateInitialTweet(event)) - } - - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - FutureEffect[AsyncDeleteTweet.Event] { event => - if (event.isBounceDelete) { - Stitch.run(tweetStorageClient.bounceDelete(event.tweet.id)) - } else { - Stitch.run(tweetStorageClient.softDelete(event.tweet.id)) - } - } - - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncDeleteTweet) - - override val scrubGeo: FutureEffect[ScrubGeo.Event] = - FutureEffect[ScrubGeo.Event] { event => - Stitch.run(tweetStorageClient.scrub(event.tweetIds, Seq(Field.Geo))) - } - - override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = - FutureEffect[SetAdditionalFields.Event] { event => - tweetStoreUpdateTweet(event.additionalFields) - } - - override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = - FutureEffect[DeleteAdditionalFields.Event] { event => - handleStorageResponses( - tweetStorageClient.deleteAdditionalFields( - Seq(event.tweetId), - event.fieldIds.map(Field.additionalField) - ), - s"deleteAdditionalFields(${event.tweetId}, ${event.fieldIds})" - ) - } - - override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = - FutureEffect[AsyncDeleteAdditionalFields.Event] { event => - handleStorageResponses( - tweetStorageClient.deleteAdditionalFields( - Seq(event.tweetId), - event.fieldIds.map(Field.additionalField) - ), - s"deleteAdditionalFields(Seq(${event.tweetId}), ${event.fieldIds})" - ) - } - - override val retryAsyncDeleteAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] - ] = - TweetStore.retry(Action, asyncDeleteAdditionalFields) - - override val takedown: FutureEffect[Takedown.Event] = - FutureEffect[Takedown.Event] { event => - val (fieldsToUpdate, fieldsToDelete) = - Seq( - Field.TweetypieOnlyTakedownCountryCodes, - Field.TweetypieOnlyTakedownReasons - ).filter(_ => event.updateCodesAndReasons) - .partition(f => event.tweet.getFieldBlob(f.id).isDefined) - - val allFieldsToUpdate = Seq(Field.HasTakedown) ++ fieldsToUpdate - - Future - .join( - handleStorageResponses( - tweetStorageClient - .updateTweet(event.tweet, allFieldsToUpdate) - .map(Seq(_)), - s"updateTweet(${event.tweet}, $allFieldsToUpdate)" - ), - Future.when(fieldsToDelete.nonEmpty) { - handleStorageResponses( - 
tweetStorageClient - .deleteAdditionalFields(Seq(event.tweet.id), fieldsToDelete), - s"deleteAdditionalFields(Seq(${event.tweet.id}), $fieldsToDelete)" - ) - } - ).unit - } - - override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = - FutureEffect[UpdatePossiblySensitiveTweet.Event] { event => - updateOneTweetAction(event.tweet, TweetUpdate.copyNsfwFieldsForUpdate) - } - - override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ - AsyncUpdatePossiblySensitiveTweet.Event - ] = - FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event] { event => - updateOneTweetAction(event.tweet, TweetUpdate.copyNsfwFieldsForUpdate) - } - - override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] - ] = - TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) - - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.docx new file mode 100644 index 000000000..ab704de37 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala deleted file mode 100644 index 4efe22706..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.thriftscala._ -import scala.util.matching.Regex - -object MediaIndexHelper { - - /** - * Which tweets should we treat as "media" tweets? - * - * Any tweet that is not a retweet and any of: - * - Is explicitly marked as a media tweet. - * - Has a media entity. - * - Includes a partner media URL. 
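For illustration, a usage sketch of this predicate (the regex values are hypothetical and the snippet assumes the surrounding com.twitter.tweetypie package aliases for Tweet):

  import scala.util.matching.Regex

  // Hypothetical partner patterns; the real list is supplied by configuration.
  val partnerRegexes: Seq[Regex] = Seq("""youtube\.com""".r, """vimeo\.com""".r)

  // Build the predicate once and reuse it for every tweet.
  val isMediaTweet: Tweet => Boolean = MediaIndexHelper(partnerRegexes)

  // Retweets never qualify; other tweets qualify via the media flag,
  // a media entity, or a partner URL among the expanded URL entities.
  def shouldIndexAsMedia(tweet: Tweet): Boolean = isMediaTweet(tweet)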
- */ - def apply(partnerMediaRegexes: Seq[Regex]): Tweet => Boolean = { - val isPartnerUrl = partnerUrlMatcher(partnerMediaRegexes) - - tweet => - getShare(tweet).isEmpty && - (hasMediaFlagSet(tweet) || - getMedia(tweet).nonEmpty || - getUrls(tweet).exists(isPartnerUrl)) - } - - def partnerUrlMatcher(partnerMediaRegexes: Seq[Regex]): UrlEntity => Boolean = - _.expanded.exists { expandedUrl => - partnerMediaRegexes.exists(_.findFirstIn(expandedUrl).isDefined) - } - - def hasMediaFlagSet(tweet: Tweet): Boolean = - tweet.coreData.flatMap(_.hasMedia).getOrElse(false) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.docx new file mode 100644 index 000000000..e871614af Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala deleted file mode 100644 index f2f427c3c..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala +++ /dev/null @@ -1,62 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.mediaservices.commons.thriftscala.MediaKey -import com.twitter.servo.util.FutureArrow -import com.twitter.tweetypie.media._ -import com.twitter.tweetypie.thriftscala._ - -trait MediaServiceStore - extends TweetStoreBase[MediaServiceStore] - with AsyncDeleteTweet.Store - with AsyncUndeleteTweet.Store { - def wrap(w: TweetStore.Wrap): MediaServiceStore = - new TweetStoreWrapper(w, this) - with MediaServiceStore - with AsyncDeleteTweet.StoreWrapper - with AsyncUndeleteTweet.StoreWrapper -} - -object MediaServiceStore { - val Action: AsyncWriteAction.MediaDeletion.type = AsyncWriteAction.MediaDeletion - - private def ownMedia(t: Tweet): Seq[(MediaKey, TweetId)] = - getMedia(t) - .collect { - case m if Media.isOwnMedia(t.id, m) => (MediaKeyUtil.get(m), t.id) - } - - def apply( - deleteMedia: FutureArrow[DeleteMediaRequest, Unit], - undeleteMedia: FutureArrow[UndeleteMediaRequest, Unit] - ): MediaServiceStore = - new MediaServiceStore { - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - FutureEffect[AsyncDeleteTweet.Event] { e => - Future.when(!isRetweet(e.tweet)) { - val ownMediaKeys: Seq[(MediaKey, TweetId)] = ownMedia(e.tweet) - val deleteMediaRequests = ownMediaKeys.map(DeleteMediaRequest.tupled) - Future.collect(deleteMediaRequests.map(deleteMedia)) - } - } - - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncDeleteTweet) - - override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = - FutureEffect[AsyncUndeleteTweet.Event] { e => - Future.when(!isRetweet(e.tweet)) { - val ownMediaKeys: Seq[(MediaKey, TweetId)] = ownMedia(e.tweet) - val unDeleteMediaRequests = ownMediaKeys.map(UndeleteMediaRequest.tupled) - Future.collect(unDeleteMediaRequests.map(undeleteMedia)) - } - } - - override val retryAsyncUndeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUndeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncUndeleteTweet) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.docx new file mode 100644 index 
000000000..a42ab1800 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala deleted file mode 100644 index 68a6283d7..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala +++ /dev/null @@ -1,45 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.thriftscala._ - -object QuotedTweetDelete extends TweetStore.SyncModule { - - case class Event( - quotingTweetId: TweetId, - quotingUserId: UserId, - quotedTweetId: TweetId, - quotedUserId: UserId, - timestamp: Time, - optUser: Option[User] = None) - extends SyncTweetStoreEvent("quoted_tweet_delete") - with TweetStoreTweetEvent { - - override def toTweetEventData: Seq[TweetEventData] = - Seq( - TweetEventData.QuotedTweetDeleteEvent( - QuotedTweetDeleteEvent( - quotingTweetId = quotingTweetId, - quotingUserId = quotingUserId, - quotedTweetId = quotedTweetId, - quotedUserId = quotedUserId - ) - ) - ) - } - - trait Store { - val quotedTweetDelete: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val quotedTweetDelete: FutureEffect[Event] = wrap(underlying.quotedTweetDelete) - } - - object Store { - def apply(eventBusEnqueueStore: TweetEventBusStore): Store = - new Store { - override val quotedTweetDelete: FutureEffect[Event] = eventBusEnqueueStore.quotedTweetDelete - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.docx new file mode 100644 index 000000000..9ab0821be Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala deleted file mode 100644 index 34fa71aa6..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.tweetypie -package store - -/** - * Mixin that implements public quoted tweet and public quoted user - * filtering for tweet events that have quoted tweets and users. - */ -trait QuotedTweetOps { - def quotedTweet: Option[Tweet] - def quotedUser: Option[User] - - /** - * Do we have evidence that the quoted user is unprotected? - */ - def quotedUserIsPublic: Boolean = - // The quoted user should include the `safety` struct, but if it - // doesn't for any reason then the quoted tweet and quoted user - // should not be included in the events. This is a safety measure to - // avoid leaking private information. - quotedUser.exists(_.safety.exists(!_.isProtected)) - - /** - * The quoted tweet, filtered as it should appear through public APIs. - */ - def publicQuotedTweet: Option[Tweet] = - if (quotedUserIsPublic) quotedTweet else None - - /** - * The quoted user, filtered as it should appear through public APIs. 
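The same rule, restated as a free-standing function (a sketch for clarity; it mirrors quotedUserIsPublic above rather than adding behavior):

  // Release the pair only with positive evidence that the quoted user is
  // unprotected; a missing `safety` struct filters out both tweet and user.
  def publicView(
    quotedTweet: Option[Tweet],
    quotedUser: Option[User]
  ): (Option[Tweet], Option[User]) = {
    val quotedUserIsPublic = quotedUser.exists(_.safety.exists(!_.isProtected))
    if (quotedUserIsPublic) (quotedTweet, quotedUser) else (None, None)
  }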
- */ - def publicQuotedUser: Option[User] = - if (quotedUserIsPublic) quotedUser else None -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.docx new file mode 100644 index 000000000..3bf54ea14 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala deleted file mode 100644 index 4b73437cb..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala +++ /dev/null @@ -1,51 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tseng.withholding.thriftscala.TakedownReason -import com.twitter.tweetypie.thriftscala._ - -object QuotedTweetTakedown extends TweetStore.SyncModule { - - case class Event( - quotingTweetId: TweetId, - quotingUserId: UserId, - quotedTweetId: TweetId, - quotedUserId: UserId, - takedownCountryCodes: Seq[String], - takedownReasons: Seq[TakedownReason], - timestamp: Time, - optUser: Option[User] = None) - extends SyncTweetStoreEvent("quoted_tweet_takedown") - with TweetStoreTweetEvent { - - override def toTweetEventData: Seq[TweetEventData] = - Seq( - TweetEventData.QuotedTweetTakedownEvent( - QuotedTweetTakedownEvent( - quotingTweetId = quotingTweetId, - quotingUserId = quotingUserId, - quotedTweetId = quotedTweetId, - quotedUserId = quotedUserId, - takedownCountryCodes = takedownCountryCodes, - takedownReasons = takedownReasons - ) - ) - ) - } - - trait Store { - val quotedTweetTakedown: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val quotedTweetTakedown: FutureEffect[Event] = wrap(underlying.quotedTweetTakedown) - } - - object Store { - def apply(eventBusEnqueueStore: TweetEventBusStore): Store = - new Store { - override val quotedTweetTakedown: FutureEffect[Event] = - eventBusEnqueueStore.quotedTweetTakedown - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.docx new file mode 100644 index 000000000..34d9d624a Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala deleted file mode 100644 index 333103447..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala +++ /dev/null @@ -1,180 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.thriftscala._ - -/** - * A TweetStore that sends write events to the replication endpoints - * of a ThriftTweetService. - * - * The events that are sent are sufficient to keep the other - * instance's caches up to date. The calls contain sufficient data so - * that the remote caches can be updated without requiring the remote - * Tweetypie to access any other services. - * - * The replication serves two purposes: - * - * 1. Maintain consistency between caches in different data centers. - * - * 2. 
Keep the caches in all data centers warm, protecting backend - * services. - * - * Correctness bugs are worse than bugs that make data less available. - * All of these events affect data consistency. - * - * IncrFavCount.Event and InsertEvents are the least important - * from a data consistency standpoint, because the only data - * consistency issues are counts, which are cached for a shorter time, - * and are not as noticeable to end users if they fail to occur. - * (Failure to apply them is both less severe and self-correcting.) - * - * Delete and GeoScrub events are critical, because the cached data - * has a long expiration and failure to apply them can result in - * violations of user privacy. - * - * Update events are also important from a legal perspective, since - * the update may be updating the per-country take-down status. - * - * @param svc: The ThriftTweetService implementation that will receive the - * replication events. In practice, this will usually be a - * deferredrpc service. - */ -trait ReplicatingTweetStore - extends TweetStoreBase[ReplicatingTweetStore] - with AsyncInsertTweet.Store - with AsyncDeleteTweet.Store - with AsyncUndeleteTweet.Store - with AsyncSetRetweetVisibility.Store - with AsyncSetAdditionalFields.Store - with AsyncDeleteAdditionalFields.Store - with ScrubGeo.Store - with IncrFavCount.Store - with IncrBookmarkCount.Store - with AsyncTakedown.Store - with AsyncUpdatePossiblySensitiveTweet.Store { - def wrap(w: TweetStore.Wrap): ReplicatingTweetStore = - new TweetStoreWrapper(w, this) - with ReplicatingTweetStore - with AsyncInsertTweet.StoreWrapper - with AsyncDeleteTweet.StoreWrapper - with AsyncUndeleteTweet.StoreWrapper - with AsyncSetRetweetVisibility.StoreWrapper - with AsyncSetAdditionalFields.StoreWrapper - with AsyncDeleteAdditionalFields.StoreWrapper - with ScrubGeo.StoreWrapper - with IncrFavCount.StoreWrapper - with IncrBookmarkCount.StoreWrapper - with AsyncTakedown.StoreWrapper - with AsyncUpdatePossiblySensitiveTweet.StoreWrapper -} - -object ReplicatingTweetStore { - - val Action: AsyncWriteAction.Replication.type = AsyncWriteAction.Replication - - def apply( - svc: ThriftTweetService - ): ReplicatingTweetStore = - new ReplicatingTweetStore { - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - FutureEffect[AsyncInsertTweet.Event] { e => - svc.replicatedInsertTweet2( - ReplicatedInsertTweet2Request( - e.cachedTweet, - initialTweetUpdateRequest = e.initialTweetUpdateRequest - )) - } - - override val retryAsyncInsertTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = - TweetStore.retry(Action, asyncInsertTweet) - - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - FutureEffect[AsyncDeleteTweet.Event] { e => - svc.replicatedDeleteTweet2( - ReplicatedDeleteTweet2Request( - tweet = e.tweet, - isErasure = e.isUserErasure, - isBounceDelete = e.isBounceDelete - ) - ) - } - - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncDeleteTweet) - - override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = - FutureEffect[AsyncUndeleteTweet.Event] { e => - svc.replicatedUndeleteTweet2(ReplicatedUndeleteTweet2Request(e.cachedTweet)) - } - - override val retryAsyncUndeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUndeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncUndeleteTweet) - - override val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] = - 
FutureEffect[AsyncSetAdditionalFields.Event] { e => - svc.replicatedSetAdditionalFields(SetAdditionalFieldsRequest(e.additionalFields)) - } - - override val retryAsyncSetAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncSetAdditionalFields.Event] - ] = - TweetStore.retry(Action, asyncSetAdditionalFields) - - override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = - FutureEffect[AsyncSetRetweetVisibility.Event] { e => - svc.replicatedSetRetweetVisibility( - ReplicatedSetRetweetVisibilityRequest(e.srcId, e.visible) - ) - } - - override val retryAsyncSetRetweetVisibility: FutureEffect[ - TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] - ] = - TweetStore.retry(Action, asyncSetRetweetVisibility) - - override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = - FutureEffect[AsyncDeleteAdditionalFields.Event] { e => - svc.replicatedDeleteAdditionalFields( - ReplicatedDeleteAdditionalFieldsRequest(Map(e.tweetId -> e.fieldIds)) - ) - } - - override val retryAsyncDeleteAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] - ] = - TweetStore.retry(Action, asyncDeleteAdditionalFields) - - override val scrubGeo: FutureEffect[ScrubGeo.Event] = - FutureEffect[ScrubGeo.Event](e => svc.replicatedScrubGeo(e.tweetIds)) - - override val incrFavCount: FutureEffect[IncrFavCount.Event] = - FutureEffect[IncrFavCount.Event](e => svc.replicatedIncrFavCount(e.tweetId, e.delta)) - - override val incrBookmarkCount: FutureEffect[IncrBookmarkCount.Event] = - FutureEffect[IncrBookmarkCount.Event](e => - svc.replicatedIncrBookmarkCount(e.tweetId, e.delta)) - - override val asyncTakedown: FutureEffect[AsyncTakedown.Event] = - FutureEffect[AsyncTakedown.Event](e => svc.replicatedTakedown(e.tweet)) - - override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = - TweetStore.retry(Action, asyncTakedown) - - override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ - AsyncUpdatePossiblySensitiveTweet.Event - ] = - FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event](e => - svc.replicatedUpdatePossiblySensitiveTweet(e.tweet)) - - override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] - ] = - TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.docx new file mode 100644 index 000000000..591814416 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala deleted file mode 100644 index 4720e0317..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala +++ /dev/null @@ -1,38 +0,0 @@ -package com.twitter.tweetypie.store -import com.twitter.tweetypie.FutureEffect -import com.twitter.tweetypie.thriftscala.AsyncWriteAction -import com.twitter.tweetypie.thriftscala.RetweetArchivalEvent - -trait RetweetArchivalEnqueueStore - extends TweetStoreBase[RetweetArchivalEnqueueStore] - with AsyncSetRetweetVisibility.Store { - def wrap(w: TweetStore.Wrap): RetweetArchivalEnqueueStore = - new 
TweetStoreWrapper(w, this) - with RetweetArchivalEnqueueStore - with AsyncSetRetweetVisibility.StoreWrapper -} - -object RetweetArchivalEnqueueStore { - - def apply(enqueue: FutureEffect[RetweetArchivalEvent]): RetweetArchivalEnqueueStore = - new RetweetArchivalEnqueueStore { - override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = - FutureEffect[AsyncSetRetweetVisibility.Event] { e => - enqueue( - RetweetArchivalEvent( - retweetId = e.retweetId, - srcTweetId = e.srcId, - retweetUserId = e.retweetUserId, - srcTweetUserId = e.srcTweetUserId, - timestampMs = e.timestamp.inMillis, - isArchivingAction = Some(!e.visible) - ) - ) - } - - override val retryAsyncSetRetweetVisibility: FutureEffect[ - TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] - ] = - TweetStore.retry(AsyncWriteAction.RetweetArchivalEnqueue, asyncSetRetweetVisibility) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.docx new file mode 100644 index 000000000..8fa76df2d Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala deleted file mode 100644 index f610fb5ce..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala +++ /dev/null @@ -1,42 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.servo.util.Scribe -import com.twitter.tweetypie.thriftscala.TweetMediaTagEvent - -/** - * Scribes thrift-encoded TweetMediaTagEvents (from tweet_events.thrift). 
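A minimal wiring sketch (test-style; the in-memory buffer stands in for the production scribe category and is purely hypothetical):

  import scala.collection.mutable

  val lines = mutable.Buffer.empty[String]

  // Capture encoded events in memory instead of Scribe("tweetypie_media_tag_events").
  val store: ScribeMediaTagStore = ScribeMediaTagStore(
    FutureEffect[String] { line =>
      lines += line
      Future.Unit
    }
  )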
- */ -trait ScribeMediaTagStore extends TweetStoreBase[ScribeMediaTagStore] with AsyncInsertTweet.Store { - def wrap(w: TweetStore.Wrap): ScribeMediaTagStore = - new TweetStoreWrapper(w, this) with ScribeMediaTagStore with AsyncInsertTweet.StoreWrapper -} - -object ScribeMediaTagStore { - - private def toMediaTagEvent(event: AsyncInsertTweet.Event): Option[TweetMediaTagEvent] = { - val tweet = event.tweet - val taggedUserIds = getMediaTagMap(tweet).values.flatten.flatMap(_.userId).toSet - val timestamp = Time.now.inMilliseconds - if (taggedUserIds.nonEmpty) { - Some(TweetMediaTagEvent(tweet.id, getUserId(tweet), taggedUserIds, Some(timestamp))) - } else { - None - } - } - - def apply( - scribe: FutureEffect[String] = Scribe("tweetypie_media_tag_events") - ): ScribeMediaTagStore = - new ScribeMediaTagStore { - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - Scribe(TweetMediaTagEvent, scribe) - .contramapOption[AsyncInsertTweet.Event](toMediaTagEvent) - - // we don't retry this action - override val retryAsyncInsertTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = - FutureEffect.unit[TweetStoreRetryEvent[AsyncInsertTweet.Event]] - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.docx new file mode 100644 index 000000000..51cd2e787 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala deleted file mode 100644 index 262def919..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala +++ /dev/null @@ -1,164 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.conversions.DurationOps._ -import com.twitter.servo.cache.Cached -import com.twitter.servo.cache.CachedValueStatus -import com.twitter.servo.cache.LockingCache -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.tweetypie.backends.GeoScrubEventStore -import com.twitter.tweetypie.thriftscala._ - -/** - * Scrub geo information from Tweets. 
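For example, a hypothetical event (ids invented for illustration); the sync store below fans it out to log-lens, Manhattan, cache, event bus, and replication in parallel:

  val event = ScrubGeo.Event(
    tweetIdSet = Set(20L, 21L), // tweets whose geo data must be scrubbed
    userId = 12L,               // owner who requested the scrub
    optUser = None,
    timestamp = Time.now,
    enqueueMax = false
  )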
- */ -object ScrubGeo extends TweetStore.SyncModule { - - case class Event( - tweetIdSet: Set[TweetId], - userId: UserId, - optUser: Option[User], - timestamp: Time, - enqueueMax: Boolean) - extends SyncTweetStoreEvent("scrub_geo") - with TweetStoreTweetEvent { - - val tweetIds: Seq[TweetId] = tweetIdSet.toSeq - - override def toTweetEventData: Seq[TweetEventData] = - tweetIds.map { tweetId => - TweetEventData.TweetScrubGeoEvent( - TweetScrubGeoEvent( - tweetId = tweetId, - userId = userId - ) - ) - } - } - - trait Store { - val scrubGeo: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val scrubGeo: FutureEffect[Event] = wrap(underlying.scrubGeo) - } - - object Store { - def apply( - logLensStore: LogLensStore, - manhattanStore: ManhattanTweetStore, - cachingTweetStore: CachingTweetStore, - eventBusEnqueueStore: TweetEventBusStore, - replicatingStore: ReplicatingTweetStore - ): Store = - new Store { - override val scrubGeo: FutureEffect[Event] = - FutureEffect.inParallel( - logLensStore.scrubGeo, - manhattanStore.scrubGeo, - cachingTweetStore.scrubGeo, - eventBusEnqueueStore.scrubGeo, - replicatingStore.scrubGeo - ) - } - } -} - -object ReplicatedScrubGeo extends TweetStore.ReplicatedModule { - - case class Event(tweetIds: Seq[TweetId]) extends ReplicatedTweetStoreEvent("replicated_scrub_geo") - - trait Store { - val replicatedScrubGeo: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedScrubGeo: FutureEffect[Event] = wrap(underlying.replicatedScrubGeo) - } - - object Store { - def apply(cachingTweetStore: CachingTweetStore): Store = { - new Store { - override val replicatedScrubGeo: FutureEffect[Event] = - cachingTweetStore.replicatedScrubGeo - } - } - } -} - -/** - * Update the timestamp of the user's most recent request to delete all - * location data attached to her tweets. We use the timestamp to ensure - * that even if we fail to scrub a particular tweet in storage, we will - * not return geo information with that tweet. - * - * See http://go/geoscrub for more details. - */ -object ScrubGeoUpdateUserTimestamp extends TweetStore.SyncModule { - - case class Event(userId: UserId, timestamp: Time, optUser: Option[User]) - extends SyncTweetStoreEvent("scrub_geo_update_user_timestamp") - with TweetStoreTweetEvent { - - def mightHaveGeotaggedStatuses: Boolean = - optUser.forall(_.account.forall(_.hasGeotaggedStatuses == true)) - - def maxTweetId: TweetId = SnowflakeId.firstIdFor(timestamp + 1.millisecond) - 1 - - override def toTweetEventData: Seq[TweetEventData] = - Seq( - TweetEventData.UserScrubGeoEvent( - UserScrubGeoEvent( - userId = userId, - maxTweetId = maxTweetId - ) - ) - ) - - /** - * How to update a geo scrub timestamp cache entry. Always prefers - * the highest timestamp value that is available, regardless of when - * it was added to cache. 
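The handler's decision rule, written out as a plain function for clarity (a sketch with the same semantics as cacheHandler below):

  def preferNewest(existing: Option[Cached[Time]], incoming: Time): Option[Cached[Time]] =
    existing match {
      // The cache already holds a newer-or-equal scrub time: leave it alone.
      case Some(c) if c.value.exists(_ >= incoming) => None
      // Otherwise overwrite with the incoming timestamp.
      case _ => Some(Cached(Some(incoming), CachedValueStatus.Found, Time.now))
    }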
- */ - def cacheHandler: LockingCache.Handler[Cached[Time]] = { - case Some(c) if c.value.exists(_ >= timestamp) => None - case _ => Some(Cached(Some(timestamp), CachedValueStatus.Found, Time.now)) - } - } - - trait Store { - val scrubGeoUpdateUserTimestamp: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val scrubGeoUpdateUserTimestamp: FutureEffect[Event] = wrap( - underlying.scrubGeoUpdateUserTimestamp) - } - - object Store { - def apply( - geotagUpdateStore: GizmoduckUserGeotagUpdateStore, - tweetEventBusStore: TweetEventBusStore, - setInManhattan: GeoScrubEventStore.SetGeoScrubTimestamp, - cache: LockingCache[UserId, Cached[Time]] - ): Store = { - val manhattanEffect = - setInManhattan.asFutureEffect - .contramap[Event](e => (e.userId, e.timestamp)) - - val cacheEffect = - FutureEffect[Event](e => cache.lockAndSet(e.userId, e.cacheHandler).unit) - - new Store { - override val scrubGeoUpdateUserTimestamp: FutureEffect[Event] = - FutureEffect.inParallel( - manhattanEffect, - cacheEffect, - geotagUpdateStore.scrubGeoUpdateUserTimestamp, - tweetEventBusStore.scrubGeoUpdateUserTimestamp - ) - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.docx new file mode 100644 index 000000000..607034d9f Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala deleted file mode 100644 index a1dfef0df..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala +++ /dev/null @@ -1,155 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.thriftscala._ - -object SetAdditionalFields extends TweetStore.SyncModule { - - case class Event(additionalFields: Tweet, userId: UserId, timestamp: Time) - extends SyncTweetStoreEvent("set_additional_fields") { - - def toAsyncRequest: AsyncSetAdditionalFieldsRequest = - AsyncSetAdditionalFieldsRequest( - additionalFields = additionalFields, - userId = userId, - timestamp = timestamp.inMillis - ) - } - - trait Store { - val setAdditionalFields: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val setAdditionalFields: FutureEffect[Event] = wrap(underlying.setAdditionalFields) - } - - object Store { - def apply( - manhattanStore: ManhattanTweetStore, - cachingTweetStore: CachingTweetStore, - asyncEnqueueStore: AsyncEnqueueStore, - logLensStore: LogLensStore - ): Store = - new Store { - override val setAdditionalFields: FutureEffect[Event] = - FutureEffect.sequentially( - logLensStore.setAdditionalFields, - manhattanStore.setAdditionalFields, - // Ignore failures but wait for completion to ensure we attempted to update cache before - // running async tasks, in particular publishing an event to EventBus. 
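// Sketch of the ordering being relied on here (assumed semantics, spelled
// out for clarity): FutureEffect.sequentially starts each effect only after
// the previous one completes and fails fast on error, while the
// ignoreFailuresUponCompletion wrap still waits for the cache write to
// finish but swallows its failure. The cache update is therefore always
// attempted before asyncEnqueueStore enqueues the async work that publishes
// to EventBus, even when the cache write fails.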
- cachingTweetStore.ignoreFailuresUponCompletion.setAdditionalFields, - asyncEnqueueStore.setAdditionalFields - ) - } - } -} - -object AsyncSetAdditionalFields extends TweetStore.AsyncModule { - - object Event { - def fromAsyncRequest( - request: AsyncSetAdditionalFieldsRequest, - user: User - ): TweetStoreEventOrRetry[Event] = - TweetStoreEventOrRetry( - Event( - additionalFields = request.additionalFields, - userId = request.userId, - optUser = Some(user), - timestamp = Time.fromMilliseconds(request.timestamp) - ), - request.retryAction, - RetryEvent - ) - } - - case class Event(additionalFields: Tweet, userId: UserId, optUser: Option[User], timestamp: Time) - extends AsyncTweetStoreEvent("async_set_additional_fields") - with TweetStoreTweetEvent { - - def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncSetAdditionalFieldsRequest = - AsyncSetAdditionalFieldsRequest( - additionalFields = additionalFields, - retryAction = action, - userId = userId, - timestamp = timestamp.inMillis - ) - - override def toTweetEventData: Seq[TweetEventData] = - Seq( - TweetEventData.AdditionalFieldUpdateEvent( - AdditionalFieldUpdateEvent( - updatedFields = additionalFields, - userId = optUser.map(_.id) - ) - ) - ) - - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - service.asyncSetAdditionalFields(toAsyncRequest(Some(action))) - } - - case class RetryEvent(action: AsyncWriteAction, event: Event) - extends TweetStoreRetryEvent[Event] { - - override val eventType: AsyncWriteEventType.SetAdditionalFields.type = - AsyncWriteEventType.SetAdditionalFields - override val scribedTweetOnFailure: None.type = None - } - - trait Store { - val asyncSetAdditionalFields: FutureEffect[Event] - val retryAsyncSetAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val asyncSetAdditionalFields: FutureEffect[Event] = wrap( - underlying.asyncSetAdditionalFields) - override val retryAsyncSetAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( - underlying.retryAsyncSetAdditionalFields) - } - - object Store { - def apply( - replicatingStore: ReplicatingTweetStore, - eventBusEnqueueStore: TweetEventBusStore - ): Store = { - val stores: Seq[Store] = Seq(replicatingStore, eventBusEnqueueStore) - - def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = - FutureEffect.inParallel[E](stores.map(extract): _*) - - new Store { - override val asyncSetAdditionalFields: FutureEffect[Event] = build( - _.asyncSetAdditionalFields) - override val retryAsyncSetAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = - build(_.retryAsyncSetAdditionalFields) - } - } - } -} - -object ReplicatedSetAdditionalFields extends TweetStore.ReplicatedModule { - - case class Event(additionalFields: Tweet) - extends ReplicatedTweetStoreEvent("replicated_set_additional_fields") - - trait Store { - val replicatedSetAdditionalFields: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedSetAdditionalFields: FutureEffect[Event] = wrap( - underlying.replicatedSetAdditionalFields) - } - - object Store { - def apply(cachingTweetStore: CachingTweetStore): Store = { - new Store { - override val replicatedSetAdditionalFields: FutureEffect[Event] = - cachingTweetStore.replicatedSetAdditionalFields - } - } - } -} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.docx new file mode 100644 index 000000000..b319fcd68 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala deleted file mode 100644 index 7f4736f15..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala +++ /dev/null @@ -1,172 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.thriftscala._ - -object SetRetweetVisibility extends TweetStore.SyncModule { - - case class Event( - retweetId: TweetId, - visible: Boolean, - srcId: TweetId, - retweetUserId: UserId, - srcTweetUserId: UserId, - timestamp: Time) - extends SyncTweetStoreEvent("set_retweet_visibility") { - def toAsyncRequest: AsyncSetRetweetVisibilityRequest = - AsyncSetRetweetVisibilityRequest( - retweetId = retweetId, - visible = visible, - srcId = srcId, - retweetUserId = retweetUserId, - sourceTweetUserId = srcTweetUserId, - timestamp = timestamp.inMillis - ) - } - - trait Store { - val setRetweetVisibility: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - val setRetweetVisibility: FutureEffect[Event] = wrap(underlying.setRetweetVisibility) - } - - object Store { - - /** - * [[AsyncEnqueueStore]] - use this store to call the asyncSetRetweetVisibility endpoint. - * - * @see [[AsyncSetRetweetVisibility.Store.apply]] - */ - def apply(asyncEnqueueStore: AsyncEnqueueStore): Store = - new Store { - override val setRetweetVisibility: FutureEffect[Event] = - asyncEnqueueStore.setRetweetVisibility - } - } -} - -object AsyncSetRetweetVisibility extends TweetStore.AsyncModule { - - case class Event( - retweetId: TweetId, - visible: Boolean, - srcId: TweetId, - retweetUserId: UserId, - srcTweetUserId: UserId, - timestamp: Time) - extends AsyncTweetStoreEvent("async_set_retweet_visibility") { - def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncSetRetweetVisibilityRequest = - AsyncSetRetweetVisibilityRequest( - retweetId = retweetId, - visible = visible, - srcId = srcId, - retweetUserId = retweetUserId, - sourceTweetUserId = srcTweetUserId, - retryAction = action, - timestamp = timestamp.inMillis - ) - - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - service.asyncSetRetweetVisibility(toAsyncRequest(Some(action))) - } - - object Event { - def fromAsyncRequest(req: AsyncSetRetweetVisibilityRequest): TweetStoreEventOrRetry[Event] = - TweetStoreEventOrRetry( - AsyncSetRetweetVisibility.Event( - retweetId = req.retweetId, - visible = req.visible, - srcId = req.srcId, - retweetUserId = req.retweetUserId, - srcTweetUserId = req.sourceTweetUserId, - timestamp = Time.fromMilliseconds(req.timestamp) - ), - req.retryAction, - RetryEvent - ) - } - - case class RetryEvent(action: AsyncWriteAction, event: Event) - extends TweetStoreRetryEvent[Event] { - - override val eventType: AsyncWriteEventType.SetRetweetVisibility.type = - AsyncWriteEventType.SetRetweetVisibility - override val scribedTweetOnFailure: None.type = None - } - - trait Store { - val asyncSetRetweetVisibility: FutureEffect[Event] - val retryAsyncSetRetweetVisibility: 
FutureEffect[TweetStoreRetryEvent[Event]] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - val asyncSetRetweetVisibility: FutureEffect[Event] = wrap(underlying.asyncSetRetweetVisibility) - val retryAsyncSetRetweetVisibility: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( - underlying.retryAsyncSetRetweetVisibility) - } - - object Store { - - /** - * [[TweetIndexingStore]] - archive or unarchive a retweet edge in TFlock RetweetGraph - * [[TweetCountsCacheUpdatingStore]] - modify the retweet count directly in cache. - * [[ReplicatingTweetStore]] - replicate this [[Event]] in the other DC. - * [[RetweetArchivalEnqueueStore]] - publish RetweetArchivalEvent to "retweet_archival_events" event stream. - * - * @see [[ReplicatedSetRetweetVisibility.Store.apply]] - */ - def apply( - tweetIndexingStore: TweetIndexingStore, - tweetCountsCacheUpdatingStore: TweetCountsCacheUpdatingStore, - replicatingTweetStore: ReplicatingTweetStore, - retweetArchivalEnqueueStore: RetweetArchivalEnqueueStore - ): Store = { - val stores: Seq[Store] = - Seq( - tweetIndexingStore, - tweetCountsCacheUpdatingStore, - replicatingTweetStore, - retweetArchivalEnqueueStore - ) - - def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = - FutureEffect.inParallel[E](stores.map(extract): _*) - - new Store { - override val asyncSetRetweetVisibility: FutureEffect[Event] = build( - _.asyncSetRetweetVisibility) - override val retryAsyncSetRetweetVisibility: FutureEffect[TweetStoreRetryEvent[Event]] = - build(_.retryAsyncSetRetweetVisibility) - } - } - } -} - -object ReplicatedSetRetweetVisibility extends TweetStore.ReplicatedModule { - - case class Event(srcId: TweetId, visible: Boolean) - extends ReplicatedTweetStoreEvent("replicated_set_retweet_visibility") - - trait Store { - val replicatedSetRetweetVisibility: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedSetRetweetVisibility: FutureEffect[Event] = - wrap(underlying.replicatedSetRetweetVisibility) - } - - object Store { - - /** - * [[TweetCountsCacheUpdatingStore]] - replicate modifying the retweet count directly in cache.
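Concretely, the replicated event carries only the source tweet id and the new visibility, and the handler (see TweetCountsCacheUpdatingStore below) turns it into a single cached-count bump; a sketch, where the store instance is an assumption:

// Editorial sketch, not part of the original file.
val store: ReplicatedSetRetweetVisibility.Store =
  ReplicatedSetRetweetVisibility.Store(tweetCountsCacheUpdatingStore)
// visible = false means the retweet was archived in the home DC, so this DC
// decrements RetweetsKey(srcId) in its counts cache; visible = true increments it.
store.replicatedSetRetweetVisibility(
  ReplicatedSetRetweetVisibility.Event(srcId = 20L, visible = false))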
- */ - def apply(tweetCountsCacheUpdatingStore: TweetCountsCacheUpdatingStore): Store = - new Store { - override val replicatedSetRetweetVisibility: FutureEffect[Event] = - tweetCountsCacheUpdatingStore.replicatedSetRetweetVisibility - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.docx new file mode 100644 index 000000000..54080dbcc Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.scala deleted file mode 100644 index cfe3262b5..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.scala +++ /dev/null @@ -1,205 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.takedown.util.TakedownReasons -import com.twitter.tseng.withholding.thriftscala.TakedownReason -import com.twitter.tweetypie.thriftscala._ - -object Takedown extends TweetStore.SyncModule { - - case class Event( - tweet: Tweet, // for CachingTweetStore / ManhattanTweetStore / ReplicatedTakedown - timestamp: Time, - user: Option[User] = None, - takedownReasons: Seq[TakedownReason] = Seq(), // for EventBus - reasonsToAdd: Seq[TakedownReason] = Seq(), // for Guano - reasonsToRemove: Seq[TakedownReason] = Seq(), // for Guano - auditNote: Option[String] = None, - host: Option[String] = None, - byUserId: Option[UserId] = None, - eventbusEnqueue: Boolean = true, - scribeForAudit: Boolean = true, - // If ManhattanTweetStore should update countryCodes and reasons - updateCodesAndReasons: Boolean = false) - extends SyncTweetStoreEvent("takedown") { - def toAsyncRequest(): AsyncTakedownRequest = - AsyncTakedownRequest( - tweet = tweet, - user = user, - takedownReasons = takedownReasons, - reasonsToAdd = reasonsToAdd, - reasonsToRemove = reasonsToRemove, - scribeForAudit = scribeForAudit, - eventbusEnqueue = eventbusEnqueue, - auditNote = auditNote, - byUserId = byUserId, - host = host, - timestamp = timestamp.inMillis - ) - } - - trait Store { - val takedown: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val takedown: FutureEffect[Event] = wrap(underlying.takedown) - } - - object Store { - def apply( - logLensStore: LogLensStore, - manhattanStore: ManhattanTweetStore, - cachingTweetStore: CachingTweetStore, - asyncEnqueueStore: AsyncEnqueueStore - ): Store = - new Store { - override val takedown: FutureEffect[Event] = - FutureEffect.inParallel( - logLensStore.takedown, - FutureEffect.sequentially( - manhattanStore.takedown, - FutureEffect.inParallel( - cachingTweetStore.takedown, - asyncEnqueueStore.takedown - ) - ) - ) - } - } -} - -object AsyncTakedown extends TweetStore.AsyncModule { - - object Event { - def fromAsyncRequest(request: AsyncTakedownRequest): TweetStoreEventOrRetry[Event] = - TweetStoreEventOrRetry( - Event( - tweet = request.tweet, - optUser = request.user, - takedownReasons = request.takedownReasons, - reasonsToAdd = request.reasonsToAdd, - reasonsToRemove = request.reasonsToRemove, - auditNote = request.auditNote, - host = request.host, - byUserId = request.byUserId, - eventbusEnqueue = request.eventbusEnqueue, - scribeForAudit = request.scribeForAudit, - timestamp = Time.fromMilliseconds(request.timestamp) - ), - request.retryAction, - RetryEvent - ) - } - - case class 
Event( - tweet: Tweet, - timestamp: Time, - optUser: Option[User], - takedownReasons: Seq[TakedownReason], // for EventBus - reasonsToAdd: Seq[TakedownReason], // for Guano - reasonsToRemove: Seq[TakedownReason], // for Guano - auditNote: Option[String], // for Guano - host: Option[String], // for Guano - byUserId: Option[UserId], // for Guano - eventbusEnqueue: Boolean, - scribeForAudit: Boolean) - extends AsyncTweetStoreEvent("async_takedown") - with TweetStoreTweetEvent { - - def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncTakedownRequest = - AsyncTakedownRequest( - tweet = tweet, - user = optUser, - takedownReasons = takedownReasons, - reasonsToAdd = reasonsToAdd, - reasonsToRemove = reasonsToRemove, - scribeForAudit = scribeForAudit, - eventbusEnqueue = eventbusEnqueue, - auditNote = auditNote, - byUserId = byUserId, - host = host, - timestamp = timestamp.inMillis, - retryAction = action - ) - - override def toTweetEventData: Seq[TweetEventData] = - optUser.map { user => - TweetEventData.TweetTakedownEvent( - TweetTakedownEvent( - tweetId = tweet.id, - userId = user.id, - takedownCountryCodes = - takedownReasons.collect(TakedownReasons.reasonToCountryCode).sorted, - takedownReasons = takedownReasons - ) - ) - }.toSeq - - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - service.asyncTakedown(toAsyncRequest(Some(action))) - } - - case class RetryEvent(action: AsyncWriteAction, event: Event) - extends TweetStoreRetryEvent[Event] { - - override val eventType: AsyncWriteEventType.Takedown.type = AsyncWriteEventType.Takedown - override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) - } - - trait Store { - val asyncTakedown: FutureEffect[Event] - val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[Event]] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val asyncTakedown: FutureEffect[Event] = wrap(underlying.asyncTakedown) - override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( - underlying.retryAsyncTakedown) - } - - object Store { - def apply( - replicatingStore: ReplicatingTweetStore, - guanoStore: GuanoServiceStore, - eventBusEnqueueStore: TweetEventBusStore - ): Store = { - val stores: Seq[Store] = - Seq( - replicatingStore, - guanoStore, - eventBusEnqueueStore - ) - - def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = - FutureEffect.inParallel[E](stores.map(extract): _*) - - new Store { - override val asyncTakedown: FutureEffect[Event] = build(_.asyncTakedown) - override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[Event]] = build( - _.retryAsyncTakedown) - } - } - } -} - -object ReplicatedTakedown extends TweetStore.ReplicatedModule { - - case class Event(tweet: Tweet) extends ReplicatedTweetStoreEvent("takedown") - - trait Store { - val replicatedTakedown: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedTakedown: FutureEffect[Event] = wrap(underlying.replicatedTakedown) - } - - object Store { - def apply(cachingTweetStore: CachingTweetStore): Store = { - new Store { - override val replicatedTakedown: FutureEffect[Event] = cachingTweetStore.replicatedTakedown - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.docx new file mode 100644 index 
000000000..e9c5fe47c Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.scala deleted file mode 100644 index 14b83d878..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.scala +++ /dev/null @@ -1,150 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.timelineservice.{thriftscala => tls} -import com.twitter.tweetypie.backends.TimelineService -import com.twitter.tweetypie.thriftscala._ - -trait TlsTimelineUpdatingStore - extends TweetStoreBase[TlsTimelineUpdatingStore] - with AsyncInsertTweet.Store - with AsyncDeleteTweet.Store - with AsyncUndeleteTweet.Store { - def wrap(w: TweetStore.Wrap): TlsTimelineUpdatingStore = - new TweetStoreWrapper(w, this) - with TlsTimelineUpdatingStore - with AsyncInsertTweet.StoreWrapper - with AsyncDeleteTweet.StoreWrapper - with AsyncUndeleteTweet.StoreWrapper -} - -/** - * An implementation of TweetStore that sends update events to - * the Timeline Service. - */ -object TlsTimelineUpdatingStore { - val Action: AsyncWriteAction.TimelineUpdate.type = AsyncWriteAction.TimelineUpdate - - /** - * Converts a TweetyPie Tweet to tls.Tweet - * - * @param explicitCreatedAt when Some, overrides the default getTimestamp defined in package - * object com.twitter.tweetypie - */ - def tweetToTLSFullTweet( - hasMedia: Tweet => Boolean - )( - tweet: Tweet, - explicitCreatedAt: Option[Time], - noteTweetMentionedUserIds: Option[Seq[Long]] - ): tls.FullTweet = - tls.FullTweet( - userId = getUserId(tweet), - tweetId = tweet.id, - mentionedUserIds = - noteTweetMentionedUserIds.getOrElse(getMentions(tweet).flatMap(_.userId)).toSet, - isNullcasted = TweetLenses.nullcast.get(tweet), - conversationId = TweetLenses.conversationId.get(tweet).getOrElse(tweet.id), - narrowcastGeos = Set.empty, - createdAtMs = explicitCreatedAt.getOrElse(getTimestamp(tweet)).inMillis, - hasMedia = hasMedia(tweet), - directedAtUserId = TweetLenses.directedAtUser.get(tweet).map(_.userId), - retweet = getShare(tweet).map { share => - tls.Retweet( - sourceUserId = share.sourceUserId, - sourceTweetId = share.sourceStatusId, - parentTweetId = Some(share.parentStatusId) - ) - }, - reply = getReply(tweet).map { reply => - tls.Reply( - inReplyToUserId = reply.inReplyToUserId, - inReplyToTweetId = reply.inReplyToStatusId - ) - }, - quote = tweet.quotedTweet.map { qt => - tls.Quote( - quotedUserId = qt.userId, - quotedTweetId = qt.tweetId - ) - }, - mediaTags = tweet.mediaTags, - text = Some(getText(tweet)) - ) - - val logger: Logger = Logger(getClass) - - def logValidationFailed(stats: StatsReceiver): tls.ProcessEventResult => Unit = { - case tls.ProcessEventResult(tls.ProcessEventResultType.ValidationFailed, errors) => - logger.error(s"Validation Failed in processEvent2: $errors") - stats.counter("processEvent2_validation_failed").incr() - case _ => () - } - - def apply( - processEvent2: TimelineService.ProcessEvent2, - hasMedia: Tweet => Boolean, - stats: StatsReceiver - ): TlsTimelineUpdatingStore = { - val toTlsTweet = tweetToTLSFullTweet(hasMedia) _ - - val processAndLog = - processEvent2.andThen(FutureArrow.fromFunction(logValidationFailed(stats))) - - new TlsTimelineUpdatingStore { - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - processAndLog - 
.contramap[AsyncInsertTweet.Event] { event => - tls.Event.FullTweetCreate( - tls.FullTweetCreateEvent( - toTlsTweet(event.tweet, Some(event.timestamp), event.noteTweetMentionedUserIds), - event.timestamp.inMillis, - featureContext = event.featureContext - ) - ) - } - .asFutureEffect[AsyncInsertTweet.Event] - - override val retryAsyncInsertTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = - TweetStore.retry(Action, asyncInsertTweet) - - override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = - processAndLog - .contramap[AsyncUndeleteTweet.Event] { event => - tls.Event.FullTweetRestore( - tls.FullTweetRestoreEvent( - toTlsTweet(event.tweet, None, None), - event.deletedAt.map(_.inMillis) - ) - ) - } - .asFutureEffect[AsyncUndeleteTweet.Event] - - override val retryAsyncUndeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUndeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncUndeleteTweet) - - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - processAndLog - .contramap[AsyncDeleteTweet.Event] { event => - tls.Event.FullTweetDelete( - tls.FullTweetDeleteEvent( - toTlsTweet(event.tweet, None, None), - event.timestamp.inMillis, - isUserErasure = Some(event.isUserErasure), - isBounceDelete = Some(event.isBounceDelete) - ) - ) - } - .asFutureEffect[AsyncDeleteTweet.Event] - - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncDeleteTweet) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.docx new file mode 100644 index 000000000..e1bc80bb9 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.scala deleted file mode 100644 index 3f1d3e288..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.scala +++ /dev/null @@ -1,358 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.concurrent.Serialized -import com.twitter.servo.cache.LockingCache.Handler -import com.twitter.servo.cache._ -import com.twitter.tweetypie.repository.BookmarksKey -import com.twitter.tweetypie.repository.FavsKey -import com.twitter.tweetypie.repository.QuotesKey -import com.twitter.tweetypie.repository.RepliesKey -import com.twitter.tweetypie.repository.RetweetsKey -import com.twitter.tweetypie.repository.TweetCountKey -import com.twitter.util.Duration -import com.twitter.util.Timer -import scala.collection.mutable - -trait TweetCountsCacheUpdatingStore - extends TweetStoreBase[TweetCountsCacheUpdatingStore] - with InsertTweet.Store - with AsyncInsertTweet.Store - with ReplicatedInsertTweet.Store - with DeleteTweet.Store - with AsyncDeleteTweet.Store - with ReplicatedDeleteTweet.Store - with UndeleteTweet.Store - with ReplicatedUndeleteTweet.Store - with AsyncIncrFavCount.Store - with ReplicatedIncrFavCount.Store - with AsyncIncrBookmarkCount.Store - with ReplicatedIncrBookmarkCount.Store - with AsyncSetRetweetVisibility.Store - with ReplicatedSetRetweetVisibility.Store - with Flush.Store { - def wrap(w: TweetStore.Wrap): TweetCountsCacheUpdatingStore = { - new 
TweetStoreWrapper(w, this) - with TweetCountsCacheUpdatingStore - with InsertTweet.StoreWrapper - with AsyncInsertTweet.StoreWrapper - with ReplicatedInsertTweet.StoreWrapper - with DeleteTweet.StoreWrapper - with AsyncDeleteTweet.StoreWrapper - with ReplicatedDeleteTweet.StoreWrapper - with UndeleteTweet.StoreWrapper - with ReplicatedUndeleteTweet.StoreWrapper - with AsyncIncrFavCount.StoreWrapper - with ReplicatedIncrFavCount.StoreWrapper - with AsyncIncrBookmarkCount.StoreWrapper - with ReplicatedIncrBookmarkCount.StoreWrapper - with AsyncSetRetweetVisibility.StoreWrapper - with ReplicatedSetRetweetVisibility.StoreWrapper - with Flush.StoreWrapper - } -} - -/** - * An implementation of TweetStore that updates tweet-specific counts in - * the CountsCache. - */ -object TweetCountsCacheUpdatingStore { - private type Action = TweetCountKey => Future[Unit] - - def keys(tweetId: TweetId): Seq[TweetCountKey] = - Seq( - RetweetsKey(tweetId), - RepliesKey(tweetId), - FavsKey(tweetId), - QuotesKey(tweetId), - BookmarksKey(tweetId)) - - def relatedKeys(tweet: Tweet): Seq[TweetCountKey] = - Seq( - getReply(tweet).flatMap(_.inReplyToStatusId).map(RepliesKey(_)), - getQuotedTweet(tweet).map(quotedTweet => QuotesKey(quotedTweet.tweetId)), - getShare(tweet).map(share => RetweetsKey(share.sourceStatusId)) - ).flatten - - // pick all keys except quotes key - def relatedKeysWithoutQuotesKey(tweet: Tweet): Seq[TweetCountKey] = - relatedKeys(tweet).filterNot(_.isInstanceOf[QuotesKey]) - - def apply(countsStore: CachedCountsStore): TweetCountsCacheUpdatingStore = { - val incr: Action = key => countsStore.incr(key, 1) - val decr: Action = key => countsStore.incr(key, -1) - val init: Action = key => countsStore.add(key, 0) - val delete: Action = key => countsStore.delete(key) - - def initCounts(tweetId: TweetId) = Future.join(keys(tweetId).map(init)) - def incrRelatedCounts(tweet: Tweet, excludeQuotesKey: Boolean = false) = { - Future.join { - if (excludeQuotesKey) { - relatedKeysWithoutQuotesKey(tweet).map(incr) - } else { - relatedKeys(tweet).map(incr) - } - } - } - def deleteCounts(tweetId: TweetId) = Future.join(keys(tweetId).map(delete)) - - // Decrement all the counters if it is the last quote, otherwise avoid decrementing quote counters - def decrRelatedCounts(tweet: Tweet, isLastQuoteOfQuoter: Boolean = false) = { - Future.join { - if (isLastQuoteOfQuoter) { - relatedKeys(tweet).map(decr) - } else { - relatedKeysWithoutQuotesKey(tweet).map(decr) - } - } - } - - def updateFavCount(tweetId: TweetId, delta: Int) = - countsStore.incr(FavsKey(tweetId), delta).unit - - def updateBookmarkCount(tweetId: TweetId, delta: Int) = - countsStore.incr(BookmarksKey(tweetId), delta).unit - - // these are used specifically for setRetweetVisibility - def incrRetweetCount(tweetId: TweetId) = incr(RetweetsKey(tweetId)) - def decrRetweetCount(tweetId: TweetId) = decr(RetweetsKey(tweetId)) - - new TweetCountsCacheUpdatingStore { - override val insertTweet: FutureEffect[InsertTweet.Event] = - FutureEffect[InsertTweet.Event](e => initCounts(e.tweet.id)) - - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - FutureEffect[AsyncInsertTweet.Event] { e => - incrRelatedCounts(e.cachedTweet.tweet, e.quoterHasAlreadyQuotedTweet) - } - - override val retryAsyncInsertTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = - FutureEffect.unit[TweetStoreRetryEvent[AsyncInsertTweet.Event]] - - override val replicatedInsertTweet: FutureEffect[ReplicatedInsertTweet.Event] = -
FutureEffect[ReplicatedInsertTweet.Event] { e => - Future - .join( - initCounts(e.tweet.id), - incrRelatedCounts(e.tweet, e.quoterHasAlreadyQuotedTweet)).unit - } - - override val deleteTweet: FutureEffect[DeleteTweet.Event] = - FutureEffect[DeleteTweet.Event](e => deleteCounts(e.tweet.id)) - - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - FutureEffect[AsyncDeleteTweet.Event](e => decrRelatedCounts(e.tweet, e.isLastQuoteOfQuoter)) - - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - FutureEffect.unit[TweetStoreRetryEvent[AsyncDeleteTweet.Event]] - - override val replicatedDeleteTweet: FutureEffect[ReplicatedDeleteTweet.Event] = - FutureEffect[ReplicatedDeleteTweet.Event] { e => - Future - .join(deleteCounts(e.tweet.id), decrRelatedCounts(e.tweet, e.isLastQuoteOfQuoter)).unit - } - - override val undeleteTweet: FutureEffect[UndeleteTweet.Event] = - FutureEffect[UndeleteTweet.Event] { e => - incrRelatedCounts(e.tweet, e.quoterHasAlreadyQuotedTweet) - } - - override val replicatedUndeleteTweet: FutureEffect[ReplicatedUndeleteTweet.Event] = - FutureEffect[ReplicatedUndeleteTweet.Event] { e => - incrRelatedCounts(e.tweet, e.quoterHasAlreadyQuotedTweet) - } - - override val asyncIncrFavCount: FutureEffect[AsyncIncrFavCount.Event] = - FutureEffect[AsyncIncrFavCount.Event](e => updateFavCount(e.tweetId, e.delta)) - - override val replicatedIncrFavCount: FutureEffect[ReplicatedIncrFavCount.Event] = - FutureEffect[ReplicatedIncrFavCount.Event](e => updateFavCount(e.tweetId, e.delta)) - - override val asyncIncrBookmarkCount: FutureEffect[AsyncIncrBookmarkCount.Event] = - FutureEffect[AsyncIncrBookmarkCount.Event](e => updateBookmarkCount(e.tweetId, e.delta)) - - override val replicatedIncrBookmarkCount: FutureEffect[ReplicatedIncrBookmarkCount.Event] = - FutureEffect[ReplicatedIncrBookmarkCount.Event] { e => - updateBookmarkCount(e.tweetId, e.delta) - } - - override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = - FutureEffect[AsyncSetRetweetVisibility.Event] { e => - if (e.visible) incrRetweetCount(e.srcId) else decrRetweetCount(e.srcId) - } - - override val retryAsyncSetRetweetVisibility: FutureEffect[ - TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] - ] = - FutureEffect.unit[TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event]] - - override val replicatedSetRetweetVisibility: FutureEffect[ - ReplicatedSetRetweetVisibility.Event - ] = - FutureEffect[ReplicatedSetRetweetVisibility.Event] { e => - if (e.visible) incrRetweetCount(e.srcId) else decrRetweetCount(e.srcId) - } - - override val flush: FutureEffect[Flush.Event] = - FutureEffect[Flush.Event] { e => Future.collect(e.tweetIds.map(deleteCounts)).unit } - .onlyIf(_.flushCounts) - } - } -} - -/** - * A simple trait around the cache operations needed by TweetCountsCacheUpdatingStore. 
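Before the trait itself (just below), a minimal in-memory implementation may help fix the contract; this is a hypothetical test double, assuming Count is the Long-valued alias used with these keys:

// Editorial sketch, not part of the original file; single-threaded test use only.
class InMemoryCachedCountsStore extends CachedCountsStore {
  private val counts = scala.collection.concurrent.TrieMap.empty[TweetCountKey, Count]

  def add(key: TweetCountKey, count: Count): Future[Unit] =
    Future { counts.putIfAbsent(key, count); () }

  def delete(key: TweetCountKey): Future[Unit] =
    Future { counts.remove(key); () }

  def incr(key: TweetCountKey, delta: Count): Future[Unit] =
    Future {
      // Mirrors IncrDecrHandler's floor-at-zero behavior defined below.
      val next = math.max(0L, counts.getOrElse(key, 0L) + delta)
      counts.put(key, next)
      ()
    }
}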
- */ -trait CachedCountsStore { - def add(key: TweetCountKey, count: Count): Future[Unit] - def delete(key: TweetCountKey): Future[Unit] - def incr(key: TweetCountKey, delta: Count): Future[Unit] -} - -object CachedCountsStore { - def fromLockingCache(cache: LockingCache[TweetCountKey, Cached[Count]]): CachedCountsStore = - new CachedCountsStore { - def add(key: TweetCountKey, count: Count): Future[Unit] = - cache.add(key, toCached(count)).unit - - def delete(key: TweetCountKey): Future[Unit] = - cache.delete(key).unit - - def incr(key: TweetCountKey, delta: Count): Future[Unit] = - cache.lockAndSet(key, IncrDecrHandler(delta)).unit - } - - def toCached(count: Count): Cached[Count] = { - val now = Time.now - Cached(Some(count), CachedValueStatus.Found, now, Some(now)) - } - - case class IncrDecrHandler(delta: Long) extends Handler[Cached[Count]] { - override def apply(inCache: Option[Cached[Count]]): Option[Cached[Count]] = - inCache.flatMap(incrCount) - - private[this] def incrCount(oldCached: Cached[Count]): Option[Cached[Count]] = { - oldCached.value.map { oldCount => oldCached.copy(value = Some(saferIncr(oldCount))) } - } - - private[this] def saferIncr(value: Long) = math.max(0, value + delta) - - override lazy val toString: String = "IncrDecrHandler(%s)".format(delta) - } - - object QueueIsFullException extends Exception -} - -/** - * An implementation of CachedCountsStore that can queue and aggregate multiple incr - * updates to the same key together. Currently, updates for a key only start to aggregate - * after there is a failure to incr on the underlying store, which often indicates contention - * due to a high level of updates. After a failure, a key is promoted into a "tracked" state, - * and subsequent updates are aggregated together. Periodically, the aggregated updates will - * be flushed. If the flush for a key succeeds and no more updates have come in during the flush, - * then the key is demoted out of the tracked state. Otherwise, updates continue to aggregate - * until the next flush attempt. - */ -class AggregatingCachedCountsStore( - underlying: CachedCountsStore, - timer: Timer, - flushInterval: Duration, - maxSize: Int, - stats: StatsReceiver) - extends CachedCountsStore - with Serialized { - private[this] val pendingUpdates: mutable.Map[TweetCountKey, Count] = - new mutable.HashMap[TweetCountKey, Count] - - private[this] var trackingCount: Int = 0 - - private[this] val promotionCounter = stats.counter("promotions") - private[this] val demotionCounter = stats.counter("demotions") - private[this] val updateCounter = stats.counter("aggregated_updates") - private[this] val overflowCounter = stats.counter("overflows") - private[this] val flushFailureCounter = stats.counter("flush_failures") - private[this] val trackingCountGauge = stats.addGauge("tracking")(trackingCount.toFloat) - - timer.schedule(flushInterval) { flush() } - - def add(key: TweetCountKey, count: Count): Future[Unit] = - underlying.add(key, count) - - def delete(key: TweetCountKey): Future[Unit] = - underlying.delete(key) - - def incr(key: TweetCountKey, delta: Count): Future[Unit] = - aggregateIfTracked(key, delta).flatMap { - case true => Future.Unit - case false => - underlying - .incr(key, delta) - .rescue { case _ => aggregate(key, delta) } - } - - /** - * Queues an update to be aggregated and applied to a key at a later time, but only if we are - * already aggregating updates for the key. 
- * - * @return true the delta was aggregated, false if the key is not being tracked - * and the incr should be attempted directly. - */ - private[this] def aggregateIfTracked(key: TweetCountKey, delta: Count): Future[Boolean] = - serialized { - pendingUpdates.get(key) match { - case None => false - case Some(current) => - updateCounter.incr() - pendingUpdates(key) = current + delta - true - } - } - - /** - * Queues an update to be aggregated and applied to a key at a later time. - */ - private[this] def aggregate(key: TweetCountKey, delta: Count): Future[Unit] = - serialized { - val alreadyTracked = pendingUpdates.contains(key) - - if (!alreadyTracked) { - if (pendingUpdates.size < maxSize) - promotionCounter.incr() - else { - overflowCounter.incr() - throw CachedCountsStore.QueueIsFullException - } - } - - (pendingUpdates.get(key).getOrElse(0L) + delta) match { - case 0 => - pendingUpdates.remove(key) - demotionCounter.incr() - - case aggregatedDelta => - pendingUpdates(key) = aggregatedDelta - } - - trackingCount = pendingUpdates.size - } - - private[this] def flush(): Future[Unit] = { - for { - // make a copy of the updates to flush, so that updates can continue to be queued - // while the flush is in progress. if an individual flush succeeds, then we - // go back and update pendingUpdates. - updates <- serialized { pendingUpdates.toSeq.toList } - () <- Future.join(for ((key, delta) <- updates) yield flush(key, delta)) - } yield () - } - - private[this] def flush(key: TweetCountKey, delta: Count): Future[Unit] = - underlying - .incr(key, delta) - .flatMap(_ => aggregate(key, -delta)) - .handle { case ex => flushFailureCounter.incr() } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.docx new file mode 100644 index 000000000..3f1aed535 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.scala deleted file mode 100644 index e846c01ea..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.scala +++ /dev/null @@ -1,209 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.thriftscala._ - -trait TweetEventBusStore - extends TweetStoreBase[TweetEventBusStore] - with AsyncDeleteAdditionalFields.Store - with AsyncDeleteTweet.Store - with AsyncInsertTweet.Store - with AsyncSetAdditionalFields.Store - with AsyncTakedown.Store - with AsyncUndeleteTweet.Store - with AsyncUpdatePossiblySensitiveTweet.Store - with QuotedTweetDelete.Store - with QuotedTweetTakedown.Store - with ScrubGeoUpdateUserTimestamp.Store - with ScrubGeo.Store { self => - def wrap(w: TweetStore.Wrap): TweetEventBusStore = - new TweetStoreWrapper(w, this) - with TweetEventBusStore - with AsyncDeleteAdditionalFields.StoreWrapper - with AsyncDeleteTweet.StoreWrapper - with AsyncInsertTweet.StoreWrapper - with AsyncSetAdditionalFields.StoreWrapper - with AsyncTakedown.StoreWrapper - with AsyncUndeleteTweet.StoreWrapper - with AsyncUpdatePossiblySensitiveTweet.StoreWrapper - with QuotedTweetDelete.StoreWrapper - with QuotedTweetTakedown.StoreWrapper - with ScrubGeo.StoreWrapper - with ScrubGeoUpdateUserTimestamp.StoreWrapper - - def inParallel(that: TweetEventBusStore): TweetEventBusStore 
= - new TweetEventBusStore { - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - self.asyncInsertTweet.inParallel(that.asyncInsertTweet) - override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = - self.asyncDeleteAdditionalFields.inParallel(that.asyncDeleteAdditionalFields) - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - self.asyncDeleteTweet.inParallel(that.asyncDeleteTweet) - override val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] = - self.asyncSetAdditionalFields.inParallel(that.asyncSetAdditionalFields) - override val asyncTakedown: FutureEffect[AsyncTakedown.Event] = - self.asyncTakedown.inParallel(that.asyncTakedown) - override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = - self.asyncUndeleteTweet.inParallel(that.asyncUndeleteTweet) - override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ - AsyncUpdatePossiblySensitiveTweet.Event - ] = - self.asyncUpdatePossiblySensitiveTweet.inParallel(that.asyncUpdatePossiblySensitiveTweet) - override val quotedTweetDelete: FutureEffect[QuotedTweetDelete.Event] = - self.quotedTweetDelete.inParallel(that.quotedTweetDelete) - override val quotedTweetTakedown: FutureEffect[QuotedTweetTakedown.Event] = - self.quotedTweetTakedown.inParallel(that.quotedTweetTakedown) - override val retryAsyncInsertTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = - self.retryAsyncInsertTweet.inParallel(that.retryAsyncInsertTweet) - override val retryAsyncDeleteAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] - ] = - self.retryAsyncDeleteAdditionalFields.inParallel(that.retryAsyncDeleteAdditionalFields) - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - self.retryAsyncDeleteTweet.inParallel(that.retryAsyncDeleteTweet) - override val retryAsyncUndeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUndeleteTweet.Event] - ] = - self.retryAsyncUndeleteTweet.inParallel(that.retryAsyncUndeleteTweet) - override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] - ] = - self.retryAsyncUpdatePossiblySensitiveTweet.inParallel( - that.retryAsyncUpdatePossiblySensitiveTweet - ) - override val retryAsyncSetAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncSetAdditionalFields.Event] - ] = - self.retryAsyncSetAdditionalFields.inParallel(that.retryAsyncSetAdditionalFields) - override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = - self.retryAsyncTakedown.inParallel(that.retryAsyncTakedown) - override val scrubGeo: FutureEffect[ScrubGeo.Event] = - self.scrubGeo.inParallel(that.scrubGeo) - override val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] = - self.scrubGeoUpdateUserTimestamp.inParallel(that.scrubGeoUpdateUserTimestamp) - } -} - -object TweetEventBusStore { - val Action: AsyncWriteAction = AsyncWriteAction.EventBusEnqueue - - def safetyTypeForUser(user: User): Option[SafetyType] = - user.safety.map(userSafetyToSafetyType) - - def userSafetyToSafetyType(safety: Safety): SafetyType = - if (safety.isProtected) { - SafetyType.Private - } else if (safety.suspended) { - SafetyType.Restricted - } else { - SafetyType.Public - } - - def apply( - eventStore: FutureEffect[TweetEvent] - ): TweetEventBusStore = { - - def toTweetEvents(event: TweetStoreTweetEvent): Seq[TweetEvent] = - 
event.toTweetEventData.map { data => - TweetEvent( - data, - TweetEventFlags( - timestampMs = event.timestamp.inMillis, - safetyType = event.optUser.flatMap(safetyTypeForUser) - ) - ) - } - - def enqueueEvents[E <: TweetStoreTweetEvent]: FutureEffect[E] = - eventStore.liftSeq.contramap[E](toTweetEvents) - - new TweetEventBusStore { - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - enqueueEvents[AsyncInsertTweet.Event] - - override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = - enqueueEvents[AsyncDeleteAdditionalFields.Event] - - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - enqueueEvents[AsyncDeleteTweet.Event] - - override val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] = - enqueueEvents[AsyncSetAdditionalFields.Event] - - override val asyncTakedown: FutureEffect[AsyncTakedown.Event] = - enqueueEvents[AsyncTakedown.Event] - .onlyIf(_.eventbusEnqueue) - - override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = - enqueueEvents[AsyncUndeleteTweet.Event] - - override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ - AsyncUpdatePossiblySensitiveTweet.Event - ] = - enqueueEvents[AsyncUpdatePossiblySensitiveTweet.Event] - - override val quotedTweetDelete: FutureEffect[QuotedTweetDelete.Event] = - enqueueEvents[QuotedTweetDelete.Event] - - override val quotedTweetTakedown: FutureEffect[QuotedTweetTakedown.Event] = - enqueueEvents[QuotedTweetTakedown.Event] - - override val retryAsyncInsertTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = - TweetStore.retry(Action, asyncInsertTweet) - - override val retryAsyncDeleteAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] - ] = - TweetStore.retry(Action, asyncDeleteAdditionalFields) - - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncDeleteTweet) - - override val retryAsyncUndeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUndeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncUndeleteTweet) - - override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] - ] = - TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) - - override val retryAsyncSetAdditionalFields: FutureEffect[ - TweetStoreRetryEvent[AsyncSetAdditionalFields.Event] - ] = - TweetStore.retry(Action, asyncSetAdditionalFields) - - override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = - TweetStore.retry(Action, asyncTakedown) - - override val scrubGeo: FutureEffect[ScrubGeo.Event] = - enqueueEvents[ScrubGeo.Event] - - override val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] = - enqueueEvents[ScrubGeoUpdateUserTimestamp.Event] - } - } -} - -/** - * Scrubs inappropriate fields from tweet events before publishing. 
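The copy just below spells out which fields are considered inappropriate; as a sketch, for any hydrated tweet value:

// Editorial sketch, not part of the original file; `tweet` is assumed.
val cleaned: Tweet = TweetEventDataScrubber.scrub(tweet)
assert(cleaned.cards.isEmpty && cleaned.card2.isEmpty)
assert(cleaned.previousCounts.isEmpty && cleaned.editPerspective.isEmpty)
assert(cleaned.media.forall(_.forall(_.extensionsReply.isEmpty)))
// All other fields pass through unchanged.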
- */ -object TweetEventDataScrubber { - def scrub(tweet: Tweet): Tweet = - tweet.copy( - cards = None, - card2 = None, - media = tweet.media.map(_.map { mediaEntity => mediaEntity.copy(extensionsReply = None) }), - previousCounts = None, - editPerspective = None - ) -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.docx new file mode 100644 index 000000000..bc8e830ef Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.scala deleted file mode 100644 index 648e9a17c..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.scala +++ /dev/null @@ -1,65 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.tflock.TweetIndexer -import com.twitter.tweetypie.thriftscala._ - -trait TweetIndexingStore - extends TweetStoreBase[TweetIndexingStore] - with AsyncInsertTweet.Store - with AsyncDeleteTweet.Store - with AsyncUndeleteTweet.Store - with AsyncSetRetweetVisibility.Store { - def wrap(w: TweetStore.Wrap): TweetIndexingStore = - new TweetStoreWrapper(w, this) - with TweetIndexingStore - with AsyncInsertTweet.StoreWrapper - with AsyncDeleteTweet.StoreWrapper - with AsyncUndeleteTweet.StoreWrapper - with AsyncSetRetweetVisibility.StoreWrapper -} - -/** - * A TweetStore that sends indexing updates to a TweetIndexer. - */ -object TweetIndexingStore { - val Action: AsyncWriteAction.TweetIndex.type = AsyncWriteAction.TweetIndex - - def apply(indexer: TweetIndexer): TweetIndexingStore = - new TweetIndexingStore { - override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = - FutureEffect[AsyncInsertTweet.Event](event => indexer.createIndex(event.tweet)) - - override val retryAsyncInsertTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncInsertTweet.Event] - ] = - TweetStore.retry(Action, asyncInsertTweet) - - override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = - FutureEffect[AsyncDeleteTweet.Event](event => - indexer.deleteIndex(event.tweet, event.isBounceDelete)) - - override val retryAsyncDeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncDeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncDeleteTweet) - - override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = - FutureEffect[AsyncUndeleteTweet.Event](event => indexer.undeleteIndex(event.tweet)) - - override val retryAsyncUndeleteTweet: FutureEffect[ - TweetStoreRetryEvent[AsyncUndeleteTweet.Event] - ] = - TweetStore.retry(Action, asyncUndeleteTweet) - - override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = - FutureEffect[AsyncSetRetweetVisibility.Event] { event => - indexer.setRetweetVisibility(event.retweetId, event.visible) - } - - override val retryAsyncSetRetweetVisibility: FutureEffect[ - TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] - ] = - TweetStore.retry(Action, asyncSetRetweetVisibility) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.docx new file mode 100644 index 000000000..01c4fb41a Binary files /dev/null and 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala deleted file mode 100644 index 23f6f5124..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.finagle.stats.RollupStatsReceiver -import com.twitter.servo.util.MemoizingStatsReceiver - -/** - * Records some stats about inserted tweets. Tweets are currently classified by three criteria: - * - * - tweet type: "tweet" or "retweet" - * - user type: "stresstest", "protected", "restricted", or "public" - * - fanout type: "nullcast", "narrowcast", or "usertimeline" - * - * A counter is incremented for a tweet using those three criteria in order. Counters are - * created with a RollupStatsReceiver, so counts are aggregated at each level. Some - * example counters are: - * - * ./insert - * ./insert/tweet - * ./insert/tweet/public - * ./insert/tweet/protected/usertimeline - * ./insert/retweet/stresstest - * ./insert/retweet/public/nullcast - */ -trait TweetStatsStore extends TweetStoreBase[TweetStatsStore] with InsertTweet.Store { - def wrap(w: TweetStore.Wrap): TweetStatsStore = - new TweetStoreWrapper(w, this) with TweetStatsStore with InsertTweet.StoreWrapper -} - -object TweetStatsStore { - def apply(stats: StatsReceiver): TweetStatsStore = { - val rollup = new MemoizingStatsReceiver(new RollupStatsReceiver(stats)) - val inserts = rollup.scope("insert") - - def tweetType(tweet: Tweet) = - if (getShare(tweet).isDefined) "retweet" else "tweet" - - def userType(user: User) = - if (user.roles.exists(_.roles.contains("stresstest"))) "stresstest" - else if (user.safety.exists(_.isProtected)) "protected" - else if (user.safety.exists(_.suspended)) "restricted" - else "public" - - def fanoutType(tweet: Tweet) = - if (TweetLenses.nullcast(tweet)) "nullcast" - else if (TweetLenses.narrowcast(tweet).isDefined) "narrowcast" - else "usertimeline" - - new TweetStatsStore { - override val insertTweet: FutureEffect[InsertTweet.Event] = - FutureEffect[InsertTweet.Event] { event => - inserts - .counter( - tweetType(event.tweet), - userType(event.user), - fanoutType(event.tweet) - ) - .incr() - - Future.Unit - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.docx new file mode 100644 index 000000000..739d2a2d5 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala deleted file mode 100644 index 62a668681..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala +++ /dev/null @@ -1,292 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.finagle.service.RetryPolicy -import com.twitter.finagle.stats.Stat -import com.twitter.servo.util.RetryHandler -import com.twitter.tweetypie.thriftscala._ -import com.twitter.util.Timer - -object TweetStore { - // Using the old-school c.t.logging.Logger here as this log is only used by - // servo.FutureEffect's trackOutcome method, which needs that kind of logger. 
- val log: com.twitter.logging.Logger = com.twitter.logging.Logger(getClass) - - /** - * Adapts a tweet store on a specific TweetStoreEvent type to one that handles - * TweetStoreRetryEvents of that type that match the given AsyncWriteAction. - */ - def retry[T <: AsyncTweetStoreEvent]( - action: AsyncWriteAction, - store: FutureEffect[T] - ): FutureEffect[TweetStoreRetryEvent[T]] = - store.contramap[TweetStoreRetryEvent[T]](_.event).onlyIf(_.action == action) - - /** - * Defines an abstract polymorphic operation to be applied to FutureEffects over any - * TweetStoreEvent type. The Wrap operation is defined over all possible - * FutureEffect[E <: TweetStoreEvent] types. - */ - trait Wrap { - def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] - } - - /** - * A Wrap operation that applies standardized metrics collection to the FutureEffect. - */ - case class Tracked(stats: StatsReceiver) extends Wrap { - def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = - FutureEffect[E] { event => - Stat.timeFuture(stats.scope(event.name).stat("latency_ms")) { - handler(event) - } - }.trackOutcome(stats, _.name, log) - } - - /** - * A Wrap operation that makes the FutureEffect enabled according to the given gate. - */ - case class Gated(gate: Gate[Unit]) extends Wrap { - def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = - handler.enabledBy(gate) - } - - /** - * A Wrap operation that updates the FutureEffect to ignore failures. - */ - object IgnoreFailures extends Wrap { - def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = - handler.ignoreFailures - } - - /** - * A Wrap operation that updates the FutureEffect to ignore failures upon completion. - */ - object IgnoreFailuresUponCompletion extends Wrap { - def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = - handler.ignoreFailuresUponCompletion - } - - /** - * A Wrap operation that applies a RetryHandler to FutureEffects. - */ - case class Retry(retryHandler: RetryHandler[Unit]) extends Wrap { - def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = - handler.retry(retryHandler) - } - - /** - * A Wrap operation that applies a RetryHandler to FutureEffects. - */ - case class ReplicatedEventRetry(retryHandler: RetryHandler[Unit]) extends Wrap { - def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = - FutureEffect[E] { event => - event.retryStrategy match { - case TweetStoreEvent.ReplicatedEventLocalRetry => handler.retry(retryHandler)(event) - case _ => handler(event) - } - } - } - - /** - * A Wrap operation that configures async-retry behavior to async-write events. 
 */ - class AsyncRetry( - localRetryPolicy: RetryPolicy[Try[Nothing]], - enqueueRetryPolicy: RetryPolicy[Try[Nothing]], - timer: Timer, - tweetService: ThriftTweetService, - scribe: FutureEffect[FailedAsyncWrite] - )( - stats: StatsReceiver, - action: AsyncWriteAction) - extends Wrap { - - override def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = - FutureEffect[E] { event => - event.retryStrategy match { - case TweetStoreEvent.EnqueueAsyncRetry(enqueueRetry) => - enqueueAsyncRetry(handler, enqueueRetry)(event) - - case TweetStoreEvent.LocalRetryThenScribeFailure(toFailedAsyncWrite) => - localRetryThenScribeFailure(handler, toFailedAsyncWrite)(event) - - case _ => - handler(event) - } - } - - private def enqueueAsyncRetry[E <: TweetStoreEvent]( - handler: FutureEffect[E], - enqueueRetry: (ThriftTweetService, AsyncWriteAction) => Future[Unit] - ): FutureEffect[E] = { - val retryInitCounter = stats.counter("retries_initiated") - - // enqueues failed TweetStoreEvents to the deferredrpc-backed tweetService - // to be retried. this store uses the enqueueRetryPolicy to retry the enqueue - // attempts in the case of deferredrpc application failures. - val enqueueRetryHandler = - FutureEffect[E](_ => enqueueRetry(tweetService, action)) - .retry(RetryHandler.failuresOnly(enqueueRetryPolicy, timer, stats.scope("enqueue_retry"))) - - handler.rescue { - case ex => - TweetStore.log.warning(ex, s"will retry $action") - retryInitCounter.incr() - enqueueRetryHandler - } - } - - private def localRetryThenScribeFailure[E <: TweetStoreEvent]( - handler: FutureEffect[E], - toFailedAsyncWrite: AsyncWriteAction => FailedAsyncWrite - ): FutureEffect[E] = { - val exhaustedCounter = stats.counter("retries_exhausted") - - // scribe events that failed after exhausting all retries - val scribeEventHandler = - FutureEffect[E](_ => scribe(toFailedAsyncWrite(action))) - - // wraps `handler` with a retry policy to retry failures with a backoff. if we exhaust - // all retries, then we pass the event to `scribeEventHandler` to scribe the failure. - handler - .retry(RetryHandler.failuresOnly(localRetryPolicy, timer, stats)) - .rescue { - case ex => - TweetStore.log.warning(ex, s"exhausted retries on $action") - exhaustedCounter.incr() - scribeEventHandler - } - } - } - - /** - * Parent trait for defining a "module" that defines a TweetStoreEvent type and corresponding - * TweetStore and TweetStoreWrapper types. - */ - sealed trait Module { - type Store - type StoreWrapper <: Store - } - - /** - * Parent trait for defining a "module" that defines a sync TweetStoreEvent. - */ - trait SyncModule extends Module { - type Event <: SyncTweetStoreEvent - } - - /** - * Parent trait for defining a "module" that defines an async TweetStoreEvent and a - * TweetStoreRetryEvent. - */ - trait AsyncModule extends Module { - type Event <: AsyncTweetStoreEvent - type RetryEvent <: TweetStoreRetryEvent[Event] - } - - /** - * Parent trait for defining a "module" that defines a replicated TweetStoreEvent. - */ - trait ReplicatedModule extends Module { - type Event <: ReplicatedTweetStoreEvent - } -} - -/** - * Trait for TweetStore implementations that support handler wrapping. - */ -trait TweetStoreBase[Self] { - import TweetStore._ - - /** - * Returns a new store of type Self with Wrap applied to each event handler in this instance. - */ - def wrap(w: Wrap): Self - - /** - * Applies the Tracked Wrap operation to the store.
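Because each helper below applies exactly one Wrap and returns Self, they chain; a composition sketch where the store value, gate, and retry handler are assumptions:

// Editorial sketch, not part of the original file.
val writeGate: Gate[Unit] = ???        // e.g. a decider-backed gate
val retries: RetryHandler[Unit] = ???  // e.g. RetryHandler.failuresOnly(...)
val hardened: CachingTweetStore =
  cachingTweetStore
    .tracked(stats.scope("caching_tweet_store")) // per-event latency and outcome stats
    .enabledBy(writeGate)                        // no-op while the gate is closed
    .retry(retries)                              // local retries on failure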
- */ - def tracked(stats: StatsReceiver): Self = wrap(Tracked(stats)) - - /** - * Applies the Gated Wrap operation to the store. - */ - def enabledBy(gate: Gate[Unit]): Self = wrap(Gated(gate)) - - /** - * Applies the IgnoreFailures Wrap operation to the store. - */ - def ignoreFailures: Self = wrap(IgnoreFailures) - - /** - * Applies the IgnoreFailuresUponCompletion Wrap operation to the store. - */ - def ignoreFailuresUponCompletion: Self = wrap(IgnoreFailuresUponCompletion) - - /** - * Applies a RetryHandler to each event handler. - */ - def retry(retryHandler: RetryHandler[Unit]): Self = wrap(Retry(retryHandler)) - - /** - * Applies a RetryHandler to replicated event handlers. - */ - def replicatedRetry(retryHandler: RetryHandler[Unit]): Self = - wrap(ReplicatedEventRetry(retryHandler)) - - /** - * Applies the AsyncRetryConfig Wrap operation to the store. - */ - def asyncRetry(cfg: AsyncRetry): Self = wrap(cfg) -} - -/** - * An abstract base class for tweet store instances that wrap another tweet store instance. - * You can mix event-specific store wrapper traits into this class to automatically - * have the event-specific handlers wrapped. - */ -abstract class TweetStoreWrapper[+T]( - protected val wrap: TweetStore.Wrap, - protected val underlying: T) - -/** - * A TweetStore that has a handler for all possible TweetStoreEvents. - */ -trait TotalTweetStore - extends AsyncDeleteAdditionalFields.Store - with AsyncDeleteTweet.Store - with AsyncIncrBookmarkCount.Store - with AsyncIncrFavCount.Store - with AsyncInsertTweet.Store - with AsyncSetAdditionalFields.Store - with AsyncSetRetweetVisibility.Store - with AsyncTakedown.Store - with AsyncUndeleteTweet.Store - with AsyncUpdatePossiblySensitiveTweet.Store - with DeleteAdditionalFields.Store - with DeleteTweet.Store - with Flush.Store - with IncrBookmarkCount.Store - with IncrFavCount.Store - with InsertTweet.Store - with QuotedTweetDelete.Store - with QuotedTweetTakedown.Store - with ReplicatedDeleteAdditionalFields.Store - with ReplicatedDeleteTweet.Store - with ReplicatedIncrBookmarkCount.Store - with ReplicatedIncrFavCount.Store - with ReplicatedInsertTweet.Store - with ReplicatedScrubGeo.Store - with ReplicatedSetAdditionalFields.Store - with ReplicatedSetRetweetVisibility.Store - with ReplicatedTakedown.Store - with ReplicatedUndeleteTweet.Store - with ReplicatedUpdatePossiblySensitiveTweet.Store - with ScrubGeo.Store - with ScrubGeoUpdateUserTimestamp.Store - with SetAdditionalFields.Store - with SetRetweetVisibility.Store - with Takedown.Store - with UndeleteTweet.Store - with UpdatePossiblySensitiveTweet.Store diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.docx new file mode 100644 index 000000000..d37d06bc7 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala deleted file mode 100644 index 987668d6f..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala +++ /dev/null @@ -1,144 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.finagle.tracing.Trace -import com.twitter.tweetypie.store.TweetStoreEvent.RetryStrategy -import com.twitter.tweetypie.thriftscala._ - -object TweetStoreEvent { - - /** - * 
Parent trait for indicating what type of retry strategy to apply to event handlers - * for the corresponding event type. Different classes of events use different strategies. - */ - sealed trait RetryStrategy - - /** - * Indicates that the event type doesn't support retries. - */ - case object NoRetry extends RetryStrategy - - /** - * Indicates that if an event handler encounters a failure, it should enqueue a - * retry to be performed asynchronously. - */ - case class EnqueueAsyncRetry(enqueueRetry: (ThriftTweetService, AsyncWriteAction) => Future[Unit]) - extends RetryStrategy - - /** - * Indicates that if an event handler encounters a failure, it should retry - * the event locally some number of times, before eventually giving up and scribing - * the failure. - */ - case class LocalRetryThenScribeFailure(toFailedAsyncWrite: AsyncWriteAction => FailedAsyncWrite) - extends RetryStrategy - - /** - * Indicates that if an event handler encounters a failure, it should retry - * the event locally some number of times. - */ - case object ReplicatedEventLocalRetry extends RetryStrategy -} - -/** - * The abstract parent class for all TweetStoreEvent types. - */ -sealed trait TweetStoreEvent { - val name: String - - val traceId: Long = Trace.id.traceId.toLong - - /** - * Indicates a particular retry behavior that should be applied to event handlers for - * the corresponding event type. The specifics of the strategy might depend upon the - * specific TweetStore implementation. - */ - def retryStrategy: RetryStrategy -} - -abstract class SyncTweetStoreEvent(val name: String) extends TweetStoreEvent { - override def retryStrategy: RetryStrategy = TweetStoreEvent.NoRetry -} - -abstract class AsyncTweetStoreEvent(val name: String) extends TweetStoreEvent { - def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] - - override def retryStrategy: RetryStrategy = TweetStoreEvent.EnqueueAsyncRetry(enqueueRetry) -} - -abstract class ReplicatedTweetStoreEvent(val name: String) extends TweetStoreEvent { - override def retryStrategy: RetryStrategy = TweetStoreEvent.ReplicatedEventLocalRetry -} - -/** - * A trait for all TweetStoreEvents that become TweetEvents. - */ -trait TweetStoreTweetEvent { - val timestamp: Time - - val optUser: Option[User] - - /** - * Most TweetStoreTweetEvents map to a single TweetEvent, but some - * optionally map to an event and others map to multiple events, so - * this method needs to return a Seq of TweetEventData. - */ - def toTweetEventData: Seq[TweetEventData] -} - -/** - * The abstract parent class for an event that indicates a particular action - * for a particular event that needs to be retried via the async-write-retrying mechanism.
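A minimal sketch of what a concrete event type adds on top of these base classes (the event below is hypothetical; a real one would re-enqueue itself through its matching TweetServiceInternal endpoint):

case class ExampleAsyncEvent(tweetId: TweetId, timestamp: Time)
    extends AsyncTweetStoreEvent("example_async_event") {
  // Re-enqueue this event via the deferredrpc-backed service so the failed
  // action is retried out of band; a real event would call the corresponding
  // service.async* method with retryAction = Some(action).
  override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] =
    Future.Unit
}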
- */ -abstract class TweetStoreRetryEvent[E <: AsyncTweetStoreEvent] extends TweetStoreEvent { - override val name = "async_write_retry" - - def action: AsyncWriteAction - def event: E - - def eventType: AsyncWriteEventType - - def scribedTweetOnFailure: Option[Tweet] - - override def retryStrategy: RetryStrategy = - TweetStoreEvent.LocalRetryThenScribeFailure(action => - FailedAsyncWrite(eventType, action, scribedTweetOnFailure)) -} - -/** - * Functions as a disjunction between an event type E and its corresponding - * retry event type TweetStoreRetryEvent[E] - */ -case class TweetStoreEventOrRetry[E <: AsyncTweetStoreEvent]( - event: E, - toRetry: Option[TweetStoreRetryEvent[E]]) { - def toInitial: Option[E] = if (retryAction.isDefined) None else Some(event) - def retryAction: Option[RetryStrategy] = toRetry.map(_.retryStrategy) - def hydrate(f: E => Future[E]): Future[TweetStoreEventOrRetry[E]] = - f(event).map(e => copy(event = e)) -} - -object TweetStoreEventOrRetry { - def apply[E <: AsyncTweetStoreEvent, R <: TweetStoreRetryEvent[E]]( - event: E, - retryAction: Option[AsyncWriteAction], - toRetryEvent: (AsyncWriteAction, E) => R - ): TweetStoreEventOrRetry[E] = - TweetStoreEventOrRetry(event, retryAction.map(action => toRetryEvent(action, event))) - - object First { - - /** matches against TweetStoreEventOrRetry instances for an initial event */ - def unapply[E <: AsyncTweetStoreEvent](it: TweetStoreEventOrRetry[E]): Option[E] = - it.toInitial - } - - object Retry { - - /** matches against TweetStoreEventOrRetry instances for a retry event */ - def unapply[E <: AsyncTweetStoreEvent]( - it: TweetStoreEventOrRetry[E] - ): Option[TweetStoreRetryEvent[E]] = - it.toRetry - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.docx new file mode 100644 index 000000000..9a2054b27 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala deleted file mode 100644 index 8e031fc46..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.thriftscala._ - -object TweetUpdate { - - /** - * Copies takedown information from the source [[Tweet]] into [[CachedTweet]]. - * - * Note that this method requires the source [[Tweet]] to have been loaded with the following - * additional fields (which happens for all paths that create [[ReplicatedTakedown.Event]], in - * both [[TakedownHandler]] and [[UserTakedownHandler]]): - * - TweetypieOnlyTakedownReasonsField - * - TweetypieOnlyTakedownCountryCodesField - * This is done to ensure the remote datacenter of a takedown does not incorrectly try to load - * from MH as the data is already cached.
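The First and Retry extractors above are how async endpoint handlers split fresh events from retries of a single failed action; a sketch against AsyncUndeleteTweet, which appears further down in this diff (the store value is an invented parameter):

def handleAsyncUndelete(
  request: AsyncUndeleteTweetRequest,
  store: AsyncUndeleteTweet.Store
): Future[Unit] =
  AsyncUndeleteTweet.Event.fromAsyncRequest(request) match {
    // Exactly one of the two extractors matches, depending on whether the
    // request carried a retryAction.
    case TweetStoreEventOrRetry.First(event) => store.asyncUndeleteTweet(event)
    case TweetStoreEventOrRetry.Retry(retry) => store.retryAsyncUndeleteTweet(retry)
  }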
- */ - def copyTakedownFieldsForUpdate(source: Tweet): CachedTweet => CachedTweet = - ct => { - val newCoreData = source.coreData.get - val updatedCoreData = ct.tweet.coreData.map(_.copy(hasTakedown = newCoreData.hasTakedown)) - ct.copy( - tweet = ct.tweet.copy( - coreData = updatedCoreData, - tweetypieOnlyTakedownCountryCodes = source.tweetypieOnlyTakedownCountryCodes, - tweetypieOnlyTakedownReasons = source.tweetypieOnlyTakedownReasons - ) - ) - } - - def copyNsfwFieldsForUpdate(source: Tweet): Tweet => Tweet = - tweet => { - val newCoreData = source.coreData.get - val updatedCoreData = - tweet.coreData.map { core => - core.copy(nsfwUser = newCoreData.nsfwUser, nsfwAdmin = newCoreData.nsfwAdmin) - } - tweet.copy(coreData = updatedCoreData) - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.docx new file mode 100644 index 000000000..90c6b90c7 Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala deleted file mode 100644 index 72edb8cc1..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala +++ /dev/null @@ -1,237 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.store.TweetEventDataScrubber.scrub -import com.twitter.tweetypie.thriftscala._ - -object UndeleteTweet extends TweetStore.SyncModule { - - /** - * A TweetStoreEvent for Undeletion. - */ - case class Event( - tweet: Tweet, - user: User, - timestamp: Time, - hydrateOptions: WritePathHydrationOptions, - _internalTweet: Option[CachedTweet] = None, - deletedAt: Option[Time], - sourceTweet: Option[Tweet] = None, - sourceUser: Option[User] = None, - quotedTweet: Option[Tweet] = None, - quotedUser: Option[User] = None, - parentUserId: Option[UserId] = None, - quoterHasAlreadyQuotedTweet: Boolean = false) - extends SyncTweetStoreEvent("undelete_tweet") - with QuotedTweetOps { - def internalTweet: CachedTweet = - _internalTweet.getOrElse( - throw new IllegalStateException( - s"internalTweet should have been set in WritePathHydration, ${this}" - ) - ) - - def toAsyncUndeleteTweetRequest: AsyncUndeleteTweetRequest = - AsyncUndeleteTweetRequest( - tweet = tweet, - cachedTweet = internalTweet, - user = user, - timestamp = timestamp.inMillis, - deletedAt = deletedAt.map(_.inMillis), - sourceTweet = sourceTweet, - sourceUser = sourceUser, - quotedTweet = quotedTweet, - quotedUser = quotedUser, - parentUserId = parentUserId, - quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet) - ) - } - - trait Store { - val undeleteTweet: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val undeleteTweet: FutureEffect[Event] = wrap(underlying.undeleteTweet) - } - - object Store { - def apply( - logLensStore: LogLensStore, - cachingTweetStore: CachingTweetStore, - tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, - asyncEnqueueStore: AsyncEnqueueStore - ): Store = - new Store { - override val undeleteTweet: FutureEffect[Event] = - FutureEffect.inParallel( - logLensStore.undeleteTweet, - // ignore failures writing to cache, will be retried in async-path - cachingTweetStore.ignoreFailures.undeleteTweet, - tweetCountsUpdatingStore.undeleteTweet, - 
asyncEnqueueStore.undeleteTweet - ) - } - } -} - -object AsyncUndeleteTweet extends TweetStore.AsyncModule { - - object Event { - def fromAsyncRequest(request: AsyncUndeleteTweetRequest): TweetStoreEventOrRetry[Event] = - TweetStoreEventOrRetry( - AsyncUndeleteTweet.Event( - tweet = request.tweet, - cachedTweet = request.cachedTweet, - user = request.user, - optUser = Some(request.user), - timestamp = Time.fromMilliseconds(request.timestamp), - deletedAt = request.deletedAt.map(Time.fromMilliseconds), - sourceTweet = request.sourceTweet, - sourceUser = request.sourceUser, - quotedTweet = request.quotedTweet, - quotedUser = request.quotedUser, - parentUserId = request.parentUserId, - quoterHasAlreadyQuotedTweet = request.quoterHasAlreadyQuotedTweet.getOrElse(false) - ), - request.retryAction, - RetryEvent - ) - } - - case class Event( - tweet: Tweet, - cachedTweet: CachedTweet, - user: User, - optUser: Option[User], - timestamp: Time, - deletedAt: Option[Time], - sourceTweet: Option[Tweet], - sourceUser: Option[User], - quotedTweet: Option[Tweet], - quotedUser: Option[User], - parentUserId: Option[UserId] = None, - quoterHasAlreadyQuotedTweet: Boolean = false) - extends AsyncTweetStoreEvent("async_undelete_tweet") - with QuotedTweetOps - with TweetStoreTweetEvent { - - /** - * Convert this event into an AsyncUndeleteTweetRequest thrift request object - */ - def toAsyncRequest(retryAction: Option[AsyncWriteAction] = None): AsyncUndeleteTweetRequest = - AsyncUndeleteTweetRequest( - tweet = tweet, - cachedTweet = cachedTweet, - user = user, - timestamp = timestamp.inMillis, - retryAction = retryAction, - deletedAt = deletedAt.map(_.inMillis), - sourceTweet = sourceTweet, - sourceUser = sourceUser, - quotedTweet = quotedTweet, - quotedUser = quotedUser, - parentUserId = parentUserId, - quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet) - ) - - override def toTweetEventData: Seq[TweetEventData] = - Seq( - TweetEventData.TweetUndeleteEvent( - TweetUndeleteEvent( - tweet = scrub(tweet), - user = Some(user), - sourceTweet = sourceTweet.map(scrub), - sourceUser = sourceUser, - retweetParentUserId = parentUserId, - quotedTweet = publicQuotedTweet.map(scrub), - quotedUser = publicQuotedUser, - deletedAtMsec = deletedAt.map(_.inMilliseconds) - ) - ) - ) - - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - service.asyncUndeleteTweet(toAsyncRequest(Some(action))) - } - - case class RetryEvent(action: AsyncWriteAction, event: Event) - extends TweetStoreRetryEvent[Event] { - - override val eventType: AsyncWriteEventType.Undelete.type = AsyncWriteEventType.Undelete - override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) - } - - trait Store { - val asyncUndeleteTweet: FutureEffect[Event] - val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val asyncUndeleteTweet: FutureEffect[Event] = wrap(underlying.asyncUndeleteTweet) - override val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( - underlying.retryAsyncUndeleteTweet) - } - - object Store { - def apply( - cachingTweetStore: CachingTweetStore, - eventBusEnqueueStore: TweetEventBusStore, - indexingStore: TweetIndexingStore, - replicatingStore: ReplicatingTweetStore, - mediaServiceStore: MediaServiceStore, - timelineUpdatingStore: TlsTimelineUpdatingStore - ): Store = { - val stores: Seq[Store] = - Seq( - cachingTweetStore, - eventBusEnqueueStore, - 
indexingStore, - replicatingStore, - mediaServiceStore, - timelineUpdatingStore - ) - - def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = - FutureEffect.inParallel[E](stores.map(extract): _*) - - new Store { - override val asyncUndeleteTweet: FutureEffect[Event] = build(_.asyncUndeleteTweet) - override val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = build( - _.retryAsyncUndeleteTweet) - } - } - } -} - -object ReplicatedUndeleteTweet extends TweetStore.ReplicatedModule { - - case class Event( - tweet: Tweet, - cachedTweet: CachedTweet, - quoterHasAlreadyQuotedTweet: Boolean = false) - extends ReplicatedTweetStoreEvent("replicated_undelete_tweet") - - trait Store { - val replicatedUndeleteTweet: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedUndeleteTweet: FutureEffect[Event] = wrap( - underlying.replicatedUndeleteTweet) - } - - object Store { - def apply( - cachingTweetStore: CachingTweetStore, - tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore - ): Store = - new Store { - override val replicatedUndeleteTweet: FutureEffect[Event] = - FutureEffect.inParallel( - cachingTweetStore.replicatedUndeleteTweet.ignoreFailures, - tweetCountsUpdatingStore.replicatedUndeleteTweet.ignoreFailures - ) - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.docx new file mode 100644 index 000000000..8bb56617b Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala deleted file mode 100644 index c8d1d0b30..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala +++ /dev/null @@ -1,206 +0,0 @@ -package com.twitter.tweetypie -package store - -import com.twitter.tweetypie.thriftscala._ - -object UpdatePossiblySensitiveTweet extends TweetStore.SyncModule { - - case class Event( - tweet: Tweet, - user: User, - timestamp: Time, - byUserId: UserId, - nsfwAdminChange: Option[Boolean], - nsfwUserChange: Option[Boolean], - note: Option[String], - host: Option[String]) - extends SyncTweetStoreEvent("update_possibly_sensitive_tweet") { - def toAsyncRequest: AsyncUpdatePossiblySensitiveTweetRequest = - AsyncUpdatePossiblySensitiveTweetRequest( - tweet = tweet, - user = user, - byUserId = byUserId, - timestamp = timestamp.inMillis, - nsfwAdminChange = nsfwAdminChange, - nsfwUserChange = nsfwUserChange, - note = note, - host = host - ) - } - - trait Store { - val updatePossiblySensitiveTweet: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val updatePossiblySensitiveTweet: FutureEffect[Event] = wrap( - underlying.updatePossiblySensitiveTweet - ) - } - - object Store { - def apply( - manhattanStore: ManhattanTweetStore, - cachingTweetStore: CachingTweetStore, - logLensStore: LogLensStore, - asyncEnqueueStore: AsyncEnqueueStore - ): Store = - new Store { - override val updatePossiblySensitiveTweet: FutureEffect[Event] = - FutureEffect.inParallel( - manhattanStore.ignoreFailures.updatePossiblySensitiveTweet, - 
cachingTweetStore.ignoreFailures.updatePossiblySensitiveTweet, - logLensStore.updatePossiblySensitiveTweet, - asyncEnqueueStore.updatePossiblySensitiveTweet - ) - } - } -} - -object AsyncUpdatePossiblySensitiveTweet extends TweetStore.AsyncModule { - - object Event { - def fromAsyncRequest( - request: AsyncUpdatePossiblySensitiveTweetRequest - ): TweetStoreEventOrRetry[Event] = - TweetStoreEventOrRetry( - AsyncUpdatePossiblySensitiveTweet.Event( - tweet = request.tweet, - user = request.user, - optUser = Some(request.user), - timestamp = Time.fromMilliseconds(request.timestamp), - byUserId = request.byUserId, - nsfwAdminChange = request.nsfwAdminChange, - nsfwUserChange = request.nsfwUserChange, - note = request.note, - host = request.host - ), - request.action, - RetryEvent - ) - } - - case class Event( - tweet: Tweet, - user: User, - optUser: Option[User], - timestamp: Time, - byUserId: UserId, - nsfwAdminChange: Option[Boolean], - nsfwUserChange: Option[Boolean], - note: Option[String], - host: Option[String]) - extends AsyncTweetStoreEvent("async_update_possibly_sensitive_tweet") - with TweetStoreTweetEvent { - - def toAsyncRequest( - action: Option[AsyncWriteAction] = None - ): AsyncUpdatePossiblySensitiveTweetRequest = - AsyncUpdatePossiblySensitiveTweetRequest( - tweet = tweet, - user = user, - byUserId = byUserId, - timestamp = timestamp.inMillis, - nsfwAdminChange = nsfwAdminChange, - nsfwUserChange = nsfwUserChange, - note = note, - host = host, - action = action - ) - - override def toTweetEventData: Seq[TweetEventData] = - Seq( - TweetEventData.TweetPossiblySensitiveUpdateEvent( - TweetPossiblySensitiveUpdateEvent( - tweetId = tweet.id, - userId = user.id, - nsfwAdmin = TweetLenses.nsfwAdmin.get(tweet), - nsfwUser = TweetLenses.nsfwUser.get(tweet) - ) - ) - ) - - override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = - service.asyncUpdatePossiblySensitiveTweet(toAsyncRequest(Some(action))) - } - - case class RetryEvent(action: AsyncWriteAction, event: Event) - extends TweetStoreRetryEvent[Event] { - - override val eventType: AsyncWriteEventType.UpdatePossiblySensitiveTweet.type = - AsyncWriteEventType.UpdatePossiblySensitiveTweet - override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) - } - - trait Store { - val asyncUpdatePossiblySensitiveTweet: FutureEffect[Event] - val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[TweetStoreRetryEvent[Event]] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val asyncUpdatePossiblySensitiveTweet: FutureEffect[Event] = wrap( - underlying.asyncUpdatePossiblySensitiveTweet - ) - override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[TweetStoreRetryEvent[Event]] = - wrap( - underlying.retryAsyncUpdatePossiblySensitiveTweet - ) - } - - object Store { - def apply( - manhattanStore: ManhattanTweetStore, - cachingTweetStore: CachingTweetStore, - replicatingStore: ReplicatingTweetStore, - guanoStore: GuanoServiceStore, - eventBusStore: TweetEventBusStore - ): Store = { - val stores: Seq[Store] = - Seq( - manhattanStore, - cachingTweetStore, - replicatingStore, - guanoStore, - eventBusStore - ) - - def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = - FutureEffect.inParallel[E](stores.map(extract): _*) - - new Store { - override val asyncUpdatePossiblySensitiveTweet: FutureEffect[Event] = build( - _.asyncUpdatePossiblySensitiveTweet) - override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ - 
TweetStoreRetryEvent[Event] - ] = build( - _.retryAsyncUpdatePossiblySensitiveTweet - ) - } - } - } -} - -object ReplicatedUpdatePossiblySensitiveTweet extends TweetStore.ReplicatedModule { - - case class Event(tweet: Tweet) - extends ReplicatedTweetStoreEvent("replicated_update_possibly_sensitive_tweet") - - trait Store { - val replicatedUpdatePossiblySensitiveTweet: FutureEffect[Event] - } - - trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => - override val replicatedUpdatePossiblySensitiveTweet: FutureEffect[Event] = wrap( - underlying.replicatedUpdatePossiblySensitiveTweet - ) - } - - object Store { - def apply(cachingTweetStore: CachingTweetStore): Store = { - new Store { - override val replicatedUpdatePossiblySensitiveTweet: FutureEffect[Event] = - cachingTweetStore.replicatedUpdatePossiblySensitiveTweet - } - } - } -} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.docx b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.docx new file mode 100644 index 000000000..1188dae7a Binary files /dev/null and b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.docx differ diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala deleted file mode 100644 index aa399d9bf..000000000 --- a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.twitter.tweetypie - -import com.fasterxml.jackson.core.JsonGenerator -import com.twitter.tweetypie.thriftscala.CachedTweet -import com.twitter.context.TwitterContext - -package object store { - type JsonGen = JsonGenerator => Unit - - // Bring Tweetypie permitted TwitterContext into scope - val TwitterContext: TwitterContext = - com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) - - def cachedTweetFromUnhydratedTweet(tweet: Tweet): CachedTweet = - CachedTweet(tweet = tweet) -} diff --git a/tweetypie/server/src/main/thrift/BUILD b/tweetypie/server/src/main/thrift/BUILD deleted file mode 100644 index f90f1b823..000000000 --- a/tweetypie/server/src/main/thrift/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -create_thrift_libraries( - base_name = "compiled", - sources = ["**/*.thrift"], - platform = "java8", - strict_deps = True, - tags = ["bazel-compatible"], - dependency_roots = [ - "mediaservices/commons/src/main/thrift", - "tweetypie/servo/repo/src/main/thrift", - "src/thrift/com/twitter/context:feature-context", - "src/thrift/com/twitter/escherbird:media-annotation-structs", - "src/thrift/com/twitter/expandodo:capi", - "src/thrift/com/twitter/expandodo:only", - "src/thrift/com/twitter/geoduck", - "src/thrift/com/twitter/gizmoduck:thrift", - "src/thrift/com/twitter/gizmoduck:user-thrift", - "src/thrift/com/twitter/servo:servo-exception", - "tweetypie/common/src/thrift/com/twitter/tweetypie:audit", - "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data", - "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity", - "tweetypie/common/src/thrift/com/twitter/tweetypie:service", - "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info", - "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet", - ], - generate_languages = [ - "java", - "scala", - ], -) diff --git a/tweetypie/server/src/main/thrift/BUILD.docx b/tweetypie/server/src/main/thrift/BUILD.docx new file mode 100644 index 000000000..06e91fa67 Binary files /dev/null and 
b/tweetypie/server/src/main/thrift/BUILD.docx differ diff --git a/tweetypie/server/src/main/thrift/tweetypie_internal.docx b/tweetypie/server/src/main/thrift/tweetypie_internal.docx new file mode 100644 index 000000000..ee02b60fe Binary files /dev/null and b/tweetypie/server/src/main/thrift/tweetypie_internal.docx differ diff --git a/tweetypie/server/src/main/thrift/tweetypie_internal.thrift b/tweetypie/server/src/main/thrift/tweetypie_internal.thrift deleted file mode 100644 index 3cc16381e..000000000 --- a/tweetypie/server/src/main/thrift/tweetypie_internal.thrift +++ /dev/null @@ -1,705 +0,0 @@ -namespace java com.twitter.tweetypie.thriftjava -#@namespace scala com.twitter.tweetypie.thriftscala - -include "com/twitter/context/feature_context.thrift" -include "com/twitter/expandodo/cards.thrift" -include "com/twitter/gizmoduck/user.thrift" -include "com/twitter/mediaservices/commons/MediaCommon.thrift" -include "com/twitter/mediaservices/commons/MediaInformation.thrift" -include "com/twitter/mediaservices/commons/TweetMedia.thrift" -include "com/twitter/servo/exceptions.thrift" -include "com/twitter/servo/cache/servo_repo.thrift" -include "com/twitter/tseng/withholding/withholding.thrift" -include "com/twitter/tweetypie/delete_location_data.thrift" -include "com/twitter/tweetypie/transient_context.thrift" -include "com/twitter/tweetypie/media_entity.thrift" -include "com/twitter/tweetypie/tweet.thrift" -include "com/twitter/tweetypie/tweet_audit.thrift" -include "com/twitter/tweetypie/stored_tweet_info.thrift" -include "com/twitter/tweetypie/tweet_service.thrift" - -typedef i16 FieldId - -struct UserIdentity { - 1: required i64 id - 2: required string screen_name - 3: required string real_name -# obsolete 4: bool deactivated = 0 -# obsolete 5: bool suspended = 0 -} - -enum HydrationType { - MENTIONS = 1, - URLS = 2, - CACHEABLE_MEDIA = 3, - QUOTED_TWEET_REF = 4, - REPLY_SCREEN_NAME = 5, - DIRECTED_AT = 6, - CONTRIBUTOR = 7, - SELF_THREAD_INFO = 8 -} - -struct CachedTweet { - 1: required tweet.Tweet tweet - // @obsolete 2: optional set included_additional_fields - 3: set completed_hydrations = [] - - // Indicates that a tweet was deleted after being bounced for violating - // the Twitter Rules. - // When set to true, all other fields in CachedTweet are ignored. - 4: optional bool is_bounce_deleted - - // Indicates whether this tweet has safety labels stored in Strato. - // See com.twitter.tweetypie.core.TweetData.hasSafetyLabels for more details. 
- // @obsolete 5: optional bool has_safety_labels -} (persisted='true', hasPersonalData='true') - -struct MediaFaces { - 1: required map> faces -} - -enum AsyncWriteEventType { - INSERT = 1, - DELETE = 2, - UNDELETE = 3, - SET_ADDITIONAL_FIELDS = 4, - DELETE_ADDITIONAL_FIELDS = 5, - UPDATE_POSSIBLY_SENSITIVE_TWEET = 6, - UPDATE_TWEET_MEDIA = 7, - TAKEDOWN = 8, - SET_RETWEET_VISIBILITY = 9 -} - -// an enum of actions that could happen in an async-write (insert or delete) -enum AsyncWriteAction { - HOSEBIRD_ENQUEUE = 1 - SEARCH_ENQUEUE = 2 - // obsolete MAIL_ENQUEUE = 3 - FANOUT_DELIVERY = 4 - // obsolete FACEBOOK_ENQUEUE = 5 - TWEET_INDEX = 6 - TIMELINE_UPDATE = 7 - CACHE_UPDATE = 8 - REPLICATION = 9 - // obsolete MONORAIL_EXPIRY_ENQUEUE = 10 - USER_GEOTAG_UPDATE = 11 - // obsolete IBIS_ENQUEUE = 12 - EVENT_BUS_ENQUEUE = 13 - // obsolete HOSEBIRD_BINARY_ENQUEUE = 14 - TBIRD_UPDATE = 15 - RETWEETS_DELETION = 16 - GUANO_SCRIBE = 17 - MEDIA_DELETION = 18 - GEO_SEARCH_REQUEST_ID = 19 - SEARCH_THRIFT_ENQUEUE = 20 - RETWEET_ARCHIVAL_ENQUEUE = 21 -} - -# This struct is scribed to test_tweetypie_failed_async_write after -# an async-write action has failed multiple retries -struct FailedAsyncWrite { - 1: required AsyncWriteEventType event_type - 2: required AsyncWriteAction action - 3: optional tweet.Tweet tweet -} (persisted='true', hasPersonalData='true') - -# This struct is scribed to test_tweetypie_detached_retweets after -# attempting to read a retweet for which the source tweet has been deleted. -struct DetachedRetweet { - 1: required i64 tweet_id (personalDataType='TweetId') - 2: required i64 user_id (personalDataType='UserId') - 3: required i64 source_tweet_id (personalDataType='TweetId') -} (persisted='true', hasPersonalData='true') - -struct TweetCacheWrite { - 1: required i64 tweet_id (personalDataType = 'TweetId') - // If the tweet id is a snowflake id, this is an offset since tweet creation. - // If it is not a snowflake id, then this is a Unix epoch time in - // milliseconds. (The idea is that for most tweets, this encoding will make - // it easier to see the interval between events and whether it occurred soon - after tweet creation.) - 2: required i64 timestamp (personalDataType = 'TransactionTimestamp') - 3: required string action // One of "set", "add", "replace", "cas", "delete" - 4: required servo_repo.CachedValue cached_value // Contains metadata about the cached value - 5: optional CachedTweet cached_tweet -} (persisted='true', hasPersonalData='true') - -struct AsyncInsertRequest { - 12: required tweet.Tweet tweet - 18: required user.User user - 21: required i64 timestamp - // the cacheable version of tweet from field 12 - 29: required CachedTweet cached_tweet - # 13: obsolete tweet.Tweet internal_tweet - 19: optional tweet.Tweet source_tweet - 20: optional user.User source_user - // Used for quote tweet feature - 22: optional tweet.Tweet quoted_tweet - 23: optional user.User quoted_user - 28: optional i64 parent_user_id - // Used for delivering the requestId of a geotagged tweet - 24: optional string geo_search_request_id - # 7: obsolete - # if not specified, all async insert actions are performed. if specified, only - # the specified action is performed; this is used for retrying specific actions - # that failed on a previous attempt.
- 10: optional AsyncWriteAction retry_action - # 11: obsolete: bool from_monorail = 0 - # 14: obsolete - 15: optional feature_context.FeatureContext feature_context - # 16: obsolete - # 17: obsolete - # 26: obsolete: optional tweet.Tweet debug_tweet_copy - 27: optional map additional_context - 30: optional transient_context.TransientCreateContext transient_context - // Used to check whether the same tweet has been quoted multiple - // times by a given user. - 31: optional bool quoter_has_already_quoted_tweet - 32: optional InitialTweetUpdateRequest initialTweetUpdateRequest - // User ids of users mentioned in note tweet. Used for tls events - 33: optional list note_tweet_mentioned_user_ids -} - -struct AsyncUpdatePossiblySensitiveTweetRequest { - 1: required tweet.Tweet tweet - 2: required user.User user - 3: required i64 by_user_id - 4: required i64 timestamp - 5: optional bool nsfw_admin_change - 6: optional bool nsfw_user_change - 7: optional string note - 8: optional string host - 9: optional AsyncWriteAction action -} - -struct AsyncUpdateTweetMediaRequest { - 1: required i64 tweet_id - 2: required list orphaned_media - 3: optional AsyncWriteAction retry_action - 4: optional list media_keys -} - -struct AsyncSetAdditionalFieldsRequest { - 1: required tweet.Tweet additional_fields - 3: required i64 timestamp - 4: required i64 user_id - 2: optional AsyncWriteAction retry_action -} - -struct AsyncSetRetweetVisibilityRequest { - 1: required i64 retweet_id - // Whether to archive or unarchive(visible=true) the retweet_id edge in the RetweetsGraph. - 2: required bool visible - 3: required i64 src_id - 5: required i64 retweet_user_id - 6: required i64 source_tweet_user_id - 7: required i64 timestamp - 4: optional AsyncWriteAction retry_action -} - -struct SetRetweetVisibilityRequest { - 1: required i64 retweet_id - // Whether to archive or unarchive(visible=true) the retweet_id edge in the RetweetsGraph. - 2: required bool visible -} - -struct AsyncEraseUserTweetsRequest { - 1: required i64 user_id - 3: required i64 flock_cursor - 4: required i64 start_timestamp - 5: required i64 tweet_count -} - -struct AsyncDeleteRequest { - 4: required tweet.Tweet tweet - 11: required i64 timestamp - 2: optional user.User user - 9: optional i64 by_user_id - 12: optional tweet_audit.AuditDeleteTweet audit_passthrough - 13: optional i64 cascaded_from_tweet_id - # if not specified, all async-delete actions are performed. if specified, only - # the specified action is performed; this is used for retrying specific actions - # that failed on a previous attempt. 
- 3: optional AsyncWriteAction retry_action - 5: bool delete_media = 1 - 6: bool delete_retweets = 1 - 8: bool scribe_for_audit = 1 - 15: bool is_user_erasure = 0 - 17: bool is_bounce_delete = 0 - 18: optional bool is_last_quote_of_quoter - 19: optional bool is_admin_delete -} - -struct AsyncUndeleteTweetRequest { - 1: required tweet.Tweet tweet - 3: required user.User user - 4: required i64 timestamp - // the cacheable version of tweet from field 1 - 12: required CachedTweet cached_tweet - # 2: obsolete tweet.Tweet internal_tweet - 5: optional AsyncWriteAction retry_action - 6: optional i64 deleted_at - 7: optional tweet.Tweet source_tweet - 8: optional user.User source_user - 9: optional tweet.Tweet quoted_tweet - 10: optional user.User quoted_user - 11: optional i64 parent_user_id - 13: optional bool quoter_has_already_quoted_tweet -} - -struct AsyncIncrFavCountRequest { - 1: required i64 tweet_id - 2: required i32 delta -} - -struct AsyncIncrBookmarkCountRequest { - 1: required i64 tweet_id - 2: required i32 delta -} - -struct AsyncDeleteAdditionalFieldsRequest { - 6: required i64 tweet_id - 7: required list field_ids - 4: required i64 timestamp - 5: required i64 user_id - 3: optional AsyncWriteAction retry_action -} - -// Used for both tweet and user takedowns. -// user will be None for user takedowns because user is only used when scribe_for_audit or -// eventbus_enqueue are true, which is never the case for user takedown. -struct AsyncTakedownRequest { - 1: required tweet.Tweet tweet - - // Author of the tweet. Used when scribe_for_audit or eventbus_enqueue are true which is the case - // for tweet takedown but not user takedown. - 2: optional user.User user - - // This field is the resulting list of takedown reasons on the tweet after the - // reasons_to_add and reasons_to_remove changes have been applied. - 13: list takedown_reasons = [] - - // This field is the list of takedown reasons to add to the tweet. - 14: list reasons_to_add = [] - - // This field is the list of takedown reasons to remove from the tweet. - 15: list reasons_to_remove = [] - - // This field determines whether or not Tweetypie should write takedown audits - // for this request to Guano. - 6: required bool scribe_for_audit - - // This field determines whether or not Tweetypie should enqueue a - // TweetTakedownEvent to EventBus and Hosebird for this request. - 7: required bool eventbus_enqueue - - // This field is sent as part of the takedown audit that's written to Guano, - // and is not persisted with the takedown itself. - 8: optional string audit_note - - // This field is the ID of the user who initiated the takedown. It is used - // when auditing the takedown in Guano. If unset, it will be logged as -1. - 9: optional i64 by_user_id - - // This field is the host where the request originated or the remote IP that - // is associated with the request. It is used when auditing the takedown in - // Guano. If unset, it will be logged as "". - 10: optional string host - - 11: optional AsyncWriteAction retry_action - 12: required i64 timestamp -} - -struct SetTweetUserTakedownRequest { - 1: required i64 tweet_id - 2: required bool has_takedown - 3: optional i64 user_id -} - -enum DataErrorCause { - UNKNOWN = 0 - // Returned on set_tweet_user_takedown when - // the SetTweetUserTakedownRequest.user_id does not match the author - // of the tweet identified by SetTweetUserTakedownRequest.tweet_id.
USER_TWEET_RELATIONSHIP = 1 -} - -/** - * DataError is returned for operations that perform data changes - * but encounter an inconsistency, such that the operation cannot - * be meaningfully performed. - */ -exception DataError { - 1: required string message - 2: optional DataErrorCause errorCause -} - -struct ReplicatedDeleteAdditionalFieldsRequest { - /** is a map for backwards compatibility, but will only contain a single tweet id */ - 1: required map> fields_map -} - -struct CascadedDeleteTweetRequest { - 1: required i64 tweet_id - 2: required i64 cascaded_from_tweet_id - 3: optional tweet_audit.AuditDeleteTweet audit_passthrough -} - -struct QuotedTweetDeleteRequest { - 1: i64 quoting_tweet_id - 2: i64 quoted_tweet_id - 3: i64 quoted_user_id -} - -struct QuotedTweetTakedownRequest { - 1: i64 quoting_tweet_id - 2: i64 quoted_tweet_id - 3: i64 quoted_user_id - 4: list takedown_country_codes = [] - 5: list takedown_reasons = [] -} - -struct ReplicatedInsertTweet2Request { - 1: required CachedTweet cached_tweet - // Used to check whether the same tweet has been quoted by a user. - 2: optional bool quoter_has_already_quoted_tweet - 3: optional InitialTweetUpdateRequest initialTweetUpdateRequest -} - -struct ReplicatedDeleteTweet2Request { - 1: required tweet.Tweet tweet - 2: required bool is_erasure - 3: required bool is_bounce_delete - 4: optional bool is_last_quote_of_quoter -} - -struct ReplicatedSetRetweetVisibilityRequest { - 1: required i64 src_id - // Whether to archive or unarchive(visible=true) the retweet_id edge in the RetweetsGraph. - 2: required bool visible -} - -struct ReplicatedUndeleteTweet2Request { - 1: required CachedTweet cached_tweet - 2: optional bool quoter_has_already_quoted_tweet -} - -struct GetStoredTweetsOptions { - 1: bool bypass_visibility_filtering = 0 - 2: optional i64 for_user_id - 3: list additional_field_ids = [] -} - -struct GetStoredTweetsRequest { - 1: required list tweet_ids - 2: optional GetStoredTweetsOptions options -} - -struct GetStoredTweetsResult { - 1: required stored_tweet_info.StoredTweetInfo stored_tweet -} - -struct GetStoredTweetsByUserOptions { - 1: bool bypass_visibility_filtering = 0 - 2: bool set_for_user_id = 0 - 3: optional i64 start_time_msec - 4: optional i64 end_time_msec - 5: optional i64 cursor - 6: bool start_from_oldest = 0 - 7: list additional_field_ids = [] -} - -struct GetStoredTweetsByUserRequest { - 1: required i64 user_id - 2: optional GetStoredTweetsByUserOptions options -} - -struct GetStoredTweetsByUserResult { - 1: required list stored_tweets - 2: optional i64 cursor -} - -/* This is a request to update an initial tweet based on the creation of an edit tweet - * initialTweetId: The tweet to be updated - * editTweetId: The tweet being created, which is an edit of initialTweetId - * selfPermalink: A self permalink for initialTweetId - */ -struct InitialTweetUpdateRequest { - 1: required i64 initialTweetId - 2: required i64 editTweetId - 3: optional tweet.ShortenedUrl selfPermalink -} - -service TweetServiceInternal extends tweet_service.TweetService { - - /** - * Performs the async portion of TweetService.erase_user_tweets. - * Only tweetypie itself can call this. - */ - void async_erase_user_tweets(1: AsyncEraseUserTweetsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetService.post_tweet. - * Only tweetypie itself can call this.
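The retry_action convention used by the Async*Request structs above is easiest to see from the client side; a sketch (the service client and request-builder names are invented) of re-running only the cache update of a failed insert:

// First attempt: retryAction is empty, so every async insert action runs.
val firstAttempt: AsyncInsertRequest = buildAsyncInsertRequest(insertEvent)
// Retry: only the named action is re-run; all other actions are skipped.
val cacheRetry: AsyncInsertRequest =
  firstAttempt.copy(retryAction = Some(AsyncWriteAction.CacheUpdate))
deferredTweetService.asyncInsert(cacheRetry)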
- */ - void async_insert(1: AsyncInsertRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetService.delete_tweets. - * Only tweetypie itself can call this. - */ - void async_delete(1: AsyncDeleteRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetService.undelete_tweet. - * Only tweetypie itself can call this. - */ - void async_undelete_tweet(1: AsyncUndeleteTweetRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetService.update_possibly_sensitive_tweet. - * Only tweetypie itself can call this. - */ - void async_update_possibly_sensitive_tweet(1: AsyncUpdatePossiblySensitiveTweetRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetService.incr_tweet_fav_count. - * Only tweetypie itself can call this. - */ - void async_incr_fav_count(1: AsyncIncrFavCountRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetService.incr_tweet_bookmark_count. - * Only tweetypie itself can call this. - */ - void async_incr_bookmark_count(1: AsyncIncrBookmarkCountRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetService.set_additional_fields. - * Only tweetypie itself can call this. - */ - void async_set_additional_fields(1: AsyncSetAdditionalFieldsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetServiceInternal.set_retweet_visibility. - * Only tweetypie itself can call this. - */ - void async_set_retweet_visibility(1: AsyncSetRetweetVisibilityRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Set whether the specified retweet ID should be included in its source tweet's retweet count. - * This endpoint is invoked from a tweetypie-daemon to adjust retweet counts for all tweets a - * suspended or fraudulent (e.g. ROPO-'d) user has retweeted to disincentivize their false engagement. - */ - void set_retweet_visibility(1: SetRetweetVisibilityRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetService.delete_additional_fields. - * Only tweetypie itself can call this. - */ - void async_delete_additional_fields(1: AsyncDeleteAdditionalFieldsRequest field_delete) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Performs the async portion of TweetService.takedown. - * Only tweetypie itself can call this. - */ - void async_takedown(1: AsyncTakedownRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Update the tweet's takedown fields when a user is taken down. - * Only tweetypie's UserTakedownChange daemon can call this. 
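On the caller side, the author check described for DataErrorCause surfaces as a DataError; a sketch (the client value is invented, and scrooge-generated Scala names are assumed):

internalTweetService
  .setTweetUserTakedown(
    SetTweetUserTakedownRequest(tweetId = tweetId, hasTakedown = true, userId = Some(userId)))
  .handle {
    // The user did not author this tweet, so no takedown was applied.
    case e: DataError if e.errorCause.contains(DataErrorCause.UserTweetRelationship) => ()
  }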
- */ - void set_tweet_user_takedown(1: SetTweetUserTakedownRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error, - 3: DataError data_error) - - /** - * Cascade delete tweet is the logic for removing tweets that are detached - * from their dependency which has been deleted. They are already filtered - * out from serving, so this operation reconciles storage with the view - * presented by Tweetypie. - * This RPC call is delegated from daemons or batch jobs. Currently there - * are two use-cases when this call is issued: - * * Deleting detached retweets after the source tweet was deleted. - * This is done through RetweetsDeletion daemon and the - * CleanupDetachedRetweets job. - * * Deleting edits of an initial tweet that has been deleted. - * This is done by CascadedEditedTweetDelete daemon. - * Note that, when serving the original delete request for an edit, - * the initial tweet is only deleted, which makes all edits hidden. - */ - void cascaded_delete_tweet(1: CascadedDeleteTweetRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Update the timestamp of the user's most recent request to delete - * location data on their tweets. This does not actually remove the - * geo information from the user's tweets, but it will prevent the geo - * information for this user's tweets from being returned by - * Tweetypie. - */ - void scrub_geo_update_user_timestamp(1: delete_location_data.DeleteLocationData request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Look up tweets quoting a tweet that has been deleted and enqueue a compliance event. - * Only tweetypie's QuotedTweetDelete daemon can call this. - **/ - void quoted_tweet_delete(1: QuotedTweetDeleteRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Look up tweets quoting a tweet that has been taken down and enqueue a compliance event. - * Only tweetypie's QuotedTweetTakedown daemon can call this. - **/ - void quoted_tweet_takedown(1: QuotedTweetTakedownRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates TweetService.get_tweet_counts from another cluster. - */ - void replicated_get_tweet_counts(1: tweet_service.GetTweetCountsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates TweetService.get_tweet_fields from another cluster. - */ - void replicated_get_tweet_fields(1: tweet_service.GetTweetFieldsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates TweetService.get_tweets from another cluster. - */ - void replicated_get_tweets(1: tweet_service.GetTweetsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.post_tweet InsertTweet event from another cluster. - * Note: v1 version of this endpoint previously just took a Tweet which is why it was replaced - */ - void replicated_insert_tweet2(1: ReplicatedInsertTweet2Request request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.delete_tweets DeleteTweet event from another cluster. 
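For cascaded_delete_tweet above, a sketch (client and id values invented) of the detached-retweet cleanup path, where the delete is attributed back to the deletion that caused it:

internalTweetService.cascadedDeleteTweet(
  CascadedDeleteTweetRequest(
    tweetId = detachedRetweetId,                // the retweet left behind
    cascadedFromTweetId = deletedSourceTweetId  // the delete that triggered this cleanup
  ))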
- */ - void replicated_delete_tweet2(1: ReplicatedDeleteTweet2Request request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.incr_tweet_fav_count event from another cluster. - */ - void replicated_incr_fav_count(1: i64 tweet_id, 2: i32 delta) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.incr_tweet_bookmark_count event from another cluster. - */ - void replicated_incr_bookmark_count(1: i64 tweet_id, 2: i32 delta) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetServiceInternal.set_retweet_visibility event from another cluster. - */ - void replicated_set_retweet_visibility(1: ReplicatedSetRetweetVisibilityRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.scrub_geo from another cluster. - */ - void replicated_scrub_geo(1: list tweet_ids) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.set_additional_fields event from another cluster. - */ - void replicated_set_additional_fields( - 1: tweet_service.SetAdditionalFieldsRequest request - ) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.delete_additional_fields event from another cluster. - */ - void replicated_delete_additional_fields( - 1: ReplicatedDeleteAdditionalFieldsRequest request - ) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.undelete_tweet event from another cluster. - * Note: v1 version of this endpoint previously just took a Tweet which is why it was replaced - */ - void replicated_undelete_tweet2(1: ReplicatedUndeleteTweet2Request request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.takedown event from another cluster. - */ - void replicated_takedown(1: tweet.Tweet tweet) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Replicates a TweetService.update_possibly_sensitive_tweet event from another cluster. - */ - void replicated_update_possibly_sensitive_tweet(1: tweet.Tweet tweet) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Fetches hydrated Tweets and some metadata irrespective of the Tweets' state. - */ - list get_stored_tweets(1: GetStoredTweetsRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) - - /** - * Fetches hydrated Tweets and some metadata for a particular user, irrespective of the Tweets' - * state. 
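A sketch of draining a user's stored tweets oldest-first with get_stored_tweets_by_user (the client value is invented; this assumes the service omits the cursor on the last page):

def drainStoredTweets(userId: Long): Future[Seq[StoredTweetInfo]] = {
  def loop(cursor: Option[Long], acc: Seq[StoredTweetInfo]): Future[Seq[StoredTweetInfo]] =
    internalTweetService
      .getStoredTweetsByUser(
        GetStoredTweetsByUserRequest(
          userId = userId,
          options = Some(GetStoredTweetsByUserOptions(startFromOldest = true, cursor = cursor))))
      .flatMap { result =>
        result.cursor match {
          case Some(next) => loop(Some(next), acc ++ result.storedTweets)
          case None => Future.value(acc ++ result.storedTweets)
        }
      }
  loop(None, Nil)
}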
- */ - GetStoredTweetsByUserResult get_stored_tweets_by_user(1: GetStoredTweetsByUserRequest request) throws ( - 1: exceptions.ClientError client_error, - 2: exceptions.ServerError server_error) -} diff --git a/tweetypie/servo/README.docx b/tweetypie/servo/README.docx new file mode 100644 index 000000000..65cd54e69 Binary files /dev/null and b/tweetypie/servo/README.docx differ diff --git a/tweetypie/servo/README.md b/tweetypie/servo/README.md deleted file mode 100644 index ff9d1e89d..000000000 --- a/tweetypie/servo/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Servo - -Servo is a collection of classes and patterns for building services in Scala. It's a grab-bag of code that was deemed useful for service development. diff --git a/tweetypie/servo/decider/BUILD b/tweetypie/servo/decider/BUILD deleted file mode 100644 index 2da29494b..000000000 --- a/tweetypie/servo/decider/BUILD +++ /dev/null @@ -1,5 +0,0 @@ -target( - dependencies = [ - "tweetypie/servo/decider/src/main/scala", - ], -) diff --git a/tweetypie/servo/decider/BUILD.docx b/tweetypie/servo/decider/BUILD.docx new file mode 100644 index 000000000..b5abe9d2e Binary files /dev/null and b/tweetypie/servo/decider/BUILD.docx differ diff --git a/tweetypie/servo/decider/src/main/scala/BUILD b/tweetypie/servo/decider/src/main/scala/BUILD deleted file mode 100644 index 846ac3eb2..000000000 --- a/tweetypie/servo/decider/src/main/scala/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -scala_library( - sources = ["**/*.scala"], - platform = "java8", - provides = scala_artifact( - org = "com.twitter", - name = "servo-decider", - repo = artifactory, - ), - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "decider", - "finagle/finagle-core/src/main", - "tweetypie/servo/util", - "twitter-server-internal", - "twitter-server/server/src/main/scala", - ], -) diff --git a/tweetypie/servo/decider/src/main/scala/BUILD.docx b/tweetypie/servo/decider/src/main/scala/BUILD.docx new file mode 100644 index 000000000..c9d0d1e89 Binary files /dev/null and b/tweetypie/servo/decider/src/main/scala/BUILD.docx differ diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.docx b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.docx new file mode 100644 index 000000000..508e71ef8 Binary files /dev/null and b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.docx differ diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala deleted file mode 100644 index e147ad2fe..000000000 --- a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.servo.decider - -import com.twitter.decider.{Decider, Feature} - import com.twitter.servo.util.Gate -import com.twitter.servo.gate.DeciderGate - -/** - * Convenience syntax for creating decider gates - */ -class DeciderGateBuilder(decider: Decider) { - - /** - * idGate should be used when the result of the gate needs to be consistent between repeated - * invocations, with the condition that consistency is dependent upon passing identical - * parameters between the invocations.
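A sketch of the id-consistency property idGate provides (the key enum and decider value are invented; DeciderKeyEnum and DeciderKeyName are defined just below):

object ExampleDeciderKeys extends DeciderKeyEnum {
  val EnableNewWritePath: Value = Value("enable_new_write_path")
}

val builder = new DeciderGateBuilder(decider)
val newWritePathGate: Gate[Long] = builder.idGate(ExampleDeciderKeys.EnableNewWritePath)

// Deterministic per id: repeated checks for the same tweet id always agree
// for a fixed decider availability.
val useNewPath: Boolean = newWritePathGate(tweetId)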
- */ - def idGate(key: DeciderKeyName): Gate[Long] = - DeciderGate.byId(keyToFeature(key)) - - /** - * linearGate should be used when the probability of the gate returning true needs to - * increase linearly with the availability of the feature. - */ - def linearGate(key: DeciderKeyName): Gate[Unit] = - DeciderGate.linear(keyToFeature(key)) - - /** - * typedLinearGate is a linearGate adapted to the specified input type. - */ - def typedLinearGate[T](key: DeciderKeyName): Gate[T] = - linearGate(key).contramap[T] { _ => () } - - /** - * expGate should be used when the probability of the gate returning true needs to - * increase exponentially with the availability of the feature. - */ - def expGate(key: DeciderKeyName, exponent: Int): Gate[Unit] = - DeciderGate.exp(keyToFeature(key), exponent) - - def keyToFeature(key: DeciderKeyName): Feature = decider.feature(key.toString) -} diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.docx b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.docx new file mode 100644 index 000000000..0b05d628c Binary files /dev/null and b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.docx differ diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala deleted file mode 100644 index 8f9e17dce..000000000 --- a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala +++ /dev/null @@ -1,3 +0,0 @@ -package com.twitter.servo.decider - -trait DeciderKeyEnum extends Enumeration diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.docx b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.docx new file mode 100644 index 000000000..a65fdbc4c Binary files /dev/null and b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.docx differ diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala deleted file mode 100644 index 86aa734cb..000000000 --- a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala +++ /dev/null @@ -1,5 +0,0 @@ -package com.twitter.servo - -package object decider { - type DeciderKeyName = DeciderKeyEnum#Value -} diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.docx b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.docx new file mode 100644 index 000000000..079108988 Binary files /dev/null and b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.docx differ diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala deleted file mode 100644 index 42874e20d..000000000 --- a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.servo.gate - -import com.twitter.decider -import com.twitter.servo.util.Gate -import scala.annotation.tailrec - -object DeciderGate { - - /** - * Create a Gate[Unit] with a probability of returning true - * that increases linearly with the availability of the feature.
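The probabilistic behavior of the combinators defined next, assuming the underlying feature is available for a fraction p of calls (a sketch; the feature value is invented):

val ramp: Gate[Unit] = DeciderGate.linear(feature)     // true with probability ~p
// exp ANDs together |exponent| draws of the linear gate:
val squared: Gate[Unit] = DeciderGate.exp(feature, 2)  // true with probability ~p^2
// A negative exponent draws from the complement gate instead:
val inverse: Gate[Unit] = DeciderGate.exp(feature, -2) // true with probability ~(1-p)^2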
- */ - def linear(feature: decider.Feature): Gate[Unit] = - Gate(_ => feature.isAvailable, "DeciderGate.linear(%s)".format(feature)) - - /** - * Create a Gate[Unit] with a probability of returning true - * that increases exponentially with the availability of the feature. - */ - def exp(feature: decider.Feature, exponent: Int): Gate[Unit] = { - val gate = if (exponent >= 0) linear(feature) else !linear(feature) - - @tailrec - def go(exp: Int): Boolean = if (exp == 0) true else (gate() && go(exp - 1)) - - Gate(_ => go(math.abs(exponent)), "DeciderGate.exp(%s, %s)".format(feature, exponent)) - } - - /** - * Create a Gate[Long] that returns true if the given feature is available for an id. - */ - def byId(feature: decider.Feature): Gate[Long] = - Gate(id => feature.isAvailable(id), "DeciderGate.byId(%s)".format(feature)) -} diff --git a/tweetypie/servo/json/BUILD b/tweetypie/servo/json/BUILD deleted file mode 100644 index 9f49967ba..000000000 --- a/tweetypie/servo/json/BUILD +++ /dev/null @@ -1,5 +0,0 @@ -target( - dependencies = [ - "tweetypie/servo/json/src/main/scala/com/twitter/servo/json", - ], -) diff --git a/tweetypie/servo/json/BUILD.docx b/tweetypie/servo/json/BUILD.docx new file mode 100644 index 000000000..caf32c77b Binary files /dev/null and b/tweetypie/servo/json/BUILD.docx differ diff --git a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD deleted file mode 100644 index c641f0626..000000000 --- a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD +++ /dev/null @@ -1,21 +0,0 @@ -scala_library( - sources = ["*.scala"], - platform = "java8", - provides = scala_artifact( - org = "com.twitter", - name = "servo-json", - repo = artifactory, - ), - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "//:scala-reflect", - "3rdparty/jvm/com/fasterxml/jackson/core:jackson-core", - "3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind", - "3rdparty/jvm/com/googlecode/java-diff-utils:diffutils", - "3rdparty/jvm/org/apache/thrift:libthrift", - "scrooge/scrooge-core", - "scrooge/scrooge-serializer", - "util/util-codec/src/main/scala", - ], -) diff --git a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD.docx b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD.docx new file mode 100644 index 000000000..0d6997481 Binary files /dev/null and b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD.docx differ diff --git a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.docx b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.docx new file mode 100644 index 000000000..fc8a96efc Binary files /dev/null and b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.docx differ diff --git a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala deleted file mode 100644 index cb9e65ee8..000000000 --- a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala +++ /dev/null @@ -1,142 +0,0 @@ -package com.twitter.servo.json - -import com.fasterxml.jackson.core.JsonParser -import com.fasterxml.jackson.databind.JsonNode -import com.fasterxml.jackson.databind.ObjectMapper -import com.twitter.scrooge.ThriftStruct -import com.twitter.scrooge.ThriftStructCodec -import com.twitter.scrooge.ThriftStructSerializer -import
difflib.DiffUtils -import java.io.StringWriter -import org.apache.thrift.protocol.TField -import org.apache.thrift.protocol.TProtocol -import org.apache.thrift.protocol.TProtocolFactory -import org.apache.thrift.protocol.TSimpleJSONProtocol -import org.apache.thrift.transport.TTransport -import scala.collection.JavaConverters._ -import scala.language.experimental.macros -import scala.reflect.macros.blackbox.Context - -object ThriftJsonInspector { - private val mapper = new ObjectMapper() - mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) - private val factory = mapper.getFactory() - - private def mkSerializer[T <: ThriftStruct](_codec: ThriftStructCodec[T]) = - new ThriftStructSerializer[T] { - def codec = _codec - - def protocolFactory = - // Identical to TSimpleJSONProtocol.Factory except the TProtocol - // returned serializes Thrift pass-through fields with the name - // "(TField.id)" instead of empty string. - new TProtocolFactory { - def getProtocol(trans: TTransport): TProtocol = - new TSimpleJSONProtocol(trans) { - override def writeFieldBegin(field: TField): Unit = - writeString(if (field.name.isEmpty) s"(${field.id})" else field.name) - } - } - } - - def apply[T <: ThriftStruct](codec: ThriftStructCodec[T]) = new ThriftJsonInspector(codec) -} - -/** - * Helper for human inspection of Thrift objects. - */ -class ThriftJsonInspector[T <: ThriftStruct](codec: ThriftStructCodec[T]) { - import ThriftJsonInspector._ - - private[this] val serializer = mkSerializer(codec) - - /** - * Convert the Thrift object to a JSON representation based on this - * object's codec, in the manner of TSimpleJSONProtocol. The resulting - * JSON will have human-readable field names that match the field - * names that were used in the Thrift definition that the codec was - * created from, but the conversion is lossy, and the JSON - * representation cannot be converted back. - */ - def toSimpleJson(t: T): JsonNode = - mapper.readTree(factory.createParser(serializer.toBytes(t))) - - /** - * Selects requested fields (matching against the JSON fields) from a - * Thrift-generated class. - * - * Paths are specified as slash-separated strings (e.g., - * "key1/key2/key3"). If the path specifies an array or object, it is - * included in the output in JSON format, otherwise the simple value is - * converted to a string. - */ - def select(item: T, paths: Seq[String]): Seq[String] = { - val jsonNode = toSimpleJson(item) - paths.map { - _.split("/").foldLeft(jsonNode)(_.findPath(_)) match { - case node if node.isMissingNode => "[invalid-path]" - case node if node.isContainerNode => node.toString - case node => node.asText - } - } - } - - /** - * Convert the given Thrift struct to a human-readable pretty-printed - * JSON representation. This JSON cannot be converted back into a - * struct. This output is intended for debug logging or interactive - * inspection of Thrift objects. - */ - def prettyPrint(t: T): String = print(t, true) - - def print(t: T, pretty: Boolean = false): String = { - val writer = new StringWriter() - val generator = factory.createGenerator(writer) - if (pretty) - generator.useDefaultPrettyPrinter() - generator.writeTree(toSimpleJson(t)) - writer.toString - } - - /** - * Produce a human-readable unified diff of the json pretty-printed - * representations of `a` and `b`. If the inputs have the same JSON - * representation, the result will be the empty string. 
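Since `select`, `prettyPrint`, and `diff` are the main entry points here, a brief hypothetical usage sketch; `Tweet`, `tweet`, `tweetA`, and `tweetB` are illustrative stand-ins for a scrooge-generated struct companion and instances, not names from this diff:

{{{
val inspector = ThriftJsonInspector(Tweet)
inspector.select(tweet, Seq("id", "core_data/user_id")) // e.g. Seq("123", "456")
inspector.prettyPrint(tweet)                            // indented, lossy JSON
inspector.diff(tweetA, tweetB)                          // unified diff; "" if JSON-equal
}}}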
- */ - def diff(a: T, b: T, contextLines: Int = 1): String = { - val linesA = prettyPrint(a).linesIterator.toList.asJava - val linesB = prettyPrint(b).linesIterator.toList.asJava - val patch = DiffUtils.diff(linesA, linesB) - DiffUtils.generateUnifiedDiff("a", "b", linesA, patch, contextLines).asScala.mkString("\n") - } -} - -object syntax { - private[this] object CompanionObjectLoader { - def load[T](c: Context)(implicit t: c.universe.WeakTypeTag[T]) = { - val tSym = t.tpe.typeSymbol - val companion = tSym.asClass.companion - if (companion == c.universe.NoSymbol) { - c.abort(c.enclosingPosition, s"${tSym} has no companion object") - } else { - c.universe.Ident(companion) - } - } - } - - /** - * Load the companion object of the named type parameter and require - * it to be a ThriftStructCodec. Compilation will fail if the - * companion object is not a ThriftStructCodec. - */ - implicit def thriftStructCodec[T <: ThriftStruct]: ThriftStructCodec[T] = - macro CompanionObjectLoader.load[T] - - implicit class ThriftJsonSyntax[T <: ThriftStruct](t: T)(implicit codec: ThriftStructCodec[T]) { - private[this] def inspector = ThriftJsonInspector(codec) - def toSimpleJson: JsonNode = inspector.toSimpleJson(t) - def prettyPrint: String = inspector.prettyPrint(t) - def diff(other: T, contextLines: Int = 1): String = - inspector.diff(t, other, contextLines) - } -} diff --git a/tweetypie/servo/repo/BUILD b/tweetypie/servo/repo/BUILD deleted file mode 100644 index 66618d7e0..000000000 --- a/tweetypie/servo/repo/BUILD +++ /dev/null @@ -1,5 +0,0 @@ -target( - dependencies = [ - "tweetypie/servo/repo/src/main/scala", - ], -) diff --git a/tweetypie/servo/repo/BUILD.docx b/tweetypie/servo/repo/BUILD.docx new file mode 100644 index 000000000..94b0f30e2 Binary files /dev/null and b/tweetypie/servo/repo/BUILD.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/BUILD b/tweetypie/servo/repo/src/main/scala/BUILD deleted file mode 100644 index c50c57807..000000000 --- a/tweetypie/servo/repo/src/main/scala/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -scala_library( - sources = ["**/*.scala"], - platform = "java8", - provides = scala_artifact( - org = "com.twitter", - name = "servo-repo", - repo = artifactory, - ), - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/google/guava", - "3rdparty/jvm/com/google/inject:guice", - "3rdparty/jvm/org/apache/thrift:libthrift", - "finagle/finagle-core/src/main", - "finagle/finagle-memcached/src/main/scala", - "finagle/finagle-mux/src/main/scala", - "finagle/finagle-thrift", - "scrooge/scrooge-core", - "scrooge/scrooge-serializer/src/main/scala", - "tweetypie/servo/repo/src/main/thrift:thrift-scala", - "tweetypie/servo/util", - "util/util-codec/src/main/scala", - "util/util-hashing/src/main/scala", - "util/util-logging", - "util/util-security/src/main/scala/com/twitter/util/security", - "util/util-stats/src/main/scala", - ], -) diff --git a/tweetypie/servo/repo/src/main/scala/BUILD.docx b/tweetypie/servo/repo/src/main/scala/BUILD.docx new file mode 100644 index 000000000..ef1764122 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/BUILD.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.docx new file mode 100644 index 000000000..2f9a93fd0 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.docx differ diff --git 
a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala deleted file mode 100644 index 6a00220ef..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala +++ /dev/null @@ -1,183 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.{Duration, Future} - -case class ByteCountingMemcacheFactory( - memcacheFactory: MemcacheFactory, - statsReceiver: StatsReceiver, - delimiter: String = constants.Colon, - checksumSize: Int = 8) // memcached checksums are u64s - extends MemcacheFactory { - - def apply() = - new ByteCountingMemcache(memcacheFactory(), statsReceiver, delimiter, checksumSize) -} - -/** - * A decorator around a Memcache that counts the rough number - * of bytes transferred, bucketed & rolled up by in/out, method name, - * and key prefix - */ -class ByteCountingMemcache( - underlying: Memcache, - statsReceiver: StatsReceiver, - delimiter: String, - checksumSize: Int) - extends Memcache { - val scopedReceiver = statsReceiver.scope("memcache").scope("bytes") - - val outStat = scopedReceiver.stat("out") - val outReceiver = scopedReceiver.scope("out") - - val inStat = scopedReceiver.stat("in") - val inReceiver = scopedReceiver.scope("in") - - val getOutStat = outReceiver.stat("get") - val getOutReceiver = outReceiver.scope("get") - - val getInStat = inReceiver.stat("get") - val getInReceiver = inReceiver.scope("get") - val getInHitsStat = getInReceiver.stat("hits") - val getInHitsReceiver = getInReceiver.scope("hits") - val getInMissesStat = getInReceiver.stat("misses") - val getInMissesReceiver = getInReceiver.scope("misses") - - val gwcOutStat = outReceiver.stat("get_with_checksum") - val gwcOutReceiver = outReceiver.scope("get_with_checksum") - - val gwcInStat = inReceiver.stat("get_with_checksum") - val gwcInReceiver = inReceiver.scope("get_with_checksum") - val gwcInHitsStat = gwcOutReceiver.stat("hits") - val gwcInHitsReceiver = gwcOutReceiver.scope("hits") - val gwcInMissesStat = gwcOutReceiver.stat("misses") - val gwcInMissesReceiver = gwcOutReceiver.scope("misses") - - val addStat = outReceiver.stat("add") - val addReceiver = outReceiver.scope("add") - - val setStat = outReceiver.stat("set") - val setReceiver = outReceiver.scope("set") - - val replaceStat = outReceiver.stat("replace") - val replaceReceiver = outReceiver.scope("replace") - - val casStat = outReceiver.stat("check_and_set") - val casReceiver = outReceiver.scope("check_and_set") - - def release() = underlying.release() - - // get namespace from key - protected[this] def ns(key: String) = { - val idx = math.min(key.size - 1, math.max(key.lastIndexOf(delimiter), 0)) - key.substring(0, idx).replaceAll(delimiter, "_") - } - - override def get(keys: Seq[String]): Future[KeyValueResult[String, Array[Byte]]] = { - keys foreach { key => - val size = key.size - outStat.add(size) - getOutStat.add(size) - getOutReceiver.stat(ns(key)).add(size) - } - underlying.get(keys) onSuccess { lr => - lr.found foreach { - case (key, bytes) => - val size = key.size + bytes.length - inStat.add(size) - getInStat.add(size) - getInHitsStat.add(size) - getInHitsReceiver.stat(ns(key)).add(size) - } - lr.notFound foreach { key => - val size = key.size - inStat.add(size) - getInStat.add(size) - getInMissesStat.add(size) - getInMissesReceiver.stat(ns(key)).add(size) - } - } - } - - override def getWithChecksum( - keys: 
Seq[String] - ): Future[CsKeyValueResult[String, Array[Byte]]] = { - keys foreach { key => - val size = key.size - outStat.add(size) - gwcOutStat.add(size) - gwcOutReceiver.stat(ns(key)).add(size) - } - underlying.getWithChecksum(keys) onSuccess { lr => - lr.found foreach { - case (key, (bytes, _)) => - val size = key.size + (bytes map { _.length } getOrElse (0)) + checksumSize - inStat.add(size) - gwcInStat.add(size) - gwcInHitsStat.add(size) - gwcInHitsReceiver.stat(ns(key)).add(size) - } - lr.notFound foreach { key => - val size = key.size - inStat.add(size) - gwcInStat.add(size) - gwcInMissesStat.add(size) - gwcInMissesReceiver.stat(ns(key)).add(size) - } - } - } - - override def add(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = { - val size = key.size + value.size - outStat.add(size) - addStat.add(size) - addReceiver.stat(ns(key)).add(size) - underlying.add(key, value, ttl) - } - - override def checkAndSet( - key: String, - value: Array[Byte], - checksum: Checksum, - ttl: Duration - ): Future[Boolean] = { - val size = key.size + value.size + checksumSize - outStat.add(size) - casStat.add(size) - casReceiver.stat(ns(key)).add(size) - underlying.checkAndSet(key, value, checksum, ttl) - } - - override def set(key: String, value: Array[Byte], ttl: Duration): Future[Unit] = { - val size = key.size + value.size - outStat.add(size) - setStat.add(size) - setReceiver.stat(ns(key)).add(size) - underlying.set(key, value, ttl) - } - - override def replace(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = { - val size = key.size + value.size - outStat.add(size) - replaceStat.add(size) - replaceReceiver.stat(ns(key)).add(size) - underlying.replace(key, value, ttl) - } - - override def delete(key: String): Future[Boolean] = { - outStat.add(key.size) - underlying.delete(key) - } - - override def incr(key: String, delta: Long = 1): Future[Option[Long]] = { - val size = key.size + 8 - outStat.add(size) - underlying.incr(key, delta) - } - - override def decr(key: String, delta: Long = 1): Future[Option[Long]] = { - val size = key.size + 8 - outStat.add(size) - underlying.decr(key, delta) - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.docx new file mode 100644 index 000000000..f2c7b47fc Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala deleted file mode 100644 index c23e6e462..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala +++ /dev/null @@ -1,275 +0,0 @@ -package com.twitter.servo.cache - -import com.google.common.cache.CacheBuilder -import com.twitter.finagle.memcached.util.NotFound -import com.twitter.servo.util.ThreadLocalStringBuilder -import com.twitter.util.{Duration, Future, Return} -import java.util.concurrent.TimeUnit -import scala.collection.mutable -import scala.collection.JavaConverters._ - -/** - * opaque trait used for getWithChecksum calls. 
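A sketch of how the byte-counting wrapper above might be wired in, assuming an existing `underlyingFactory: MemcacheFactory` and a `statsReceiver` (both hypothetical here):

{{{
val memcache: Memcache =
  ByteCountingMemcacheFactory(underlyingFactory, statsReceiver)()
// A get of key "tweet:123" attributes key/value sizes to the "tweet"
// namespace (the prefix before the last ':'), so they roll up under
// e.g. memcache/bytes/out/get/tweet as well as memcache/bytes/out/get.
}}}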
- * the implementation should be private to the cache, - * to inhibit peeking - */ -trait Checksum extends Any - -object ScopedCacheKey { - private[ScopedCacheKey] val builder = new ThreadLocalStringBuilder(64) -} - -/** - * base class for cache keys needing scoping - * - * @param globalNamespace - * the project-level namespace - * @param cacheNamespace - * the cache-level namespace - * @param version - * the version of serialization for values - * @param scopes - * additional key scopes - */ -abstract class ScopedCacheKey( - globalNamespace: String, - cacheNamespace: String, - version: Int, - scopes: String*) { - import constants._ - - override lazy val toString = { - val builder = ScopedCacheKey - .builder() - .append(globalNamespace) - .append(Colon) - .append(cacheNamespace) - .append(Colon) - .append(version) - - scopes foreach { - builder.append(Colon).append(_) - } - - builder.toString - } -} - -/** - * Shared trait for reading from a cache - */ -trait ReadCache[K, V] { - def get(keys: Seq[K]): Future[KeyValueResult[K, V]] - - /** - * get the value with an opaque checksum that can be passed in - * a checkAndSet operation. If there is a deserialization error, - * the checksum is still returned - */ - def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] - - /** - * release any underlying resources - */ - def release(): Unit -} - -/** - * allows one ReadCache to wrap another - */ -trait ReadCacheWrapper[K, V, This <: ReadCache[K, V]] extends ReadCache[K, V] { - def underlyingCache: This - - override def get(keys: Seq[K]) = underlyingCache.get(keys) - - override def getWithChecksum(keys: Seq[K]) = underlyingCache.getWithChecksum(keys) - - override def release() = underlyingCache.release() -} - -/** - * Simple trait for a cache supporting multi-get and single set - */ -trait Cache[K, V] extends ReadCache[K, V] { - def add(key: K, value: V): Future[Boolean] - - def checkAndSet(key: K, value: V, checksum: Checksum): Future[Boolean] - - def set(key: K, value: V): Future[Unit] - - def set(pairs: Seq[(K, V)]): Future[Unit] = { - Future.join { - pairs map { - case (key, value) => set(key, value) - } - } - } - - /** - * Replaces the value for an existing key. If the key doesn't exist, this has no effect. - * @return true if replaced, false if not found - */ - def replace(key: K, value: V): Future[Boolean] - - /** - * Deletes a value from cache. 
- * @return true if deleted, false if not found - */ - def delete(key: K): Future[Boolean] -} - -/** - * allows one cache to wrap another - */ -trait CacheWrapper[K, V] extends Cache[K, V] with ReadCacheWrapper[K, V, Cache[K, V]] { - override def add(key: K, value: V) = underlyingCache.add(key, value) - - override def checkAndSet(key: K, value: V, checksum: Checksum) = - underlyingCache.checkAndSet(key, value, checksum) - - override def set(key: K, value: V) = underlyingCache.set(key, value) - - override def replace(key: K, value: V) = underlyingCache.replace(key, value) - - override def delete(key: K) = underlyingCache.delete(key) -} - -/** - * Switch between two caches with a decider value - */ -class DeciderableCache[K, V](primary: Cache[K, V], secondary: Cache[K, V], isAvailable: => Boolean) - extends CacheWrapper[K, V] { - override def underlyingCache = if (isAvailable) primary else secondary -} - -private object MutableMapCache { - case class IntChecksum(i: Int) extends AnyVal with Checksum -} - -/** - * implementation of a Cache with a mutable.Map - */ -class MutableMapCache[K, V](underlying: mutable.Map[K, V]) extends Cache[K, V] { - import MutableMapCache.IntChecksum - - protected[this] def checksum(value: V): Checksum = IntChecksum(value.hashCode) - - override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = Future { - val founds = Map.newBuilder[K, V] - val iter = keys.iterator - while (iter.hasNext) { - val key = iter.next() - synchronized { - underlying.get(key) - } match { - case Some(v) => founds += key -> v - case None => - } - } - val found = founds.result() - val notFound = NotFound(keys, found.keySet) - KeyValueResult(found, notFound) - } - - override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = Future { - val founds = Map.newBuilder[K, (Return[V], Checksum)] - val iter = keys.iterator - while (iter.hasNext) { - val key = iter.next() - synchronized { - underlying.get(key) - } match { - case Some(value) => founds += key -> (Return(value), checksum(value)) - case None => - } - } - val found = founds.result() - val notFound = NotFound(keys, found.keySet) - KeyValueResult(found, notFound) - } - - override def add(key: K, value: V): Future[Boolean] = - synchronized { - underlying.get(key) match { - case Some(_) => - Future.False - case None => - underlying += key -> value - Future.True - } - } - - override def checkAndSet(key: K, value: V, cs: Checksum): Future[Boolean] = - synchronized { - underlying.get(key) match { - case Some(current) => - if (checksum(current) == cs) { - // checksums match, set value - underlying += key -> value - Future.True - } else { - // checksums didn't match, so no set - Future.False - } - case None => - // if nothing there, the checksums can't be compared - Future.False - } - } - - override def set(key: K, value: V): Future[Unit] = { - synchronized { - underlying += key -> value - } - Future.Done - } - - override def replace(key: K, value: V): Future[Boolean] = synchronized { - if (underlying.contains(key)) { - underlying(key) = value - Future.True - } else { - Future.False - } - } - - override def delete(key: K): Future[Boolean] = synchronized { - if (underlying.remove(key).nonEmpty) Future.True else Future.False - } - - override def release(): Unit = synchronized { - underlying.clear() - } -} - -/** - * In-memory implementation of a cache with LRU semantics and a TTL. 
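The single-key write semantics of the `Cache` trait, as implemented by `MutableMapCache` above, in a small sketch (`Await` is `com.twitter.util.Await`, used here only to make the results visible):

{{{
val cache = new MutableMapCache[String, Int](scala.collection.mutable.Map.empty[String, Int])
Await.result(cache.add("k", 1))     // true: key was absent
Await.result(cache.add("k", 2))     // false: add never overwrites
Await.result(cache.replace("k", 2)) // true: key present
Await.result(cache.delete("k"))     // true; a second delete returns false
}}}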
- */ -class ExpiringLruCache[K, V](ttl: Duration, maximumSize: Int) - extends MutableMapCache[K, V]( - // TODO: consider wiring the Cache interface directly to the - // Guava Cache, instead of introducing two layers of indirection - CacheBuilder.newBuilder - .asInstanceOf[CacheBuilder[K, V]] - .expireAfterWrite(ttl.inMilliseconds, TimeUnit.MILLISECONDS) - .initialCapacity(maximumSize) - .maximumSize(maximumSize) - .build[K, V]() - .asMap - .asScala - ) - -/** - * An empty cache that stays empty - */ -class NullCache[K, V] extends Cache[K, V] { - lazy val futureTrue = Future.value(true) - override def get(keys: Seq[K]) = Future.value(KeyValueResult(notFound = keys.toSet)) - override def getWithChecksum(keys: Seq[K]) = Future.value(KeyValueResult(notFound = keys.toSet)) - override def add(key: K, value: V) = futureTrue - override def checkAndSet(key: K, value: V, checksum: Checksum) = Future.value(true) - override def set(key: K, value: V) = Future.Done - override def replace(key: K, value: V) = futureTrue - override def delete(key: K) = futureTrue - override def release() = () -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.docx new file mode 100644 index 000000000..27519a1ea Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala deleted file mode 100644 index 85359db1a..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala +++ /dev/null @@ -1,153 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.util.Duration -import scala.collection.mutable - -/** - * Used to produce differently-typed caches with the same configuration - * and potentially with shared observation. - */ -trait CacheFactory { - def apply[K, V](serializer: Serializer[V], scopes: String*): Cache[K, V] -} - -/** - * Builds an instance of NullCache. - */ -object NullCacheFactory extends CacheFactory { - val cache = new NullCache[Nothing, Nothing] - - override def apply[K, V](serializer: Serializer[V], scopes: String*): Cache[K, V] = - cache.asInstanceOf[NullCache[K, V]] -} - -/** - * Builds DeciderableCaches, which proxy to one of two caches built from the - * argument CacheFactories depending on a decider value. - */ -case class DeciderableCacheFactory( - primaryCacheFactory: CacheFactory, - secondaryCacheFactory: CacheFactory, - isAvailable: () => Boolean) - extends CacheFactory { - override def apply[K, V](serializer: Serializer[V], scopes: String*) = - new DeciderableCache( - primaryCacheFactory(serializer, scopes: _*), - secondaryCacheFactory(serializer, scopes: _*), - isAvailable() - ) -} - -/** - * Builds MigratingCaches, which support gradual migrations from one cache - * to another. See MigratingCache.scala for details. 
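As the factories defined below show, a single `CacheFactory` configuration can mint caches for several value types; a hedged sketch, where `tweetSerializer`, `Tweet`, and the key types are hypothetical:

{{{
// 10.minutes via com.twitter.conversions.DurationOps._
val factory: CacheFactory = InProcessLruCacheFactory(ttl = 10.minutes, lruSize = 1000)
val tweets: Cache[Long, Tweet] = factory(tweetSerializer, "tweets")
val counts: Cache[Long, Long] = factory(CounterSerializer, "counts")
}}}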
 - */ -case class MigratingCacheFactory(cacheFactory: CacheFactory, darkCacheFactory: CacheFactory) - extends CacheFactory { - override def apply[K, V](serializer: Serializer[V], scopes: String*) = - new MigratingCache( - cacheFactory(serializer, scopes: _*), - darkCacheFactory(serializer, scopes: _*) - ) -} - -case class ObservableCacheFactory(cacheFactory: CacheFactory, cacheObserver: CacheObserver) - extends CacheFactory { - override def apply[K, V](serializer: Serializer[V], scopes: String*) = - new ObservableCache(cacheFactory(serializer), cacheObserver.scope(scopes: _*)) -} - -/** - * Builds in-memory caches with elements that never expire. - */ -case class MutableMapCacheFactory( - serialize: Boolean = false, - useSharedCache: Boolean = false, - keyTransformerFactory: KeyTransformerFactory = ToStringKeyTransformerFactory) - extends CacheFactory { - lazy val sharedCache = mkCache - - def mkCache = { - new MutableMapCache[Object, Object](new mutable.HashMap) - } - - override def apply[K, V](serializer: Serializer[V], scopes: String*) = { - val cache = if (useSharedCache) sharedCache else mkCache - if (serialize) { - new KeyValueTransformingCache( - cache.asInstanceOf[Cache[String, Array[Byte]]], - serializer, - keyTransformerFactory() - ) - } else { - cache.asInstanceOf[Cache[K, V]] - } - } -} - -/** - * Builds in-memory caches with TTL'd entries and LRU eviction policies. - */ -case class InProcessLruCacheFactory( - ttl: Duration, - lruSize: Int, - serialize: Boolean = false, - useSharedCache: Boolean = false, - keyTransformerFactory: KeyTransformerFactory = ToStringKeyTransformerFactory) - extends CacheFactory { - def mkCache = new ExpiringLruCache[Object, Object](ttl, lruSize) - lazy val sharedCache = mkCache - - override def apply[K, V](serializer: Serializer[V], scopes: String*) = { - val cache = if (useSharedCache) sharedCache else mkCache - if (serialize) { - new KeyValueTransformingCache( - cache.asInstanceOf[Cache[String, Array[Byte]]], - serializer, - keyTransformerFactory() - ) - } else { - cache.asInstanceOf[Cache[K, V]] - } - } -} - -/** - * Builds MemcacheCaches, which apply serialization, key-transformation, - * and TTL mechanics to an underlying Memcache. - */ -case class MemcacheCacheFactory( - memcache: Memcache, - ttl: Duration, - keyTransformerFactory: KeyTransformerFactory = ToStringKeyTransformerFactory) - extends CacheFactory { - override def apply[K, V](serializer: Serializer[V], scopes: String*) = - new MemcacheCache(memcache, ttl, serializer, keyTransformerFactory[K]()) -} - -/** - * Builds KeyTransformers, which are required for constructing - * KeyValueTransformingCaches. - */ -trait KeyTransformerFactory { - def apply[K](): KeyTransformer[K] -} - -/** - * Builds KeyTransformers by simply calling the keys' toString methods. - */ -object ToStringKeyTransformerFactory extends KeyTransformerFactory { - def apply[K]() = new ToStringKeyTransformer[K]() -} - -/** - * Builds KeyTransformers that prefix all keys generated by an underlying - * transformer with a string.
- */ -case class PrefixKeyTransformerFactory( - prefix: String, - delimiter: String = constants.Colon, - underlying: KeyTransformerFactory = ToStringKeyTransformerFactory) - extends KeyTransformerFactory { - def apply[K]() = new PrefixKeyTransformer[K](prefix, delimiter, underlying[K]()) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.docx new file mode 100644 index 000000000..f172edbdb Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala deleted file mode 100644 index 9956cb515..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala +++ /dev/null @@ -1,261 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.servo.cache.thriftscala.CachedValueStatus.DoNotCache -import com.twitter.servo.util.{Gate, Transformer} -import com.twitter.util.{Duration, Return, Throw, Time} -import java.nio.ByteBuffer - -object Cached { - - private[this] val millisToTime: Long => Time = - ms => Time.fromMilliseconds(ms) - - private val timeToMills: Time => Long = - time => time.inMilliseconds - - /** - * Deserialize a CachedValue to a Cached[V] - * - * If the ByteBuffer contained in the `cachedValue` is backed by an `Array[Byte]` with its offset - * at 0, we will apply the serializer directly to the backing array for performance reasons. - * - * As such, the `Serializer[V]` the caller provides MUST NOT mutate the buffer it is given. - * This exhortation is also given in com.twitter.servo.util.Transformer, but repeated here. - */ - def apply[V](cachedValue: CachedValue, serializer: Serializer[V]): Cached[V] = { - val value: Option[V] = cachedValue.value match { - case Some(buf) if buf.hasArray && buf.arrayOffset() == 0 => - serializer.from(buf.array).toOption - case Some(buf) => - val array = new Array[Byte](buf.remaining) - buf.duplicate.get(array) - serializer.from(array).toOption - case None => None - } - val status = - if (cachedValue.value.nonEmpty && value.isEmpty) - CachedValueStatus.DeserializationFailed - else - cachedValue.status - - Cached( - value, - status, - Time.fromMilliseconds(cachedValue.cachedAtMsec), - cachedValue.readThroughAtMsec.map(millisToTime), - cachedValue.writtenThroughAtMsec.map(millisToTime), - cachedValue.doNotCacheUntilMsec.map(millisToTime), - cachedValue.softTtlStep - ) - } -} - -/** - * A simple metadata wrapper for cached values. This is stored in the cache - * using the [[com.twitter.servo.cache.thriftscala.CachedValue]] struct, which is similar, but - * untyped. 
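A small sketch of the wrapper's behavior, defined just below: `map` rewrites only the value and keeps all cache metadata. This assumes the conventional `CachedValueStatus.Found` status value; the names are illustrative, not from this diff.

{{{
val cachedName: Cached[String] = Cached(Some("alice"), CachedValueStatus.Found, Time.now)
val cachedLen: Cached[Int] = cachedName.map(_.length) // same cachedAt, status, etc.
}}}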
 - */ -case class Cached[V]( - value: Option[V], - status: CachedValueStatus, - cachedAt: Time, - readThroughAt: Option[Time] = None, - writtenThroughAt: Option[Time] = None, - doNotCacheUntil: Option[Time] = None, - softTtlStep: Option[Short] = None) { - - /** - * produce a new cached value with the same metadata - */ - def map[W](f: V => W): Cached[W] = copy(value = value.map(f)) - - /** - * serialize to a CachedValue - */ - def toCachedValue(serializer: Serializer[V]): CachedValue = { - var serializedValue: Option[ByteBuffer] = None - val cachedValueStatus = value match { - case Some(v) => - serializer.to(v) match { - case Return(sv) => - serializedValue = Some(ByteBuffer.wrap(sv)) - status - case Throw(_) => CachedValueStatus.SerializationFailed - } - case None => status - } - - CachedValue( - serializedValue, - cachedValueStatus, - cachedAt.inMilliseconds, - readThroughAt.map(Cached.timeToMills), - writtenThroughAt.map(Cached.timeToMills), - doNotCacheUntil.map(Cached.timeToMills), - softTtlStep - ) - } - - /** - * Resolves conflicts between a value being inserted into cache and a value already in cache by - * using the time a cached value was last updated. - * If the cached value has a writtenThroughAt, returns it. Otherwise returns readThroughAt, but - * if that doesn't exist, returns cachedAt. - * This makes it favor writes over reads in the event of a race condition. - */ - def effectiveUpdateTime[V](writtenThroughBuffer: Duration = 0.second): Time = { - this.writtenThroughAt match { - case Some(wta) => wta + writtenThroughBuffer - case None => - this.readThroughAt match { - case Some(rta) => rta - case None => this.cachedAt - } - } - } -} - -/** - * Switch between two cache pickers by providing a deciderable gate - */ -class DeciderablePicker[V]( - primaryPicker: LockingCache.Picker[Cached[V]], - secondaryPicker: LockingCache.Picker[Cached[V]], - usePrimary: Gate[Unit], - statsReceiver: StatsReceiver) - extends LockingCache.Picker[Cached[V]] { - private[this] val stats = statsReceiver.scope("deciderable_picker") - private[this] val pickerScope = stats.scope("picker") - private[this] val primaryPickerCount = pickerScope.counter("primary") - private[this] val secondaryPickerCount = pickerScope.counter("secondary") - - private[this] val pickedScope = stats.scope("picked_values") - private[this] val pickedValuesMatched = pickedScope.counter("matched") - private[this] val pickedValuesMismatched = pickedScope.counter("mismatched") - - override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { - val secondaryPickerValue = secondaryPicker(newValue, oldValue) - - if (usePrimary()) { - val primaryPickerValue = primaryPicker(newValue, oldValue) - - primaryPickerCount.incr() - if (primaryPickerValue == secondaryPickerValue) pickedValuesMatched.incr() - else pickedValuesMismatched.incr() - - primaryPickerValue - } else { - secondaryPickerCount.incr() - secondaryPickerValue - } - } - - override def toString(): String = "DeciderablePicker" - -} - -/** - * It's similar to the PreferNewestCached picker, but it prefers the written-through value - * over the read-through value as long as the written-through value + writtenThroughBuffer is - * newer than the read-through value. As in PreferNewestCached, if the cached values - * have the same cache method and time, the picker picks the new value.
 - * - * It is intended to solve a race condition in which read and write requests arrive at the - * same time, but the write request is cached first and then overwritten with - * a stale value from the read request. - * - * If the enabling gate is disabled, it falls back to PreferNewestCached logic. - * - */ -class PreferWrittenThroughCached[V]( - writtenThroughBuffer: Duration = 1.second) - extends PreferNewestCached[V] { - override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { - // the tie goes to newValue - if (oldValue.effectiveUpdateTime(writtenThroughBuffer) > newValue.effectiveUpdateTime( - writtenThroughBuffer)) - None - else - Some(newValue) - } - override def toString(): String = "PreferWrittenThroughCached" -} - -/** - * prefer one value over another based on Cached metadata - */ -class PreferNewestCached[V] extends LockingCache.Picker[Cached[V]] { - - override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { - if (oldValue.effectiveUpdateTime() > newValue.effectiveUpdateTime()) - None - else - Some(newValue) - } - - override def toString(): String = "PreferNewestCached" -} - -/** - * Prefer non-empty values. If a non-empty value is in cache, and the - * value to store is empty, return the non-empty value with a fresh cachedAt - * instead. - */ -class PreferNewestNonEmptyCached[V] extends PreferNewestCached[V] { - override def apply(newValue: Cached[V], oldValue: Cached[V]) = { - (newValue.value, oldValue.value) match { - // Some/Some and None/None cases are handled by the super class - case (Some(_), Some(_)) => super.apply(newValue, oldValue) - case (None, None) => super.apply(newValue, oldValue) - case (Some(_), None) => Some(newValue) - case (None, Some(_)) => Some(oldValue.copy(cachedAt = Time.now)) - } - } -} - -/** - * Prefer do-not-cache entries if they're not expired; otherwise use the fallBackPicker. - * @param fallBackPicker the picker to use when the old value isn't do-not-cache or is expired. - * Defaults to PreferNewestCached. - */ -class PreferDoNotCache[V]( - fallBackPicker: LockingCache.Picker[Cached[V]] = new PreferNewestCached[V]: PreferNewestCached[V], - statsReceiver: StatsReceiver) - extends LockingCache.Picker[Cached[V]] { - private[this] val pickDoNotCacheEntryCounter = statsReceiver.counter("pick_do_not_cache_entry") - private[this] val useFallbackCounter = statsReceiver.counter("use_fallback") - override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { - if (oldValue.status == DoNotCache && oldValue.doNotCacheUntil.forall( - _ > newValue.effectiveUpdateTime())) { // evaluates to true if dnc until is None - pickDoNotCacheEntryCounter.incr() - None - } else { - useFallbackCounter.incr() - fallBackPicker.apply(newValue, oldValue) - } - } -} - -/** - * A Transformer of Cached values composed of a Transformer of the underlying values.
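Before the transformer below, a worked sketch of the pickers just defined (returning None means "keep what is already cached"; `CachedValueStatus.Found` is assumed to be the usual status value):

{{{
val picker = new PreferNewestNonEmptyCached[String]
val old = Cached(Some("v"), CachedValueStatus.Found, cachedAt = Time.fromMilliseconds(0))
val tombstone = old.copy(value = None, cachedAt = Time.now)
picker(tombstone, old) // Some(old.copy(cachedAt = Time.now)): the non-empty "v" survives
new PreferNewestCached[String]()(tombstone, old) // Some(tombstone): newest simply wins
}}}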
- */ -class CachedTransformer[A, B](underlying: Transformer[A, B]) - extends Transformer[Cached[A], Cached[B]] { - def to(cachedA: Cached[A]) = cachedA.value match { - case None => Return(cachedA.copy(value = None)) - case Some(a) => - underlying.to(a) map { b => - cachedA.copy(value = Some(b)) - } - } - - def from(cachedB: Cached[B]) = cachedB.value match { - case None => Return(cachedB.copy(value = None)) - case Some(b) => - underlying.from(b) map { a => - cachedB.copy(value = Some(a)) - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.docx new file mode 100644 index 000000000..e66854cdc Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala deleted file mode 100644 index 5fa06185a..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala +++ /dev/null @@ -1,20 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.util.{Duration, Future} - -trait CounterCache[K] extends Cache[K, Long] { - def incr(key: K, delta: Int = 1): Future[Option[Long]] - def decr(key: K, delta: Int = 1): Future[Option[Long]] -} - -class MemcacheCounterCache[K]( - memcache: Memcache, - ttl: Duration, - transformKey: KeyTransformer[K] = ((k: K) => k.toString): (K => java.lang.String)) - extends MemcacheCache[K, Long](memcache, ttl, CounterSerializer, transformKey) - with CounterCache[K] - -class NullCounterCache[K] extends NullCache[K, Long] with CounterCache[K] { - override def incr(key: K, delta: Int = 1): Future[Option[Long]] = Future.value(Some(0L)) - override def decr(key: K, delta: Int = 1): Future[Option[Long]] = Future.value(Some(0L)) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.docx new file mode 100644 index 000000000..601e5f3d6 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala deleted file mode 100644 index 4711cc2ef..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala +++ /dev/null @@ -1,114 +0,0 @@ -package com.twitter.servo.cache - -import com.google.common.base.Charsets -import com.twitter.util.Try - -/** - * Fast implementation of dealing with memcached counters. - * - * Memcache is funkytown for incr and decr. Basically, you store a number, - * as a STRING, and then incr and decr that. This abstracts over that detail. - * - * This implementation was quite a bit faster than the simple implementation - * of `new String(bytes, Charsets.US_ASCII).toLong()` - * and `Long.toString(value).getBytes()` - * - * Thread-safe. 
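The format implemented by the object just below is the ASCII-decimal string that memcached requires for incr/decr; a round-trip sketch:

{{{
CounterSerializer.to(42L) // Return(Array('4'.toByte, '2'.toByte))
CounterSerializer.from("-17".getBytes(java.nio.charset.StandardCharsets.US_ASCII)) // Return(-17L)
}}}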
- */ -object CounterSerializer extends Serializer[Long] { - private[this] val Minus = '-'.toByte - // The lower bound - private[this] val Zero = '0'.toByte - // The upper bound - private[this] val Nine = '9'.toByte - - // Max length for our byte arrays that'll fit all positive longs - private[this] val MaxByteArrayLength = 19 - - override def to(long: Long): Try[Array[Byte]] = Try { - // NOTE: code based on Long.toString(value), but it avoids creating the - // intermediate String object and the charset encoding in String.getBytes - // This was about 12% faster than calling Long.toString(long).getBytes - if (long == Long.MinValue) { - "-9223372036854775808".getBytes(Charsets.US_ASCII) - } else { - val size = if (long < 0) stringSize(-long) + 1 else stringSize(long) - val bytes = new Array[Byte](size) - - var isNegative = false - var endAt = 0 - var currentLong = if (long < 0) { - isNegative = true - endAt = 1 - -long - } else { - long - } - - // Note: look at the implementation in Long.getChars(long, int, char[]) - // They can do 2 digits at a time for this, so we could speed this up - // See: Division by Invariant Integers using Multiplication - // http://gmplib.org/~tege/divcnst-pldi94.pdf - - // starting at the least significant digit and working our way up... - var pos = size - 1 - do { - val byte = currentLong % 10 - bytes(pos) = (Zero + byte).toByte - currentLong /= 10 - pos -= 1 - } while (currentLong != 0) - - if (isNegative) { - assert(pos == 0, "For value " + long + ", pos " + pos) - bytes(0) = Minus - } - - bytes - } - } - - override def from(bytes: Array[Byte]): Try[Long] = Try { - // This implementation was about 4x faster than the simple: - // new String(bytes, Charsets.US_ASCII).toLong - - if (bytes.length < 1) - throw new NumberFormatException("Empty byte arrays are unsupported") - - val isNegative = bytes(0) == Minus - if (isNegative && bytes.length == 1) - throw new NumberFormatException(bytes.mkString(",")) - - // we count in negative numbers so we don't have problems at Long.MaxValue - var total = 0L - val endAt = bytes.length - var i = if (isNegative) 1 else 0 - while (i < endAt) { - val b = bytes(i) - if (b < Zero || b > Nine) - throw new NumberFormatException(bytes.mkString(",")) - - val int = b - Zero - total = (total * 10L) - int - - i += 1 - } - - if (isNegative) total else -total - } - - /** - * @param long must be non-negative - */ - private[this] def stringSize(long: Long): Int = { - var p = 10 - var i = 1 - while (i < MaxByteArrayLength) { - if (long < p) return i - p *= 10 - i += 1 - } - MaxByteArrayLength - } - -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.docx new file mode 100644 index 000000000..20aa5f655 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala deleted file mode 100644 index 0cd3153a7..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala +++ /dev/null @@ -1,149 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.finagle.memcached.{CasResult, Client} -import com.twitter.finagle.service.RetryPolicy -import com.twitter.finagle.{Backoff, Memcached, TimeoutException, WriteException} -import com.twitter.hashing.KeyHasher -import 
com.twitter.io.Buf -import com.twitter.logging.Logger -import com.twitter.util._ - -case class MemcacheRetryPolicy( - writeExceptionBackoffs: Backoff, - timeoutBackoffs: Backoff) - extends RetryPolicy[Try[Nothing]] { - override def apply(r: Try[Nothing]) = r match { - case Throw(_: WriteException) => onWriteException - case Throw(_: TimeoutException) => onTimeoutException - case _ => None - } - - private[this] def onTimeoutException = consume(timeoutBackoffs.toStream) { tail => - copy(timeoutBackoffs = Backoff.fromStream(tail)) - } - - private[this] def onWriteException = consume(writeExceptionBackoffs.toStream) { tail => - copy(writeExceptionBackoffs = Backoff.fromStream(tail)) - } - - private[this] def consume(s: Stream[Duration])(f: Stream[Duration] => MemcacheRetryPolicy) = { - s.headOption map { duration => - (duration, f(s.tail)) - } - } -} - -object FinagleMemcacheFactory { - val DefaultHashName = "fnv1-32" - - def apply(client: Memcached.Client, dest: String, hashName: String = DefaultHashName) = - new FinagleMemcacheFactory(client, dest, hashName) -} - -class FinagleMemcacheFactory private[cache] ( - client: Memcached.Client, - dest: String, - hashName: String) - extends MemcacheFactory { - - def apply(): Memcache = { - val keyHasher = KeyHasher.byName(hashName) - new FinagleMemcache(client.withKeyHasher(keyHasher).newTwemcacheClient(dest), hashName) - } -} - -object FinagleMemcache { - val NoFlags = 0 - val logger = Logger(getClass) -} - -/** - * Adapter for a [[Memcache]] (type alias for [[TtlCache]]) from a Finagle Memcached - * [[Client]]. - */ -class FinagleMemcache(client: Client, hashName: String = FinagleMemcacheFactory.DefaultHashName) - extends Memcache { - - import FinagleMemcache.NoFlags - - private[this] case class BufferChecksum(buffer: Buf) extends Checksum - - def release(): Unit = { - client.close() - } - - override def get(keys: Seq[String]): Future[KeyValueResult[String, Array[Byte]]] = - client.getResult(keys).transform { - case Return(gr) => - val found = gr.hits.map { - case (key, v) => - val bytes = Buf.ByteArray.Owned.extract(v.value) - key -> bytes - } - Future.value(KeyValueResult(found, gr.misses, gr.failures)) - - case Throw(t) => - Future.value(KeyValueResult(failed = keys.map(_ -> t).toMap)) - } - - override def getWithChecksum(keys: Seq[String]): Future[CsKeyValueResult[String, Array[Byte]]] = - client.getsResult(keys).transform { - case Return(gr) => - try { - val hits = gr.hits map { - case (key, v) => - val bytes = Buf.ByteArray.Owned.extract(v.value) - key -> (Return(bytes), BufferChecksum( - v.casUnique.get - )) // TODO. what to do if missing? 
- } - Future.value(KeyValueResult(hits, gr.misses, gr.failures)) - } catch { - case t: Throwable => - Future.value(KeyValueResult(failed = keys.map(_ -> t).toMap)) - } - case Throw(t) => - Future.value(KeyValueResult(failed = keys.map(_ -> t).toMap)) - } - - private val jb2sb: java.lang.Boolean => Boolean = _.booleanValue - private val jl2sl: java.lang.Long => Long = _.longValue - - override def add(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = - client.add(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value)) map jb2sb - - override def checkAndSet( - key: String, - value: Array[Byte], - checksum: Checksum, - ttl: Duration - ): Future[Boolean] = { - checksum match { - case BufferChecksum(cs) => - client.checkAndSet(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value), cs) map { - res: CasResult => - res.replaced - } - case _ => - Future.exception(new IllegalArgumentException("unrecognized checksum: " + checksum)) - } - } - - override def set(key: String, value: Array[Byte], ttl: Duration): Future[Unit] = - client.set(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value)) - - override def replace(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = - client.replace(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value)) map jb2sb - - override def delete(key: String): Future[Boolean] = - client.delete(key) map jb2sb - - def incr(key: String, delta: Long = 1): Future[Option[Long]] = - client.incr(key, delta) map { _ map jl2sl } - - def decr(key: String, delta: Long = 1): Future[Option[Long]] = - client.decr(key, delta) map { _ map jl2sl } - - // NOTE: This is the only reason that hashName is passed as a param to FinagleMemcache. - override lazy val toString = "FinagleMemcache(%s)".format(hashName) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.docx new file mode 100644 index 000000000..786dd88a1 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala deleted file mode 100644 index 86c7f495a..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala +++ /dev/null @@ -1,186 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.util.{Future, Return} -import scala.collection.mutable - -/** - * uses a forwarding cache to lookup a value by a secondary index. 
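A hedged construction sketch for the class defined below; `MediaId`, `TweetId`, `Tweet`, the caches, and the field accesses are hypothetical stand-ins, not names from this diff:

{{{
val byMediaId: LockingCache[MediaId, Cached[Tweet]] =
  new ForwardingCache[MediaId, TweetId, Tweet](
    forwardingCache = mediaIdToTweetId, // Cache[MediaId, Cached[TweetId]]
    underlyingCache = tweetCache,       // SecondaryIndexingCache[TweetId, _, Tweet]
    primaryKey = (t: Tweet) => t.id,
    secondaryKey = (t: Tweet) => Return(t.mediaId), // Try[Option[MediaId]]
    lockingCacheFactory = lockingFactory
  )
}}}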
- * filters out values for which the requested secondary index does not - * match the actual secondary index (these are treated as a miss) - */ -class ForwardingCache[K, F, V]( - forwardingCache: Cache[K, Cached[F]], - underlyingCache: SecondaryIndexingCache[F, _, V], - primaryKey: V => F, - secondaryKey: SecondaryIndexingCache.IndexMapping[K, V], - lockingCacheFactory: LockingCacheFactory) - extends LockingCache[K, Cached[V]] { - protected[this] case class ForwardingChecksum( - forwardingChecksum: Checksum, - underlyingChecksum: Option[Checksum]) - extends Checksum - - protected[this] val lockingUnderlying = lockingCacheFactory(underlyingCache) - protected[this] val lockingForwarding = lockingCacheFactory(forwardingCache) - - override def get(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = { - forwardingCache.get(keys) flatMap { flr => - val (tombstones, notTombstones) = { - val tombstones = mutable.Map.empty[K, Cached[F]] - val notTombstones = mutable.Map.empty[F, K] - // split results into tombstoned keys and non-tombstoned key/pKeys - // while we're at it, produce a reverse-keymap of non-tombstones - flr.found foreach { - case (key, cachedPKey) => - cachedPKey.value match { - case Some(pKey) => notTombstones += pKey -> key - case None => tombstones += key -> cachedPKey - } - } - (tombstones.toMap, notTombstones.toMap) - } - - // only make call to underlyingCache if there are keys to lookup - val fromUnderlying = if (notTombstones.isEmpty) { - KeyValueResult.emptyFuture - } else { - // get non-tombstoned values from underlying cache - underlyingCache.get(notTombstones.keys.toSeq) map { lr => - val (goodValues, badValues) = lr.found partition { - case (pKey, cachedValue) => - // filter out values that somehow don't match the primary key and secondary key - cachedValue.value match { - case Some(value) => - secondaryKey(value) match { - case Return(Some(sKey)) => - pKey == primaryKey(value) && sKey == notTombstones(pKey) - case _ => false - } - case None => true - } - } - val found = goodValues map { case (k, v) => notTombstones(k) -> v } - val notFound = (lr.notFound ++ badValues.keySet) map { notTombstones(_) } - val failed = lr.failed map { case (k, t) => notTombstones(k) -> t } - KeyValueResult(found, notFound, failed) - } handle { - case t => - KeyValueResult(failed = notTombstones.values map { _ -> t } toMap) - } - } - - fromUnderlying map { lr => - // fill in tombstone values, copying the metadata from the Cached[F] - val withTombstones = tombstones map { - case (key, cachedPKey) => - key -> cachedPKey.copy[V](value = None) - } - val found = lr.found ++ withTombstones - val notFound = flr.notFound ++ lr.notFound - val failed = flr.failed ++ lr.failed - KeyValueResult(found, notFound, failed) - } - } - } - - // since we implement lockAndSet directly, we don't support getWithChecksum and checkAndSet. - // we should consider changing the class hierarchy of Cache/LockingCache so that this can - // be checked at compile time. 
- - override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, Cached[V]]] = - Future.exception(new UnsupportedOperationException("Use lockAndSet directly")) - - override def checkAndSet(key: K, cachedValue: Cached[V], checksum: Checksum): Future[Boolean] = - Future.exception(new UnsupportedOperationException("Use lockAndSet directly")) - - protected[this] def maybeAddForwardingIndex( - key: K, - cachedPrimaryKey: Cached[F], - wasAdded: Boolean - ): Future[Boolean] = { - if (wasAdded) - forwardingCache.set(key, cachedPrimaryKey) map { _ => - true - } - else - Future.value(false) - } - - override def add(key: K, cachedValue: Cached[V]): Future[Boolean] = { - // copy the cache metadata to the primaryKey - val cachedPrimaryKey = cachedValue map { primaryKey(_) } - cachedPrimaryKey.value match { - case Some(pKey) => - // if a value can be derived from the key, use the underlying cache to add it - // the underlying cache will create the secondary index as a side-effect - underlyingCache.add(pKey, cachedValue) - case None => - // otherwise, we're just writing a tombstone, so we need to check if it exists - forwardingCache.add(key, cachedPrimaryKey) - } - } - - override def lockAndSet( - key: K, - handler: LockingCache.Handler[Cached[V]] - ): Future[Option[Cached[V]]] = { - handler(None) match { - case Some(cachedValue) => - cachedValue.value match { - case Some(value) => - // set on the underlying cache, and let it take care of adding - // the secondary index - val pKey = primaryKey(value) - lockingUnderlying.lockAndSet(pKey, handler) - case None => - // no underlying value to set, so just write the forwarding entry. - // secondaryIndexingCache doesn't lock for this set, so there's - // no point in our doing it. There's a slight risk of writing an - // errant tombstone in a race, but the only way to get around this - // would be to lock around *all* primary and secondary indexes, - // which could produce deadlocks, which is probably worse. 
- val cachedEmptyPKey = cachedValue.copy[F](value = None) - forwardingCache.set(key, cachedEmptyPKey) map { _ => - Some(cachedValue) - } - } - case None => - // nothing to do here - Future.value(None) - } - } - - override def set(key: K, cachedValue: Cached[V]): Future[Unit] = { - cachedValue.value match { - case Some(value) => - // set on the underlying cache, and let it take care of adding - // the secondary index - val pKey = primaryKey(value) - underlyingCache.set(pKey, cachedValue) - case None => - // no underlying value to set, so just write the forwarding entry - forwardingCache.set(key, cachedValue.copy[F](value = None)) - } - } - - override def replace(key: K, cachedValue: Cached[V]): Future[Boolean] = { - cachedValue.value match { - case Some(value) => - // replace in the underlying cache, and let it take care of adding the secondary index - val pKey = primaryKey(value) - underlyingCache.replace(pKey, cachedValue) - case None => - // no underlying value to set, so just write the forwarding entry - forwardingCache.replace(key, cachedValue.copy[F](value = None)) - } - } - - override def delete(key: K): Future[Boolean] = { - forwardingCache.delete(key) - } - - override def release(): Unit = { - forwardingCache.release() - underlyingCache.release() - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.docx new file mode 100644 index 000000000..767f3ccb3 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala deleted file mode 100644 index af29080e4..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala +++ /dev/null @@ -1,109 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.finagle.memcached.Client -import com.twitter.finagle.memcached.protocol.Value -import com.twitter.finagle.memcached.GetResult -import com.twitter.finagle.memcached.ProxyClient -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.tracing.Trace -import com.twitter.io.Buf -import com.twitter.logging.Logger -import com.twitter.util.Future -import scala.collection.breakOut - -object HotKeyCachingCache { - private[cache] val logger = Logger.get(getClass) -} - -/** - * Wrapper for a [[com.twitter.finagle.Memcached.Client]] that handles in-process caching for - * values flagged for promotion ("hot keys") by a twemcache backend. - * - * This is similar conceptually to - * [[com.twitter.servo.repository.HotKeyCachingKeyValueRepository]] but differs because - * HotKeyCachingKeyValueRepository detects hot keys in the client, which requires tuning and - * becomes less effective as the number of instances in the cluster grows. [[HotKeyMemcacheClient]] - * uses detection in the memcache server, which is centralized and has a better view of frequently - * accessed keys. This is a custom feature in twemcache, Twitter's memcache fork, that is not - * enabled by default. Consult with the cache team if you want to use it. - * - * Usage: - * {{{ - * new HotKeyMemcacheClient( - * underlyingCache = Memcached.client. ... 
.newRichClient(destination), - * inProcessCache = ExpiringLruInProcessCache(ttl = 10.seconds, maximumSize = 100), - * statsReceiver = statsReceiver.scope("inprocess") - * ) - * }}} - */ -class HotKeyMemcacheClient( - override val proxyClient: Client, - inProcessCache: InProcessCache[String, Value], - statsReceiver: StatsReceiver, - label: Option[String] = None) - extends ProxyClient { - import HotKeyCachingCache._ - - private val promotions = statsReceiver.counter("promotions") - private val hits = statsReceiver.counter("hits") - private val misses = statsReceiver.counter("misses") - - private def cacheIfPromoted(key: String, value: Value): Unit = { - if (value.flags.exists(MemcacheFlags.shouldPromote)) { - logger.debug(s"Promoting hot-key $key flagged by memcached backend to in-process cache.") - Trace.recordBinary("hot_key_cache.hot_key_promoted", s"${label.getOrElse("")},$key") - promotions.incr() - inProcessCache.set(key, value) - } - } - - override def getResult(keys: Iterable[String]): Future[GetResult] = { - val resultsFromInProcessCache: Map[String, Value] = - keys.flatMap(k => inProcessCache.get(k).map(v => (k, v)))(breakOut) - val foundInProcess = resultsFromInProcessCache.keySet - val newKeys = keys.filterNot(foundInProcess.contains) - - hits.incr(foundInProcess.size) - misses.incr(newKeys.size) - - if (foundInProcess.nonEmpty) { - // If there are hot keys found in the cache, record a trace annotation with the format: - // hot key cache client label;the number of hits;number of misses;and the set of hot keys found in the cache. - Trace.recordBinary( - "hot_key_cache", - s"${label.getOrElse("")};${foundInProcess.size};${newKeys.size};${foundInProcess.mkString(",")}" - ) - } - - proxyClient.getResult(newKeys).map { result => - result.hits.foreach { case (k, v) => cacheIfPromoted(k, v) } - result.copy(hits = result.hits ++ resultsFromInProcessCache) - } - } - - /** - * Exposes whether or not a key was promoted to the in-process hot key cache. In most cases, users - * of [[HotKeyMemcacheClient]] should not need to know this. However, they may if hot key caching - * conflicts with other layers of caching they are using. - */ - def isHotKey(key: String): Boolean = inProcessCache.get(key).isDefined -} - -// TODO: May want to turn flags into a value class in com.twitter.finagle.memcached -// with methods for these operations -object MemcacheFlags { - val FrequencyBasedPromotion: Int = 1 - val BandwidthBasedPromotion: Int = 1 << 1 - val Promotable: Int = FrequencyBasedPromotion | BandwidthBasedPromotion - - /** - * Memcache flags are returned as an unsigned integer, represented as a decimal string. - * - * Check whether the bit in position 0 ([[FrequencyBasedPromotion]]) or the bit in position 1 - * ([[BandwidthBasedPromotion]]) is set to 1 (zero-index from least-significant bit).
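Given the bit layout above, the check below reduces to a bitmask on the parsed flags; for instance:

{{{
MemcacheFlags.shouldPromote(Buf.Utf8("1")) // true: frequency-based bit set
MemcacheFlags.shouldPromote(Buf.Utf8("2")) // true: bandwidth-based bit set
MemcacheFlags.shouldPromote(Buf.Utf8("4")) // false: neither promotable bit set
}}}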
- */ - def shouldPromote(flagsBuf: Buf): Boolean = { - val flags = flagsBuf match { case Buf.Utf8(s) => s.toInt } - (flags & Promotable) != 0 - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.docx new file mode 100644 index 000000000..bb0e584b9 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala deleted file mode 100644 index a47e0f7a1..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala +++ /dev/null @@ -1,63 +0,0 @@ -package com.twitter.servo.cache - -import com.google.common.cache.{CacheBuilder, RemovalListener} -import com.twitter.util.Duration -import java.util.concurrent.TimeUnit - -object InProcessCache { - - /** - * Apply a read filter to exclude items in an InProcessCache - */ - def withFilter[K, V]( - underlying: InProcessCache[K, V] - )( - shouldFilter: (K, V) => Boolean - ): InProcessCache[K, V] = - new InProcessCache[K, V] { - def get(key: K): Option[V] = underlying.get(key) filterNot { shouldFilter(key, _) } - def set(key: K, value: V) = underlying.set(key, value) - } -} - -/** - * An in-process cache interface. It is distinct from a map in that: - * 1) All methods must be threadsafe - * 2) A value set in cache is not guaranteed to remain in the cache. - */ -trait InProcessCache[K, V] { - def get(key: K): Option[V] - def set(key: K, value: V): Unit -} - -/** - * In-process implementation of a cache with LRU semantics and a TTL. - */ -class ExpiringLruInProcessCache[K, V]( - ttl: Duration, - maximumSize: Int, - removalListener: Option[RemovalListener[K, V]] = None: None.type) - extends InProcessCache[K, V] { - - private[this] val cacheBuilder = - CacheBuilder.newBuilder - .asInstanceOf[CacheBuilder[K, V]] - .expireAfterWrite(ttl.inMilliseconds, TimeUnit.MILLISECONDS) - .initialCapacity(maximumSize) - .maximumSize(maximumSize) - - private[this] val cache = - removalListener match { - case Some(listener) => - cacheBuilder - .removalListener(listener) - .build[K, V]() - case None => - cacheBuilder - .build[K, V]() - } - - def get(key: K): Option[V] = Option(cache.getIfPresent(key)) - - def set(key: K, value: V): Unit = cache.put(key, value) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.docx new file mode 100644 index 000000000..6a0df00c6 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala deleted file mode 100644 index 0228b4a0f..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala +++ /dev/null @@ -1,84 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.util.{Throw, Return, Try} -import java.io.{DataOutputStream, ByteArrayOutputStream} -import java.nio.ByteBuffer -import scala.collection.mutable -import scala.util.control.NonFatal - -object IterableSerializer { - // Serialized format for version 0: - // Header: - // 1 byte - Version - 
// 4 byte - number of items - // Data, 1 per item: - // 4 bytes - item length in bytes (n) - // n bytes - item data - val FormatVersion = 0 -} - -/** - * A `Serializer` for `Iterable[T]`s. - * - * @param itemSerializer a Serializer for the individual elements. - * @param itemSizeEstimate estimated size in bytes of individual elements - */ -class IterableSerializer[T, C <: Iterable[T]]( - newBuilder: () => mutable.Builder[T, C], - itemSerializer: Serializer[T], - itemSizeEstimate: Int = 8) - extends Serializer[C] { - import IterableSerializer.FormatVersion - - if (itemSizeEstimate <= 0) { - throw new IllegalArgumentException( - "Item size estimate must be positive. Invalid estimate provided: " + itemSizeEstimate - ) - } - - override def to(iterable: C): Try[Array[Byte]] = Try { - assert(iterable.hasDefiniteSize, "Must have a definite size: %s".format(iterable)) - - val numItems = iterable.size - val baos = new ByteArrayOutputStream(1 + 4 + (numItems * (4 + itemSizeEstimate))) - val output = new DataOutputStream(baos) - - // Write serialization version format and set length. - output.writeByte(FormatVersion) - output.writeInt(numItems) - - iterable.foreach { item => - val itemBytes = itemSerializer.to(item).get() - output.writeInt(itemBytes.length) - output.write(itemBytes) - } - output.flush() - baos.toByteArray() - } - - override def from(bytes: Array[Byte]): Try[C] = { - try { - val buf = ByteBuffer.wrap(bytes) - val formatVersion = buf.get() - if (formatVersion < 0 || formatVersion > FormatVersion) { - Throw(new IllegalArgumentException("Invalid serialization format: " + formatVersion)) - } else { - val numItems = buf.getInt() - val builder = newBuilder() - builder.sizeHint(numItems) - - var i = 0 - while (i < numItems) { - val itemBytes = new Array[Byte](buf.getInt()) - buf.get(itemBytes) - val item = itemSerializer.from(itemBytes).get() - builder += item - i += 1 - } - Return(builder.result()) - } - } catch { - case NonFatal(e) => Throw(e) - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.docx new file mode 100644 index 000000000..479976a93 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala deleted file mode 100644 index 8caea385a..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala +++ /dev/null @@ -1,51 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.util.Future - -/** - * A cache wrapper that makes the underlying cache transparent to - * certain keys. 
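- *
- * A minimal sketch of wiring this up (the predicate, key type, and `underlying`
- * cache are illustrative):
- * {{{
- *   // negative ids bypass the cache entirely; they are never read or written
- *   val filtered: Cache[Long, Array[Byte]] =
- *     new KeyFilteringCache(underlying, (id: Long) => id >= 0)
- * }}}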
- */ -class KeyFilteringCache[K, V](val underlyingCache: Cache[K, V], keyPredicate: K => Boolean) - extends CacheWrapper[K, V] { - override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = - underlyingCache.get(keys filter keyPredicate) - - override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = - underlyingCache.getWithChecksum(keys filter keyPredicate) - - override def add(key: K, value: V) = - if (keyPredicate(key)) { - underlyingCache.add(key, value) - } else { - Future.True - } - - override def checkAndSet(key: K, value: V, checksum: Checksum) = - if (keyPredicate(key)) { - underlyingCache.checkAndSet(key, value, checksum) - } else { - Future.True - } - - override def set(key: K, value: V) = - if (keyPredicate(key)) { - underlyingCache.set(key, value) - } else { - Future.Done - } - - override def replace(key: K, value: V) = - if (keyPredicate(key)) { - underlyingCache.replace(key, value) - } else { - Future.True - } - - override def delete(key: K) = - if (keyPredicate(key)) { - underlyingCache.delete(key) - } else { - Future.True - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.docx new file mode 100644 index 000000000..f84baf1c9 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala deleted file mode 100644 index fb7641b9e..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala +++ /dev/null @@ -1,21 +0,0 @@ -package com.twitter.servo.cache - -/** - * Converts all keys to a string via .toString - */ -class ToStringKeyTransformer[K] extends KeyTransformer[K] { - override def apply(key: K) = key.toString -} - -/** - * Prefixes all keys with a string - */ -class PrefixKeyTransformer[K]( - prefix: String, - delimiter: String = constants.Colon, - underlying: KeyTransformer[K] = new ToStringKeyTransformer[K]: ToStringKeyTransformer[K]) - extends KeyTransformer[K] { - private[this] val fullPrefix = prefix + delimiter - - override def apply(key: K) = fullPrefix + underlying(key) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.docx new file mode 100644 index 000000000..f6b667ea8 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala deleted file mode 100644 index caf990303..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala +++ /dev/null @@ -1,486 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.service.RetryPolicy -import com.twitter.finagle.partitioning.FailureAccrualException -import com.twitter.finagle.Backoff -import com.twitter.finagle.stats.{NullStatsReceiver, Stat, StatsReceiver} -import com.twitter.logging.{Level, Logger} -import com.twitter.servo.util.{ExceptionCounter, RateLimitingLogger} -import com.twitter.util._ -import scala.util.control.NoStackTrace - -object LockingCache { - - /** - * first 
argument is value to store, second argument is value in cache, - * returns an Option of the value to be stored. None should be interpreted - * as "don't store anything" - */ - type Picker[V] = (V, V) => Option[V] - - /** - * argument is value, if any, in cache. - * return type is value, if any, to be stored in cache. - * returning None means nothing will be done. - */ - type Handler[V] = Option[V] => Option[V] - - case class AlwaysSetHandler[V](value: Option[V]) extends Handler[V] { - override def apply(ignored: Option[V]) = value - } - - case class PickingHandler[V](newValue: V, pick: Picker[V]) extends Handler[V] { - override def apply(inCache: Option[V]): Option[V] = - inCache match { - case None => - // if nothing in cache, go ahead and store! - Some(newValue) - case Some(oldValue) => - // if something in cache, store a picked value based on - // what's in cache and what's being stored - pick(newValue, oldValue) - } - - // apparently case classes that extend functions don't get pretty toString methods - override lazy val toString = "PickingHandler(%s, %s)".format(newValue, pick) - } - - case class UpdateOnlyPickingHandler[V](newValue: V, pick: Picker[V]) extends Handler[V] { - override def apply(inCache: Option[V]): Option[V] = - inCache match { - case None => - // if nothing in cache, do not update - None - case Some(oldValue) => - // if something in cache, store a picked value based on - // what's in cache and what's being stored - pick(newValue, oldValue) - } - - // apparently case classes that extend functions don't get pretty toString methods - override lazy val toString = "UpdateOnlyPickingHandler(%s, %s)".format(newValue, pick) - } -} - -trait LockingCacheFactory { - def apply[K, V](cache: Cache[K, V]): LockingCache[K, V] - def scope(scopes: String*): LockingCacheFactory -} - -/** - * A cache that enforces a consistent view of values between the time when a set - * is initiated and when the value is actually updated in cache. - */ -trait LockingCache[K, V] extends Cache[K, V] { - - /** - * Look up a value and dispatch based on the result. The particular locking - * approach is defined by the implementing class. May call handler multiple - * times as part of more elaborate locking and retry looping. - * - * Overview of semantics: - * `handler(None)` is called if no value is present in cache. - * `handler(Some(value))` is called if a value is present. - * `handler(x)` should return None if nothing should be done and `Some(value)` - * if a value should be set. 
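- *
- * For example, a handler built from a [[LockingCache.Picker]] that writes on a miss
- * and otherwise keeps the larger of the incoming and cached values (an illustrative
- * sketch; `cache` and `key` stand in for real instances):
- * {{{
- *   val handler: LockingCache.Handler[Long] =
- *     LockingCache.PickingHandler(42L, (newV: Long, oldV: Long) =>
- *       if (newV > oldV) Some(newV) else None)
- *   cache.lockAndSet(key, handler)
- * }}}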
- * - * @return the value that was actually set - */ - def lockAndSet(key: K, handler: LockingCache.Handler[V]): Future[Option[V]] -} - -class OptimisticLockingCacheObserver(statsReceiver: StatsReceiver) { - import OptimisticLockingCache._ - - private[this] val scopedReceiver = statsReceiver.scope("locking_cache") - - private[this] val successCounter = scopedReceiver.counter("success") - private[this] val failureCounter = scopedReceiver.counter("failure") - private[this] val exceptionCounter = new ExceptionCounter(scopedReceiver) - private[this] val lockAndSetStat = scopedReceiver.stat("lockAndSet") - - def time[V](f: => Future[Option[V]]): Future[Option[V]] = { - Stat.timeFuture(lockAndSetStat) { - f - } - } - - def success(attempts: Seq[FailedAttempt]): Unit = { - successCounter.incr() - countAttempts(attempts) - } - - def failure(attempts: Seq[FailedAttempt]): Unit = { - failureCounter.incr() - countAttempts(attempts) - } - - def scope(s: String*): OptimisticLockingCacheObserver = - s.toList match { - case Nil => this - case head :: tail => - new OptimisticLockingCacheObserver(statsReceiver.scope(head)).scope(tail: _*) - } - - private[this] def countAttempts(attempts: Seq[FailedAttempt]): Unit = { - attempts foreach { attempt => - val name = attempt.getClass.getSimpleName - scopedReceiver.counter(name).incr() - attempt.maybeThrowable foreach { t => - exceptionCounter(t) - scopedReceiver.scope(name).counter(t.getClass.getName).incr() - } - } - } -} - -case class OptimisticLockingCacheFactory( - backoffs: Backoff, - observer: OptimisticLockingCacheObserver = new OptimisticLockingCacheObserver(NullStatsReceiver), - timer: Timer = new NullTimer, - // Enabling key logging may unintentionally cause inclusion of sensitive data - // in service logs and any accompanying log sinks such as Splunk. By default, this is disabled, - // however may be optionally enabled for the purpose of debugging. Caution is warranted. 
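-  // For example (based on the retry path below): with enableKeyLogging = true a terminal
-  // failure surfaces as "lockAndSet failed for <key>", whereas with it disabled the key is
-  // omitted from the message.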
-  enableKeyLogging: Boolean = false)
-    extends LockingCacheFactory {
-  def this(
-    backoffs: Backoff,
-    statsReceiver: StatsReceiver,
-    timer: Timer,
-    enableKeyLogging: Boolean
-  ) = this(backoffs, new OptimisticLockingCacheObserver(statsReceiver), timer, enableKeyLogging)
-
-  override def apply[K, V](cache: Cache[K, V]): LockingCache[K, V] = {
-    new OptimisticLockingCache(cache, backoffs, observer, timer, enableKeyLogging)
-  }
-
-  override def scope(scopes: String*): LockingCacheFactory = {
-    // propagate enableKeyLogging so that re-scoping does not silently reset it to false
-    new OptimisticLockingCacheFactory(backoffs, observer.scope(scopes: _*), timer, enableKeyLogging)
-  }
-}
-
-object OptimisticLockingCache {
-  private[this] val FutureNone = Future.value(None)
-
-  def emptyFutureNone[V] = FutureNone.asInstanceOf[Future[Option[V]]]
-
-  sealed abstract class FailedAttempt(val maybeThrowable: Option[Throwable])
-      extends Exception
-      with NoStackTrace
-  case class GetWithChecksumException(t: Throwable) extends FailedAttempt(Some(t))
-  case object GetWithChecksumEmpty extends FailedAttempt(None)
-  case object CheckAndSetFailed extends FailedAttempt(None)
-  case class CheckAndSetException(t: Throwable) extends FailedAttempt(Some(t))
-  case class AddException(t: Throwable) extends FailedAttempt(Some(t))
-
-  case class LockAndSetFailure(str: String, attempts: Seq[FailedAttempt])
-      extends Exception(
-        str,
-        // if the last exception was an RPC exception, try to recover the stack trace
-        attempts.lastOption.flatMap(_.maybeThrowable).orNull
-      )
-
-  private def retryPolicy(backoffs: Backoff): RetryPolicy[Try[Nothing]] =
-    RetryPolicy.backoff(backoffs) {
-      case Throw(_: FailureAccrualException) => false
-      case _ => true
-    }
-}
-
-/**
- * Implementation of a LockingCache using add/getWithChecksum/checkAndSet.
- */
-class OptimisticLockingCache[K, V](
-  override val underlyingCache: Cache[K, V],
-  retryPolicy: RetryPolicy[Try[Nothing]],
-  observer: OptimisticLockingCacheObserver,
-  timer: Timer,
-  enableKeyLogging: Boolean)
-    extends LockingCache[K, V]
-    with CacheWrapper[K, V] {
-  import LockingCache._
-  import OptimisticLockingCache._
-
-  def this(
-    underlyingCache: Cache[K, V],
-    retryPolicy: RetryPolicy[Try[Nothing]],
-    observer: OptimisticLockingCacheObserver,
-    timer: Timer
-  ) =
-    this(
-      underlyingCache,
-      retryPolicy,
-      observer,
-      timer,
-      false
-    )
-
-  def this(
-    underlyingCache: Cache[K, V],
-    backoffs: Backoff,
-    observer: OptimisticLockingCacheObserver,
-    timer: Timer
-  ) =
-    this(
-      underlyingCache,
-      OptimisticLockingCache.retryPolicy(backoffs),
-      observer,
-      timer,
-      false
-    )
-
-  def this(
-    underlyingCache: Cache[K, V],
-    backoffs: Backoff,
-    observer: OptimisticLockingCacheObserver,
-    timer: Timer,
-    enableKeyLogging: Boolean
-  ) =
-    this(
-      underlyingCache,
-      OptimisticLockingCache.retryPolicy(backoffs),
-      observer,
-      timer,
-      enableKeyLogging
-    )
-
-  private[this] val log = Logger.get("OptimisticLockingCache")
-  private[this] val rateLimitedLogger = new RateLimitingLogger(logger = log)
-
-  @deprecated("use RetryPolicy-based constructor", "0.1.2")
-  def this(underlyingCache: Cache[K, V], maxTries: Int = 10, enableKeyLogging: Boolean) = {
-    this(
-      underlyingCache,
-      Backoff.const(0.milliseconds).take(maxTries),
-      new OptimisticLockingCacheObserver(NullStatsReceiver),
-      new NullTimer,
-      enableKeyLogging
-    )
-  }
-
-  override def lockAndSet(key: K, handler: Handler[V]): Future[Option[V]] = {
-    observer.time {
-      dispatch(key, handler, retryPolicy, Nil)
-    }
-  }
-
-  /**
-   * @param key
-   *   The key to look
up in cache - * @param handler - * The handler that is applied to values from cache - * @param retryPolicy - * Used to determine if more attempts should be made. - * @param attempts - * Contains representations of the causes of previous dispatch failures - */ - protected[this] def retry( - key: K, - failure: Try[Nothing], - handler: Handler[V], - retryPolicy: RetryPolicy[Try[Nothing]], - attempts: Seq[FailedAttempt] - ): Future[Option[V]] = - retryPolicy(failure) match { - case None => - observer.failure(attempts) - if (enableKeyLogging) { - rateLimitedLogger.log( - s"failed attempts for ${key}:\n ${attempts.mkString("\n ")}", - level = Level.INFO) - Future.exception(LockAndSetFailure("lockAndSet failed for " + key, attempts)) - } else { - Future.exception(LockAndSetFailure("lockAndSet failed", attempts)) - } - - case Some((backoff, tailPolicy)) => - timer - .doLater(backoff) { - dispatch(key, handler, tailPolicy, attempts) - } - .flatten - } - - /** - * @param key - * The key to look up in cache - * @param handler - * The handler that is applied to values from cache - * @param retryPolicy - * Used to determine if more attempts should be made. - * @param attempts - * Contains representations of the causes of previous dispatch failures - */ - protected[this] def dispatch( - key: K, - handler: Handler[V], - retryPolicy: RetryPolicy[Try[Nothing]], - attempts: Seq[FailedAttempt] - ): Future[Option[V]] = { - // get the value if nothing's there - handler(None) match { - case None => - // if nothing should be done when missing, go straight to getAndConditionallySet, - // since there's nothing to attempt an add with - getAndConditionallySet(key, handler, retryPolicy, attempts) - - case some @ Some(value) => - // otherwise, try to do an atomic add, which will return false if something's there - underlyingCache.add(key, value) transform { - case Return(added) => - if (added) { - // if added, return the value - observer.success(attempts) - Future.value(some) - } else { - // otherwise, do a checkAndSet based on the current value - getAndConditionallySet(key, handler, retryPolicy, attempts) - } - - case Throw(t) => - // count exception against retries - if (enableKeyLogging) - rateLimitedLogger.logThrowable(t, s"add($key) returned exception. will retry") - retry(key, Throw(t), handler, retryPolicy, attempts :+ AddException(t)) - } - } - } - - /** - * @param key - * The key to look up in cache - * @param handler - * The handler that is applied to values from cache - * @param retryPolicy - * Used to determine if more attempts should be made. 
- * @param attempts - * Contains representations of the causes of previous dispatch failures - */ - protected[this] def getAndConditionallySet( - key: K, - handler: Handler[V], - retryPolicy: RetryPolicy[Try[Nothing]], - attempts: Seq[FailedAttempt] - ): Future[Option[V]] = { - // look in the cache to see what's there - underlyingCache.getWithChecksum(Seq(key)) handle { - case t => - // treat global failure as key-based failure - KeyValueResult(failed = Map(key -> t)) - } flatMap { lr => - lr(key) match { - case Return.None => - handler(None) match { - case Some(_) => - // if there's nothing in the cache now, but handler(None) return Some, - // that means something has changed since we attempted the add, so try again - val failure = GetWithChecksumEmpty - retry(key, Throw(failure), handler, retryPolicy, attempts :+ failure) - - case None => - // if there's nothing in the cache now, but handler(None) returns None, - // that means we don't want to store anything when there's nothing already - // in cache, so return None - observer.success(attempts) - emptyFutureNone - } - - case Return(Some((Return(current), checksum))) => - // the cache entry is present - dispatchCheckAndSet(Some(current), checksum, key, handler, retryPolicy, attempts) - - case Return(Some((Throw(t), checksum))) => - // the cache entry failed to deserialize; treat it as a None and overwrite. - if (enableKeyLogging) - rateLimitedLogger.logThrowable( - t, - s"getWithChecksum(${key}) returned a bad value. overwriting.") - dispatchCheckAndSet(None, checksum, key, handler, retryPolicy, attempts) - - case Throw(t) => - // lookup failure counts against numTries - if (enableKeyLogging) - rateLimitedLogger.logThrowable( - t, - s"getWithChecksum(${key}) returned exception. will retry.") - retry(key, Throw(t), handler, retryPolicy, attempts :+ GetWithChecksumException(t)) - } - } - } - - /** - * @param current - * The value currently cached under key `key`, if any - * @param checksum - * The checksum of the currently-cached value - * @param key - * The key mapping to `current` - * @param handler - * The handler that is applied to values from cache - * @param retryPolicy - * Used to determine if more attempts should be made. - * @param attempts - * Contains representations of the causes of previous dispatch failures - */ - protected[this] def dispatchCheckAndSet( - current: Option[V], - checksum: Checksum, - key: K, - handler: Handler[V], - retryPolicy: RetryPolicy[Try[Nothing]], - attempts: Seq[FailedAttempt] - ): Future[Option[V]] = { - handler(current) match { - case None => - // if nothing should be done based on the current value, don't do anything - observer.success(attempts) - emptyFutureNone - - case some @ Some(value) => - // otherwise, try a check and set with the checksum - underlyingCache.checkAndSet(key, value, checksum) transform { - case Return(added) => - if (added) { - // if added, return the value - observer.success(attempts) - Future.value(some) - } else { - // otherwise, something has changed, try again - val failure = CheckAndSetFailed - retry(key, Throw(failure), handler, retryPolicy, attempts :+ failure) - } - - case Throw(t) => - // count exception against retries - if (enableKeyLogging) - rateLimitedLogger.logThrowable( - t, - s"checkAndSet(${key}) returned exception. 
will retry.") - retry(key, Throw(t), handler, retryPolicy, attempts :+ CheckAndSetException(t)) - } - } - } -} - -object NonLockingCacheFactory extends LockingCacheFactory { - override def apply[K, V](cache: Cache[K, V]): LockingCache[K, V] = new NonLockingCache(cache) - override def scope(scopes: String*) = this -} - -class NonLockingCache[K, V](override val underlyingCache: Cache[K, V]) - extends LockingCache[K, V] - with CacheWrapper[K, V] { - override def lockAndSet(key: K, handler: LockingCache.Handler[V]): Future[Option[V]] = { - handler(None) match { - case None => - // if nothing should be done when nothing's there, don't do anything - Future.value(None) - - case some @ Some(value) => - set(key, value) map { _ => - some - } - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.docx new file mode 100644 index 000000000..36ebc559d Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala deleted file mode 100644 index 8b0be8dcc..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala +++ /dev/null @@ -1,59 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.util.{Duration, Future} - -/** - * [[Memcache]] is a Cache with types that reflect the memcached protocol. Keys are strings and - * values are byte arrays. - */ -trait Memcache extends TtlCache[String, Array[Byte]] { - def incr(key: String, delta: Long = 1): Future[Option[Long]] - def decr(key: String, delta: Long = 1): Future[Option[Long]] -} - -/** - * allows one Memcache to wrap another - */ -trait MemcacheWrapper extends TtlCacheWrapper[String, Array[Byte]] with Memcache { - override def underlyingCache: Memcache - - override def incr(key: String, delta: Long = 1) = underlyingCache.incr(key, delta) - override def decr(key: String, delta: Long = 1) = underlyingCache.decr(key, delta) -} - -/** - * Switch between two caches with a decider value - */ -class DeciderableMemcache(primary: Memcache, secondary: Memcache, isAvailable: => Boolean) - extends MemcacheWrapper { - override def underlyingCache = if (isAvailable) primary else secondary -} - -/** - * [[MemcacheCache]] converts a [[Memcache]] to a [[Cache[K, V]]] using a [[Serializer]] for values - * and a [[KeyTransformer]] for keys. - * - * The value serializer is bidirectional. Keys are serialized using a one-way transformation - * method, which defaults to _.toString. 
- */ -class MemcacheCache[K, V]( - memcache: Memcache, - ttl: Duration, - serializer: Serializer[V], - transformKey: KeyTransformer[K] = new ToStringKeyTransformer[K]: ToStringKeyTransformer[K]) - extends CacheWrapper[K, V] { - override val underlyingCache = new KeyValueTransformingCache( - new SimpleTtlCacheToCache(memcache, ttl), - serializer, - transformKey - ) - - def incr(key: K, delta: Int = 1): Future[Option[Long]] = { - if (delta >= 0) - memcache.incr(transformKey(key), delta) - else - memcache.decr(transformKey(key), -delta) - } - - def decr(key: K, delta: Int = 1): Future[Option[Long]] = incr(key, -delta) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.docx new file mode 100644 index 000000000..d38c06209 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala deleted file mode 100644 index 750dc913c..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala +++ /dev/null @@ -1,245 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.Return -import com.twitter.util.Throw - -/** - * MigratingReadCache supports a gradual migration from one cache to another. Reads from the - * cache are compared to reads from the darkCache and new values are written to the darkCache - * if necessary. 
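- *
- * For example (a sketch; `currentCache` and `newDarkCache` stand in for real instances):
- * {{{
- *   // reads are served by currentCache; newDarkCache is warmed and repaired as a side effect
- *   val migratingReads: ReadCache[Long, Array[Byte]] =
- *     new MigratingReadCache(currentCache, newDarkCache, statsReceiver)
- * }}}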
- */
-class MigratingReadCache[K, V](
-  cache: ReadCache[K, V],
-  darkCache: Cache[K, V],
-  statsReceiver: StatsReceiver = NullStatsReceiver)
-    extends ReadCache[K, V] {
-
-  private[this] val scopedStatsReceiver = statsReceiver.scope("migrating_read_cache")
-  private[this] val getScope = scopedStatsReceiver.scope("get")
-  private[this] val getMismatchedResultsCounter = getScope.counter("mismatched_results")
-  private[this] val getMissingResultsCounter = getScope.counter("missing_results")
-  private[this] val getUnexpectedResultsCounter = getScope.counter("unexpected_results")
-  private[this] val getMatchingResultsCounter = getScope.counter("matching_results")
-
-  private[this] val getWithChecksumScope = scopedStatsReceiver.scope("get_with_checksum")
-  private[this] val getWithChecksumMismatchedResultsCounter =
-    getWithChecksumScope.counter("mismatched_results")
-  private[this] val getWithChecksumMissingResultsCounter =
-    getWithChecksumScope.counter("missing_results")
-  private[this] val getWithChecksumUnexpectedResultsCounter =
-    getWithChecksumScope.counter("unexpected_results")
-  private[this] val getWithChecksumMatchingResultsCounter =
-    getWithChecksumScope.counter("matching_results")
-
-  override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = {
-    cache.get(keys) onSuccess { result =>
-      darkCache.get(keys) onSuccess { darkResult =>
-        keys foreach { k =>
-          (result(k), darkResult(k)) match {
-            // compare values, set if they differ
-            case (Return(Some(v)), Return(Some(dv))) if (v != dv) =>
-              getMismatchedResultsCounter.incr()
-              darkCache.set(k, v)
-            // set a value if missing
-            case (Return(Some(v)), Return.None | Throw(_)) =>
-              getMissingResultsCounter.incr()
-              darkCache.set(k, v)
-            // remove if necessary
-            case (Return.None, Return(Some(_)) | Throw(_)) =>
-              getUnexpectedResultsCounter.incr()
-              darkCache.delete(k)
-            // do nothing otherwise
-            case _ =>
-              getMatchingResultsCounter.incr()
-              ()
-          }
-        }
-      }
-    }
-  }
-
-  override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = {
-    cache.getWithChecksum(keys) onSuccess { result =>
-      // no point in the getWithChecksum from the darkCache
-      darkCache.get(keys) onSuccess { darkResult =>
-        keys foreach { k =>
-          (result(k), darkResult(k)) match {
-            // compare values, set if they differ
-            case (Return(Some((Return(v), _))), Return(Some(dv))) if (v != dv) =>
-              getWithChecksumMismatchedResultsCounter.incr()
-              darkCache.set(k, v)
-            // set a value if missing
-            case (Return(Some((Return(v), _))), Return.None | Throw(_)) =>
-              getWithChecksumMissingResultsCounter.incr()
-              darkCache.set(k, v)
-            // remove if necessary
-            case (Return.None, Return(Some(_)) | Throw(_)) =>
-              getWithChecksumUnexpectedResultsCounter.incr()
-              darkCache.delete(k)
-            // do nothing otherwise
-            case _ =>
-              getWithChecksumMatchingResultsCounter.incr()
-              ()
-          }
-        }
-      }
-    }
-  }
-
-  override def release(): Unit = {
-    cache.release()
-    darkCache.release()
-  }
-}
-
-/**
- * MigratingCache supports a gradual migration from one cache to another. Writes to the cache
- * are propagated to the darkCache. Reads from the cache are compared to reads from the darkCache
- * and new values are written to the darkCache if necessary.
- *
- * Writes to the darkCache are not locking writes, so there is some risk of inconsistencies from
- * race conditions. However, writes to the darkCache only occur if they succeed in the cache, so
- * if a checkAndSet fails, for example, no write is issued to the darkCache.
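- *
- * Continuing the sketch above, the write side would be wired the same way:
- * {{{
- *   val migrating: Cache[Long, Array[Byte]] =
- *     new MigratingCache(currentCache, newDarkCache, statsReceiver)
- *   migrating.set(key, value) // also issues newDarkCache.set(key, value) on success
- * }}}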
- */ -class MigratingCache[K, V]( - cache: Cache[K, V], - darkCache: Cache[K, V], - statsReceiver: StatsReceiver = NullStatsReceiver) - extends MigratingReadCache(cache, darkCache, statsReceiver) - with Cache[K, V] { - override def add(key: K, value: V): Future[Boolean] = { - cache.add(key, value) onSuccess { wasAdded => - if (wasAdded) { - darkCache.set(key, value) - } - } - } - - override def checkAndSet(key: K, value: V, checksum: Checksum): Future[Boolean] = { - cache.checkAndSet(key, value, checksum) onSuccess { wasSet => - if (wasSet) { - darkCache.set(key, value) - } - } - } - - override def set(key: K, value: V): Future[Unit] = { - cache.set(key, value) onSuccess { _ => - darkCache.set(key, value) - } - } - - override def replace(key: K, value: V): Future[Boolean] = { - cache.replace(key, value) onSuccess { wasReplaced => - if (wasReplaced) { - darkCache.set(key, value) - } - } - } - - override def delete(key: K): Future[Boolean] = { - cache.delete(key) onSuccess { wasDeleted => - if (wasDeleted) { - darkCache.delete(key) - } - } - } -} - -/** - * Like MigratingCache but for TtlCaches - */ -class MigratingTtlCache[K, V]( - cache: TtlCache[K, V], - darkCache: TtlCache[K, V], - ttl: (K, V) => Duration) - extends MigratingReadCache(cache, new TtlCacheToCache(darkCache, ttl)) - with TtlCache[K, V] { - override def add(key: K, value: V, ttl: Duration): Future[Boolean] = { - cache.add(key, value, ttl) onSuccess { wasAdded => - if (wasAdded) { - darkCache.set(key, value, ttl) - } - } - } - - override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration): Future[Boolean] = { - cache.checkAndSet(key, value, checksum, ttl) onSuccess { wasSet => - if (wasSet) { - darkCache.set(key, value, ttl) - } - } - } - - override def set(key: K, value: V, ttl: Duration): Future[Unit] = { - cache.set(key, value, ttl) onSuccess { _ => - darkCache.set(key, value, ttl) - } - } - - override def replace(key: K, value: V, ttl: Duration): Future[Boolean] = { - cache.replace(key, value, ttl) onSuccess { wasReplaced => - if (wasReplaced) { - darkCache.set(key, value, ttl) - } - } - } - - override def delete(key: K): Future[Boolean] = { - cache.delete(key) onSuccess { wasDeleted => - if (wasDeleted) { - darkCache.delete(key) - } - } - } - - override def release(): Unit = { - cache.release() - darkCache.release() - } -} - -/** - * A MigratingTtlCache for Memcaches, implementing a migrating incr and decr. Race conditions - * are possible and may prevent the counts from being perfectly synchronized. - */ -class MigratingMemcache( - cache: Memcache, - darkCache: Memcache, - ttl: (String, Array[Byte]) => Duration) - extends MigratingTtlCache[String, Array[Byte]](cache, darkCache, ttl) - with Memcache { - def incr(key: String, delta: Long = 1): Future[Option[Long]] = { - cache.incr(key, delta) onSuccess { - case None => - darkCache.delete(key) - - case Some(value) => - darkCache.incr(key, delta) onSuccess { - case Some(`value`) => // same value! - case _ => - val b = value.toString.getBytes - darkCache.set(key, b, ttl(key, b)) - } - } - } - - def decr(key: String, delta: Long = 1): Future[Option[Long]] = { - cache.decr(key, delta) onSuccess { - case None => - darkCache.delete(key) - - case Some(value) => - darkCache.decr(key, delta) onSuccess { - case Some(`value`) => // same value! 
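-          // the counts have drifted between the two caches; the case below resynchronizes
-          // the dark cache by overwriting it with the primary cache's value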
-          case _ =>
-            val b = value.toString.getBytes
-            darkCache.set(key, b, ttl(key, b))
-        }
-    }
-  }
-}
diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.docx
new file mode 100644
index 000000000..ac28e6469
Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.docx differ
diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala
deleted file mode 100644
index 59acd28d0..000000000
--- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala
+++ /dev/null
@@ -1,46 +0,0 @@
-package com.twitter.servo.cache
-
-import com.twitter.finagle.memcached.util.NotFound
-import scala.util.Random
-
-/**
- * Wrap a ReadCache, forcing a miss rate. Useful for playing back
- * the same logs over and over while simulating expected cache misses.
- */
-class MissingReadCache[K, V](
-  underlyingCache: ReadCache[K, V],
-  hitRate: Float,
-  rand: Random = new Random)
-    extends ReadCache[K, V] {
-  assert(hitRate <= 1 && hitRate >= 0, "hitRate must be <= 1 and >= 0")
-
-  protected def filterResult[W](lr: KeyValueResult[K, W]) = {
-    val found = lr.found.filter { _ =>
-      rand.nextFloat <= hitRate
-    }
-    val notFound = lr.notFound ++ NotFound(lr.found.keySet, found.keySet)
-    KeyValueResult(found, notFound, lr.failed)
-  }
-
-  override def get(keys: Seq[K]) =
-    underlyingCache.get(keys) map { filterResult(_) }
-
-  override def getWithChecksum(keys: Seq[K]) =
-    underlyingCache.getWithChecksum(keys) map { filterResult(_) }
-
-  override def release() = underlyingCache.release()
-}
-
-class MissingCache[K, V](
-  override val underlyingCache: Cache[K, V],
-  hitRate: Float,
-  rand: Random = new Random)
-    extends MissingReadCache[K, V](underlyingCache, hitRate, rand)
-    with CacheWrapper[K, V]
-
-class MissingTtlCache[K, V](
-  override val underlyingCache: TtlCache[K, V],
-  hitRate: Float,
-  rand: Random = new Random)
-    extends MissingReadCache[K, V](underlyingCache, hitRate, rand)
-    with TtlCacheWrapper[K, V]
diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.docx
new file mode 100644
index 000000000..8146cc6e1
Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.docx differ
diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala
deleted file mode 100644
index a3bed9624..000000000
--- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala
+++ /dev/null
@@ -1,419 +0,0 @@
-package com.twitter.servo.cache
-
-import com.twitter.finagle.stats.{Stat, StatsReceiver}
-import com.twitter.logging.{Level, Logger}
-import com.twitter.servo.util.{ExceptionCounter, WindowedAverage}
-import com.twitter.util._
-
-/**
- * track hits and misses in caches, time reads and writes
- */
-trait CacheObserver {
-
-  /**
-   * register a hit
-   */
-  def hit(key: String): Unit
-
-  /**
-   * register a miss
-   */
-  def miss(key: String): Unit
-
-  /**
-   * time the read, and automatically handle hits and misses from the KeyValueResult
-   */
-  def read[K, T](
-    name: String,
-    keys: Seq[K]
-  )(
-    f: => Future[KeyValueResult[K, T]]
-  ): Future[KeyValueResult[K, T]]
- - /** - * time the write - */ - def write[K, T](name: String, key: K)(f: => Future[T]): Future[T] - - /** - * time the incr, and record the success/failure - */ - def incr[K](name: String, key: Seq[K])(f: => Future[Option[Long]]): Future[Option[Long]] - - /** - * produce a new CacheObserver with a nested scope - */ - def scope(s: String*): CacheObserver - - /** - * increment a counter tracking the number of expirations. - */ - def expired(delta: Int = 1): Unit - - /** - * Increment a counter tracking the number of failures. - */ - def failure(delta: Int = 1): Unit - - /** - * Increment a counter tracking the number of tombstones. - */ - def tombstone(delta: Int = 1): Unit - - /** - * Increment a counter tracking the number of not cached. - */ - def noCache(delta: Int = 1): Unit -} - -object NullCacheObserver extends CacheObserver { - override def hit(key: String) = () - override def miss(key: String) = () - override def read[K, T](name: String, keys: Seq[K])(f: => Future[KeyValueResult[K, T]]) = f - override def write[K, T](name: String, key: K)(f: => Future[T]) = f - override def incr[K](name: String, key: Seq[K])(f: => Future[Option[Long]]) = f - override def scope(s: String*) = this - override def expired(delta: Int = 1) = () - override def failure(delta: Int = 1): Unit = {} - override def tombstone(delta: Int = 1): Unit = {} - override def noCache(delta: Int = 1): Unit = {} -} - -/** - * A CacheObserver that writes to a StatsReceiver - */ -class StatsReceiverCacheObserver( - stats: StatsReceiver, - windowSize: Long, - log: Logger, - disableLogging: Boolean = false) - extends CacheObserver { - - def this( - statsReceiver: StatsReceiver, - windowSize: Long, - scope: String - ) = - this( - statsReceiver.scope(scope), - windowSize, - Logger.get(scope.replaceAll("([a-z]+)([A-Z])", "$1_$2").toLowerCase) - ) - - def this( - statsReceiver: StatsReceiver, - windowSize: Long, - scope: String, - disableLogging: Boolean - ) = - this( - statsReceiver.scope(scope), - windowSize, - Logger.get(scope.replaceAll("([a-z]+)([A-Z])", "$1_$2").toLowerCase), - disableLogging - ) - - protected[this] val expirationCounter = stats.counter("expirations") - - // needed to make sure we hand out the same observer for each scope, - // so that the hit rates are properly calculated - protected[this] val children = Memoize { - new StatsReceiverCacheObserver(stats, windowSize, _: String, disableLogging) - } - - protected[this] val exceptionCounter = new ExceptionCounter(stats) - private[this] val hitCounter = stats.counter("hits") - private[this] val missCounter = stats.counter("misses") - private[this] val failuresCounter = stats.counter("failures") - private[this] val tombstonesCounter = stats.counter("tombstones") - private[this] val noCacheCounter = stats.counter("noCache") - - private[this] val windowedHitRate = new WindowedAverage(windowSize) - private[this] val windowedIncrHitRate = new WindowedAverage(windowSize) - - private[this] val hitRateGauge = stats.addGauge("hit_rate") { - windowedHitRate.value.getOrElse(1.0).toFloat - } - - private[this] val incrHitRateGauge = stats.addGauge("incr_hit_rate") { - windowedIncrHitRate.value.getOrElse(1.0).toFloat - } - - protected[this] def handleThrowable[K](name: String, t: Throwable, key: Option[K]): Unit = { - stats.counter(name + "_failures").incr() - exceptionCounter(t) - if (!disableLogging) { - lazy val suffix = key - .map { k => - "(" + k.toString + ")" - } - .getOrElse("") - log.warning("%s%s caught: %s", name, suffix, t.getClass.getName) - log.trace(t, "stack 
trace was: ") - } - } - - override def hit(key: String): Unit = { - hits(1) - if (!disableLogging) - log.trace("cache hit: %s", key) - } - - private[this] def hits(n: Int): Unit = { - windowedHitRate.record(n.toDouble, n.toDouble) - hitCounter.incr(n) - } - - override def miss(key: String): Unit = { - misses(1) - if (!disableLogging) - log.trace("cache miss: %s", key) - } - - private[this] def misses(n: Int): Unit = { - windowedHitRate.record(0.0F, n.toDouble) - missCounter.incr(n) - } - - override def read[K, T]( - name: String, - keys: Seq[K] - )( - f: => Future[KeyValueResult[K, T]] - ): Future[KeyValueResult[K, T]] = - Stat - .timeFuture(stats.stat(name)) { - stats.counter(name).incr() - f - } - .respond { - case Return(lr) => - if (log.isLoggable(Level.TRACE)) { - lr.found.keys.foreach { k => - hit(k.toString) - } - lr.notFound.foreach { k => - miss(k.toString) - } - } else { - hits(lr.found.keys.size) - misses(lr.notFound.size) - } - lr.failed foreach { - case (k, t) => - handleThrowable(name, t, Some(k)) - // count failures as misses - miss(k.toString) - failuresCounter.incr() - } - case Throw(t) => - handleThrowable(name, t, None) - // count failures as misses - keys.foreach { k => - miss(k.toString) - } - failuresCounter.incr() - } - - override def write[K, T](name: String, key: K)(f: => Future[T]): Future[T] = - Stat.timeFuture(stats.stat(name)) { - stats.counter(name).incr() - f - } onFailure { - handleThrowable(name, _, Some(key)) - } - - override def incr[K](name: String, key: Seq[K])(f: => Future[Option[Long]]) = - Stat.timeFuture(stats.stat(name)) { - stats.counter(name).incr() - f - } onSuccess { optVal => - val hit = optVal.isDefined - windowedIncrHitRate.record(if (hit) 1F else 0F) - stats.counter(name + (if (hit) "_hits" else "_misses")).incr() - } - - override def scope(s: String*) = - s.toList match { - case Nil => this - case head :: tail => children(head).scope(tail: _*) - } - - override def expired(delta: Int = 1): Unit = { expirationCounter.incr(delta) } - override def failure(delta: Int = 1): Unit = { failuresCounter.incr(delta) } - override def tombstone(delta: Int = 1): Unit = { tombstonesCounter.incr(delta) } - override def noCache(delta: Int = 1): Unit = { noCacheCounter.incr(delta) } - -} - -/** - * Wraps an underlying cache with calls to a CacheObserver - */ -class ObservableReadCache[K, V](underlyingCache: ReadCache[K, V], observer: CacheObserver) - extends ReadCache[K, V] { - override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = { - observer.read("get", keys) { - underlyingCache.get(keys) - } - } - - override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = { - observer.read[K, (Try[V], Checksum)]("get_with_checksum", keys) { - underlyingCache.getWithChecksum(keys) - } - } - - override def release() = underlyingCache.release() -} - -object ObservableCache { - def apply[K, V]( - underlyingCache: Cache[K, V], - statsReceiver: StatsReceiver, - windowSize: Long, - name: String - ): Cache[K, V] = - new ObservableCache( - underlyingCache, - new StatsReceiverCacheObserver(statsReceiver, windowSize, name) - ) - - def apply[K, V]( - underlyingCache: Cache[K, V], - statsReceiver: StatsReceiver, - windowSize: Long, - name: String, - disableLogging: Boolean - ): Cache[K, V] = - new ObservableCache( - underlyingCache, - new StatsReceiverCacheObserver( - statsReceiver = statsReceiver, - windowSize = windowSize, - scope = name, - disableLogging = disableLogging) - ) - - def apply[K, V]( - underlyingCache: Cache[K, V], - statsReceiver: 
StatsReceiver, - windowSize: Long, - log: Logger - ): Cache[K, V] = - new ObservableCache( - underlyingCache, - new StatsReceiverCacheObserver(statsReceiver, windowSize, log) - ) -} - -/** - * Wraps an underlying Cache with calls to a CacheObserver - */ -class ObservableCache[K, V](underlyingCache: Cache[K, V], observer: CacheObserver) - extends ObservableReadCache(underlyingCache, observer) - with Cache[K, V] { - override def add(key: K, value: V): Future[Boolean] = - observer.write("add", key) { - underlyingCache.add(key, value) - } - - override def checkAndSet(key: K, value: V, checksum: Checksum): Future[Boolean] = - observer.write("check_and_set", key) { - underlyingCache.checkAndSet(key, value, checksum) - } - - override def set(key: K, value: V): Future[Unit] = - observer.write("set", key) { - underlyingCache.set(key, value) - } - - override def replace(key: K, value: V): Future[Boolean] = - observer.write("replace", key) { - underlyingCache.replace(key, value) - } - - override def delete(key: K): Future[Boolean] = - observer.write("delete", key) { - underlyingCache.delete(key) - } -} - -object ObservableTtlCache { - def apply[K, V]( - underlyingCache: TtlCache[K, V], - statsReceiver: StatsReceiver, - windowSize: Long, - name: String - ): TtlCache[K, V] = - new ObservableTtlCache( - underlyingCache, - new StatsReceiverCacheObserver(statsReceiver, windowSize, name) - ) -} - -/** - * Wraps an underlying TtlCache with calls to a CacheObserver - */ -class ObservableTtlCache[K, V](underlyingCache: TtlCache[K, V], observer: CacheObserver) - extends ObservableReadCache(underlyingCache, observer) - with TtlCache[K, V] { - override def add(key: K, value: V, ttl: Duration): Future[Boolean] = - observer.write("add", key) { - underlyingCache.add(key, value, ttl) - } - - override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration): Future[Boolean] = - observer.write("check_and_set", key) { - underlyingCache.checkAndSet(key, value, checksum, ttl) - } - - override def set(key: K, value: V, ttl: Duration): Future[Unit] = - observer.write("set", key) { - underlyingCache.set(key, value, ttl) - } - - override def replace(key: K, value: V, ttl: Duration): Future[Boolean] = - observer.write("replace", key) { - underlyingCache.replace(key, value, ttl) - } - - override def delete(key: K): Future[Boolean] = - observer.write("delete", key) { - underlyingCache.delete(key) - } -} - -case class ObservableMemcacheFactory(memcacheFactory: MemcacheFactory, cacheObserver: CacheObserver) - extends MemcacheFactory { - - override def apply() = - new ObservableMemcache(memcacheFactory(), cacheObserver) -} - -@deprecated("use ObservableMemcacheFactory or ObservableMemcache directly", "0.1.2") -object ObservableMemcache { - def apply( - underlyingCache: Memcache, - statsReceiver: StatsReceiver, - windowSize: Long, - name: String - ): Memcache = - new ObservableMemcache( - underlyingCache, - new StatsReceiverCacheObserver(statsReceiver, windowSize, name) - ) -} - -class ObservableMemcache(underlyingCache: Memcache, observer: CacheObserver) - extends ObservableTtlCache[String, Array[Byte]](underlyingCache, observer) - with Memcache { - def incr(key: String, delta: Long = 1): Future[Option[Long]] = - observer.incr("incr", key) { - underlyingCache.incr(key, delta) - } - - def decr(key: String, delta: Long = 1): Future[Option[Long]] = - observer.incr("decr", key) { - underlyingCache.decr(key, delta) - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.docx 
b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.docx new file mode 100644 index 000000000..f51ca4f82 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.scala deleted file mode 100644 index 801d21ea6..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.scala +++ /dev/null @@ -1,85 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.logging.Logger -import com.twitter.util.{Future, Return, Throw, Try} - -object SecondaryIndexingCache { - type IndexMapping[S, V] = V => Try[Option[S]] -} - -/** - * Stores a secondary index whenever set is called, - * using a mapping from value to secondary index - */ -class SecondaryIndexingCache[K, S, V]( - override val underlyingCache: Cache[K, Cached[V]], - secondaryIndexCache: Cache[S, Cached[K]], - secondaryIndex: SecondaryIndexingCache.IndexMapping[S, V]) - extends CacheWrapper[K, Cached[V]] { - protected[this] val log = Logger.get(getClass.getSimpleName) - - protected[this] def setSecondaryIndex(key: K, cachedValue: Cached[V]): Future[Unit] = - cachedValue.value match { - case Some(value) => - secondaryIndex(value) match { - case Return(Some(index)) => - val cachedKey = cachedValue.copy(value = Some(key)) - secondaryIndexCache.set(index, cachedKey) - case Return.None => - Future.Done - case Throw(t) => - log.error(t, "failed to determine secondary index for: %s", cachedValue) - Future.Done - } - // if we're storing a tombstone, no secondary index can be made - case None => Future.Done - } - - override def set(key: K, cachedValue: Cached[V]): Future[Unit] = - super.set(key, cachedValue) flatMap { _ => - setSecondaryIndex(key, cachedValue) - } - - override def checkAndSet(key: K, cachedValue: Cached[V], checksum: Checksum): Future[Boolean] = - super.checkAndSet(key, cachedValue, checksum) flatMap { wasStored => - if (wasStored) - // do a straight set of the secondary index, but only if the CAS succeeded - setSecondaryIndex(key, cachedValue) map { _ => - true - } - else - Future.value(false) - } - - override def add(key: K, cachedValue: Cached[V]): Future[Boolean] = - super.add(key, cachedValue) flatMap { wasAdded => - if (wasAdded) - // do a straight set of the secondary index, but only if the add succeeded - setSecondaryIndex(key, cachedValue) map { _ => - true - } - else - Future.value(false) - } - - override def replace(key: K, cachedValue: Cached[V]): Future[Boolean] = - super.replace(key, cachedValue) flatMap { wasReplaced => - if (wasReplaced) - setSecondaryIndex(key, cachedValue) map { _ => - true - } - else - Future.value(false) - } - - override def release(): Unit = { - underlyingCache.release() - secondaryIndexCache.release() - } - - def withSecondaryIndex[T]( - secondaryIndexingCache: Cache[T, Cached[K]], - secondaryIndex: SecondaryIndexingCache.IndexMapping[T, V] - ): SecondaryIndexingCache[K, T, V] = - new SecondaryIndexingCache[K, T, V](this, secondaryIndexingCache, secondaryIndex) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.docx new file mode 100644 index 000000000..ec27adb96 Binary files /dev/null and 
b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.docx differ
diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.scala
deleted file mode 100644
index 3e46211e4..000000000
--- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.scala
+++ /dev/null
@@ -1,97 +0,0 @@
-package com.twitter.servo.cache
-
-import com.twitter.util.Future
-
-/**
- * Represents multiple underlying ReadCaches selected by key at invocation time.
- */
-trait SelectedReadCacheWrapper[K, V, This <: ReadCache[K, V]] extends ReadCache[K, V] {
-
-  /** Retrieves the underlying cache for the given key. */
-  def underlyingCache(key: K): This
-
-  /** Retrieves tuples of the underlying caches and the keys they apply to. */
-  def underlyingCacheForKeys(keys: Seq[K]): Seq[(This, Seq[K])]
-
-  /** Retrieves all underlying caches. */
-  def underlyingCaches: Seq[This]
-
-  private[this] def collectUnderlying[V2](
-    keys: Seq[K]
-  )(
-    f: (This, Seq[K]) => Future[KeyValueResult[K, V2]]
-  ): Future[KeyValueResult[K, V2]] = {
-    Future.collect(
-      underlyingCacheForKeys(keys) collect {
-        case (cacheForKey, keys) if !keys.isEmpty =>
-          f(cacheForKey, keys)
-      }
-    ) map {
-      KeyValueResult.sum(_)
-    }
-  }
-
-  override def get(keys: Seq[K]) = collectUnderlying(keys) { _.get(_) }
-  override def getWithChecksum(keys: Seq[K]) = collectUnderlying(keys) { _.getWithChecksum(_) }
-
-  override def release(): Unit = {
-    underlyingCaches foreach { _.release() }
-  }
-}
-
-/**
- * Represents multiple underlying Caches selected by key at invocation time.
- */
-trait SelectedCacheWrapper[K, V]
-    extends Cache[K, V]
-    with SelectedReadCacheWrapper[K, V, Cache[K, V]] {
-  override def add(key: K, value: V) = underlyingCache(key).add(key, value)
-
-  override def checkAndSet(key: K, value: V, checksum: Checksum) =
-    underlyingCache(key).checkAndSet(key, value, checksum)
-
-  override def set(key: K, value: V) = underlyingCache(key).set(key, value)
-
-  override def replace(key: K, value: V) = underlyingCache(key).replace(key, value)
-
-  override def delete(key: K) = underlyingCache(key).delete(key)
-}
-
-/**
- * SelectedCache chooses between two underlying caches based on a function of
- * the key.
- */
-class SelectedCache[K, V](primary: Cache[K, V], secondary: Cache[K, V], usePrimary: K => Boolean)
-    extends SelectedCacheWrapper[K, V] {
-  override def underlyingCache(key: K) = if (usePrimary(key)) primary else secondary
-
-  override def underlyingCacheForKeys(keys: Seq[K]) = {
-    keys partition (usePrimary) match {
-      case (primaryKeys, secondaryKeys) => Seq((primary, primaryKeys), (secondary, secondaryKeys))
-    }
-  }
-
-  override def underlyingCaches = Seq(primary, secondary)
-}
-
-/**
- * Factory for SelectedCache instances that use a simple function to migrate
- * users from a secondary cache (function returns false) to a primary cache
- * (function returns true). Serves a purpose similar to CacheFactory, but
- * cannot extend it due to type constraints.
- *
- * The function is expected to produce stable results by key over time to
- * prevent accessing stale cache entries due to keys flapping between the
- * two caches.
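- *
- * A stable predicate can key off a modulus or hash of the id, e.g. (a sketch;
- * `primaryFactory` and `secondaryFactory` stand in for real CacheFactory instances):
- * {{{
- *   val factory = new SelectedCacheFactory[Long](
- *     primaryFactory,
- *     secondaryFactory,
- *     (id: Long) => id % 100 < 10 // ~10% of keys on the primary, stable per key
- *   )
- *   val cache: Cache[Long, Seq[Long]] = factory(Serializers.SeqLong.Simple, "tweets")
- * }}}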
- */ -class SelectedCacheFactory[K]( - primaryFactory: CacheFactory, - secondaryFactory: CacheFactory, - usePrimary: K => Boolean) { - def apply[V](serializer: Serializer[V], scopes: String*): Cache[K, V] = - new SelectedCache( - primaryFactory[K, V](serializer, scopes: _*), - secondaryFactory[K, V](serializer, scopes: _*), - usePrimary - ) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.docx new file mode 100644 index 000000000..a81d9003f Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.scala deleted file mode 100644 index 7477aa9c6..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.scala +++ /dev/null @@ -1,10 +0,0 @@ -package com.twitter.servo.cache - -/** - * A Serializer of `Seq[T]`s. - * - * @param itemSerializer a Serializer for the individual elements. - * @param itemSizeEstimate estimated size in bytes of individual elements - */ -class SeqSerializer[T](itemSerializer: Serializer[T], itemSizeEstimate: Int = 8) - extends IterableSerializer[T, Seq[T]](() => Seq.newBuilder[T], itemSerializer, itemSizeEstimate) diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.docx new file mode 100644 index 000000000..d3300bf1d Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.scala deleted file mode 100644 index abe4e420c..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.scala +++ /dev/null @@ -1,184 +0,0 @@ -package com.twitter.servo.cache - -import com.google.common.primitives.{Ints, Longs} -import com.twitter.finagle.thrift.Protocols -import com.twitter.io.Buf -import com.twitter.scrooge.{ThriftStruct, ThriftStructCodec, ThriftStructSerializer} -import com.twitter.servo.util.Transformer -import com.twitter.util.{Time => UtilTime, Try} -import java.io.{ByteArrayInputStream, ByteArrayOutputStream} -import java.nio.ByteBuffer -import org.apache.thrift.TBase -import org.apache.thrift.protocol.{TCompactProtocol, TProtocolFactory} -import org.apache.thrift.transport.TIOStreamTransport - -object Serializers { self => - val CompactProtocolFactory = new TCompactProtocol.Factory - val EmptyByteArray = Array.empty[Byte] - - val Unit = Transformer[Unit, Array[Byte]](_ => EmptyByteArray, _ => ()) - - object Long { - val Simple = Transformer[Long, Array[Byte]](Longs.toByteArray, Longs.fromByteArray) - } - - object CachedLong { - val Compact: Serializer[Cached[Long]] = - new CachedSerializer(self.Long.Simple, CompactProtocolFactory) - } - - object SeqLong { - val Simple: Serializer[Seq[Long]] = new SeqSerializer(self.Long.Simple, 8) - } - - object CachedSeqLong { - val Compact: Serializer[Cached[Seq[Long]]] = - new CachedSerializer(self.SeqLong.Simple, CompactProtocolFactory) - } - - object Int { - val Simple = Transformer[Int, Array[Byte]](Ints.toByteArray, Ints.fromByteArray) - } - - object CachedInt { - val Compact: Serializer[Cached[Int]] = - new 
CachedSerializer(self.Int.Simple, CompactProtocolFactory) - } - - object SeqInt { - val Simple: Serializer[Seq[Int]] = new SeqSerializer(self.Int.Simple, 4) - } - - object CachedSeqInt { - val Compact: Serializer[Cached[Seq[Int]]] = - new CachedSerializer(self.SeqInt.Simple, CompactProtocolFactory) - } - - object String { - val Utf8: Serializer[String] = Transformer.Utf8ToBytes - } - - object CachedString { - val Compact: Serializer[Cached[String]] = - new CachedSerializer(self.String.Utf8, CompactProtocolFactory) - } - - object SeqString { - val Utf8: Serializer[Seq[String]] = new SeqSerializer(self.String.Utf8) - } - - object CachedSeqString { - val Compact: Serializer[Cached[Seq[String]]] = - new CachedSerializer(self.SeqString.Utf8, CompactProtocolFactory) - } - - /** - * We take care not to alter the buffer so that this conversion can - * safely be used multiple times with the same buffer, and that - * other threads cannot view other states of the buffer. - */ - private[this] def byteBufferToArray(b: ByteBuffer): Array[Byte] = { - val a = new Array[Byte](b.remaining) - b.duplicate.get(a) - a - } - - /** - * Convert between a ByteBuffer and an Array of bytes. The - * conversion to Array[Byte] makes a copy of the data, while the - * reverse conversion just wraps the array. - */ - val ArrayByteBuffer: Transformer[Array[Byte], ByteBuffer] = - Transformer(ByteBuffer.wrap(_: Array[Byte]), byteBufferToArray) - - val ArrayByteBuf: Transformer[Array[Byte], Buf] = - Transformer(Buf.ByteArray.Shared.apply, Buf.ByteArray.Shared.extract) - - /** - * Isomorphism between Time and Long. The Long represents the number - * of nanoseconds since the epoch. - */ - val TimeNanos: Transformer[UtilTime, Long] = - Transformer.pure[UtilTime, Long](_.inNanoseconds, UtilTime.fromNanoseconds) - - /** - * Transformer from Time to Array[Byte] always succeeds. The inverse - * transform throws BufferUnderflowException if the buffer is less - * than eight bytes in length. If it is greater than eight bytes, - * the later bytes are discarded. - */ - // This is lazy because if it is not, it may be initialized before - // Long.Simple. In that case, Long.Simple will be null at - // initialization time, and will be captured here. Unfortunately, - // this is dependent on the order of class initialization, which may - // vary between runs of a program. - lazy val Time: Serializer[UtilTime] = TimeNanos andThen Long.Simple -} - -/** - * A Serializer for Thrift structs generated by Scrooge. - * - * @param codec used to encode and decode structs for a given protocol - * @param protocolFactory defines the serialization protocol to be used - */ -class ThriftSerializer[T <: ThriftStruct]( - val codec: ThriftStructCodec[T], - val protocolFactory: TProtocolFactory) - extends Serializer[T] - with ThriftStructSerializer[T] { - override def to(obj: T): Try[Array[Byte]] = Try(toBytes(obj)) - override def from(bytes: Array[Byte]): Try[T] = Try(fromBytes(bytes)) -} - -/** - * A Serializer for Thrift structs generated by the Apache code generator. - * - * @param tFactory a factory for Thrift-defined objects of type T. Objects - * yielded by the factory are read into and returned during - * deserialization. 
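As a sanity check of the composed `Time` serializer above, a round-trip sketch (assuming the `Transformer` `to`/`from` signatures returning `Try`, as used throughout this file):

```scala
import com.twitter.util.{Return, Time}

// Round-trip a Time through the composed serializer: Time -> nanos -> 8 bytes.
val t = Time.fromNanoseconds(123456789L)
val bytes = Serializers.Time.to(t) // Return(Array[Byte]) of length 8
assert(bytes.flatMap(Serializers.Time.from) == Return(t))
```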
- * - * @param protocolFactory defines the serialization protocol to be used - */ -class TBaseSerializer[T <: TBase[_, _]](tFactory: () => T, protocolFactory: TProtocolFactory) - extends Serializer[T] { - override def to(obj: T): Try[Array[Byte]] = Try { - val baos = new ByteArrayOutputStream - obj.write(protocolFactory.getProtocol(new TIOStreamTransport(baos))) - baos.toByteArray - } - - override def from(bytes: Array[Byte]): Try[T] = Try { - val obj = tFactory() - val stream = new ByteArrayInputStream(bytes) - obj.read(protocolFactory.getProtocol(new TIOStreamTransport(stream))) - obj - } -} - -object CachedSerializer { - def binary[T](valueSerializer: Serializer[T]): CachedSerializer[T] = - new CachedSerializer(valueSerializer, Protocols.binaryFactory()) - - def compact[T](valueSerializer: Serializer[T]): CachedSerializer[T] = - new CachedSerializer(valueSerializer, new TCompactProtocol.Factory) -} - -/** - * A Serializer of Cached objects. - * - * @param valueSerializer an underlying serializer of the values to be cached. - * @param protocolFactory defines the serialization protocol to be used - */ -class CachedSerializer[T](valueSerializer: Serializer[T], protocolFactory: TProtocolFactory) - extends Serializer[Cached[T]] { - private[this] val underlying = new ThriftSerializer(CachedValue, protocolFactory) - - override def to(cached: Cached[T]): Try[Array[Byte]] = - underlying.to(cached.toCachedValue(valueSerializer)) - - private[this] val asCached: CachedValue => Cached[T] = - t => Cached(t, valueSerializer) - - override def from(bytes: Array[Byte]): Try[Cached[T]] = - underlying.from(bytes).map(asCached) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.docx new file mode 100644 index 000000000..362fbccda Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.scala deleted file mode 100644 index 9bc9a4c91..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.scala +++ /dev/null @@ -1,10 +0,0 @@ -package com.twitter.servo.cache - -/** - * A Serializer of `Set[T]`s. - * - * @param itemSerializer a Serializer for the individual elements.
- * @param itemSizeEstimate estimated size in bytes of individual elements - */ -class SetSerializer[T](itemSerializer: Serializer[T], itemSizeEstimate: Int = 8) - extends IterableSerializer[T, Set[T]](() => Set.newBuilder[T], itemSerializer, itemSizeEstimate) diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.docx new file mode 100644 index 000000000..0d02675e9 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.scala deleted file mode 100644 index 595f0698a..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.scala +++ /dev/null @@ -1,231 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.servo.keyvalue._ -import com.twitter.servo.util.{OptionOrdering, TryOrdering} -import com.twitter.util.{Future, Return, Throw, Time, Try} - -object SimpleReplicatingCache { - - /** - * Builds a SimpleReplicatingCache that writes a value multiple times to the same underlying - * cache but under different keys. If the underlying cache is backed by enough shards, there - * is a good chance that the different keys will end up on different shards, giving you similar - * behavior to having multiple distinct caches. - */ - def apply[K, K2, V]( - underlying: LockingCache[K2, Cached[V]], - keyReplicator: (K, Int) => K2, - replicas: Int = 2 - ) = new SimpleReplicatingCache( - (0 until replicas).toSeq map { replica => - new KeyTransformingLockingCache( - underlying, - (key: K) => keyReplicator(key, replica) - ) - } - ) -} - -/** - * A very simple replicating cache implementation. It writes the same key/value pair to - * multiple underlying caches. On read, each underlying cache is queried with the key; if the - * results are not all the same for a given key, then the most recent value is chosen and - * replicated to all caches. - * - * Some cache operations are not currently supported, because their semantics are a little fuzzy - * in the replication case. Specifically: add and checkAndSet. - */ -class SimpleReplicatingCache[K, V](underlyingCaches: Seq[LockingCache[K, Cached[V]]]) - extends LockingCache[K, Cached[V]] { - private type CsValue = (Try[Cached[V]], Checksum) - - private val cachedOrdering = new Ordering[Cached[V]] { - // sort by ascending timestamp - def compare(a: Cached[V], b: Cached[V]) = a.cachedAt.compare(b.cachedAt) - } - - private val csValueOrdering = new Ordering[CsValue] { - // order by Try[V], ignore checksum - val subordering = TryOrdering(cachedOrdering) - def compare(a: CsValue, b: CsValue) = subordering.compare(a._1, b._1) - } - - private val tryOptionCsValueOrdering = TryOrdering(OptionOrdering(csValueOrdering)) - private val tryOptionCachedOrdering = TryOrdering(OptionOrdering(cachedOrdering)) - - /** - * release any underlying resources - */ - def release(): Unit = { - underlyingCaches foreach { _.release() } - } - - /** - * Fetches from all underlying caches in parallel, and if results differ, will choose a - * winner and push updated results back to the stale caches. 
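A sketch of assembling such a cache (the `underlying` locking cache and the key-rendering scheme are hypothetical):

```scala
// Store each entry under two derived string keys; on a sufficiently
// sharded backend these usually land on different shards.
val replicated: LockingCache[Long, Cached[String]] =
  SimpleReplicatingCache[Long, String, String](
    underlying, // LockingCache[String, Cached[String]], assumed to exist
    (key, replica) => s"$key/$replica",
    replicas = 2
  )
```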
- */ - def get(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = { - getWithChecksum(keys) map { csKvRes => - val resBldr = new KeyValueResultBuilder[K, Cached[V]] - - csKvRes.found foreach { - case (k, (Return(v), _)) => resBldr.addFound(k, v) - case (k, (Throw(t), _)) => resBldr.addFailed(k, t) - } - - resBldr.addNotFound(csKvRes.notFound) - resBldr.addFailed(csKvRes.failed) - resBldr.result() - } - } - - /** - * Fetches from all underlying caches in parallel, and if results differ, will choose a - * winner and push updated results back to the stale caches. - */ - def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, Cached[V]]] = { - Future.collect { - underlyingCaches map { underlying => - underlying.getWithChecksum(keys) - } - } map { underlyingResults => - val resBldr = new KeyValueResultBuilder[K, CsValue] - - for (key <- keys) { - val keyResults = underlyingResults map { _(key) } - resBldr(key) = getAndReplicate(key, keyResults) map { - // treat evictions as misses - case Some((Return(c), _)) if c.status == CachedValueStatus.Evicted => None - case v => v - } - } - - resBldr.result() - } - } - - /** - * Looks at all the returned values for a given set of replication keys, returning the most recent - * cached value if available, indicating a miss if applicable, or returning a failure if all - * keys failed. If a cached value is returned, and some keys don't have that cached value, - * the cached value will be replicated to those keys, possibly overwriting stale data. - */ - private def getAndReplicate( - key: K, - keyResults: Seq[Try[Option[CsValue]]] - ): Try[Option[CsValue]] = { - val max = keyResults.max(tryOptionCsValueOrdering) - - max match { - // if one of the replication keys returned a cached value, then make sure all replication - // keys contain that cached value. - case Return(Some((Return(cached), cs))) => - for ((underlying, keyResult) <- underlyingCaches zip keyResults) { - if (keyResult != max) { - replicate(key, cached, keyResult, underlying) - } - } - case _ => - } - - max - } - - private def replicate( - key: K, - cached: Cached[V], - current: Try[Option[CsValue]], - underlying: LockingCache[K, Cached[V]] - ): Future[Unit] = { - current match { - case Throw(_) => - // if we failed to read a particular value, we don't want to write to that key - // because that key could potentially have the real newest value - Future.Unit - case Return(None) => - // add rather than set, and fail if another value is written first - underlying.add(key, cached).unit - case Return(Some((_, cs))) => - underlying.checkAndSet(key, cached, cs).unit - } - } - - /** - * Currently not supported. Use set or lockAndSet. - */ - def add(key: K, value: Cached[V]): Future[Boolean] = { - Future.exception(new UnsupportedOperationException("use set or lockAndSet")) - } - - /** - * Currently not supported. - */ - def checkAndSet(key: K, value: Cached[V], checksum: Checksum): Future[Boolean] = { - Future.exception(new UnsupportedOperationException("use set or lockAndSet")) - } - - /** - * Calls set on all underlying caches. If at least one set succeeds, Future.Unit is - * returned. If all fail, a Future.exception will be returned. - */ - def set(key: K, value: Cached[V]): Future[Unit] = { - liftAndCollect { - underlyingCaches map { _.set(key, value) } - } flatMap { seqTryUnits => - // return Future.Unit if any underlying call succeeded, otherwise return - // the first failure. - if (seqTryUnits exists { _.isReturn }) - Future.Unit - else - Future.const(seqTryUnits.head) - } - } - - /** - * Calls lockAndSet on the underlying cache for all replication keys. If at least one - * underlying call succeeds, a successful result will be returned. - */ - def lockAndSet(key: K, handler: LockingCache.Handler[Cached[V]]): Future[Option[Cached[V]]] = { - liftAndCollect { - underlyingCaches map { _.lockAndSet(key, handler) } - } flatMap { seqTryOptionCached => - Future.const(seqTryOptionCached.max(tryOptionCachedOrdering)) - } - } - - /** - * Returns Future(true) if any of the underlying caches return Future(true); otherwise, - * returns Future(false) if any of the underlying caches return Future(false); otherwise, - * returns the first failure. - */ - def replace(key: K, value: Cached[V]): Future[Boolean] = { - liftAndCollect { - underlyingCaches map { _.replace(key, value) } - } flatMap { seqTryBools => - if (seqTryBools.contains(Return.True)) - Future.value(true) - else if (seqTryBools.contains(Return.False)) - Future.value(false) - else - Future.const(seqTryBools.head) - } - } - - /** - * Performing an actual deletion on the underlying caches is not a good idea in the face - * of potential failure, because failing to remove all values would allow a cached value to - * be resurrected. Instead, delete actually does a replace on the underlying caches with a - * CachedValueStatus of Evicted, which will be treated as a miss on read. - */ - def delete(key: K): Future[Boolean] = { - replace(key, Cached(None, CachedValueStatus.Evicted, Time.now)) - } - - /** - * Converts a Seq[Future[A]] into a Future[Seq[Try[A]]], isolating failures into Trys, instead - * of allowing the entire Future to fail. - */ - private def liftAndCollect[A](seq: Seq[Future[A]]): Future[Seq[Try[A]]] = { - Future.collect { seq map { _ transform { Future(_) } } } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.docx new file mode 100644 index 000000000..7a4a9d882 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.scala deleted file mode 100644 index 14e64d133..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.scala +++ /dev/null @@ -1,324 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.servo.util.Transformer -import com.twitter.util.{Duration, Future, Return, Throw} -import scala.collection.mutable.ArrayBuffer -import scala.collection.{breakOut, mutable} - -/** - * Adaptor from a ReadCache[K, V1] to an underlying ReadCache[K, V2] - * - * a Transformer is used to map between value types - */ -class ValueTransformingReadCache[K, V1, V2]( - underlyingCache: ReadCache[K, V2], - transformer: Transformer[V1, V2]) - extends ReadCache[K, V1] { - // overridden to avoid mapping the unneeded keyMap - override def get(keys: Seq[K]): Future[KeyValueResult[K, V1]] = { - underlyingCache.get(keys) map { lr => - // fold lr.found into found/deserialization failures - val found = mutable.Map.empty[K, V1] - val failed = mutable.Map.empty[K, Throwable] - - lr.found foreach { - case (key, value) => - transformer.from(value) match { - case Return(v) => found += key -> v - case Throw(t) => failed +=
key -> t - } - } - - lr.copy(found = found.toMap, failed = lr.failed ++ failed.toMap) - } handle { - case t => - KeyValueResult(failed = keys.map(_ -> t).toMap) - } - } - - // overridden to avoid mapping the unneeded keyMap - override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V1]] = { - underlyingCache.getWithChecksum(keys) map { clr => - clr.copy(found = clr.found map { - case (key, (value, checksum)) => - key -> (value flatMap { transformer.from(_) }, checksum) - }) - } handle { - case t => - KeyValueResult(failed = keys.map(_ -> t).toMap) - } - } - - override def release() = underlyingCache.release() -} - -/** - * Adaptor from a ReadCache[K, V1] to an underlying ReadCache[K2, V2] - * - * a Transformer is used to map between value types, and a - * one-way mapping is used for keys, making it possible to - * store data in the underlying cache using keys that can't - * easily be reverse-mapped. - */ -class KeyValueTransformingReadCache[K1, K2, V1, V2]( - underlyingCache: ReadCache[K2, V2], - transformer: Transformer[V1, V2], - underlyingKey: K1 => K2) - extends ReadCache[K1, V1] { - - // make keymapping for key recovery later - private[this] def mappedKeys( - keys: Seq[K1] - ): (IndexedSeq[K2], Map[K2, K1]) = { - val k2s = new ArrayBuffer[K2](keys.size) - val k2k1s: Map[K2, K1] = - keys.map { key => - val k2 = underlyingKey(key) - k2s += k2 - k2 -> key - }(breakOut) - (k2s, k2k1s) - } - - override def get(keys: Seq[K1]): Future[KeyValueResult[K1, V1]] = { - val (k2s, kMap) = mappedKeys(keys) - - underlyingCache - .get(k2s) - .map { lr => - // fold lr.found into found/deserialization failures - val found = Map.newBuilder[K1, V1] - val failed = Map.newBuilder[K1, Throwable] - - lr.found.foreach { - case (key, value) => - transformer.from(value) match { - case Return(v) => found += kMap(key) -> v - case Throw(t) => failed += kMap(key) -> t - } - } - - lr.failed.foreach { - case (k, t) => - failed += kMap(k) -> t - } - - KeyValueResult( - found.result(), - lr.notFound.map { kMap(_) }, - failed.result() - ) - } - .handle { - case t => - KeyValueResult(failed = keys.map(_ -> t).toMap) - } - } - - override def getWithChecksum(keys: Seq[K1]): Future[CsKeyValueResult[K1, V1]] = { - val (k2s, kMap) = mappedKeys(keys) - - underlyingCache - .getWithChecksum(k2s) - .map { clr => - KeyValueResult( - clr.found.map { - case (key, (value, checksum)) => - kMap(key) -> (value.flatMap(transformer.from), checksum) - }, - clr.notFound map { kMap(_) }, - clr.failed map { - case (key, t) => - kMap(key) -> t - } - ) - } - .handle { - case t => - KeyValueResult(failed = keys.map(_ -> t).toMap) - } - } - - override def release(): Unit = underlyingCache.release() -} - -class KeyTransformingCache[K1, K2, V](underlyingCache: Cache[K2, V], underlyingKey: K1 => K2) - extends KeyValueTransformingCache[K1, K2, V, V]( - underlyingCache, - Transformer.identity, - underlyingKey - ) - -/** - * Adaptor from a Cache[K, V1] to an underlying Cache[K, V2] - * - * a Transformer is used to map between value types - */ -class ValueTransformingCache[K, V1, V2]( - underlyingCache: Cache[K, V2], - transformer: Transformer[V1, V2]) - extends ValueTransformingReadCache[K, V1, V2](underlyingCache, transformer) - with Cache[K, V1] { - private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) - - override def add(key: K, value: V1): Future[Boolean] = - to(value) flatMap { underlyingCache.add(key, _) } - - override def checkAndSet(key: K, value: V1, checksum: Checksum): Future[Boolean] = - to(value) flatMap 
{ underlyingCache.checkAndSet(key, _, checksum) } - - override def set(key: K, value: V1): Future[Unit] = - to(value) flatMap { underlyingCache.set(key, _) } - - override def replace(key: K, value: V1): Future[Boolean] = - to(value) flatMap { underlyingCache.replace(key, _) } - - override def delete(key: K): Future[Boolean] = - underlyingCache.delete(key) -} - -/** - * Adaptor from a Cache[K1, V1] to an underlying Cache[K2, V2] - * - * a Transformer is used to map between value types, and a - * one-way mapping is used for keys, making it possible to - * store data in the underlying cache using keys that can't - * easily be reverse-mapped. - */ -class KeyValueTransformingCache[K1, K2, V1, V2]( - underlyingCache: Cache[K2, V2], - transformer: Transformer[V1, V2], - underlyingKey: K1 => K2) - extends KeyValueTransformingReadCache[K1, K2, V1, V2]( - underlyingCache, - transformer, - underlyingKey - ) - with Cache[K1, V1] { - private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) - - override def add(key: K1, value: V1): Future[Boolean] = - to(value) flatMap { underlyingCache.add(underlyingKey(key), _) } - - override def checkAndSet(key: K1, value: V1, checksum: Checksum): Future[Boolean] = - to(value) flatMap { underlyingCache.checkAndSet(underlyingKey(key), _, checksum) } - - override def set(key: K1, value: V1): Future[Unit] = - to(value) flatMap { underlyingCache.set(underlyingKey(key), _) } - - override def replace(key: K1, value: V1): Future[Boolean] = - to(value) flatMap { underlyingCache.replace(underlyingKey(key), _) } - - override def delete(key: K1): Future[Boolean] = - underlyingCache.delete(underlyingKey(key)) -} - -/** - * Adaptor from a TtlCache[K, V1] to an underlying TtlCache[K, V2] - * - * a Transformer is used to map between value types - */ -class ValueTransformingTtlCache[K, V1, V2]( - underlyingCache: TtlCache[K, V2], - transformer: Transformer[V1, V2]) - extends ValueTransformingReadCache[K, V1, V2](underlyingCache, transformer) - with TtlCache[K, V1] { - private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) - - override def add(key: K, value: V1, ttl: Duration): Future[Boolean] = - to(value) flatMap { underlyingCache.add(key, _, ttl) } - - override def checkAndSet( - key: K, - value: V1, - checksum: Checksum, - ttl: Duration - ): Future[Boolean] = - to(value) flatMap { underlyingCache.checkAndSet(key, _, checksum, ttl) } - - override def set(key: K, value: V1, ttl: Duration): Future[Unit] = - to(value) flatMap { underlyingCache.set(key, _, ttl) } - - override def replace(key: K, value: V1, ttl: Duration): Future[Boolean] = - to(value) flatMap { underlyingCache.replace(key, _, ttl) } - - override def delete(key: K): Future[Boolean] = - underlyingCache.delete(key) -} - -/** - * Adaptor from a TtlCache[K1, V1] to an underlying TtlCache[K2, V2] - * - * a Transformer is used to map between value types, and a - * one-way mapping is used for keys, making it possible to - * store data in the underlying cache using keys that can't - * easily be reverse-mapped. 
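For instance, a sketch stacking these adaptors (the `User` type, its serializer, and `rawCache` are hypothetical):

```scala
// Present a byte-oriented cache as a typed cache with Long keys. The key
// function is one-way: the string form never needs to be parsed back.
val userCache: Cache[Long, User] =
  new KeyValueTransformingCache(
    rawCache,       // Cache[String, Array[Byte]], assumed to exist
    userSerializer, // Transformer[User, Array[Byte]], assumed to exist
    (id: Long) => s"user:v1:$id"
  )
```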
- */ -class KeyValueTransformingTtlCache[K1, K2, V1, V2]( - underlyingCache: TtlCache[K2, V2], - transformer: Transformer[V1, V2], - underlyingKey: K1 => K2) - extends KeyValueTransformingReadCache[K1, K2, V1, V2]( - underlyingCache, - transformer, - underlyingKey - ) - with TtlCache[K1, V1] { - private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) - - override def add(key: K1, value: V1, ttl: Duration): Future[Boolean] = - to(value) flatMap { underlyingCache.add(underlyingKey(key), _, ttl) } - - override def checkAndSet( - key: K1, - value: V1, - checksum: Checksum, - ttl: Duration - ): Future[Boolean] = - to(value) flatMap { underlyingCache.checkAndSet(underlyingKey(key), _, checksum, ttl) } - - override def set(key: K1, value: V1, ttl: Duration): Future[Unit] = - to(value) flatMap { underlyingCache.set(underlyingKey(key), _, ttl) } - - override def replace(key: K1, value: V1, ttl: Duration): Future[Boolean] = - to(value) flatMap { underlyingCache.replace(underlyingKey(key), _, ttl) } - - override def delete(key: K1): Future[Boolean] = - underlyingCache.delete(underlyingKey(key)) -} - -class KeyTransformingTtlCache[K1, K2, V](underlyingCache: TtlCache[K2, V], underlyingKey: K1 => K2) - extends KeyValueTransformingTtlCache[K1, K2, V, V]( - underlyingCache, - Transformer.identity, - underlyingKey - ) - -class KeyTransformingLockingCache[K1, K2, V]( - underlyingCache: LockingCache[K2, V], - underlyingKey: K1 => K2) - extends KeyValueTransformingCache[K1, K2, V, V]( - underlyingCache, - Transformer.identity, - underlyingKey - ) - with LockingCache[K1, V] { - import LockingCache._ - - override def lockAndSet(key: K1, handler: Handler[V]): Future[Option[V]] = - underlyingCache.lockAndSet(underlyingKey(key), handler) -} - -class KeyTransformingCounterCache[K1, K2]( - underlyingCache: CounterCache[K2], - underlyingKey: K1 => K2) - extends KeyTransformingCache[K1, K2, Long](underlyingCache, underlyingKey) - with CounterCache[K1] { - override def incr(key: K1, delta: Int = 1): Future[Option[Long]] = { - underlyingCache.incr(underlyingKey(key), delta) - } - - override def decr(key: K1, delta: Int = 1): Future[Option[Long]] = { - underlyingCache.decr(underlyingKey(key), delta) - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.docx new file mode 100644 index 000000000..5ab9f9b80 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala deleted file mode 100644 index d42766951..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala +++ /dev/null @@ -1,95 +0,0 @@ -package com.twitter.servo.cache - -import com.twitter.util.{Duration, Future} - -/** - * a Cache that takes a TTL per set - */ -trait TtlCache[K, V] extends ReadCache[K, V] { - def add(key: K, value: V, ttl: Duration): Future[Boolean] - - def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration): Future[Boolean] - - def set(key: K, value: V, ttl: Duration): Future[Unit] - - /** - * Replaces the value for an existing key. If the key doesn't exist, this has no effect. - * @return true if replaced, false if not found - */ - def replace(key: K, value: V, ttl: Duration): Future[Boolean] - - /** - * Deletes a value from cache. 
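A sketch combining the wrappers defined just below (the `sessions` TtlCache and `Session` type are hypothetical): perturb each TTL to spread expirations, then adapt to the TTL-less Cache interface with a fixed TTL.

```scala
import com.twitter.conversions.DurationOps._
import scala.util.Random

// Spread expirations by jittering each TTL upward by up to ~10%.
val jittered: TtlCache[String, Session] =
  new PerturbedTtlCache(sessions, ttl => ttl + ttl * Random.nextInt(11) / 100)

// Adapt to the plain Cache interface with a single fixed TTL.
val asCache: Cache[String, Session] = new SimpleTtlCacheToCache(jittered, 30.minutes)
```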
- * @return true if deleted, false if not found - */ - def delete(key: K): Future[Boolean] -} - -/** - * allows one TtlCache to wrap another - */ -trait TtlCacheWrapper[K, V] extends TtlCache[K, V] with ReadCacheWrapper[K, V, TtlCache[K, V]] { - override def add(key: K, value: V, ttl: Duration) = underlyingCache.add(key, value, ttl) - - override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration) = - underlyingCache.checkAndSet(key, value, checksum, ttl) - - override def set(key: K, value: V, ttl: Duration) = underlyingCache.set(key, value, ttl) - - override def replace(key: K, value: V, ttl: Duration) = underlyingCache.replace(key, value, ttl) - - override def delete(key: K) = underlyingCache.delete(key) -} - -class PerturbedTtlCache[K, V]( - override val underlyingCache: TtlCache[K, V], - perturbTtl: Duration => Duration) - extends TtlCacheWrapper[K, V] { - override def add(key: K, value: V, ttl: Duration) = - underlyingCache.add(key, value, perturbTtl(ttl)) - - override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration) = - underlyingCache.checkAndSet(key, value, checksum, perturbTtl(ttl)) - - override def set(key: K, value: V, ttl: Duration) = - underlyingCache.set(key, value, perturbTtl(ttl)) - - override def replace(key: K, value: V, ttl: Duration) = - underlyingCache.replace(key, value, perturbTtl(ttl)) -} - -/** - * an adaptor to wrap a Cache[K, V] interface around a TtlCache[K, V] - */ -class TtlCacheToCache[K, V](override val underlyingCache: TtlCache[K, V], ttl: (K, V) => Duration) - extends Cache[K, V] - with ReadCacheWrapper[K, V, TtlCache[K, V]] { - override def add(key: K, value: V) = underlyingCache.add(key, value, ttl(key, value)) - - override def checkAndSet(key: K, value: V, checksum: Checksum) = - underlyingCache.checkAndSet(key, value, checksum, ttl(key, value)) - - override def set(key: K, value: V) = underlyingCache.set(key, value, ttl(key, value)) - - override def replace(key: K, value: V) = underlyingCache.replace(key, value, ttl(key, value)) - - override def delete(key: K) = underlyingCache.delete(key) -} - -/** - * use a single TTL for all objects - */ -class SimpleTtlCacheToCache[K, V](underlyingTtlCache: TtlCache[K, V], ttl: Duration) - extends TtlCacheToCache[K, V](underlyingTtlCache, (k: K, v: V) => ttl) - -/** - * use a value-based TTL function - */ -class ValueBasedTtlCacheToCache[K, V](underlyingTtlCache: TtlCache[K, V], ttl: V => Duration) - extends TtlCacheToCache[K, V](underlyingTtlCache, (k: K, v: V) => ttl(v)) - -/** - * use a key-based TTL function - */ -class KeyBasedTtlCacheToCache[K, V](underlyingTtlCache: TtlCache[K, V], ttl: K => Duration) - extends TtlCacheToCache[K, V](underlyingTtlCache, (k: K, v: V) => ttl(k)) diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.docx new file mode 100644 index 000000000..4e7a046e3 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala deleted file mode 100644 index f2e74624d..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala +++ /dev/null @@ -1,36 +0,0 @@ -package com.twitter.servo - -import com.twitter.finagle.partitioning.PartitionNode -import com.twitter.servo.util.Transformer -import com.twitter.util.Try - -package 
object cache { - type CachedValue = thriftscala.CachedValue - val CachedValue = thriftscala.CachedValue - type CachedValueStatus = thriftscala.CachedValueStatus - val CachedValueStatus = thriftscala.CachedValueStatus - - type KeyTransformer[K] = K => String - type CsKeyValueResult[K, V] = KeyValueResult[K, (Try[V], Checksum)] - - type KeyValueResult[K, V] = keyvalue.KeyValueResult[K, V] - val KeyValueResult = keyvalue.KeyValueResult - - @deprecated("Use com.twitter.finagle.partitioning.PartitionNode instead", "1/7/2013") - type WeightedHost = PartitionNode - - type Serializer[T] = Transformer[T, Array[Byte]] - - /** - * Like a companion object, but for a type alias! - */ - val Serializer = Serializers - - type MemcacheFactory = (() => Memcache) -} - -package cache { - package object constants { - val Colon = ":" - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.docx new file mode 100644 index 000000000..03e857da0 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala deleted file mode 100644 index 647e9b3f0..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala +++ /dev/null @@ -1,151 +0,0 @@ -package com.twitter.servo.database - -import com.twitter.util.Time -import java.sql.{ResultSet, Timestamp} - -/** - * A base trait for transforming JDBC ResultSets. - * Designed to be used with the Accessors trait. - */ -trait ImplicitBuilder[T] extends Accessors { - def apply(implicit row: ResultSet): T -} - -object Accessors { - - /** - * helper to make it a compile-time error, rather than a runtime exception, - * to call getOption on unsupported types - */ - object SafeManifest { - implicit val booleanSafeManifest = new SafeManifest(implicitly[Manifest[Boolean]]) - implicit val doubleSafeManifest = new SafeManifest(implicitly[Manifest[Double]]) - implicit val intSafeManifest = new SafeManifest[Int](implicitly[Manifest[Int]]) - implicit val longSafeManifest = new SafeManifest[Long](implicitly[Manifest[Long]]) - implicit val stringSafeManifest = new SafeManifest[String](implicitly[Manifest[String]]) - implicit val timestampSafeManifest = - new SafeManifest[Timestamp](implicitly[Manifest[Timestamp]]) - } - - @deprecated("safe manifests no longer supported, use type-specific accessors instead", "1.1.1") - case class SafeManifest[T](mf: Manifest[T]) -} - -/** - * mixin to get ResultSet accessors for standard types - */ -trait Accessors { - import Accessors._ - - /** - * @return None when the column is null for the current row of the result set passed in - * Some[T] otherwise - * @throws UnsupportedOperationException if the return type expected is not supported, currently - * only Boolean, Double, Int, Long, String and Timestamp are supported - */ - @deprecated("use type-specific accessors instead", "1.1.1") - def getOption[T](column: String)(implicit row: ResultSet, sf: SafeManifest[T]): Option[T] = { - val res = { - if (classOf[Boolean] == sf.mf.erasure) { - row.getBoolean(column) - } else if (classOf[Double] == sf.mf.erasure) { - row.getDouble(column) - } else if (classOf[Int] == sf.mf.erasure) { - row.getInt(column) - } else if (classOf[Long] == sf.mf.erasure) { - row.getLong(column) - } else if
(classOf[String] == sf.mf.erasure) { - row.getString(column) - } else if (classOf[Timestamp] == sf.mf.erasure) { - row.getTimestamp(column) - } else { - throw new UnsupportedOperationException("type not supported: " + sf.mf.erasure) - } - } - if (row.wasNull()) { - None - } else { - Some(res.asInstanceOf[T]) - } - } - - /** - * @param get the method to apply to the ResultSet - * @param row the implicit ResultSet on which to apply get - * @return None when the column is null for the current row of the result set passed in - * Some[T] otherwise - */ - def getOption[T](get: ResultSet => T)(implicit row: ResultSet): Option[T] = { - val result = get(row) - if (row.wasNull()) { - None - } else { - Some(result) - } - } - - def booleanOption(column: String)(implicit row: ResultSet): Option[Boolean] = - getOption((_: ResultSet).getBoolean(column)) - - def boolean(column: String, default: Boolean = false)(implicit row: ResultSet): Boolean = - booleanOption(column).getOrElse(default) - - def doubleOption(column: String)(implicit row: ResultSet): Option[Double] = - getOption((_: ResultSet).getDouble(column)) - - def double(column: String, default: Double = 0.0)(implicit row: ResultSet): Double = - doubleOption(column).getOrElse(default) - - def intOption(column: String)(implicit row: ResultSet): Option[Int] = - getOption((_: ResultSet).getInt(column)) - - def int(column: String, default: Int = 0)(implicit row: ResultSet): Int = - intOption(column).getOrElse(default) - - def longOption(column: String)(implicit row: ResultSet): Option[Long] = - getOption((_: ResultSet).getLong(column)) - - def long(column: String, default: Long = 0)(implicit row: ResultSet): Long = - longOption(column).getOrElse(default) - - def stringOption(column: String)(implicit row: ResultSet): Option[String] = - getOption((_: ResultSet).getString(column)) - - def string(column: String, default: String = "")(implicit row: ResultSet): String = - stringOption(column).getOrElse(default) - - def timestampOption(column: String)(implicit row: ResultSet): Option[Timestamp] = - getOption((_: ResultSet).getTimestamp(column)) - - def timestamp( - column: String, - default: Timestamp = new Timestamp(0) - )( - implicit row: ResultSet - ): Timestamp = - timestampOption(column).getOrElse(default) - - def datetimeOption(column: String)(implicit row: ResultSet): Option[Long] = - timestampOption(column) map { _.getTime } - - def datetime(column: String, default: Long = 0L)(implicit row: ResultSet): Long = - datetimeOption(column).getOrElse(default) - - def timeOption(column: String)(implicit row: ResultSet): Option[Time] = - datetimeOption(column) map { Time.fromMilliseconds(_) } - - def time(column: String, default: Time = Time.epoch)(implicit row: ResultSet): Time = - timeOption(column).getOrElse(default) - - def bytesOption(column: String)(implicit row: ResultSet): Option[Array[Byte]] = - getOption((_: ResultSet).getBytes(column)) - - def bytes( - column: String, - default: Array[Byte] = Array.empty[Byte] - )( - implicit row: ResultSet - ): Array[Byte] = - bytesOption(column).getOrElse(default) - -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.docx new file mode 100644 index 000000000..842d07206 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala 
b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala deleted file mode 100644 index fafd0fb72..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala +++ /dev/null @@ -1,56 +0,0 @@ -package com.twitter.servo.database - -object Bitfield { - def multiValue(bits: Boolean*): Int = { - bits.foldLeft(0) { (accum, bit) => - (accum << 1) | (if (bit) 1 else 0) - } - } - - def multiValueLong(bits: Boolean*): Long = { - bits.foldLeft(0L) { (accum, bit) => - (accum << 1) | (if (bit) 1L else 0L) - } - } -} - -/** - * A mixin for unpacking bitfields. - */ -trait Bitfield { - val bitfield: Int - - /** - * Tests that a given position is set to 1. - */ - def isSet(position: Int): Boolean = { - (bitfield & (1 << position)) != 0 - } - - /** - * takes a sequence of booleans, from most to least significant - * and converts them to an integer. - * - * example: multiValue(true, false, true) yields 0b101 = 5 - */ - def multiValue(bits: Boolean*): Int = Bitfield.multiValue(bits: _*) -} - -trait LongBitfield { - val bitfield: Long - - /** - * Tests that a given position is set to 1. - */ - def isSet(position: Int): Boolean = { - (bitfield & (1L << position)) != 0 - } - - /** - * takes a sequence of booleans, from most to least significant - * and converts them to a long. - * - * example: multiValue(true, false, true) yields 0b101 = 5L - */ - def multiValue(bits: Boolean*): Long = Bitfield.multiValueLong(bits: _*) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.docx new file mode 100644 index 000000000..0d8916c5b Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala deleted file mode 100644 index b4eef7418..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.twitter.servo.database - -import com.twitter.util.security -import java.io.File - -sealed trait Credentials { - def username: String - def password: String -} - -case class InlineCredentials(username: String, password: String) extends Credentials - -case class FileCredentials( - path: String, - usernameField: String = "db_username", - passwordField: String = "db_password") - extends Credentials { - lazy val (username, password) = { - val credentials = security.Credentials(new File(path)) - (credentials(usernameField), credentials(passwordField)) - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.docx new file mode 100644 index 000000000..db26f3d52 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala deleted file mode 100644 index 3d9845c31..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala +++ /dev/null @@ -1,201 +0,0 @@ -package com.twitter.servo.database - -import com.twitter.servo.repository._ -import com.twitter.util.Future -import 
scala.collection.mutable.{HashMap, HashSet, ListBuffer} -import scala.collection.generic.Growable - -object Database { - - /** - * Construct a KeyValueRepository wrapping access to a database. - * - * Data retrieved as a row from the query is passed to a Builder producing a - * (Key, Row) tuple. Once all rows have been processed this way it is passed as a - * sequence to a post-query function that can perform actions (aggregation usually) - * and produce a final sequence of (Key, Value). - * - * @tparam Q - * how we'll be querying this repository - * - * @tparam K - * the key used for looking data up - * - * @tparam R - * each entry from the database will be represented as an instance of R - * - * @tparam V - * the repository will return a V produced by processing one or more Rs - * - * @param database - * A database used to back the KeyValueRepository being built. - * - * @param dbQuery - * A database query for fetching records to be parsed into objects of type - * Row. The query string can contain instances of the character '?' as - * placeholders for parameters passed into the `Database.select` calls. - * - * @param builder - * A Builder that builds (K, Row) pairs from ResultSets from the database - * - * @param postProcess - * A function which can manipulate the Seq[(K, Row)] that is returned from the - * database. Useful for aggregating multi-mapped K, V pairs where V holds a - * container with multiple values for the same key in the database. This function - * should not manipulate the list of keys; doing so will result in Return.None - * elements in the ensuing KeyValueResult. - * - * AggregateByKey has a basic implementation that groups R objects by a - * specified identifier and may be useful as a common impl. - * - * @param selectParams - * A function that is applied to the distinct keys in a repository query. - * The result is passed to `Database.select` to be used for filling in - * bind variables in dbQuery. By default, the repository query is passed - * directly to the select. The use cases for this function are situations - * where the SELECT statement takes multiple parameters. - * - * Example: - * // A repository that takes Seq[Long]s of userids and returns - * // Item objects of a parameterized item type. - * Database.keyValueRepository[Seq[Long], Long, Item, Item]( - * database, - * "SELECT * FROM items WHERE user_id IN (?) AND item_type = ?;", - * ItemBuilder, - * selectParams = Seq(_: Seq[Long], itemType) - * ) - */ - def keyValueRepository[Q <: Seq[K], K, R, V]( - database: Database, - dbQuery: String, - builder: Builder[(K, R)], - postProcess: Seq[(K, R)] => Seq[(K, V)] = - (identity[Seq[(K, V)]] _): (Seq[(K, V)] => Seq[(K, V)]), - selectParams: Seq[K] => Seq[Any] = (Seq(_: Seq[K])): (Seq[K] => collection.Seq[Seq[K]]) - ): KeyValueRepository[Q, K, V] = - query => { - if (query.isEmpty) { - KeyValueResult.emptyFuture - } else { - val uniqueKeys = query.distinct - KeyValueResult.fromPairs(uniqueKeys) { - database.select(dbQuery, builder, selectParams(uniqueKeys): _*) map postProcess - } - } - } -} - -/** - * A thin trait for async interaction with a database.
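To make the Builder contract concrete, a small sketch (the table, columns, and the `database`/`userIds` values are hypothetical):

```scala
import com.twitter.util.Future
import java.sql.ResultSet

// A Builder is just ResultSet => A: extract one (userId, itemName) row.
val itemBuilder: Builder[(Long, String)] =
  (rs: ResultSet) => (rs.getLong("user_id"), rs.getString("name"))

val rows: Future[Seq[(Long, String)]] =
  database.select("SELECT user_id, name FROM items WHERE user_id IN (?)", itemBuilder, userIds)
```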
- */ -trait Database { - def select[A](query: String, builder: Builder[A], params: Any*): Future[Seq[A]] - def selectOne[A](query: String, builder: Builder[A], params: Any*): Future[Option[A]] - def execute(query: String, params: Any*): Future[Int] - def insert(query: String, params: Any*): Future[Long] - def release(): Unit -} - -object NullDatabase extends Database { - override def select[Unit](query: String, builder: Builder[Unit], params: Any*) = - Future.value(Seq.empty[Unit]) - - override def selectOne[Unit](query: String, builder: Builder[Unit], params: Any*) = - Future.value(None) - - override def release() = () - - override def execute(query: String, params: Any*) = - Future.value(0) - - override def insert(query: String, params: Any*) = - Future.value(0) -} - -object AggregateByKey { - def apply[K, R, A]( - extractKey: R => K, - reduce: Seq[R] => A, - pruneDuplicates: Boolean = false - ) = new AggregateByKey(extractKey, reduce, pruneDuplicates) - - /** - * In the event that the item type (V) does not carry an aggregation key then we can have - * the Builder return a tuple with some id attached. If that is done then each Row from the - * builder will look something like (SomeGroupId, SomeRowObject). Because we tend to minimize - * data duplication this seems to be a pretty common pattern and can be seen in - * SavedSearchesRepository, FacebookConnectionsRepository, and UserToRoleRepository. - * - * @tparam K - * The type for the key - * @tparam V - * The type of a single element of the list - * @tparam A - * The object we'll aggregate list items into - * @param reduce - * A function that combines a seq of V into A - * @param pruneDuplicates - * If set this ensures that, at most, one instance of any given V will be passed into reduce. - */ - def withKeyValuePairs[K, V, A]( - reduce: Seq[V] => A, - pruneDuplicates: Boolean - ): AggregateByKey[K, (K, V), A] = - new AggregateByKey( - { case (k, _) => k }, - values => reduce(values map { case (_, v) => v }), - pruneDuplicates - ) -} - -/** - * Basic aggregator that extracts keys from a Row, groups into a Seq by those keys, and - * performs some reduction step to mash those into an aggregated object. Order is not - * necessarily kept between the retrieving rows from the database and passing them into - * reduce. - * - * @tparam K - * the type used by the item on which we aggregate rows - * - * @tparam R - * object that a single row of the query will be represented as - * - * @tparam A - * what we collect groups of R into - * - * @param extractKey - * function to extract a key from a row object - * - * @param reduce - * function that can take a sequence of rows and combine them into an aggregate - * - * @param pruneDuplicates - * if set this will ensure that at most one copy of each R will be passed into reduce (as - * determined by R's equal method) but will pass the input through a set which will - * likely lose ordering. 
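A sketch of the `withKeyValuePairs` pairing described above (the row shape is hypothetical):

```scala
// Rows of (userId, badge) collapse into one Set[String] per user;
// pruneDuplicates drops repeated rows before the reduce step.
val aggregate: Seq[(Long, String)] => Seq[(Long, Set[String])] =
  AggregateByKey.withKeyValuePairs[Long, String, Set[String]](
    reduce = _.toSet,
    pruneDuplicates = true
  )

// aggregate(Seq(1L -> "a", 1L -> "b", 1L -> "a", 2L -> "a"))
// yields (in some order) Seq(1L -> Set("a", "b"), 2L -> Set("a"))
```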
- */ -class AggregateByKey[K, R, A]( - extractKey: R => K, - reduce: Seq[R] => A, - pruneDuplicates: Boolean = false) - extends (Seq[R] => Seq[(K, A)]) { - override def apply(input: Seq[R]): Seq[(K, A)] = { - val collectionMap = new HashMap[K, Growable[R] with Iterable[R]] - - def emptyCollection: Growable[R] with Iterable[R] = - if (pruneDuplicates) { - new HashSet[R] - } else { - new ListBuffer[R] - } - - input foreach { element => - (collectionMap.getOrElseUpdate(extractKey(element), emptyCollection)) += element - } - - collectionMap map { - case (key, items) => - key -> reduce(items toSeq) - } toSeq - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.docx new file mode 100644 index 000000000..a212b6543 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala deleted file mode 100644 index 6a1f41437..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala +++ /dev/null @@ -1,19 +0,0 @@ -package com.twitter.servo - -import com.twitter.util.Future -import java.sql.ResultSet - -package object database { - type DatabaseFactory = (() => Database) - - /** - * A function type for translating ResultSets into objects of the result type A. - */ - type Builder[A] = ResultSet => A - - /** - * A function type for asynchronously translating ResultSets into objects - * of the result type A. - */ - type FutureBuilder[A] = Builder[Future[A]] -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.docx new file mode 100644 index 000000000..ab6a6f75d Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala deleted file mode 100644 index 67feab329..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala +++ /dev/null @@ -1,155 +0,0 @@ -package com.twitter.servo.hydrator - -import com.twitter.servo.data.Mutation -import com.twitter.servo.util.{Effect, Gate} -import com.twitter.servo.repository._ -import com.twitter.util.{Future, Return, Try} - -object KeyValueHydrator { - // KeyValueHydrator extends this function type - type FunctionType[Q, K, V] = (Q, Future[KeyValueResult[K, V]]) => Future[Mutation[V]] - type Filter[Q, K, V] = (Q, Future[KeyValueResult[K, V]]) => Future[Boolean] - - private[this] val _unit = fromMutation[Any, Any, Any](Mutation.unit[Any]) - - /** - * A no-op hydrator. Forms a monoid with `also`. - */ - def unit[Q, K, V]: KeyValueHydrator[Q, K, V] = - _unit.asInstanceOf[KeyValueHydrator[Q, K, V]] - - /** - * Packages a function as a KeyValueHydrator - */ - def apply[Q, K, V](f: FunctionType[Q, K, V]): KeyValueHydrator[Q, K, V] = - new KeyValueHydrator[Q, K, V] { - override def apply(query: Q, futureResults: Future[KeyValueResult[K, V]]) = - f(query, futureResults) - } - - /** - * Creates a new KeyValueHydrator out of several underlying KVHydrators. 
The - * apply method is called on each KeyValueHydrator with the same - * futureResults, allowing each to kick off some asynchronous work - * to produce a future Hydrated[Mutation]. When all the future - * Hydrated[Mutation]s are available, the results are folded, - * left-to-right, over the mutations, to build up the final - * results. - */ - def inParallel[Q, K, V](hydrators: KeyValueHydrator[Q, K, V]*): KeyValueHydrator[Q, K, V] = - KeyValueHydrator[Q, K, V] { (query, futureResults) => - val futureMutations = hydrators map { t => - t(query, futureResults) - } - Future.collect(futureMutations) map Mutation.all - } - - def const[Q, K, V](futureMutation: Future[Mutation[V]]): KeyValueHydrator[Q, K, V] = - KeyValueHydrator[Q, K, V] { (_, _) => - futureMutation - } - - def fromMutation[Q, K, V](mutation: Mutation[V]): KeyValueHydrator[Q, K, V] = - const[Q, K, V](Future.value(mutation)) -} - -/** - * A KeyValueHydrator builds a Mutation to be applied to the values in a KeyValueResult, but does - * not itself apply the Mutation. This allows several KeyValueHydrators to be composed together to - * begin their work in parallel to build the Mutations, which can then be combined and applied - * to the results later (see asRepositoryFilter). - * - * Forms a monoid with KeyValueHydrator.unit as unit and `also` as the combining function. - */ -trait KeyValueHydrator[Q, K, V] extends KeyValueHydrator.FunctionType[Q, K, V] { - protected[this] val unitMutation = Mutation.unit[V] - protected[this] val futureUnitMutation = Future.value(unitMutation) - - /** - * Combines two KeyValueHydrators. Forms a monoid with KeyValueHydrator.unit - */ - def also(next: KeyValueHydrator[Q, K, V]): KeyValueHydrator[Q, K, V] = - KeyValueHydrator.inParallel(this, next) - - /** - * Turns a single KeyValueHydrator into a RepositoryFilter by applying the Mutation to - * found values in the KeyValueResult. If the mutation throws an exception, it will - * be caught and the resulting key/value pair moved to the failed map of the resulting - * KeyValueResult. - */ - lazy val asRepositoryFilter: RepositoryFilter[Q, KeyValueResult[K, V], KeyValueResult[K, V]] = - (query, futureResults) => { - this(query, futureResults) flatMap { mutation => - val update = mutation.endo - futureResults map { results => - results.mapValues { - case Return(Some(value)) => Try(Some(update(value))) - case x => x - } - } - } - } - - /** - * Apply this hydrator to the result of a repository. - */ - def hydratedBy_:(repo: KeyValueRepository[Q, K, V]): KeyValueRepository[Q, K, V] = - Repository.composed(repo, asRepositoryFilter) - - /** - * Return a new hydrator that applies the same mutation as this - * hydrator, but can be enabled/disabled or dark enabled/disabled via Gates. The light - * gate takes precedence over the dark gate. This allows you to go from 0%->100% dark, - * and then from 0%->100% light without affecting backend traffic. - */ - @deprecated("Use enabledBy(() => Boolean, () => Boolean)", "2.5.1") - def enabledBy(light: Gate[Unit], dark: Gate[Unit] = Gate.False): KeyValueHydrator[Q, K, V] = - enabledBy( - { () => - light() - }, - { () => - dark() - }) - - /** - * Return a new hydrator that applies the same mutation as this - * hydrator, but can be enabled/disabled or dark enabled/disabled via nullary boolean functions. - * The light function takes precedence over the dark function. - * This allows you to go from 0%->100% dark, and then from 0%->100% light - * without affecting backend traffic.
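A sketch of wiring these combinators together (the query/value types, both hydrators, and the decider flags are hypothetical):

```scala
// Run both hydrators' async work in parallel, then gate the combined
// mutation: dark mode does the work without mutating results.
val hydrator: KeyValueHydrator[TweetQuery, Long, Tweet] =
  (countsHydrator also mediaHydrator).enabledBy(
    () => decider.isAvailable("hydrate_light"),
    () => decider.isAvailable("hydrate_dark")
  )

// Apply it to a repository; the right-associative name makes this read
// as "repo hydrated by hydrator".
val hydrated: KeyValueRepository[TweetQuery, Long, Tweet] =
  repo hydratedBy_: hydrator
```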
- */ - def enabledBy(light: () => Boolean, dark: () => Boolean): KeyValueHydrator[Q, K, V] = - KeyValueHydrator[Q, K, V] { (query, futureResults) => - val isLight = light() - val isDark = !isLight && dark() - if (!isLight && !isDark) { - futureUnitMutation - } else { - this(query, futureResults) map { - case mutation if isLight => mutation - case mutation if isDark => mutation.dark - } - } - } - - /** - * Builds a new hydrator that will return the same result as the current hydrator, - * but will additionally perform the supplied effect on the result of hydration. - */ - def withEffect(effect: Effect[Option[V]]): KeyValueHydrator[Q, K, V] = - KeyValueHydrator[Q, K, V] { (query, futureResults) => - this(query, futureResults) map { _ withEffect effect } - } - - /** - * Builds a new hydrator that only attempts to hydrate if the - * supplied filter returns true. - */ - def filter(predicate: KeyValueHydrator.Filter[Q, K, V]): KeyValueHydrator[Q, K, V] = - KeyValueHydrator[Q, K, V] { (q, r) => - predicate(q, r) flatMap { t => - if (t) this(q, r) else futureUnitMutation - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.docx new file mode 100644 index 000000000..b6341bea2 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala deleted file mode 100644 index bcf49efb8..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala +++ /dev/null @@ -1,473 +0,0 @@ -package com.twitter.servo.keyvalue - -import com.twitter.finagle.memcached.util.NotFound -import com.twitter.util.{Future, Return, Throw, Try} -import scala.collection.immutable - -object KeyValueResult { - private[this] val Empty = KeyValueResult() - private[this] val EmptyFuture = Future.value(Empty) - - def empty[K, V]: KeyValueResult[K, V] = - Empty.asInstanceOf[KeyValueResult[K, V]] - - def emptyFuture[K, V]: Future[KeyValueResult[K, V]] = - EmptyFuture.asInstanceOf[Future[KeyValueResult[K, V]]] - - /** - * Builds a KeyValueResult using pairs of keys to Try[Option[V]]. These values are split - * out to build the separate found/notFound/failed collections. - */ - def build[K, V](data: (K, Try[Option[V]])*): KeyValueResult[K, V] = { - val bldr = new KeyValueResultBuilder[K, V] - data.foreach { case (k, v) => bldr.update(k, v) } - bldr.result() - } - - /** - * Builds a future KeyValueResult using a future sequence of key-value tuples. That - * sequence does not necessarily match up with the sequence of keys provided. The - * sequence of pairs represents the found results. notFound will be filled in from the - * missing keys. - */ - def fromPairs[K, V]( - keys: Iterable[K] = Nil: immutable.Nil.type - )( - futurePairs: Future[TraversableOnce[(K, V)]] - ): Future[KeyValueResult[K, V]] = { - fromMap(keys) { - futurePairs map { _.toMap } - } - } - - /** - * Builds a future KeyValueResult using a future map of found results. notFound will be filled - * in from the missing keys.
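For instance, a sketch of `fromMap` (the `fetchFromBackend` call is hypothetical):

```scala
import com.twitter.util.Future

val keys = Seq(1L, 2L, 3L)
val result: Future[KeyValueResult[Long, String]] =
  KeyValueResult.fromMap(keys) {
    fetchFromBackend(keys) // Future[Map[Long, String]], assumed to exist
  }
// A backend reply of Map(1L -> "a") yields
// found = Map(1L -> "a"), notFound = Set(2L, 3L), failed = Map.empty.
```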
- */ - def fromMap[K, V]( - keys: Iterable[K] = Nil: immutable.Nil.type - )( - futureMap: Future[Map[K, V]] - ): Future[KeyValueResult[K, V]] = { - futureMap map { found => - KeyValueResult[K, V](found = found, notFound = NotFound(keys.toSet, found.keySet)) - } handle { - case t => - KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) - } - } - - /** - * Builds a future KeyValueResult using a future sequence of optional results. That - * sequence must match up pair-wise with the given sequence of keys. A value of Some[V] is - * counted as a found result, a value of None is counted as a notFound result. - */ - def fromSeqOption[K, V]( - keys: Iterable[K] - )( - futureSeq: Future[Seq[Option[V]]] - ): Future[KeyValueResult[K, V]] = { - futureSeq map { seq => - keys.zip(seq).foldLeft(new KeyValueResultBuilder[K, V]) { - case (bldr, (key, tryRes)) => - tryRes match { - case Some(value) => bldr.addFound(key, value) - case None => bldr.addNotFound(key) - } - } result () - } handle { - case t => - KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) - } - } - - /** - * Builds a future KeyValueResult using a future sequence of Try results. That - * sequence must match up pair-wise with the given sequence of keys. A value of Return[V] is - * counted as a found result, a value of Throw is counted as a failed result. - */ - def fromSeqTry[K, V]( - keys: Iterable[K] - )( - futureSeq: Future[Seq[Try[V]]] - ): Future[KeyValueResult[K, V]] = { - futureSeq map { seq => - keys.zip(seq).foldLeft(new KeyValueResultBuilder[K, V]) { - case (bldr, (key, tryRes)) => - tryRes match { - case Return(value) => bldr.addFound(key, value) - case Throw(t) => bldr.addFailed(key, t) - } - } result () - } handle { - case t => - KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) - } - } - - /** - * Builds a future KeyValueResult using a sequence of future options. That sequence must - * match up pair-wise with the given sequence of keys. A value of Some[V] is - * counted as a found result, a value of None is counted as a notFound result. - */ - def fromSeqFuture[K, V]( - keys: Iterable[K] - )( - futureSeq: Seq[Future[Option[V]]] - ): Future[KeyValueResult[K, V]] = { - fromSeqTryOptions(keys) { - Future.collect { - futureSeq map { _.transform(Future(_)) } - } - } - } - - /** - * Builds a future KeyValueResult using a future sequence of Try[Option[V]]. That sequence must - * match up pair-wise with the given sequence of keys. A value of Return[Some[V]] is - * counted as a found result, a value of Return[None] is counted as a notFound result, and a value - * of Throw[V] is counted as a failed result. - */ - def fromSeqTryOptions[K, V]( - keys: Iterable[K] - )( - futureSeq: Future[Seq[Try[Option[V]]]] - ): Future[KeyValueResult[K, V]] = { - futureSeq map { seq => - keys.zip(seq).foldLeft(new KeyValueResultBuilder[K, V]) { - case (bldr, (key, tryRes)) => - tryRes match { - case Return(Some(value)) => bldr.addFound(key, value) - case Return(None) => bldr.addNotFound(key) - case Throw(t) => bldr.addFailed(key, t) - } - } result () - } handle { - case t => - KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) - } - } - - /** - * Builds a future KeyValueResult using a future map with value Try[Option[V]]. A value of - * Return[Some[V]] is counted as a found result, a value of Return[None] is counted as a notFound - * result, and a value of Throw[V] is counted as a failed result. - * - * notFound will be filled in from the missing keys. Exceptions will be handled by counting all - * keys as failed. 
Values that are in map but not keys will be ignored. - */ - def fromMapTryOptions[K, V]( - keys: Iterable[K] - )( - futureMapTryOptions: Future[Map[K, Try[Option[V]]]] - ): Future[KeyValueResult[K, V]] = { - futureMapTryOptions map { mapTryOptions => - keys.foldLeft(new KeyValueResultBuilder[K, V]) { - case (builder, key) => - mapTryOptions.get(key) match { - case Some(Return(Some(value))) => builder.addFound(key, value) - case Some(Return(None)) | None => builder.addNotFound(key) - case Some(Throw(failure)) => builder.addFailed(key, failure) - } - } result () - } handle { - case t => - KeyValueResult[K, V](failed = keys.map { _ -> t }.toMap) - } - } - - /** - * Reduces several KeyValueResults down to just 1, by combining as if by ++, but - * more efficiently with fewer intermediate results. - */ - def sum[K, V](results: Iterable[KeyValueResult[K, V]]): KeyValueResult[K, V] = { - val bldr = new KeyValueResultBuilder[K, V] - - results foreach { result => - bldr.addFound(result.found) - bldr.addNotFound(result.notFound) - bldr.addFailed(result.failed) - } - - val res = bldr.result() - - if (res.notFound.isEmpty && res.failed.isEmpty) { - res - } else { - val foundKeySet = res.found.keySet - val notFound = NotFound(res.notFound, foundKeySet) - val failed = NotFound(NotFound(res.failed, foundKeySet), res.notFound) - KeyValueResult(res.found, notFound, failed) - } - } -} - -case class KeyValueResult[K, +V]( - found: Map[K, V] = Map.empty[K, V]: immutable.Map[K, V], - notFound: Set[K] = Set.empty[K]: immutable.Set[K], - failed: Map[K, Throwable] = Map.empty[K, Throwable]: immutable.Map[K, Throwable]) - extends Iterable[(K, Try[Option[V]])] { - - /** - * A cheaper implementation of isEmpty than the default which relies - * on building an iterator. - */ - override def isEmpty = found.isEmpty && notFound.isEmpty && failed.isEmpty - - /** - * map over the keyspace to produce a new KeyValueResult - */ - def mapKeys[K2](f: K => K2): KeyValueResult[K2, V] = - copy( - found = found.map { case (k, v) => f(k) -> v }, - notFound = notFound.map(f), - failed = failed.map { case (k, t) => f(k) -> t } - ) - - /** - * Maps over found values to produce a new KeyValueResult. If the given function throws an - * exception for a particular value, that value will be moved to the `failed` bucket with - * the thrown exception. - */ - def mapFound[V2](f: V => V2): KeyValueResult[K, V2] = { - val builder = new KeyValueResultBuilder[K, V2]() - - found.foreach { - case (k, v) => - builder.update(k, Try(Some(f(v)))) - } - builder.addNotFound(notFound) - builder.addFailed(failed) - - builder.result() - } - - /** - * map over the values provided by the iterator, to produce a new KeyValueResult - */ - def mapValues[V2](f: Try[Option[V]] => Try[Option[V2]]): KeyValueResult[K, V2] = { - val builder = new KeyValueResultBuilder[K, V2]() - - found.foreach { - case (k, v) => - builder.update(k, f(Return(Some(v)))) - } - notFound.foreach { k => - builder.update(k, f(Return.None)) - } - failed.foreach { - case (k, t) => - builder.update(k, f(Throw(t))) - } - - builder.result() - } - - /** - * Map over found values to create a new KVR with the existing notFound and failed keys intact. 
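Note that mapFound (above) also catches exceptions thrown by the mapping function and reroutes the offending entries to the failed bucket. A sketch building on the hypothetical kvr from the earlier build example:

    val lengths = kvr.mapFound(_.length)
    // lengths.found == Map(1L -> 1); notFound and failed carry over unchanged

    val strict = kvr.mapFound { s => require(s.nonEmpty); s }
    // any value for which require throws is moved to `failed`
    // with the thrown exception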
- */ - def mapFoundValues[V2](f: V => Try[Option[V2]]): KeyValueResult[K, V2] = { - val builder = new KeyValueResultBuilder[K, V2]() - - found.foreach { - case (k, v) => builder.update(k, f(v)) - } - builder.addNotFound(notFound) - builder.addFailed(failed) - - builder.result() - } - - /** - * map over the pairs of results, creating a new KeyValueResult based on the returned - * tuples from the provided function. - */ - def mapPairs[K2, V2](f: (K, Try[Option[V]]) => (K2, Try[Option[V2]])): KeyValueResult[K2, V2] = { - val builder = new KeyValueResultBuilder[K2, V2] - - def update(k: K, v: Try[Option[V]]): Unit = - f(k, v) match { - case (k2, v2) => builder.update(k2, v2) - } - - found.foreach { - case (k, v) => - update(k, Return(Some(v))) - } - notFound.foreach { k => - update(k, Return.None) - } - failed.foreach { - case (k, t) => - update(k, Throw(t)) - } - - builder.result() - } - - /** - * filter the KeyValueResult, to produce a new KeyValueResult - */ - override def filter(p: ((K, Try[Option[V]])) => Boolean): KeyValueResult[K, V] = { - val builder = new KeyValueResultBuilder[K, V] - - def update(k: K, v: Try[Option[V]]): Unit = { - if (p((k, v))) - builder.update(k, v) - } - - found.foreach { - case (k, v) => - update(k, Return(Some(v))) - } - notFound.foreach { k => - update(k, Return.None) - } - failed.foreach { - case (k, t) => - update(k, Throw(t)) - } - - builder.result() - } - - /** - * filterNot the KeyValueResult, to produce a new KeyValueResult - */ - override def filterNot(p: ((K, Try[Option[V]])) => Boolean): KeyValueResult[K, V] = { - filter(!p(_)) - } - - /** - * Returns an Iterator that yields all found, notFound, and failed values - * represented in the combined Try[Option[V]] type. - */ - def iterator: Iterator[(K, Try[Option[V]])] = - (found.iterator map { case (k, v) => k -> Return(Some(v)) }) ++ - (notFound.iterator map { k => - k -> Return.None - }) ++ - (failed.iterator map { case (k, t) => k -> Throw(t) }) - - /** - * Returns a copy in which all failed entries are converted to misses. The specific - * failure information is lost. - */ - def convertFailedToNotFound = - copy( - notFound = notFound ++ failed.keySet, - failed = Map.empty[K, Throwable] - ) - - /** - * Returns a copy in which all not-found entries are converted to failures. - */ - def convertNotFoundToFailed(f: K => Throwable) = - copy( - notFound = Set.empty[K], - failed = failed ++ (notFound map { k => - k -> f(k) - }) - ) - - /** - * Returns a copy in which failures are repaired with the supplied handler - */ - def repairFailed[V2 >: V](handler: PartialFunction[Throwable, Option[V2]]) = - if (failed.isEmpty) { - this - } else { - val builder = new KeyValueResultBuilder[K, V2] - builder.addFound(found) - builder.addNotFound(notFound) - failed map { case (k, t) => builder.update(k, Throw(t) handle handler) } - builder.result() - } - - /** - * Combines two KeyValueResults. Conflicting founds/notFounds are resolved - * as founds, and conflicting (found|notFound)/failures are resolved as (found|notFound). - */ - def ++[K2 >: K, V2 >: V](that: KeyValueResult[K2, V2]): KeyValueResult[K2, V2] = { - if (this.isEmpty) that - else if (that.isEmpty) this.asInstanceOf[KeyValueResult[K2, V2]] - else { - val found = this.found ++ that.found - val notFound = NotFound(this.notFound ++ that.notFound, found.keySet) - val failed = NotFound(NotFound(this.failed ++ that.failed, found.keySet), notFound) - KeyValueResult(found, notFound, failed) - } - } - - /** - * Looks up a result for a key. 
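The merge and repair combinators above compose naturally for primary/fallback reads. A sketch with hypothetical results, assuming only the operations shown in this diff:

    import com.twitter.servo.keyvalue.KeyValueResult

    val primary = KeyValueResult[Long, String](
      failed = Map(1L -> new Exception("timeout")))
    val fallback = KeyValueResult[Long, String](found = Map(1L -> "recovered"))

    // ++ resolves found/failed conflicts in favor of found:
    (primary ++ fallback).found // Map(1L -> "recovered")

    // repairFailed rewrites matching failures as found (Some) or notFound (None):
    primary.repairFailed { case _: Exception => Some("default") }.found
    // Map(1L -> "default")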
- */ - def apply(key: K): Try[Option[V]] = { - found.get(key) match { - case some @ Some(_) => Return(some) - case None => - failed.get(key) match { - case Some(t) => Throw(t) - case None => Return.None - } - } - } - - /** - * Looks up a result for a key, returning a provided default if the key is not - * found or failed. - */ - def getOrElse[V2 >: V](key: K, default: => V2): V2 = - found.getOrElse(key, default) - - /** - * If any keys fail, will return the first failure. Otherwise, - * will convert founds/notFounds to a Seq[Option[V]], ordered by - * the keys provided - */ - def toFutureSeqOfOptions(keys: Seq[K]): Future[Seq[Option[V]]] = { - failed.values.headOption match { - case Some(t) => Future.exception(t) - case None => Future.value(keys.map(found.get)) - } - } - - // This is unfortunate, but we end up pulling in Iterable's toString, - // which is not all that readable. - override def toString(): String = { - val sb = new StringBuilder(256) - sb.append("KeyValueResult(") - sb.append("found = ") - sb.append(found) - sb.append(", notFound = ") - sb.append(notFound) - sb.append(", failed = ") - sb.append(failed) - sb.append(')') - sb.toString() - } -} - -class KeyValueResultBuilder[K, V] { - private[this] val found = Map.newBuilder[K, V] - private[this] val notFound = Set.newBuilder[K] - private[this] val failed = Map.newBuilder[K, Throwable] - - def addFound(k: K, v: V) = { found += (k -> v); this } - def addNotFound(k: K) = { notFound += k; this } - def addFailed(k: K, t: Throwable) = { failed += (k -> t); this } - - def addFound(kvs: Iterable[(K, V)]) = { found ++= kvs; this } - def addNotFound(ks: Iterable[K]) = { notFound ++= ks; this } - def addFailed(kts: Iterable[(K, Throwable)]) = { failed ++= kts; this } - - def update(k: K, tryV: Try[Option[V]]) = { - tryV match { - case Throw(t) => addFailed(k, t) - case Return(None) => addNotFound(k) - case Return(Some(v)) => addFound(k, v) - } - } - - def result() = KeyValueResult(found.result(), notFound.result(), failed.result()) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.docx new file mode 100644 index 000000000..edad81f79 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala deleted file mode 100644 index 40f69b81a..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.servo.cache._ -import com.twitter.util.Future - -class CachingCounterKeyValueRepository[K]( - underlying: CounterKeyValueRepository[K], - cache: CounterCache[K], - observer: CacheObserver = NullCacheObserver) - extends CounterKeyValueRepository[K] { - - def apply(keys: Seq[K]): Future[KeyValueResult[K, Long]] = { - val uniqueKeys = keys.distinct - cache.get(uniqueKeys) flatMap { cachedResults => - recordResults(cachedResults) - - val missed = cachedResults.notFound ++ cachedResults.failed.keySet - readThrough(missed.toSeq) map { readResults => - KeyValueResult(cachedResults.found) ++ readResults - } - } - } - - private def readThrough(keys: 
Seq[K]): Future[KeyValueResult[K, Long]] = - if (keys.isEmpty) { - KeyValueResult.emptyFuture - } else { - underlying(keys) onSuccess { readResults => - for ((k, v) <- readResults.found) { - cache.add(k, v) - } - } - } - - private def recordResults(cachedResults: KeyValueResult[K, Long]): Unit = { - cachedResults.found.keys foreach { key => - observer.hit(key.toString) - } - cachedResults.notFound foreach { key => - observer.miss(key.toString) - } - observer.failure(cachedResults.failed.size) - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.docx new file mode 100644 index 000000000..7f3779389 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala deleted file mode 100644 index fe6e257d2..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala +++ /dev/null @@ -1,736 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.logging.{Level, Logger} -import com.twitter.servo.cache._ -import com.twitter.servo.util.{Effect, Gate, RateLimitingLogger} -import com.twitter.util._ -import scala.collection.mutable -import scala.util.Random - -/** - * A set of classes that indicate how to handle cached results. - */ -sealed abstract class CachedResultAction[+V] - -object CachedResultAction { - - /** Indicates a key should be fetched from the underlying repo */ - case object HandleAsMiss extends CachedResultAction[Nothing] - - /** Indicates a key should be returned as not-found, and not fetched from the underlying repo */ - case object HandleAsNotFound extends CachedResultAction[Nothing] - - /** Indicates the value should be returned as found */ - case class HandleAsFound[V](value: V) extends CachedResultAction[V] - - /** Indicates the value should not be cached */ - case object HandleAsDoNotCache extends CachedResultAction[Nothing] - - /** Indicates that the given action should be applied, and the given function applied to the resulting value */ - case class TransformSubAction[V](action: CachedResultAction[V], f: V => V) - extends CachedResultAction[V] - - /** Indicates the key should be returned as a failure */ - case class HandleAsFailed(t: Throwable) extends CachedResultAction[Nothing] - - /** Indicates that the value should be refetched asynchronously, be immediately treated - * as the given action. */ - case class SoftExpiration[V](action: CachedResultAction[V]) extends CachedResultAction[V] -} - -/** - * A set of classes representing the various states for a cached result. 
- */ -sealed abstract class CachedResult[+K, +V] { - def key: K -} - -object CachedResult { - import CachedResultAction._ - - /** Indicates the key was not in cache */ - case class NotFound[K](key: K) extends CachedResult[K, Nothing] - - /** Indicates there was an error fetching the key */ - case class Failed[K](key: K, t: Throwable) extends CachedResult[K, Nothing] - - /** Indicates the cached value could not be deserialized */ - case class DeserializationFailed[K](key: K) extends CachedResult[K, Nothing] - - /** Indicates the cached value could not be serialized */ - case class SerializationFailed[K](key: K) extends CachedResult[K, Nothing] - - /** Indicates that a NotFound tombstone was found in cached */ - case class CachedNotFound[K]( - key: K, - cachedAt: Time, - softTtlStep: Option[Short] = None) - extends CachedResult[K, Nothing] - - /** Indicates that a Deleted tombstone was found in cached */ - case class CachedDeleted[K]( - key: K, - cachedAt: Time, - softTtlStep: Option[Short] = None) - extends CachedResult[K, Nothing] - - /** Indicates that value was found in cached */ - case class CachedFound[K, V]( - key: K, - value: V, - cachedAt: Time, - softTtlStep: Option[Short] = None) - extends CachedResult[K, V] - - /** Indicates that value should not be cached until */ - case class DoNotCache[K](key: K, until: Option[Time]) extends CachedResult[K, Nothing] - - type Handler[K, V] = CachedResult[K, V] => CachedResultAction[V] - - type PartialHandler[K, V] = CachedResult[K, V] => Option[CachedResultAction[V]] - - type HandlerFactory[Q, K, V] = Q => Handler[K, V] - - /** - * companion object for Handler type - */ - object Handler { - - /** - * terminate a PartialHandler to produce a new Handler - */ - def apply[K, V]( - partial: PartialHandler[K, V], - handler: Handler[K, V] = defaultHandler[K, V] - ): Handler[K, V] = { cachedResult => - partial(cachedResult) match { - case Some(s) => s - case None => handler(cachedResult) - } - } - } - - /** - * companion object for PartialHandler type - */ - object PartialHandler { - - /** - * Sugar to produce a PartialHandler from a PartialFunction. Successive calls to - * isDefined MUST return the same result. Otherwise, take the syntax hit and wire - * up your own PartialHandler. - */ - def apply[K, V]( - partial: PartialFunction[CachedResult[K, V], CachedResultAction[V]] - ): PartialHandler[K, V] = partial.lift - - /** - * chain one PartialHandler after another to produce a new PartialHandler - */ - def orElse[K, V]( - thisHandler: PartialHandler[K, V], - thatHandler: PartialHandler[K, V] - ): PartialHandler[K, V] = { cachedResult => - thisHandler(cachedResult) match { - case some @ Some(_) => some - case None => thatHandler(cachedResult) - } - } - } - - /** - * companion object for HandlerFactory type - */ - object HandlerFactory { - def apply[Q, K, V](handler: Handler[K, V]): HandlerFactory[Q, K, V] = _ => handler - } - - def defaultHandlerFactory[Q, K, V]: HandlerFactory[Q, K, V] = - HandlerFactory[Q, K, V](defaultHandler) - - /** - * This is the default Handler. Failures are treated as misses. 
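Handlers are built by composition: a PartialHandler expresses only the cases a policy cares about and delegates the rest. A sketch of a stricter policy than the default, using the failuresAreFailures helper defined just below:

    import com.twitter.servo.repository.CachedResult

    def strictHandler[K, V]: CachedResult.Handler[K, V] =
      CachedResult.Handler(
        CachedResult.failuresAreFailures[K, V], // surface cache failures as failures
        CachedResult.defaultHandler[K, V]       // fall back to the default policy
      )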
- */ - def defaultHandler[K, V]: Handler[K, V] = { - case NotFound(_) | Failed(_, _) => HandleAsMiss - case DeserializationFailed(_) | SerializationFailed(_) => HandleAsMiss - case CachedNotFound(_, _, _) | CachedDeleted(_, _, _) => HandleAsNotFound - case CachedFound(_, value, _, _) => HandleAsFound(value) - case DoNotCache(_, Some(time)) if Time.now > time => HandleAsMiss - case DoNotCache(_, _) => HandleAsDoNotCache - } - - /** - * A PartialHandler that bubbles memcache failures up instead of converting - * those failures to misses. - */ - def failuresAreFailures[K, V] = PartialHandler[K, V] { - case Failed(_, t) => HandleAsFailed(t) - } - - /** - * A PartialHandler that doesn't attempt to write back to cache if the initial - * cache read failed, but still fetches from the underlying repo. - */ - def failuresAreDoNotCache[K, V] = PartialHandler[K, V] { - case Failed(_, _) => HandleAsDoNotCache - } - - /** - * A function that takes a cachedAt time and ttl, and returns an expiry time. This function - * _must_ be deterministic with respect to the arguments provided, otherwise, you might get a - * MatchError when using this with softTtlExpiration. - */ - type Expiry = (Time, Duration) => Time - - /** - * An Expiry function with an epsilon of zero. - */ - val fixedExpiry: Expiry = (cachedAt: Time, ttl: Duration) => cachedAt + ttl - - /** - * A repeatable "random" expiry function that perturbs the ttl with a random value - * no greater than +/-(ttl * maxFactor). - */ - def randomExpiry(maxFactor: Float): Expiry = { - if (maxFactor == 0) { - fixedExpiry - } else { (cachedAt: Time, ttl: Duration) => - { - val factor = (2 * new Random(cachedAt.inMilliseconds).nextFloat - 1) * maxFactor - cachedAt + ttl + Duration.fromNanoseconds((factor * ttl.inNanoseconds).toLong) - } - } - } - - /** - * soft-expires CachedFound and CachedNotFound based on a ttl. - * - * @param ttl - * values older than this will be considered expired, but still - * returned, and asynchronously refreshed in cache. - * @param expiry - * (optional) function to compute the expiry time - */ - def softTtlExpiration[K, V]( - ttl: Duration, - expiry: Expiry = fixedExpiry - ): PartialHandler[K, V] = - softTtlExpiration(_ => ttl, expiry) - - /** - * soft-expires CachedFound and CachedNotFound based on a ttl derived from the value - * - * @param ttl - * values older than this will be considered expired, but still - * returned, and asynchronously refreshed in cache. - * @param expiry - * (optional) function to compute the expiry time - */ - def softTtlExpiration[K, V]( - ttl: Option[V] => Duration, - expiry: Expiry - ): PartialHandler[K, V] = PartialHandler[K, V] { - case CachedFound(_, value, cachedAt, _) if expiry(cachedAt, ttl(Some(value))) < Time.now => - SoftExpiration(HandleAsFound(value)) - case CachedNotFound(_, cachedAt, _) if expiry(cachedAt, ttl(None)) < Time.now => - SoftExpiration(HandleAsNotFound) - } - - /** - * soft-expires CachedFound and CachedNotFound based on a ttl derived from both the value - * and the softTtlStep - * - * @param ttl - * values older than this will be considered expired, but still returned, and - * asynchronously refreshed in cache. 
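Putting the expiry pieces together: a sketch of a handler that serves cached entries for up to ten minutes, refreshes anything older out-of-band, and perturbs the TTL by up to +/-10% so entries written in a burst do not all expire in lockstep. The ten-minute figure is an arbitrary example value:

    import com.twitter.conversions.DurationOps._
    import com.twitter.servo.repository.CachedResult

    def softTtlHandler[K, V]: CachedResult.Handler[K, V] =
      CachedResult.Handler(
        CachedResult.softTtlExpiration[K, V](10.minutes, CachedResult.randomExpiry(0.1f))
      )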
- * @param expiry - * (optional) function to compute the expiry time - */ - def steppedSoftTtlExpiration[K, V]( - ttl: (Option[V], Option[Short]) => Duration, - expiry: Expiry = fixedExpiry - ): PartialHandler[K, V] = PartialHandler[K, V] { - case CachedFound(_, value, cachedAt, softTtlStep) - if expiry(cachedAt, ttl(Some(value), softTtlStep)) < Time.now => - SoftExpiration(HandleAsFound(value)) - case CachedNotFound(_, cachedAt, softTtlStep) - if expiry(cachedAt, ttl(None, softTtlStep)) < Time.now => - SoftExpiration(HandleAsNotFound) - case CachedDeleted(_, cachedAt, softTtlStep) - if expiry(cachedAt, ttl(None, softTtlStep)) < Time.now => - SoftExpiration(HandleAsNotFound) - } - - /** - * hard-expires CachedFound and CachedNotFound based on a ttl. - * - * @param ttl - * values older than this will be considered a miss - * @param expiry - * (optional) function to compute the expiry time - */ - def hardTtlExpiration[K, V]( - ttl: Duration, - expiry: Expiry = fixedExpiry - ): PartialHandler[K, V] = - hardTtlExpiration(_ => ttl, expiry) - - /** - * hard-expires CachedFound and CachedNotFound based on a ttl derived from the value - * - * @param ttl - * values older than this will be considered a miss - * @param expiry - * (optional) function to compute the expiry time - */ - def hardTtlExpiration[K, V]( - ttl: Option[V] => Duration, - expiry: Expiry - ): PartialHandler[K, V] = PartialHandler[K, V] { - case CachedFound(_, value, cachedAt, _) if expiry(cachedAt, ttl(Some(value))) < Time.now => - HandleAsMiss - case CachedNotFound(_, cachedAt, _) if expiry(cachedAt, ttl(None)) < Time.now => - HandleAsMiss - } - - /** - * hard-expires a CachedNotFound tombstone based on a ttl - * - * @param ttl - * values older than this will be considered expired - * @param expiry - * (optional) function to compute the expiry time - */ - def notFoundHardTtlExpiration[K, V]( - ttl: Duration, - expiry: Expiry = fixedExpiry - ): PartialHandler[K, V] = PartialHandler[K, V] { - case CachedNotFound(_, cachedAt, _) => - if (expiry(cachedAt, ttl) < Time.now) - HandleAsMiss - else - HandleAsNotFound - } - - /** - * hard-expires a CachedDeleted tombstone based on a ttl - * - * @param ttl - * values older than this will be considered expired - * @param expiry - * (optional) function to compute the expiry time - */ - def deletedHardTtlExpiration[K, V]( - ttl: Duration, - expiry: Expiry = fixedExpiry - ): PartialHandler[K, V] = PartialHandler[K, V] { - case CachedDeleted(_, cachedAt, _) => - if (expiry(cachedAt, ttl) < Time.now) - HandleAsMiss - else - HandleAsNotFound - } - - /** - * read only from cache, never fall back to underlying KeyValueRepository - */ - def cacheOnly[K, V]: Handler[K, V] = { - case CachedFound(_, value, _, _) => HandleAsFound(value) - case _ => HandleAsNotFound - } - - /** - * use either primary or backup Handler, depending on usePrimary result - * - * @param primaryHandler - * the handler to be used if usePrimary evaluates to true - * @param backupHandler - * the handle to be used if usePrimary evaluates to false - * @param usePrimary - * evaluates the query to determine which handler to use - */ - def switchedHandlerFactory[Q, K, V]( - primaryHandler: Handler[K, V], - backupHandler: Handler[K, V], - usePrimary: Q => Boolean - ): HandlerFactory[Q, K, V] = { query => - if (usePrimary(query)) - primaryHandler - else - backupHandler - } -} - -object CacheResultObserver { - case class CachingRepositoryResult[K, V]( - resultFromCache: KeyValueResult[K, Cached[V]], - resultFromCacheMissReadthrough: 
KeyValueResult[K, V], - resultFromSoftTtlReadthrough: KeyValueResult[K, V]) - def unit[K, V] = Effect.unit[CachingRepositoryResult[K, V]] -} - -object CachingKeyValueRepository { - type CacheResultObserver[K, V] = Effect[CacheResultObserver.CachingRepositoryResult[K, V]] -} - -/** - * Reads keyed values from a LockingCache, and reads through to an underlying - * KeyValueRepository for misses. supports a "soft ttl", beyond which values - * will be read through out-of-band to the originating request - * - * @param underlying - * the underlying KeyValueRepository - * @param cache - * the locking cache to read from - * @param newQuery - * a function for converting a subset of the keys of the original query into a new - * query. this is used to construct the query passed to the underlying repository - * to fetch the cache misses. - * @param handlerFactory - * A factory to produce functions that specify policies about how to handle results - * from cache. (i.e. to handle failures as misses vs failures, etc) - * @param picker - * used to choose between the value in cache and the value read from the DB when - * storing values in the cache - * @param observer - * a CacheObserver for collecting cache statistics* - * @param writeSoftTtlStep - * Write the soft_ttl_step value to indicate number of consistent reads from underlying store - * @param cacheResultObserver - * An [[Effect]] of type [[CacheResultObserver.CachingRepositoryResult]] which is useful for examining - * the results from the cache, underlying storage, and any later read-throughs. The effect is - * executed asynchronously from the request path and has no bearing on the Future[KeyValueResult]* - * returned from this Repository. - */ -class CachingKeyValueRepository[Q <: Seq[K], K, V]( - underlying: KeyValueRepository[Q, K, V], - val cache: LockingCache[K, Cached[V]], - newQuery: SubqueryBuilder[Q, K], - handlerFactory: CachedResult.HandlerFactory[Q, K, V] = - CachedResult.defaultHandlerFactory[Q, K, V], - picker: LockingCache.Picker[Cached[V]] = new PreferNewestCached[V]: PreferNewestCached[V], - observer: CacheObserver = NullCacheObserver, - writeSoftTtlStep: Gate[Unit] = Gate.False, - cacheResultObserver: CachingKeyValueRepository.CacheResultObserver[K, V] = - CacheResultObserver.unit[K, V]: Effect[CacheResultObserver.CachingRepositoryResult[K, V]]) - extends KeyValueRepository[Q, K, V] { - import CachedResult._ - import CachedResultAction._ - - protected[this] val log = Logger.get(getClass.getSimpleName) - private[this] val rateLimitedLogger = new RateLimitingLogger(logger = log) - - protected[this] val effectiveCacheStats = observer.scope("effective") - - /** - * Calculates the softTtlStep based on result from cache and underlying store. - * The softTtlStep indicates how many times we have - * performed & recorded a consistent read-through. - * A value of None is equivalent to Some(0) - it indicates zero consistent read-throughs. 
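A sketch of wiring the class together. userRepo, userCache, and the User type are hypothetical stand-ins for a concrete backend, cache, and value type; keysAsQuery is the package's identity subquery builder referenced elsewhere in this diff:

    import com.twitter.servo.cache.{Cached, LockingCache}
    import com.twitter.servo.repository._

    case class User(id: Long, name: String) // hypothetical value type
    val userRepo: KeyValueRepository[Seq[Long], Long, User] = ???
    val userCache: LockingCache[Long, Cached[User]] = ???

    val cachingRepo =
      new CachingKeyValueRepository[Seq[Long], Long, User](
        underlying = userRepo,
        cache = userCache,
        newQuery = keysAsQuery, // the missed keys themselves form the subquery
        handlerFactory = CachedResult.defaultHandlerFactory
      )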
- */ - protected[this] def updateSoftTtlStep( - underlyingResult: Option[V], - cachedResult: Cached[V] - ): Option[Short] = { - if (writeSoftTtlStep() && underlyingResult == cachedResult.value) { - cachedResult.softTtlStep match { - case Some(step) if step < Short.MaxValue => Some((step + 1).toShort) - case Some(step) if step == Short.MaxValue => cachedResult.softTtlStep - case _ => Some(1) - } - } else { - None - } - } - - protected case class ProcessedCacheResult( - hits: Map[K, V], - misses: Seq[K], - doNotCache: Set[K], - failures: Map[K, Throwable], - tombstones: Set[K], - softExpirations: Seq[K], - transforms: Map[K, (V => V)]) - - override def apply(keys: Q): Future[KeyValueResult[K, V]] = { - getFromCache(keys).flatMap { cacheResult => - val ProcessedCacheResult( - hits, - misses, - doNotCache, - failures, - tombstones, - softExpirations, - transforms - ) = - process(keys, cacheResult) - - if (log.isLoggable(Level.TRACE)) { - log.trace( - "CachingKVR.apply keys %d hit %d miss %d noCache %d failure %d " + - "tombstone %d softexp %d", - keys.size, - hits.size, - misses.size, - doNotCache.size, - failures.size, - tombstones.size, - softExpirations.size - ) - } - recordCacheStats( - keys, - notFound = misses.toSet, - doNotCache = doNotCache, - expired = softExpirations.toSet, - numFailures = failures.size, - numTombstones = tombstones.size - ) - - // now read through all notFound - val underlyingQuery = newQuery(misses ++ doNotCache, keys) - val writeToCacheQuery = if (doNotCache.nonEmpty) newQuery(misses, keys) else underlyingQuery - val futureFromUnderlying = readThrough(underlyingQuery, writeToCacheQuery) - - // async read-through for the expired results, ignore results - val softExpirationQuery = newQuery(softExpirations, keys) - val futureFromSoftExpiry = readThrough(softExpirationQuery, softExpirationQuery, cacheResult) - - // merge all results together - for { - fromUnderlying <- futureFromUnderlying - fromCache = KeyValueResult(hits, tombstones, failures) - fromUnderlyingTransformed = transformResults(fromUnderlying, transforms) - } yield { - futureFromSoftExpiry.onSuccess { readThroughResults => - cacheResultObserver( - CacheResultObserver.CachingRepositoryResult( - cacheResult, - fromUnderlyingTransformed, - readThroughResults - ) - ) - } - KeyValueResult.sum(Seq(fromCache, fromUnderlyingTransformed)) - } - } - } - - /** - * Given results and a map of keys to transform functions, apply those transform functions - * to the found results. 
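Callers see none of this machinery; a bulk read against the hypothetical cachingRepo from the previous sketch goes through the hit/miss/soft-expiry flow above transparently:

    import com.twitter.util.Future

    val result: Future[KeyValueResult[Long, User]] = cachingRepo(Seq(1L, 2L, 3L))
    // hits are served from cache, misses are read through and written back,
    // and soft-expired entries are returned immediately but refreshed out-of-band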
- */ - protected[this] def transformResults( - results: KeyValueResult[K, V], - transforms: Map[K, (V => V)] - ): KeyValueResult[K, V] = { - if (transforms.isEmpty) { - results - } else { - results.copy(found = results.found.map { - case (key, value) => - (key, transforms.get(key).map(_(value)).getOrElse(value)) - }) - } - } - - protected[this] def getFromCache(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = { - val uniqueKeys = keys.distinct - cache.get(uniqueKeys) handle { - case t: Throwable => - rateLimitedLogger.logThrowable(t, "exception caught in cache get") - - // treat total cache failure as a fetch that returned all failures - KeyValueResult(failed = uniqueKeys.map { _ -> t }.toMap) - } - } - - /** - * Buckets cache results according to the wishes of the CachedResultHandler - */ - protected[this] def process( - keys: Q, - cacheResult: KeyValueResult[K, Cached[V]] - ): ProcessedCacheResult = { - val cachedResultHandler = handlerFactory(keys) - - val hits = Map.newBuilder[K, V] - val misses = new mutable.ArrayBuffer[K] - val failures = Map.newBuilder[K, Throwable] - val tombstones = Set.newBuilder[K] - val softExpiredKeys = new mutable.ListBuffer[K] - val doNotCache = Set.newBuilder[K] - val transforms = Map.newBuilder[K, (V => V)] - - for (key <- keys) { - val cachedResult = cacheResult(key) match { - case Throw(t) => Failed(key, t) - case Return(None) => NotFound(key) - case Return(Some(cached)) => - cached.status match { - case CachedValueStatus.Found => - cached.value match { - case None => NotFound(key) - case Some(value) => - CachedFound( - key, - value, - cached.cachedAt, - cached.softTtlStep - ) - } - case CachedValueStatus.NotFound => CachedNotFound(key, cached.cachedAt) - case CachedValueStatus.Deleted => CachedDeleted(key, cached.cachedAt) - case CachedValueStatus.SerializationFailed => SerializationFailed(key) - case CachedValueStatus.DeserializationFailed => DeserializationFailed(key) - case CachedValueStatus.Evicted => NotFound(key) - case CachedValueStatus.DoNotCache => DoNotCache(key, cached.doNotCacheUntil) - } - } - - def processAction(action: CachedResultAction[V]): Unit = { - action match { - case HandleAsMiss => misses += key - case HandleAsFound(value) => hits += key -> value - case HandleAsNotFound => tombstones += key - case HandleAsDoNotCache => doNotCache += key - case HandleAsFailed(t) => failures += key -> t - case TransformSubAction(subAction, f) => - transforms += key -> f - processAction(subAction) - case SoftExpiration(subAction) => - softExpiredKeys += key - processAction(subAction) - } - } - - processAction(cachedResultHandler(cachedResult)) - } - - ProcessedCacheResult( - hits.result(), - misses, - doNotCache.result(), - failures.result(), - tombstones.result(), - softExpiredKeys, - transforms.result() - ) - } - - protected[this] def recordCacheStats( - keys: Seq[K], - notFound: Set[K], - doNotCache: Set[K], - expired: Set[K], - numFailures: Int, - numTombstones: Int - ): Unit = { - keys.foreach { key => - val wasntFound = notFound.contains(key) - val keyString = key.toString - if (wasntFound || expired.contains(key)) - effectiveCacheStats.miss(keyString) - else - effectiveCacheStats.hit(keyString) - - if (wasntFound) - observer.miss(keyString) - else - observer.hit(keyString) - } - observer.expired(expired.size) - observer.failure(numFailures) - observer.tombstone(numTombstones) - observer.noCache(doNotCache.size) - } - - /** - * read through to the underlying repository - * - * @param cacheKeys - * the keys to read and cache - */ - def 
readThrough(cacheKeys: Q): Future[KeyValueResult[K, V]] = { - readThrough(cacheKeys, cacheKeys) - } - - /** - * read through to the underlying repository - * - * @param writeToCacheQuery - * the query to pass to the writeToCache method after getting a result back from the - * underlying repository. this query can be exactly the same as underlyingQuery if - * all readThrough keys should be cached, or it may contain a subset of the keys if - * some keys should not be written back to cache. - * @param cacheResult - * the current cache results for underlyingQuery. - */ - def readThrough( - underlyingQuery: Q, - writeToCacheQuery: Q, - cacheResult: KeyValueResult[K, Cached[V]] = KeyValueResult.empty - ): Future[KeyValueResult[K, V]] = { - if (underlyingQuery.isEmpty) { - KeyValueResult.emptyFuture - } else { - underlying(underlyingQuery).onSuccess { result => - if (writeToCacheQuery.nonEmpty) { - writeToCache(writeToCacheQuery, result, cacheResult) - } - } - } - } - - /** - * Writes the contents of the given KeyValueResult to cache. - */ - def writeToCache( - keys: Q, - underlyingResult: KeyValueResult[K, V], - cacheResult: KeyValueResult[K, Cached[V]] = KeyValueResult[K, Cached[V]]() - ): Unit = { - lazy val cachedEmpty = { - val now = Time.now - Cached[V](None, CachedValueStatus.NotFound, now, Some(now), softTtlStep = None) - } - - keys.foreach { key => - // only cache Returns from the underlying repo, skip Throws. - // iff cached value matches value from underlying store - // (for both NotFound and Found results), increment softTtlStep - // otherwise, set softTtlStep to None - underlyingResult(key) match { - case Return(optUnderlyingVal) => - val softTtlStep = - cacheResult(key) match { - case Return(Some(cacheVal)) => updateSoftTtlStep(optUnderlyingVal, cacheVal) - case _ => None - } - - val status = - optUnderlyingVal match { - case Some(_) => CachedValueStatus.Found - case None => CachedValueStatus.NotFound - } - - val cached = - cachedEmpty.copy( - value = optUnderlyingVal, - status = status, - softTtlStep = softTtlStep - ) - - cache - .lockAndSet(key, LockingCache.PickingHandler(cached, picker)) - .onFailure { - case t: Throwable => - rateLimitedLogger.logThrowable(t, "exception caught in lockAndSet") - } - - case Throw(_) => None - } - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.docx new file mode 100644 index 000000000..7870384a5 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala deleted file mode 100644 index 1816596fc..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala +++ /dev/null @@ -1,50 +0,0 @@ -package com.twitter.servo.repository - -object ChunkingStrategy { - - /** - * A chunking strategy for breaking a query into fixed size chunks, with the last - * chunk possibly being any size between 1 and chunkSize. - */ - def fixedSize[K](chunkSize: Int): Seq[K] => Seq[Seq[K]] = { - fixedSize(chunkSize, keysAsQuery[K]) - } - - /** - * A chunking strategy for breaking a query into fixed size chunks, with the last - * chunk possibly being any size between 1 and chunkSize. 
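fixedSize pairs naturally with KeyValueRepository.chunked (defined later in this diff) to fan a large query out in bounded batches and sum the partial results. A sketch with a hypothetical repo:

    import com.twitter.servo.repository._

    val bigRepo: KeyValueRepository[Seq[Long], Long, String] = ???

    val chunkedRepo: KeyValueRepository[Seq[Long], Long, String] =
      KeyValueRepository.chunked(bigRepo, ChunkingStrategy.fixedSize[Long](100))
    // a query of 250 distinct keys becomes three backend calls of 100, 100, and 50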
- */ - def fixedSize[Q <: Seq[K], K]( - chunkSize: Int, - newQuery: SubqueryBuilder[Q, K] - ): Q => Seq[Q] = { query => - query.distinct.grouped(chunkSize) map { newQuery(_, query) } toSeq - } - - /** - * A chunking strategy for breaking a query into roughly equal sized chunks no - * larger than maxSize. The last chunk may be slightly smaller due to rounding. - */ - def equalSize[K](maxSize: Int): Seq[K] => Seq[Seq[K]] = { - equalSize(maxSize, keysAsQuery[K]) - } - - /** - * A chunking strategy for breaking a query into roughly equal sized chunks no - * larger than maxSize. The last chunk may be slightly smaller due to rounding. - */ - def equalSize[Q <: Seq[K], K]( - maxSize: Int, - newQuery: SubqueryBuilder[Q, K] - ): Q => Seq[Q] = { query => - { - if (query.size <= maxSize) { - Seq(query) - } else { - val chunkCount = math.ceil(query.size / maxSize.toDouble) - val chunkSize = math.ceil(query.size / chunkCount).toInt - query.distinct.grouped(chunkSize) map { newQuery(_, query) } toSeq - } - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.docx new file mode 100644 index 000000000..ba9559f54 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala deleted file mode 100644 index f5c3f4c46..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala +++ /dev/null @@ -1,161 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.servo.cache.{CacheObserver, Cached, LockingCache} -import com.twitter.servo.repository -import com.twitter.servo.repository.CachedResult.{Handler, HandlerFactory} -import com.twitter.servo.util._ -import com.twitter.util._ - -import scala.util.control.NoStackTrace - -object DarkmodingKeyValueRepositoryFactory { - val DefaultEwmaHalfLife = 5.minutes - val DefaultRecentWindow = 10.seconds - val DefaultWindowSize = 5000 - val DefaultAvailabilityFromSuccessRate = - Availability.linearlyScaled(highWaterMark = 0.98, lowWaterMark = 0.75, minAvailability = 0.02) - - def DefaultEwmaTracker = new EwmaSuccessRateTracker(DefaultEwmaHalfLife) - def DefaultRecentWindowTracker = SuccessRateTracker.recentWindowed(DefaultRecentWindow) - def DefaultRollingWindowTracker = SuccessRateTracker.rollingWindow(DefaultWindowSize) - - /** - * Wraps an underlying repository, which can be manually or automatically darkmoded. - * - * Auto-darkmoding is based on success rate (SR) as reported by a [[SuccessRateTracker]]. - * - * @param readFromUnderlying Open: operate normally. Closed: read from backupRepo regardless of SR. - * @param autoDarkmode Open: auto-darkmoding kicks in based on SR. Closed: auto-darkmoding will not kick in regardless of SR. - * @param stats Used to record success rate and availability; often should be scoped to this repo for stats naming - * @param underlyingRepo The underlying repo; read from when not darkmoded - * @param backupRepo The repo to read from when darkmoded; defaults to an always-failing repo. 
- * @param successRateTracker Strategy for reporting SR, usually over a moving window - * @param availabilityFromSuccessRate Function to calculate availability based on success rate - * @param shouldIgnore don't count certain exceptions as failures, e.g. cancellations - */ - def darkmoding[Q <: Seq[K], K, V]( - readFromUnderlying: Gate[Unit], - autoDarkmode: Gate[Unit], - stats: StatsReceiver, - underlyingRepo: KeyValueRepository[Q, K, V], - backupRepo: KeyValueRepository[Q, K, V] = - KeyValueRepository.alwaysFailing[Q, K, V](DarkmodedException), - successRateTracker: SuccessRateTracker = DefaultRecentWindowTracker, - availabilityFromSuccessRate: Double => Double = DefaultAvailabilityFromSuccessRate, - shouldIgnore: Throwable => Boolean = SuccessRateTrackingRepository.isCancellation - ): KeyValueRepository[Q, K, V] = { - val (successRateTrackingRepoFactory, successRateGate) = - SuccessRateTrackingRepository.withGate[Q, K, V]( - stats, - availabilityFromSuccessRate, - successRateTracker.observed(stats), - shouldIgnore - ) - val gate = mkGate(successRateGate, readFromUnderlying, autoDarkmode) - - Repository.selected( - q => gate(()), - successRateTrackingRepoFactory(underlyingRepo), - backupRepo - ) - } - - /** - * Produces a caching repository around an underlying repository, which - * can be manually or automatically darkmoded. - * - * @param underlyingRepo The underlying repo from which to read - * @param cache The typed locking cache to fall back to when darkmoded - * @param picker Used to break ties when a value being written is already present in cache - * @param readFromUnderlying Open: operate normally. Closed: read from cache regardless of SR. - * @param autoDarkmode Open: auto-darkmoding kicks in based on SR. Closed: auto-darkmoding will not kick in regardless of SR. - * @param cacheObserver Observes interactions with the cache; often should be scoped to this repo for stats naming - * @param stats Used to record various stats; often should be scoped to this repo for stats naming - * @param handler a [[Handler]] to use when not darkmoded - * @param successRateTracker Strategy for reporting SR, usually over a moving window - * @param availabilityFromSuccessRate Function to calculate availability based on success rate - * @param shouldIgnore don't count certain exceptions as failures, e.g. 
cancellations - */ - def darkmodingCaching[K, V, CacheKey]( - underlyingRepo: KeyValueRepository[Seq[K], K, V], - cache: LockingCache[K, Cached[V]], - picker: LockingCache.Picker[Cached[V]], - readFromUnderlying: Gate[Unit], - autoDarkmode: Gate[Unit], - cacheObserver: CacheObserver, - stats: StatsReceiver, - handler: Handler[K, V], - successRateTracker: SuccessRateTracker = DefaultRecentWindowTracker, - availabilityFromSuccessRate: Double => Double = DefaultAvailabilityFromSuccessRate, - shouldIgnore: Throwable => Boolean = SuccessRateTrackingRepository.isCancellation, - writeSoftTtlStep: Gate[Unit] = Gate.False, - cacheResultObserver: CachingKeyValueRepository.CacheResultObserver[K, V] = - CacheResultObserver.unit[K, V]: Effect[CacheResultObserver.CachingRepositoryResult[K, V]] - ): CachingKeyValueRepository[Seq[K], K, V] = { - val (successRateTrackingRepoFactory, successRateGate) = - SuccessRateTrackingRepository.withGate[Seq[K], K, V]( - stats, - availabilityFromSuccessRate, - successRateTracker.observed(stats), - shouldIgnore - ) - val gate = mkGate(successRateGate, readFromUnderlying, autoDarkmode) - - new CachingKeyValueRepository[Seq[K], K, V]( - successRateTrackingRepoFactory(underlyingRepo), - cache, - repository.keysAsQuery, - mkHandlerFactory(handler, gate), - picker, - cacheObserver, - writeSoftTtlStep = writeSoftTtlStep, - cacheResultObserver = cacheResultObserver - ) - } - - /** - * Create a composite gate suitable for controlling darkmoding, usually via decider - * - * @param successRate gate that should close and open according to success rate (SR) changes - * @param readFromUnderlying if open: returned gate operates normally. if closed: returned gate will be closed regardless of SR - * @param autoDarkMode if open: close gate according to SR. if closed: gate ignores SR changes - * @return - */ - def mkGate( - successRate: Gate[Unit], - readFromUnderlying: Gate[Unit], - autoDarkMode: Gate[Unit] - ): Gate[Unit] = - readFromUnderlying & (successRate | !autoDarkMode) - - /** - * Construct a [[CachedResult.HandlerFactory]] with sane defaults for use with a caching darkmoded repository - * @param softTtl TTL for soft-expiration of values in the cache - * @param expiry Used to apply the softTTL (e.g. fixed vs randomly perturbed) - */ - def mkDefaultHandler[K, V]( - softTtl: Option[V] => Duration, - expiry: CachedResult.Expiry - ): Handler[K, V] = - CachedResult.Handler( - CachedResult.failuresAreDoNotCache, - CachedResult.Handler(CachedResult.softTtlExpiration(softTtl, expiry)) - ) - - private[repository] def mkHandlerFactory[CacheKey, V, K]( - handler: Handler[K, V], - successRateGate: Gate[Unit] - ): HandlerFactory[Seq[K], K, V] = - query => - if (successRateGate(())) handler - else CachedResult.cacheOnly -} - -/** - * This exception is returned from a repository when it is auto-darkmoded due to low backend - * success rate, or darkmoded manually via gate (usually a decider). 
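A sketch of the gate algebra above. Gate.False appears elsewhere in this diff; Gate.True is assumed here to be its always-open counterpart:

    import com.twitter.servo.util.Gate

    val killSwitch: Gate[Unit] = Gate.True    // assumed: manual switch, currently open
    val successRate: Gate[Unit] = Gate.False  // tracker reports unhealthy SR
    val autoDarkmode: Gate[Unit] = Gate.True  // assumed: auto-darkmoding enabled

    val gate =
      DarkmodingKeyValueRepositoryFactory.mkGate(successRate, killSwitch, autoDarkmode)
    gate(()) // false: reads are diverted to the backup repo until SR recovers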
- */ -class DarkmodedException extends Exception with NoStackTrace -object DarkmodedException extends DarkmodedException diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.docx new file mode 100644 index 000000000..a70323091 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala deleted file mode 100644 index f8df436d0..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala +++ /dev/null @@ -1,74 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.logging.Logger -import com.twitter.servo.cache.{InProcessCache, StatsReceiverCacheObserver} -import com.twitter.servo.util.FrequencyCounter -import com.twitter.util.Future - -/** - * A KeyValueRepository which uses a sliding window to track - * the frequency at which keys are requested and diverts requests - * for keys above the promotionThreshold through an in-memory request cache. - * - * @param underlyingRepo - * the underlying KeyValueRepository - * @param newQuery - * a function for converting a subset of the keys of the original query into a new query. - * @param windowSize - * the number of previous requests to include in the window - * @param promotionThreshold - * the number of requests for the same key in the window required - * to divert the request through the request cache - * @param cacheFactory - * a function which constructs a future response cache of the given size - * @param statsReceiver - * records stats on the cache - * @param disableLogging - * disables logging in token cache for pdp purposes - */ -object HotKeyCachingKeyValueRepository { - def apply[Q <: Seq[K], K, V]( - underlyingRepo: KeyValueRepository[Q, K, V], - newQuery: SubqueryBuilder[Q, K], - windowSize: Int, - promotionThreshold: Int, - cacheFactory: Int => InProcessCache[K, Future[Option[V]]], - statsReceiver: StatsReceiver, - disableLogging: Boolean = false - ): KeyValueRepository[Q, K, V] = { - val log = Logger.get(getClass.getSimpleName) - - val promotionsCounter = statsReceiver.counter("promotions") - - val onPromotion = { (k: K) => - log.debug("key %s promoted to HotKeyCache", k.toString) - promotionsCounter.incr() - } - - val frequencyCounter = new FrequencyCounter[K](windowSize, promotionThreshold, onPromotion) - - // Maximum cache size occurs in the event that every key in the buffer occurs - // `promotionThreshold` times. We apply a failure-refreshing filter to avoid - // caching failed responses. 
- val cache = - InProcessCache.withFilter( - cacheFactory(windowSize / promotionThreshold) - )( - ResponseCachingKeyValueRepository.refreshFailures - ) - - val observer = - new StatsReceiverCacheObserver(statsReceiver, windowSize, "request_cache", disableLogging) - - val cachingRepo = - new ResponseCachingKeyValueRepository[Q, K, V](underlyingRepo, cache, newQuery, observer) - - KeyValueRepository.selected( - frequencyCounter.incr, - cachingRepo, - underlyingRepo, - newQuery - ) - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.docx new file mode 100644 index 000000000..3035623da Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala deleted file mode 100644 index f1711e99c..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.util.{Future, Return, Throw, Try} - -class ImmutableKeyValueRepository[K, V](data: Map[K, Try[V]]) - extends KeyValueRepository[Seq[K], K, V] { - def apply(keys: Seq[K]) = Future { - val hits = keys flatMap { key => - data.get(key) map { key -> _ } - } toMap - - val found = hits collect { case (key, Return(value)) => key -> value } - val failed = hits collect { case (key, Throw(t)) => key -> t } - val notFound = keys.toSet -- found.keySet -- failed.keySet - - KeyValueResult(found, notFound, failed) - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.docx new file mode 100644 index 000000000..85863ab1a Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala deleted file mode 100644 index 82f6393f0..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala +++ /dev/null @@ -1,192 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.util.{Future, Try} - -object KeyValueRepository { - - /** - * Builds a KeyValueRepository that returns KeyValueResults in which all keys failed with the - * provided Throwable. - */ - def alwaysFailing[Q <: Seq[K], K, V](failure: Throwable): KeyValueRepository[Q, K, V] = - (query: Q) => - Future.value( - KeyValueResult[K, V]( - failed = query map { _ -> failure } toMap - ) - ) - - /** - * Builds an immutable KeyValueRepository - */ - def apply[K, V](data: Map[K, Try[V]]): KeyValueRepository[Seq[K], K, V] = - new ImmutableKeyValueRepository(data) - - /** - * Sets up a mapReduce type operation on a KeyValueRepository where the query mapping function - * breaks the query up into smaller chunks, and the reducing function is just KeyValueResult.sum. 
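A sketch of wiring the hot-key cache. lruCache is a hypothetical factory for a bounded InProcessCache, bigRepo is the stand-in from the chunking sketch, and the window/threshold values are arbitrary examples:

    import com.twitter.finagle.stats.NullStatsReceiver
    import com.twitter.servo.cache.InProcessCache
    import com.twitter.util.Future

    def lruCache[K, V](size: Int): InProcessCache[K, Future[Option[V]]] = ??? // hypothetical

    val hotKeyRepo =
      HotKeyCachingKeyValueRepository[Seq[Long], Long, String](
        underlyingRepo = bigRepo,
        newQuery = keysAsQuery,
        windowSize = 10000,      // requests tracked in the sliding window
        promotionThreshold = 50, // 50 hits in the window promotes a key
        cacheFactory = lruCache[Long, String] _,
        statsReceiver = NullStatsReceiver
      )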
- */ - def chunked[Q, K, V]( - repo: KeyValueRepository[Q, K, V], - chunker: Q => Seq[Q] - ): KeyValueRepository[Q, K, V] = - Repository.mapReduced(repo, chunker, KeyValueResult.sum[K, V]) - - /** - * Wraps a KeyValueRepository with stats recording functionality. - */ - def observed[Q, K, V]( - repo: KeyValueRepository[Q, K, V], - observer: RepositoryObserver, - querySize: Q => Int - ): KeyValueRepository[Q, K, V] = - query => { - observer.time(querySize(query)) { - repo(query).respond(observer.observeKeyValueResult) - } - } - - /** - * Creates a new KeyValueRepository that dispatches to onTrueRepo if the key - * predicate returns true, dispatches to onFalseRepo otherwise. - */ - def selected[Q <: Seq[K], K, V]( - select: K => Boolean, - onTrueRepo: KeyValueRepository[Q, K, V], - onFalseRepo: KeyValueRepository[Q, K, V], - queryBuilder: SubqueryBuilder[Q, K] - ): KeyValueRepository[Q, K, V] = selectedByQuery( - predicateFactory = _ => select, - onTrueRepo = onTrueRepo, - onFalseRepo = onFalseRepo, - queryBuilder = queryBuilder - ) - - /** - * Creates a new KeyValueRepository that uses predicateFactory to create a key predicate, then - * dispatches to onTrueRepo if the key predicate returns true, dispatches to onFalseRepo - * otherwise. - */ - def selectedByQuery[Q <: Seq[K], K, V]( - predicateFactory: Q => (K => Boolean), - onTrueRepo: KeyValueRepository[Q, K, V], - onFalseRepo: KeyValueRepository[Q, K, V], - queryBuilder: SubqueryBuilder[Q, K] - ): KeyValueRepository[Q, K, V] = { - val queryIsEmpty = (q: Q) => q.isEmpty - val r1 = shortCircuitEmpty(queryIsEmpty)(onTrueRepo) - val r2 = shortCircuitEmpty(queryIsEmpty)(onFalseRepo) - - (query: Q) => { - val predicate = predicateFactory(query) - val (q1, q2) = query.partition(predicate) - val futureRst1 = r1(queryBuilder(q1, query)) - val futureRst2 = r2(queryBuilder(q2, query)) - for { - r1 <- futureRst1 - r2 <- futureRst2 - } yield r1 ++ r2 - } - } - - /** - * Creates a new KeyValueRepository that dispatches to onTrueRepo if the query - * predicate returns true, dispatches to onFalseRepo otherwise. - */ - def choose[Q, K, V]( - predicate: Q => Boolean, - onTrueRepo: KeyValueRepository[Q, K, V], - onFalseRepo: KeyValueRepository[Q, K, V] - ): KeyValueRepository[Q, K, V] = { (query: Q) => - { - if (predicate(query)) { - onTrueRepo(query) - } else { - onFalseRepo(query) - } - } - } - - /** - * Short-circuit a KeyValueRepository to return an empty - * KeyValueResult when the query is empty rather than calling the - * backend. It is up to the caller to define empty. - * - * The implementation of repo and isEmpty should satisfy: - * - * forAll { (q: Q) => !isEmpty(q) || (repo(q).get == KeyValueResult.empty[K, V]) } - */ - def shortCircuitEmpty[Q, K, V]( - isEmpty: Q => Boolean - )( - repo: KeyValueRepository[Q, K, V] - ): KeyValueRepository[Q, K, V] = { q => - if (isEmpty(q)) KeyValueResult.emptyFuture[K, V] else repo(q) - } - - /** - * Short-circuit a KeyValueRepository to return an empty - * KeyValueResult for any empty Traversable query rather than - * calling the backend. - * - * The implementation of repo should satisfy: - * - * forAll { (q: Q) => !q.isEmpty || (repo(q).get == KeyValueResult.empty[K, V]) } - */ - def shortCircuitEmpty[Q <: Traversable[_], K, V]( - repo: KeyValueRepository[Q, K, V] - ): KeyValueRepository[Q, K, V] = shortCircuitEmpty[Q, K, V]((_: Q).isEmpty)(repo) - - /** - * Turns a bulking KeyValueRepository into a non-bulking Repository. The query to the - * KeyValueRepository must be nothing more than a Seq[K]. 
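A sketch of collapsing the bulk interface down to a single-key Repository with the singular adapter described above, reusing the hypothetical cachingRepo from the earlier caching sketch:

    import com.twitter.util.Future

    val oneUser: Repository[Long, Option[User]] =
      KeyValueRepository.singular(cachingRepo)

    val u: Future[Option[User]] = oneUser(12L)
    // a failed key surfaces as a failed Future; a miss as Future.value(None)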
- */ - def singular[K, V](repo: KeyValueRepository[Seq[K], K, V]): Repository[K, Option[V]] = - singular(repo, (key: K) => Seq(key)) - - /** - * Turns a bulking KeyValueRepository into a non-bulking Repository. - */ - def singular[Q, K, V]( - repo: KeyValueRepository[Q, K, V], - queryBuilder: K => Q - ): Repository[K, Option[V]] = - key => { - repo(queryBuilder(key)) flatMap { results => - Future.const(results(key)) - } - } - - /** - * Converts a KeyValueRepository with value type V to one with value type - * V2 using a function that maps found values. - */ - def mapFound[Q, K, V, V2]( - repo: KeyValueRepository[Q, K, V], - f: V => V2 - ): KeyValueRepository[Q, K, V2] = - repo andThen { _ map { _ mapFound f } } - - /** - * Converts a KeyValueRepository with value type V to one with value type - * V2 using a function that maps over results. - */ - def mapValues[Q, K, V, V2]( - repo: KeyValueRepository[Q, K, V], - f: Try[Option[V]] => Try[Option[V2]] - ): KeyValueRepository[Q, K, V2] = - repo andThen { _ map { _ mapValues f } } - - /** - * Turns a KeyValueRepository which may throw an exception to another - * KeyValueRepository which always returns Future.value(KeyValueResult) - * even when there is an exception - */ - def scatterExceptions[Q <: Traversable[K], K, V]( - repo: KeyValueRepository[Q, K, V] - ): KeyValueRepository[Q, K, V] = - q => - repo(q) handle { - case t => KeyValueResult[K, V](failed = q map { _ -> t } toMap) - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.docx new file mode 100644 index 000000000..b4151857a Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala deleted file mode 100644 index 4f0fc1f42..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala +++ /dev/null @@ -1,89 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.finagle.stats.{StatsReceiver, Stat} -import com.twitter.servo.util.{ExceptionCounter, LogarithmicallyBucketedTimer} -import com.twitter.util.{Future, Return, Throw, Try} - -class RepositoryObserver( - statsReceiver: StatsReceiver, - bucketBySize: Boolean, - exceptionCounter: ExceptionCounter) { - protected[this] lazy val timer = new LogarithmicallyBucketedTimer(statsReceiver) - protected[this] val sizeStat = statsReceiver.stat("size") - protected[this] val foundStat = statsReceiver.counter("found") - protected[this] val notFoundStat = statsReceiver.counter("not_found") - protected[this] val total = statsReceiver.counter("total") - private[this] val timeStat = statsReceiver.stat(LogarithmicallyBucketedTimer.LatencyStatName) - - def this(statsReceiver: StatsReceiver, bucketBySize: Boolean = true) = - this(statsReceiver, bucketBySize, new ExceptionCounter(statsReceiver)) - - def time[T](size: Int = 1)(f: => Future[T]) = { - sizeStat.add(size) - if (bucketBySize) - timer(size)(f) - else - Stat.timeFuture(timeStat)(f) - } - - private[this] def total(size: Int = 1): Unit = total.incr(size) - - def found(size: Int = 1): Unit = { - foundStat.incr(size) - total(size) - } - - def notFound(size: Int = 1): Unit = { - 
notFoundStat.incr(size) - total(size) - } - - def exception(ts: Throwable*): Unit = { - exceptionCounter(ts) - total(ts.size) - } - - def exceptions(ts: Seq[Throwable]): Unit = { - exception(ts: _*) - } - - def observeTry[V](tryObj: Try[V]): Unit = { - tryObj.respond { - case Return(_) => found() - case Throw(t) => exception(t) - } - } - - def observeOption[V](optionTry: Try[Option[V]]): Unit = { - optionTry.respond { - case Return(Some(_)) => found() - case Return(None) => notFound() - case Throw(t) => exception(t) - } - } - - def observeKeyValueResult[K, V](resultTry: Try[KeyValueResult[K, V]]): Unit = { - resultTry.respond { - case Return(result) => - found(result.found.size) - notFound(result.notFound.size) - exceptions(result.failed.values.toSeq) - case Throw(t) => - exception(t) - } - } - - /** - * observeSeq observes the result of a fetch against a key-value repository - * when the returned value is a Seq of type V. When the fetch is completed, - * observes whether or not the returned Seq is empty, contains some number of - * items, or has failed in some way. - */ - def observeSeq[V](seqTry: Try[Seq[V]]): Unit = { - seqTry.respond { - case Return(seq) if seq.isEmpty => notFound() - case Return(seq) => found(seq.length) - case Throw(t) => exception(t) - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.docx new file mode 100644 index 000000000..8d05bb6c6 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala deleted file mode 100644 index 5a62fe175..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala +++ /dev/null @@ -1,133 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.servo.util.RetryHandler -import com.twitter.util.{Duration, Future, Timer} - -object Repository { - - /** - * Composes a RepositoryFilter onto a Repository, producing a new Repository. - */ - def composed[Q, R1, R2]( - repo: Repository[Q, R1], - filter: RepositoryFilter[Q, R1, R2] - ): Repository[Q, R2] = - q => filter(q, repo(q)) - - /** - * Chains 2 or more RepositoryFilters together into a single RepositoryFilter. - */ - def chained[Q, R1, R2, R3]( - f1: RepositoryFilter[Q, R1, R2], - f2: RepositoryFilter[Q, R2, R3], - fs: RepositoryFilter[Q, R3, R3]* - ): RepositoryFilter[Q, R1, R3] = { - val first: RepositoryFilter[Q, R1, R3] = (q, r) => f2(q, f1(q, r)) - fs.toList match { - case Nil => first - case head :: tail => chained(first, head, tail: _*) - } - } - - /** - * Wraps a Repository with a function that transforms queries on the way in, and - * results on the way out. - */ - def transformed[Q, Q2, R, R2]( - repo: Repository[Q, R], - qmapper: Q2 => Q = (identity[Q] _): (Q => Q), - rmapper: R => R2 = (identity[R] _): (R => R) - ): Repository[Q2, R2] = - qmapper andThen repo andThen { _ map rmapper } - - /** - * Wraps a Repository with another Repository that explodes the query into multiple - * queries, executes those queries in parallel, then combines (reduces) results. 
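- *
- * For example, a small sketch (the toy sumRepo below is assumed for illustration):
- * {{{
- * val sumRepo: Repository[Seq[Int], Int] = q => Future.value(q.sum)
- * // Execute large queries as parallel chunks of at most 100 elements each.
- * val chunkedSum: Repository[Seq[Int], Int] =
- *   mapReduced(sumRepo, (q: Seq[Int]) => q.grouped(100).toSeq, (rs: Seq[Int]) => rs.sum)
- * }}}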
- */ - def mapReduced[Q, Q2, R, R2]( - repo: Repository[Q, R], - mapper: Q2 => Seq[Q], - reducer: Seq[R] => R2 - ): Repository[Q2, R2] = - mapReducedWithQuery(repo, mapper, (rs: Seq[(Q, R)]) => reducer(rs map { case (_, r) => r })) - - /** - * An extension of mapReduced that passes query and result to the reducer. - */ - def mapReducedWithQuery[Q, Q2, R, R2]( - repo: Repository[Q, R], - mapper: Q2 => Seq[Q], - reducer: Seq[(Q, R)] => R2 - ): Repository[Q2, R2] = { - val queryRepo: Q => Future[(Q, R)] = q => repo(q) map { (q, _) } - q2 => Future.collect(mapper(q2) map queryRepo) map reducer - } - - /** - * Creates a new Repository that dispatches to onTrueRepo if the given query predicate - * returns true, and dispatches to onFalseRepo otherwise. - */ - def selected[Q, R]( - select: Q => Boolean, - onTrueRepo: Repository[Q, R], - onFalseRepo: Repository[Q, R] - ): Repository[Q, R] = - dispatched(select andThen { - case true => onTrueRepo - case false => onFalseRepo - }) - - /** - * Creates a new Repository that uses a function that selects an underlying repository - * based upon the query. - */ - def dispatched[Q, R](f: Q => Repository[Q, R]): Repository[Q, R] = - q => f(q)(q) - - /** - * Wraps a Repository with the given RetryHandler, which may automatically retry - * failed requests. - */ - def retrying[Q, R](handler: RetryHandler[R], repo: Repository[Q, R]): Repository[Q, R] = - handler.wrap(repo) - - /** - * Produces a new Repository where the returned Future must complete within the specified - * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. - * - * ''Note'': On timeout, the underlying future is not interrupted. - */ - def withTimeout[Q, R]( - timer: Timer, - timeout: Duration, - repo: Repository[Q, R] - ): Repository[Q, R] = - repo andThen { _.within(timer, timeout) } - - /** - * Produces a new Repository where the returned Future must complete within the specified - * timeout, otherwise the Future fails with the specified Throwable. - * - * ''Note'': On timeout, the underlying future is not interrupted. - */ - def withTimeout[Q, R]( - timer: Timer, - timeout: Duration, - exc: => Throwable, - repo: Repository[Q, R] - ): Repository[Q, R] = - repo andThen { _.within(timer, timeout, exc) } - - /** - * Wraps a Repository with stats recording functionality.
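- *
- * A usage sketch (the statsReceiver and userRepo values, and the UserId/User types,
- * are assumed):
- * {{{
- * val observer = new RepositoryObserver(statsReceiver.scope("user_repo"))
- * val instrumented: Repository[UserId, Option[User]] = observed(userRepo, observer)
- * }}}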
- */ - def observed[Q, R]( - repo: Repository[Q, R], - observer: RepositoryObserver - ): Repository[Q, R] = - query => { - observer.time() { - repo(query).respond(observer.observeTry) - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.docx new file mode 100644 index 000000000..9508a6d3f Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala deleted file mode 100644 index efbd6f5a7..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala +++ /dev/null @@ -1,103 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.servo.cache._ -import com.twitter.util._ - -object ResponseCachingKeyValueRepository { - - /** - * A cache filter that excludes cached future responses that are already fulfilled. - * Using this policy ensures that this repository will only ever have one outstanding request for the same item. - */ - def refreshSatisfied[K, V]: (K, Future[Option[V]]) => Boolean = - (_, v) => v.isDefined - - /** - * A cache filter that excludes cached future responses that are failures. - */ - def refreshFailures[K, V]: (K, Future[Option[V]]) => Boolean = - (_, v) => - v.poll match { - case Some(t) => t.isThrow - case None => false - } -} - -/** - * A repository that caches (in-process) Future responses from an underlying KeyValueRepository. - * Each time a request for a key is made, the repository first checks - * if any Future responses for that key are already cached. - * If so, the Future response from cache is returned. - * If not, a new Promise is placed into the cache, - * the underlying repository is queried to fulfill the Promise, - * and the new Promise is returned to the caller.
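- *
- * A hypothetical construction sketch (the userRepo and futureCache values and the
- * User type are assumed; keysAsQuery comes from the repository package object):
- * {{{
- * val cachingRepo = new ResponseCachingKeyValueRepository[Seq[Long], Long, User](
- *   underlying = userRepo,
- *   cache = futureCache, // an InProcessCache[Long, Future[Option[User]]]
- *   newQuery = keysAsQuery[Long]
- * )
- * }}}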
- * @param underlying - * the underlying KeyValueRepository - * @param cache - * an inprocess cache of (future) responses - * @param newQuery - * a function which constructs a new query from a query and a set of keys - * @param observer - * a CacheObserver which records the hits/misses on the request cache - */ -class ResponseCachingKeyValueRepository[Q <: Seq[K], K, V]( - underlying: KeyValueRepository[Q, K, V], - cache: InProcessCache[K, Future[Option[V]]], - newQuery: SubqueryBuilder[Q, K], - observer: CacheObserver = NullCacheObserver) - extends KeyValueRepository[Q, K, V] { - private[this] def load(query: Q, promises: Seq[(K, Promise[Option[V]])]): Unit = { - if (promises.nonEmpty) { - underlying(newQuery(promises map { case (k, _) => k }, query)) respond { - case Throw(t) => promises foreach { case (_, p) => p.updateIfEmpty(Throw(t)) } - case Return(kvr) => promises foreach { case (k, p) => p.updateIfEmpty(kvr(k)) } - } - } - } - - sealed trait RefreshResult[K, V] { - def toInterruptible: Future[Option[V]] - } - - private case class CachedResult[K, V](result: Future[Option[V]]) extends RefreshResult[K, V] { - def toInterruptible = result.interruptible - } - - private case class LoadResult[K, V](keyToLoad: K, result: Promise[Option[V]]) - extends RefreshResult[K, V] { - def toInterruptible = result.interruptible - } - - private[this] def refresh(key: K): RefreshResult[K, V] = - synchronized { - cache.get(key) match { - case Some(updated) => - observer.hit(key.toString) - CachedResult(updated) - case None => - observer.miss(key.toString) - val promise = new Promise[Option[V]] - cache.set(key, promise) - LoadResult(key, promise) - } - } - - def apply(query: Q): Future[KeyValueResult[K, V]] = - KeyValueResult.fromSeqFuture(query) { - val result: Seq[RefreshResult[K, V]] = - query map { key => - cache.get(key) match { - case Some(value) => - observer.hit(key.toString) - CachedResult[K, V](value) - case None => - refresh(key) - } - } - - val toLoad = result collect { case LoadResult(k, p) => k -> p } - load(query, toLoad) - - result map { _.toInterruptible } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.docx new file mode 100644 index 000000000..7c399fdbf Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala deleted file mode 100644 index 9f2e315c7..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.servo.repository - -import scala.collection.SeqProxy - -/** - * RichQuery is a mixin trait for KeyValueRepository query objects that are more complex - * than Seq[K]. It extends SeqProxy to satisfy servo's requirements but provides Product-based - * implementations of equals and toString. (The query object is expected to be a case class - * and therefore implement Product.) - */ -trait RichQuery[K] extends SeqProxy[K] with Product { - // Compare to other RichQuery instances via Product; otherwise allow any sequence to - // match our proxied Seq (thereby matching the semantics of a case class that simply - // extends SeqProxy). 
- override def equals(any: Any) = { - any match { - case null => false - - case other: RichQuery[_] => - ( - this.productArity == other.productArity && - this.productIterator.zip(other.productIterator).foldLeft(true) { - case (ok, (e1, e2)) => - ok && e1 == e2 - } - ) - - case other => other.equals(this) - } - } - - // Produce reasonable string for testing - override def toString = "%s(%s)".format(this.productPrefix, this.productIterator.mkString(",")) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.docx new file mode 100644 index 000000000..3edaf89fd Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala deleted file mode 100644 index d4d9aed9d..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala +++ /dev/null @@ -1,81 +0,0 @@ -package com.twitter.servo.repository - -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.{CancelledConnectionException, CancelledRequestException} -import com.twitter.servo.util.{Gate, SuccessRateTracker} -import com.twitter.util.Throwables.RootCause -import java.util.concurrent.CancellationException - -object SuccessRateTrackingRepository { - - /** - * (successes, failures) - */ - type SuccessRateObserver = (Int, Int) => Unit - - /** - * Identifies [[Throwable]]s that should not be counted as failures. - * - * This is a total function instead of a partial function so it can reliably recurse on itself - * to find a root cause. - */ - def isCancellation(t: Throwable): Boolean = - t match { - // We don't consider CancelledRequestExceptions or CancelledConnectionExceptions to be - // failures in order not to tarnish our success rate on upstream request cancellations. - case _: CancelledRequestException => true - case _: CancelledConnectionException => true - // non-finagle backends can throw CancellationExceptions when their futures are cancelled. - case _: CancellationException => true - // Mux servers can return ClientDiscardedRequestException. - case _: ClientDiscardedRequestException => true - // Most of these exceptions can be wrapped in com.twitter.finagle.Failure - case RootCause(t) => isCancellation(t) - case _ => false - } - - /** - * Return a Success Rate (SR) tracking repository along with the gate controlling it. - * - * @param stats Provides availability gauge - * @param availabilityFromSuccessRate function to calculate availability given SR - * @param tracker strategy for tracking (usually recent) SR - * @param shouldIgnore don't count certain exceptions as failures, e.g. 
cancellations - * @return tuple of (SR tracking repo, gate closing if SR drops too far) - */ - def withGate[Q <: Seq[K], K, V]( - stats: StatsReceiver, - availabilityFromSuccessRate: Double => Double, - tracker: SuccessRateTracker, - shouldIgnore: Throwable => Boolean = isCancellation - ): (KeyValueRepository[Q, K, V] => KeyValueRepository[Q, K, V], Gate[Unit]) = { - val successRateGate = tracker.observedAvailabilityGate(availabilityFromSuccessRate, stats) - - (new SuccessRateTrackingRepository[Q, K, V](_, tracker.record, shouldIgnore), successRateGate) - } -} - -/** - * A KeyValueRepository that provides feedback on query success rate to - * a SuccessRateObserver. Both found and not found are considered successful - * responses, while failures are not. Cancellations are ignored by default. - */ -class SuccessRateTrackingRepository[Q <: Seq[K], K, V]( - underlying: KeyValueRepository[Q, K, V], - observer: SuccessRateTrackingRepository.SuccessRateObserver, - shouldIgnore: Throwable => Boolean = SuccessRateTrackingRepository.isCancellation) - extends KeyValueRepository[Q, K, V] { - def apply(query: Q) = - underlying(query) onSuccess { kvr => - val nonIgnoredFailures = kvr.failed.values.foldLeft(0) { - case (count, t) if shouldIgnore(t) => count - case (count, _) => count + 1 - } - observer(kvr.found.size + kvr.notFound.size, nonIgnoredFailures) - } onFailure { t => - if (!shouldIgnore(t)) { - observer(0, query.size) - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.docx new file mode 100644 index 000000000..6b2ac8869 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala deleted file mode 100644 index 4c4fe7e4d..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala +++ /dev/null @@ -1,50 +0,0 @@ -package com.twitter.servo - -import com.twitter.util.Future - -package object repository { - - /** - * Base repository type. Maps a Query to a future Result. - */ - type Repository[-Q, +R] = Q => Future[R] - - /** - * RepositoryFilters can be chained onto Repositories to asynchronously apply transformations to - * Repository results. - */ - type RepositoryFilter[-Q, -R, +S] = (Q, Future[R]) => Future[S] - - type KeyValueResult[K, V] = keyvalue.KeyValueResult[K, V] - val KeyValueResult = keyvalue.KeyValueResult - - /** - * A KeyValueRepository is a type of repository that handles bulk gets of data. The query - * defines the values to fetch, and is usually made up of a Seq[K], possibly with other - * contextual information needed to perform the query. The result is a KeyValueResult, - * which contains a break-out of found, notFound, and failed key lookups. The set of - * keys may or may not be computable locally from the query. This top-level type does not - * require that the keys are computable from the query, but certain instances, such as - * CachingKeyValueRepository, do require key-computability.
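- *
- * A toy sketch (the KeyValueResult `found` constructor argument is an assumption here,
- * not defined in this file):
- * {{{
- * val repo: KeyValueRepository[Seq[Long], Long, String] =
- *   ids => Future.value(KeyValueResult(found = ids.map(id => id -> id.toString).toMap))
- * repo(Seq(1L, 2L)) // found = Map(1L -> "1", 2L -> "2"); notFound and failed empty
- * }}}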
- */ - type KeyValueRepository[Q, K, V] = Repository[Q, KeyValueResult[K, V]] - - type CounterKeyValueRepository[K] = KeyValueRepository[Seq[K], K, Long] - - /** - * For KeyValueRepository scenarios where the query is a sequence of keys, a SubqueryBuilder - * defines how to convert a sub-set of the keys from the query into a query. - */ - type SubqueryBuilder[Q <: Seq[K], K] = (Seq[K], Q) => Q - - /** - * A SubqueryBuilder where the query type is nothing more than a sequence of keys. - */ - @deprecated("use keysAsQuery", "1.1.0") - def KeysAsQuery[K]: SubqueryBuilder[Seq[K], K] = keysAsQuery[K] - - /** - * A SubqueryBuilder where the query type is nothing more than a sequence of keys. - */ - def keysAsQuery[K]: SubqueryBuilder[Seq[K], K] = (keys, parentQuery) => keys -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.docx new file mode 100644 index 000000000..1240a4a87 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala deleted file mode 100644 index a6dd69e26..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala +++ /dev/null @@ -1,112 +0,0 @@ -package com.twitter.servo.store - -import com.twitter.servo.cache.{Cached, CachedValueStatus, LockingCache} -import com.twitter.logging.Logger -import com.twitter.util.{Future, Time} - -/** - * Wraps a cache around an underlying store. - * - * CachingStore is a specialization of TransformingCachingStore where the store and cache are - * assumed to have the same key and value types. See TransformingCachingStore for a discussion - * of the arguments to CachingStore. - */ -class CachingStore[K, V]( - cache: LockingCache[K, Cached[V]], - underlying: Store[K, V], - valuePicker: LockingCache.Picker[Cached[V]], - key: V => K) - extends TransformingCachingStore[K, V, K, V]( - cache, - underlying, - valuePicker, - key, - identity, - identity - ) - -/** - * Wraps a cache of differing key/value types around an underlying store. - * - * Updates are applied first (unmodified) to the underlying store and then - * the cache is updated after running the key/value through a one-way function - * to derive the key/value as expected by the cache. 
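- *
- * A hypothetical wiring sketch (the lockingCache, userStore, and picker values and the
- * UserId/User types are assumed):
- * {{{
- * val cachingStore: Store[UserId, User] =
- *   new CachingStore(lockingCache, userStore, picker, (u: User) => u.id)
- * }}}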
- * - * @param cache - * the wrapping cache - * - * @param underlying - * the underlying store - * - * @param valuePicker - * chooses between existing and new value - * - * @param key - * computes a key from the value being stored - * - * @param cacheKey - * transforms the store's key type to the cache's key type - * - * @param cacheValue - * transforms the store's value type to the cache's value type - */ -class TransformingCachingStore[K, V, CacheK, CacheV]( - cache: LockingCache[CacheK, Cached[CacheV]], - underlying: Store[K, V], - valuePicker: LockingCache.Picker[Cached[CacheV]], - key: V => K, - cacheKey: K => CacheK, - cacheValue: V => CacheV) - extends Store[K, V] { - protected[this] val log = Logger.get(getClass.getSimpleName) - - override def create(value: V): Future[V] = { - chainCacheOp[V]( - underlying.create(value), - result => cache(key(result), Some(result), CachedValueStatus.Found, "new") - ) - } - - override def update(value: V): Future[Unit] = { - chainCacheOp[Unit]( - underlying.update(value), - _ => cache(key(value), Some(value), CachedValueStatus.Found, "updated") - ) - } - - override def destroy(key: K): Future[Unit] = { - chainCacheOp[Unit]( - underlying.destroy(key), - _ => cache(key, None, CachedValueStatus.Deleted, "deleted") - ) - } - - /** - * Subclasses may override this to alter the relationship between the result - * of the underlying Store operation and the result of the Cache operation. - * By default, the cache operation occurs asynchronously and only upon success - * of the store operation. Cache operation failures are logged but otherwise - * ignored. - */ - protected[this] def chainCacheOp[Result]( - storeOp: Future[Result], - cacheOp: Result => Future[Unit] - ): Future[Result] = { - storeOp onSuccess { cacheOp(_) } - } - - protected[this] def cache( - key: K, - value: Option[V], - status: CachedValueStatus, - desc: String - ): Future[Unit] = { - val now = Time.now - val cached = Cached(value map { cacheValue(_) }, status, now, None, Some(now)) - val handler = LockingCache.PickingHandler(cached, valuePicker) - cache.lockAndSet(cacheKey(key), handler).unit onFailure { - case t => - log.error(t, "exception caught while caching %s value", desc) - } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.docx new file mode 100644 index 000000000..6583f0090 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala deleted file mode 100644 index 96866e854..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala +++ /dev/null @@ -1,13 +0,0 @@ -package com.twitter.servo.store - -import com.twitter.util.Future - -trait KeyValueStore[C, K, V, R] { - def put(ctx: C, key: K, value: Option[V]): Future[R] = multiPut(ctx, Seq((key -> value))) - def multiPut(ctx: C, kvs: Seq[(K, Option[V])]): Future[R] -} - -trait SimpleKeyValueStore[K, V] extends KeyValueStore[Unit, K, V, Unit] { - def put(key: K, value: Option[V]): Future[Unit] = multiPut((), Seq(key -> value)) - def multiPut(kvs: Seq[(K, Option[V])]): Future[Unit] = multiPut((), kvs) -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.docx 
b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.docx new file mode 100644 index 000000000..d9902d548 Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala deleted file mode 100644 index ae582c307..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.servo.store - -import com.twitter.finagle.stats.{StatsReceiver, Stat} -import com.twitter.servo.util.{ExceptionCounter, LogarithmicallyBucketedTimer} -import com.twitter.util.Future - -class StoreObserver(statsReceiver: StatsReceiver) { - protected[this] val exceptionCounter = new ExceptionCounter(statsReceiver) - - def time[T](f: => Future[T]) = { - Stat.timeFuture(statsReceiver.stat(LogarithmicallyBucketedTimer.LatencyStatName))(f) - } - - def exception(ts: Throwable*): Unit = exceptionCounter(ts) -} - -class ObservableStore[K, V](underlying: Store[K, V], statsReceiver: StatsReceiver) - extends Store[K, V] { - protected[this] val observer = new StoreObserver(statsReceiver) - - override def create(value: V) = observer.time { - underlying.create(value) onFailure { observer.exception(_) } - } - - override def update(value: V) = observer.time { - underlying.update(value) onFailure { observer.exception(_) } - } - - override def destroy(key: K) = observer.time { - underlying.destroy(key) onFailure { observer.exception(_) } - } -} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.docx b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.docx new file mode 100644 index 000000000..3a985d54e Binary files /dev/null and b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.docx differ diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala deleted file mode 100644 index a86283b82..000000000 --- a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala +++ /dev/null @@ -1,93 +0,0 @@ -package com.twitter.servo.store - -import com.twitter.servo.util.Gate -import com.twitter.util.Future - -/** - * models a write-store of key/values - */ -trait Store[K, V] { - def create(value: V): Future[V] - def update(value: V): Future[Unit] - def destroy(key: K): Future[Unit] -} - -object Store { - - /** - * Filter store operations based on either the key or the value. If the gate passes then forward - * the operation to the underlying store, if not then forward the operation to a null store - * (effectively a no-op) - */ - def filtered[K, V](store: Store[K, V], filterKey: Gate[K], filterValue: Gate[V]) = - new GatedStore(store, new NullStore[K, V], filterKey, filterValue) - - /** - * A store type that selects between one of two underlying stores based on the key/value of the - * operation. If the key/value gate passes, forward the operation to the primary store, otherwise - * forward the operation to the secondary store. 
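- *
- * For example (a sketch; the stores and the decider-backed gates are assumed values):
- * {{{
- * val store: Store[Long, String] =
- *   Store.gated(newStore, legacyStore, usePrimaryKey, usePrimaryValue)
- * }}}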
- */ - def gated[K, V]( - primary: Store[K, V], - secondary: Store[K, V], - usePrimaryKey: Gate[K], - usePrimaryValue: Gate[V] - ) = new GatedStore(primary, secondary, usePrimaryKey, usePrimaryValue) - - /** - * A store type that selects between one of two underlying stores based on a predicate value, - * which may change dynamically at runtime. - */ - def deciderable[K, V]( - primary: Store[K, V], - backup: Store[K, V], - primaryIsAvailable: => Boolean - ) = new DeciderableStore(primary, backup, primaryIsAvailable) -} - -trait StoreWrapper[K, V] extends Store[K, V] { - def underlyingStore: Store[K, V] - - override def create(value: V) = underlyingStore.create(value) - override def update(value: V) = underlyingStore.update(value) - override def destroy(key: K) = underlyingStore.destroy(key) -} - -class NullStore[K, V] extends Store[K, V] { - override def create(value: V) = Future.value(value) - override def update(value: V) = Future.Done - override def destroy(key: K) = Future.Done -} - -/** - * A Store type that selects between one of two underlying stores based - * on the key/value of the operation; the gates' decisions may change dynamically at runtime. - */ -private[servo] class GatedStore[K, V]( - primary: Store[K, V], - secondary: Store[K, V], - usePrimaryKey: Gate[K], - usePrimaryValue: Gate[V]) - extends Store[K, V] { - private[this] def pick[T](item: T, gate: Gate[T]) = if (gate(item)) primary else secondary - - override def create(value: V) = pick(value, usePrimaryValue).create(value) - override def update(value: V) = pick(value, usePrimaryValue).update(value) - override def destroy(key: K) = pick(key, usePrimaryKey).destroy(key) -} - -/** - * A Store type that selects between one of two underlying stores based - * on a predicate value, which may change dynamically at runtime.
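- *
- * For example (a sketch; the stores are assumed, and decider.isAvailable stands in for
- * any by-name boolean check):
- * {{{
- * val store: Store[Long, String] =
- *   Store.deciderable(primaryStore, backupStore, decider.isAvailable("use_primary_store"))
- * }}}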
- */ -class DeciderableStore[K, V]( - primary: Store[K, V], - backup: Store[K, V], - primaryIsAvailable: => Boolean) - extends Store[K, V] { - private[this] def pick = if (primaryIsAvailable) primary else backup - - override def create(value: V) = pick.create(value) - override def update(value: V) = pick.update(value) - override def destroy(key: K) = pick.destroy(key) -} diff --git a/tweetypie/servo/repo/src/main/thrift/BUILD b/tweetypie/servo/repo/src/main/thrift/BUILD deleted file mode 100644 index 6ad3c0873..000000000 --- a/tweetypie/servo/repo/src/main/thrift/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -create_thrift_libraries( - base_name = "thrift", - sources = ["**/*.thrift"], - platform = "java8", - tags = ["bazel-compatible"], - generate_languages = [ - "java", - "scala", - "strato", - ], - provides_java_name = "servo-repo-thrift-java", - provides_scala_name = "servo-repo-thrift-scala", -) diff --git a/tweetypie/servo/repo/src/main/thrift/BUILD.docx b/tweetypie/servo/repo/src/main/thrift/BUILD.docx new file mode 100644 index 000000000..1ae6ff35d Binary files /dev/null and b/tweetypie/servo/repo/src/main/thrift/BUILD.docx differ diff --git a/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.docx b/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.docx new file mode 100644 index 000000000..2eead9e4e Binary files /dev/null and b/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.docx differ diff --git a/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift b/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift deleted file mode 100644 index 51b7373f3..000000000 --- a/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift +++ /dev/null @@ -1,39 +0,0 @@ -#@namespace scala com.twitter.servo.cache.thriftscala -#@ namespace strato com.twitter.servo.cache -// the java namespace is unused, but appeases the thrift linter gods -namespace java com.twitter.servo.cache.thriftjava - -enum CachedValueStatus { - FOUND = 0, - NOT_FOUND = 1, - DELETED = 2, - SERIALIZATION_FAILED = 3, - DESERIALIZATION_FAILED = 4, - EVICTED = 5, - DO_NOT_CACHE = 6 -} - -/** - * Caching metadata for a binary cache value - */ -struct CachedValue { - 1: optional binary value - // can be used to distinguish between deletion tombstones and not-found tombstones - 2: CachedValueStatus status - // when the cache value was written - 3: i64 cached_at_msec - // set if the cache was read through - 4: optional i64 read_through_at_msec - // set if the cache was written through - 5: optional i64 written_through_at_msec - // This optional field is only read when the CachedValueStatus is DO_NOT_CACHE. - // When CachedValueStatus is DO_NOT_CACHE and this field is not set, the key - // will not be cached, with no time limit. If the client wants to cache - // immediately, they would not set DO_NOT_CACHE. - 6: optional i64 do_not_cache_until_msec - // Indicates how many times we've successfully checked - // the cached value against the backing store. Should be initially set to 0. - // The client may choose to increase the soft TTL duration based on this value.
- // See http://go/gd-dynamic-cache-ttls and http://go/strato-progressive-ttls for some use cases - 7: optional i16 soft_ttl_step -} (persisted='true') diff --git a/tweetypie/servo/request/BUILD b/tweetypie/servo/request/BUILD deleted file mode 100644 index 434ab68f4..000000000 --- a/tweetypie/servo/request/BUILD +++ /dev/null @@ -1,5 +0,0 @@ -target( - dependencies = [ - "tweetypie/servo/request/src/main/scala", - ], -) diff --git a/tweetypie/servo/request/BUILD.docx b/tweetypie/servo/request/BUILD.docx new file mode 100644 index 000000000..dff4d6bed Binary files /dev/null and b/tweetypie/servo/request/BUILD.docx differ diff --git a/tweetypie/servo/request/src/main/scala/BUILD b/tweetypie/servo/request/src/main/scala/BUILD deleted file mode 100644 index 2d50540e5..000000000 --- a/tweetypie/servo/request/src/main/scala/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -scala_library( - sources = ["**/*.scala"], - platform = "java8", - provides = scala_artifact( - org = "com.twitter", - name = "servo-request", - repo = artifactory, - ), - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "finagle/finagle-core/src/main", - "tweetypie/servo/util", - "twitter-config/yaml", - "util/util-stats/src/main/scala", - ], - exports = [ - "tweetypie/servo/util", - ], -) diff --git a/tweetypie/servo/request/src/main/scala/BUILD.docx b/tweetypie/servo/request/src/main/scala/BUILD.docx new file mode 100644 index 000000000..8311a7749 Binary files /dev/null and b/tweetypie/servo/request/src/main/scala/BUILD.docx differ diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.docx b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.docx new file mode 100644 index 000000000..e446e1601 Binary files /dev/null and b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.docx differ diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala deleted file mode 100644 index 1547adbbd..000000000 --- a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala +++ /dev/null @@ -1,172 +0,0 @@ -package com.twitter.servo.request - -import com.twitter.servo.gate.RateLimitingGate -import com.twitter.servo.util.Gate -import com.twitter.util.Future - -/** - * Collects per-request stats by method-name and client. - */ -trait ClientRequestAuthorizer extends ((String, Option[String]) => Future[Unit]) { self => - - /** - * @param methodName the name of the Service method being called - * @param clientIdStrOpt an Option of the string value of the originating - * request's ClientId - */ - def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] - - /** - * Compose this authorizer with another so that one is applied after the other. - * - * The resultant authorizer requires both underlying authorizers to succeed in - * order to authorize a request. 
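- *
- * For example (a sketch; the client name and rate limit are illustrative):
- * {{{
- * val byAllowlist = ClientRequestAuthorizer.allowlisted(Set("tweet-service"))
- * val byRate = ClientRequestAuthorizer.rateLimited(100.0)
- * val authorizer = byAllowlist andThen byRate
- * }}}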
- */ - def andThen(other: ClientRequestAuthorizer) = new ClientRequestAuthorizer { - override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = { - self.apply(methodName, clientIdStrOpt) flatMap { _ => - other(methodName, clientIdStrOpt) - } - } - } -} - -object ClientRequestAuthorizer { - case class UnauthorizedException(msg: String) extends Exception(msg) - - protected[this] val noClientIdException = - Future.exception(new UnauthorizedException("No ClientId specified")) - protected[this] val unauthorizedException = - new UnauthorizedException("Your ClientId is not authorized.") - protected[this] val overRateLimitException = - new UnauthorizedException("Your ClientId is over the allowed rate limit.") - - /** - * Increment stats counters for this request. - * - * Note that ClientRequestAuthorizer.observed doesn't compose in the same fashion - * as other authorizers via `andThen`. In order to observe authorization results, - * pass in an underlying authorizer as an argument to observed. - */ - def observed( - underlyingAuthorizer: ClientRequestAuthorizer, - observer: ClientRequestObserver - ) = new ClientRequestAuthorizer { - override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = { - val clientIdStr = clientIdStrOpt.getOrElse("no_client_id") - - observer(methodName, clientIdStrOpt map { Seq(_) }) - - underlyingAuthorizer(methodName, clientIdStrOpt) onFailure { _ => - observer.unauthorized(methodName, clientIdStr) - } onSuccess { _ => - observer.authorized(methodName, clientIdStr) - } - } - } - - def observed(observer: ClientRequestObserver): ClientRequestAuthorizer = - observed(ClientRequestAuthorizer.permissive, observer) - - /** - * Lets all requests through. - */ - def permissive = new ClientRequestAuthorizer { - override def apply(methodName: String, clientIdStrOpt: Option[String]) = Future.Done - } - - /** - * A Generic Authorizer that allows you to pass in your own authorizer function (filter). - * The filter should take in methodName and clientId and return a Boolean decision - * - * Note: Requires requests to have ClientIds. - * @param exception return this exception if the request does not pass the filter - */ - def filtered( - filter: (String, String) => Boolean, - exception: Exception = unauthorizedException - ): ClientRequestAuthorizer = - new ClientRequestAuthorizer { - val futureException = Future.exception(exception) - - override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = { - clientIdStrOpt match { - case Some(clientIdStr) => - if (filter(methodName, clientIdStr)) - Future.Done - else - futureException - case None => - noClientIdException - } - } - } - - /** - * Authorizes client requests based on a allowlist of ClientId strings. - */ - def allowlisted(allowlist: Set[String]): ClientRequestAuthorizer = - filtered { (_, clientIdStr) => - allowlist.contains(clientIdStr) - } - - /** - * Authorizes requests if and only if they have an associated ClientId. - */ - def withClientId: ClientRequestAuthorizer = filtered { (_, _) => - true - } - - /** - * Consult a (presumably) Decider-backed predicate to authorize requests by ClientId. - * @param exception return this exception if the request does not pass the filter - */ - def deciderable( - isAvailable: String => Boolean, - exception: Exception = unauthorizedException - ): ClientRequestAuthorizer = - filtered( - { (_, clientIdStr) => - isAvailable(clientIdStr) - }, - exception - ) - - /** - * Simple rate limiter for unknown client ids. 
Useful for letting new clients - * send some traffic without the risk of being overrun by requests. - * - * @param limitPerSecond Number of calls per second we can tolerate - */ - def rateLimited(limitPerSecond: Double): ClientRequestAuthorizer = { - gated(RateLimitingGate.uniform(limitPerSecond), overRateLimitException) - } - - /** - * Simple Gate-based authorizer that authorizes according to the result of the gate, - * regardless of the client/method name. - */ - def gated( - gate: Gate[Unit], - exception: Exception = unauthorizedException - ): ClientRequestAuthorizer = { - deciderable(_ => gate(), exception) - } - - /** - * @return A ClientRequestAuthorizer that switches between two provided - * ClientRequestAuthorizers depending on a decider. - */ - def select( - decider: Gate[Unit], - ifTrue: ClientRequestAuthorizer, - ifFalse: ClientRequestAuthorizer - ): ClientRequestAuthorizer = - new ClientRequestAuthorizer { - override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = - decider.pick( - ifTrue(methodName, clientIdStrOpt), - ifFalse(methodName, clientIdStrOpt) - ) - } -} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.docx b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.docx new file mode 100644 index 000000000..cefab5158 Binary files /dev/null and b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.docx differ diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala deleted file mode 100644 index e7de2ab04..000000000 --- a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala +++ /dev/null @@ -1,58 +0,0 @@ -package com.twitter.servo.request - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.Future - -object ClientRequestObserver { - private[request] val noClientIdKey = "no_client_id" -} - -/** - * Provides per-request stats based on Finagle ClientId. - * - * @param statsReceiver the StatsReceiver used for counting - * @param observeAuthorizationAttempts if true (the default), observe all attempts. If false, - * only failures (unauthorized attempts) are observed.
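- *
- * A usage sketch (statsReceiver is assumed to be in scope):
- * {{{
- * val observer = new ClientRequestObserver(statsReceiver)
- * val authorizer = ClientRequestAuthorizer.observed(ClientRequestAuthorizer.withClientId, observer)
- * }}}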
- */ -class ClientRequestObserver( - statsReceiver: StatsReceiver, - observeAuthorizationAttempts: Boolean = true) - extends ((String, Option[Seq[String]]) => Future[Unit]) { - import ClientRequestObserver.noClientIdKey - - protected[this] val scopedReceiver = statsReceiver.scope("client_request") - protected[this] val unauthorizedReceiver = scopedReceiver.scope("unauthorized") - protected[this] val unauthorizedCounter = scopedReceiver.counter("unauthorized") - - /** - * @param methodName the name of the Service method being called - * @param clientIdScopesOpt optional sequence of scope strings representing the - * originating request's ClientId - */ - override def apply(methodName: String, clientIdScopesOpt: Option[Seq[String]]): Future[Unit] = { - if (observeAuthorizationAttempts) { - scopedReceiver.counter(methodName).incr() - clientIdScopesOpt match { - case Some(clientIdScopes) => - scopedReceiver.scope(methodName).counter(clientIdScopes: _*).incr() - - case None => - scopedReceiver.scope(methodName).counter(noClientIdKey).incr() - } - } - Future.Done - } - - /** - * Increments a counter for unauthorized requests. - */ - def unauthorized(methodName: String, clientIdStr: String): Unit = { - unauthorizedCounter.incr() - unauthorizedReceiver.scope(methodName).counter(clientIdStr).incr() - } - - def authorized(methodName: String, clientIdStr: String): Unit = {} -} - -object NullClientRequestObserver extends ClientRequestObserver(NullStatsReceiver) diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.docx b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.docx new file mode 100644 index 000000000..a266ee4aa Binary files /dev/null and b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.docx differ diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala deleted file mode 100644 index 5ccc171ed..000000000 --- a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala +++ /dev/null @@ -1,233 +0,0 @@ -package com.twitter.servo.request - -import com.twitter.config.yaml.YamlMap -import com.twitter.util.Try - -/** - * Module for defining a set of permissions. This is similar to - * Enumeration in the scala standard library. - * - * To use, instantiate a subclass: - * - * {{{ - * object MyPermissions extends PermissionModule { - * val Eat = create("eat") - * val Drink = create("drink") - * } - * }}} - * - * Permissions only support one kind of authorization, which is that - * you can check whether a holder of permissions has all of the - * permissions in a particular set. - * - * {{{ - * val snack = MyPermissions.Eat - * val dinner = MyPermissions.Eat union MyPermissions.Drink - * val canEat = MyPermissions.Eat - * dinner satisfiedBy canEat // false - * snack satisfiedBy canEat // true - * }}} - * - * Each instance will have its own distinct permission type, so it is - * not possible to confuse the permissions defined in different - * modules. 
- * - * {{{ - * scala> object P1 extends PermissionModule { val Read = create("read") } - * scala> object P2 extends PermissionModule { val Read = create("read") } - * scala> P1.Read satisfiedBy P2.Read - * error: type mismatch; - * found : P2.Permissions - * required: P1.Permissions - * P1.Read satisfiedBy P2.Read - * }}} - * - * Once an instance has been created, it will not be possible to - * create new permissions. The intention is that all permissions will - * be created at object initialization time. - * - * Each instance also supplies functionality for accessing permissions - * by name, including parsing client permission maps from YAML. - */ -trait PermissionModule { - // This var is used during object initialization to collect all of - // the permissions that are created in the subclass. The lazy - // initializer for `All` will set this to null as a side-effect, so - // that further permission creations are not allowed. - @volatile private[this] var allPerms: Set[String] = Set.empty - - /** - * Create a new Permission with the given name. Note that "*" is a - * reserved string for `All` permissions, thus it cannot be - * used as the name of an individual permission. - * - * This method must be called before `All` is accessed. - * The intention is that it should be called as part of - * object initialization. - * - * Note that some methods of PermissionModule access `All`, so it is - * best to create all of your permissions before doing anything - * else. - * - * @throws RuntimeException if it is called after `All` has been - * initialized. - */ - protected def create(name: String) = { - synchronized { - if (allPerms == null) { - throw new RuntimeException("Permission creation after initialization") - } - - allPerms = allPerms union Set(name) - } - - new Permissions(Set(name)) - } - - /** - * Get a set of permissions containing just the single named permission. It - * will return None if there is no permission by that name. - * - * No permissions may be defined after this method is called. - */ - def get(name: String): Option[Permissions] = All.get(name) - - /** - * Get the set of permissions that contains just the single named - * permission. - * - * @throws RuntimeException if there is no defined permission with - * this name. - * - * No permissions may be defined after this method is called. - */ - def apply(name: String): Permissions = - get(name) match { - case None => throw new RuntimeException("Unknown permission: " + name) - case Some(p) => p - } - - /** - * No permissions (required or held) - */ - val Empty: Permissions = new Permissions(Set.empty) - - /** - * All defined permissions. - * - * No permissions may be defined after this value is initialized. - */ - lazy val All: Permissions = { - val p = new Permissions(allPerms) - allPerms = null - p - } - - /** - * Load permissions from a YAML map. - * - * No permissions may be defined after this method is called. - * - * @return a map from client identifier to permission set. - * @throws RuntimeException when the permission from the Map is not defined. - */ - def fromYaml(m: YamlMap): Try[Map[String, Permissions]] = - Try { - m.keys.map { k => - k -> fromSeq((m yamlList k).map { _.toString }) - }.toMap - } - - /** - * Load permissions from a map. - * - * No permissions may be defined after this method is called. - * - * @param m a map from client identifier to a set of permission strings - * - * @return a map from client identifier to permission set. - * @throws RuntimeException when the permission from the Map is not defined.
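- *
- * For example, reusing the MyPermissions module from the trait docs above:
- * {{{
- * MyPermissions.fromMap(Map("growth-service" -> Seq("eat", "drink")))
- * // => Return(Map("growth-service" -> Permissions(eat, drink)))
- * }}}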
- */ - def fromMap(m: Map[String, Seq[String]]): Try[Map[String, Permissions]] = - Try { - m.map { case (k, v) => k -> fromSeq(v) } - } - - /** - * Load permissions from a Seq. - * - * No permissions may be defined after this method is called. - * - * @param permissionStrings a Seq of permission strings - * - * @return a permission set. - * @throws RuntimeException when the permission is not defined. - */ - def fromSeq(permissionStrings: Seq[String]): Permissions = - permissionStrings.foldLeft(Empty) { (p, v) => - v match { - case "all" if get("all").isEmpty => All - case other => p union apply(other) - } - } - - /** - * Authorizer based on the Permissions required per RPC method name. - * @param requiredPermissions - * map of RPC method names to Permissions required for that RPC - * @param clientPermissions - * map of ClientId to Permissions a client has - */ - def permissionBasedAuthorizer( - requiredPermissions: Map[String, Permissions], - clientPermissions: Map[String, Permissions] - ): ClientRequestAuthorizer = - ClientRequestAuthorizer.filtered { (methodName, clientId) => - requiredPermissions.get(methodName) exists { - _ satisfiedBy clientPermissions.getOrElse(clientId, Empty) - } - } - - /** - * A set of permissions. This can represent either permissions that - * are required to perform an action, or permissions that are held - * by a client. - * - * This type cannot be instantiated directly. Use the methods of - * your subclass of PermissionModule to do so. - */ - class Permissions private[PermissionModule] (private[PermissionModule] val permSet: Set[String]) { - - /** - * Does the supplied set of held permissions satisfy the - * requirements of this set of permissions? - * - * For example, if this set of permissions is Set("read"), and the - * other set of permissions is Set("read", "write"), then the - * other set of permissions satisfies this set. - */ - def satisfiedBy(other: Permissions): Boolean = permSet subsetOf other.permSet - - override def equals(other: Any): Boolean = - other match { - case p: Permissions => p.permSet == permSet - case _ => false - } - - override lazy val hashCode: Int = 5 + 37 * permSet.hashCode - - /** - * Get a single permission - */ - def get(permName: String): Option[Permissions] = - if (permSet contains permName) Some(new Permissions(Set(permName))) else None - - /** - * Create a new permission set that holds the permissions of this - * object as well as the permissions of the other object.
- */ - def union(other: Permissions): Permissions = new Permissions(permSet union other.permSet) - - override def toString: String = "Permissions(%s)".format(permSet.mkString(", ")) - } -} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.docx b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.docx new file mode 100644 index 000000000..e1cef5fe6 Binary files /dev/null and b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.docx differ diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala deleted file mode 100644 index e80044c2d..000000000 --- a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala +++ /dev/null @@ -1,120 +0,0 @@ -package com.twitter.servo.request - -import com.twitter.finagle.tracing.TraceId -import com.twitter.servo.util.{FunctionArrow, Effect, FutureArrow, FutureEffect, Observable} -import com.twitter.util.{Future, Try} - -/** - * Useful mixins for request types. - */ -trait HasTraceId { - - /** - * The Finagle TraceId of the request. - */ - def traceId: TraceId -} - -/** - * A collection of RequestFilter factory functions. - * - * type RequestFilter[A] = FutureArrow[A, A] - */ -object RequestFilter { - - /** - * Produce a RequestFilter from a function `A => Future[A]`. - */ - def apply[A](f: A => Future[A]): RequestFilter[A] = FutureArrow(f) - - /** - * Produce a RequestFilter from a function `A => Try[A]`. - * - * The Try is evaluated within a Future. Thus, Throw results are translated - * to `Future.exception`s. - */ - def fromTry[A](f: A => Try[A]): RequestFilter[A] = FutureArrow.fromTry(f) - - /** - * A no-op RequestFilter; it simply returns the request. - * - * This forms a monoid with `append`. - */ - def identity[A]: RequestFilter[A] = FutureArrow.identity - - /** - * Appends two RequestFilters together. - * - * This forms a monoid with 'identity'. - */ - def append[A](a: RequestFilter[A], b: RequestFilter[A]): RequestFilter[A] = - FutureArrow.append(a, b) - - /** - * Compose an ordered series of RequestFilters into a single object. - */ - def all[A](filters: RequestFilter[A]*): RequestFilter[A] = - filters.foldLeft(identity[A])(append) - - /** - * Produce a RequestFilter that applies a side-effect, returning the argument - * request as-is. - */ - def effect[A](effect: Effect[A]): RequestFilter[A] = - FutureArrow.fromFunctionArrow(FunctionArrow.effect(effect)) - - /** - * Produce a RequestFilter that applies a side-effect, returning the argument - * request as-is. - */ - def effect[A](effect: FutureEffect[A]): RequestFilter[A] = FutureArrow.effect(effect) - - /** - * Returns a new request filter where all Futures returned from `a` have their - * `masked` method called - */ - def masked[A](a: RequestFilter[A]): RequestFilter[A] = a.masked - - /** - * Produces a RequestFilter that proxies to one of two others, depending on a - * predicate. - */ - def choose[A]( - predicate: A => Boolean, - ifTrue: RequestFilter[A], - ifFalse: RequestFilter[A] - ): RequestFilter[A] = - FutureArrow.choose(predicate, ifTrue, ifFalse) - - /** - * Guard the application of a filter on a predicate. The filter is applied - * if the predicate returns true, otherwise, the request is simply returned. 
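- *
- * For example (a sketch; the Request type, its fromTrustedClient flag, and the
- * spamCheck filter are hypothetical):
- * {{{
- * val guarded: RequestFilter[Request] =
- *   RequestFilter.onlyIf[Request](req => !req.fromTrustedClient, spamCheck)
- * }}}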
- */ - def onlyIf[A](predicate: A => Boolean, f: RequestFilter[A]): RequestFilter[A] = - FutureArrow.onlyIf(predicate, f) - - /** - * Produces a RequestFilter that authorizes requests by applying an - * authorization function `A => Future[Unit]`. If the authorizer function - * results in a Future exception, requests are failed. Otherwise, they pass. - */ - def authorized[A <: Observable](authorizer: ClientRequestAuthorizer): RequestFilter[A] = - RequestFilter[A] { request => - authorizer(request.requestName, request.clientIdString) map { _ => - request - } - } - - /** - * Produces a RequestFilter that applies a ClientRequestObserver to requests. - * - * Used to increment counters and track stats for requests. - */ - def observed[A <: Observable](observer: ClientRequestObserver): RequestFilter[A] = - RequestFilter[A] { request => - val clientIdScopesOpt = request.clientIdString map { Seq(_) } - observer(request.requestName, clientIdScopesOpt) map { _ => - request - } - } -} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.docx b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.docx new file mode 100644 index 000000000..3b5f80a2c Binary files /dev/null and b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.docx differ diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala deleted file mode 100644 index 207999580..000000000 --- a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.servo.request - -/** - * A collection of RequestHandler factory functions. - * - * type RequestHandler[-A, +B] = FutureArrow[A, B] - */ -object RequestHandler { - - /** - * Terminate a RequestFilter with a RequestHandler, producing a new handler. - */ - def apply[A, B <: A, C]( - filter: RequestFilter[A], - handler: RequestHandler[B, C] - ): RequestHandler[B, C] = - new RequestHandler[B, C] { - override def apply(request: B) = { - filter(request: A) flatMap { filteredRequest => - handler(filteredRequest.asInstanceOf[B]) - } - } - } -} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.docx b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.docx new file mode 100644 index 000000000..08a8b9a8f Binary files /dev/null and b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.docx differ diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala deleted file mode 100644 index c02b4161c..000000000 --- a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala +++ /dev/null @@ -1,35 +0,0 @@ -package com.twitter.servo - -import com.twitter.servo.util.FutureArrow - -package object request { - - /** - * RequestFilters provide a mechanism for composing a chain of actions - * (e.g. logging, authentication, replication, etc.) to be performed per - * request. The intention is for a series of RequestFilters to be terminated in a - * RequestHandler, which returns an object of some response type. - * - * Upon completion of a filter's work, the convention is to either: - * - * a) Return a Future of a request object of type `A` to be passed to the next - * member of the filter/handler chain.
- * b) Return a Future response outright in cases where request handling must - * be halted at the current filter (i.e. returning `Future.exception(...)`. - * - * @tparam A - * A type encapsulating all context and data required to satisfy a request. - */ - type RequestFilter[A] = FutureArrow[A, A] - - /** - * A handler of requests parameterized on the request and response types. - * - * @tparam A - * A type encapsulating all context and data required to satisfy a request. - * - * @tparam B - * A response type. - */ - type RequestHandler[-A, +B] = FutureArrow[A, B] -} diff --git a/tweetypie/servo/util/BUILD b/tweetypie/servo/util/BUILD deleted file mode 100644 index b27c20631..000000000 --- a/tweetypie/servo/util/BUILD +++ /dev/null @@ -1,6 +0,0 @@ -target( - tags = ["bazel-compatible"], - dependencies = [ - "tweetypie/servo/util/src/main/scala", - ], -) diff --git a/tweetypie/servo/util/BUILD.docx b/tweetypie/servo/util/BUILD.docx new file mode 100644 index 000000000..1eb11a03d Binary files /dev/null and b/tweetypie/servo/util/BUILD.docx differ diff --git a/tweetypie/servo/util/src/main/scala/BUILD b/tweetypie/servo/util/src/main/scala/BUILD deleted file mode 100644 index 2a6d5f1c5..000000000 --- a/tweetypie/servo/util/src/main/scala/BUILD +++ /dev/null @@ -1,53 +0,0 @@ -EXCEPTION_SOURCES = [ - "com/twitter/servo/util/Effect.scala", - "com/twitter/servo/util/ExceptionCounter.scala", - "com/twitter/servo/util/Gate.scala", - "com/twitter/servo/util/ThrowableHelper.scala", - "com/twitter/servo/util/package.scala", -] - -scala_library( - sources = ["**/*.scala"] + exclude_globs(EXCEPTION_SOURCES), - platform = "java8", - provides = scala_artifact( - org = "com.twitter", - name = "servo-util", - repo = artifactory, - ), - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - ":exception", - "3rdparty/jvm/com/google/guava", - "3rdparty/jvm/com/google/inject:guice", - "finagle/finagle-core/src/main", - "finagle/finagle-mux/src/main/scala", - "scrooge/scrooge-core", - "scrooge/scrooge-serializer", - "util-internal/scribe", - "util/util-logging/src/main/scala/com/twitter/logging", - "util/util-stats/src/main/scala", - ], - exports = [ - ":exception", - "util/util-logging/src/main/scala/com/twitter/logging", - ], -) - -scala_library( - name = "exception", - sources = EXCEPTION_SOURCES, - compiler_option_sets = ["fatal_warnings"], - platform = "java8", - provides = scala_artifact( - org = "com.twitter", - name = "servo-util-exception", - repo = artifactory, - ), - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "util/util-core:util-core-util", - "util/util-stats/src/main/scala", - ], -) diff --git a/tweetypie/servo/util/src/main/scala/BUILD.docx b/tweetypie/servo/util/src/main/scala/BUILD.docx new file mode 100644 index 000000000..683b59f77 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/BUILD.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.docx new file mode 100644 index 000000000..773601df2 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala deleted file mode 100644 index 9396c38f7..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala +++ /dev/null @@ -1,147 +0,0 @@ -package 
com.twitter.servo.data - -import scala.language.existentials - -object Lens { - private[this] val _identity = iso[Any, Any](x => x, x => x) - - /** - * The identity lens. - */ - def identity[A] = _identity.asInstanceOf[Lens[A, A]] - - /** - * Convenience method for creating lenses with slightly more - * efficient setters. - */ - def checkEq[A, B](get: A => B, set: (A, B) => A) = Lens[A, B](get, set).checkEq - - /** - * Create a lens from an isomorphism. - */ - def iso[A, B](to: A => B, from: B => A) = Lens[A, B](to, (_, x) => from(x)) - - /** - * Using multiple lenses, copy multiple fields from one object to another, returning - * the updated result. - */ - def copyAll[A](lenses: Lens[A, _]*)(src: A, dst: A): A = - lenses.foldLeft(dst) { (t, l) => - l.copy(src, t) - } - - /** - * setAll can be used to set multiple values using multiple lenses on the same input - * value in one call, which is more readable than nested calls. For example, say - * that we have lenses (lensX: Lens[A, X]), (lensY: Lens[A, Y]), and (lensZ: Lens[A, Z]), - * then instead of writing: - * - * lensX.set(lensY.set(lensZ.set(a, z), y), x) - * - * you can write: - * - * Lens.setAll(a, lensX -> x, lensY -> y, lensZ -> z) - */ - def setAll[A](a: A, lensAndValues: ((Lens[A, B], B) forSome { type B })*): A = - lensAndValues.foldLeft(a) { case (a, (l, b)) => l.set(a, b) } - - /** - * Combines two lenses into one that gets and sets a tuple of values. - */ - def join[A, B, C](lensB: Lens[A, B], lensC: Lens[A, C]): Lens[A, (B, C)] = - Lens[A, (B, C)]( - a => (lensB.get(a), lensC.get(a)), - { case (a, (b, c)) => lensC.set(lensB.set(a, b), c) } - ) - - /** - * Combines three lenses into one that gets and sets a tuple of values. - */ - def join[A, B, C, D]( - lensB: Lens[A, B], - lensC: Lens[A, C], - lensD: Lens[A, D] - ): Lens[A, (B, C, D)] = - Lens[A, (B, C, D)]( - a => (lensB.get(a), lensC.get(a), lensD.get(a)), - { case (a, (b, c, d)) => lensD.set(lensC.set(lensB.set(a, b), c), d) } - ) -} - -/** - * A Lens is a first-class getter/setter. The value of lenses is that - * they can be composed with other operations. - * - * Note that it is up to you to ensure that the functions you pass to - * Lens obey the following laws for all inputs: - * - * a => set(a, get(a)) == a - * (a, b) => get(set(a, b)) == b - * (a, b, b1) => set(set(a, b), b1) == set(a, b1) - * - * The intuition for the name Lens[A, B] is that you are "viewing" A - * through a Lens that lets you see (and manipulate) a B. - * - * See e.g. - * http://stackoverflow.com/questions/5767129/lenses-fclabels-data-accessor-which-library-for-structure-access-and-mutatio#answer-5769285 - * for a more in-depth explanation of lenses. - */ -case class Lens[A, B](get: A => B, set: (A, B) => A) { - - /** - * Get the field. - */ - def apply(a: A) = get(a) - - /** - * Compose with another lens, such that the setter updates the - * outermost structure, and the getter gets the innermost structure. - */ - def andThen[C](next: Lens[B, C]) = - Lens(get andThen next.get, (a: A, c: C) => set(a, next.set(get(a), c))) - - /** - * An operator alias for `andThen`. - */ - def >>[C](next: Lens[B, C]) = andThen(next) - - /** - * Lift the function on the viewed value to a function on the outer - * value. - */ - def update(f: B => B): A => A = a => set(a, f(get(a))) - - /** - * Copies the field from one object to another. - */ - def copy(src: A, dst: A): A = set(dst, get(src)) - - /** - * Lift a mutation of the viewed value to a transform of the - * container. (E.g. 
a Mutation[Seq[UrlEntity]] to a Mutation[Tweet]) - */ - def mutation(m: Mutation[B]) = - Mutation[A] { a => - m(get(a)) map { set(a, _) } - } - - /** - * Create a new lens whose setter makes sure that the update would - * change the value. - * - * This should not change the meaning of the lens, but can possibly - * make it more efficient by avoiding copies when performing no-op - * sets. - * - * This is only worthwhile when the getter and equality comparison - * are cheap compared to the setter. - */ - def checkEq = Lens[A, B](get, (a, b) => if (get(a) == b) a else set(a, b)) - - /** - * Combines this lens and the given lens into one that gets and sets a tuple - * of values. - */ - def join[C](right: Lens[A, C]): Lens[A, (B, C)] = - Lens.join(this, right) -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.docx new file mode 100644 index 000000000..a25272465 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala deleted file mode 100644 index 78e08df74..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala +++ /dev/null @@ -1,268 +0,0 @@ -package com.twitter.servo.data - -import com.twitter.util.{Return, Throw, Try} -import com.twitter.finagle.stats.{Counter, StatsReceiver} -import com.twitter.servo.util.{Effect, Gate} - -object Mutation { - - /** - * A mutation that ignores its input and always returns the given - * value as new. Use checkEq if this value could be the same as the - * input. - */ - def const[T](x: T) = Mutation[T] { _ => - Some(x) - } - - private[this] val _unit = Mutation[Any] { _ => - None - } - - /** - * A "no-op" mutation that will never alter the value. - * - * For any Mutations A, (A also unit) == (unit also A) == A. - * - * Forms a monoid with also as the operation. - */ - def unit[A]: Mutation[A] = _unit.asInstanceOf[Mutation[A]] - - /** - * Makes a Mutation out of a function. - */ - def apply[A](f: A => Option[A]): Mutation[A] = - new Mutation[A] { - override def apply(x: A) = f(x) - } - - /** - * Lift a function that returns the same type to a Mutation, using - * the type's notion of equality to detect when the mutation has - * not changed the value. - */ - def fromEndo[A](f: A => A): Mutation[A] = - Mutation[A] { x => - val y = f(x) - if (y == x) None else Some(y) - } - - /** - * Lift a partial function from A to A to a mutation. - */ - def fromPartial[A](f: PartialFunction[A, A]): Mutation[A] = Mutation[A](f.lift) - - /** - * Creates a new Mutation that applies all the given mutations in order. - */ - def all[A](mutations: Seq[Mutation[A]]): Mutation[A] = - mutations.foldLeft(unit[A])(_ also _) -} - -/** - * A Mutation encapsulates a computation that may optionally "mutate" a value, where - * "mutate" should be interpreted in the stateless/functional sense of making a copy with - * a change. If the value is unchanged, the mutation should return None. When mutations are - * composed with `also`, the final result will be None iff no mutation actually changed the - * value. - * - * Forms a monoid with Mutation.unit as unit and `also` as the - * combining operation.
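Before moving on to Mutation's combinators, a small sketch of the Lens API just shown may help. The nested case classes are hypothetical; `checkEq`, `>>`, and `setAll` are as defined above.

  import com.twitter.servo.data.Lens

  // Hypothetical nested structures, for illustration only.
  case class Profile(bio: String)
  case class User(name: String, profile: Profile)

  val nameLens: Lens[User, String] =
    Lens.checkEq[User, String](_.name, (u, n) => u.copy(name = n))
  val profileLens: Lens[User, Profile] =
    Lens.checkEq[User, Profile](_.profile, (u, p) => u.copy(profile = p))
  val bioLens: Lens[Profile, String] =
    Lens.checkEq[Profile, String](_.bio, (p, b) => p.copy(bio = b))

  // Composition: view a User's bio through two lenses.
  val userBio: Lens[User, String] = profileLens >> bioLens

  val u = User("ada", Profile("hello"))
  userBio.set(u, "hi")                              // User("ada", Profile("hi"))
  Lens.setAll(u, nameLens -> "grace", userBio -> "hi")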
- * - * This abstraction is useful for composing changes to a value when - * some action (such as updating a cache) should be performed if the - * value has changed. - */ -trait Mutation[A] extends (A => Option[A]) { - - /** - * Convert this mutation to a function that always returns a - * result. If the mutation has no effect, it returns the original - * input. - * - * (convert to an endofunction on A) - */ - lazy val endo: A => A = - x => - apply(x) match { - case Some(v) => v - case None => x - } - - /** - * Apply this mutation, and then apply the next mutation to the - * result. If this mutation leaves the value unchanged, the next - * mutation is invoked with the original input. - */ - def also(g: Mutation[A]): Mutation[A] = - Mutation[A] { x => - apply(x) match { - case None => g(x) - case someY @ Some(y) => - g(y) match { - case some @ Some(_) => some - case None => someY - } - } - } - - /** - * Apply this mutation, but refuse to return an altered value. This - * yields all of the effects of this mutation without affecting the - * final result. - */ - def dark: Mutation[A] = Mutation[A] { x => - apply(x); None - } - - /** - * Convert a Mutation on A to a Mutation on B by way of a pair of functions for - * converting from B to A and back. - */ - def xmap[B](f: B => A, g: A => B): Mutation[B] = - Mutation[B](f andThen this andThen { _ map g }) - - /** - * Converts a Mutation on A to a Mutation on Try[A], where the Mutation is only applied - * to Return values and any exceptions thrown by the underlying function are caught and - * returned as Some(Throw(_)) - */ - def tryable: Mutation[Try[A]] = - Mutation[Try[A]] { - case Throw(x) => Some(Throw(x)) - case Return(x) => - Try(apply(x)) match { - case Throw(y) => Some(Throw(y)) - case Return(None) => None - case Return(Some(y)) => Some(Return(y)) - } - } - - /** - * Perform this mutation only if the provided predicate returns true - * for the input. - */ - def onlyIf(predicate: A => Boolean): Mutation[A] = - Mutation[A] { x => - if (predicate(x)) this(x) else None - } - - /** - * Performs this mutation only if the given gate returns true. - */ - def enabledBy(enabled: Gate[Unit]): Mutation[A] = - enabledBy(() => enabled()) - - /** - * Performs this mutation only if the given function returns true. - */ - def enabledBy(enabled: () => Boolean): Mutation[A] = - onlyIf { _ => - enabled() - } - - /** - * A new mutation that returns the same result as this mutation, - * and additionally calls the specified Effect. - */ - def withEffect(effect: Effect[Option[A]]): Mutation[A] = - Mutation[A](this andThen effect.identity) - - /** - * Perform an equality check when a value is returned from the - * mutation. If the values are equal, then the mutation will yield - * None. - * - * This is useful for two reasons: - * - * 1. Any effects that are conditional upon mutation will not occur - * when the values are equal (e.g. updating a cache) - * - * 2. When using a Lens to lift a mutation to a mutation on a - * larger structure, checking equality on the smaller structure - * can prevent unnecessary copies of the larger structure. - */ - def checkEq = Mutation[A] { x => - this(x) match { - case someY @ Some(y) if y != x => someY - case _ => None - } - } - - /** - * Converts this mutation to a mutation of a different type, using a Lens to - * convert between types. - */ - def lensed[B](lens: Lens[B, A]): Mutation[B] = - Mutation[B](b => this(lens(b)).map(lens.set(b, _))) - - /** - * Convert this mutation to a mutation of a Seq of its type.
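A concrete sketch of these combinators, using a hypothetical entity type; `fromEndo`, `also`, and `endo` are as defined above.

  import com.twitter.servo.data.Mutation

  // Hypothetical: normalize a tweet's text, for illustration only.
  case class TweetText(value: String)

  val trim: Mutation[TweetText] =
    Mutation.fromEndo(t => t.copy(value = t.value.trim))
  val collapseSpaces: Mutation[TweetText] =
    Mutation.fromEndo(t => t.copy(value = t.value.replaceAll("  +", " ")))

  // `also` runs both; the result is None only if neither changed anything.
  val normalize: Mutation[TweetText] = trim also collapseSpaces

  normalize(TweetText(" a  b "))   // Some(TweetText("a b"))
  normalize(TweetText("a b"))      // None: nothing changed
  normalize.endo(TweetText("a b")) // TweetText("a b"): original returned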
It will - * yield None if no values are changed, or a Seq of both the changed - * and unchanged values if any value is mutated. - */ - def liftSeq = Mutation[Seq[A]] { xs => - var changed = false - val detectChange = Effect.fromPartial[Option[A]] { case Some(_) => changed = true } - val mutated = xs map (this withEffect detectChange).endo - if (changed) Some(mutated) else None - } - - /** - * Convert this mutation to a mutation of an Option of its type. It will yield - * None if the value is not changed, or a Some(Some(_)) if the value is mutated. - */ - def liftOption = Mutation[Option[A]] { - case None => None - case Some(x) => - this(x) match { - case None => None - case Some(y) => Some(Some(y)) - } - } - - /** - * Convert this mutation to a mutation of the values of a Map. It will - * yield None if no values are changed, or a Map with both the changed - * and unchanged values if any value is mutated. - */ - def liftMapValues[K] = Mutation[Map[K, A]] { m => - var changed = false - val detectChange = Effect.fromPartial[Option[A]] { case Some(_) => changed = true } - val f = (this withEffect detectChange).endo - val mutated = m map { case (k, v) => (k, f(v)) } - if (changed) Some(mutated) else None - } - - /** - * Return a new mutation that returns the same result as this - * mutation, as well as incrementing the given counter when the - * value is mutated. - */ - def countMutations(c: Counter) = - this withEffect { Effect.fromPartial { case Some(_) => c.incr() } } - - /** - * Wrap a mutation in stats with the following counters: - * - no-op (returned value was the same as the input) - * - none (mutation returned none) - * - mutated (mutation modified the result) - */ - def withStats(stats: StatsReceiver): Mutation[A] = { - val none = stats.counter("none") - val noop = stats.counter("noop") - val mutated = stats.counter("mutated") - input: A => { - val result = apply(input) - result.fold(none.incr()) { output => - if (output == input) { - noop.incr() - } else { - mutated.incr() - } - } - result - } - } - -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.docx new file mode 100644 index 000000000..3d104b88b Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala deleted file mode 100644 index 55d031784..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala +++ /dev/null @@ -1,120 +0,0 @@ -/** - * Provides the ability to partially tee traffic to a secondary - * service. - * - * This code was originally written to provide a way to tee - * production traffic to the TweetyPie staging cluster, selecting a - * consistent subset of tweet ids, to enable a production-like cache - * hit rate with a much smaller cache. - */ -package com.twitter.servo.forked - -import com.twitter.servo.data.Lens - -object Forked { - - /** - * A strategy for executing forked actions. - */ - type Executor = (() => Unit) => Unit - - /** - * Directly execute the forked action. - */ - val inlineExecutor: Executor = f => f() - - /** - * Produce objects of type A to send to a secondary target. - * Returning None signifies that nothing should be forked.
- */ - type Fork[A] = A => Option[A] - - /** - * Fork the input unchanged, only when it passes the specified - * predicate. - * - * For instance, if your service has a get() method, this can be used to - * fork only the requests whose argument satisfies the predicate. - */ - def forkWhen[T](f: T => Boolean): Fork[T] = - a => if (f(a)) Some(a) else None - - /** - * Fork a subset of the elements of the Seq, based on the supplied - * predicate. If the resulting Seq is empty, the secondary action - * will not be executed. - */ - def forkSeq[T](f: T => Boolean): Fork[Seq[T]] = { xs => - val newXs = xs filter f - if (newXs.nonEmpty) Some(newXs) else None - } - - /** - * Apply forking through lens. - */ - def forkLens[A, B](lens: Lens[A, B], f: Fork[B]): Fork[A] = - a => f(lens(a)).map(lens.set(a, _)) - - /** - * A factory for building actions that will partially tee their input - * to a secondary target. The executor is parameterized to make the - * execution strategy independent from the forking logic. - */ - def toSecondary[S](secondary: S, executor: Executor): S => Forked[S] = - primary => - new Forked[S] { - - /** - * Tee a subset of requests defined by the forking function to the - * secondary service. - */ - def apply[Q, R](fork: Forked.Fork[Q], action: (S, Q) => R): Q => R = { req => - fork(req) foreach { req => - executor(() => action(secondary, req)) - } - action(primary, req) - } - } - - /** - * A forked action builder that bypasses the forking altogether and - * just calls the supplied action on a service. - * - * This is useful for configurations that will sometimes have fork - * targets defined and sometimes not. - */ - def notForked[S]: S => Forked[S] = - service => - new Forked[S] { - def apply[Q, R](unusedFork: Forked.Fork[Q], action: (S, Q) => R): Q => R = - action(service, _) - } -} - -/** - * Factory for forking functions, primarily useful for sending a copy - * of a stream of requests to a secondary service. - */ -trait Forked[S] { - import Forked._ - - /** - * Fork an action that takes two parameters, forking only on the - * first parameter, passing the second unchanged. - */ - def first[Q1, Q2, R]( - fork: Fork[Q1], - action: S => (Q1, Q2) => R - ): (Q1, Q2) => R = { - val f = - apply[(Q1, Q2), R]( - fork = p => - fork(p._1) map { q1 => - (q1, p._2) - }, - action = (svc, p) => action(svc)(p._1, p._2) - ) - (q1, q2) => f((q1, q2)) - } - - def apply[Q, R](fork: Fork[Q], action: (S, Q) => R): Q => R -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.docx new file mode 100644 index 000000000..451ca678b Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala deleted file mode 100644 index 5b2949e45..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala +++ /dev/null @@ -1,82 +0,0 @@ -package com.twitter.servo.forked - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.logging.Logger -import com.twitter.servo.util.ExceptionCounter -import com.twitter.util.{Duration, Time, Local, TimeoutException} -import java.util.concurrent.{LinkedBlockingQueue, TimeUnit, CountDownLatch} - -/** - * A forking action executor that executes the actions in a separate - * thread, using a bounded queue as the communication channel.
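A sketch of how these pieces are wired together. The `TweetService` type and predicate are hypothetical; `toSecondary`, `forkWhen`, and `inlineExecutor` are as defined above, and any Executor (such as the queue-based one described here) could be substituted for `inlineExecutor`.

  import com.twitter.servo.forked.Forked

  // Hypothetical service interface, for illustration only.
  trait TweetService {
    def getTweet(id: Long): String
  }

  val primary: TweetService = ???
  val staging: TweetService = ???

  // Tee roughly 1/16th of ids to staging, consistently by id.
  val forked: Forked[TweetService] =
    Forked.toSecondary(staging, Forked.inlineExecutor)(primary)

  // The returned function always answers from primary; matching
  // requests are additionally replayed against staging.
  val getTweet: Long => String =
    forked(Forked.forkWhen[Long](_ % 16 == 0), (svc, id) => svc.getTweet(id))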
If the - * queue is full (the secondary thread is slow to drain it), then the - * items will be dropped rather than enqueued. - */ -class QueueExecutor(maxQueueSize: Int, stats: StatsReceiver) extends Forked.Executor { - private val forkExceptionsCounter = new ExceptionCounter(stats) - private val enqueuedCounter = stats.counter("forked_actions_enqueued") - private val droppedCounter = stats.counter("forked_actions_dropped") - private val log = Logger.get("Forked.QueueExecutor") - - @volatile private var isStopped = false - private val releaseCountDownLatch = new CountDownLatch(1) - private val queue = new LinkedBlockingQueue[() => Unit](maxQueueSize) - private val thread = new Thread { - override def run(): Unit = { - while (!isStopped) { - try { - queue.take()() - } catch { - // Ignore interrupts from other threads - case _: InterruptedException => - // TODO: handle fatal errors more seriously - case e: Throwable => - forkExceptionsCounter(e) - log.error(e, "Executing queued action") - } - } - releaseCountDownLatch.countDown() - } - } - - thread.setDaemon(true) - thread.start() - - /** - * Interrupts the thread and directs it to stop processing. This - * method will not return until the processing thread has finished - * or the timeout occurs. Ok to call multiple times. - */ - def release(timeout: Duration): Unit = { - if (!isStopped) { - isStopped = true - thread.interrupt() - releaseCountDownLatch.await(timeout.inMilliseconds, TimeUnit.MILLISECONDS) || { - throw new TimeoutException(timeout.toString) - } - } - } - - /** - * Blocks until all the items currently in the queue have been - * executed, or the timeout occurs. Mostly useful during testing. - */ - def waitForQueueToDrain(timeout: Duration): Unit = { - val latch = new CountDownLatch(1) - val start = Time.now - queue.offer(() => latch.countDown(), timeout.inMilliseconds, TimeUnit.MILLISECONDS) - val remaining = timeout - (Time.now - start) - latch.await(remaining.inMilliseconds, TimeUnit.MILLISECONDS) || { - throw new TimeoutException(remaining.toString) - } - } - - /** - * Queue the action for execution in this object's thread. - */ - def apply(action: () => Unit) = - if (queue.offer(Local.closed(action))) - enqueuedCounter.incr() - else - droppedCounter.incr() -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.docx new file mode 100644 index 000000000..3741501a6 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala deleted file mode 100644 index 5cee23f22..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.servo.gate - -import com.google.common.annotations.VisibleForTesting -import com.google.common.util.concurrent.RateLimiter -import com.twitter.servo.util -import java.util.concurrent.TimeUnit - -/** - * A Rate Limiting Gate backed by com.google.common.util.concurrent.RateLimiter - * http://docs.guava-libraries.googlecode.com/git/javadoc/com/google/common/util/concurrent/RateLimiter.html - */ -object RateLimitingGate { - - /** - * Creates a Gate[Int] that returns true if acquiring number of permits - * from the ratelimiter succeeds. 
- */ - def weighted(permitsPerSecond: Double): util.Gate[Int] = { - val rateLimiter: RateLimiter = RateLimiter.create(permitsPerSecond) - util.Gate { rateLimiter.tryAcquire(_, 0, TimeUnit.SECONDS) } - } - - /** - * Creates a Gate[Unit] that returns true if acquiring a permit from the ratelimiter succeeds. - */ - def uniform(permitsPerSecond: Double): util.Gate[Unit] = { - weighted(permitsPerSecond) contramap { _ => - 1 - } - } - - /** - * Creates a Gate[Unit] with a floating limit. Could be used with deciders. - */ - def dynamic(permitsPerSecond: => Double): util.Gate[Unit] = - dynamic(RateLimiter.create, permitsPerSecond) - - @VisibleForTesting - def dynamic( - rateLimiterFactory: Double => RateLimiter, - permitsPerSecond: => Double - ): util.Gate[Unit] = { - val rateLimiter: RateLimiter = rateLimiterFactory(permitsPerSecond) - util.Gate { _ => - val currentRate = permitsPerSecond - if (rateLimiter.getRate != currentRate) { - rateLimiter.setRate(currentRate) - } - rateLimiter.tryAcquire(0L, TimeUnit.SECONDS) - } - } -} - -@deprecated("Use RateLimitingGate.uniform", "2.8.2") -class RateLimitingGate[T](permitsPerSecond: Double) extends util.Gate[T] { - private[this] val rateLimiter: RateLimiter = RateLimiter.create(permitsPerSecond) - - /** - * If a "permit" is available, this method acquires it and returns true. - * Else it returns false immediately without waiting. - */ - override def apply[U](u: U)(implicit asT: <:<[U, T]): Boolean = - rateLimiter.tryAcquire(1, 0, TimeUnit.SECONDS) -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.docx new file mode 100644 index 000000000..f3aa3ea71 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.scala deleted file mode 100644 index a23e9ed5f..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.scala +++ /dev/null @@ -1,43 +0,0 @@ -package com.twitter.servo.util - -/** - * Provides functions for computing prescribed feature availability based - * on some runtime condition(s). (e.g. watermark values) - */ -object Availability { - - /** - * Stay at 100% available down to a high watermark success rate. Then - * between high and low watermarks, dial down availability to a provided - * minimum. Never go below this level because we need some requests to - * track the success rate going back up. - * - * NOTE: watermarks and minAvailability must be between 0 and 1.
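A usage sketch for the rate-limiting gate factories above; the rates and the decider-style lookup are illustrative.

  import com.twitter.servo.gate.RateLimitingGate
  import com.twitter.servo.util.Gate

  // A fixed limit of 100 ops/sec; each call costs one permit.
  val uniform: Gate[Unit] = RateLimitingGate.uniform(100.0)
  if (uniform()) { /* proceed */ }

  // A floating limit, e.g. backed by a decider (hypothetical lookup).
  def deciderRate: Double = 250.0
  val dynamic: Gate[Unit] = RateLimitingGate.dynamic(deciderRate)

  // A weighted gate: a batch of n items costs n permits.
  val weighted: Gate[Int] = RateLimitingGate.weighted(100.0)
  if (weighted(25)) { /* process a batch of 25 */ }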
- */ - def linearlyScaled( - highWaterMark: Double, - lowWaterMark: Double, - minAvailability: Double - ): Double => Double = { - require( - highWaterMark >= lowWaterMark && highWaterMark <= 1, - s"highWaterMark ($highWaterMark) must be between lowWaterMark ($lowWaterMark) and 1, inclusive" - ) - require( - lowWaterMark >= minAvailability && lowWaterMark <= 1, - s"lowWaterMark ($lowWaterMark) must be between minAvailability ($minAvailability) and 1, inclusive" - ) - require( - minAvailability > 0 && minAvailability < 1, - s"minAvailability ($minAvailability) must be between 0 and 1, exclusive" - ) - - { - case sr if sr >= highWaterMark => 1.0 - case sr if sr <= lowWaterMark => minAvailability - case sr => - val linearFraction = (sr - lowWaterMark) / (highWaterMark - lowWaterMark) - minAvailability + (1.0 - minAvailability) * linearFraction - } - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.docx new file mode 100644 index 000000000..8e84d549a Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.scala deleted file mode 100644 index 9aab6f25c..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.scala +++ /dev/null @@ -1,116 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.util.{Duration, Time} - -/** - * Calculate a running average of data points - */ -trait Average { - def value: Option[Double] - def record(dataPoint: Double, count: Double = 1.0): Unit -} - -/** - * Calculates a running average using two windows of data points, a - * current one and a previous one. When the current window is full, - * it is rolled into the previous and the current window starts - * filling up again. - */ -class WindowedAverage(val windowSize: Long, initialValue: Option[Double] = None) extends Average { - private[this] val average = new ResettableAverage(None) - private[this] var lastAverage: Option[Double] = initialValue - - def value: Option[Double] = - synchronized { - lastAverage match { - case Some(lastAvg) => - // currentCount can temporarily exceed windowSize - val currentWeight = (average.count / windowSize) min 1.0 - Some((1.0 - currentWeight) * lastAvg + currentWeight * average.value.getOrElse(0.0)) - case None => average.value - } - } - - def record(dataPoint: Double, count: Double = 1.0): Unit = - synchronized { - if (average.count >= windowSize) { - lastAverage = value - average.reset() - } - average.record(dataPoint, count) - } -} - -/** - * Calculates a recent average using the past windowDuration of data points. Old average is mixed - * with the new average during windowDuration. If new data points are not recorded the average - * will revert towards defaultAverage. 
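To make the watermark scaling concrete, a short sketch combining linearlyScaled with a windowed success-rate average; the numbers are illustrative.

  import com.twitter.servo.util.{Availability, WindowedAverage}

  // Full availability above a 99% success rate; scale linearly down to
  // 50% availability at or below a 95% success rate.
  val availability: Double => Double =
    Availability.linearlyScaled(
      highWaterMark = 0.99,
      lowWaterMark = 0.95,
      minAvailability = 0.5)

  availability(0.995) // 1.0
  availability(0.97)  // 0.5 + 0.5 * (0.97 - 0.95) / (0.99 - 0.95) = 0.75
  availability(0.90)  // 0.5

  // Feed it from a windowed success-rate average.
  val successRate = new WindowedAverage(windowSize = 1000L)
  successRate.record(1.0) // a success
  successRate.record(0.0) // a failure
  val avail = successRate.value.map(availability).getOrElse(1.0)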
- */ -class RecentAverage( - val windowDuration: Duration, - val defaultAverage: Double, - currentTime: Time = Time.now // passing in start time to simplify scalacheck tests -) extends Average { - private[this] val default = Some(defaultAverage) - private[this] val currentAverage = new ResettableAverage(Some(defaultAverage)) - private[this] var prevAverage: Option[Double] = None - private[this] var windowStart: Time = currentTime - - private[this] def mix(fractOfV2: Double, v1: Double, v2: Double): Double = { - val f = 0.0.max(1.0.min(fractOfV2)) - (1.0 - f) * v1 + f * v2 - } - - private[this] def timeFract: Double = - 0.0.max(windowStart.untilNow.inNanoseconds.toDouble / windowDuration.inNanoseconds) - - def value: Some[Double] = - synchronized { - timeFract match { - case f if f < 1.0 => - Some(mix(f, prevAverage.getOrElse(defaultAverage), currentAverage.getValue)) - case f if f < 2.0 => Some(mix(f - 1.0, currentAverage.getValue, defaultAverage)) - case f => default - } - } - - def getValue: Double = value.get - - def record(dataPoint: Double, count: Double = 1.0): Unit = - synchronized { - // if we're past windowDuration, roll average - val now = Time.now - if (now - windowStart > windowDuration) { - prevAverage = value - windowStart = now - currentAverage.reset() - } - currentAverage.record(dataPoint, count) - } - - override def toString = - s"RecentAverage(window=$windowDuration, default=$defaultAverage, " + - s"prevValue=$prevAverage, value=$value, timeFract=$timeFract)" -} - -private class ResettableAverage[DoubleOpt <: Option[Double]](defaultAverage: DoubleOpt) - extends Average { - private[this] var currentCount: Double = 0 - private[this] var currentValue: Double = 0 - def reset(): Unit = { - currentCount = 0 - currentValue = 0 - } - def record(dataPoint: Double, count: Double): Unit = { - currentCount += count - currentValue += dataPoint - } - def value: Option[Double] = - if (currentCount == 0) defaultAverage - else Some(currentValue / currentCount) - - def getValue(implicit ev: DoubleOpt <:< Some[Double]): Double = - value.get - - def count: Double = currentCount -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.docx new file mode 100644 index 000000000..0c45e710b Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.scala deleted file mode 100644 index 827e371c2..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.scala +++ /dev/null @@ -1,218 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.logging.Logger -import com.twitter.util.{Timer, Duration, Promise, Future, Return, Throw} -import java.util.concurrent.CancellationException -import scala.collection.mutable.ArrayBuffer - -@deprecated("Use `Future.batched`", "2.6.1") -trait BatchExecutorFactory { - def apply[In, Out](f: Seq[In] => Future[Seq[Out]]): BatchExecutor[In, Out] -} - -/** - * A BatchExecutorFactory allows you to specify the criteria under which a batch - * should be flushed prior to constructing a BatchExecutor.
A BatchExecutor asks for a - * function that takes a Seq[In] and returns a Future[Seq[Out]], in return it gives you - * an `In => Future[Out]` interface so that you can incrementally submit tasks to be - * performed when the criteria for batch flushing are met. - * - * Examples: - * val batcherFactory = BatchExecutorFactory(sizeThreshold = 10) - * def processBatch(reqs: Seq[Request]): Future[Seq[Response]] - * val batcher = batcherFactory(processBatch) - * - * val response: Future[Response] = batcher(new Request) - * - * the batcher will wait until 10 requests have been submitted, then delegate - * to the processBatch method to compute the responses. - * - * you can also construct a BatchExecutor that has a time-based threshold or both: - * val batcherFactory = BatchExecutorFactory( - * sizeThreshold = 10, timeThreshold = 10.milliseconds, timer = new JavaTimer(true)) - * - * A batcher's size can be controlled at runtime through a bufSizeFraction function - * that should return a float between 0.0 and 1.0 that represents the fractional size - * of the sizeThreshold that should be used for the next batch to be collected. - * - */ -@deprecated("Use `Future.batched`", "2.6.1") -object BatchExecutorFactory { - final val DefaultBufSizeFraction = 1.0f - lazy val instant = sized(1) - - def sized(sizeThreshold: Int): BatchExecutorFactory = new BatchExecutorFactory { - override def apply[In, Out](f: Seq[In] => Future[Seq[Out]]) = { - new BatchExecutor(sizeThreshold, None, f, DefaultBufSizeFraction) - } - } - - def timed(timeThreshold: Duration, timer: Timer): BatchExecutorFactory = - sizedAndTimed(Int.MaxValue, timeThreshold, timer) - - def sizedAndTimed( - sizeThreshold: Int, - timeThreshold: Duration, - timer: Timer - ): BatchExecutorFactory = - dynamicSizedAndTimed(sizeThreshold, timeThreshold, timer, DefaultBufSizeFraction) - - def dynamicSizedAndTimed( - sizeThreshold: Int, - timeThreshold: Duration, - timer: Timer, - bufSizeFraction: => Float - ): BatchExecutorFactory = new BatchExecutorFactory { - override def apply[In, Out](f: (Seq[In]) => Future[Seq[Out]]) = { - new BatchExecutor(sizeThreshold, Some(timeThreshold, timer), f, bufSizeFraction) - } - } -} - -@deprecated("Use `Future.batched`", "2.6.1") -class BatchExecutor[In, Out] private[util] ( - maxSizeThreshold: Int, - timeThreshold: Option[(Duration, Timer)], - f: Seq[In] => Future[Seq[Out]], - bufSizeFraction: => Float) { batcher => - - private[this] class ScheduledFlush(after: Duration, timer: Timer) { - @volatile private[this] var cancelled = false - private[this] val task = timer.schedule(after.fromNow) { flush() } - - def cancel(): Unit = { - cancelled = true - task.cancel() - } - - private[this] def flush(): Unit = { - val doAfter = batcher.synchronized { - if (!cancelled) { - flushBatch() - } else { () => - () - } - } - - doAfter() - } - } - - private[this] val log = Logger.get("BatchExecutor") - - // operations on these are synchronized on `this` - private[this] val buf = new ArrayBuffer[(In, Promise[Out])](maxSizeThreshold) - private[this] var scheduled: Option[ScheduledFlush] = None - private[this] var currentBufThreshold = newBufThreshold - - private[this] def shouldSchedule = timeThreshold.isDefined && scheduled.isEmpty - - private[this] def currentBufFraction = { - val fract = bufSizeFraction - - if (fract > 1.0f) { - log.warning( - "value returned for BatchExecutor.bufSizeFraction (%f) was > 1.0f, using 1.0", - fract - ) - 1.0f - } else if (fract < 0.0f) { - log.warning( - "value returned for BatchExecutor.bufSizeFraction
(%f) was negative, using 0.0f", - fract - ) - 0.0f - } else { - fract - } - } - - private[this] def newBufThreshold = { - val size: Int = math.round(currentBufFraction * maxSizeThreshold) - - if (size < 1) { - 1 - } else if (size >= maxSizeThreshold) { - maxSizeThreshold - } else { - size - } - } - - def apply(t: In): Future[Out] = { - enqueue(t) - } - - private[this] def enqueue(t: In): Future[Out] = { - val promise = new Promise[Out] - val doAfter = synchronized { - buf.append((t, promise)) - if (buf.size >= currentBufThreshold) { - flushBatch() - } else { - scheduleFlushIfNecessary() - () => () - } - } - - doAfter() - promise - } - - private[this] def scheduleFlushIfNecessary(): Unit = { - timeThreshold foreach { - case (duration, timer) => - if (shouldSchedule) { - scheduled = Some(new ScheduledFlush(duration, timer)) - } - } - } - - private[this] def flushBatch(): () => Unit = { - // this must be executed within a synchronized block - val prevBatch = new ArrayBuffer[(In, Promise[Out])](buf.length) - buf.copyToBuffer(prevBatch) - buf.clear() - - scheduled foreach { _.cancel() } - scheduled = None - currentBufThreshold = newBufThreshold // set the next batch's size - - () => - try { - executeBatch(prevBatch) - } catch { - case e: Throwable => - log.warning(e, "unhandled exception caught in BatchExecutor: %s", e.toString) - } - } - - private[this] def executeBatch(batch: Seq[(In, Promise[Out])]): Unit = { - val uncancelled = batch filter { - case (in, p) => - p.isInterrupted match { - case Some(_cause) => - p.setException(new CancellationException) - false - case None => true - } - } - - val ins = uncancelled map { case (in, _) => in } - // N.B. intentionally not linking cancellation of these promises to the execution of the batch - // because it seems that in most cases you would be canceling mostly uncanceled work for an - // outlier. - val promises = uncancelled map { case (_, promise) => promise } - - f(ins) respond { - case Return(outs) => - (outs zip promises) foreach { - case (out, p) => - p() = Return(out) - } - case Throw(e) => - val t = Throw(e) - promises foreach { _() = t } - } - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.docx new file mode 100644 index 000000000..94bde4726 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.scala deleted file mode 100644 index ca3ebe151..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.scala +++ /dev/null @@ -1,21 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.mux.stats.MuxCancelledCategorizer -import com.twitter.finagle.stats.CancelledCategorizer -import com.twitter.util.FutureCancelledException -import com.twitter.util.Throwables.RootCause - -/** - * Helper that consolidates various ways (nested and top level) cancel exceptions can be detected.
- */ -object CancelledExceptionExtractor { - def unapply(e: Throwable): Option[Throwable] = { - e match { - case _: FutureCancelledException => Some(e) - case MuxCancelledCategorizer(cause) => Some(cause) - case CancelledCategorizer(cause) => Some(cause) - case RootCause(CancelledExceptionExtractor(cause)) => Some(cause) - case _ => None - } - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.docx new file mode 100644 index 000000000..21deadc6b Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.scala deleted file mode 100644 index f8da5c5cf..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.stats.{Counter, MetricBuilder, StatsReceiver, StatsReceiverProxy} - -/** - * A StatsReceiver that initializes counters to zero. - * Provides a simple wrapper around a StatsReceiver so that counters - * auto-initialize to 0 when created. - * Until a counter performs its first incr() it is returned as "undefined", - * which means if an alert is set on that counter - * it will result in an error. - * Another advantage is to remove the need to manually initialize counters in order - * to overcome the aforementioned problem. - * @param self - underlying StatsReceiver - */ -class CounterInitializingStatsReceiver(protected val self: StatsReceiver) - extends StatsReceiverProxy { - - override def counter(metricBuilder: MetricBuilder): Counter = { - val counter = self.counter(metricBuilder) - counter.incr(0) - counter - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.docx new file mode 100644 index 000000000..7c9ccddc4 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.scala deleted file mode 100644 index 00510a3e3..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.scala +++ /dev/null @@ -1,83 +0,0 @@ -package com.twitter.servo.util - -object Effect { - // a no-op effect - private[this] val _unit = Effect[Any] { _ => - () - } - - /** - * A "no-op" Effect. For any effect E, (E also unit) == (unit also E) == E. - * Forms a monoid with `also`. - */ - def unit[A]: Effect[A] = _unit.asInstanceOf[Effect[A]] - - /** - * Package a function as an Effect. - */ - def apply[A](f: A => Unit): Effect[A] = - new Effect[A] { - override def apply(value: A) = f(value) - } - - /** - * An effect that only applies to some values. - */ - def fromPartial[A](f: PartialFunction[A, Unit]): Effect[A] = - Effect[A] { x => - if (f.isDefinedAt(x)) f(x) - } -} - -/** - * Perform an effect with the given value, without altering the result. - * - * Forms a monoid with Effect.unit as unit and `also` as the combining operation.
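A small sketch of the Effect algebra just described; the request type and counter are hypothetical.

  import com.twitter.servo.util.Effect

  val logIt: Effect[String] = Effect[String](s => println(s"saw: $s"))
  var count = 0
  val countIt: Effect[String] = Effect[String](_ => count += 1)

  // `also` composes effects; Effect.unit is the identity.
  val both: Effect[String] = logIt also countIt

  // contramap adapts an Effect to a richer input type.
  case class Request(name: String)
  val onRequest: Effect[Request] = both.contramap(_.name)

  onRequest(Request("getTweet")) // prints and increments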
- */ -trait Effect[A] extends (A => Unit) { self => - - /** - * An identity function that executes this effect as a side-effect. - */ - lazy val identity: A => A = { value => - self(value); value - } - - /** - * Combine effects, so that both effects are performed. - * Forms a monoid with Effect.unit. - */ - def also(next: Effect[A]): Effect[A] = - Effect[A](identity andThen next) - - /** - * Convert an effect to an effect of a more general type by way - * of an extraction function. (contravariant map) - */ - def contramap[B](extract: B => A): Effect[B] = - Effect[B](extract andThen self) - - /** - * Perform this effect only if the provided gate returns true. - */ - @deprecated("Use enabledBy(() => Boolean)", "2.5.1") - def enabledBy(enabled: Gate[Unit]): Effect[A] = - enabledBy(() => enabled()) - - /** - * Perform this effect only if the provided gate returns true. - */ - def enabledBy(enabled: () => Boolean): Effect[A] = - onlyIf { _ => - enabled() - } - - /** - * Perform this effect only if the provided predicate returns true - * for the input. - */ - def onlyIf(predicate: A => Boolean) = - Effect[A] { x => - if (predicate(x)) this(x) else () - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.docx new file mode 100644 index 000000000..c0ea2e57e Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.scala deleted file mode 100644 index 85e4ac996..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.scala +++ /dev/null @@ -1,193 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.Future -import scala.collection.mutable - -/** - * Categorizes an exception according to some criteria. - * n.b. Implemented in terms of lift rather than apply to avoid extra allocations - * when lifting the effect.
- */ -trait ExceptionCategorizer { - import ExceptionCategorizer._ - - def lift(effect: Effect[Category]): Effect[Throwable] - - def apply(t: Throwable): Set[Category] = { - val s = mutable.Set.empty[Category] - lift(Effect(s += _))(t) - s.toSet - } - - /** - * construct a new categorizer that prepends scope to all categories returned by this categorizer - */ - def scoped(scope: Seq[String]): ExceptionCategorizer = - if (scope.isEmpty) { - this - } else { - val scopeIt: Category => Category = Memoize(scope ++ _) - fromLift(effect => lift(effect.contramap(scopeIt))) - } - - /** - * construct a new categorizer that returns the union of the categories returned by this and that - */ - def ++(that: ExceptionCategorizer): ExceptionCategorizer = - fromLift(effect => this.lift(effect).also(that.lift(effect))) - - /** - * construct a new categorizer that only returns categories for throwables matching pred - */ - def onlyIf(pred: Throwable => Boolean): ExceptionCategorizer = - fromLift(lift(_).onlyIf(pred)) -} - -object ExceptionCategorizer { - type Category = Seq[String] - - def const(categories: Set[Category]): ExceptionCategorizer = ExceptionCategorizer(_ => categories) - def const(c: Category): ExceptionCategorizer = const(Set(c)) - def const(s: String): ExceptionCategorizer = const(Seq(s)) - - def apply(fn: Throwable => Set[Category]): ExceptionCategorizer = - new ExceptionCategorizer { - def lift(effect: Effect[Category]) = Effect[Throwable](t => fn(t).foreach(effect)) - override def apply(t: Throwable) = fn(t) - } - - def fromLift(fn: Effect[Category] => Effect[Throwable]): ExceptionCategorizer = - new ExceptionCategorizer { - def lift(effect: Effect[Category]) = fn(effect) - } - - def singular(fn: Throwable => Category): ExceptionCategorizer = - fromLift(_.contramap(fn)) - - def simple(fn: Throwable => String): ExceptionCategorizer = - singular(fn.andThen(Seq(_))) - - def default( - name: Category = Seq("exceptions"), - sanitizeClassnameChain: Throwable => Seq[String] = ThrowableHelper.sanitizeClassnameChain - ): ExceptionCategorizer = - ExceptionCategorizer.const(name) ++ - ExceptionCategorizer.singular(sanitizeClassnameChain).scoped(name) -} - -/** - * Increments a counter for each category returned by the exception categorizer - * - * @param statsReceiver - * the unscoped statsReceiver on which to hang the counters - * @param categorizer - * A function that returns a list of category names that a throwable should be counted under. - */ -class ExceptionCounter(statsReceiver: StatsReceiver, categorizer: ExceptionCategorizer) { - - /** - * alternative constructor for backwards compatibility - * - * @param statsReceiver - * the unscoped statsReceiver on which to hang the counters - * @param name - * the counter name for total exceptions, and scope for individual - * exception counters. default value is `exceptions` - * @param sanitizeClassnameChain - * A function that can be used to cleanup classnames before passing them to the StatsReceiver. 
- */ - def this( - statsReceiver: StatsReceiver, - name: String, - sanitizeClassnameChain: Throwable => Seq[String] - ) = - this(statsReceiver, ExceptionCategorizer.default(List(name), sanitizeClassnameChain)) - - /** - * provided for backwards compatibility - */ - def this(statsReceiver: StatsReceiver) = - this(statsReceiver, ExceptionCategorizer.default()) - - /** - * provided for backwards compatibility - */ - def this(statsReceiver: StatsReceiver, name: String) = - this(statsReceiver, ExceptionCategorizer.default(List(name))) - - /** - * provided for backwards compatibility - */ - def this(statsReceiver: StatsReceiver, sanitizeClassnameChain: Throwable => Seq[String]) = - this( - statsReceiver, - ExceptionCategorizer.default(sanitizeClassnameChain = sanitizeClassnameChain) - ) - - private[this] val counter = categorizer.lift(Effect(statsReceiver.counter(_: _*).incr())) - - /** - * count one or more throwables - */ - def apply(t: Throwable, throwables: Throwable*): Unit = { - counter(t) - if (throwables.nonEmpty) apply(throwables) - } - - /** - * count n throwables - */ - def apply(throwables: Iterable[Throwable]): Unit = { - throwables.foreach(counter) - } - - /** - * wrap around a Future to capture exceptions - */ - def apply[T](f: => Future[T]): Future[T] = { - f onFailure { case t => apply(t) } - } -} - -/** - * A memoized exception counter factory. - * - * @param stats - * the unscoped statsReceiver on which to hang the counters - * @param categorizer - * A function that returns a list of category names that a throwable should be counted under. - */ -class MemoizedExceptionCounterFactory(stats: StatsReceiver, categorizer: ExceptionCategorizer) { - - /** - * A memoized exception counter factory using the default categorizer. - * - * @param stats - * the unscoped statsReceiver on which to hang the counters - */ - def this(stats: StatsReceiver) = - this(stats, ExceptionCategorizer.default()) - - /** - * A memoized exception counter factory using a categorizer with the given suffix. - * - * @param stats - * the unscoped statsReceiver on which to hang the counters - * @param suffix - * All created exception counters will have the - * specified suffix added. This allows compatibility with - * Servo's ExceptionCounter's name param (allows creating - * exception counters that default to the "exceptions" namespace - * as well as those with an otherwise-specified scope). 
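A usage sketch for the exception counters described above; the stats receiver wiring is illustrative.

  import com.twitter.finagle.stats.InMemoryStatsReceiver
  import com.twitter.servo.util.{ExceptionCounter, MemoizedExceptionCounterFactory}
  import com.twitter.util.Future

  val stats = new InMemoryStatsReceiver

  // Counts under "exceptions" and "exceptions/<classname>".
  val exceptions = new ExceptionCounter(stats)
  exceptions(new IllegalStateException("boom"))

  // Wrap a Future so failures are counted as a side effect.
  val result: Future[Int] =
    exceptions(Future.exception[Int](new RuntimeException("bad")))

  // Memoized per-path counters, e.g. one per endpoint.
  val factory = new MemoizedExceptionCounterFactory(stats)
  val getTweetExceptions = factory("getTweet")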
- */ - def this(stats: StatsReceiver, suffix: Seq[String]) = - this(stats, ExceptionCategorizer.default(suffix)) - - private[this] val getCounter = - Memoize { (path: Seq[String]) => - new ExceptionCounter(stats, categorizer.scoped(path)) - } - - def apply(path: String*): ExceptionCounter = getCounter(path) -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.docx new file mode 100644 index 000000000..769b13fee Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.scala deleted file mode 100644 index 2fecb6414..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.scala +++ /dev/null @@ -1,51 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.stats.{NullStatsReceiver, StatsReceiver} -import scala.collection.mutable - -/** - * Maintains a frequency-counted circular buffer of objects. - */ -class FrequencyCounter[Q]( - size: Int, - threshold: Int, - trigger: Q => Unit, - statsReceiver: StatsReceiver = NullStatsReceiver) { - require(threshold > 1) // in order to minimize work for the common case - private[this] val buffer = new mutable.ArraySeq[Q](size) - private[this] var index = 0 - private[this] val counts = mutable.Map[Q, Int]() - - private[this] val keyCountStat = statsReceiver.scope("frequencyCounter").stat("keyCount") - - /** - * Adds a new key to the circular buffer and updates frequency counts. - * Runs trigger if this key occurs exactly `threshold` times in the buffer. - * Returns true if this key occurs at least `threshold` times in the buffer. - */ - def incr(key: Q): Boolean = { - // TODO(aa): maybe write lock-free version - val count = synchronized { - counts(key) = counts.getOrElse(key, 0) + 1 - - Option(buffer(index)) foreach { oldKey => - val countVal = counts(oldKey) - if (countVal == 1) { - counts -= oldKey - } else { - counts(oldKey) = countVal - 1 - } - } - - buffer(index) = key - index = (index + 1) % size - counts(key) - } - keyCountStat.add(count) - if (count == threshold) { - trigger(key) - } - count >= threshold - } - -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.docx new file mode 100644 index 000000000..a4b350c07 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.scala deleted file mode 100644 index a9cc5be0e..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.scala +++ /dev/null @@ -1,75 +0,0 @@ -package com.twitter.servo.util - -/** - * A collection of FunctionArrow factory functions. - */ -object FunctionArrow { - def apply[A, B](f: A => B): FunctionArrow[A, B] = fromFunction(f) - - /** - * Produce a FunctionArrow from a function `A => B`. - */ - def fromFunction[A, B](f: A => B): FunctionArrow[A, B] = - new FunctionArrow[A, B] { - def apply(a: A): B = f(a) - } - - /** - * Produces a FunctionArrow with no side-effects that simply returns its argument.
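Stepping back to the FrequencyCounter just shown, a sketch of detecting hot keys; the sizes and trigger are illustrative.

  import com.twitter.servo.util.FrequencyCounter

  // Track the last 1000 tweet ids; fire when one id fills 1% of the buffer.
  val hotKeys = new FrequencyCounter[Long](
    size = 1000,
    threshold = 10,
    trigger = id => println(s"hot key: $id"))

  val isHot: Boolean = hotKeys.incr(12345L)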
- */ - def identity[A]: FunctionArrow[A, A] = apply(Predef.identity[A]) - - /** - * Appends two FunctionArrows together. - * - * This forms a monoid with 'identity'. - */ - def append[A, B, C](a: FunctionArrow[A, B], b: FunctionArrow[B, C]): FunctionArrow[A, C] = - a.andThen(b) - - /** - * Produce a FunctionArrow that applies an Effect, returning the argument - * value as-is. - */ - def effect[A](effect: Effect[A]): FunctionArrow[A, A] = apply { a => - effect(a); a - } - - /** - * Produces a FunctionArrow that proxies to one of two others, depending on a - * predicate. - */ - def choose[A, B]( - predicate: A => Boolean, - ifTrue: FunctionArrow[A, B], - ifFalse: FunctionArrow[A, B] - ): FunctionArrow[A, B] = - apply { a: A => - if (predicate(a)) ifTrue(a) else ifFalse(a) - } - - /** - * Produces a FunctionArrow whose application is guarded by a predicate. `f` is - * applied if the predicate returns true, otherwise the argument is simply - * returned. - */ - def onlyIf[A](predicate: A => Boolean, f: FunctionArrow[A, A]): FunctionArrow[A, A] = - choose(predicate, f, identity[A]) -} - -/** - * A function encapsulating a computation. - * - * Background on the Arrow abstraction: - * http://en.wikipedia.org/wiki/Arrow_(computer_science) - */ -trait FunctionArrow[-A, +B] extends (A => B) { self => - - /** - * Composes two FunctionArrows. Produces a new FunctionArrow that performs both in series. - */ - def andThen[C](next: FunctionArrow[B, C]): FunctionArrow[A, C] = - new FunctionArrow[A, C] { - override def apply(a: A) = next.apply(self(a)) - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.docx new file mode 100644 index 000000000..743b8264c Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.scala deleted file mode 100644 index ea3fb8959..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.scala +++ /dev/null @@ -1,501 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.service.RetryPolicy -import com.twitter.finagle.stats.Stat -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.tracing.Trace -import com.twitter.finagle.FailedFastException -import com.twitter.finagle.Filter -import com.twitter.finagle.Service -import com.twitter.util._ -import scala.util.control.NonFatal - -/** - * A collection of FutureArrow factory functions. - */ -object FutureArrow { - - /** - * Produce a FutureArrow from a function `A => Future[B]`. - */ - def apply[A, B](f: A => Future[B]): FutureArrow[A, B] = - new FutureArrow[A, B] { - override def apply(a: A): Future[B] = - try f(a) - catch { - case NonFatal(e) => Future.exception(e) - } - } - - /** - * Produce a FutureArrow that supports recursive calls. Recursing from a `Future` - * continuation is stack-safe, but direct recursion will use the stack, like a - * normal method invocation. - */ - def rec[A, B](f: FutureArrow[A, B] => A => Future[B]): FutureArrow[A, B] = - new FutureArrow[A, B] { self => - private val g: A => Future[B] = f(this) - override def apply(a: A): Future[B] = - try g(a) - catch { - case NonFatal(e) => Future.exception(e) - } - } - - /** - * Produce a FutureArrow from a FunctionArrow.
- */ - def fromFunctionArrow[A, B](f: FunctionArrow[A, B]): FutureArrow[A, B] = - FutureArrow[A, B](a => Future(f(a))) - - /** - * Produce a FutureArrow from a function. - */ - def fromFunction[A, B](f: A => B): FutureArrow[A, B] = fromFunctionArrow(FunctionArrow(f)) - - /** - * Produce a FutureArrow from a function `A => Try[B]`. - * - * The Try is evaluated within a Future. Thus, Throw results are translated - * to `Future.exception`s. - */ - def fromTry[A, B](f: A => Try[B]): FutureArrow[A, B] = - FutureArrow[A, B](a => Future.const(f(a))) - - /** - * A FutureArrow that simply returns a Future of its argument. - */ - def identity[A]: FutureArrow[A, A] = - FutureArrow[A, A](a => Future.value(a)) - - /** - * A FutureArrow with a constant result, regardless of input. - */ - def const[A, B](value: Future[B]): FutureArrow[A, B] = - FutureArrow[A, B](_ => value) - - /** - * Appends two FutureArrows together. - * - * This forms a category with 'identity'. - */ - def append[A, B, C](a: FutureArrow[A, B], b: FutureArrow[B, C]) = a.andThen(b) - - /** - * Produce a FutureArrow that applies a FutureEffect, returning the argument - * value as-is on success. If the effect returns a Future exception, then the - * result of the arrow will also be that exception. - */ - def effect[A](effect: FutureEffect[A]): FutureArrow[A, A] = - apply(a => effect(a).map(_ => a)) - - /** - * Produces a FutureArrow that proxies to one of two others, depending on a - * predicate. - */ - def choose[A, B](predicate: A => Boolean, ifTrue: FutureArrow[A, B], ifFalse: FutureArrow[A, B]) = - FutureArrow[A, B](a => if (predicate(a)) ifTrue(a) else ifFalse(a)) - - /** - * Produces a FutureArrow whose application is guarded by a predicate. `f` is - * applied if the predicate returns true, otherwise the argument is simply - * returned. - */ - def onlyIf[A](predicate: A => Boolean, f: FutureArrow[A, A]) = - choose(predicate, f, identity[A]) - - /** - * Produces a FutureArrow that forwards to multiple FutureArrows and collects - * the results into a `Seq[B]`. Results are gathered via Future.collect, so - * failure semantics are inherited from that method. - */ - def collect[A, B](arrows: Seq[FutureArrow[A, B]]): FutureArrow[A, Seq[B]] = - apply(a => Future.collect(arrows.map(arrow => arrow(a)))) - - private val RetryOnNonFailedFast: PartialFunction[Try[Any], Boolean] = { - case Throw(_: FailedFastException) => false - case Throw(_: Exception) => true - } -} - -/** - * A function encapsulating an asynchronous computation. - * - * Background on the Arrow abstraction: - * http://en.wikipedia.org/wiki/Arrow_(computer_science) - */ -trait FutureArrow[-A, +B] extends (A => Future[B]) { self => - - /** - * Composes two FutureArrows. Produces a new FutureArrow that performs both in - * series, depending on the success of the first. - */ - def andThen[C](next: FutureArrow[B, C]): FutureArrow[A, C] = - FutureArrow[A, C](a => self(a).flatMap(next.apply)) - - /** - * Combines this FutureArrow with another, producing one that translates a - * tuple of its constituents' arguments into a tuple of their results. - */ - def zipjoin[C, D](other: FutureArrow[C, D]): FutureArrow[(A, C), (B, D)] = - FutureArrow[(A, C), (B, D)] { - case (a, c) => self(a) join other(c) - } - - /** - * Converts a FutureArrow on a scalar input and output value into a FutureArrow on a - * Sequence of input values producing a pairwise sequence of output values. The elements - * of the input sequence are processed in parallel, so execution order is not guaranteed.
- * Results are gathered via Future.collect, so failure semantics are inherited from that method. - */ - def liftSeq: FutureArrow[Seq[A], Seq[B]] = - FutureArrow[Seq[A], Seq[B]] { seqA => - Future.collect(seqA.map(this)) - } - - /** - * Converts this FutureArrow to a FutureEffect, where the result value is ignored. - */ - def asFutureEffect[A2 <: A]: FutureEffect[A2] = - FutureEffect(this.unit) - - /** - * Combines this FutureArrow with another, producing one that applies both - * in parallel, producing a tuple of their results. - */ - def inParallel[A2 <: A, C](other: FutureArrow[A2, C]): FutureArrow[A2, (B, C)] = { - val paired = self.zipjoin(other) - FutureArrow[A2, (B, C)](a => paired((a, a))) - } - - /** - * Wrap a FutureArrow with an ExceptionCounter, thus providing - * observability into the arrow's success and failure. - */ - def countExceptions( - exceptionCounter: ExceptionCounter - ): FutureArrow[A, B] = - FutureArrow[A, B](request => exceptionCounter(self(request))) - - /** - * Returns a chained FutureArrow in which the given function will be called for any - * input that succeeds. - */ - def onSuccess[A2 <: A](f: (A2, B) => Unit): FutureArrow[A2, B] = - FutureArrow[A2, B](a => self(a).onSuccess(b => f(a, b))) - - /** - * Returns a chained FutureArrow in which the given function will be called for any - * input that fails. - */ - def onFailure[A2 <: A](f: (A2, Throwable) => Unit): FutureArrow[A2, B] = - FutureArrow[A2, B](a => self(a).onFailure(t => f(a, t))) - - /** - * Translate exceptions returned by a FutureArrow according to a - * PartialFunction. - */ - def translateExceptions( - translateException: PartialFunction[Throwable, Throwable] - ): FutureArrow[A, B] = - FutureArrow[A, B] { request => - self(request).rescue { - case t if translateException.isDefinedAt(t) => Future.exception(translateException(t)) - case t => Future.exception(t) - } - } - - /** - * Apply a FutureArrow, lifting any non-Future exceptions thrown into - * `Future.exception`s. - */ - def liftExceptions: FutureArrow[A, B] = - FutureArrow[A, B] { request => - // Flattening the Future[Future[Response]] is equivalent to, but more concise - // than, wrapping the arrow(request) call in a try/catch block that transforms - // the exception to a Future.exception. - Future(self(request)).flatten - } - - /** - * Wrap a FutureArrow in exception-tracking and -translation. Exceptional - * results are counted by the given ExceptionCounter and translated - * according to the partial function passed in the second argument list. - */ - def cleanly( - exceptionCounter: ExceptionCounter - )( - translateException: PartialFunction[Throwable, Throwable] = { case t => t } - ): FutureArrow[A, B] = { - liftExceptions - .translateExceptions(translateException) - .countExceptions(exceptionCounter) - } - - /** - * Produces a FutureArrow that tracks its own application latency. - */ - @deprecated("use trackLatency(StatsReceiver, A2 => String)", "2.11.1") - def trackLatency[A2 <: A]( - extractName: (A2 => String), - statsReceiver: StatsReceiver - ): FutureArrow[A2, B] = - trackLatency(statsReceiver, extractName) - - /** - * Produces a FutureArrow that tracks its own application latency.
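To make the combinators concrete, a sketch under assumed names (User and fetchUser are hypothetical stand-ins for a real backend call):

    import com.twitter.servo.util.FutureArrow
    import com.twitter.util.Future

    case class User(id: Long, name: String) // hypothetical record

    val fetchUser: FutureArrow[Long, User] =
      FutureArrow((id: Long) => Future.value(User(id, s"user-$id")))

    // Sequence two arrows, then lift to batches; the lifted arrow inherits
    // its failure semantics from Future.collect.
    val userName: FutureArrow[Long, String] =
      fetchUser.andThen(FutureArrow.fromFunction((u: User) => u.name))
    val fetchAll: FutureArrow[Seq[Long], Seq[User]] = fetchUser.liftSeq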
- */ - def trackLatency[A2 <: A]( - statsReceiver: StatsReceiver, - extractName: (A2 => String) - ): FutureArrow[A2, B] = - FutureArrow[A2, B] { request => - Stat.timeFuture(statsReceiver.stat(extractName(request), "latency_ms")) { - self(request) - } - } - - /** - * Produces a FutureArrow that tracks the outcome (i.e. success vs failure) of - * requests. - */ - @deprecated("use trackOutcome(StatsReceiver, A2 => String)", "2.11.1") - def trackOutcome[A2 <: A]( - extractName: (A2 => String), - statsReceiver: StatsReceiver - ): FutureArrow[A2, B] = - trackOutcome(statsReceiver, extractName) - - def trackOutcome[A2 <: A]( - statsReceiver: StatsReceiver, - extractName: (A2 => String) - ): FutureArrow[A2, B] = - trackOutcome(statsReceiver, extractName, _ => None) - - /** - * Produces a FutureArrow that tracks the outcome (i.e. success vs failure) of - * requests. - */ - def trackOutcome[A2 <: A]( - statsReceiver: StatsReceiver, - extractName: (A2 => String), - exceptionCategorizer: Throwable => Option[String] - ): FutureArrow[A2, B] = - FutureArrow[A2, B] { request => - val scope = statsReceiver.scope(extractName(request)) - - self(request).respond { r => - statsReceiver.counter("requests").incr() - scope.counter("requests").incr() - - r match { - case Return(_) => - statsReceiver.counter("success").incr() - scope.counter("success").incr() - - case Throw(t) => - val category = exceptionCategorizer(t).getOrElse("failures") - statsReceiver.counter(category).incr() - scope.counter(category).incr() - scope.scope(category).counter(ThrowableHelper.sanitizeClassnameChain(t): _*).incr() - } - } - } - - /** - * Observe latency and success rate for any FutureArrow[A, B] where A is Observable - */ - def observed[A2 <: A with Observable]( - statsReceiver: StatsReceiver - ): FutureArrow[A2, B] = - observed(statsReceiver, exceptionCategorizer = _ => None) - - /** - * Observe latency and success rate for any FutureArrow[A, B] where A is Observable - */ - def observed[A2 <: A with Observable]( - statsReceiver: StatsReceiver, - exceptionCategorizer: Throwable => Option[String] - ): FutureArrow[A2, B] = - self.observed( - statsReceiver.scope("client_request"), - (a: A2) => a.requestName, - exceptionCategorizer - ) - - /** - * Observe latency and success rate for any FutureArrow - */ - def observed[A2 <: A]( - statsReceiver: StatsReceiver, - statsScope: A2 => String, - exceptionCategorizer: Throwable => Option[String] = _ => None - ): FutureArrow[A2, B] = - self - .trackLatency(statsReceiver, statsScope) - .trackOutcome(statsReceiver, statsScope, exceptionCategorizer) - - /** - * Trace the future arrow using local spans as documented here: - * https://docbird.twitter.biz/finagle/Tracing.html - */ - def traced[A2 <: A]( - traceScope: A2 => String - ): FutureArrow[A2, B] = { - FutureArrow[A2, B] { a => - Trace.traceLocalFuture(traceScope(a))(self(a)) - } - } - - /** - * Produces a new FutureArrow where the given function is applied to the input, and the result - * passed to this FutureArrow. - */ - def contramap[C](f: C => A): FutureArrow[C, B] = - FutureArrow[C, B](f.andThen(self)) - - /** - * Produces a new FutureArrow where the given function is applied to the result of this - * FutureArrow. - */ - def map[C](f: B => C): FutureArrow[A, C] = - mapResult(_.map(f)) - - /** - * Produces a new FutureArrow where the given function is applied to the resulting Future of - * this FutureArrow.
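A sketch of wiring the observability combinators above to a stats receiver, reusing the hypothetical fetchUser from the previous sketch:

    import com.twitter.finagle.stats.InMemoryStatsReceiver

    val stats = new InMemoryStatsReceiver

    val observedFetch =
      fetchUser.observed(stats, statsScope = (_: Long) => "fetch_user")

    // Each call records fetch_user/latency_ms and bumps requests plus either
    // success or a failure category, both unscoped and under fetch_user/.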
- */ - def mapResult[C](f: Future[B] => Future[C]): FutureArrow[A, C] = - FutureArrow[A, C](a => f(self(a))) - - /** - * Produces a new FutureArrow which translates exceptions into futures - */ - def rescue[B2 >: B]( - rescueException: PartialFunction[Throwable, Future[B2]] - ): FutureArrow[A, B2] = { - FutureArrow[A, B2] { a => - self(a).rescue(rescueException) - } - } - - /** - * Produces a new FutureArrow where the result value is ignored, and Unit is returned. - */ - def unit: FutureArrow[A, Unit] = - mapResult(_.unit) - - /** - * Returns a copy of this FutureArrow where the returned Future has its `.masked` - * method called. - */ - def masked: FutureArrow[A, B] = - mapResult(_.masked) - - /** - * Wraps this FutureArrow by passing the underlying operation to the given retry handler - * for possible retries. - */ - def retry(handler: RetryHandler[B]): FutureArrow[A, B] = - FutureArrow[A, B](a => handler(self(a))) - - def retry[A2 <: A]( - policy: RetryPolicy[Try[B]], - timer: Timer, - statsReceiver: StatsReceiver, - extractName: (A2 => String) - ): FutureArrow[A2, B] = - FutureArrow[A2, B] { a => - val scoped = statsReceiver.scope(extractName(a)) - RetryHandler(policy, timer, scoped)(self(a)) - } - - /** - * Produces a new FutureArrow where the returned Future[B] must complete within the specified - * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. - * - * The [[timeout]] is passed by name to take advantage of deadlines passed in the request context. - * - * ''Note'': On timeout, the underlying future is NOT interrupted. - */ - def withTimeout(timer: Timer, timeout: => Duration): FutureArrow[A, B] = - mapResult(_.within(timer, timeout)) - - /** - * Produces a new FutureArrow where the returned Future must complete within the specified - * timeout, otherwise the Future fails with the specified Throwable. - * - * The [[timeout]] is passed by name to take advantage of deadlines passed in the request context. - * - * ''Note'': On timeout, the underlying future is NOT interrupted. - */ - def withTimeout(timer: Timer, timeout: => Duration, exc: => Throwable): FutureArrow[A, B] = - mapResult(_.within(timer, timeout, exc)) - - /** - * Produces a new FutureArrow where the returned Future[B] must complete within the specified - * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. - * - * The [[timeout]] is passed by name to take advantage of deadlines passed in the request context. - * - * ''Note'': On timeout, the underlying future is interrupted. - */ - def raiseWithin(timer: Timer, timeout: => Duration): FutureArrow[A, B] = - mapResult(_.raiseWithin(timeout)(timer)) - - /** - * Produces a new FutureArrow where the returned Future must complete within the specified - * timeout, otherwise the Future fails with the specified Throwable. - * - * [[timeout]] is passed by name to take advantage of deadlines passed in the request context. - * - * ''Note'': On timeout, the underlying future is interrupted. - */ - def raiseWithin(timer: Timer, timeout: => Duration, exc: => Throwable): FutureArrow[A, B] = - mapResult(_.raiseWithin(timer, timeout, exc)) - - /** - * Produces a finagle.Service instance that invokes this arrow. - */ - def asService: Service[A, B] = Service.mk(this) - - /** - * Produces a new FutureArrow with the given finagle.Filter applied to this instance. 
- */ - def withFilter[A2, B2](filter: Filter[A2, B2, A, B]): FutureArrow[A2, B2] = - FutureArrow[A2, B2](filter.andThen(asService)) - - /** - * Produces a new FutureArrow with the given timeout which retries on Exceptions or timeouts and - * records stats about the logical request. This is only appropriate for idempotent operations. - */ - def observedWithTimeoutAndRetry[A2 <: A]( - statsReceiver: StatsReceiver, - extractName: (A2 => String), - timer: Timer, - timeout: Duration, - numTries: Int, - shouldRetry: PartialFunction[Try[B], Boolean] = FutureArrow.RetryOnNonFailedFast - ): FutureArrow[A2, B] = { - val retryPolicy = RetryPolicy.tries(numTries, shouldRetry) - withTimeout(timer, timeout) - .retry(retryPolicy, timer, statsReceiver, extractName) - .trackLatency(statsReceiver, extractName) - .trackOutcome(statsReceiver, extractName) - } - - /** - * Produces a new FutureArrow with the given timeout and records stats about the logical request. - * This does not retry and is appropriate for non-idempotent operations. - */ - def observedWithTimeout[A2 <: A]( - statsReceiver: StatsReceiver, - extractName: (A2 => String), - timer: Timer, - timeout: Duration - ): FutureArrow[A2, B] = - withTimeout(timer, timeout) - .trackLatency(statsReceiver, extractName) - .trackOutcome(statsReceiver, extractName) -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.docx new file mode 100644 index 000000000..fa230de58 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala deleted file mode 100644 index aa20bcd9f..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala +++ /dev/null @@ -1,379 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.stats.{StatsReceiver, Stat} -import com.twitter.logging.{Logger, NullLogger} -import com.twitter.util._ - -object FutureEffect { - private[this] val _unit = FutureEffect[Any] { _ => - Future.Unit - } - - /** - * A FutureEffect that always succeeds. - */ - def unit[T]: FutureEffect[T] = - _unit.asInstanceOf[FutureEffect[T]] - - /** - * A FutureEffect that always fails with the given exception. - */ - def fail[T](ex: Throwable): FutureEffect[T] = - FutureEffect[T] { _ => - Future.exception(ex) - } - - /** - * Lift a function returning a Future to a FutureEffect. - */ - def apply[T](f: T => Future[Unit]) = - new FutureEffect[T] { - override def apply(x: T) = f(x) - } - - /** - * Performs all of the effects in order. If any effect fails, the - * whole operation fails, and the subsequent effects are not - * attempted. - */ - def sequentially[T](effects: FutureEffect[T]*): FutureEffect[T] = - effects.foldLeft[FutureEffect[T]](unit[T])(_ andThen _) - - /** - * Perform all of the effects concurrently. If any effect fails, the - * whole operation fails, but any of the effects may or may not have - * taken place. - */ - def inParallel[T](effects: FutureEffect[T]*): FutureEffect[T] = - FutureEffect[T] { t => - Future.join(effects map { _(t) }) - } - - def fromPartial[T](f: PartialFunction[T, Future[Unit]]) = - FutureEffect[T] { x => - if (f.isDefinedAt(x)) f(x) else Future.Unit - } - - /** - * Combines two FutureEffects into one that dispatches according to a gate. 
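observedWithTimeoutAndRetry stacks withTimeout, retry, trackLatency, and trackOutcome; because it retries, it is only safe for idempotent calls. A sketch reusing the hypothetical fetchUser and stats from the earlier sketches:

    import com.twitter.conversions.DurationOps._
    import com.twitter.finagle.util.DefaultTimer

    val robustFetch =
      fetchUser.observedWithTimeoutAndRetry(
        statsReceiver = stats,
        extractName = (_: Long) => "fetch_user",
        timer = DefaultTimer,
        timeout = 200.milliseconds,
        numTries = 3 // one initial try plus up to two retries
      )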
If the gate is - * true, use `a`, otherwise, use `b`. - */ - def selected[T](condition: Gate[Unit], a: FutureEffect[T], b: FutureEffect[T]): FutureEffect[T] = - selected(() => condition(), a, b) - - /** - * Combines two FutureEffects into one that dispatches according to a nullary boolean function. - * If the function returns true, use `a`, otherwise, use `b`. - */ - def selected[T](f: () => Boolean, a: FutureEffect[T], b: FutureEffect[T]): FutureEffect[T] = - FutureEffect[T] { t => - if (f()) a(t) else b(t) - } -} - -/** - * A function whose only result is a future effect. This wrapper - * provides convenient combinators. - */ -trait FutureEffect[T] extends (T => Future[Unit]) { self => - - /** - * Simplified version of `apply` when type is `Unit`. - */ - def apply()(implicit ev: Unit <:< T): Future[Unit] = self(()) - - /** - * Combines two Future effects, performing this one first and - * performing the next one if this one succeeds. - */ - def andThen(next: FutureEffect[T]): FutureEffect[T] = - FutureEffect[T] { x => - self(x) flatMap { _ => - next(x) - } - } - - /** - * Wraps this FutureEffect with a failure handling function that will be chained to - * the Future returned by this FutureEffect. - */ - def rescue( - handler: PartialFunction[Throwable, FutureEffect[T]] - ): FutureEffect[T] = - FutureEffect[T] { x => - self(x) rescue { - case t if handler.isDefinedAt(t) => - handler(t)(x) - } - } - - /** - * Combines two future effects, performing them both simultaneously. - * If either effect fails, the result will be failure, but the other - * effect will still have been attempted. - */ - def inParallel(other: FutureEffect[T]) = - FutureEffect[T] { x => - Future.join(Seq(self(x), other(x))) - } - - /** - * Perform this effect only if the provided gate returns true. - */ - def enabledBy(enabled: Gate[Unit]): FutureEffect[T] = - enabledBy(() => enabled()) - - /** - * Perform this effect only if the provided function returns true. - */ - def enabledBy(enabled: () => Boolean): FutureEffect[T] = - onlyIf { _ => - enabled() - } - - /** - * Perform this effect only if the provided predicate returns true - * for the input. - */ - def onlyIf(predicate: T => Boolean) = - FutureEffect[T] { x => - if (predicate(x)) self(x) else Future.Unit - } - - /** - * Perform this effect with arg only if the condition is true. Otherwise just return Future.Unit. - */ - def when(condition: Boolean)(arg: => T): Future[Unit] = - if (condition) self(arg) else Future.Unit - - /** - * Adapt this effect to take a different input via the provided conversion. - * - * (Contravariant map) - */ - def contramap[U](g: U => T) = FutureEffect[U] { u => - self(g(u)) - } - - /** - * Adapt this effect to take a different input via the provided conversion. - * - * (Contravariant map) - */ - def contramapFuture[U](g: U => Future[T]) = FutureEffect[U] { u => - g(u) flatMap self - } - - /** - * Adapt this effect to take a different input via the provided conversion. - * If the output value of the given function is None, the effect is a no-op. - */ - def contramapOption[U](g: U => Option[T]) = - FutureEffect[U] { - g andThen { - case None => Future.Unit - case Some(t) => self(t) - } - } - - /** - * Adapt this effect to take a different input via the provided conversion. - * If the output value of the given function is future-None, the effect is a no-op.
- * (Contravariant map) - */ - def contramapFutureOption[U](g: U => Future[Option[T]]) = - FutureEffect[U] { u => - g(u) flatMap { - case None => Future.Unit - case Some(x) => self(x) - } - } - - /** - * Adapt this effect to take a sequence of input values. - */ - def liftSeq: FutureEffect[Seq[T]] = - FutureEffect[Seq[T]] { seqT => - Future.join(seqT.map(self)) - } - - /** - * Allow the effect to fail, but immediately return success. The - * effect is not guaranteed to have finished when its future is - * available. - */ - def ignoreFailures: FutureEffect[T] = - FutureEffect[T] { x => - Try(self(x)); Future.Unit - } - - /** - * Allow the effect to fail but always return success. Unlike ignoreFailures, the - * effect is guaranteed to have finished when its future is available. - */ - def ignoreFailuresUponCompletion: FutureEffect[T] = - FutureEffect[T] { x => - Try(self(x)) match { - case Return(f) => f.handle { case _ => () } - case Throw(_) => Future.Unit - } - } - - /** - * Returns a chained FutureEffect in which the given function will be called for any - * input that succeeds. - */ - def onSuccess(f: T => Unit): FutureEffect[T] = - FutureEffect[T] { x => - self(x).onSuccess(_ => f(x)) - } - - /** - * Returns a chained FutureEffect in which the given function will be called for any - * input that fails. - */ - def onFailure(f: (T, Throwable) => Unit): FutureEffect[T] = - FutureEffect[T] { x => - self(x).onFailure(t => f(x, t)) - } - - /** - * Translate exception returned by a FutureEffect according to a - * PartialFunction. - */ - def translateExceptions( - translateException: PartialFunction[Throwable, Throwable] - ): FutureEffect[T] = - FutureEffect[T] { request => - self(request) rescue { - case t if translateException.isDefinedAt(t) => Future.exception(translateException(t)) - case t => Future.exception(t) - } - } - - /** - * Wraps an effect with retry logic. Will retry against any failure. - */ - def retry(backoffs: Stream[Duration], timer: Timer, stats: StatsReceiver): FutureEffect[T] = - retry(RetryHandler.failuresOnly(backoffs, timer, stats)) - - /** - * Returns a new FutureEffect that executes the effect within the given RetryHandler, which - * may retry the operation on failures. - */ - def retry(handler: RetryHandler[Unit]): FutureEffect[T] = - FutureEffect[T](handler.wrap(self)) - - @deprecated("use trackOutcome", "2.11.1") - def countExceptions(stats: StatsReceiver, getScope: T => String) = { - val exceptionCounterFactory = new MemoizedExceptionCounterFactory(stats) - FutureEffect[T] { t => - exceptionCounterFactory(getScope(t)) { self(t) } - } - } - - /** - * Produces a FutureEffect that tracks the latency of the underlying operation. - */ - def trackLatency(stats: StatsReceiver, extractName: T => String): FutureEffect[T] = - FutureEffect[T] { t => - Stat.timeFuture(stats.stat(extractName(t), "latency_ms")) { self(t) } - } - - def trackOutcome( - stats: StatsReceiver, - extractName: T => String, - logger: Logger = NullLogger - ): FutureEffect[T] = trackOutcome(stats, extractName, logger, _ => None) - - /** - * Produces a FutureEffect that tracks the outcome (i.e. success vs failure) of - * requests, including counting exceptions by classname. 
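A sketch of building and combining FutureEffects (TweetEvent and both sinks are hypothetical):

    import com.twitter.servo.util.FutureEffect
    import com.twitter.util.Future

    case class TweetEvent(id: Long, text: String) // hypothetical payload

    val logIt: FutureEffect[TweetEvent] =
      FutureEffect((e: TweetEvent) => Future { println(s"event ${e.id}") })
    val scribeIt: FutureEffect[TweetEvent] =
      FutureEffect((_: TweetEvent) => Future.Unit) // stand-in for a real sink

    // Run both sinks concurrently, skipping events with empty text.
    val publish: FutureEffect[TweetEvent] =
      logIt.inParallel(scribeIt).onlyIf(_.text.nonEmpty)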
- */ - def trackOutcome( - stats: StatsReceiver, - extractName: T => String, - logger: Logger, - exceptionCategorizer: Throwable => Option[String] - ): FutureEffect[T] = - FutureEffect[T] { t => - val name = extractName(t) - val scope = stats.scope(name) - - self(t) respond { r => - scope.counter("requests").incr() - - r match { - case Return(_) => - scope.counter("success").incr() - - case Throw(t) => - val category = exceptionCategorizer(t).getOrElse("failures") - scope.counter(category).incr() - scope.scope(category).counter(ThrowableHelper.sanitizeClassnameChain(t): _*).incr() - logger.warning(t, s"failure in $name") - } - } - } - - /** - * Observe latency and success rate for any FutureEffect - * @param statsScope a function to produce a parent stats scope from the argument - * to the FutureEffect - * @param exceptionCategorizer a function to assign different Throwables with custom stats scopes. - */ - def observed( - statsReceiver: StatsReceiver, - statsScope: T => String, - logger: Logger = NullLogger, - exceptionCategorizer: Throwable => Option[String] = _ => None - ): FutureEffect[T] = - self - .trackLatency(statsReceiver, statsScope) - .trackOutcome(statsReceiver, statsScope, logger, exceptionCategorizer) - - /** - * Produces a new FutureEffect where the given function is applied to the result of this - * FutureEffect. - */ - def mapResult(f: Future[Unit] => Future[Unit]): FutureEffect[T] = - FutureEffect[T] { x => - f(self(x)) - } - - /** - * Produces a new FutureEffect where the returned Future must complete within the specified - * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. - * - * ''Note'': On timeout, the underlying future is NOT interrupted. - */ - def withTimeout(timer: Timer, timeout: Duration): FutureEffect[T] = - mapResult(_.within(timer, timeout)) - - /** - * Produces a new FutureEffect where the returned Future must complete within the specified - * timeout, otherwise the Future fails with the specified Throwable. - * - * ''Note'': On timeout, the underlying future is NOT interrupted. - */ - def withTimeout(timer: Timer, timeout: Duration, exc: => Throwable): FutureEffect[T] = - mapResult(_.within(timer, timeout, exc)) - - /** - * Produces a new FutureEffect where the returned Future must complete within the specified - * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. - * - * ''Note'': On timeout, the underlying future is interrupted. - */ - def raiseWithin(timer: Timer, timeout: Duration): FutureEffect[T] = - mapResult(_.raiseWithin(timeout)(timer)) - - /** - * Produces a new FutureEffect where the returned Future must complete within the specified - * timeout, otherwise the Future fails with the specified Throwable. - * - * ''Note'': On timeout, the underlying future is interrupted. 
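Continuing the sketch above, retry and observed can wrap an existing effect (stats and DefaultTimer are reused from earlier sketches):

    import com.twitter.conversions.DurationOps._

    val reliablePublish: FutureEffect[TweetEvent] =
      publish
        .retry(Stream(100.milliseconds, 1.second), DefaultTimer, stats)
        .observed(stats, statsScope = _ => "publish_event")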
- */ - def raiseWithin(timer: Timer, timeout: Duration, exc: => Throwable): FutureEffect[T] = - mapResult(_.raiseWithin(timer, timeout, exc)) -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.docx new file mode 100644 index 000000000..9c813ac0f Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala deleted file mode 100644 index 7b1420bff..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala +++ /dev/null @@ -1,210 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.{Duration, Time} -import java.util.concurrent.ThreadLocalRandom -import scala.language.implicitConversions - -object Gate { - - /** - * Construct a new Gate from a boolean function and a string representation - */ - def apply[T](f: T => Boolean, repr: => String): Gate[T] = - new Gate[T] { - override def apply[U](u: U)(implicit asT: <:<[U, T]): Boolean = f(asT(u)) - override def toString: String = repr - } - - /** - * Construct a new Gate from a boolean function - */ - def apply[T](f: T => Boolean): Gate[T] = Gate(f, "Gate(" + f + ")") - - /** - * Create a Gate[Any] with a probability of returning true - * that increases linearly with the availability, which should range from 0.0 to 1.0. - */ - def fromAvailability( - availability: => Double, - randomDouble: => Double = ThreadLocalRandom.current().nextDouble(), - repr: String = "Gate.fromAvailability" - ): Gate[Any] = - Gate(_ => randomDouble < math.max(math.min(availability, 1.0), 0.0), repr) - - /** - * Creates a Gate[Any] with a probability of returning true that - * increases linearly in time between startTime and (startTime + rampUpDuration). - */ - def linearRampUp( - startTime: Time, - rampUpDuration: Duration, - randomDouble: => Double = ThreadLocalRandom.current().nextDouble() - ): Gate[Any] = { - val availability = availabilityFromLinearRampUp(startTime, rampUpDuration) - - fromAvailability( - availability(Time.now), - randomDouble, - repr = "Gate.rampUp(" + startTime + ", " + rampUpDuration + ")" - ) - } - - /** - * Generates an availability function that maps a point in time to an availability value - * in the range of 0.0 - 1.0. Availability is 0 if the given time is before startTime, is - * 1 if it is greater than (startTime + rampUpDuration), and is otherwise linearly - * interpolated between 0.0 and 1.0 as the time moves through the two endpoints. - */ - def availabilityFromLinearRampUp(startTime: Time, rampUpDuration: Duration): Time => Double = { - val endTime = startTime + rampUpDuration - val rampUpMillis = rampUpDuration.inMilliseconds.toDouble - now => { - if (now >= endTime) { - 1.0 - } else if (now <= startTime) { - 0.0 - } else { - (now - startTime).inMilliseconds.toDouble / rampUpMillis - } - } - } - - /** - * Returns a gate that increments true / false counters for each Gate invocation. Counter name - * can be overridden with trueName and falseName.
- */ - def observed[T]( - gate: Gate[T], - stats: StatsReceiver, - trueName: String = "true", - falseName: String = "false" - ): Gate[T] = { - val trueCount = stats.counter(trueName) - val falseCount = stats.counter(falseName) - gate - .onTrue[T] { _ => - trueCount.incr() - } - .onFalse[T] { _ => - falseCount.incr() - } - } - - /** - * Construct a new Gate from a boolean value - */ - def const(v: Boolean): Gate[Any] = Gate(_ => v, v.toString) - - /** - * Constructs a new Gate that returns true if any of the gates in the input list return true. - * Always returns false when the input list is empty. - */ - def any[T](gates: Gate[T]*): Gate[T] = gates.foldLeft[Gate[T]](Gate.False)(_ | _) - - /** - * Constructs a new Gate that returns true iff all the gates in the input list return true. - * Always returns true when the input list is empty. - */ - def all[T](gates: Gate[T]*): Gate[T] = gates.foldLeft[Gate[T]](Gate.True)(_ & _) - - /** - * Gates that always return true/false - */ - val True: Gate[Any] = const(true) - val False: Gate[Any] = const(false) - - // Implicit conversions to downcast Gate to a plain function - implicit def gate2function1[T](g: Gate[T]): T => Boolean = g(_) - implicit def gate2function0(g: Gate[Unit]): () => Boolean = () => g(()) -} - -/** - * A function from T to Boolean, composable with boolean-like operators. - * Also supports building higher-order functions - * for dispatching based upon the value of this function over values of type T. - * Note: Gate does not inherit from T => Boolean in order to enforce correct type checking - * in the apply method of Gate[Unit]. (Scala is overeager to convert the return type of an - * expression to Unit.) Instead, an implicit conversion allows Gate to be used in methods that - * require a function T => Boolean. - */ -trait Gate[-T] { - - /** - * A function from T => boolean with strict type bounds - */ - def apply[U](u: U)(implicit asT: <:<[U, T]): Boolean - - /** - * A nullary variant of apply that can be used when T is Unit - */ - def apply()(implicit isUnit: <:<[Unit, T]): Boolean = apply(isUnit(())) - - /** - * Return a new Gate which applies the given function and then calls this Gate - */ - def contramap[U](f: U => T): Gate[U] = Gate(f andThen this, "%s.contramap(%s)".format(this, f)) - - /** - * Returns a new Gate of the requested type that ignores its input - */ - def on[U](implicit isUnit: <:<[Unit, T]): Gate[U] = contramap((_: U) => ()) - - /** - * Returns a new Gate which returns true when this Gate returns false - */ - def unary_! : Gate[T] = Gate(x => !this(x), "!%s".format(this)) - - /** - * Returns a new Gate which returns true when both this Gate and other Gate return true - */ - def &[U <: T](other: Gate[U]): Gate[U] = - Gate(x => this(x) && other(x), "(%s & %s)".format(this, other)) - - /** - * Returns a new Gate which returns true when either this Gate or other Gate return true - */ - def |[U <: T](other: Gate[U]): Gate[U] = - Gate(x => this(x) || other(x), "(%s | %s)".format(this, other)) - - /** - * Returns a new Gate which returns true when return values of this Gate and other Gate differ - */ - def ^[U <: T](other: Gate[U]): Gate[U] = - Gate(x => this(x) ^ other(x), "(%s ^ %s)".format(this, other)) - - /** - * Returns the first value when this Gate returns true, or the second value if it returns false.
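A sketch combining the Gate factories and operators above (the even-id predicate is arbitrary, purely for illustration):

    import com.twitter.conversions.DurationOps._
    import com.twitter.servo.util.Gate
    import com.twitter.util.Time

    // Ramp a code path from 0% to 100% of calls over 30 minutes,
    // restricted to even ids.
    val ramp: Gate[Any] = Gate.linearRampUp(Time.now, 30.minutes)
    val evenIds: Gate[Long] = Gate((id: Long) => id % 2 == 0)
    val featureGate: Gate[Long] = ramp & evenIds

    val path: String = featureGate.pick(42L, "new path", "old path")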
- */ - def pick[A](t: T, x: => A, y: => A): A = if (this(t)) x else y - - /** - * A variant of pick that doesn't require a value if T is a supertype of Unit - */ - def pick[A](x: => A, y: => A)(implicit isUnit: <:<[Unit, T]): A = pick(isUnit(()), x, y) - - /** - * Returns a 1-arg function that dynamically picks x or y based upon the function arg. - */ - def select[A](x: => A, y: => A): T => A = pick(_, x, y) - - /** - * Returns a version of this gate that runs the effect if the gate returns true. - */ - def onTrue[U <: T](f: U => Unit): Gate[U] = - Gate { (t: U) => - val v = this(t) - if (v) f(t) - v - } - - /** - * Returns a version of this gate that runs the effect if the gate returns false. - */ - def onFalse[U <: T](f: U => Unit): Gate[U] = - Gate { (t: U) => - val v = this(t) - if (!v) f(t) - v - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.docx new file mode 100644 index 000000000..453fa3499 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala deleted file mode 100644 index 262ea1bab..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.stats.{StatsReceiver, Stat} -import com.twitter.util.Future - -object LogarithmicallyBucketedTimer { - val LatencyStatName = "latency_ms" -} - -/** - * Helper to bucket timings by quantity. It produces base10 and baseE log buckets. - */ -class LogarithmicallyBucketedTimer( - statsReceiver: StatsReceiver, - prefix: String = LogarithmicallyBucketedTimer.LatencyStatName) { - - protected[this] def base10Key(count: Int) = - prefix + "_log_10_" + math.floor(math.log10(count)).toInt - - protected[this] def baseEKey(count: Int) = - prefix + "_log_E_" + math.floor(math.log(count)).toInt - - /** - * Takes the base10 and baseE logs of the count and adds timings to the - * appropriate buckets - */ - def apply[T](count: Int = 0)(f: => Future[T]) = { - Stat.timeFuture(statsReceiver.stat(prefix)) { - // only bucketize for positive, non-zero counts - if (count > 0) { - Stat.timeFuture(statsReceiver.stat(base10Key(count))) { - Stat.timeFuture(statsReceiver.stat(baseEKey(count))) { - f - } - } - } else { - f - } - } - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.docx new file mode 100644 index 000000000..cb362560f Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala deleted file mode 100644 index 995d01906..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.stats._ - -/** - * Stores scoped StatsReceivers in a map to avoid unnecessary object creation.
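A sketch of the bucketed timer above, reusing the hypothetical stats receiver from earlier sketches; a batch of 250 lands in the 10^2 and e^5 buckets since floor(log10 250) = 2 and floor(ln 250) = 5:

    import com.twitter.servo.util.LogarithmicallyBucketedTimer
    import com.twitter.util.Future

    val bucketed = new LogarithmicallyBucketedTimer(stats)

    // Records latency_ms, latency_ms_log_10_2, and latency_ms_log_E_5.
    bucketed(count = 250) {
      Future.value("batch done") // hypothetical batch operation
    }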
- */ -class MemoizingStatsReceiver(val self: StatsReceiver) - extends StatsReceiver - with DelegatingStatsReceiver - with Proxy { - def underlying: Seq[StatsReceiver] = Seq(self) - - val repr = self.repr - - private[this] lazy val scopeMemo = - Memoize[String, StatsReceiver] { name => - new MemoizingStatsReceiver(self.scope(name)) - } - - private[this] lazy val counterMemo = - Memoize[(Seq[String], Verbosity), Counter] { - case (names, verbosity) => - self.counter(verbosity, names: _*) - } - - private[this] lazy val statMemo = - Memoize[(Seq[String], Verbosity), Stat] { - case (names, verbosity) => - self.stat(verbosity, names: _*) - } - - def counter(metricBuilder: MetricBuilder): Counter = - counterMemo(metricBuilder.name -> metricBuilder.verbosity) - - def stat(metricBuilder: MetricBuilder): Stat = statMemo( - metricBuilder.name -> metricBuilder.verbosity) - - def addGauge(metricBuilder: MetricBuilder)(f: => Float): Gauge = { - // scalafix:off StoreGaugesAsMemberVariables - self.addGauge(metricBuilder)(f) - // scalafix:on StoreGaugesAsMemberVariables - } - - override def scope(name: String): StatsReceiver = scopeMemo(name) -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.docx new file mode 100644 index 000000000..1b8b76cc2 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala deleted file mode 100644 index 443911763..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.thrift.ClientId - -/** - * A trait defining contextual information necessary to authorize - * and observe a request. - */ -trait Observable { - val requestName: String - val clientId: Option[ClientId] - - /** - * An Option[String] representation of the request-issuer's ClientId. - */ - lazy val clientIdString: Option[String] = - // It's possible for `ClientId.name` to be `null`, so we wrap it in - // `Option()` to force such cases to be None. - clientId flatMap { cid => - Option(cid.name) - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.docx new file mode 100644 index 000000000..dc5a23994 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala deleted file mode 100644 index 11635316a..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.twitter.servo.util - -object OptionOrdering { - - /** - * Creates an Ordering of Option objects. Nones are ordered before Somes, and two Somes - * are ordered according to the given value ordering. 
- */ - def apply[A](valueOrdering: Ordering[A]) = new Ordering[Option[A]] { - // Nones before Somes, for two Somes, use valueOrdering - def compare(x: Option[A], y: Option[A]): Int = { - x match { - case None => if (y.nonEmpty) -1 else 0 - case Some(xValue) => - y match { - case None => 1 - case Some(yValue) => valueOrdering.compare(xValue, yValue) - } - } - } - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.docx new file mode 100644 index 000000000..7585d35fe Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala deleted file mode 100644 index 569538554..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.util.Duration -import scala.util.Random - -/** - * A class for generating bounded random fluctuations around a given Duration. - */ -class RandomPerturber(percentage: Float, rnd: Random = new Random) extends (Duration => Duration) { - assert(percentage > 0 && percentage < 1, "percentage must be > 0 and < 1") - - override def apply(dur: Duration): Duration = { - val ns = dur.inNanoseconds - Duration.fromNanoseconds((ns + ((2 * rnd.nextFloat - 1) * percentage * ns)).toLong) - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.docx new file mode 100644 index 000000000..4edc1f83d Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala deleted file mode 100644 index 749addcc7..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala +++ /dev/null @@ -1,71 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.logging.{Level, Logger} -import com.twitter.util.{Duration, Time} -import com.twitter.conversions.DurationOps._ -import java.util.concurrent.atomic.AtomicLong - -object RateLimitingLogger { - private[util] val DefaultLoggerName = "servo" - private[util] val DefaultLogInterval = 500.milliseconds -} - -/** - * Class that makes it easier to rate-limit log messages, either by call site, or by - * logical grouping of messages. 
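A sketch of the RandomPerturber above, e.g. jittering a TTL so entries written together do not expire together (the TTL use case is an assumption):

    import com.twitter.conversions.DurationOps._
    import com.twitter.servo.util.RandomPerturber

    val jitter = new RandomPerturber(0.2f)
    val ttl = jitter(10.seconds) // uniformly distributed in roughly [8s, 12s]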
- * @param interval the interval in which messages should be rate limited - * @param logger the logger to use - */ -class RateLimitingLogger( - interval: Duration = RateLimitingLogger.DefaultLogInterval, - logger: Logger = Logger(RateLimitingLogger.DefaultLoggerName)) { - private[this] val last: AtomicLong = new AtomicLong(0L) - private[this] val sinceLast: AtomicLong = new AtomicLong(0L) - - private[this] val intervalNanos = interval.inNanoseconds - private[this] val intervalMsString = interval.inMilliseconds.toString - - private[this] def limited(action: Long => Unit): Unit = { - val now = Time.now.inNanoseconds - val lastNanos = last.get() - if (now - lastNanos > intervalNanos) { - if (last.compareAndSet(lastNanos, now)) { - val currentSinceLast = sinceLast.getAndSet(0L) - action(currentSinceLast) - } - } else { - sinceLast.incrementAndGet() - } - } - - def log(msg: => String, level: Level = Level.ERROR): Unit = { - limited { currentSinceLast: Long => - logger( - level, - "%s (group is logged at most once every %s ms%s)".format( - msg, - intervalMsString, - if (currentSinceLast > 0) { - s", ${currentSinceLast} occurrences since last" - } else "" - ) - ) - } - } - - def logThrowable(t: Throwable, msg: => String, level: Level = Level.ERROR): Unit = { - limited { currentSinceLast: Long => - logger( - level, - t, - "%s (group is logged at most once every %s ms%s)".format( - msg, - intervalMsString, - if (currentSinceLast > 0) { - s", ${currentSinceLast} occurrences since last" - } else "" - ) - ) - } - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.docx new file mode 100644 index 000000000..c188a2b6a Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala deleted file mode 100644 index 164dc2561..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala +++ /dev/null @@ -1,100 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.{Backoff, Service, TimeoutException, WriteException} -import com.twitter.finagle.service.{RetryExceptionsFilter, RetryPolicy} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.util.DefaultTimer -import com.twitter.util.{Duration, Future, Throw, Timer, Try} - -/** - * Allows an action to be retried according to a backoff strategy. - * This is an adaptation of the Finagle RetryExceptionsFilter, but with an - * arbitrary asynchronous computation.
- */ -class Retry( - statsReceiver: StatsReceiver, - backoffs: Backoff, - private[this] val timer: Timer = DefaultTimer) { - - /** - * retry on specific exceptions - */ - def apply[T]( - f: () => Future[T] - )( - shouldRetry: PartialFunction[Throwable, Boolean] - ): Future[T] = { - val policy = RetryPolicy.backoff[Try[Nothing]](backoffs) { - case Throw(t) if shouldRetry.isDefinedAt(t) => shouldRetry(t) - } - - val service = new Service[Unit, T] { - override def apply(u: Unit): Future[T] = f() - } - - val retrying = new RetryExceptionsFilter(policy, timer, statsReceiver) andThen service - - retrying() - } - - @deprecated("release() has no function and will be removed", "2.8.2") - def release(): Unit = {} -} - -/** - * Use to configure separate backoffs for WriteExceptions, TimeoutExceptions, - * and service-specific exceptions - */ -class ServiceRetryPolicy( - writeExceptionBackoffs: Backoff, - timeoutBackoffs: Backoff, - serviceBackoffs: Backoff, - shouldRetryService: PartialFunction[Throwable, Boolean]) - extends RetryPolicy[Try[Nothing]] { - override def apply(r: Try[Nothing]) = r match { - case Throw(t) if shouldRetryService.isDefinedAt(t) => - if (shouldRetryService(t)) - onServiceException - else - None - case Throw(_: WriteException) => onWriteException - case Throw(_: TimeoutException) => onTimeoutException - case _ => None - } - - def copy( - writeExceptionBackoffs: Backoff = writeExceptionBackoffs, - timeoutBackoffs: Backoff = timeoutBackoffs, - serviceBackoffs: Backoff = serviceBackoffs, - shouldRetryService: PartialFunction[Throwable, Boolean] = shouldRetryService - ) = - new ServiceRetryPolicy( - writeExceptionBackoffs, - timeoutBackoffs, - serviceBackoffs, - shouldRetryService - ) - - private[this] def onWriteException = consume(writeExceptionBackoffs) { tail => - copy(writeExceptionBackoffs = tail) - } - - private[this] def onTimeoutException = consume(timeoutBackoffs) { tail => - copy(timeoutBackoffs = tail) - } - - private[this] def onServiceException = consume(serviceBackoffs) { tail => - copy(serviceBackoffs = tail) - } - - private[this] def consume(b: Backoff)(f: Backoff => ServiceRetryPolicy) = { - if (b.isExhausted) None - else Some((b.duration, f(b.next))) - } - - override val toString = "ServiceRetryPolicy(%s, %s, %s)".format( - writeExceptionBackoffs, - timeoutBackoffs, - serviceBackoffs - ) -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.docx new file mode 100644 index 000000000..15f424462 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala deleted file mode 100644 index f1e02c641..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala +++ /dev/null @@ -1,169 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.Backoff -import com.twitter.finagle.service.{RetryBudget, RetryPolicy} -import com.twitter.finagle.stats.{Counter, StatsReceiver} -import com.twitter.util._ -import java.util.concurrent.CancellationException -import scala.util.control.NonFatal - -/** - * A RetryHandler can wrap an arbitrary Future-producing operation with retry logic, where the - * operation may conditionally be retried multiple times. 
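A sketch of the Retry helper above (fetchConfig is a hypothetical idempotent operation, and Backoff.const(...).take(...) is assumed available on finagle's Backoff):

    import com.twitter.conversions.DurationOps._
    import com.twitter.finagle.Backoff
    import com.twitter.util.Future

    val fetchConfig: () => Future[String] = () => Future.value("config") // hypothetical

    val retrier = new Retry(stats, Backoff.const(100.milliseconds).take(3))

    val result: Future[String] =
      retrier(fetchConfig) {
        case _: java.io.IOException => true // retry only I/O failures
      }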
- */ -trait RetryHandler[-A] { - - /** - * Executes the given operation and performs any applicable retries. - */ - def apply[A2 <: A](f: => Future[A2]): Future[A2] - - /** - * Wraps an arbitrary function with this RetryHandler's retrying logic. - */ - def wrap[A2 <: A, B](f: B => Future[A2]): B => Future[A2] = - b => this(f(b)) -} - -object RetryHandler { - - /** - * Builds a RetryHandler that retries according to the given RetryPolicy. Retries, if any, - * will be scheduled on the given Timer to be executed after the appropriate backoff, if any. - * Retries will be limited according to the given `RetryBudget`. - */ - def apply[A]( - policy: RetryPolicy[Try[A]], - timer: Timer, - statsReceiver: StatsReceiver, - budget: RetryBudget = RetryBudget() - ): RetryHandler[A] = { - val firstTryCounter = statsReceiver.counter("first_try") - val retriesCounter = statsReceiver.counter("retries") - val budgetExhausedCounter = statsReceiver.counter("budget_exhausted") - - new RetryHandler[A] { - def apply[A2 <: A](f: => Future[A2]): Future[A2] = { - firstTryCounter.incr() - budget.deposit() - retry[A2](policy, timer, retriesCounter, budgetExhausedCounter, budget)(f) - } - } - } - - /** - * Builds a RetryHandler that will only retry on failures that are handled by the given policy, - * and does not consider any successful future for retries. - */ - def failuresOnly[A]( - policy: RetryPolicy[Try[Nothing]], - timer: Timer, - statsReceiver: StatsReceiver, - budget: RetryBudget = RetryBudget() - ): RetryHandler[A] = - apply(failureOnlyRetryPolicy(policy), timer, statsReceiver, budget) - - /** - * Builds a RetryHandler that will retry any failure according to the given backoff schedule, - * until either the operation succeeds or all backoffs are exhausted. - */ - def failuresOnly[A]( - backoffs: Stream[Duration], - timer: Timer, - stats: StatsReceiver, - budget: RetryBudget - ): RetryHandler[A] = - failuresOnly( - RetryPolicy.backoff[Try[Nothing]](Backoff.fromStream(backoffs)) { case Throw(_) => true }, - timer, - stats, - budget - ) - - /** - * Builds a RetryHandler that will retry any failure according to the given backoff schedule, - * until either the operation succeeds or all backoffs are exhausted. - */ - def failuresOnly[A]( - backoffs: Stream[Duration], - timer: Timer, - stats: StatsReceiver - ): RetryHandler[A] = - failuresOnly(backoffs, timer, stats, RetryBudget()) - - /** - * Converts a RetryPolicy that only handles failures (Throw) to a RetryPolicy that also - * handles successes (Return), by flagging that successes need not be retried.
- */ - def failureOnlyRetryPolicy[A](policy: RetryPolicy[Try[Nothing]]): RetryPolicy[Try[A]] = - RetryPolicy[Try[A]] { - case Return(_) => None - case Throw(ex) => - policy(Throw(ex)) map { - case (backoff, p2) => (backoff, failureOnlyRetryPolicy(p2)) - } - } - - private[this] def retry[A]( - policy: RetryPolicy[Try[A]], - timer: Timer, - retriesCounter: Counter, - budgetExhausedCounter: Counter, - budget: RetryBudget - )( - f: => Future[A] - ): Future[A] = { - forceFuture(f).transform { transformed => - policy(transformed) match { - case Some((backoff, nextPolicy)) => - if (budget.tryWithdraw()) { - retriesCounter.incr() - schedule(backoff, timer) { - retry(nextPolicy, timer, retriesCounter, budgetExhausedCounter, budget)(f) - } - } else { - budgetExhausedCounter.incr() - Future.const(transformed) - } - case None => - Future.const(transformed) - } - } - } - - // similar to finagle's RetryExceptionsFilter - private[this] def schedule[A](d: Duration, timer: Timer)(f: => Future[A]) = { - if (d.inNanoseconds > 0) { - val promise = new Promise[A] - val task = timer.schedule(Time.now + d) { - if (!promise.isDefined) { - try { - promise.become(f) - } catch { - case NonFatal(cause) => - // Ignore any exceptions thrown by Promise#become(). This usually means that the promise - // was already defined and cannot be transformed. - } - } - } - promise.setInterruptHandler { - case cause => - task.cancel() - val cancellation = new CancellationException - cancellation.initCause(cause) - promise.updateIfEmpty(Throw(cancellation)) - } - promise - } else forceFuture(f) - } - - // (Future { f } flatten), but without the allocation - private[this] def forceFuture[A](f: => Future[A]) = { - try { - f - } catch { - case NonFatal(cause) => - Future.exception(cause) - } - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.docx new file mode 100644 index 000000000..1c6dbc95d Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala deleted file mode 100644 index 36b790760..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala +++ /dev/null @@ -1,90 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.util.Future - -object RpcRetry { - - /** - * Provides a generic implementation of retry logic that retries only a subset - * of requests according to a given predicate and returns the results - * in the original order after the retry. - * @param rpcs Methods that can transform a Seq[Request] to - * Future[Map[Request, Response]]; they will be invoked in order - * while there are remaining rpcs to invoke AND some responses - * still return false to the predicate. - * @param isSuccess if true, keep the response, else retry. - * @tparam Req a request object - * @tparam Resp a response object - * @return an rpc function (Seq[Req] => Future[Map[Req, Resp]]) that performs - * the retries internally.
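A sketch of RetryHandler.failuresOnly, reusing stats, DefaultTimer, and the hypothetical fetchConfig from the sketches above:

    import com.twitter.conversions.DurationOps._

    // Retry any failure after 1s, then 5s, then give up.
    val handler: RetryHandler[String] =
      RetryHandler.failuresOnly(
        Stream(1.second, 5.seconds),
        DefaultTimer,
        stats.scope("config_fetch")
      )

    val eventual: Future[String] = handler(fetchConfig())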
- */ - def retryableRpc[Req, Resp]( - rpcs: Seq[Seq[Req] => Future[Map[Req, Resp]]], - isSuccess: Resp => Boolean - ): Seq[Req] => Future[Map[Req, Resp]] = { - requestRetryAndMerge[Req, Resp](_, isSuccess, rpcs.toStream) - } - - /** - * Provides a generic implementation of retry logic that retries only a subset - * of requests according to a given predicate and returns the results - * in the original order after the retry. - * @param rpcs Methods that can transform a Seq[Request] to - * Future[Seq[Response]]; they will be invoked in order - * while there are remaining rpcs to invoke AND some responses - * still return false to the predicate. - * Note that all Request objects must adhere to hashCode/equals standards - * @param isSuccess if true, keep the response, else retry. - * @tparam Req a request object. Must adhere to hashCode/equals standards - * @tparam Resp a response object - * @return an rpc function (Seq[Req] => Future[Seq[Resp]]) that performs - * the retries internally. - */ - def retryableRpcSeq[Req, Resp]( - rpcs: Seq[Seq[Req] => Future[Seq[Resp]]], - isSuccess: Resp => Boolean - ): Seq[Req] => Future[Seq[Resp]] = { - requestRetryAndMergeSeq[Req, Resp](_, isSuccess, rpcs) - } - - private[this] def requestRetryAndMergeSeq[Req, Resp]( - requests: Seq[Req], - isSuccess: Resp => Boolean, - rpcs: Seq[Seq[Req] => Future[Seq[Resp]]] - ): Future[Seq[Resp]] = { - requestRetryAndMerge(requests, isSuccess, (rpcs map { rpcToMapResponse(_) }).toStream) map { - responseMap => - requests map { responseMap(_) } - } - } - - private[this] def requestRetryAndMerge[Req, Resp]( - requests: Seq[Req], - isSuccess: Resp => Boolean, - rpcs: Stream[Seq[Req] => Future[Map[Req, Resp]]] - ): Future[Map[Req, Resp]] = { - if (rpcs.isEmpty) { - Future.exception(new IllegalArgumentException("rpcs is empty.")) - } else { - val rpc = rpcs.head - rpc(requests) flatMap { responses => - val (keep, recurse) = responses partition { - case (_, rep) => isSuccess(rep) - } - if (rpcs.tail.nonEmpty && recurse.nonEmpty) { - requestRetryAndMerge(recurse.keys.toSeq, isSuccess, rpcs.tail) map { keep ++ _ } - } else { - Future.value(responses) - } - } - } - } - - private[this] def rpcToMapResponse[Req, Resp]( - rpc: Seq[Req] => Future[Seq[Resp]] - ): Seq[Req] => Future[Map[Req, Resp]] = { (reqs: Seq[Req]) => - rpc(reqs) map { reps => - (reqs zip reps).toMap - } - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.docx new file mode 100644 index 000000000..ed240bbd0 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala deleted file mode 100644 index 1d20842df..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala +++ /dev/null @@ -1,80 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.stats.{NullStatsReceiver, StatsReceiver} -import com.twitter.logging._ -import com.twitter.scrooge.{BinaryThriftStructSerializer, ThriftStruct, ThriftStructCodec} -import com.twitter.util.Future - -object Scribe { - - /** - * Returns a new FutureEffect for scribing text to the specified category.
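A sketch of RpcRetry with two hypothetical batch endpoints, a cache tried first and a backing store for whatever the cache cannot answer:

    import com.twitter.servo.util.RpcRetry
    import com.twitter.util.Future

    val fromCache: Seq[Long] => Future[Map[Long, Option[String]]] = ???
    val fromStore: Seq[Long] => Future[Map[Long, Option[String]]] = ???

    val lookup: Seq[Long] => Future[Map[Long, Option[String]]] =
      RpcRetry.retryableRpc(
        Seq(fromCache, fromStore),
        isSuccess = (r: Option[String]) => r.isDefined
      )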
- */ - def apply( - category: String, - statsReceiver: StatsReceiver = NullStatsReceiver - ): FutureEffect[String] = - Scribe(loggingHandler(category = category, statsReceiver = statsReceiver)) - - /** - * Returns a new FutureEffect for scribing text to the specified logging handler. - */ - def apply(handler: Handler): FutureEffect[String] = - FutureEffect[String] { msg => - handler.publish(new LogRecord(handler.getLevel, msg)) - Future.Unit - } - - /** - * Returns a new FutureEffect for scribing thrift objects to the specified category. - * The thrift object will be serialized to binary then converted to Base64. - */ - def apply[T <: ThriftStruct]( - codec: ThriftStructCodec[T], - category: String - ): FutureEffect[T] = - Scribe(codec, Scribe(category = category)) - - /** - * Returns a new FutureEffect for scribing thrift objects to the specified category. - * The thrift object will be serialized to binary then converted to Base64. - */ - def apply[T <: ThriftStruct]( - codec: ThriftStructCodec[T], - category: String, - statsReceiver: StatsReceiver - ): FutureEffect[T] = - Scribe(codec, Scribe(category = category, statsReceiver = statsReceiver)) - - /** - * Returns a new FutureEffect for scribing thrift objects to the underlying scribe effect. - * The thrift object will be serialized to binary then converted to Base64. - */ - def apply[T <: ThriftStruct]( - codec: ThriftStructCodec[T], - underlying: FutureEffect[String] - ): FutureEffect[T] = - underlying contramap serialize(codec) - - /** - * Builds a logging Handler that scribes log messages, wrapped with a QueueingHandler. - */ - def loggingHandler( - category: String, - formatter: Formatter = BareFormatter, - maxQueueSize: Int = 5000, - statsReceiver: StatsReceiver = NullStatsReceiver - ): Handler = - new QueueingHandler( - ScribeHandler(category = category, formatter = formatter, statsReceiver = statsReceiver)(), - maxQueueSize = maxQueueSize - ) - - /** - * Returns a function that serializes thrift structs to Base64. - */ - def serialize[T <: ThriftStruct](c: ThriftStructCodec[T]): T => String = { - val serializer = BinaryThriftStructSerializer(c) - t => serializer.toString(t) - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.docx new file mode 100644 index 000000000..a13220046 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala deleted file mode 100644 index 4e84fb801..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala +++ /dev/null @@ -1,179 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.{Duration, Local} - -/** - * A strategy for tracking success rate, usually over a window - */ -trait SuccessRateTracker { self => - def record(successes: Int, failures: Int): Unit - def successRate: Double - - /** - * A [[Gate]] whose availability is computed from the success rate (SR) reported by the tracker. 
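// Sketch: turn the tracked rate into a probabilistic Gate. `recentWindowed`
// is defined in the companion object below; squaring the success rate is an
// arbitrary availability function chosen only for illustration.
import com.twitter.util.Duration

val tracker = SuccessRateTracker.recentWindowed(Duration.fromSeconds(30))
tracker.record(successes = 80, failures = 20)
val available: Gate[Unit] = tracker.availabilityGate(sr => sr * sr)
// available(()) opens with probability ~0.64, i.e. 0.8 squared.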
- * - * @param availabilityFromSuccessRate function to calculate availability of gate given SR - */ - def availabilityGate(availabilityFromSuccessRate: Double => Double): Gate[Unit] = - Gate.fromAvailability(availabilityFromSuccessRate(successRate)) - - /** - * A [[Gate]] whose availability is computed from the success rate reported by the tracker - * with stats attached. - */ - def observedAvailabilityGate( - availabilityFromSuccessRate: Double => Double, - stats: StatsReceiver - ): Gate[Unit] = - new Gate[Unit] { - val underlying = availabilityGate(availabilityFromSuccessRate) - val availabilityGauge = - stats.addGauge("availability") { availabilityFromSuccessRate(successRate).toFloat } - override def apply[U](u: U)(implicit asT: <:<[U, Unit]): Boolean = underlying.apply(u) - } - - /** - * Tracks number of successes and failures as counters, and success_rate as a gauge - */ - def observed(stats: StatsReceiver) = { - val successCounter = stats.counter("successes") - val failureCounter = stats.counter("failures") - new SuccessRateTracker { - private[this] val successRateGauge = stats.addGauge("success_rate")(successRate.toFloat) - override def record(successes: Int, failures: Int) = { - self.record(successes, failures) - successCounter.incr(successes) - failureCounter.incr(failures) - } - override def successRate = self.successRate - } - } -} - -object SuccessRateTracker { - - /** - * Track success rate (SR) using [[RecentAverage]] - * - * Defaults success rate to 100% which prevents early failures (or periods of 0 data points, - * e.g. tracking backend SR during failover) from producing dramatic drops in success rate. - * - * @param window Window size as duration - */ - def recentWindowed(window: Duration) = - new AverageSuccessRateTracker(new RecentAverage(window, defaultAverage = 1.0)) - - /** - * Track success rate using [[WindowedAverage]] - * - * Initializes the windowedAverage to one window's worth of successes. This prevents - * the problem where early failures produce dramatic drops in the success rate. - * - * @param windowSize Window size in number of data points - */ - def rollingWindow(windowSize: Int) = - new AverageSuccessRateTracker(new WindowedAverage(windowSize, initialValue = Some(1.0))) -} - -/** - * Tracks success rate using an [[Average]] - * - * @param average Strategy for recording an average, usually over a window - */ -class AverageSuccessRateTracker(average: Average) extends SuccessRateTracker { - def record(successes: Int, failures: Int): Unit = - average.record(successes, successes + failures) - - def successRate: Double = average.value.getOrElse(1) -} - -/** - * EwmaSuccessRateTracker computes a failure rate with exponential decay over a time bound. - * - * @param halfLife determines the rate of decay. Assuming a hypothetical service that is initially - * 100% successful and then instantly switches to 50% successful, it will take `halfLife` time - * for this tracker to report a success rate of ~75%. 
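// The tracker below blends each observation in with weight (1 - exp(-dt/tau)),
// where tau = halfLife / ln(2). A freestanding numeric check of the ~75% claim:
def decayStep(prev: Double, observed: Double, dtNanos: Long, halfLifeNanos: Long): Double = {
  val tau = halfLifeNanos.toDouble / math.log(2.0)
  val weight = math.exp(-dtNanos.toDouble / tau)
  prev * weight + observed * (1.0 - weight)
}
// After one halfLife of steady 50% failures: decayStep(0.0, 0.5, h, h) == 0.25,
// a reported success rate of exactly 0.75.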
- */ -class EwmaSuccessRateTracker(halfLife: Duration) extends SuccessRateTracker { - // math.exp(-x) = 0.50 when x == ln(2) - // math.exp(-x / Tau) == math.exp(-x / halfLife * ln(2)) therefore when x/halfLife == 1, the - // decay output is 0.5 - private[this] val Tau: Double = halfLife.inNanoseconds.toDouble / math.log(2.0) - - private[this] var stamp: Long = EwmaSuccessRateTracker.nanoTime() - private[this] var decayingFailureRate: Double = 0.0 - - def record(successes: Int, failures: Int): Unit = { - if (successes < 0 || failures < 0) return - - val total = successes + failures - if (total == 0) return - - val observation = (failures.toDouble / total) max 0.0 min 1.0 - - synchronized { - val time = EwmaSuccessRateTracker.nanoTime() - val delta = ((time - stamp) max 0L).toDouble - val weight = math.exp(-delta / Tau) - decayingFailureRate = (decayingFailureRate * weight) + (observation * (1.0 - weight)) - stamp = time - } - } - - /** - * The current success rate computed as the inverse of the failure rate. - */ - def successRate: Double = 1.0 - failureRate - - def failureRate = synchronized { decayingFailureRate } -} - -private[servo] trait NanoTimeControl { - def set(nanoTime: Long): Unit - def advance(delta: Long): Unit - def advance(delta: Duration): Unit = advance(delta.inNanoseconds) -} - -object EwmaSuccessRateTracker { - private[EwmaSuccessRateTracker] val localNanoTime = new Local[() => Long] - - private[EwmaSuccessRateTracker] def nanoTime(): Long = { - localNanoTime() match { - case None => System.nanoTime() - case Some(f) => f() - } - } - - /** - * Execute body with the time function replaced by `timeFunction` - * WARNING: This is only meant for testing purposes. - */ - private[this] def withNanoTimeFunction[A]( - timeFunction: => Long - )( - body: NanoTimeControl => A - ): A = { - @volatile var tf = () => timeFunction - - localNanoTime.let(() => tf()) { - val timeControl = new NanoTimeControl { - def set(nanoTime: Long): Unit = { - tf = () => nanoTime - } - def advance(delta: Long): Unit = { - val newNanoTime = tf() + delta - tf = () => newNanoTime - } - } - - body(timeControl) - } - } - - private[this] def withNanoTimeAt[A](nanoTime: Long)(body: NanoTimeControl => A): A = - withNanoTimeFunction(nanoTime)(body) - - private[servo] def withCurrentNanoTimeFrozen[A](body: NanoTimeControl => A): A = - withNanoTimeAt(System.nanoTime())(body) -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.docx new file mode 100644 index 000000000..72eedb5a5 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala deleted file mode 100644 index a57d30533..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala +++ /dev/null @@ -1,5 +0,0 @@ -package com.twitter.servo.util - -import scala.collection.mutable - -class SynchronizedHashMap[K, V] extends mutable.HashMap[K, V] with mutable.SynchronizedMap[K, V] diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.docx new file mode 100644 index 000000000..95c6d59a5 Binary files /dev/null and 
b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala deleted file mode 100644 index 3edd1cf31..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.servo.util - -class ThreadLocalStringBuilder(initialSize: Int) extends ThreadLocal[StringBuilder] { - override def initialValue = new StringBuilder(initialSize) - - def apply() = { - val buf = get - buf.setLength(0) - buf - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.docx new file mode 100644 index 000000000..488ff47f1 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala deleted file mode 100644 index 5feeaa7e7..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.util.Throwables - -/** - * An object with some helper methods for dealing with exceptions - * (currently just classname cleanup) - */ -object ThrowableHelper { - - /** - * Returns a sanitized sequence of classname for the given Throwable - * including root causes. - */ - def sanitizeClassnameChain(t: Throwable): Seq[String] = - Throwables.mkString(t).map(classnameTransform(_)) - - /** - * Returns a sanitized classname for the given Throwable. - */ - def sanitizeClassname(t: Throwable): String = - classnameTransform(t.getClass.getName) - - /** - * A function that applies a bunch of cleanup transformations to exception classnames - * (currently just 1, but there will likely be more!). - */ - private val classnameTransform: String => String = - Memoize { stripSuffix("$Immutable").andThen(stripSuffix("$")) } - - /** - * Generates a function that strips off the specified suffix from strings, if found. 
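// Example with a hypothetical exception type: a Scala `object` has a runtime
// class name ending in "$", which the transforms above strip.
object TimeoutError extends RuntimeException("timeout")
// ThrowableHelper.sanitizeClassname(TimeoutError) returns the class name with
// no trailing "$" (and any "$Immutable" suffix is removed first).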
- */ - private def stripSuffix(suffix: String): String => String = - s => { - if (s.endsWith(suffix)) - s.substring(0, s.length - suffix.length) - else - s - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.docx new file mode 100644 index 000000000..97588cd0c Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala deleted file mode 100644 index d5cb14479..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala +++ /dev/null @@ -1,227 +0,0 @@ -package com.twitter.servo.util - -import com.google.common.base.Charsets -import com.google.common.primitives.{Ints, Longs} -import com.twitter.scrooge.{BinaryThriftStructSerializer, ThriftStructCodec, ThriftStruct} -import com.twitter.util.{Future, Return, Try, Throw} -import java.nio.{ByteBuffer, CharBuffer} -import java.nio.charset.{Charset, CharsetEncoder, CharsetDecoder} - -/** - * Transformer is a (possibly partial) bidirectional conversion - * between values of two types. It is particularly useful for - * serializing values for storage and reading them back out (see - * com.twitter.servo.cache.Serializer). - * - * In some implementations, the conversion may lose data (for example - * when used for storage in a cache). In general, any data that passes - * through a conversion should be preserved if the data is converted - * back. There is code to make it easy to check that your Transformer - * instance has this property in - * com.twitter.servo.util.TransformerLawSpec. - * - * Transformers should take care not to mutate their inputs when - * converting in either direction, in order to ensure that concurrent - * transformations of the same input yield the same result. - * - * Transformer forms a category with `andThen` and `identity`. - */ -trait Transformer[A, B] { self => - def to(a: A): Try[B] - - def from(b: B): Try[A] - - @deprecated("Use Future.const(transformer.to(x))", "2.0.1") - def asyncTo(a: A): Future[B] = Future.const(to(a)) - - @deprecated("Use Future.const(transformer.from(x))", "2.0.1") - def asyncFrom(b: B): Future[A] = Future.const(from(b)) - - /** - * Compose this transformer with another. As long as both - * transformers follow the stated laws, the composed transformer - * will follow them. - */ - def andThen[C](t: Transformer[B, C]): Transformer[A, C] = - new Transformer[A, C] { - override def to(a: A) = self.to(a) andThen t.to - override def from(c: C) = t.from(c) andThen self.from - } - - /** - * Reverse the direction of this transformer. - * - * Law: t.flip.flip == t - */ - lazy val flip: Transformer[B, A] = - new Transformer[B, A] { - override lazy val flip = self - override def to(b: B) = self.from(b) - override def from(a: A) = self.to(a) - } -} - -object Transformer { - - /** - * Create a new Transformer from the supplied functions, catching - * exceptions and converting them to failures. - */ - def apply[A, B](tTo: A => B, tFrom: B => A): Transformer[A, B] = - new Transformer[A, B] { - override def to(a: A): Try[B] = Try { tTo(a) } - override def from(b: B): Try[A] = Try { tFrom(b) } - } - - def identity[A]: Transformer[A, A] = pure[A, A](a => a, a => a) - - /** - * Lift a pair of (total) conversion functions to a Transformer. 
The - * caller is responsible for ensuring that the resulting transformer - * follows the laws for Transformers. - */ - def pure[A, B](pureTo: A => B, pureFrom: B => A): Transformer[A, B] = - new Transformer[A, B] { - override def to(a: A): Try[B] = Return(pureTo(a)) - override def from(b: B): Try[A] = Return(pureFrom(b)) - } - - /** - * Lift a transformer to a transformer on optional values. - * - * None bypasses the underlying conversion (as it must, since there - * is no value to transform). - */ - def optional[A, B](underlying: Transformer[A, B]): Transformer[Option[A], Option[B]] = - new Transformer[Option[A], Option[B]] { - override def to(optA: Option[A]) = optA match { - case None => Return.None - case Some(a) => underlying.to(a) map { Some(_) } - } - - override def from(optB: Option[B]) = optB match { - case None => Return.None - case Some(b) => underlying.from(b) map { Some(_) } - } - } - - ////////////////////////////////////////////////// - // Transformers for accessing/generating fields of a Map. - // - // These transformers are useful for serializing/deserializing to - // storage that stores Maps, for example Hamsa. - - /** - * Thrown by `requiredField` when the field is not present. - */ - case class MissingRequiredField[K](k: K) extends RuntimeException - - /** - * Get a value from the map, yielding MissingRequiredField when the - * value is not present in the map. - * - * The inverse transform yields a Map containing only the one value. - */ - def requiredField[K, V](k: K): Transformer[Map[K, V], V] = - new Transformer[Map[K, V], V] { - override def to(m: Map[K, V]) = - m get k match { - case Some(v) => Return(v) - case None => Throw(MissingRequiredField(k)) - } - - override def from(v: V) = Return(Map(k -> v)) - } - - /** - * Attempt to get a field from a Map, yielding None if the value is - * not present. - * - * The inverse transform will put the value in a Map if it is Some, - * and omit it if it is None. - */ - def optionalField[K, V](k: K): Transformer[Map[K, V], Option[V]] = - pure[Map[K, V], Option[V]](_.get(k), _.map { k -> _ }.toMap) - - /** - * Transforms an Option[T] to a T, using a default value for None. - * - * Note that the default value will be converted back to None by - * .from (.from will never return Some(default)). - */ - def default[T](value: T): Transformer[Option[T], T] = - pure[Option[T], T](_ getOrElse value, t => if (t == value) None else Some(t)) - - /** - * Transforms `Long`s to big-endian byte arrays. - */ - lazy val LongToBigEndian: Transformer[Long, Array[Byte]] = - new Transformer[Long, Array[Byte]] { - def to(a: Long) = Try(Longs.toByteArray(a)) - def from(b: Array[Byte]) = Try(Longs.fromByteArray(b)) - } - - /** - * Transforms `Int`s to big-endian byte arrays. - */ - lazy val IntToBigEndian: Transformer[Int, Array[Byte]] = - new Transformer[Int, Array[Byte]] { - def to(a: Int) = Try(Ints.toByteArray(a)) - def from(b: Array[Byte]) = Try(Ints.fromByteArray(b)) - } - - /** - * Transforms UTF8-encoded strings to byte arrays. - */ - lazy val Utf8ToBytes: Transformer[String, Array[Byte]] = - stringToBytes(Charsets.UTF_8) - - /** - * Transforms strings, encoded in a given character set, to byte arrays. 
- */ - private[util] def stringToBytes(charset: Charset): Transformer[String, Array[Byte]] = - new Transformer[String, Array[Byte]] { - private[this] val charsetEncoder = new ThreadLocal[CharsetEncoder]() { - protected override def initialValue() = charset.newEncoder - } - - private[this] val charsetDecoder = new ThreadLocal[CharsetDecoder]() { - protected override def initialValue() = charset.newDecoder - } - - override def to(str: String): Try[Array[Byte]] = Try { - // We can't just use `String.getBytes("UTF-8")` here because it will - // silently replace UTF-16 surrogate characters, which will cause - // CharsetEncoder to throw exceptions. - val bytes = charsetEncoder.get.encode(CharBuffer.wrap(str)) - bytes.array.slice(bytes.position, bytes.limit) - } - - override def from(bytes: Array[Byte]): Try[String] = Try { - charsetDecoder.get.decode(ByteBuffer.wrap(bytes)).toString - } - } - - /** - * Transforms a ThriftStruct to a byte-array using Thrift's TBinaryProtocol. - */ - def thriftStructToBytes[T <: ThriftStruct](c: ThriftStructCodec[T]): Transformer[T, Array[Byte]] = - new Transformer[T, Array[Byte]] { - private[this] val ser = BinaryThriftStructSerializer(c) - def to(a: T) = Try(ser.toBytes(a)) - def from(b: Array[Byte]) = Try(ser.fromBytes(b)) - } -} - -/** - * transforms an Option[T] to a T, using a default value for None - */ -@deprecated("Use Transformer.default", "2.0.1") -class OptionToTypeTransformer[T](default: T) extends Transformer[Option[T], T] { - override def to(b: Option[T]): Try[T] = Return(b.getOrElse(default)) - - override def from(a: T): Try[Option[T]] = a match { - case `default` => Return.None - case _ => Return(Some(a)) - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.docx new file mode 100644 index 000000000..4cd04dbb5 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala deleted file mode 100644 index d770be704..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala +++ /dev/null @@ -1,23 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.util.{Return, Throw, Try} - -object TryOrdering { - - /** - * Creates an Ordering of Try objects. Throws are ordered before Returns, and two Returns - * are ordered according to the given value ordering. 
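// Usage sketch: Throws sort first, then Returns by the value ordering.
import com.twitter.util.{Return, Throw, Try}

val ord: Ordering[Try[Int]] = TryOrdering(Ordering.Int)
val sorted = Seq[Try[Int]](Return(3), Throw(new Exception("boom")), Return(1)).sorted(ord)
// sorted == Seq(Throw(boom), Return(1), Return(3))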
- */ - def apply[A](valueOrdering: Ordering[A]) = new Ordering[Try[A]] { - def compare(x: Try[A], y: Try[A]): Int = { - x match { - case Throw(_) => if (y.isReturn) -1 else 0 - case Return(xValue) => - y match { - case Throw(_) => 1 - case Return(yValue) => valueOrdering.compare(xValue, yValue) - } - } - } - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.docx new file mode 100644 index 000000000..2ee8571c7 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala deleted file mode 100644 index e76020098..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala +++ /dev/null @@ -1,60 +0,0 @@ -package com.twitter.servo.util - -import com.twitter.finagle.util.DefaultTimer -import com.twitter.finagle.{Addr, Name, Namer} -import com.twitter.logging.Logger -import com.twitter.util._ -import scala.collection.JavaConverters._ - -/** - * A simple utility class to wait for serverset names to be resolved at startup. - * - * See [[com.twitter.finagle.client.ClientRegistry.expAllRegisteredClientsResolved()]] for an - * alternative way to wait for ServerSet resolution. - */ -object WaitForServerSets { - val log = Logger.get("WaitForServerSets") - - /** - * Convenient wrapper for single name in Java. Provides the default timer from Finagle. - */ - def ready(name: Name, timeout: Duration): Future[Unit] = - ready(Seq(name), timeout, DefaultTimer) - - /** - * Java Compatibility wrapper. Uses java.util.List instead of Seq. - */ - def ready(names: java.util.List[Name], timeout: Duration, timer: Timer): Future[Unit] = - ready(names.asScala, timeout, timer) - - /** - * Returns a Future that is satisfied when no more names resolve to Addr.Pending, - * or the specified timeout expires. - * - * This ignores address resolution failures, so just because the Future is satisfied - * doesn't necessarily imply that all names are resolved to something useful. 
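// Startup sketch; the serverset path and timeout are illustrative only.
import com.twitter.finagle.Resolver
import com.twitter.util.{Await, Duration}

val name = Resolver.eval("/s/example/service")
Await.result(WaitForServerSets.ready(Seq(name), Duration.fromSeconds(30), DefaultTimer))
// On timeout this fails with the TimeoutException built below; on success it
// only means no name is still Addr.Pending, not that resolution succeeded.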
- */ - def ready(names: Seq[Name], timeout: Duration, timer: Timer): Future[Unit] = { - val vars: Var[Seq[(Name, Addr)]] = Var.collect(names.map { - case n @ Name.Path(v) => Namer.resolve(v).map((n, _)) - case n @ Name.Bound(v) => v.map((n, _)) - }) - - val pendings = vars.changes.map { names => - names.filter { case (_, addr) => addr == Addr.Pending } - } - - pendings - .filter(_.isEmpty) - .toFuture() - .unit - .within( - timer, - timeout, - new TimeoutException( - "Failed to resolve: " + - vars.map(_.map { case (name, _) => name }).sample() - ) - ) - } -} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.docx b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.docx new file mode 100644 index 000000000..973b03592 Binary files /dev/null and b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.docx differ diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala deleted file mode 100644 index e9afcacc1..000000000 --- a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala +++ /dev/null @@ -1,6 +0,0 @@ -package com.twitter.servo - -package object util { - /* aliases to preserve compatibility after classes moved to different package */ - val Memoize = com.twitter.util.Memoize -} diff --git a/twml/BUILD b/twml/BUILD deleted file mode 100644 index c339f6fae..000000000 --- a/twml/BUILD +++ /dev/null @@ -1,186 +0,0 @@ -twml_sources = [ - "twml/**/*.py", -] - -twml_deps = [ - "3rdparty/python/cherrypy:default", - "3rdparty/python/pyyaml:default", - "3rdparty/python/absl-py:default", - "3rdparty/python/joblib:default", - "3rdparty/python/kazoo:default", - "3rdparty/python/python-dateutil:default", - "3rdparty/python/pytz:default", - "cortex/ml-metastore/src/main/python/com/twitter/mlmetastore/modelrepo/client", - "src/python/twitter/common/app", - "src/python/twitter/common/app/modules:vars", - "src/python/twitter/common/metrics", - "src/python/twitter/deepbird/compat/v1/optimizers", - "src/python/twitter/deepbird/compat/v1/rnn", - "src/python/twitter/deepbird/hparam", - "src/python/twitter/deepbird/io", - "src/python/twitter/deepbird/io/legacy", - "src/python/twitter/deepbird/logging", - "src/python/twitter/deepbird/sparse", - "src/python/twitter/deepbird/stats_server", - "src/python/twitter/deepbird/util:simple-data-record-handler", - "src/python/twitter/deepbird/util/hashing", - "src/python/twitter/ml/api/dal", - "src/python/twitter/ml/common:metrics", - "src/python/twitter/ml/common/kubernetes", - "src/python/twitter/ml/common:resources", - "src/python/twitter/ml/twml/kubernetes", - "src/python/twitter/ml/twml:status", - "src/thrift/com/twitter/dal:dal_no_constants-python", - "src/thrift/com/twitter/statebird:compiled-v2-python", -] - -python3_library( - name = "twml-test-common-deps", - tags = ["no-mypy"], - dependencies = [ - "src/python/twitter/deepbird/util:inference", - "src/python/twitter/deepbird/util/data", - "src/thrift/com/twitter/ml/api:data-python", - "twml/tests/data:resources", - ], -) - -python3_library( - name = "twml_packer_deps_no_tf", - tags = [ - "bazel-compatible", - "no-mypy", - ], - dependencies = [ - "3rdparty/python/numpy:default", - "3rdparty/python/pandas:default", - "3rdparty/python/pyyaml:default", - "3rdparty/python/requests:default", - "3rdparty/python/scikit-learn:default", - "3rdparty/python/scipy:default", - "3rdparty/python/tensorflow-hub:default", - 
"3rdparty/python/thriftpy2:default", - ], -) - -python3_library( - name = "twml_packer_deps_no_tf_py3", - tags = [ - "known-to-fail-jira:CX-20246", - "no-mypy", - ], - dependencies = [ - ":twml_packer_deps_no_tf", - "3rdparty/python/tensorflow-model-analysis", - ], -) - -alias( - name = "twml-test-shared", - target = ":twml_common", -) - -python3_library( - name = "twml_common", - sources = ["twml_common/**/*.py"], - tags = [ - "bazel-compatible", - "no-mypy", - ], -) - -# Alias twml-dev to twml to avoid breaking user targets. -alias( - name = "twml-dev", - target = "twml", -) - -python3_library( - name = "twml-test-dev-deps", - tags = [ - "bazel-compatible", - "no-mypy", - ], - dependencies = [ - ":twml", - ":twml-test-common-deps", - ":twml-test-shared", - "3rdparty/python/freezegun:default", - "src/python/twitter/deepbird/keras/layers", - "src/thrift/com/twitter/ml/api:data-python", - "src/thrift/com/twitter/ml/prediction_service:prediction_service-python", - ], -) - -python3_library( - name = "twml-dev-python", - sources = twml_sources, - tags = [ - "bazel-compatible", - "no-mypy", - ], - dependencies = twml_deps + [ - ":twml_packer_deps_no_tf", - "3rdparty/python/tensorflow", - "3rdparty/python/twml:libtwml-universal", - "twml/libtwml:libtwml-python", - ], -) - -# Build a smaller .pex file that models can depend on. -# Tensorflow and other dependencies are downloaded from Packer on Aurora. -# Note: This gets the C++ ops through 3rdparty artifacts. -python3_library( - name = "twml-nodeps", - sources = twml_sources, - tags = [ - "bazel-compatible", - "no-mypy", - ], - dependencies = twml_deps + [ - "3rdparty/python/twml:libtwml-universal", - ], -) - -python3_library( - name = "twml", - tags = [ - "bazel-compatible", - "no-mypy", - ], - dependencies = [ - ":twml-nodeps", - ":twml_packer_deps_no_tf", - "3rdparty/python/tensorflow", - ], -) - -python37_binary( - name = "tensorboard", - source = "twml/tensorboard/__main__.py", - dependencies = [ - "3rdparty/python/_closures/twml:tensorboard", - "3rdparty/python/tensorflow", - ], -) - -python37_binary( - name = "saved_model_cli", - source = "twml/saved_model_cli/__main__.py", - dependencies = [ - "3rdparty/python/_closures/twml:saved_model_cli", - "3rdparty/python/tensorflow", - ], -) - -# This target is added so twml can be used regardless of the Tensorflow version: -# This target does not pull in TensorFlow 1.x or the related libtwml compiled using TF 1.x. -python3_library( - name = "twml-py-source-only", - sources = twml_sources, - tags = [ - "known-to-fail-jira:CX-23416", - "no-mypy", - ], - dependencies = twml_deps, -) diff --git a/twml/BUILD.docx b/twml/BUILD.docx new file mode 100644 index 000000000..cdc4fdc52 Binary files /dev/null and b/twml/BUILD.docx differ diff --git a/twml/README.docx b/twml/README.docx new file mode 100644 index 000000000..e78bca71e Binary files /dev/null and b/twml/README.docx differ diff --git a/twml/README.md b/twml/README.md deleted file mode 100644 index b2b315b45..000000000 --- a/twml/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# TWML - ---- -Note: `twml` is no longer under development. Much of the code here is out of date and unused. -It is included here for completeness, because `twml` is still used to train the light ranker models -(see `src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/README.md`) ---- - -TWML is one of Twitter's machine learning frameworks, which uses Tensorflow under the hood. 
While it is mostly -deprecated, -it is still used to train the Earlybird light ranking models ( -see `src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/train.py`). -The most relevant part is the `DataRecordTrainer` class, which is where the core training logic resides. diff --git a/twml/libtwml/BUILD b/twml/libtwml/BUILD deleted file mode 100644 index c80b64b3b..000000000 --- a/twml/libtwml/BUILD +++ /dev/null @@ -1,8 +0,0 @@ -python3_library( - name = "libtwml-python", - sources = ["libtwml/**/*.py"], - tags = [ - "no-mypy", - "bazel-compatible", - ], -) diff --git a/twml/libtwml/BUILD.docx b/twml/libtwml/BUILD.docx new file mode 100644 index 000000000..79d6606b8 Binary files /dev/null and b/twml/libtwml/BUILD.docx differ diff --git a/twml/libtwml/include/twml.docx b/twml/libtwml/include/twml.docx new file mode 100644 index 000000000..526ef884d Binary files /dev/null and b/twml/libtwml/include/twml.docx differ diff --git a/twml/libtwml/include/twml.h b/twml/libtwml/include/twml.h deleted file mode 100644 index 9d88cdc7b..000000000 --- a/twml/libtwml/include/twml.h +++ /dev/null @@ -1,21 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include diff --git a/twml/libtwml/include/twml/BatchPredictionRequest.docx b/twml/libtwml/include/twml/BatchPredictionRequest.docx new file mode 100644 index 000000000..390c264da Binary files /dev/null and b/twml/libtwml/include/twml/BatchPredictionRequest.docx differ diff --git a/twml/libtwml/include/twml/BatchPredictionRequest.h b/twml/libtwml/include/twml/BatchPredictionRequest.h deleted file mode 100644 index 6070ec045..000000000 --- a/twml/libtwml/include/twml/BatchPredictionRequest.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#ifdef __cplusplus - -#include -#include -#include - -namespace twml { - -template<typename RecordType> -class GenericBatchPredictionRequest { - static_assert(std::is_same<RecordType, HashedDataRecord>::value || - std::is_same<RecordType, DataRecord>::value, - "RecordType has to be HashedDataRecord or DataRecord"); - public: - typedef typename RecordType::Reader Reader; - GenericBatchPredictionRequest(int numOfLabels=0, int numOfWeights=0): - m_common_features(), m_requests(), - num_labels(numOfLabels), num_weights(numOfWeights) - {} - - void decode(Reader &reader); - - std::vector<RecordType>& requests() { - return m_requests; - } - - RecordType& common() { - return m_common_features; - } - - private: - RecordType m_common_features; - std::vector<RecordType> m_requests; - int num_labels; - int num_weights; -}; - -using HashedBatchPredictionRequest = GenericBatchPredictionRequest<HashedDataRecord>; -using BatchPredictionRequest = GenericBatchPredictionRequest<DataRecord>; - -} - -#endif diff --git a/twml/libtwml/include/twml/BatchPredictionResponse.docx b/twml/libtwml/include/twml/BatchPredictionResponse.docx new file mode 100644 index 000000000..0dec2a9e9 Binary files /dev/null and b/twml/libtwml/include/twml/BatchPredictionResponse.docx differ diff --git a/twml/libtwml/include/twml/BatchPredictionResponse.h b/twml/libtwml/include/twml/BatchPredictionResponse.h deleted file mode 100644 index b7e709464..000000000 --- a/twml/libtwml/include/twml/BatchPredictionResponse.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace twml { - - // Encodes a batch of model predictions as a list of Thrift DataRecord - // objects inside a Thrift BatchPredictionResponse object.
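// A freestanding sketch of the two-pass "measure, then write" flow described
// in this comment block (toy types, not the twml API; assumes <cstdint>,
// <cstring>, and <vector> are included):
struct TwoPassEncoder {
  uint64_t encodedSize(const std::vector<double>& preds) const {
    return preds.size() * sizeof(double);  // pass 1: size only, nothing written
  }
  void write(const std::vector<double>& preds, uint8_t* out) const {
    std::memcpy(out, preds.data(), preds.size() * sizeof(double));  // pass 2: fill
  }
};
// Caller pattern: buf.resize(enc.encodedSize(p)); enc.write(p, buf.data());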
Prediction - // values are continuousFeatures inside each DataRecord. - // - // The BatchPredictionResponseWriter TensorFlow operator uses this class - // to determine the size of the output tensor to allocate. The operator - // then allocates memory for the output tensor and uses this class to - // write binary Thrift to the output tensor. - // - class BatchPredictionResponse { - private: - uint64_t batch_size_; - const Tensor &keys_; - const Tensor &values_; // prediction values (batch_size * num_keys) - const Tensor &dense_keys_; - const std::vector &dense_values_; - - inline uint64_t getBatchSize() { return batch_size_; } - inline bool hasContinuous() { return keys_.getNumDims() > 0; } - inline bool hasDenseTensors() { return dense_keys_.getNumDims() > 0; } - - inline uint64_t getPredictionSize() { - return values_.getNumDims() > 1 ? values_.getDim(1) : 1; - }; - - void encode(twml::ThriftWriter &thrift_writer); - - template - void serializePredictions(twml::ThriftWriter &thrift_writer); - - public: - // keys: 'continuousFeatures' prediction keys - // values: 'continuousFeatures' prediction values (batch_size * num_keys) - // dense_keys: 'tensors' prediction keys - // dense_values: 'tensors' prediction values (batch_size * num_keys) - BatchPredictionResponse( - const Tensor &keys, const Tensor &values, - const Tensor &dense_keys, const std::vector &dense_values); - - // Calculate the size of the Thrift encoded output (but do not encode). - // The BatchPredictionResponseWriter TensorFlow operator uses this value - // to allocate the output tensor. - uint64_t encodedSize(); - - // Write the BatchPredictionResponse as binary Thrift. The - // BatchPredictionResponseWriter operator uses this method to populate - // the output tensor. - void write(Tensor &result); - }; -} diff --git a/twml/libtwml/include/twml/BlockFormatReader.docx b/twml/libtwml/include/twml/BlockFormatReader.docx new file mode 100644 index 000000000..fc7ddf8f1 Binary files /dev/null and b/twml/libtwml/include/twml/BlockFormatReader.docx differ diff --git a/twml/libtwml/include/twml/BlockFormatReader.h b/twml/libtwml/include/twml/BlockFormatReader.h deleted file mode 100644 index 4c68458ba..000000000 --- a/twml/libtwml/include/twml/BlockFormatReader.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace twml { -class BlockFormatReader { - private: - int record_size_; - long block_pos_; - long block_end_; - char classname_[1024]; - - int read_one_record_size(); - int read_int(); - int consume_marker(int scan); - int unpack_varint_i32(); - int unpack_tag_and_wiretype(uint32_t *tag, uint32_t *wiretype); - int unpack_string(char *out, uint64_t max_out_len); - - public: - BlockFormatReader(); - bool next(); - uint64_t current_size() const { return record_size_; } - - virtual uint64_t read_bytes(void *dest, int size, int count) = 0; -}; -} diff --git a/twml/libtwml/include/twml/BlockFormatWriter.docx b/twml/libtwml/include/twml/BlockFormatWriter.docx new file mode 100644 index 000000000..9d72cdbfc Binary files /dev/null and b/twml/libtwml/include/twml/BlockFormatWriter.docx differ diff --git a/twml/libtwml/include/twml/BlockFormatWriter.h b/twml/libtwml/include/twml/BlockFormatWriter.h deleted file mode 100644 index b9c496f40..000000000 --- a/twml/libtwml/include/twml/BlockFormatWriter.h +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include - -#ifndef PATH_MAX -#define PATH_MAX (8096) -#endif - -#ifdef __cplusplus
-extern "C" { -#endif - - struct block_format_writer__; - typedef block_format_writer__ * block_format_writer; - -#ifdef __cplusplus -} -#endif - - -#ifdef __cplusplus -namespace twml { - class BlockFormatWriter { - private: - const char *file_name_; - FILE *outputfile_; - char temp_file_name_[PATH_MAX]; - int record_index_; - int records_per_block_; - - int pack_tag_and_wiretype(FILE *file, uint32_t tag, uint32_t wiretype); - int pack_varint_i32(FILE *file, int value); - int pack_string(FILE *file, const char *in, size_t in_len); - int write_int(FILE *file, int value); - - public: - BlockFormatWriter(const char *file_name, int record_per_block); - ~BlockFormatWriter(); - int write(const char *class_name, const char *record, int record_len) ; - int flush(); - block_format_writer getHandle(); - }; - - BlockFormatWriter *getBlockFormatWriter(block_format_writer w); -} //twml namespace -#endif - -#ifdef __cplusplus -extern "C" { -#endif -twml_err block_format_writer_create(block_format_writer *w, const char *file_name, int records_per_block); -twml_err block_format_write(block_format_writer w, const char *class_name, const char *record, int record_len); -twml_err block_format_flush(block_format_writer w); -twml_err block_format_writer_delete(const block_format_writer w); -#ifdef __cplusplus -} -#endif diff --git a/twml/libtwml/include/twml/DataRecord.docx b/twml/libtwml/include/twml/DataRecord.docx new file mode 100644 index 000000000..3484bfe25 Binary files /dev/null and b/twml/libtwml/include/twml/DataRecord.docx differ diff --git a/twml/libtwml/include/twml/DataRecord.h b/twml/libtwml/include/twml/DataRecord.h deleted file mode 100644 index f39f1158b..000000000 --- a/twml/libtwml/include/twml/DataRecord.h +++ /dev/null @@ -1,108 +0,0 @@ -#pragma once -#ifdef __cplusplus - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace twml { - -class DataRecordReader; - -class TWMLAPI DataRecord : public TensorRecord { -public: - typedef std::vector> SparseContinuousValueType; - typedef std::vector SparseBinaryValueType; - typedef Set BinaryFeatures; - typedef Map ContinuousFeatures; - typedef Map DiscreteFeatures; - typedef Map StringFeatures; - typedef Map SparseBinaryFeatures; - typedef Map SparseContinuousFeatures; - typedef Map> BlobFeatures; - -private: - BinaryFeatures m_binary; - ContinuousFeatures m_continuous; - DiscreteFeatures m_discrete; - StringFeatures m_string; - SparseBinaryFeatures m_sparsebinary; - SparseContinuousFeatures m_sparsecontinuous; - BlobFeatures m_blob; - - - std::vector m_labels; - std::vector m_weights; - - void addLabel(int64_t id, double label = 1); - void addWeight(int64_t id, double value); - -public: - typedef DataRecordReader Reader; - - DataRecord(int num_labels=0, int num_weights=0): - m_binary(), - m_continuous(), - m_discrete(), - m_string(), - m_sparsebinary(), - m_sparsecontinuous(), - m_blob(), - m_labels(num_labels, std::nanf("")), - m_weights(num_weights) { -#ifdef USE_DENSE_HASH - m_binary.set_empty_key(0); - m_continuous.set_empty_key(0); - m_discrete.set_empty_key(0); - m_string.set_empty_key(0); - m_sparsebinary.set_empty_key(0); - m_sparsecontinuous.set_empty_key(0); -#endif - m_binary.max_load_factor(0.5); - m_continuous.max_load_factor(0.5); - m_discrete.max_load_factor(0.5); - m_string.max_load_factor(0.5); - m_sparsebinary.max_load_factor(0.5); - m_sparsecontinuous.max_load_factor(0.5); - } - - const BinaryFeatures &getBinary() const { return m_binary; } - const ContinuousFeatures 
&getContinuous() const { return m_continuous; } - const DiscreteFeatures &getDiscrete() const { return m_discrete; } - const StringFeatures &getString() const { return m_string; } - const SparseBinaryFeatures &getSparseBinary() const { return m_sparsebinary; } - const SparseContinuousFeatures &getSparseContinuous() const { return m_sparsecontinuous; } - const BlobFeatures &getBlob() const { return m_blob; } - - const std::vector &labels() const { return m_labels; } - const std::vector &weights() const { return m_weights; } - - // used by DataRecordWriter - template - void addContinuous(std::vector feature_ids, std::vector values) { - for (size_t i = 0; i < feature_ids.size(); ++i){ - m_continuous[feature_ids[i]] = values[i]; - } - } - - template - void addContinuous(const int64_t *keys, uint64_t num_keys, T *values) { - for (size_t i = 0; i < num_keys; ++i){ - m_continuous[keys[i]] = values[i]; - } - } - - void decode(DataRecordReader &reader); - void clear(); - friend class DataRecordReader; -}; - -} -#endif diff --git a/twml/libtwml/include/twml/DataRecordReader.docx b/twml/libtwml/include/twml/DataRecordReader.docx new file mode 100644 index 000000000..2cdd39860 Binary files /dev/null and b/twml/libtwml/include/twml/DataRecordReader.docx differ diff --git a/twml/libtwml/include/twml/DataRecordReader.h b/twml/libtwml/include/twml/DataRecordReader.h deleted file mode 100644 index 0ef8e64ff..000000000 --- a/twml/libtwml/include/twml/DataRecordReader.h +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once -#ifdef __cplusplus - -#include -#include -#include -#include - -#include - -#include -#include -#include - -namespace twml { - -class TWMLAPI DataRecordReader : public TensorRecordReader { - -private: - typedef Map KeyMap_t; - KeyMap_t *m_keep_map; - KeyMap_t *m_labels_map; - KeyMap_t *m_weights_map; - -public: - bool keepKey (const int64_t &key, int64_t &code); - bool isLabel (const int64_t &key, int64_t &code); - bool isWeight (const int64_t &key, int64_t &code); - void readBinary (const int feature_type , DataRecord *record); - void readContinuous (const int feature_type , DataRecord *record); - void readDiscrete (const int feature_type , DataRecord *record); - void readString (const int feature_type , DataRecord *record); - void readSparseBinary (const int feature_type , DataRecord *record); - void readSparseContinuous (const int feature_type , DataRecord *record); - void readBlob (const int feature_type , DataRecord *record); - - DataRecordReader() : - TensorRecordReader(nullptr), - m_keep_map(nullptr), - m_labels_map(nullptr), - m_weights_map(nullptr) - {} - - // Using a template instead of int64_t because tensorflow implements int64 based on compiler. 
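// Sketch of how the three maps below partition incoming feature ids during
// decode (the precedence shown is illustrative only; the reader consults the
// keepKey/isLabel/isWeight methods declared above):
//
//   int64_t code;
//   if (isLabel(id, code))       { /* store at m_labels slot `code` */ }
//   else if (isWeight(id, code)) { /* store at m_weights slot `code` */ }
//   else if (keepKey(id, code))  { /* decode as a model feature */ }
//   else                         { /* skip this feature entirely */ }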
- void setKeepMap(KeyMap_t *keep_map) { - m_keep_map = keep_map; - } - - void setLabelsMap(KeyMap_t *labels_map) { - m_labels_map = labels_map; - } - - void setWeightsMap(KeyMap_t *weights_map) { - m_weights_map = weights_map; - } - - void setDecodeMode(int64_t mode) {} -}; - -} -#endif diff --git a/twml/libtwml/include/twml/DataRecordWriter.docx b/twml/libtwml/include/twml/DataRecordWriter.docx new file mode 100644 index 000000000..daad8c2b3 Binary files /dev/null and b/twml/libtwml/include/twml/DataRecordWriter.docx differ diff --git a/twml/libtwml/include/twml/DataRecordWriter.h b/twml/libtwml/include/twml/DataRecordWriter.h deleted file mode 100644 index 6b330d323..000000000 --- a/twml/libtwml/include/twml/DataRecordWriter.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once -#ifdef __cplusplus - -#include -#include -#include - -namespace twml { - -// Encodes DataRecords as binary Thrift. BatchPredictionResponse -// uses this class to encode prediction responses through our -// TensorFlow response writer operator. -class TWMLAPI DataRecordWriter { - private: - uint32_t m_records_written; - twml::ThriftWriter &m_thrift_writer; - twml::TensorRecordWriter m_tensor_writer; - - void writeBinary(twml::DataRecord &record); - void writeContinuous(twml::DataRecord &record); - void writeDiscrete(twml::DataRecord &record); - void writeString(twml::DataRecord &record); - void writeSparseBinaryFeatures(twml::DataRecord &record); - void writeSparseContinuousFeatures(twml::DataRecord &record); - void writeBlobFeatures(twml::DataRecord &record); - void writeDenseTensors(twml::DataRecord &record); - - public: - DataRecordWriter(twml::ThriftWriter &thrift_writer): - m_records_written(0), - m_thrift_writer(thrift_writer), - m_tensor_writer(twml::TensorRecordWriter(thrift_writer)) { } - - uint32_t getRecordsWritten(); - uint64_t write(twml::DataRecord &record); -}; - -} -#endif diff --git a/twml/libtwml/include/twml/Error.docx b/twml/libtwml/include/twml/Error.docx new file mode 100644 index 000000000..ed047d380 Binary files /dev/null and b/twml/libtwml/include/twml/Error.docx differ diff --git a/twml/libtwml/include/twml/Error.h b/twml/libtwml/include/twml/Error.h deleted file mode 100644 index 89307d214..000000000 --- a/twml/libtwml/include/twml/Error.h +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once -#include - -#ifdef __cplusplus -#include -#include -#include -#include - -namespace twml { - -class Error : public std::runtime_error { - private: - twml_err m_err; - public: - Error(twml_err err, const std::string &msg) : - std::runtime_error(msg), m_err(err) - { - } - - twml_err err() const - { - return m_err; - } -}; - -class ThriftInvalidField: public twml::Error { - public: - ThriftInvalidField(int16_t field_id, const std::string& func) : - Error(TWML_ERR_THRIFT, - "Found invalid field (" + std::to_string(field_id) - + ") while reading thrift [" + func + "]") - { - } -}; - -class ThriftInvalidType: public twml::Error { - public: - ThriftInvalidType(uint8_t type_id, const std::string& func, const std::string type) : - Error(TWML_ERR_THRIFT, - "Found invalid type (" + std::to_string(type_id) + - ") while reading thrift [" + func + "::" + type + "]") - { - } -}; - -} -#endif diff --git a/twml/libtwml/include/twml/HashedDataRecord.docx b/twml/libtwml/include/twml/HashedDataRecord.docx new file mode 100644 index 000000000..c1fad21cc Binary files /dev/null and b/twml/libtwml/include/twml/HashedDataRecord.docx differ diff --git a/twml/libtwml/include/twml/HashedDataRecord.h 
b/twml/libtwml/include/twml/HashedDataRecord.h deleted file mode 100644 index de63c4dc7..000000000 --- a/twml/libtwml/include/twml/HashedDataRecord.h +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once -#ifdef __cplusplus - -#include -#include - -#include -#include -#include - -namespace twml { - -class HashedDataRecordReader; - -class TWMLAPI HashedDataRecord : public TensorRecord { - public: - typedef HashedDataRecordReader Reader; - - HashedDataRecord(int num_labels=0, int num_weights=0): - m_keys(), - m_transformed_keys(), - m_values(), - m_codes(), - m_types(), - m_labels(num_labels, std::nanf("")), - m_weights(num_weights) {} - - void decode(HashedDataRecordReader &reader); - - const std::vector &keys() const { return m_keys; } - const std::vector &transformed_keys() const { return m_transformed_keys; } - const std::vector &values() const { return m_values; } - const std::vector &codes() const { return m_codes; } - const std::vector &types() const { return m_types; } - - const std::vector &labels() const { return m_labels; } - const std::vector &weights() const { return m_weights; } - - void clear(); - - uint64_t totalSize() const { return m_keys.size(); } - - void extendSize(int delta_size) { - int count = m_keys.size() + delta_size; - m_keys.reserve(count); - m_transformed_keys.reserve(count); - m_values.reserve(count); - m_codes.reserve(count); - m_types.reserve(count); - } - - private: - std::vector m_keys; - std::vector m_transformed_keys; - std::vector m_values; - std::vector m_codes; - std::vector m_types; - - std::vector m_labels; - std::vector m_weights; - - void addKey(int64_t key, int64_t transformed_key, int64_t code, uint8_t type, double value=1); - void addLabel(int64_t id, double value = 1); - void addWeight(int64_t id, double value); - - friend class HashedDataRecordReader; -}; - -} -#endif \ No newline at end of file diff --git a/twml/libtwml/include/twml/HashedDataRecordReader.docx b/twml/libtwml/include/twml/HashedDataRecordReader.docx new file mode 100644 index 000000000..4f4bec136 Binary files /dev/null and b/twml/libtwml/include/twml/HashedDataRecordReader.docx differ diff --git a/twml/libtwml/include/twml/HashedDataRecordReader.h b/twml/libtwml/include/twml/HashedDataRecordReader.h deleted file mode 100644 index 5470eb5c8..000000000 --- a/twml/libtwml/include/twml/HashedDataRecordReader.h +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once -#ifdef __cplusplus - -#include -#include -#include -#include - -#include - -#include -#include -#include - -namespace twml { - -enum class DecodeMode: int64_t -{ - hash_valname = 0, - hash_fname_and_valname = 1, -}; - -class TWMLAPI HashedDataRecordReader : public TensorRecordReader { -private: - typedef Map KeyMap_t; - KeyMap_t *m_keep_map; - KeyMap_t *m_labels_map; - KeyMap_t *m_weights_map; - DecodeMode m_decode_mode; - -public: - bool keepId (const int64_t &key, int64_t &code); - bool isLabel (const int64_t &key, int64_t &code); - bool isWeight (const int64_t &key, int64_t &code); - void readBinary (const int feature_type , HashedDataRecord *record); - void readContinuous (const int feature_type , HashedDataRecord *record); - void readDiscrete (const int feature_type , HashedDataRecord *record); - void readString (const int feature_type , HashedDataRecord *record); - void readSparseBinary (const int feature_type , HashedDataRecord *record); - void readSparseContinuous (const int feature_type , HashedDataRecord *record); - void readBlob (const int feature_type , HashedDataRecord *record); - - HashedDataRecordReader() : - 
TensorRecordReader(nullptr), - m_keep_map(nullptr), - m_labels_map(nullptr), - m_weights_map(nullptr), - m_decode_mode(DecodeMode::hash_valname) - {} - - // Using a template instead of int64_t because tensorflow implements int64 based on compiler. - void setKeepMap(KeyMap_t *keep_map) { - m_keep_map = keep_map; - } - - void setLabelsMap(KeyMap_t *labels_map) { - m_labels_map = labels_map; - } - - void setWeightsMap(KeyMap_t *weights_map) { - m_weights_map = weights_map; - } - - void setDecodeMode(int64_t mode) { - m_decode_mode = static_cast(mode); - } -}; - -} -#endif diff --git a/twml/libtwml/include/twml/Hashmap.docx b/twml/libtwml/include/twml/Hashmap.docx new file mode 100644 index 000000000..fd48a2bd9 Binary files /dev/null and b/twml/libtwml/include/twml/Hashmap.docx differ diff --git a/twml/libtwml/include/twml/Hashmap.h b/twml/libtwml/include/twml/Hashmap.h deleted file mode 100644 index 59314236b..000000000 --- a/twml/libtwml/include/twml/Hashmap.h +++ /dev/null @@ -1,110 +0,0 @@ -#pragma once -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - typedef void * twml_hashmap; - typedef int64_t tw_hash_key_t; - typedef int64_t tw_hash_val_t; -#ifdef __cplusplus -} -#endif - -#ifdef __cplusplus -namespace twml { - - typedef tw_hash_key_t HashKey_t; - typedef tw_hash_val_t HashVal_t; - - class HashMap { - private: - twml_hashmap m_hashmap; - - public: - HashMap(); - ~HashMap(); - - // Disable copy constructor and assignment - // TODO: Fix this after retain and release are added to twml_hashmap - HashMap(const HashMap &other) = delete; - HashMap& operator=(const HashMap &other) = delete; - - void clear(); - uint64_t size() const; - int8_t insert(const HashKey_t key); - int8_t insert(const HashKey_t key, const HashVal_t val); - void remove(const HashKey_t key); - int8_t get(HashVal_t &val, const HashKey_t key) const; - - void insert(Tensor &mask, const Tensor keys); - void insert(Tensor &mask, const Tensor keys, const Tensor vals); - void remove(const Tensor keys); - void get(Tensor &mask, Tensor &vals, const Tensor keys) const; - - void getInplace(Tensor &mask, Tensor &keys_vals) const; - void toTensors(Tensor &keys, Tensor &vals) const; - }; -} -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - - TWMLAPI twml_err twml_hashmap_create(twml_hashmap *hashmap); - - TWMLAPI twml_err twml_hashmap_clear(const twml_hashmap hashmap); - - TWMLAPI twml_err twml_hashmap_get_size(uint64_t *size, const twml_hashmap hashmap); - - TWMLAPI twml_err twml_hashmap_delete(const twml_hashmap hashmap); - - // insert, get, remove single key / value - TWMLAPI twml_err twml_hashmap_insert_key(int8_t *mask, - const twml_hashmap hashmap, - const tw_hash_key_t key); - - TWMLAPI twml_err twml_hashmap_insert_key_and_value(int8_t *mask, twml_hashmap hashmap, - const tw_hash_key_t key, - const tw_hash_val_t val); - - TWMLAPI twml_err twml_hashmap_remove_key(const twml_hashmap hashmap, - const tw_hash_key_t key); - - TWMLAPI twml_err twml_hashmap_get_value(int8_t *mask, tw_hash_val_t *val, - const twml_hashmap hashmap, - const tw_hash_key_t key); - - TWMLAPI twml_err twml_hashmap_insert_keys(twml_tensor masks, - const twml_hashmap hashmap, - const twml_tensor keys); - - // insert, get, remove tensors of keys / values - TWMLAPI twml_err twml_hashmap_insert_keys_and_values(twml_tensor masks, - twml_hashmap hashmap, - const twml_tensor keys, - const twml_tensor vals); - - TWMLAPI twml_err twml_hashmap_remove_keys(const twml_hashmap hashmap, - const twml_tensor keys); - - TWMLAPI 
twml_err twml_hashmap_get_values(twml_tensor masks, - twml_tensor vals, - const twml_hashmap hashmap, - const twml_tensor keys); - - TWMLAPI twml_err twml_hashmap_get_values_inplace(twml_tensor masks, - twml_tensor keys_vals, - const twml_hashmap hashmap); - - TWMLAPI twml_err twml_hashmap_to_tensors(twml_tensor keys, - twml_tensor vals, - const twml_hashmap hashmap); -#ifdef __cplusplus -} -#endif diff --git a/twml/libtwml/include/twml/RawTensor.docx b/twml/libtwml/include/twml/RawTensor.docx new file mode 100644 index 000000000..97162fd9c Binary files /dev/null and b/twml/libtwml/include/twml/RawTensor.docx differ diff --git a/twml/libtwml/include/twml/RawTensor.h b/twml/libtwml/include/twml/RawTensor.h deleted file mode 100644 index 571966743..000000000 --- a/twml/libtwml/include/twml/RawTensor.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once -#include -#include - -#ifdef __cplusplus -namespace twml { - -// This class contains the raw pointers to tensors coming from thrift object. -class TWMLAPI RawTensor : public Tensor -{ -private: - bool m_is_big_endian; - uint64_t m_raw_length; -public: - - RawTensor() {} - - RawTensor(void *data, const std::vector &dims, - const std::vector &strides, twml_type type, bool is_big_endian, uint64_t length) - : Tensor(data, dims, strides, type), m_is_big_endian(is_big_endian), m_raw_length(length) {} - - bool is_big_endian() const { - return m_is_big_endian; - } - - uint64_t getRawLength() const { - return m_raw_length; - } - - // Extracts a slice from a tensor at idx0 along dimension 0 - // Used in BatchPredictionResponse to write each slice in separate records - RawTensor getSlice(uint64_t idx0) const { - void *slice = nullptr; - uint64_t raw_length = 0; - - if (getType() == TWML_TYPE_STRING) { - raw_length = getStride(0); - std::string *data = const_cast(static_cast(getData())); - slice = static_cast(data + raw_length * idx0); - } else { - raw_length = getStride(0) * getSizeOf(getType()); - char *data = const_cast(static_cast(getData())); - slice = static_cast(data + raw_length * idx0); - } - - std::vector dims, strides; - for (int i = 1; i < getNumDims(); i++) { - dims.push_back(getDim(i)); - strides.push_back(getStride(i)); - } - - return RawTensor(slice, dims, strides, getType(), m_is_big_endian, raw_length); - } -}; - -// Wrapper class around RawTensor to hold sparse tensors. 
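// getSlice above is plain stride arithmetic: the slice at index idx0 begins
// getStride(0) elements (or bytes, for raw data) into the buffer. A minimal
// freestanding illustration for a row-major [rows x cols] double buffer
// (assumes <vector> and <cstdint> are included):
inline const double* sliceRow(const std::vector<double>& data,
                              uint64_t cols, uint64_t idx0) {
  return data.data() + idx0 * cols;  // stride(0) == cols elements per row
}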
-class TWMLAPI RawSparseTensor -{ -private: - RawTensor m_indices; - RawTensor m_values; - std::vector m_dense_shape; - -public: - - RawSparseTensor() { - } - - RawSparseTensor(const RawTensor &indices_, const RawTensor &values_, - const std::vector &dense_shape_) : - m_indices(indices_), m_values(values_), m_dense_shape(dense_shape_) - { - if (m_indices.getType() != TWML_TYPE_INT64) { - throw twml::Error(TWML_ERR_TYPE, "Indices of Sparse Tensor must be of type int64"); - } - } - - const RawTensor &indices() const { - return m_indices; - } - - const RawTensor &values() const { - return m_values; - } - - const std::vector& denseShape() const { - return m_dense_shape; - } -}; - -} -#endif diff --git a/twml/libtwml/include/twml/Tensor.docx b/twml/libtwml/include/twml/Tensor.docx new file mode 100644 index 000000000..12eb19c07 Binary files /dev/null and b/twml/libtwml/include/twml/Tensor.docx differ diff --git a/twml/libtwml/include/twml/Tensor.h b/twml/libtwml/include/twml/Tensor.h deleted file mode 100644 index 774474403..000000000 --- a/twml/libtwml/include/twml/Tensor.h +++ /dev/null @@ -1,82 +0,0 @@ -#pragma once -#include - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - - struct twml_tensor__; - typedef twml_tensor__ * twml_tensor; - -#ifdef __cplusplus -} -#endif - -#ifdef __cplusplus -namespace twml { - -class TWMLAPI Tensor -{ -private: - twml_type m_type; - void *m_data; - std::vector m_dims; - std::vector m_strides; - -public: - Tensor() {} - Tensor(void *data, int ndims, const uint64_t *dims, const uint64_t *strides, twml_type type); - Tensor(void *data, const std::vector &dims, const std::vector &strides, twml_type type); - - const std::vector& getDims() const { - return m_dims; - } - - int getNumDims() const; - uint64_t getDim(int dim) const; - uint64_t getStride(int dim) const; - uint64_t getNumElements() const; - twml_type getType() const; - - twml_tensor getHandle(); - const twml_tensor getHandle() const; - - template T *getData(); - template const T *getData() const; -}; - -TWMLAPI std::string getTypeName(twml_type type); -TWMLAPI const Tensor *getConstTensor(const twml_tensor t); -TWMLAPI Tensor *getTensor(twml_tensor t); -TWMLAPI uint64_t getSizeOf(twml_type type); - -} -#endif - -#ifdef __cplusplus -extern "C" { -#endif - TWMLAPI twml_err twml_tensor_create(twml_tensor *tensor, void *data, - int ndims, uint64_t *dims, - uint64_t *strides, twml_type type); - - TWMLAPI twml_err twml_tensor_delete(const twml_tensor tensor); - - TWMLAPI twml_err twml_tensor_get_type(twml_type *type, const twml_tensor tensor); - - TWMLAPI twml_err twml_tensor_get_data(void **data, const twml_tensor tensor); - - TWMLAPI twml_err twml_tensor_get_dim(uint64_t *dim, const twml_tensor tensor, int id); - - TWMLAPI twml_err twml_tensor_get_num_dims(int *ndims, const twml_tensor tensor); - - TWMLAPI twml_err twml_tensor_get_num_elements(uint64_t *nelements, const twml_tensor tensor); - - TWMLAPI twml_err twml_tensor_get_stride(uint64_t *stride, const twml_tensor tensor, int id); -#ifdef __cplusplus -} -#endif diff --git a/twml/libtwml/include/twml/TensorRecord.docx b/twml/libtwml/include/twml/TensorRecord.docx new file mode 100644 index 000000000..2153deb08 Binary files /dev/null and b/twml/libtwml/include/twml/TensorRecord.docx differ diff --git a/twml/libtwml/include/twml/TensorRecord.h b/twml/libtwml/include/twml/TensorRecord.h deleted file mode 100644 index d128cfdce..000000000 --- a/twml/libtwml/include/twml/TensorRecord.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once 
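// A sketch of wrapping an existing row-major buffer in twml::Tensor, based on
// the constructors declared in Tensor.h above. Strides are assumed to be in
// elements rather than bytes, consistent with RawTensor::getSlice multiplying
// getStride(0) by getSizeOf(getType()).
#include <twml/Tensor.h>
#include <cstdint>
#include <vector>

void tensor_wrap_sketch() {
  std::vector<float> data = {0, 1, 2, 3, 4, 5};     // a 2x3 matrix
  std::vector<uint64_t> dims = {2, 3};
  std::vector<uint64_t> strides = {3, 1};           // row-major, in elements
  twml::Tensor t(data.data(), dims, strides, TWML_TYPE_FLOAT);

  const float *p = t.getData<float>();
  float x = p[1 * strides[0] + 2 * strides[1]];     // element (1, 2) == 5
  (void)x;
}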
-#ifdef __cplusplus - -#include -#include - -#include -#include - -namespace twml { - -class TensorRecordReader; - -// A class containing the data from TensorRecord. -// - This serves as the base class from which DataRecord and HashedDataRecord are inherited. -class TWMLAPI TensorRecord { -public: - typedef std::unordered_map RawTensors; - typedef std::unordered_map RawSparseTensors; - -private: - RawTensors m_tensors; - RawSparseTensors m_sparse_tensors; - -public: - - const RawTensors &getRawTensors() { - return m_tensors; - } - - const RawTensor& getRawTensor(int64_t id) const { - return m_tensors.at(id); - } - - const RawSparseTensor& getRawSparseTensor(int64_t id) const { - return m_sparse_tensors.at(id); - } - - void addRawTensor(int64_t id, const RawTensor &tensor) { - m_tensors.emplace(id, tensor); - } - - friend class TensorRecordReader; -}; - -} -#endif diff --git a/twml/libtwml/include/twml/TensorRecordReader.docx b/twml/libtwml/include/twml/TensorRecordReader.docx new file mode 100644 index 000000000..ba92f1c83 Binary files /dev/null and b/twml/libtwml/include/twml/TensorRecordReader.docx differ diff --git a/twml/libtwml/include/twml/TensorRecordReader.h b/twml/libtwml/include/twml/TensorRecordReader.h deleted file mode 100644 index 3a62bd885..000000000 --- a/twml/libtwml/include/twml/TensorRecordReader.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once -#ifdef __cplusplus - -#include -#include -#include - -#include - -#include -#include -#include - -namespace twml { - -// Class that parses the thrift objects as defined in tensor.thrift -class TWMLAPI TensorRecordReader : public ThriftReader { - - std::vector readShape(); - template RawTensor readTypedTensor(); - RawTensor readRawTypedTensor(); - RawTensor readStringTensor(); - RawTensor readGeneralTensor(); - RawSparseTensor readCOOSparseTensor(); - -public: - void readTensor(const int feature_type, TensorRecord *record); - void readSparseTensor(const int feature_type, TensorRecord *record); - - TensorRecordReader(const uint8_t *buffer) : ThriftReader(buffer) {} -}; - -} -#endif diff --git a/twml/libtwml/include/twml/TensorRecordWriter.docx b/twml/libtwml/include/twml/TensorRecordWriter.docx new file mode 100644 index 000000000..e5cae1f66 Binary files /dev/null and b/twml/libtwml/include/twml/TensorRecordWriter.docx differ diff --git a/twml/libtwml/include/twml/TensorRecordWriter.h b/twml/libtwml/include/twml/TensorRecordWriter.h deleted file mode 100644 index d8b7c3dbf..000000000 --- a/twml/libtwml/include/twml/TensorRecordWriter.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#ifdef __cplusplus - -#include -#include - -namespace twml { - -// Encodes tensors as DataRecord/TensorRecord-compatible Thrift. -// DataRecordWriter relies on this class to encode the tensor fields. 
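// A sketch of the TensorRecord container declared above: tensors are stored
// and fetched by int64 feature id. The id below is hypothetical; getRawTensor
// uses unordered_map::at, so an unknown id throws std::out_of_range.
#include <twml/TensorRecord.h>
#include <twml/RawTensor.h>
#include <cstdint>

void tensor_record_sketch(const twml::RawTensor &raw) {
  twml::TensorRecord record;
  const int64_t kFeatureId = 123;                    // hypothetical feature id
  record.addRawTensor(kFeatureId, raw);
  const twml::RawTensor &back = record.getRawTensor(kFeatureId);
  (void)back;
}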
-class TWMLAPI TensorRecordWriter { - -private: - uint32_t m_records_written; - twml::ThriftWriter &m_thrift_writer; - - void writeTensor(const RawTensor &tensor); - void writeRawTensor(const RawTensor &tensor); - -public: - TensorRecordWriter(twml::ThriftWriter &thrift_writer): - m_records_written(0), - m_thrift_writer(thrift_writer) { } - - uint32_t getRecordsWritten(); - - // Caller (usually DataRecordWriter) must precede with struct header field - // like thrift_writer.writeStructFieldHeader(TTYPE_MAP, DR_GENERAL_TENSOR) - // - // All tensors written as RawTensors except for StringTensors - uint64_t write(twml::TensorRecord &record); -}; - -} -#endif diff --git a/twml/libtwml/include/twml/ThriftReader.docx b/twml/libtwml/include/twml/ThriftReader.docx new file mode 100644 index 000000000..f759d2dd5 Binary files /dev/null and b/twml/libtwml/include/twml/ThriftReader.docx differ diff --git a/twml/libtwml/include/twml/ThriftReader.h b/twml/libtwml/include/twml/ThriftReader.h deleted file mode 100644 index 25c83ea29..000000000 --- a/twml/libtwml/include/twml/ThriftReader.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#ifdef __cplusplus - -#include -#include -#include -#include - -namespace twml { - -class ThriftReader { - protected: - const uint8_t *m_buffer; - - public: - - ThriftReader(const uint8_t *buffer): m_buffer(buffer) {} - - const uint8_t *getBuffer() { return m_buffer; } - - void setBuffer(const uint8_t *buffer) { m_buffer = buffer; } - - template T readDirect() { - T val; - memcpy(&val, m_buffer, sizeof(T)); - m_buffer += sizeof(T); - return val; - } - - template void skip() { - m_buffer += sizeof(T); - } - - void skipLength(size_t length) { - m_buffer += length; - } - - uint8_t readByte(); - int16_t readInt16(); - int32_t readInt32(); - int64_t readInt64(); - double readDouble(); - - template inline - int32_t getRawBuffer(const uint8_t **begin) { - int32_t length = readInt32(); - *begin = m_buffer; - skipLength(length * sizeof(T)); - return length; - } - -}; - -} -#endif diff --git a/twml/libtwml/include/twml/ThriftWriter.docx b/twml/libtwml/include/twml/ThriftWriter.docx new file mode 100644 index 000000000..4e5a63f7d Binary files /dev/null and b/twml/libtwml/include/twml/ThriftWriter.docx differ diff --git a/twml/libtwml/include/twml/ThriftWriter.h b/twml/libtwml/include/twml/ThriftWriter.h deleted file mode 100644 index 1216415b0..000000000 --- a/twml/libtwml/include/twml/ThriftWriter.h +++ /dev/null @@ -1,59 +0,0 @@ -#pragma once - -#ifdef __cplusplus - -#include -#include -#include -#include - -namespace twml { - -// A low-level binary Thrift writer that can also compute output size -// in dry run mode without copying memory. See also https://git.io/vNPiv -// -// WARNING: Users of this class are responsible for generating valid Thrift -// by following the Thrift binary protocol (https://git.io/vNPiv). -class TWMLAPI ThriftWriter { - protected: - bool m_dry_run; - uint8_t *m_buffer; - size_t m_buffer_size; - size_t m_bytes_written; - - template inline uint64_t write(T val); - - public: - // buffer: Memory to write the binary Thrift to. - // buffer_size: Length of the buffer. - // dry_run: If true, just count bytes 'written' but do not copy memory. - // If false, write binary Thrift to the buffer normally. - // Useful to determine output size for TensorFlow allocations. 
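// Skeleton of the field-by-field decode loop that the record readers in this
// library build on top of ThriftReader (see DataRecord::decode later in this
// diff): read a type byte, stop at TTYPE_STOP, otherwise read the i16 field id
// and dispatch on it. The literal 0 stands in for TTYPE_STOP from the internal
// thrift header, which is not shown here; field id 1 is hypothetical.
#include <twml/ThriftReader.h>
#include <cstdint>

void decode_loop_sketch(const uint8_t *buffer) {
  twml::ThriftReader reader(buffer);
  for (uint8_t type = reader.readByte(); type != 0; type = reader.readByte()) {
    int16_t field_id = reader.readInt16();
    switch (field_id) {
      case 1: {                      // a hypothetical i64 struct field
        int64_t v = reader.readInt64();
        (void)v;
        break;
      }
      default:
        return;                      // the real readers throw ThriftInvalidField
    }
  }
}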
ThriftWriter(uint8_t *buffer, size_t buffer_size, bool dry_run = false) : - m_dry_run(dry_run), - m_buffer(buffer), - m_buffer_size(buffer_size), - m_bytes_written(0) {} - - // total bytes written to the buffer since object creation - uint64_t getBytesWritten(); - - // encode headers and values into the buffer - uint64_t writeStructFieldHeader(int8_t field_type, int16_t field_id); - uint64_t writeStructStop(); - uint64_t writeListHeader(int8_t element_type, int32_t num_elems); - uint64_t writeMapHeader(int8_t key_type, int8_t val_type, int32_t num_elems); - uint64_t writeDouble(double val); - uint64_t writeInt8(int8_t val); - uint64_t writeInt16(int16_t val); - uint64_t writeInt32(int32_t val); - uint64_t writeInt64(int64_t val); - uint64_t writeBinary(const uint8_t *bytes, int32_t num_bytes); - // clients expect UTF-8-encoded strings per the Thrift protocol - // (often this is just used to send bytes, not real strings though) - uint64_t writeString(std::string str); - uint64_t writeBool(bool val); -}; - -} -#endif diff --git a/twml/libtwml/include/twml/Type.docx b/twml/libtwml/include/twml/Type.docx new file mode 100644 index 000000000..a6cf4d005 Binary files /dev/null and b/twml/libtwml/include/twml/Type.docx differ diff --git a/twml/libtwml/include/twml/Type.h b/twml/libtwml/include/twml/Type.h deleted file mode 100644 index 8b460c812..000000000 --- a/twml/libtwml/include/twml/Type.h +++ /dev/null @@ -1,69 +0,0 @@ -#pragma once -#include -#include -#include - -#ifdef __cplusplus -namespace twml { - - template<typename T> struct Type; - - template<> struct Type<float> - { - enum { - type = TWML_TYPE_FLOAT, - }; - }; - - template<> struct Type<std::string> - { - enum { - type = TWML_TYPE_STRING, - }; - }; - - template<> struct Type<double> - { - enum { - type = TWML_TYPE_DOUBLE, - }; - }; - - template<> struct Type<int64_t> - { - enum { - type = TWML_TYPE_INT64, - }; - }; - - template<> struct Type<int32_t> - { - enum { - type = TWML_TYPE_INT32, - }; - }; - - template<> struct Type<int8_t> - { - enum { - type = TWML_TYPE_INT8, - }; - }; - - template<> struct Type<uint8_t> - { - enum { - type = TWML_TYPE_UINT8, - }; - }; - - - template<> struct Type<bool> - { - enum { - type = TWML_TYPE_BOOL, - }; - }; - -} -#endif diff --git a/twml/libtwml/include/twml/common.docx b/twml/libtwml/include/twml/common.docx new file mode 100644 index 000000000..a166451f7 Binary files /dev/null and b/twml/libtwml/include/twml/common.docx differ diff --git a/twml/libtwml/include/twml/common.h b/twml/libtwml/include/twml/common.h deleted file mode 100644 index c3a2e9aee..000000000 --- a/twml/libtwml/include/twml/common.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef TWML_LIBTWML_INCLUDE_TWML_COMMON_H_ -#define TWML_LIBTWML_INCLUDE_TWML_COMMON_H_ - -#define USE_ABSEIL_HASH 1 - -#if defined(USE_ABSEIL_HASH) -#include "absl/container/flat_hash_map.h" -#include "absl/container/flat_hash_set.h" -#elif defined(USE_DENSE_HASH) -#include -#include -#else -#include -#include -#endif // USE_ABSEIL_HASH - - -namespace twml { -#if defined(USE_ABSEIL_HASH) - template<typename K, typename V> - using Map = absl::flat_hash_map<K, V>; - - template<typename K> - using Set = absl::flat_hash_set<K>; -#elif defined(USE_DENSE_HASH) -// Do not use this unless a proper empty key can be found. 
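// The dry-run mode documented above enables exact two-pass encoding: pass 1
// counts bytes without writing, pass 2 fills an exactly-sized buffer. This is
// the same pattern BatchPredictionResponse::encodedSize uses later in this
// diff. TTYPE_DOUBLE (4 in the Thrift binary protocol) and field id 1 in the
// usage comment are assumptions, not part of ThriftWriter itself.
#include <twml/ThriftWriter.h>
#include <cstdint>
#include <vector>

template <typename EncodeFn>
std::vector<uint8_t> encode_with_sizing(EncodeFn encode) {
  twml::ThriftWriter dry(nullptr, 0, /*dry_run=*/true);   // pass 1: size only
  encode(dry);
  std::vector<uint8_t> out(dry.getBytesWritten());
  twml::ThriftWriter writer(out.data(), out.size());      // pass 2: real write
  encode(writer);
  return out;
}
// Usage:
//   auto bytes = encode_with_sizing([](twml::ThriftWriter &w) {
//     w.writeStructFieldHeader(4 /* TTYPE_DOUBLE */, 1);
//     w.writeDouble(0.5);
//     w.writeStructStop();
//   });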
- template<typename K, typename V> - using Map = google::dense_hash_map<K, V>; - - template<typename K> - using Set = google::dense_hash_set<K>; -#else - template<typename K, typename V> - using Map = std::unordered_map<K, V>; - - template<typename K> - using Set = std::unordered_set<K>; -#endif // USE_DENSE_HASH - -} // namespace twml - -#endif // TWML_LIBTWML_INCLUDE_TWML_COMMON_H_ \ No newline at end of file diff --git a/twml/libtwml/include/twml/defines.docx b/twml/libtwml/include/twml/defines.docx new file mode 100644 index 000000000..60f665938 Binary files /dev/null and b/twml/libtwml/include/twml/defines.docx differ diff --git a/twml/libtwml/include/twml/defines.h b/twml/libtwml/include/twml/defines.h deleted file mode 100644 index e7f7d138d..000000000 --- a/twml/libtwml/include/twml/defines.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include -#ifdef __cplusplus -extern "C" { -#endif - typedef enum { - TWML_TYPE_FLOAT32 = 1, - TWML_TYPE_FLOAT64 = 2, - TWML_TYPE_INT32 = 3, - TWML_TYPE_INT64 = 4, - TWML_TYPE_INT8 = 5, - TWML_TYPE_UINT8 = 6, - TWML_TYPE_BOOL = 7, - TWML_TYPE_STRING = 8, - TWML_TYPE_FLOAT = TWML_TYPE_FLOAT32, - TWML_TYPE_DOUBLE = TWML_TYPE_FLOAT64, - TWML_TYPE_UNKNOWN = -1, - } twml_type; - - typedef enum { - TWML_ERR_NONE = 1000, - TWML_ERR_SIZE = 1001, - TWML_ERR_TYPE = 1002, - TWML_ERR_THRIFT = 1100, - TWML_ERR_IO = 1200, - TWML_ERR_UNKNOWN = 1999, - } twml_err; -#ifdef __cplusplus -} -#endif - -#define TWMLAPI __attribute__((visibility("default"))) - -#ifndef TWML_INDEX_BASE -#define TWML_INDEX_BASE 0 -#endif diff --git a/twml/libtwml/include/twml/discretizer_impl.docx b/twml/libtwml/include/twml/discretizer_impl.docx new file mode 100644 index 000000000..4e35d96d0 Binary files /dev/null and b/twml/libtwml/include/twml/discretizer_impl.docx differ diff --git a/twml/libtwml/include/twml/discretizer_impl.h b/twml/libtwml/include/twml/discretizer_impl.h deleted file mode 100644 index 587bde458..000000000 --- a/twml/libtwml/include/twml/discretizer_impl.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once -#include -#include -#include - -#ifdef __cplusplus -namespace twml { - TWMLAPI void discretizerInfer( - Tensor &output_keys, - Tensor &output_vals, - const Tensor &input_ids, - const Tensor &input_vals, - const Tensor &bin_ids, - const Tensor &bin_vals, - const Tensor &feature_offsets, - int output_bits, - const Map<int64_t, int64_t> &ID_to_index, - int start_compute, - int end_compute, - int output_start); -} // namespace twml -#endif diff --git a/twml/libtwml/include/twml/functions.docx b/twml/libtwml/include/twml/functions.docx new file mode 100644 index 000000000..197fe0704 Binary files /dev/null and b/twml/libtwml/include/twml/functions.docx differ diff --git a/twml/libtwml/include/twml/functions.h b/twml/libtwml/include/twml/functions.h deleted file mode 100644 index c23680cac..000000000 --- a/twml/libtwml/include/twml/functions.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once -#include -#include - -#ifdef __cplusplus -namespace twml { - - // Adding these as an easy way to test the wrappers - TWMLAPI void add1(Tensor &output, const Tensor input); - TWMLAPI void copy(Tensor &output, const Tensor input); - TWMLAPI int64_t featureId(const std::string &feature); -} -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - // Adding these as an easy way to test the wrappers - TWMLAPI twml_err twml_add1(twml_tensor output, const twml_tensor input); - TWMLAPI twml_err twml_copy(twml_tensor output, const twml_tensor input); - TWMLAPI twml_err twml_get_feature_id(int64_t *result, const uint64_t len, const char *str); - -#ifdef __cplusplus -} -#endif diff --git 
a/twml/libtwml/include/twml/hashing_discretizer_impl.docx b/twml/libtwml/include/twml/hashing_discretizer_impl.docx new file mode 100644 index 000000000..354f7b4e1 Binary files /dev/null and b/twml/libtwml/include/twml/hashing_discretizer_impl.docx differ diff --git a/twml/libtwml/include/twml/hashing_discretizer_impl.h b/twml/libtwml/include/twml/hashing_discretizer_impl.h deleted file mode 100644 index a04efb7e0..000000000 --- a/twml/libtwml/include/twml/hashing_discretizer_impl.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once -#include -#include -#include -#include - -#ifdef __cplusplus -namespace twml { - TWMLAPI void hashDiscretizerInfer( - Tensor &output_keys, - Tensor &output_vals, - const Tensor &input_ids, - const Tensor &input_vals, - int n_bin, - const Tensor &bin_vals, - int output_bits, - const Map &ID_to_index, - int start_compute, - int end_compute, - int64_t options); -} // namespace twml -#endif diff --git a/twml/libtwml/include/twml/io/IOError.docx b/twml/libtwml/include/twml/io/IOError.docx new file mode 100644 index 000000000..7eb828fac Binary files /dev/null and b/twml/libtwml/include/twml/io/IOError.docx differ diff --git a/twml/libtwml/include/twml/io/IOError.h b/twml/libtwml/include/twml/io/IOError.h deleted file mode 100644 index 867ab44df..000000000 --- a/twml/libtwml/include/twml/io/IOError.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include - -namespace twml { -namespace io { - -class IOError : public twml::Error { - public: - enum Status { - OUT_OF_RANGE = 1, - WRONG_MAGIC = 2, - WRONG_HEADER = 3, - ERROR_HEADER_CHECKSUM = 4, - INVALID_METHOD = 5, - USING_RESERVED = 6, - ERROR_HEADER_EXTRA_FIELD_CHECKSUM = 7, - CANT_FIT_OUTPUT = 8, - SPLIT_FILE = 9, - BLOCK_SIZE_TOO_LARGE = 10, - SOURCE_LARGER_THAN_DESTINATION = 11, - DESTINATION_LARGER_THAN_CAPACITY = 12, - HEADER_FLAG_MISMATCH = 13, - NOT_ENOUGH_INPUT = 14, - ERROR_SOURCE_BLOCK_CHECKSUM = 15, - COMPRESSED_DATA_VIOLATION = 16, - ERROR_DESTINATION_BLOCK_CHECKSUM = 17, - EMPTY_RECORD = 18, - MALFORMED_MEMORY_RECORD = 19, - UNSUPPORTED_OUTPUT_TYPE = 20, - OTHER_ERROR - }; - - IOError(Status status); - - Status status() const { - return m_status; - } - - private: - Status m_status; -}; - -} -} diff --git a/twml/libtwml/include/twml/optim.docx b/twml/libtwml/include/twml/optim.docx new file mode 100644 index 000000000..6024f898a Binary files /dev/null and b/twml/libtwml/include/twml/optim.docx differ diff --git a/twml/libtwml/include/twml/optim.h b/twml/libtwml/include/twml/optim.h deleted file mode 100644 index d0a2df4ef..000000000 --- a/twml/libtwml/include/twml/optim.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once -#include -#include - -#ifdef __cplusplus -namespace twml { - TWMLAPI void linearInterpolation( - Tensor output, - const Tensor input, - const Tensor xs, - const Tensor ys); - - TWMLAPI void nearestInterpolation( - Tensor output, - const Tensor input, - const Tensor xs, - const Tensor ys); - - TWMLAPI void mdlInfer( - Tensor &output_keys, - Tensor &output_vals, - const Tensor &input_keys, - const Tensor &input_vals, - const Tensor &bin_ids, - const Tensor &bin_vals, - const Tensor &feature_offsets, - bool return_bin_indices = false); -} -#endif - -#ifdef __cplusplus -extern "C" { -#endif - TWMLAPI twml_err twml_optim_nearest_interpolation( - twml_tensor output, - const twml_tensor input, - const twml_tensor xs, - const twml_tensor ys); - - TWMLAPI twml_err twml_optim_mdl_infer( - twml_tensor output_keys, - twml_tensor output_vals, - const twml_tensor input_keys, - const twml_tensor input_vals, - 
const twml_tensor bin_ids, - const twml_tensor bin_vals, - const twml_tensor feature_offsets, - const bool return_bin_indices = false); -#ifdef __cplusplus -} -#endif diff --git a/twml/libtwml/include/twml/utilities.docx b/twml/libtwml/include/twml/utilities.docx new file mode 100644 index 000000000..6a7b9f52f Binary files /dev/null and b/twml/libtwml/include/twml/utilities.docx differ diff --git a/twml/libtwml/include/twml/utilities.h b/twml/libtwml/include/twml/utilities.h deleted file mode 100644 index a30b44aff..000000000 --- a/twml/libtwml/include/twml/utilities.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#ifdef __cplusplus -namespace twml { - -inline int64_t mixDiscreteIdAndValue(int64_t key, int64_t value) { - key ^= ((17LL + value) * 2654435761LL); - return key; -} - -inline int64_t mixStringIdAndValue(int64_t key, int32_t str_len, const uint8_t *str) { - int32_t hash = 0; - for (int32_t i = 0; i < str_len; i++) { - hash = (31 * hash) + (int32_t)str[i]; - } - return key ^ hash; -} -} -#endif \ No newline at end of file diff --git a/twml/libtwml/setup.cfg b/twml/libtwml/setup.cfg deleted file mode 100644 index d5253c179..000000000 --- a/twml/libtwml/setup.cfg +++ /dev/null @@ -1,9 +0,0 @@ -[bdist_wheel] -universal=1 - -[build] -build-lib=build_dir -build-temp=build_dir - -[bdist] -bdist-base=build_dir diff --git a/twml/libtwml/setup.docx b/twml/libtwml/setup.docx new file mode 100644 index 000000000..0dc51e8ef Binary files /dev/null and b/twml/libtwml/setup.docx differ diff --git a/twml/libtwml/setup.py b/twml/libtwml/setup.py deleted file mode 100644 index 2dcfa105d..000000000 --- a/twml/libtwml/setup.py +++ /dev/null @@ -1,12 +0,0 @@ -""" -libtwml setup.py module -""" -from setuptools import setup, find_packages - -setup( - name='libtwml', - version='2.0', - description="Tensorflow C++ ops for twml", - packages=find_packages(), - data_files=[('', ['libtwml_tf.so'])], -) diff --git a/twml/libtwml/src/lib/BatchPredictionRequest.cpp b/twml/libtwml/src/lib/BatchPredictionRequest.cpp deleted file mode 100644 index cca8d6545..000000000 --- a/twml/libtwml/src/lib/BatchPredictionRequest.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "internal/thrift.h" -#include "internal/error.h" - -#include -#include -#include -#include - -#include -#include -#include - -namespace twml { - -template -void GenericBatchPredictionRequest::decode(Reader &reader) { - uint8_t feature_type = reader.readByte(); - while (feature_type != TTYPE_STOP) { - int16_t field_id = reader.readInt16(); - - switch (field_id) { - case 1: { - CHECK_THRIFT_TYPE(feature_type, TTYPE_LIST, "list"); - CHECK_THRIFT_TYPE(reader.readByte(), TTYPE_STRUCT, "list_element"); - - int32_t length = reader.readInt32(); - m_requests.resize(length, RecordType(this->num_labels, this->num_weights)); - for (auto &request : m_requests) { - request.decode(reader); - } - - break; - } - case 2: { - CHECK_THRIFT_TYPE(feature_type, TTYPE_STRUCT, "commonFeatures"); - m_common_features.decode(reader); - break; - } - default: throw ThriftInvalidField(field_id, __func__); - } - - feature_type = reader.readByte(); - } - return; -} - - -// Instantiate decoders. 
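// A worked example of the id/value mixing helpers in utilities.h above. For a
// discrete feature, the value is folded into the feature id so that distinct
// (id, value) pairs yield distinct hashed keys; the string variant folds in a
// Java-style 31*h + c hash of the bytes. The numbers below are arbitrary.
#include <cstdint>

int64_t mix_example() {
  const int64_t feature_id = 100;
  const int64_t value = 3;
  // Same arithmetic as twml::mixDiscreteIdAndValue(feature_id, value):
  int64_t mixed = feature_id ^ ((17LL + value) * 2654435761LL);
  // mixDiscreteIdAndValue(100, 3) != mixDiscreteIdAndValue(100, 4), so the
  // same feature id with different values lands under different hashed keys.
  return mixed;
}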
-template void GenericBatchPredictionRequest<HashedDataRecord>::decode(HashedDataRecordReader &reader); -template void GenericBatchPredictionRequest<DataRecord>::decode(DataRecordReader &reader); - -} // namespace twml diff --git a/twml/libtwml/src/lib/BatchPredictionRequest.docx b/twml/libtwml/src/lib/BatchPredictionRequest.docx new file mode 100644 index 000000000..b8245e602 Binary files /dev/null and b/twml/libtwml/src/lib/BatchPredictionRequest.docx differ diff --git a/twml/libtwml/src/lib/BatchPredictionResponse.cpp b/twml/libtwml/src/lib/BatchPredictionResponse.cpp deleted file mode 100644 index 2a17d3605..000000000 --- a/twml/libtwml/src/lib/BatchPredictionResponse.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include "internal/endianutils.h" -#include "internal/error.h" -#include "internal/thrift.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -// When the number of predictions is very high, as in some of the Ads use cases, the generic thrift -// encoder becomes very expensive because we have to deal with lua tables. -// This function is a special operation to efficiently write batch prediction responses based on -// tensors. -namespace twml { - -BatchPredictionResponse::BatchPredictionResponse( - const Tensor &keys, const Tensor &values, - const Tensor &dense_keys, const std::vector<RawTensor> &dense_values -) : keys_(keys), values_(values), dense_keys_(dense_keys), dense_values_(dense_values) { - // determine batch size - if (values_.getNumDims() > 0) { - batch_size_ = values_.getDim(0); - } else if (dense_keys_.getNumElements() < 1) { - throw twml::Error(TWML_ERR_TYPE, "Continuous values and dense tensors are both empty"); - } else if (dense_keys_.getNumElements() != dense_values_.size()) { - throw twml::Error(TWML_ERR_TYPE, "Number of tensors not equal to number of keys"); - } else { - // dim 0 for each tensor indexes batch elements - std::vector<uint64_t> batch_sizes; - batch_sizes.reserve(dense_values_.size()); - - for (int i = 0; i < dense_values_.size(); i++) - batch_sizes.push_back(dense_values_.at(i).getDim(0)); - - if (std::adjacent_find( - batch_sizes.begin(), - batch_sizes.end(), - std::not_equal_to<uint64_t>()) != batch_sizes.end()) - throw twml::Error(TWML_ERR_TYPE, "Batch size (dim 0) for all tensors must be the same"); - - batch_size_ = dense_values.at(0).getDim(0); - } -} - -void BatchPredictionResponse::encode(twml::ThriftWriter &thrift_writer) { - if (hasContinuous()) { - switch (values_.getType()) { - case TWML_TYPE_FLOAT: - serializePredictions<float>(thrift_writer); - break; - case TWML_TYPE_DOUBLE: - serializePredictions<double>(thrift_writer); - break; - default: - throw twml::Error(TWML_ERR_TYPE, "Predictions must be float or double."); - } - } else { - // dense tensor predictions; the value type is unused in this case - serializePredictions<double>(thrift_writer); - } -} - -template <typename T> -void BatchPredictionResponse::serializePredictions(twml::ThriftWriter &thrift_writer) { - twml::DataRecordWriter record_writer = twml::DataRecordWriter(thrift_writer); - - // start BatchPredictionResponse - thrift_writer.writeStructFieldHeader(TTYPE_LIST, BPR_PREDICTIONS); - thrift_writer.writeListHeader(TTYPE_STRUCT, getBatchSize()); - - for (int i = 0; i < getBatchSize(); i++) { - twml::DataRecord record = twml::DataRecord(); - - if (hasContinuous()) { - const T *values = values_.getData<T>(); - const int64_t *local_keys = keys_.getData<int64_t>(); - const T *local_values = values + (i * getPredictionSize()); - record.addContinuous(local_keys, getPredictionSize(), local_values); - } - - if (hasDenseTensors()) { - const int64_t *local_dense_keys = 
dense_keys_.getData(); - - for (int j = 0; j < dense_keys_.getNumElements(); j++) { - const RawTensor &dense_value = dense_values_.at(j).getSlice(i); - record.addRawTensor(local_dense_keys[j], dense_value); - } - } - - record_writer.write(record); - } - - // end BatchPredictionResponse - thrift_writer.writeStructStop(); -} - -// calculate expected binary Thrift size (no memory is copied) -uint64_t BatchPredictionResponse::encodedSize() { - bool dry_mode = true; - twml::ThriftWriter dry_writer = twml::ThriftWriter(nullptr, 0, dry_mode); - encode(dry_writer); - return dry_writer.getBytesWritten(); -} - -void BatchPredictionResponse::write(Tensor &result) { - size_t result_size = result.getNumElements(); - uint8_t *result_data = result.getData(); - - if (result_size != this->encodedSize()) { - throw twml::Error(TWML_ERR_SIZE, "Sizes do not match"); - } - - twml::ThriftWriter writer = twml::ThriftWriter(result_data, result_size); - encode(writer); -} - -} // namespace twml diff --git a/twml/libtwml/src/lib/BatchPredictionResponse.docx b/twml/libtwml/src/lib/BatchPredictionResponse.docx new file mode 100644 index 000000000..e60d49b69 Binary files /dev/null and b/twml/libtwml/src/lib/BatchPredictionResponse.docx differ diff --git a/twml/libtwml/src/lib/BlockFormatReader.cpp b/twml/libtwml/src/lib/BlockFormatReader.cpp deleted file mode 100644 index 98f49ac4f..000000000 --- a/twml/libtwml/src/lib/BlockFormatReader.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include -#include -#include - -#define OFFSET_CHUNK (32768) -#define RECORDS_PER_BLOCK (100) - -#define WIRE_TYPE_VARINT (0) -#define WIRE_TYPE_64BIT (1) -#define WIRE_TYPE_LENGTH_PREFIXED (2) - -/* - This was all extracted from the ancient elephant bird scrolls - https://github.com/twitter/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/mapreduce/io/BinaryBlockReader.java -*/ - -#define MARKER_SIZE (16) -static uint8_t _marker[MARKER_SIZE] = { - 0x29, 0xd8, 0xd5, 0x06, 0x58, 0xcd, 0x4c, 0x29, - 0xb2, 0xbc, 0x57, 0x99, 0x21, 0x71, 0xbd, 0xff -}; - - -namespace twml { -BlockFormatReader::BlockFormatReader(): - record_size_(0), block_pos_(0), block_end_(0) { - memset(classname_, 0, sizeof(classname_)); -} - - -bool BlockFormatReader::next() { - record_size_ = read_one_record_size(); - if (record_size_ < 0) { - record_size_ = 0; - return false; - } - return true; -} - -int BlockFormatReader::read_int() { - uint8_t buff[4]; - if (read_bytes(buff, 1, 4) != 4) - return -1; - return static_cast(buff[0]) - | (static_cast(buff[1] << 8)) - | (static_cast(buff[2] << 16)) - | (static_cast(buff[3] << 24)); -} - -int BlockFormatReader::consume_marker(int scan) { - uint8_t buff[MARKER_SIZE]; - if (read_bytes(buff, 1, MARKER_SIZE) != MARKER_SIZE) - return 0; - - while (memcmp(buff, _marker, MARKER_SIZE) != 0) { - if (!scan) return 0; - memmove(buff, buff + 1, MARKER_SIZE - 1); - if (read_bytes(buff + MARKER_SIZE - 1, 1, 1) != 1) - return 0; - } - return 1; -} - -int BlockFormatReader::unpack_varint_i32() { - int value = 0; - for (int i = 0; i < 10; i++) { - uint8_t x; - if (read_bytes(&x, 1, 1) != 1) - return -1; - block_pos_++; - value |= (static_cast(x & 0x7F)) << (i * 7); - if ((x & 0x80) == 0) break; - } - return value; -} - - -int BlockFormatReader::unpack_tag_and_wiretype(uint32_t *tag, uint32_t *wiretype) { - uint8_t x; - if (read_bytes(&x, 1, 1) != 1) - return -1; - - block_pos_++; - *tag = (x & 0x7f) >> 3; - *wiretype = x & 7; - if ((x & 0x80) == 0) - return 0; - - return -1; -} - -int BlockFormatReader::unpack_string(char *out, 
uint64_t max_out_len) { - int len = unpack_varint_i32(); - if (len < 0) return -1; - uint64_t slen = len; - if (slen + 1 > max_out_len) return -1; - uint64_t n = read_bytes(out, 1, slen); - if (n != slen) return -1; - block_pos_ += n; - out[n] = 0; - return 0; -} - -int BlockFormatReader::read_one_record_size() { - for (int i = 0; i < 2; i++) { - if (block_end_ == 0) { - while (consume_marker(1)) { - int block_size = read_int(); - if (block_size > 0) { - block_pos_ = 0; - block_end_ = block_size; - uint32_t tag, wiretype; - if (unpack_tag_and_wiretype(&tag, &wiretype)) - throw std::invalid_argument("unsupported tag and wiretype"); - if (tag != 1 && wiretype != WIRE_TYPE_VARINT) - throw std::invalid_argument("unexpected tag and wiretype"); - int version = unpack_varint_i32(); - if (version != 1) - throw std::invalid_argument("unsupported version"); - if (unpack_tag_and_wiretype(&tag, &wiretype)) - throw std::invalid_argument("unsupported tag and wiretype"); - if (tag != 2 && wiretype != WIRE_TYPE_LENGTH_PREFIXED) - throw std::invalid_argument("unexpected tag and wiretype"); - if (unpack_string(classname_, sizeof(classname_)-1)) - throw std::invalid_argument("unsupported class name"); - break; - } - } - } - if (block_pos_ < block_end_) { - uint32_t tag, wiretype; - if (unpack_tag_and_wiretype(&tag, &wiretype)) - throw std::invalid_argument("unsupported tag and wiretype"); - if (tag != 3 && wiretype != WIRE_TYPE_LENGTH_PREFIXED) - throw std::invalid_argument("unexpected tag and wiretype"); - int record_size = unpack_varint_i32(); - block_pos_ += record_size; - return record_size; - } else { - block_end_ = 0; - } - } - return -1; -} -} // namespace twml diff --git a/twml/libtwml/src/lib/BlockFormatReader.docx b/twml/libtwml/src/lib/BlockFormatReader.docx new file mode 100644 index 000000000..264731e98 Binary files /dev/null and b/twml/libtwml/src/lib/BlockFormatReader.docx differ diff --git a/twml/libtwml/src/lib/BlockFormatWriter.cpp b/twml/libtwml/src/lib/BlockFormatWriter.cpp deleted file mode 100644 index d66e17351..000000000 --- a/twml/libtwml/src/lib/BlockFormatWriter.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "internal/error.h" -#include -#include -#include - -#define WIRE_TYPE_LENGTH_PREFIXED (2) -#define WIRE_TYPE_VARINT (0) - -#ifndef PATH_MAX -#define PATH_MAX (8096) -#endif - -#define MARKER_SIZE (16) -static uint8_t _marker[MARKER_SIZE] = { - 0x29, 0xd8, 0xd5, 0x06, 0x58, 0xcd, 0x4c, 0x29, - 0xb2, 0xbc, 0x57, 0x99, 0x21, 0x71, 0xbd, 0xff -}; -namespace twml { - - BlockFormatWriter::BlockFormatWriter(const char *file_name, int record_per_block) : - file_name_(file_name), record_index_(0), records_per_block_(record_per_block) { - snprintf(temp_file_name_, PATH_MAX, "%s.block", file_name); - outputfile_ = fopen(file_name_, "a"); - } - - BlockFormatWriter::~BlockFormatWriter() { - fclose(outputfile_); - } - // TODO: use fstream - int BlockFormatWriter::pack_tag_and_wiretype(FILE *buffer, uint32_t tag, uint32_t wiretype) { - uint8_t x = ((tag & 0x0f) << 3) | (wiretype & 0x7); - size_t n = fwrite(&x, 1, 1, buffer); - if (n != 1) { - return -1; - } - return 0; - } - - int BlockFormatWriter::pack_varint_i32(FILE *buffer, int value) { - for (int i = 0; i < 10; i++) { - uint8_t x = value & 0x7F; - value = value >> 7; - if (value != 0) x |= 0x80; - size_t n = fwrite(&x, 1, 1, buffer); - if (n != 1) { - return -1; - } - if (value == 0) break; - } - return 0; - } - - int BlockFormatWriter::pack_string(FILE *buffer, const char *in, size_t in_len) { - if (pack_varint_i32(buffer, in_len)) 
return -1; - size_t n = fwrite(in, 1, in_len, buffer); - if (n != in_len) return -1; - return 0; - } - - int BlockFormatWriter::write_int(FILE *buffer, int value) { - uint8_t buff[4]; - buff[0] = value & 0xff; - buff[1] = (value >> 8) & 0xff; - buff[2] = (value >> 16) & 0xff; - buff[3] = (value >> 24) & 0xff; - size_t n = fwrite(buff, 1, 4, buffer); - if (n != 4) { - return -1; - } - return 0; - } - - int BlockFormatWriter::write(const char *class_name, const char *record, int record_len) { - if (record) { - record_index_++; - // The buffer holds at most records_per_block_ records (one block). - FILE *buffer = fopen(temp_file_name_, "a"); - if (!buffer) return -1; - if (ftell(buffer) == 0) { - if (pack_tag_and_wiretype(buffer, 1, WIRE_TYPE_VARINT)) - throw std::invalid_argument("Error writing tag and wiretype"); - if (pack_varint_i32(buffer, 1)) - throw std::invalid_argument("Error writing varint_i32"); - if (pack_tag_and_wiretype(buffer, 2, WIRE_TYPE_LENGTH_PREFIXED)) - throw std::invalid_argument("Error writing tag and wiretype"); - if (pack_string(buffer, class_name, strlen(class_name))) - throw std::invalid_argument("Error writing class name"); - } - if (pack_tag_and_wiretype(buffer, 3, WIRE_TYPE_LENGTH_PREFIXED)) - throw std::invalid_argument("Error writing tag and wiretype"); - if (pack_string(buffer, record, record_len)) - throw std::invalid_argument("Error writing record"); - fclose(buffer); - } - - if ((record_index_ % records_per_block_) == 0) { - flush(); - } - return 0; - } - - int BlockFormatWriter::flush() { - // Flush the records in the buffer to outputfile - FILE *buffer = fopen(temp_file_name_, "r"); - if (buffer) { - fseek(buffer, 0, SEEK_END); - int64_t block_size = ftell(buffer); - fseek(buffer, 0, SEEK_SET); - - if (fwrite(_marker, sizeof(_marker), 1, outputfile_) != 1) return 1; - if (write_int(outputfile_, block_size)) return 1; - uint8_t buff[4096]; - while (1) { - size_t n = fread(buff, 1, sizeof(buff), buffer); - if (n) { - size_t x = fwrite(buff, 1, n, outputfile_); - if (x != n) return 1; - } - if (n != sizeof(buff)) break; - } - fclose(buffer); - // Remove the buffer - if (remove(temp_file_name_)) return 1; - } - return 0; - } - - block_format_writer BlockFormatWriter::getHandle() { - return reinterpret_cast<block_format_writer>(this); - } - - BlockFormatWriter *getBlockFormatWriter(block_format_writer w) { - return reinterpret_cast<BlockFormatWriter *>(w); - } - -} // namespace twml - -twml_err block_format_writer_create(block_format_writer *w, const char *file_name, int records_per_block) { - HANDLE_EXCEPTIONS( - twml::BlockFormatWriter *writer = new twml::BlockFormatWriter(file_name, records_per_block); - *w = reinterpret_cast<block_format_writer>(writer);); - return TWML_ERR_NONE; -} - -twml_err block_format_write(block_format_writer w, const char *class_name, const char *record, int record_len) { - HANDLE_EXCEPTIONS( - twml::BlockFormatWriter *writer = twml::getBlockFormatWriter(w); - writer->write(class_name, record, record_len);); - return TWML_ERR_NONE; -} - -twml_err block_format_flush(block_format_writer w) { - HANDLE_EXCEPTIONS( - twml::BlockFormatWriter *writer = twml::getBlockFormatWriter(w); - writer->flush();); - return TWML_ERR_NONE; -} - -twml_err block_format_writer_delete(const block_format_writer w) { - HANDLE_EXCEPTIONS( - delete twml::getBlockFormatWriter(w);); - return TWML_ERR_NONE; -} diff --git a/twml/libtwml/src/lib/BlockFormatWriter.docx b/twml/libtwml/src/lib/BlockFormatWriter.docx new file mode 100644 index 000000000..50a1a4122 Binary files /dev/null and 
b/twml/libtwml/src/lib/BlockFormatWriter.docx differ diff --git a/twml/libtwml/src/lib/CMakeLists.docx b/twml/libtwml/src/lib/CMakeLists.docx new file mode 100644 index 000000000..1a9723d42 Binary files /dev/null and b/twml/libtwml/src/lib/CMakeLists.docx differ diff --git a/twml/libtwml/src/lib/CMakeLists.txt b/twml/libtwml/src/lib/CMakeLists.txt deleted file mode 100644 index 6bf2a6e7c..000000000 --- a/twml/libtwml/src/lib/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}) -cmake_minimum_required(VERSION 2.8 FATAL_ERROR) -cmake_policy(VERSION 2.8) - - -set(TWML_VERSION "2.0.0") -string(REPLACE "." ";" TWML_VERSION_LIST ${TWML_VERSION}) -list(GET TWML_VERSION_LIST 0 TWML_SOVERSION) - -execute_process( - COMMAND - $ENV{LIBTWML_HOME}/src/ops/scripts/get_inc.sh - RESULT_VARIABLE - TF_RES - OUTPUT_VARIABLE - TF_INC) - -file(GLOB_RECURSE sources *.cpp) - -set (CMAKE_CXX_FLAGS "-Wall -std=c++11 ${CMAKE_CXX_FLAGS} -fPIC") - -add_library(twml STATIC ${sources}) - -target_include_directories( - twml - PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR}/../../include - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR} - ${TF_INC} # Abseil dependency from TensorFlow - ) - -set_target_properties(twml PROPERTIES - VERSION "${TWML_VERSION}" - SOVERSION "${TWML_SOVERSION}" - ) diff --git a/twml/libtwml/src/lib/CPPLINT.cfg b/twml/libtwml/src/lib/CPPLINT.cfg deleted file mode 100644 index dfe873a9d..000000000 --- a/twml/libtwml/src/lib/CPPLINT.cfg +++ /dev/null @@ -1 +0,0 @@ -exclude_files=murmur_hash3.cpp \ No newline at end of file diff --git a/twml/libtwml/src/lib/CPPLINT.docx b/twml/libtwml/src/lib/CPPLINT.docx new file mode 100644 index 000000000..62a59a720 Binary files /dev/null and b/twml/libtwml/src/lib/CPPLINT.docx differ diff --git a/twml/libtwml/src/lib/DataRecord.cpp b/twml/libtwml/src/lib/DataRecord.cpp deleted file mode 100644 index 766422063..000000000 --- a/twml/libtwml/src/lib/DataRecord.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "internal/thrift.h" -#include "internal/error.h" - -#include -#include -#include -#include - -#include -#include - -namespace twml { - -void DataRecord::decode(DataRecordReader &reader) { - uint8_t feature_type = reader.readByte(); - while (feature_type != TTYPE_STOP) { - int16_t field_id = reader.readInt16(); - switch (field_id) { - case DR_BINARY: - reader.readBinary(feature_type, this); - break; - case DR_CONTINUOUS: - reader.readContinuous(feature_type, this); - break; - case DR_DISCRETE: - reader.readDiscrete(feature_type, this); - break; - case DR_STRING: - reader.readString(feature_type, this); - break; - case DR_SPARSE_BINARY: - reader.readSparseBinary(feature_type, this); - break; - case DR_SPARSE_CONTINUOUS: - reader.readSparseContinuous(feature_type, this); - break; - case DR_BLOB: - reader.readBlob(feature_type, this); - break; - case DR_GENERAL_TENSOR: - reader.readTensor(feature_type, dynamic_cast<TensorRecord *>(this)); - break; - case DR_SPARSE_TENSOR: - reader.readSparseTensor(feature_type, dynamic_cast<TensorRecord *>(this)); - break; - default: - throw ThriftInvalidField(field_id, "DataRecord::decode"); - } - feature_type = reader.readByte(); - } -} - -void DataRecord::addLabel(int64_t id, double label) { - m_labels[id] = label; -} - -void DataRecord::addWeight(int64_t id, double val) { - m_weights[id] = val; -} - -void DataRecord::clear() { - std::fill(m_labels.begin(), m_labels.end(), std::nanf("")); - std::fill(m_weights.begin(), m_weights.end(), 0.0); - m_binary.clear(); - m_continuous.clear(); - m_discrete.clear(); - m_string.clear(); - 
m_sparsebinary.clear(); - m_sparsecontinuous.clear(); -} - -} // namespace twml diff --git a/twml/libtwml/src/lib/DataRecord.docx b/twml/libtwml/src/lib/DataRecord.docx new file mode 100644 index 000000000..f245a0b0f Binary files /dev/null and b/twml/libtwml/src/lib/DataRecord.docx differ diff --git a/twml/libtwml/src/lib/DataRecordReader.cpp b/twml/libtwml/src/lib/DataRecordReader.cpp deleted file mode 100644 index f151e07a7..000000000 --- a/twml/libtwml/src/lib/DataRecordReader.cpp +++ /dev/null @@ -1,230 +0,0 @@ -#include "internal/thrift.h" -#include "internal/error.h" -#include -#include - -#include - -namespace twml { - -inline std::string bufferToString(int32_t str_len, const uint8_t *str) { - return std::string(str, str + str_len); -} - - -bool DataRecordReader::keepKey(const int64_t &key, int64_t &code) { - auto it = m_keep_map->find(key); - if (it == m_keep_map->end()) return false; - code = it->second; - return true; -} - -bool DataRecordReader::isLabel(const int64_t &key, int64_t &code) { - if (m_labels_map == nullptr) return false; - auto it = m_labels_map->find(key); - if (it == m_labels_map->end()) return false; - code = it->second; - return true; -} - -bool DataRecordReader::isWeight(const int64_t &key, int64_t &code) { - if (m_weights_map == nullptr) return false; - auto it = m_weights_map->find(key); - if (it == m_weights_map->end()) return false; - code = it->second; - return true; -} - - -void DataRecordReader::readBinary( - const int feature_type, - DataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_SET, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - int32_t length = readInt32(); - int64_t id, code; -#ifdef USE_DENSE_HASH - record->m_binary.resize(2 * length); -#else - record->m_binary.reserve(2 * length); -#endif - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - record->m_binary.insert(id); - if (isLabel(id, code)) { - record->addLabel(code); - } - } -} - -void DataRecordReader::readContinuous( - const int feature_type, - DataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_DOUBLE, "value_type"); - - int32_t length = readInt32(); - int64_t id, code; -#ifdef USE_DENSE_HASH - record->m_continuous.resize(2 * length); -#else - record->m_continuous.reserve(2 * length); -#endif - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - double val = readDouble(); - if (!std::isnan(val)) { - record->m_continuous[id] = val; - } - if (isLabel(id, code)) { - record->addLabel(code, val); - } else if (isWeight(id, code)) { - record->addWeight(code, val); - } - } -} - -void DataRecordReader::readDiscrete( - const int feature_type, - DataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "value_type"); - - int32_t length = readInt32(); - int64_t id; -#ifdef USE_DENSE_HASH - record->m_discrete.resize(2 * length); -#else - record->m_discrete.reserve(2 * length); -#endif - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - record->m_discrete[id] = readInt64(); - } -} - -void DataRecordReader::readString( - const int feature_type, - DataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "value_type"); - int32_t length = readInt32(); - int64_t id; - -#ifdef USE_DENSE_HASH - 
record->m_string.resize(2 * length); -#else - record->m_string.reserve(2 * length); -#endif - - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - const uint8_t *begin = nullptr; - int32_t str_len = getRawBuffer(&begin); - record->m_string[id] = bufferToString(str_len, begin); - } -} - -void DataRecordReader::readSparseBinary( - const int feature_type, - DataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_SET, "value_type"); - - int32_t length = readInt32(); - int64_t id, code; - -#ifdef USE_DENSE_HASH - record->m_sparsebinary.resize(2 * length); -#else - record->m_sparsebinary.reserve(2 * length); -#endif - - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "set:key_type"); - int32_t set_length = readInt32(); - if (keepKey(id, code)) { - record->m_sparsebinary[id].reserve(set_length); - for (int32_t j = 0; j < set_length; j++) { - const uint8_t *begin = nullptr; - int32_t str_len = getRawBuffer(&begin); - record->m_sparsebinary[id].push_back(bufferToString(str_len, begin)); - } - } else { - for (int32_t j = 0; j < set_length; j++) { - int32_t str_len = readInt32(); - skipLength(str_len); - } - } - } -} - -void DataRecordReader::readSparseContinuous( - const int feature_type, - DataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_MAP, "value_type"); - - int32_t length = readInt32(); - int64_t id, code; - -#ifdef USE_DENSE_HASH - record->m_sparsecontinuous.resize(2 * length); -#else - record->m_sparsecontinuous.reserve(2 * length); -#endif - - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "map::key_type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_DOUBLE, "map::value_type"); - int32_t map_length = readInt32(); - if (keepKey(id, code)) { - record->m_sparsecontinuous[id].reserve(map_length); - for (int32_t j = 0; j < map_length; j++) { - const uint8_t *begin = nullptr; - int32_t str_len = getRawBuffer(&begin); - double val = readDouble(); - if (!std::isnan(val)) { - record->m_sparsecontinuous[id].push_back({bufferToString(str_len, begin), val}); - } - } - } else { - for (int32_t j = 0; j < map_length; j++) { - int32_t str_len = readInt32(); - skipLength(str_len); - skip(); - } - } - } -} - -void DataRecordReader::readBlob( - const int feature_type, - DataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "value_type"); - - int32_t length = readInt32(); - int64_t id, code; - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - if (keepKey(id, code)) { - const uint8_t *begin = nullptr; - int32_t blob_len = getRawBuffer(&begin); - record->m_blob[id] = std::vector(begin, begin + blob_len); - } else { - int32_t str_len = readInt32(); - skipLength(str_len); - } - } -} - -} // namespace twml diff --git a/twml/libtwml/src/lib/DataRecordReader.docx b/twml/libtwml/src/lib/DataRecordReader.docx new file mode 100644 index 000000000..c5c08a6fc Binary files /dev/null and b/twml/libtwml/src/lib/DataRecordReader.docx differ diff --git a/twml/libtwml/src/lib/DataRecordWriter.cpp b/twml/libtwml/src/lib/DataRecordWriter.cpp deleted file mode 100644 index e12a50d48..000000000 --- a/twml/libtwml/src/lib/DataRecordWriter.cpp +++ 
/dev/null @@ -1,162 +0,0 @@ -#include "internal/error.h" -#include "internal/thrift.h" - -#include -#include -#include -#include -#include - -using namespace twml::io; - -namespace twml { - -void DataRecordWriter::writeBinary(twml::DataRecord &record) { - const DataRecord::BinaryFeatures bin_features = record.getBinary(); - - if (bin_features.size() > 0) { - m_thrift_writer.writeStructFieldHeader(TTYPE_SET, DR_BINARY); - m_thrift_writer.writeListHeader(TTYPE_I64, bin_features.size()); - - for (const auto &it : bin_features) { - m_thrift_writer.writeInt64(it); - } - } -} - -void DataRecordWriter::writeContinuous(twml::DataRecord &record) { - const DataRecord::ContinuousFeatures cont_features = record.getContinuous(); - - if (cont_features.size() > 0) { - m_thrift_writer.writeStructFieldHeader(TTYPE_MAP, DR_CONTINUOUS); - m_thrift_writer.writeMapHeader(TTYPE_I64, TTYPE_DOUBLE, cont_features.size()); - - for (const auto &it : cont_features) { - m_thrift_writer.writeInt64(it.first); - m_thrift_writer.writeDouble(it.second); - } - } -} - -void DataRecordWriter::writeDiscrete(twml::DataRecord &record) { - const DataRecord::DiscreteFeatures disc_features = record.getDiscrete(); - - if (disc_features.size() > 0) { - m_thrift_writer.writeStructFieldHeader(TTYPE_MAP, DR_DISCRETE); - m_thrift_writer.writeMapHeader(TTYPE_I64, TTYPE_I64, disc_features.size()); - - for (const auto &it : disc_features) { - m_thrift_writer.writeInt64(it.first); - m_thrift_writer.writeInt64(it.second); - } - } -} - -void DataRecordWriter::writeString(twml::DataRecord &record) { - const DataRecord::StringFeatures str_features = record.getString(); - - if (str_features.size() > 0) { - m_thrift_writer.writeStructFieldHeader(TTYPE_MAP, DR_STRING); - m_thrift_writer.writeMapHeader(TTYPE_I64, TTYPE_STRING, str_features.size()); - - - for (const auto &it : str_features) { - m_thrift_writer.writeInt64(it.first); - m_thrift_writer.writeString(it.second); - } - } -} - -// convert from internal representation list<(i64, string)> -// to Thrift representation map> -void DataRecordWriter::writeSparseBinaryFeatures(twml::DataRecord &record) { - const DataRecord::SparseBinaryFeatures sp_bin_features = record.getSparseBinary(); - - // write map> as Thrift - if (sp_bin_features.size() > 0) { - m_thrift_writer.writeStructFieldHeader(TTYPE_MAP, DR_SPARSE_BINARY); - m_thrift_writer.writeMapHeader(TTYPE_I64, TTYPE_SET, sp_bin_features.size()); - - for (auto key_vals : sp_bin_features) { - m_thrift_writer.writeInt64(key_vals.first); - m_thrift_writer.writeListHeader(TTYPE_STRING, key_vals.second.size()); - - for (auto name : key_vals.second) - m_thrift_writer.writeString(name); - } - } -} - -// convert from internal representation list<(i64, string, double)> -// to Thrift representation map> -void DataRecordWriter::writeSparseContinuousFeatures(twml::DataRecord &record) { - const DataRecord::SparseContinuousFeatures sp_cont_features = record.getSparseContinuous(); - - // write map> as Thrift - if (sp_cont_features.size() > 0) { - m_thrift_writer.writeStructFieldHeader(TTYPE_MAP, DR_SPARSE_CONTINUOUS); - m_thrift_writer.writeMapHeader(TTYPE_I64, TTYPE_MAP, sp_cont_features.size()); - - for (auto key_vals : sp_cont_features) { - m_thrift_writer.writeInt64(key_vals.first); - - if (key_vals.second.size() == 0) - throw IOError(IOError::MALFORMED_MEMORY_RECORD); - - m_thrift_writer.writeMapHeader(TTYPE_STRING, TTYPE_DOUBLE, key_vals.second.size()); - - for (auto map_str_double : key_vals.second) { - m_thrift_writer.writeString(map_str_double.first); - 
m_thrift_writer.writeDouble(map_str_double.second); - } - } - } -} - -void DataRecordWriter::writeBlobFeatures(twml::DataRecord &record) { - const DataRecord::BlobFeatures blob_features = record.getBlob(); - - if (blob_features.size() > 0) { - m_thrift_writer.writeStructFieldHeader(TTYPE_MAP, DR_BLOB); - m_thrift_writer.writeMapHeader(TTYPE_I64, TTYPE_STRING, blob_features.size()); - - for (const auto &it : blob_features) { - m_thrift_writer.writeInt64(it.first); - std::vector value = it.second; - m_thrift_writer.writeBinary(value.data(), value.size()); - } - } -} - -void DataRecordWriter::writeDenseTensors(twml::DataRecord &record) { - TensorRecord::RawTensors raw_tensors = record.getRawTensors(); - if (raw_tensors.size() > 0) { - m_thrift_writer.writeStructFieldHeader(TTYPE_MAP, DR_GENERAL_TENSOR); - m_tensor_writer.write(record); - } -} - -TWMLAPI uint32_t DataRecordWriter::getRecordsWritten() { - return m_records_written; -} - -TWMLAPI uint64_t DataRecordWriter::write(twml::DataRecord &record) { - uint64_t bytes_written_before = m_thrift_writer.getBytesWritten(); - - writeBinary(record); - writeContinuous(record); - writeDiscrete(record); - writeString(record); - writeSparseBinaryFeatures(record); - writeSparseContinuousFeatures(record); - writeBlobFeatures(record); - writeDenseTensors(record); - // TODO add sparse tensor field - - m_thrift_writer.writeStructStop(); - m_records_written++; - - return m_thrift_writer.getBytesWritten() - bytes_written_before; -} - -} // namespace twml diff --git a/twml/libtwml/src/lib/DataRecordWriter.docx b/twml/libtwml/src/lib/DataRecordWriter.docx new file mode 100644 index 000000000..ece728b6d Binary files /dev/null and b/twml/libtwml/src/lib/DataRecordWriter.docx differ diff --git a/twml/libtwml/src/lib/HashedDataRecord.cpp b/twml/libtwml/src/lib/HashedDataRecord.cpp deleted file mode 100644 index 6bbecee70..000000000 --- a/twml/libtwml/src/lib/HashedDataRecord.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include "internal/thrift.h" -#include "internal/error.h" - -#include -#include -#include - -#include -#include -#include - -namespace twml { - -void HashedDataRecord::decode(HashedDataRecordReader &reader) { - uint8_t feature_type = reader.readByte(); - while (feature_type != TTYPE_STOP) { - int16_t field_id = reader.readInt16(); - switch (field_id) { - case DR_BINARY: - reader.readBinary(feature_type, this); - break; - case DR_CONTINUOUS: - reader.readContinuous(feature_type, this); - break; - case DR_DISCRETE: - reader.readDiscrete(feature_type, this); - break; - case DR_STRING: - reader.readString(feature_type, this); - break; - case DR_SPARSE_BINARY: - reader.readSparseBinary(feature_type, this); - break; - case DR_SPARSE_CONTINUOUS: - reader.readSparseContinuous(feature_type, this); - break; - case DR_BLOB: - reader.readBlob(feature_type, this); - break; - case DR_GENERAL_TENSOR: - reader.readTensor(feature_type, dynamic_cast(this)); - break; - case DR_SPARSE_TENSOR: - reader.readSparseTensor(feature_type, dynamic_cast(this)); - break; - default: - throw ThriftInvalidField(field_id, "HashedDataRecord::readThrift"); - } - feature_type = reader.readByte(); - } -} - -void HashedDataRecord::addKey(int64_t key, int64_t transformed_key, - int64_t code, uint8_t type, double value) { - m_keys.push_back(key); - m_transformed_keys.push_back(transformed_key); - m_values.push_back(value); - m_codes.push_back(code); - m_types.push_back(type); -} - -void HashedDataRecord::addLabel(int64_t id, double label) { - m_labels[id] = label; -} - -void 
HashedDataRecord::addWeight(int64_t id, double val) { - m_weights[id] = val; -} - -void HashedDataRecord::clear() { - std::fill(m_labels.begin(), m_labels.end(), std::nanf("")); - std::fill(m_weights.begin(), m_weights.end(), 0.0); - m_keys.clear(); - m_transformed_keys.clear(); - m_values.clear(); - m_codes.clear(); - m_types.clear(); -} - -} // namespace twml \ No newline at end of file diff --git a/twml/libtwml/src/lib/HashedDataRecord.docx b/twml/libtwml/src/lib/HashedDataRecord.docx new file mode 100644 index 000000000..d562b1d88 Binary files /dev/null and b/twml/libtwml/src/lib/HashedDataRecord.docx differ diff --git a/twml/libtwml/src/lib/HashedDataRecordReader.cpp b/twml/libtwml/src/lib/HashedDataRecordReader.cpp deleted file mode 100644 index 93c86001b..000000000 --- a/twml/libtwml/src/lib/HashedDataRecordReader.cpp +++ /dev/null @@ -1,218 +0,0 @@ -#include "internal/thrift.h" -#include "internal/error.h" - -#include -#include -#include -#include - -namespace twml { - -bool HashedDataRecordReader::keepId(const int64_t &key, int64_t &code) { - auto it = m_keep_map->find(key); - if (it == m_keep_map->end()) return false; - code = it->second; - return true; -} - -bool HashedDataRecordReader::isLabel(const int64_t &key, int64_t &code) { - if (m_labels_map == nullptr) return false; - auto it = m_labels_map->find(key); - if (it == m_labels_map->end()) return false; - code = it->second; - return true; -} - -bool HashedDataRecordReader::isWeight(const int64_t &key, int64_t &code) { - if (m_weights_map == nullptr) return false; - auto it = m_weights_map->find(key); - if (it == m_weights_map->end()) return false; - code = it->second; - return true; -} - -void HashedDataRecordReader::readBinary( - const int feature_type, - HashedDataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_SET, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - - int32_t length = readInt32(); - record->extendSize(length); - int64_t id, code; - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - if (keepId(id, code)) { - record->addKey(id, id, code, DR_BINARY); - } else if (isLabel(id, code)) { - record->addLabel(code); - } - } -} - -void HashedDataRecordReader::readContinuous( - const int feature_type, - HashedDataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_DOUBLE, "value_type"); - - int32_t length = readInt32(); - record->extendSize(length); - int64_t id, code; - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - if (keepId(id, code)) { - double value = readDouble(); - if (!std::isnan(value)) { - record->addKey(id, id, code, DR_CONTINUOUS, value); - } - } else if (isLabel(id, code)) { - record->addLabel(code, readDouble()); - } else if (isWeight(id, code)) { - record->addWeight(code, readDouble()); - } else { - skip(); - } - } -} - -void HashedDataRecordReader::readDiscrete( - const int feature_type, - HashedDataRecord *record) { - CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type"); - CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "value_type"); - - int32_t length = readInt32(); - record->extendSize(length); - int64_t id, code; - for (int32_t i = 0; i < length; i++) { - id = readInt64(); - if (keepId(id, code)) { - int64_t transformed_key = mixDiscreteIdAndValue(id, readInt64()); - record->addKey(id, transformed_key, code, DR_DISCRETE); - } else { - skip(); - } - } -} - -void 
-void HashedDataRecordReader::readString(
-    const int feature_type,
-    HashedDataRecord *record) {
-  CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "value_type");
-
-  int32_t length = readInt32();
-  record->extendSize(length);
-  int64_t id, code;
-  for (int32_t i = 0; i < length; i++) {
-    id = readInt64();
-    if (keepId(id, code)) {
-      const uint8_t *begin = nullptr;
-      int32_t str_len = getRawBuffer(&begin);
-      int64_t transformed_key = mixStringIdAndValue(id, str_len, begin);
-      record->addKey(id, transformed_key, code, DR_STRING);
-    } else {
-      int32_t str_len = readInt32();
-      skipLength(str_len);
-    }
-  }
-}
-
-void HashedDataRecordReader::readSparseBinary(
-    const int feature_type,
-    HashedDataRecord *record) {
-  CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_SET, "value_type");
-
-  int32_t length = readInt32();
-  record->extendSize(length);
-  int64_t id, code;
-  for (int32_t i = 0; i < length; i++) {
-    id = readInt64();
-    if (keepId(id, code)) {
-      CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "set:key_type");
-      int32_t set_length = readInt32();
-      for (int32_t j = 0; j < set_length; j++) {
-        const uint8_t *begin = nullptr;
-        int32_t str_len = getRawBuffer(&begin);
-        int64_t transformed_key = mixStringIdAndValue(id, str_len, begin);
-        record->addKey(id, transformed_key, code, DR_SPARSE_BINARY);
-      }
-    } else {
-      CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "set:key_type");
-      int32_t set_length = readInt32();
-      for (int32_t j = 0; j < set_length; j++) {
-        int32_t str_len = readInt32();
-        skipLength(str_len);
-      }
-    }
-  }
-}
-
-void HashedDataRecordReader::readSparseContinuous(
-    const int feature_type,
-    HashedDataRecord *record) {
-  CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_MAP, "value_type");
-
-  int32_t length = readInt32();
-  record->extendSize(length);
-  int64_t id, code;
-  for (int32_t i = 0; i < length; i++) {
-    id = readInt64();
-    if (keepId(id, code)) {
-      CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "map::key_type");
-      CHECK_THRIFT_TYPE(readByte(), TTYPE_DOUBLE, "map::value_type");
-      int32_t map_length = readInt32();
-      for (int32_t j = 0; j < map_length; j++) {
-        const uint8_t *begin = nullptr;
-        int32_t str_len = getRawBuffer(&begin);
-        int64_t transformed_key = 0;
-        switch (m_decode_mode) {
-          case DecodeMode::hash_fname_and_valname:
-            transformed_key = mixStringIdAndValue(id, str_len, begin);
-            break;
-          default:  // DecodeMode::hash_valname (0) is the default
-            twml_get_feature_id(&transformed_key, str_len, reinterpret_cast<const char *>(begin));
-        }
-        double value = readDouble();
-        if (!std::isnan(value)) {
-          record->addKey(id, transformed_key, code, DR_SPARSE_CONTINUOUS, value);
-        }
-      }
-    } else {
-      CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "map::key_type");
-      CHECK_THRIFT_TYPE(readByte(), TTYPE_DOUBLE, "map::value_type");
-      int32_t map_length = readInt32();
-      for (int32_t j = 0; j < map_length; j++) {
-        int32_t str_len = readInt32();
-        skipLength(str_len);
-        skip();
-      }
-    }
-  }
-}
-
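The string-valued readers above derive one transformed i64 key from the pair (feature id, value bytes). The actual mixing is done by mixStringIdAndValue and twml_get_feature_id, whose implementations are not part of this diff; the stand-in below uses std::hash purely to illustrate the idea of folding an id and a value name into a single key.

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

// Illustrative stand-in for mixStringIdAndValue: NOT twml's actual hash.
int64_t mixIdAndValueSketch(int64_t id, const std::string &value) {
  size_t h = std::hash<std::string>{}(value);
  // Simple boost-style hash combine; twml uses its own scheme.
  h ^= std::hash<int64_t>{}(id) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
  return static_cast<int64_t>(h);
}

int main() {
  // The same feature id with different value names yields different keys.
  std::cout << mixIdAndValueSketch(42, "clicked") << "\n";
  std::cout << mixIdAndValueSketch(42, "not_clicked") << "\n";
  return 0;
}
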
-void HashedDataRecordReader::readBlob(
-    const int feature_type,
-    HashedDataRecord *record) {
-  CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "value_type");
-
-  int32_t length = readInt32();
-  int64_t id;
-  for (int32_t i = 0; i < length; i++) {
-    // Skip BlobFeatures whether or not they are defined in the FeatureConfig
-    id = readInt64();
-    int32_t str_len = readInt32();
-    skipLength(str_len);
-  }
-}
-}  // namespace twml
\ No newline at end of file
diff --git a/twml/libtwml/src/lib/HashedDataRecordReader.docx b/twml/libtwml/src/lib/HashedDataRecordReader.docx
new file mode 100644
index 000000000..a20f50457
Binary files /dev/null and b/twml/libtwml/src/lib/HashedDataRecordReader.docx differ
diff --git a/twml/libtwml/src/lib/Hashmap.cpp b/twml/libtwml/src/lib/Hashmap.cpp
deleted file mode 100644
index 4086e8a16..000000000
--- a/twml/libtwml/src/lib/Hashmap.cpp
+++ /dev/null
@@ -1,380 +0,0 @@
-#include "internal/khash.h"
-#include "internal/error.h"
-#include
-#include
-#include
-
-namespace twml {
-  HashMap::HashMap() :
-    m_hashmap(nullptr) {
-    TWML_CHECK(twml_hashmap_create(&m_hashmap), "Failed to create HashMap");
-  }
-
-  HashMap::~HashMap() {
-    // Do not throw exceptions from the destructor
-    twml_hashmap_delete(m_hashmap);
-  }
-
-  void HashMap::clear() {
-    TWML_CHECK(twml_hashmap_clear(m_hashmap), "Failed to clear HashMap");
-  }
-
-  uint64_t HashMap::size() const {
-    uint64_t size;
-    TWML_CHECK(twml_hashmap_get_size(&size, m_hashmap), "Failed to get HashMap size");
-    return size;
-  }
-
-  int8_t HashMap::insert(const HashKey_t key) {
-    int8_t result;
-    TWML_CHECK(twml_hashmap_insert_key(&result, m_hashmap, key),
-               "Failed to insert key");
-    return result;
-  }
-
-  int8_t HashMap::insert(const HashKey_t key, const HashKey_t val) {
-    int8_t result;
-    TWML_CHECK(twml_hashmap_insert_key_and_value(&result, m_hashmap, key, val),
-               "Failed to insert key");
-    return result;
-  }
-
-  int8_t HashMap::get(HashVal_t &val, const HashKey_t key) const {
-    int8_t result;
-    TWML_CHECK(twml_hashmap_get_value(&result, &val, m_hashmap, key),
-               "Failed to get value for key");
-    return result;
-  }
-
-  void HashMap::insert(Tensor &mask, const Tensor keys) {
-    TWML_CHECK(twml_hashmap_insert_keys(mask.getHandle(), m_hashmap, keys.getHandle()),
-               "Failed to insert keys tensor");
-  }
-
-  void HashMap::insert(Tensor &mask, const Tensor keys, const Tensor vals) {
-    TWML_CHECK(twml_hashmap_insert_keys_and_values(mask.getHandle(), m_hashmap,
-                                                   keys.getHandle(), vals.getHandle()),
-               "Failed to insert keys,values tensor pair");
-  }
-
-  void HashMap::remove(const Tensor keys) {
-    TWML_CHECK(twml_hashmap_remove_keys(m_hashmap, keys.getHandle()),
-               "Failed to remove keys tensor");
-  }
-
-  void HashMap::get(Tensor &mask, Tensor &vals, const Tensor keys) const {
-    TWML_CHECK(twml_hashmap_get_values(mask.getHandle(), vals.getHandle(),
-                                       m_hashmap, keys.getHandle()),
-               "Failed to get values tensor");
-  }
-
-  void HashMap::getInplace(Tensor &mask, Tensor &keys_vals) const {
-    TWML_CHECK(twml_hashmap_get_values_inplace(mask.getHandle(),
-                                               keys_vals.getHandle(),
-                                               m_hashmap),
-               "Failed to get values tensor");
-  }
-
-  void HashMap::toTensors(Tensor &keys, Tensor &vals) const {
-    TWML_CHECK(twml_hashmap_to_tensors(keys.getHandle(),
-                                       vals.getHandle(),
-                                       m_hashmap),
-               "Failed to get keys,values tensors from HashMap");
-  }
-}  // namespace twml
-
-using twml::HashKey_t;
-using twml::HashVal_t;
-
-KHASH_MAP_INIT_INT64(HashKey_t, HashVal_t);
-typedef khash_t(HashKey_t)* hash_map_t;
-
-
-twml_err twml_hashmap_create(twml_hashmap *hashmap) {
-  hash_map_t *h = reinterpret_cast<hash_map_t *>(hashmap);
-  *h = kh_init(HashKey_t);
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_hashmap_clear(const twml_hashmap hashmap) {
-  hash_map_t h = (hash_map_t)hashmap;
-  kh_clear(HashKey_t, h);
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_hashmap_get_size(uint64_t *size, const twml_hashmap hashmap) {
-  hash_map_t h = (hash_map_t)hashmap;
-  *size = kh_size(h);
-  return TWML_ERR_NONE;
-}
-
-
-twml_err twml_hashmap_delete(const twml_hashmap hashmap) {
-  hash_map_t h = (hash_map_t)hashmap;
-  kh_destroy(HashKey_t, h);
-  return TWML_ERR_NONE;
-}
-
-// insert, remove, get single key / value
-twml_err twml_hashmap_insert_key(int8_t *mask,
-                                 const twml_hashmap hashmap,
-                                 const HashKey_t key) {
-  hash_map_t h = (hash_map_t)hashmap;
-  int ret = 0;
-  khiter_t k = kh_put(HashKey_t, h, key, &ret);
-  *mask = ret >= 0;
-  if (*mask) {
-    HashVal_t v = kh_size(h);
-    kh_value(h, k) = v;
-  }
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_hashmap_insert_key_and_value(int8_t *mask, twml_hashmap hashmap,
-                                           const HashKey_t key, const HashVal_t val) {
-  hash_map_t h = (hash_map_t)hashmap;
-  int ret = 0;
-  khiter_t k = kh_put(HashKey_t, h, key, &ret);
-  *mask = ret >= 0;
-  if (*mask) {
-    kh_value(h, k) = val;
-  }
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_hashmap_remove_key(const twml_hashmap hashmap,
-                                 const HashKey_t key) {
-  hash_map_t h = (hash_map_t)hashmap;
-  khiter_t k = kh_get(HashKey_t, h, key);
-  if (k != kh_end(h)) {
-    kh_del(HashKey_t, h, k);
-  }
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_hashmap_get_value(int8_t *mask, HashVal_t *val,
-                                const twml_hashmap hashmap, const HashKey_t key) {
-  hash_map_t h = (hash_map_t)hashmap;
-  khiter_t k = kh_get(HashKey_t, h, key);
-  if (k == kh_end(h)) {
-    *mask = false;
-  } else {
-    *val = kh_value(h, k);
-    *mask = true;
-  }
-  return TWML_ERR_NONE;
-}
-
-// insert, get, remove tensors of keys / values
-twml_err twml_hashmap_insert_keys(twml_tensor masks,
-                                  const twml_hashmap hashmap,
-                                  const twml_tensor keys) {
-  auto masks_tensor = twml::getTensor(masks);
-  auto keys_tensor = twml::getConstTensor(keys);
-
-  if (masks_tensor->getType() != TWML_TYPE_INT8) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (keys_tensor->getType() != TWML_TYPE_INT64) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (keys_tensor->getNumElements() != masks_tensor->getNumElements()) {
-    return TWML_ERR_SIZE;
-  }
-
-  int8_t *mptr = masks_tensor->getData<int8_t>();
-  const HashKey_t *kptr = keys_tensor->getData<HashKey_t>();
-
-  uint64_t num_elements = keys_tensor->getNumElements();
-
-  hash_map_t h = (hash_map_t)hashmap;
-  for (uint64_t i = 0; i < num_elements; i++) {
-    int ret = 0;
-    khiter_t k = kh_put(HashKey_t, h, kptr[i], &ret);
-    mptr[i] = ret >= 0;
-    if (mptr[i]) {
-      HashVal_t v = kh_size(h);
-      kh_value(h, k) = v;
-    }
-  }
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_hashmap_insert_keys_and_values(twml_tensor masks,
-                                             twml_hashmap hashmap,
-                                             const twml_tensor keys,
-                                             const twml_tensor vals) {
-  auto masks_tensor = twml::getTensor(masks);
-  auto keys_tensor = twml::getConstTensor(keys);
-  auto vals_tensor = twml::getConstTensor(vals);
-
-  if (masks_tensor->getType() != TWML_TYPE_INT8) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (keys_tensor->getType() != TWML_TYPE_INT64) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (vals_tensor->getType() != TWML_TYPE_INT64) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (keys_tensor->getNumElements() != vals_tensor->getNumElements() ||
-      keys_tensor->getNumElements() != masks_tensor->getNumElements()) {
-    return TWML_ERR_SIZE;
-  }
-
-  int8_t *mptr = masks_tensor->getData<int8_t>();
-  const HashKey_t *kptr = keys_tensor->getData<HashKey_t>();
-  const HashVal_t *vptr = twml::getConstTensor(vals)->getData<HashVal_t>();
-
-  uint64_t num_elements = keys_tensor->getNumElements();
-
-  hash_map_t h = (hash_map_t)hashmap;
-  for (uint64_t i = 0; i < num_elements; i++) {
-    int ret = 0;
-    khiter_t k = kh_put(HashKey_t, h, kptr[i], &ret);
-    mptr[i] = ret >= 0;
-    if (mptr[i]) {
-      kh_value(h, k) = vptr[i];
-    }
-  }
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_hashmap_remove_keys(const twml_hashmap hashmap,
-                                  const twml_tensor keys) {
-  auto keys_tensor = twml::getConstTensor(keys);
-
-  if (keys_tensor->getType() != TWML_TYPE_INT64) {
-    return TWML_ERR_TYPE;
-  }
-
-  const HashKey_t *kptr = keys_tensor->getData<HashKey_t>();
-  uint64_t num_elements = keys_tensor->getNumElements();
-
-  hash_map_t h = (hash_map_t)hashmap;
-  for (uint64_t i = 0; i < num_elements; i++) {
-    khiter_t k = kh_get(HashKey_t, h, kptr[i]);
-    if (k != kh_end(h)) {
-      // kh_del expects the iterator returned by kh_get, not the key itself
-      kh_del(HashKey_t, h, k);
-    }
-  }
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_hashmap_get_values(twml_tensor masks, twml_tensor vals,
-                                 const twml_hashmap hashmap, const twml_tensor keys) {
-  auto masks_tensor = twml::getTensor(masks);
-  auto vals_tensor = twml::getTensor(vals);
-  auto keys_tensor = twml::getConstTensor(keys);
-
-  if (masks_tensor->getType() != TWML_TYPE_INT8) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (keys_tensor->getType() != TWML_TYPE_INT64) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (vals_tensor->getType() != TWML_TYPE_INT64) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (keys_tensor->getNumElements() != vals_tensor->getNumElements() ||
-      keys_tensor->getNumElements() != masks_tensor->getNumElements()) {
-    return TWML_ERR_SIZE;
-  }
-
-  int8_t *mptr = masks_tensor->getData<int8_t>();
-  HashVal_t *vptr = vals_tensor->getData<HashVal_t>();
-  const HashKey_t *kptr = keys_tensor->getData<HashKey_t>();
-
-  uint64_t num_elements = keys_tensor->getNumElements();
-
-  hash_map_t h = (hash_map_t)hashmap;
-  for (uint64_t i = 0; i < num_elements; i++) {
-    khiter_t k = kh_get(HashKey_t, h, kptr[i]);
-    if (k == kh_end(h)) {
-      mptr[i] = false;
-    } else {
-      mptr[i] = true;
-      vptr[i] = kh_value(h, k);
-    }
-  }
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_hashmap_get_values_inplace(twml_tensor masks, twml_tensor keys_vals,
-                                         const twml_hashmap hashmap) {
-  auto masks_tensor = twml::getTensor(masks);
-  auto keys_tensor = twml::getTensor(keys_vals);
-
-  if (masks_tensor->getType() != TWML_TYPE_INT8) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (keys_tensor->getType() != TWML_TYPE_INT64) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (keys_tensor->getNumElements() != masks_tensor->getNumElements()) {
-    return TWML_ERR_SIZE;
-  }
-
-  int8_t *mptr = masks_tensor->getData<int8_t>();
-  HashKey_t *kptr = keys_tensor->getData<HashKey_t>();
-
-  uint64_t num_elements = keys_tensor->getNumElements();
-
-  hash_map_t h = (hash_map_t)hashmap;
-  for (uint64_t i = 0; i < num_elements; i++) {
-    khiter_t k = kh_get(HashKey_t, h, kptr[i]);
-    if (k == kh_end(h)) {
-      mptr[i] = false;
-    } else {
-      mptr[i] = true;
-      kptr[i] = kh_value(h, k);
-    }
-  }
-  return TWML_ERR_NONE;
-}
-
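Putting the wrapper together: a sketch of the single-key twml::HashMap API exactly as declared by the deleted source above. The header path is an assumption (it is not shown in this diff), and this only builds where the twml library is available. Note that kh_put returns 0 when the key is already present, so insert acts as insert-or-overwrite.

#include <cassert>
#include <twml/Hashmap.h>  // assumed header location; not shown in this diff

int main() {
  twml::HashMap map;                  // twml_hashmap_create under the hood
  map.insert(/*key=*/42, /*val=*/7);  // insert-or-overwrite, per kh_put semantics
  twml::HashVal_t val = 0;
  if (map.get(val, 42)) {             // get() returns a found/not-found mask
    assert(val == 7);
  }
  assert(map.size() == 1);
  map.clear();
  assert(map.size() == 0);
  return 0;                           // destructor calls twml_hashmap_delete
}
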
-twml_err twml_hashmap_to_tensors(twml_tensor keys, twml_tensor vals,
-                                 const twml_hashmap hashmap) {
-  hash_map_t h = (hash_map_t)hashmap;
-  const uint64_t size = kh_size(h);
-
-  auto keys_tensor = twml::getTensor(keys);
-  auto vals_tensor = twml::getTensor(vals);
-
-  if (keys_tensor->getType() != TWML_TYPE_INT64) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (vals_tensor->getType() != TWML_TYPE_INT64) {
-    return TWML_ERR_TYPE;
-  }
-
-  if (size != keys_tensor->getNumElements() ||
-      size != vals_tensor->getNumElements()) {
-    return TWML_ERR_SIZE;
-  }
-
-  HashKey_t *kptr = keys_tensor->getData<HashKey_t>();
-  HashVal_t *vptr = vals_tensor->getData<HashVal_t>();
-
-  HashKey_t key, i = 0;
-  HashVal_t val;
-
-  kh_foreach(h, key, val, {
-    kptr[i] = key;
-    vptr[i] = val;
-    i++;
-  });
-
-  return TWML_ERR_NONE;
-}
diff --git a/twml/libtwml/src/lib/Hashmap.docx b/twml/libtwml/src/lib/Hashmap.docx
new file mode 100644
index 000000000..1f12580f3
Binary files /dev/null and b/twml/libtwml/src/lib/Hashmap.docx differ
diff --git a/twml/libtwml/src/lib/Tensor.cpp b/twml/libtwml/src/lib/Tensor.cpp
deleted file mode 100644
index d610d9316..000000000
--- a/twml/libtwml/src/lib/Tensor.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-#include "internal/error.h"
-#include
-#include
-#include
-#include
-#include
-
-namespace twml {
-
-using std::vector;
-
-Tensor::Tensor(void *data, int ndims, const uint64_t *dims, const uint64_t *strides, twml_type type) :
-  m_type(type), m_data(data),
-  m_dims(dims, dims + ndims),
-  m_strides(strides, strides + ndims) {
-}
-
-Tensor::Tensor(void *data,
-               const vector<uint64_t> &dims,
-               const vector<uint64_t> &strides,
-               twml_type type) :
-  m_type(type), m_data(data),
-  m_dims(dims.begin(), dims.end()),
-  m_strides(strides.begin(), strides.end()) {
-  if (dims.size() != strides.size()) {
-    throw twml::Error(TWML_ERR_SIZE, "The sizes of dims and strides don't match");
-  }
-}
-
-int Tensor::getNumDims() const {
-  return static_cast<int>(m_dims.size());
-}
-
-uint64_t Tensor::getDim(int id) const {
-  if (id >= this->getNumDims()) {
-    throw twml::Error(TWML_ERR_SIZE, "Requested dimension exceeds tensor dimension");
-  }
-  return m_dims[id];
-}
-
-uint64_t Tensor::getStride(int id) const {
-  if (id >= this->getNumDims()) {
-    throw twml::Error(TWML_ERR_SIZE, "Requested dimension exceeds tensor dimension");
-  }
-  return m_strides[id];
-}
-
-uint64_t Tensor::getNumElements() const {
-  return std::accumulate(m_dims.begin(), m_dims.end(),
-                         uint64_t(1), std::multiplies<uint64_t>());
-}
-
-twml_type Tensor::getType() const {
-  return m_type;
-}
-
-twml_tensor Tensor::getHandle() {
-  return reinterpret_cast<twml_tensor>(this);
-}
-
-const twml_tensor Tensor::getHandle() const {
-  return reinterpret_cast<twml_tensor>(const_cast<Tensor *>(this));
-}
-
-const Tensor *getConstTensor(const twml_tensor t) {
-  return reinterpret_cast<const Tensor *>(t);
-}
-
-Tensor *getTensor(twml_tensor t) {
-  return reinterpret_cast<Tensor *>(t);
-}
-
-#define INSTANTIATE(T)                                     \
-  template<> TWMLAPI T *Tensor::getData<T>() {             \
-    if ((twml_type)Type<T>::type != m_type) {              \
-      throw twml::Error(TWML_ERR_TYPE,                     \
-                        "Requested invalid type");         \
-    }                                                      \
-    return reinterpret_cast<T *>(m_data);                  \
-  }                                                        \
-  template<> TWMLAPI const T *Tensor::getData<T>() const { \
-    if ((twml_type)Type<T>::type != m_type) {              \
-      throw twml::Error(TWML_ERR_TYPE,                     \
-                        "Requested invalid type");         \
-    }                                                      \
-    return (const T *)m_data;                              \
-  }                                                        \
-
-INSTANTIATE(int32_t)
-INSTANTIATE(int64_t)
-INSTANTIATE(int8_t)
-INSTANTIATE(uint8_t)
-INSTANTIATE(float)
-INSTANTIATE(double)
-INSTANTIATE(bool)
-INSTANTIATE(std::string)
-
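getHandle, getTensor, and getConstTensor above form the usual opaque-handle bridge between a C API and a C++ class: the C side carries a typeless handle, and the C++ side round-trips it with reinterpret_cast. A self-contained sketch of that pattern with a toy type:

#include <cassert>

// Toy stand-in for twml_tensor: a typeless handle handed across a C boundary.
typedef void *toy_handle;

struct Toy { int payload; };

toy_handle getHandle(Toy *t) { return reinterpret_cast<toy_handle>(t); }
Toy *fromHandle(toy_handle h) { return reinterpret_cast<Toy *>(h); }

int main() {
  Toy t{123};
  toy_handle h = getHandle(&t);           // cross the C boundary...
  assert(fromHandle(h)->payload == 123);  // ...and come back intact
  return 0;
}

The cast is only safe because the handle is created from and returned to the same concrete type, which is exactly the invariant the twml C API maintains.
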
-// This is used for the C api. No checks needed for void.
-template<> TWMLAPI void *Tensor::getData<void>() {
-  return m_data;
-}
-template<> TWMLAPI const void *Tensor::getData<void>() const {
-  return (const void *)m_data;
-}
-
-std::string getTypeName(twml_type type) {
-  switch (type) {
-    case TWML_TYPE_FLOAT32 : return "float32";
-    case TWML_TYPE_FLOAT64 : return "float64";
-    case TWML_TYPE_INT32   : return "int32";
-    case TWML_TYPE_INT64   : return "int64";
-    case TWML_TYPE_INT8    : return "int8";
-    case TWML_TYPE_UINT8   : return "uint8";
-    case TWML_TYPE_BOOL    : return "bool";
-    case TWML_TYPE_STRING  : return "string";
-    case TWML_TYPE_UNKNOWN : return "Unknown type";
-  }
-  throw twml::Error(TWML_ERR_TYPE, "Unknown type");
-}
-
-uint64_t getSizeOf(twml_type dtype) {
-  switch (dtype) {
-    case TWML_TYPE_FLOAT  : return 4;
-    case TWML_TYPE_DOUBLE : return 8;
-    case TWML_TYPE_INT64  : return 8;
-    case TWML_TYPE_INT32  : return 4;
-    case TWML_TYPE_UINT8  : return 1;
-    case TWML_TYPE_BOOL   : return 1;
-    case TWML_TYPE_INT8   : return 1;
-    case TWML_TYPE_STRING :
-      throw twml::Error(TWML_ERR_THRIFT, "getSizeOf not supported for strings");
-    case TWML_TYPE_UNKNOWN:
-      throw twml::Error(TWML_ERR_THRIFT, "Can't get size of unknown types");
-  }
-  throw twml::Error(TWML_ERR_THRIFT, "Invalid twml_type");
-}
-
-}  // namespace twml
-
-twml_err twml_tensor_create(twml_tensor *t, void *data, int ndims, uint64_t *dims,
-                            uint64_t *strides, twml_type type) {
-  HANDLE_EXCEPTIONS(
-    twml::Tensor *res = new twml::Tensor(data, ndims, dims, strides, type);
-    *t = reinterpret_cast<twml_tensor>(res););
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_tensor_delete(const twml_tensor t) {
-  HANDLE_EXCEPTIONS(
-    delete twml::getConstTensor(t););
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_tensor_get_type(twml_type *type, const twml_tensor t) {
-  HANDLE_EXCEPTIONS(
-    *type = twml::getConstTensor(t)->getType(););
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_tensor_get_data(void **data, const twml_tensor t) {
-  HANDLE_EXCEPTIONS(
-    *data = twml::getTensor(t)->getData<void>(););
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_tensor_get_dim(uint64_t *dim, const twml_tensor t, int id) {
-  HANDLE_EXCEPTIONS(
-    const twml::Tensor *tensor = twml::getConstTensor(t);
-    *dim = tensor->getDim(id););
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_tensor_get_stride(uint64_t *stride, const twml_tensor t, int id) {
-  HANDLE_EXCEPTIONS(
-    const twml::Tensor *tensor = twml::getConstTensor(t);
-    *stride = tensor->getStride(id););
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_tensor_get_num_dims(int *ndim, const twml_tensor t) {
-  HANDLE_EXCEPTIONS(
-    const twml::Tensor *tensor = twml::getConstTensor(t);
-    *ndim = tensor->getNumDims(););
-  return TWML_ERR_NONE;
-}
-
-twml_err twml_tensor_get_num_elements(uint64_t *nelements, const twml_tensor t) {
-  HANDLE_EXCEPTIONS(
-    const twml::Tensor *tensor = twml::getConstTensor(t);
-    *nelements = tensor->getNumElements(););
-  return TWML_ERR_NONE;
-}
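A sketch of driving the twml_tensor C API above end to end, using only the signatures shown in this diff. The header path is assumed, error handling is reduced to asserts, and this only builds where the twml library is available.

#include <cassert>
#include <cstdint>
#include <twml/Tensor.h>  // assumed header location; not shown in this diff

int main() {
  // A 2x3 row-major float matrix backed by caller-owned memory.
  float data[6] = {0, 1, 2, 3, 4, 5};
  uint64_t dims[2] = {2, 3};
  uint64_t strides[2] = {3, 1};  // row-major: step 3 per row, 1 per column

  twml_tensor t = nullptr;
  assert(twml_tensor_create(&t, data, 2, dims, strides, TWML_TYPE_FLOAT) == TWML_ERR_NONE);

  uint64_t n = 0;
  assert(twml_tensor_get_num_elements(&n, t) == TWML_ERR_NONE);
  assert(n == 6);

  // The tensor does not own `data`; deleting it only frees the wrapper.
  assert(twml_tensor_delete(t) == TWML_ERR_NONE);
  return 0;
}
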
diff --git a/twml/libtwml/src/lib/Tensor.docx b/twml/libtwml/src/lib/Tensor.docx
new file mode 100644
index 000000000..e6551b608
Binary files /dev/null and b/twml/libtwml/src/lib/Tensor.docx differ
diff --git a/twml/libtwml/src/lib/TensorRecordReader.cpp b/twml/libtwml/src/lib/TensorRecordReader.cpp
deleted file mode 100644
index 3ffb1b98a..000000000
--- a/twml/libtwml/src/lib/TensorRecordReader.cpp
+++ /dev/null
@@ -1,323 +0,0 @@
-#include "internal/thrift.h"
-#include "internal/error.h"
-#include
-
-#include
-#include
-
-namespace twml {
-
-template<typename T> struct TensorTraits;
-
-#define INSTANTIATE(TYPE, THRIFT_TYPE, TWML_TYPE)   \
-  template<> struct TensorTraits<TYPE> {            \
-    static const TTYPES ThriftType = THRIFT_TYPE;   \
-    static const twml_type TwmlType = TWML_TYPE;    \
-  };                                                \
-
-INSTANTIATE(int64_t, TTYPE_I64, TWML_TYPE_INT64)
-INSTANTIATE(int32_t, TTYPE_I32, TWML_TYPE_INT32)
-INSTANTIATE(double, TTYPE_DOUBLE, TWML_TYPE_DOUBLE)
-INSTANTIATE(bool, TTYPE_BOOL, TWML_TYPE_BOOL)
-
-static
-std::vector<uint64_t> calcStrides(const std::vector<uint64_t> &shape) {
-  int ndims = static_cast<int>(shape.size());
-  std::vector<uint64_t> strides(ndims);
-  uint64_t stride = 1;
-  for (int i = ndims - 1; i >= 0; i--) {
-    strides[i] = stride;
-    stride *= shape[i];
-  }
-  return strides;
-}
-
-static twml_type getTwmlType(int dtype) {
-  // Convert tensor.thrift enum to twml enum
-  switch (dtype) {
-    case DATA_TYPE_FLOAT:
-      return TWML_TYPE_FLOAT;
-    case DATA_TYPE_DOUBLE:
-      return TWML_TYPE_DOUBLE;
-    case DATA_TYPE_INT64:
-      return TWML_TYPE_INT64;
-    case DATA_TYPE_INT32:
-      return TWML_TYPE_INT32;
-    case DATA_TYPE_UINT8:
-      return TWML_TYPE_UINT8;
-    case DATA_TYPE_STRING:
-      return TWML_TYPE_STRING;
-    case DATA_TYPE_BOOL:
-      return TWML_TYPE_BOOL;
-  }
-  return TWML_TYPE_UNKNOWN;
-}
-
-std::vector<uint64_t> TensorRecordReader::readShape() {
-  int32_t length = readInt32();
-
-  std::vector<uint64_t> shape;
-  shape.reserve(length);
-  for (int32_t i = 0; i < length; i++) {
-    shape.push_back(static_cast<uint64_t>(readInt64()));
-  }
-
-  return shape;
-}
-
-template<typename T>
-RawTensor TensorRecordReader::readTypedTensor() {
-  std::vector<uint64_t> shape;
-  int32_t length = 0;
-  const uint8_t *data = nullptr;
-  uint64_t raw_length = 0;
-  uint8_t field_type = TTYPE_STOP;
-
-  while ((field_type = readByte()) != TTYPE_STOP) {
-    int16_t field_id = readInt16();
-    switch (field_id) {
-      case 1:
-        CHECK_THRIFT_TYPE(field_type, TTYPE_LIST, "data");
-        CHECK_THRIFT_TYPE(readByte(), TensorTraits<T>::ThriftType, "data_type");
-        length = getRawBuffer(&data);
-        raw_length = length * sizeof(T);
-        break;
-      case 2:
-        CHECK_THRIFT_TYPE(field_type, TTYPE_LIST, "shape");
-        CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "shape_type");
-        shape = readShape();
-        break;
-      default:
-        throw ThriftInvalidField(field_id, "TensorRecordReader::readTypedTensor");
-    }
-  }
-
-  // data is required
-  if (data == nullptr) {
-    throw twml::Error(TWML_ERR_THRIFT, "data field not found for TypedTensor");
-  }
-
-  // shape is optional
-  if (shape.size() == 0) {
-    shape.push_back((uint64_t)length);
-  }
-
-  // TODO: Try avoiding stride calculation
-  std::vector<uint64_t> strides = calcStrides(shape);
-  // FIXME: Try to use const void * in Tensors.
-  return RawTensor(const_cast<void *>(static_cast<const void *>(data)),
-                   shape, strides, (twml_type)TensorTraits<T>::TwmlType, true, raw_length);
-}
-
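calcStrides above produces row-major strides by scanning the shape right to left: the innermost dimension gets stride 1, and each outer stride is the product of the inner extents. The same computation, self-contained, with a worked example (shape {2, 3, 4} gives strides {12, 4, 1}):

#include <cassert>
#include <cstdint>
#include <vector>

// Row-major stride computation, matching calcStrides in the deleted source.
static std::vector<uint64_t> calcStrides(const std::vector<uint64_t> &shape) {
  int ndims = static_cast<int>(shape.size());
  std::vector<uint64_t> strides(ndims);
  uint64_t stride = 1;
  for (int i = ndims - 1; i >= 0; i--) {
    strides[i] = stride;   // stride of dim i is the product of all inner extents
    stride *= shape[i];
  }
  return strides;
}

int main() {
  std::vector<uint64_t> strides = calcStrides({2, 3, 4});
  assert((strides == std::vector<uint64_t>{12, 4, 1}));
  return 0;
}
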
-RawTensor TensorRecordReader::readRawTypedTensor() {
-  std::vector<uint64_t> shape;
-  const uint8_t *data = nullptr;
-  twml_type type = TWML_TYPE_UNKNOWN;
-  uint64_t raw_length = 0;
-  uint8_t field_type = TTYPE_STOP;
-
-  while ((field_type = readByte()) != TTYPE_STOP) {
-    int16_t field_id = readInt16();
-    switch (field_id) {
-      case 1:
-        CHECK_THRIFT_TYPE(field_type, TTYPE_I32, "DataType");
-        type = getTwmlType(readInt32());
-        break;
-      case 2:
-        CHECK_THRIFT_TYPE(field_type, TTYPE_STRING, "content");
-        raw_length = getRawBuffer(&data);
-        break;
-      case 3:
-        CHECK_THRIFT_TYPE(field_type, TTYPE_LIST, "shape");
-        CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "shape_type");
-        shape = readShape();
-        break;
-      default:
-        throw ThriftInvalidField(field_id, "TensorRecordReader::readRawTypedTensor");
-    }
-  }
-
-  // data type is required
-  if (type == TWML_TYPE_UNKNOWN) {
-    throw twml::Error(TWML_ERR_THRIFT, "DataType is a required field for RawTypedTensor");
-  }
-
-  // data is required
-  if (data == nullptr) {
-    throw twml::Error(TWML_ERR_THRIFT, "content is a required field for RawTypedTensor");
-  }
-
-  // shape is optional in the thrift file, but it is really required for string types.
-  if (shape.size() == 0) {
-    if (type == TWML_TYPE_STRING) {
-      throw twml::Error(TWML_ERR_THRIFT, "shape required for string types in RawTypedTensor");
-    }
-    shape.push_back((uint64_t)(raw_length / getSizeOf(type)));
-  }
-
-  // TODO: Try avoiding stride calculation
-  std::vector<uint64_t> strides = calcStrides(shape);
-  // FIXME: Try to use const void * data inside Tensors.
-  return RawTensor(const_cast<void *>(static_cast<const void *>(data)),
-                   shape, strides, type, false, raw_length);
-}
-
-RawTensor TensorRecordReader::readStringTensor() {
-  std::vector<uint64_t> shape;
-  int32_t length = 0;
-  const uint8_t *data = nullptr;
-  uint64_t raw_length = 0;
-  uint8_t field_type = TTYPE_STOP;
-  const uint8_t *dummy = nullptr;
-
-  while ((field_type = readByte()) != TTYPE_STOP) {
-    int16_t field_id = readInt16();
-    switch (field_id) {
-      case 1:
-        CHECK_THRIFT_TYPE(field_type, TTYPE_LIST, "data");
-        CHECK_THRIFT_TYPE(readByte(), TTYPE_STRING, "data_type");
-        length = readInt32();
-        // Store the current location of the byte stream.
-        // Use this to decode the strings at a later point.
-        data = getBuffer();
-        for (int32_t i = 0; i < length; i++) {
-          // Skip reading the strings
-          getRawBuffer(&dummy);
-        }
-        raw_length = length;
-        break;
-      case 2:
-        CHECK_THRIFT_TYPE(field_type, TTYPE_LIST, "shape");
-        CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "shape_type");
-        shape = readShape();
-        break;
-      default:
-        throw ThriftInvalidField(field_id, "TensorRecordReader::readStringTensor");
-    }
-  }
-
-  // data is required
-  if (data == nullptr) {
-    throw twml::Error(TWML_ERR_THRIFT, "data field not found for StringTensor");
-  }
-
-  // shape is optional
-  if (shape.size() == 0) {
-    shape.push_back((uint64_t)length);
-  }
-
-  // TODO: Try avoiding stride calculation
-  std::vector<uint64_t> strides = calcStrides(shape);
-  // FIXME: Try to use const void * in Tensors.
-  return RawTensor(const_cast<void *>(static_cast<const void *>(data)),
-                   shape, strides, TWML_TYPE_UINT8, false, raw_length);
-}
-
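readStringTensor takes two passes: it records where the string list starts, skips over the length-prefixed strings, and decodes them later from the saved position. A self-contained sketch of that idea over an illustrative layout (4-byte little-endian length followed by the bytes), which is not the actual Thrift encoding:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

// Reads a 4-byte little-endian length (illustrative layout only).
static uint32_t readLen(const uint8_t *p) {
  uint32_t n;
  std::memcpy(&n, p, sizeof(n));
  return n;
}

int main() {
  // Two strings: "ab" and "cde", each prefixed by its length.
  std::vector<uint8_t> buf = {2, 0, 0, 0, 'a', 'b', 3, 0, 0, 0, 'c', 'd', 'e'};

  // Pass 1: skip all strings, keeping only the start offset and a count.
  const uint8_t *start = buf.data();
  const uint8_t *p = start;
  int count = 0;
  while (p < buf.data() + buf.size()) {
    p += 4 + readLen(p);
    count++;
  }
  assert(count == 2);

  // Pass 2: decode from the remembered offset.
  p = start;
  std::string first(reinterpret_cast<const char *>(p + 4), readLen(p));
  assert(first == "ab");
  return 0;
}
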
-RawTensor TensorRecordReader::readGeneralTensor() {
-  // No loop is required because GeneralTensor is a union; it will contain
-  // only one field, and all of its fields are structs.
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_STRUCT, "type");
-  int16_t field_id = readInt16();
-  RawTensor output;
-
-  switch (field_id) {
-    case GT_RAW:
-      output = readRawTypedTensor();
-      break;
-    case GT_STRING:
-      output = readStringTensor();
-      break;
-    case GT_INT32:
-      output = readTypedTensor<int32_t>();
-      break;
-    case GT_INT64:
-      output = readTypedTensor<int64_t>();
-      break;
-    case GT_FLOAT:
-    case GT_DOUBLE:
-      // Store both FloatTensor and DoubleTensor as double tensors, as both are lists of doubles.
-      output = readTypedTensor<double>();
-      break;
-    case GT_BOOL:
-      output = readTypedTensor<bool>();
-      break;
-    default:
-      throw ThriftInvalidField(field_id, "TensorRecordReader::readGeneralTensor()");
-  }
-
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_STOP, "stop");
-  return output;
-}
-
-RawSparseTensor TensorRecordReader::readCOOSparseTensor() {
-  std::vector<uint64_t> shape;
-  uint8_t field_type = TTYPE_STOP;
-  RawTensor indices, values;
-
-  while ((field_type = readByte()) != TTYPE_STOP) {
-    int16_t field_id = readInt16();
-    switch (field_id) {
-      case 1:
-        CHECK_THRIFT_TYPE(field_type, TTYPE_LIST, "shape");
-        CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "shape_type");
-        shape = readShape();
-        break;
-      case 2:
-        indices = readTypedTensor<int64_t>();
-        break;
-      case 3:
-        values = readGeneralTensor();
-        break;
-      default:
-        throw twml::Error(TWML_ERR_THRIFT, "Invalid field when decoding COOSparseTensor");
-    }
-  }
-
-  return RawSparseTensor(indices, values, shape);
-}
-
-void TensorRecordReader::readTensor(const int feature_type, TensorRecord *record) {
-  CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_STRUCT, "value_type");
-
-  int32_t length = readInt32();
-  for (int32_t i = 0; i < length; i++) {
-    int64_t id = readInt64();
-    record->m_tensors.emplace(id, readGeneralTensor());
-  }
-}
-
-void TensorRecordReader::readSparseTensor(const int feature_type, TensorRecord *record) {
-  CHECK_THRIFT_TYPE(feature_type, TTYPE_MAP, "type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_I64, "key_type");
-  CHECK_THRIFT_TYPE(readByte(), TTYPE_STRUCT, "value_type");
-
-  int32_t length = readInt32();
-  for (int32_t i = 0; i < length; i++) {
-    int64_t id = readInt64();
-
-    // No loop is required because SparseTensor is a union; it will contain
-    // only one field, and all of its fields are structs.
-    CHECK_THRIFT_TYPE(readByte(), TTYPE_STRUCT, "field");
-    int16_t field_id = readInt16();
-    RawSparseTensor output;
-
-    // Only COOSparseTensor is supported.
-    switch (field_id) {
-      case SP_COO:
-        output = readCOOSparseTensor();
-        break;
-      default:
-        throw ThriftInvalidField(field_id, "TensorRecordReader::readSparseTensor()");
-    }
-
-    // Read the last byte of the struct.
-    CHECK_THRIFT_TYPE(readByte(), TTYPE_STOP, "stop");
-
-    // Add to the map.
-    record->m_sparse_tensors.emplace(id, output);
-  }
-}
-
-}  // namespace twml
diff --git a/twml/libtwml/src/lib/TensorRecordReader.docx b/twml/libtwml/src/lib/TensorRecordReader.docx
new file mode 100644
index 000000000..293ab658a
Binary files /dev/null and b/twml/libtwml/src/lib/TensorRecordReader.docx differ
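readCOOSparseTensor assembles three pieces: an indices tensor, a values tensor, and a dense shape. A self-contained sketch of that COO layout and how it densifies; it uses flattened row-major positions for brevity, whereas the thrift struct stores indices as a tensor in their own right.

#include <cassert>
#include <cstdint>
#include <vector>

// Minimal COO container: parallel indices/values plus the dense shape.
struct CooSketch {
  std::vector<int64_t> indices;  // flattened (row-major) positions
  std::vector<double> values;
  std::vector<uint64_t> shape;
};

// Scatter the stored non-zeros into a zero-filled dense buffer.
std::vector<double> densify(const CooSketch &t) {
  uint64_t n = 1;
  for (uint64_t d : t.shape) n *= d;
  std::vector<double> dense(n, 0.0);
  for (size_t i = 0; i < t.indices.size(); i++) {
    dense[static_cast<size_t>(t.indices[i])] = t.values[i];
  }
  return dense;
}

int main() {
  // A 2x3 matrix with two non-zeros, at flat positions 1 and 5.
  CooSketch t{{1, 5}, {0.5, 2.0}, {2, 3}};
  std::vector<double> dense = densify(t);
  assert(dense[1] == 0.5 && dense[5] == 2.0 && dense[0] == 0.0);
  return 0;
}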