[docx] split commit for file 600

Signed-off-by: Ari Archer <ari.web.xyz@gmail.com>
This commit is contained in:
Ari Archer 2024-01-23 19:05:00 +02:00
parent 3c586de8ec
commit 78b3118da4
No known key found for this signature in database
GPG Key ID: A50D5B4B599AF8A2
400 changed files with 0 additions and 16377 deletions

View File

@ -1,139 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.thriftscala.AdsRecommendationTopLevelApiResult
import com.twitter.cr_mixer.thriftscala.AdsRecommendationsResult
import com.twitter.cr_mixer.thriftscala.AdsRequest
import com.twitter.cr_mixer.thriftscala.AdsResponse
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.GetAdsRecommendationsScribe
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Scribes intermediate and top-level results of the ads recommendation flow.
 *
 * Every public method attaches a success callback to the future produced by `getResultFn`
 * and returns the future from `onSuccess`, so callers keep working with the same result.
 * Nothing is scribed when the future fails.
 *
 * A message is scribed only when the caller passes `enableScribe = true` AND the
 * `adsRecommendationsPerExperimentScribeRate` decider is available for the requesting user.
 */
@Singleton
case class AdsRecommendationsScribeLogger @Inject() (
  @Named(ModuleNames.AdsRecommendationsLogger) adsRecommendationsScribeLogger: Logger,
  decider: CrMixerDecider,
  statsReceiver: StatsReceiver) {

  private val scopedStats = statsReceiver.scope(this.getClass.getCanonicalName)
  private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
  private val topLevelApiStats = scopedStats.scope("TopLevelApi")

  /**
   * Scribe first step results after fetching initial ads candidate.
   *
   * @param query        candidate generator query; supplies request UUID, user id and product
   * @param getResultFn  by-name future producing the fetched candidates; evaluated once
   * @param enableScribe controlled by feature switch so that we can scribe for certain DDG
   * @return the future returned by `getResultFn.onSuccess`
   */
  def scribeInitialAdsCandidates(
    query: AdsCandidateGeneratorQuery,
    getResultFn: => Future[Seq[Seq[InitialAdsCandidate]]],
    enableScribe: Boolean
  ): Future[Seq[Seq[InitialAdsCandidate]]] = {
    val scribeMetadata = ScribeMetadata.from(query)
    val timer = Stopwatch.start()
    getResultFn.onSuccess { input =>
      // Sample the latency first so it never includes the time spent building the scribe message.
      val latencyMs = timer().inMilliseconds
      scribeIfSampled(scribeMetadata, latencyMs, enableScribe, upperFunnelsStats) {
        convertFetchCandidatesResult(input, scribeMetadata.userId)
      }
    }
  }

  /**
   * Scribe top level API results.
   *
   * @param request        the ads request being served
   * @param startTime      value written to the `timestamp` field of the scribed result
   * @param scribeMetadata request UUID, user id and product of the request
   * @param getResultFn    by-name future producing the response; evaluated once
   * @param enableScribe   caller-side switch; when false nothing is scribed
   * @return the future returned by `getResultFn.onSuccess`
   */
  def scribeGetAdsRecommendations(
    request: AdsRequest,
    startTime: Long,
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[AdsResponse],
    enableScribe: Boolean
  ): Future[AdsResponse] = {
    val timer = Stopwatch.start()
    getResultFn.onSuccess { response =>
      val latencyMs = timer().inMilliseconds
      scribeIfSampled(scribeMetadata, latencyMs, enableScribe, topLevelApiStats) {
        AdsRecommendationsResult.AdsRecommendationTopLevelApiResult(
          AdsRecommendationTopLevelApiResult(
            timestamp = startTime,
            request = request,
            response = response
          ))
      }
    }
  }

  /**
   * Shared gate + publish step for both scribe entry points.
   *
   * The gate is checked BEFORE the result is converted, so requests that are not sampled pay
   * neither for the per-candidate Thrift conversion nor for `buildScribeMessage`. As a
   * consequence, the stats recorded by `getImpressedBuckets` (called from `buildScribeMessage`)
   * are emitted for sampled requests only.
   *
   * @param scribeMetadata request UUID, user id and product
   * @param latencyMs      latency to report in the message
   * @param enableScribe   caller-side switch; checked before the decider
   * @param stepStats      stats scope whose per-product counter is incremented on scribe
   * @param buildResult    by-name result payload; evaluated only when the gate passes
   */
  private def scribeIfSampled(
    scribeMetadata: ScribeMetadata,
    latencyMs: Long,
    enableScribe: Boolean,
    stepStats: StatsReceiver
  )(
    buildResult: => AdsRecommendationsResult
  ): Unit = {
    val isSampled = enableScribe && decider.isAvailableForId(
      scribeMetadata.userId,
      DeciderConstants.adsRecommendationsPerExperimentScribeRate)
    if (isSampled) {
      val result = buildResult
      val traceId = Trace.id.traceId.toLong
      val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
      // Count only after the message was built successfully, matching the previous ordering.
      stepStats.counter(scribeMetadata.product.originalName).incr()
      scribeResult(scribeMsg)
    }
  }

  /**
   * Flattens the per-source candidate lists into a single `FetchCandidatesResult`.
   * `numCandidateGenerationKeys` is left empty (not populated yet).
   */
  private def convertFetchCandidatesResult(
    candidatesSeq: Seq[Seq[InitialAdsCandidate]],
    requestUserId: UserId
  ): AdsRecommendationsResult = {
    val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
      candidates.map { candidate =>
        TweetCandidateWithMetadata(
          tweetId = candidate.tweetId,
          candidateGenerationKey = Some(
            CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
          score = Some(candidate.getSimilarityScore),
          numCandidateGenerationKeys = None // not populated yet
        )
      }
    }
    AdsRecommendationsResult.FetchCandidatesResult(
      FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
  }

  /** Wraps a result with request metadata, trace id, latency and the impressed buckets. */
  private def buildScribeMessage(
    result: AdsRecommendationsResult,
    scribeMetadata: ScribeMetadata,
    latencyMs: Long,
    traceId: Long
  ): GetAdsRecommendationsScribe = {
    GetAdsRecommendationsScribe(
      uuid = scribeMetadata.requestUUID,
      userId = scribeMetadata.userId,
      result = result,
      traceId = Some(traceId),
      performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
      impressedBuckets = getImpressedBuckets(scopedStats)
    )
  }

  /** Publishes the message through the injected ads recommendations logger. */
  private def scribeResult(
    scribeMsg: GetAdsRecommendationsScribe
  ): Unit = {
    publish(
      logger = adsRecommendationsScribeLogger,
      codec = GetAdsRecommendationsScribe,
      message = scribeMsg)
  }
}

View File

@ -1,34 +0,0 @@
# Library target that compiles every *.scala file in this directory.
# NOTE(review): judging by the neighbouring sources this is the
# com.twitter.cr_mixer.logging package - confirm against the BUILD file's path.
scala_library(
sources = ["*.scala"],
# Presumably promotes compiler warnings to errors for this target.
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
# Targets this library depends on (third-party, cr-mixer internal, and shared libraries).
dependencies = [
"3rdparty/jvm/javax/inject:javax.inject",
"abdecider/src/main/scala",
"content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"featureswitches/featureswitches-core/src/main/scala:experimentation-settings",
"finagle/finagle-core/src/main",
"frigate/frigate-common:base",
"frigate/frigate-common:util",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"kafka/finagle-kafka/finatra-kafka/src/main/scala",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
"scribelib/validators/src/main/scala/com/twitter/scribelib/validators",
"scrooge/scrooge-serializer/src/main/scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/ml/api:data-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"timelines/src/main/scala/com/twitter/timelines/clientevent",
"util-internal/scribe/src/main/scala/com/twitter/logging",
],
)

View File

@ -1,489 +0,0 @@
package com.twitter.cr_mixer.logging
import com.google.common.base.CaseFormat
import com.twitter.abdecider.ScribingABDeciderUtil
import com.twitter.scribelib.marshallers.ClientDataProvider
import com.twitter.scribelib.marshallers.ScribeSerialization
import com.twitter.timelines.clientevent.MinimalClientDataProvider
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.model.GraphSourceInfo
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.scribe.ScribeCategories
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.FetchSignalSourcesResult
import com.twitter.cr_mixer.thriftscala.GetTweetsRecommendationsScribe
import com.twitter.cr_mixer.thriftscala.InterleaveResult
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
import com.twitter.cr_mixer.thriftscala.Product
import com.twitter.cr_mixer.thriftscala.RankResult
import com.twitter.cr_mixer.thriftscala.Result
import com.twitter.cr_mixer.thriftscala.SourceSignal
import com.twitter.cr_mixer.thriftscala.TopLevelApiResult
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.cr_mixer.thriftscala.VITTweetCandidateScribe
import com.twitter.cr_mixer.thriftscala.VITTweetCandidatesScribe
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
import com.twitter.cr_mixer.util.MetricTagUtil
import com.twitter.decider.SimpleRecipient
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.finatra.kafka.producers.KafkaProducerBase
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
import scala.util.Random
/**
 * Scribes per-step and top-level results of the tweet recommendation flow.
 *
 * Every `scribe*` method attaches a success callback to the future produced by `getResultFn`
 * and returns the future from `onSuccess`; nothing is scribed when the future fails.
 *
 * Scribe messages are built on demand: a request that passes none of the sampling gates pays
 * neither for the per-candidate Thrift conversion nor for `buildScribeMessage`. As a
 * consequence, the stats recorded by `getImpressedBuckets` are emitted for sampled requests only.
 */
@Singleton
case class CrMixerScribeLogger @Inject() (
  decider: CrMixerDecider,
  statsReceiver: StatsReceiver,
  @Named(ModuleNames.TweetRecsLogger) tweetRecsScribeLogger: Logger,
  @Named(ModuleNames.BlueVerifiedTweetRecsLogger) blueVerifiedTweetRecsScribeLogger: Logger,
  @Named(ModuleNames.TopLevelApiDdgMetricsLogger) ddgMetricsLogger: Logger,
  kafkaProducer: KafkaProducerBase[String, GetTweetsRecommendationsScribe]) {

  import CrMixerScribeLogger._

  private val scopedStats = statsReceiver.scope("CrMixerScribeLogger")
  private val topLevelApiStats = scopedStats.scope("TopLevelApi")
  private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
  private val kafkaMessagesStats = scopedStats.scope("KafkaMessages")
  private val topLevelApiDdgMetricsStats = scopedStats.scope("TopLevelApiDdgMetrics")
  private val blueVerifiedTweetCandidatesStats = scopedStats.scope("BlueVerifiedTweetCandidates")

  private val serialization = new ScribeSerialization {}

  /** Scribes the fetched signal sources (source infos and source graphs) of a request. */
  def scribeSignalSources(
    query: CrCandidateGeneratorQuery,
    getResultFn: => Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])]
  ): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
    scribeResultsAndPerformanceMetrics(
      ScribeMetadata.from(query),
      getResultFn,
      convertToResultFn = convertFetchSignalSourcesResult
    )
  }

  /** Scribes the initial candidates as a `FetchCandidatesResult`. */
  def scribeInitialCandidates(
    query: CrCandidateGeneratorQuery,
    getResultFn: => Future[Seq[Seq[InitialCandidate]]]
  ): Future[Seq[Seq[InitialCandidate]]] = {
    scribeResultsAndPerformanceMetrics(
      ScribeMetadata.from(query),
      getResultFn,
      convertToResultFn = convertFetchCandidatesResult
    )
  }

  /** Scribes the candidates of the pre-rank filter step as a `PreRankFilterResult`. */
  def scribePreRankFilterCandidates(
    query: CrCandidateGeneratorQuery,
    getResultFn: => Future[Seq[Seq[InitialCandidate]]]
  ): Future[Seq[Seq[InitialCandidate]]] = {
    scribeResultsAndPerformanceMetrics(
      ScribeMetadata.from(query),
      getResultFn,
      convertToResultFn = convertPreRankFilterResult
    )
  }

  /**
   * Scribes the blended candidates as an `InterleaveResult`. This is the only step that
   * enables the Kafka fork of the scribe message.
   */
  def scribeInterleaveCandidates(
    query: CrCandidateGeneratorQuery,
    getResultFn: => Future[Seq[BlendedCandidate]]
  ): Future[Seq[BlendedCandidate]] = {
    scribeResultsAndPerformanceMetrics(
      ScribeMetadata.from(query),
      getResultFn,
      convertToResultFn = convertInterleaveResult,
      enableKafkaScribe = true
    )
  }

  /** Scribes the ranked candidates as a `RankResult`. */
  def scribeRankedCandidates(
    query: CrCandidateGeneratorQuery,
    getResultFn: => Future[Seq[RankedCandidate]]
  ): Future[Seq[RankedCandidate]] = {
    scribeResultsAndPerformanceMetrics(
      ScribeMetadata.from(query),
      getResultFn,
      convertToResultFn = convertRankResult
    )
  }

  /**
   * Scribe Top Level API Request / Response and performance metrics
   * for the getTweetRecommendations() endpoint.
   *
   * Two independent gates are evaluated per successful response: the scribe log gate
   * (`upperFunnelPerStepScribeRate`) and the DDG metrics gate
   * (`topLevelApiDdgMetricsScribeRate`).
   *
   * @param request        the request being served
   * @param startTime      value written to the `timestamp` field of the scribed result
   * @param scribeMetadata request UUID, user id and product of the request
   * @param getResultFn    by-name future producing the response; evaluated once
   */
  def scribeGetTweetRecommendations(
    request: CrMixerTweetRequest,
    startTime: Long,
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[CrMixerTweetResponse]
  ): Future[CrMixerTweetResponse] = {
    val timer = Stopwatch.start()
    getResultFn.onSuccess { response =>
      val latencyMs = timer().inMilliseconds

      // We use upperFunnelPerStepScribeRate to cover TopLevelApi scribe logs
      if (decider.isAvailableForId(
          scribeMetadata.userId,
          DeciderConstants.upperFunnelPerStepScribeRate)) {
        // Only this gate needs the scribe message, so it is built inside it.
        val result = convertTopLevelAPIResult(request, response, startTime)
        val traceId = Trace.id.traceId.toLong
        val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
        topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
        scribeResult(scribeMsg)
      }

      if (decider.isAvailableForId(
          scribeMetadata.userId,
          DeciderConstants.topLevelApiDdgMetricsScribeRate)) {
        topLevelApiDdgMetricsStats.counter(scribeMetadata.product.originalName).incr()
        val topLevelDdgMetricsMetadata = TopLevelDdgMetricsMetadata.from(request)
        publishTopLevelDdgMetrics(
          logger = ddgMetricsLogger,
          topLevelDdgMetricsMetadata = topLevelDdgMetricsMetadata,
          latencyMs = latencyMs,
          candidateSize = response.tweets.length)
      }
    }
  }

  /**
   * Scribe all of the Blue Verified tweets that are candidates from cr-mixer
   * from the getTweetRecommendations() endpoint for stats tracking/debugging purposes.
   *
   * A message is published whenever the `enableScribeForBlueVerifiedTweetCandidates` decider is
   * available, even if no candidate carries the blue-verified annotation.
   */
  def scribeGetTweetRecommendationsForBlueVerified(
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[Seq[RankedCandidate]]
  ): Future[Seq[RankedCandidate]] = {
    getResultFn.onSuccess { rankedCandidates =>
      if (decider.isAvailable(DeciderConstants.enableScribeForBlueVerifiedTweetCandidates)) {
        blueVerifiedTweetCandidatesStats.counter("process_request").incr()

        val blueVerifiedTweetCandidates = rankedCandidates.filter { tweet =>
          tweet.tweetInfo.hasBlueVerifiedAnnotation.contains(true)
        }

        val impressedBuckets = getImpressedBuckets(blueVerifiedTweetCandidatesStats).getOrElse(Nil)

        val blueVerifiedCandidateScribes = blueVerifiedTweetCandidates.map { candidate =>
          // NOTE(review): one counter per author id is created here; counter cardinality grows
          // with the number of distinct authors - confirm this is acceptable for the stats backend.
          blueVerifiedTweetCandidatesStats
            .scope(scribeMetadata.product.name).counter(
              candidate.tweetInfo.authorId.toString).incr()
          VITTweetCandidateScribe(
            tweetId = candidate.tweetId,
            authorId = candidate.tweetInfo.authorId,
            score = candidate.predictionScore,
            metricTags = MetricTagUtil.buildMetricTags(candidate)
          )
        }

        val blueVerifiedScribe =
          VITTweetCandidatesScribe(
            uuid = scribeMetadata.requestUUID,
            userId = scribeMetadata.userId,
            candidates = blueVerifiedCandidateScribes,
            product = scribeMetadata.product,
            impressedBuckets = impressedBuckets
          )

        publish(
          logger = blueVerifiedTweetRecsScribeLogger,
          codec = VITTweetCandidatesScribe,
          message = blueVerifiedScribe)
      }
    }
  }

  /**
   * Scribe Per-step intermediate results and performance metrics
   * for each step: fetch signals, fetch candidates, filters, ranker, etc
   *
   * @param scribeMetadata    request UUID, user id and product of the request
   * @param getResultFn       by-name future producing the step output; evaluated once
   * @param convertToResultFn converts the step output (and requesting user id) into a `Result`
   * @param enableKafkaScribe when true, the message may additionally be forked to Kafka
   */
  private[logging] def scribeResultsAndPerformanceMetrics[T](
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[T],
    convertToResultFn: (T, UserId) => Result,
    enableKafkaScribe: Boolean = false
  ): Future[T] = {
    val timer = Stopwatch.start()
    getResultFn.onSuccess { input =>
      val latencyMs = timer().inMilliseconds
      val traceId = Trace.id.traceId.toLong

      // Built at most once, and only if one of the gates below actually needs it.
      lazy val scribeMsg = {
        val result = convertToResultFn(input, scribeMetadata.userId)
        buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
      }

      if (decider.isAvailableForId(
          scribeMetadata.userId,
          DeciderConstants.upperFunnelPerStepScribeRate)) {
        // Force the message before counting so a failed build is not counted as scribed.
        val message = scribeMsg
        upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
        scribeResult(message)
      }

      // forks the scribe as a Kafka message for async feature hydration
      if (enableKafkaScribe && shouldScribeKafkaMessage(
          scribeMetadata.userId,
          scribeMetadata.product)) {
        val message = scribeMsg
        kafkaMessagesStats.counter(scribeMetadata.product.originalName).incr()

        val batchedKafkaMessages = downsampleKafkaMessage(message)
        batchedKafkaMessages.foreach { kafkaMessage =>
          kafkaProducer.send(
            topic = ScribeCategories.TweetsRecs.scribeCategory,
            key = traceId.toString,
            value = kafkaMessage,
            timestamp = Time.now.inMilliseconds
          )
        }
      }
    }
  }

  /** Wraps the request/response pair and the start time in a `TopLevelApiResult`. */
  private def convertTopLevelAPIResult(
    request: CrMixerTweetRequest,
    response: CrMixerTweetResponse,
    startTime: Long
  ): Result = {
    Result.TopLevelApiResult(
      TopLevelApiResult(
        timestamp = startTime,
        request = request,
        response = response
      ))
  }

  /**
   * Converts the fetched source infos and source graphs into a `FetchSignalSourcesResult`.
   * Source infos contribute their internal id; every source-graph entry contributes the
   * requesting user id as a placeholder.
   */
  private def convertFetchSignalSourcesResult(
    sourceInfoSetTuple: (Set[SourceInfo], Map[String, Option[GraphSourceInfo]]),
    requestUserId: UserId
  ): Result = {
    val sourceSignals = sourceInfoSetTuple._1.map { sourceInfo =>
      SourceSignal(id = Some(sourceInfo.internalId))
    }
    // For source graphs, we pass in requestUserId as a placeholder
    val sourceGraphs = sourceInfoSetTuple._2.map {
      case (_, _) =>
        SourceSignal(id = Some(InternalId.UserId(requestUserId)))
    }
    Result.FetchSignalSourcesResult(
      FetchSignalSourcesResult(
        signals = Some(sourceSignals ++ sourceGraphs)
      ))
  }

  /**
   * Flattens the per-source candidate lists into Thrift candidates. Shared by the
   * fetch-candidates and pre-rank-filter conversions, which differ only in the result wrapper.
   */
  private def toTweetCandidatesWithMetadata(
    candidatesSeq: Seq[Seq[InitialCandidate]],
    requestUserId: UserId
  ): Seq[TweetCandidateWithMetadata] = {
    candidatesSeq.flatMap { candidates =>
      candidates.map { candidate =>
        TweetCandidateWithMetadata(
          tweetId = candidate.tweetId,
          candidateGenerationKey = Some(
            CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
          score = Some(candidate.getSimilarityScore),
          numCandidateGenerationKeys = None // not populated yet
        )
      }
    }
  }

  /** Converts the initial candidates into a `FetchCandidatesResult`. */
  private def convertFetchCandidatesResult(
    candidatesSeq: Seq[Seq[InitialCandidate]],
    requestUserId: UserId
  ): Result = {
    val tweetCandidatesWithMetadata = toTweetCandidatesWithMetadata(candidatesSeq, requestUserId)
    Result.FetchCandidatesResult(FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
  }

  /** Converts the pre-rank filtered candidates into a `PreRankFilterResult`. */
  private def convertPreRankFilterResult(
    candidatesSeq: Seq[Seq[InitialCandidate]],
    requestUserId: UserId
  ): Result = {
    val tweetCandidatesWithMetadata = toTweetCandidatesWithMetadata(candidatesSeq, requestUserId)
    Result.PreRankFilterResult(PreRankFilterResult(Some(tweetCandidatesWithMetadata)))
  }

  // We take InterleaveResult for Unconstrained dataset ML ranker training
  private def convertInterleaveResult(
    blendedCandidates: Seq[BlendedCandidate],
    requestUserId: UserId
  ): Result = {
    val tweetCandidatesWithMetadata = blendedCandidates.map { blendedCandidate =>
      val candidateGenerationKey =
        CandidateGenerationKeyUtil.toThrift(blendedCandidate.reasonChosen, requestUserId)
      TweetCandidateWithMetadata(
        tweetId = blendedCandidate.tweetId,
        candidateGenerationKey = Some(candidateGenerationKey),
        authorId = Some(blendedCandidate.tweetInfo.authorId), // for ML pipeline training
        score = Some(blendedCandidate.getSimilarityScore),
        numCandidateGenerationKeys = Some(blendedCandidate.potentialReasons.size)
      ) // hydrate fields for light ranking training data
    }
    Result.InterleaveResult(InterleaveResult(Some(tweetCandidatesWithMetadata)))
  }

  /** Converts the ranked candidates into a `RankResult`. */
  private def convertRankResult(
    rankedCandidates: Seq[RankedCandidate],
    requestUserId: UserId
  ): Result = {
    val tweetCandidatesWithMetadata = rankedCandidates.map { rankedCandidate =>
      val candidateGenerationKey =
        CandidateGenerationKeyUtil.toThrift(rankedCandidate.reasonChosen, requestUserId)
      TweetCandidateWithMetadata(
        tweetId = rankedCandidate.tweetId,
        candidateGenerationKey = Some(candidateGenerationKey),
        score = Some(rankedCandidate.getSimilarityScore),
        numCandidateGenerationKeys = Some(rankedCandidate.potentialReasons.size)
      )
    }
    Result.RankResult(RankResult(Some(tweetCandidatesWithMetadata)))
  }

  /** Wraps a result with request metadata, trace id, latency and the impressed buckets. */
  private def buildScribeMessage(
    result: Result,
    scribeMetadata: ScribeMetadata,
    latencyMs: Long,
    traceId: Long
  ): GetTweetsRecommendationsScribe = {
    GetTweetsRecommendationsScribe(
      uuid = scribeMetadata.requestUUID,
      userId = scribeMetadata.userId,
      result = result,
      traceId = Some(traceId),
      performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
      impressedBuckets = getImpressedBuckets(scopedStats)
    )
  }

  /** Publishes the message through the injected tweet recommendations logger. */
  private def scribeResult(
    scribeMsg: GetTweetsRecommendationsScribe
  ): Unit = {
    publish(
      logger = tweetRecsScribeLogger,
      codec = GetTweetsRecommendationsScribe,
      message = scribeMsg)
  }

  /**
   * Gate for producing messages to Kafka for async feature hydration.
   * True only when the `kafkaMessageScribeSampleRate` decider is available for the user and
   * the product is `Product.Home`.
   */
  private def shouldScribeKafkaMessage(
    userId: UserId,
    product: Product
  ): Boolean = {
    val isEligibleUser = decider.isAvailable(
      DeciderConstants.kafkaMessageScribeSampleRate,
      Some(SimpleRecipient(userId)))
    val isHomeProduct = (product == Product.Home)
    isEligibleUser && isHomeProduct
  }

  /**
   * Due to size limits of Strato (see SD-19028), each Kafka message must be downsampled.
   *
   * For an `InterleaveResult` the tweets are shuffled, at most `KafkaMaxTweetsPerMessage` are
   * kept, and they are split into batches of `BatchSize`; each batch becomes one message that
   * keeps the original uuid, user id and trace id but carries no performance metrics or
   * impressed buckets. An interleave result without tweets yields no messages. Any other
   * result type is counted as invalid and yields a single message with no candidates.
   */
  private[logging] def downsampleKafkaMessage(
    scribeMsg: GetTweetsRecommendationsScribe
  ): Seq[GetTweetsRecommendationsScribe] = {
    val sampledResultSeq: Seq[Result] = scribeMsg.result match {
      case Result.InterleaveResult(interleaveResult) =>
        val sampledTweetsSeq = interleaveResult.tweets
          .map { tweets =>
            Random
              .shuffle(tweets).take(KafkaMaxTweetsPerMessage)
              .grouped(BatchSize).toSeq
          }.getOrElse(Seq.empty)

        sampledTweetsSeq.map { sampledTweets =>
          Result.InterleaveResult(InterleaveResult(Some(sampledTweets)))
        }

      // if it's an unrecognized type, err on the side of sending no candidates
      case _ =>
        kafkaMessagesStats.counter("InvalidKafkaMessageResultType").incr()
        Seq(Result.InterleaveResult(InterleaveResult(None)))
    }

    sampledResultSeq.map { sampledResult =>
      GetTweetsRecommendationsScribe(
        uuid = scribeMsg.uuid,
        userId = scribeMsg.userId,
        result = sampledResult,
        traceId = scribeMsg.traceId,
        performanceMetrics = None,
        impressedBuckets = None
      )
    }
  }

  /**
   * Handles client_event serialization to log data into DDG metrics.
   *
   * @param logger                     logger the serialized client event is written to
   * @param topLevelDdgMetricsMetadata user, product, client app and country of the request
   * @param candidateSize              logged under the `event_value` key
   * @param latencyMs                  logged under the `latency_ms` key
   */
  private[logging] def publishTopLevelDdgMetrics(
    logger: Logger,
    topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
    candidateSize: Long,
    latencyMs: Long,
  ): Unit = {
    val data = Map[Any, Any](
      "latency_ms" -> latencyMs,
      "event_value" -> candidateSize
    )
    val label: (String, String) = ("tweetrec", "")
    val namespace = getNamespace(topLevelDdgMetricsMetadata, label) + ("action" -> "candidates")
    val message =
      serialization
        .serializeClientEvent(namespace, getClientData(topLevelDdgMetricsMetadata), data)
    logger.info(message)
  }

  /** Builds the client data for the event; the guest id is never set. */
  private def getClientData(
    topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata
  ): ClientDataProvider =
    MinimalClientDataProvider(
      userId = topLevelDdgMetricsMetadata.userId,
      guestId = None,
      clientApplicationId = topLevelDdgMetricsMetadata.clientApplicationId,
      countryCode = topLevelDdgMetricsMetadata.countryCode
    )

  /**
   * Builds the client-event namespace. The product name is converted from UpperCamel to
   * lower_underscore for the `section`; `label` supplies `component` and `element`.
   */
  private def getNamespace(
    topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
    label: (String, String)
  ): Map[String, String] = {
    val productName =
      CaseFormat.UPPER_CAMEL
        .to(CaseFormat.LOWER_UNDERSCORE, topLevelDdgMetricsMetadata.product.originalName)

    Map(
      "client" -> ScribingABDeciderUtil.clientForAppId(
        topLevelDdgMetricsMetadata.clientApplicationId),
      "page" -> "cr-mixer",
      "section" -> productName,
      "component" -> label._1,
      "element" -> label._2
    )
  }
}
/** Limits applied when a scribe message is downsampled into Kafka messages. */
object CrMixerScribeLogger {

  /** Number of tweets grouped into a single Kafka message. */
  val BatchSize: Int = 20

  /** Maximum number of tweets kept from one scribe message before it is split into batches. */
  val KafkaMaxTweetsPerMessage: Int = 200
}

View File

@ -1,193 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.GetRelatedTweetsScribe
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
import com.twitter.cr_mixer.thriftscala.RelatedTweetResult
import com.twitter.cr_mixer.thriftscala.RelatedTweetTopLevelApiResult
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Scribes per-step and top-level results of the related tweets flow.
 *
 * Scribing requires a user id in the request's client context: requests without one only bump
 * a "NoUserId" counter. Requests with a user id are scribed when the
 * `upperFunnelPerStepScribeRate` decider is available for that user.
 */
@Singleton
case class RelatedTweetScribeLogger @Inject() (
  decider: CrMixerDecider,
  statsReceiver: StatsReceiver,
  @Named(ModuleNames.RelatedTweetsLogger) relatedTweetsScribeLogger: Logger) {

  private val scopedStats = statsReceiver.scope("RelatedTweetsScribeLogger")
  private val topLevelApiStats = scopedStats.scope("TopLevelApi")
  private val topLevelApiNoUserIdStats = scopedStats.scope("TopLevelApiNoUserId")
  private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
  private val upperFunnelsNoUserIdStats = scopedStats.scope("UpperFunnelsNoUserId")

  /** Scribes the initial candidates as a `FetchCandidatesResult`. */
  def scribeInitialCandidates(
    query: RelatedTweetCandidateGeneratorQuery,
    getResultFn: => Future[Seq[Seq[InitialCandidate]]]
  ): Future[Seq[Seq[InitialCandidate]]] =
    scribeResultsAndPerformanceMetrics(
      RelatedTweetScribeMetadata.from(query),
      getResultFn,
      convertToResultFn = convertFetchCandidatesResult
    )

  /** Scribes the candidates of the pre-rank filter step as a `PreRankFilterResult`. */
  def scribePreRankFilterCandidates(
    query: RelatedTweetCandidateGeneratorQuery,
    getResultFn: => Future[Seq[Seq[InitialCandidate]]]
  ): Future[Seq[Seq[InitialCandidate]]] =
    scribeResultsAndPerformanceMetrics(
      RelatedTweetScribeMetadata.from(query),
      getResultFn,
      convertToResultFn = convertPreRankFilterResult
    )

  /**
   * Scribes the request, the response and the latency of the getRelatedTweets endpoint once
   * the response future succeeds.
   *
   * @param request                    the request being served
   * @param startTime                  value written to the `timestamp` field of the result
   * @param relatedTweetScribeMetadata request identifiers, client context and product
   * @param getResultFn                by-name future producing the response; evaluated once
   */
  def scribeGetRelatedTweets(
    request: RelatedTweetRequest,
    startTime: Long,
    relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
    getResultFn: => Future[RelatedTweetResponse]
  ): Future[RelatedTweetResponse] = {
    val elapsed = Stopwatch.start()
    getResultFn.onSuccess { response =>
      scribeWhenSampled(
        relatedTweetScribeMetadata,
        sampledStats = topLevelApiStats,
        noUserIdStats = topLevelApiNoUserIdStats,
        elapsedMs = elapsed().inMilliseconds
      ) { _ =>
        convertTopLevelAPIResult(request, response, startTime)
      }
    }
  }

  /**
   * Scribes the output and the latency of one intermediate step (fetch candidates, filters)
   * once the step future succeeds.
   */
  private def scribeResultsAndPerformanceMetrics[T](
    relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
    getResultFn: => Future[T],
    convertToResultFn: (T, UserId) => RelatedTweetResult
  ): Future[T] = {
    val elapsed = Stopwatch.start()
    getResultFn.onSuccess { input =>
      scribeWhenSampled(
        relatedTweetScribeMetadata,
        sampledStats = upperFunnelsStats,
        noUserIdStats = upperFunnelsNoUserIdStats,
        elapsedMs = elapsed().inMilliseconds
      ) { userId =>
        convertToResultFn(input, userId)
      }
    }
  }

  /**
   * Common gate + publish step of both scribe paths.
   *
   * With a user id and an available decider: bump the per-product counter in `sampledStats`,
   * read the latency, build the result, then publish. Without a user id: bump the per-product
   * counter in `noUserIdStats`. Otherwise do nothing.
   *
   * @param elapsedMs by-name latency; read only when the request is sampled
   * @param toResult  builds the result payload from the requesting user id
   */
  private def scribeWhenSampled(
    metadata: RelatedTweetScribeMetadata,
    sampledStats: StatsReceiver,
    noUserIdStats: StatsReceiver,
    elapsedMs: => Long
  )(
    toResult: UserId => RelatedTweetResult
  ): Unit =
    metadata.clientContext.userId match {
      case Some(userId)
          if decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate) =>
        sampledStats.counter(metadata.product.originalName).incr()
        val latencyMs = elapsedMs
        val result = toResult(userId)
        val traceId = Trace.id.traceId.toLong
        scribeResult(buildScribeMessage(result, metadata, latencyMs, traceId))
      case Some(_) =>
        () // user id present but not sampled: nothing to do
      case _ =>
        noUserIdStats.counter(metadata.product.originalName).incr()
    }

  /** Wraps the request/response pair and the start time in a top-level API result. */
  private def convertTopLevelAPIResult(
    request: RelatedTweetRequest,
    response: RelatedTweetResponse,
    startTime: Long
  ): RelatedTweetResult = {
    val topLevelResult = RelatedTweetTopLevelApiResult(
      timestamp = startTime,
      request = request,
      response = response
    )
    RelatedTweetResult.RelatedTweetTopLevelApiResult(topLevelResult)
  }

  /**
   * Flattens the candidate lists into a `FetchCandidatesResult` that carries tweet ids only;
   * the candidate generation key is not hydrated, to save cost.
   */
  private def convertFetchCandidatesResult(
    candidatesSeq: Seq[Seq[InitialCandidate]],
    requestUserId: UserId
  ): RelatedTweetResult = {
    val flattened =
      for {
        candidates <- candidatesSeq
        candidate <- candidates
      } yield TweetCandidateWithMetadata(
        tweetId = candidate.tweetId,
        candidateGenerationKey = None
      )
    RelatedTweetResult.FetchCandidatesResult(FetchCandidatesResult(Some(flattened)))
  }

  /**
   * Flattens the candidate lists into a `PreRankFilterResult`, hydrating the candidate
   * generation key, the author id and the similarity score of every candidate.
   */
  private def convertPreRankFilterResult(
    candidatesSeq: Seq[Seq[InitialCandidate]],
    requestUserId: UserId
  ): RelatedTweetResult = {
    val flattened =
      for {
        candidates <- candidatesSeq
        candidate <- candidates
      } yield {
        val generationKey =
          CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)
        TweetCandidateWithMetadata(
          tweetId = candidate.tweetId,
          candidateGenerationKey = Some(generationKey),
          authorId = Some(candidate.tweetInfo.authorId),
          score = Some(candidate.getSimilarityScore),
          numCandidateGenerationKeys = None
        )
      }
    RelatedTweetResult.PreRankFilterResult(PreRankFilterResult(Some(flattened)))
  }

  /** Wraps a result with request identifiers, trace id, latency and the impressed buckets. */
  private def buildScribeMessage(
    relatedTweetResult: RelatedTweetResult,
    relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
    latencyMs: Long,
    traceId: Long
  ): GetRelatedTweetsScribe = {
    val clientContext = relatedTweetScribeMetadata.clientContext
    GetRelatedTweetsScribe(
      uuid = relatedTweetScribeMetadata.requestUUID,
      internalId = relatedTweetScribeMetadata.internalId,
      relatedTweetResult = relatedTweetResult,
      requesterId = clientContext.userId,
      guestId = clientContext.guestId,
      traceId = Some(traceId),
      performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
      impressedBuckets = getImpressedBuckets(scopedStats)
    )
  }

  /** Publishes the message through the injected related tweets logger. */
  private def scribeResult(
    scribeMsg: GetRelatedTweetsScribe
  ): Unit =
    publish(
      logger = relatedTweetsScribeLogger,
      codec = GetRelatedTweetsScribe,
      message = scribeMsg)
}

View File

@ -1,43 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.featureswitch.CrMixerImpressedBuckets
import com.twitter.cr_mixer.thriftscala.ImpressesedBucketInfo
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.logging.Logger
import com.twitter.scrooge.BinaryThriftStructSerializer
import com.twitter.scrooge.ThriftStruct
import com.twitter.scrooge.ThriftStructCodec
/** Helpers shared by the scribe loggers of this package. */
object ScribeLoggerUtils {

  /**
   * Serializes `message` to a string with the binary Thrift serializer for `codec`
   * (base64-encoded text) and writes it to `logger` at info level.
   */
  private[logging] def publish[T <: ThriftStruct](
    logger: Logger,
    codec: ThriftStructCodec[T],
    message: T
  ): Unit = {
    val serializer = BinaryThriftStructSerializer(codec)
    val encodedMessage = serializer.toString(message)
    logger.info(encodedMessage)
  }

  /**
   * Reads all impressed buckets and converts them to Thrift bucket infos.
   *
   * Buckets are de-duplicated before conversion, and the number of distinct buckets is recorded
   * in the `impressed_buckets` stat. A missing experiment id is reported as -1. The whole block
   * is tracked under the `getImpressedBuckets` stats scope.
   *
   * @return the bucket infos, or the empty value when no impressed buckets are available
   */
  private[logging] def getImpressedBuckets(
    scopedStats: StatsReceiver
  ): Option[List[ImpressesedBucketInfo]] = {
    val trackingStats = scopedStats.scope("getImpressedBuckets")
    StatsUtil.trackNonFutureBlockStats(trackingStats) {
      for (impressedBuckets <- CrMixerImpressedBuckets.getAllImpressedBuckets) yield {
        val distinctBuckets = impressedBuckets.toSet
        scopedStats.stat("impressed_buckets").add(distinctBuckets.size)
        // Mapping over the Set (not a List) keeps the de-duplication of identical infos.
        val bucketInfos = distinctBuckets.map { bucket =>
          val settings = bucket.experiment.settings
          ImpressesedBucketInfo(
            experimentId = settings.experimentId.getOrElse(-1L),
            bucketName = bucket.name,
            version = settings.version
          )
        }
        bucketInfos.toList
      }
    }
  }
}

View File

@ -1,45 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.thriftscala.Product
import com.twitter.product_mixer.core.thriftscala.ClientContext
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
/**
 * Request-level identifiers attached to every scribe message.
 *
 * @param requestUUID UUID of the request, copied from the query
 * @param userId      id of the requesting user
 * @param product     product the request was issued for
 */
case class ScribeMetadata(
requestUUID: Long,
userId: UserId,
product: Product)
/** Factory methods building [[ScribeMetadata]] from the supported query types. */
object ScribeMetadata {

  /** Copies the request UUID, user id and product of a tweet candidate generator query. */
  def from(query: CrCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)

  /** Copies the request UUID, user id and product of a UTEG candidate generator query. */
  def from(query: UtegTweetCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)

  /** Copies the request UUID, user id and product of an ads candidate generator query. */
  def from(query: AdsCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)
}
/**
 * Request-level identifiers attached to related tweets scribe messages.
 *
 * @param requestUUID   UUID of the request, copied from the query
 * @param internalId    internal id the query was issued for
 * @param clientContext client context of the request
 * @param product       product the request was issued for
 */
case class RelatedTweetScribeMetadata(
requestUUID: Long,
internalId: InternalId,
clientContext: ClientContext,
product: Product)
/** Factory for [[RelatedTweetScribeMetadata]]. */
object RelatedTweetScribeMetadata {

  /** Copies the request UUID, internal id, client context and product of the query. */
  def from(query: RelatedTweetCandidateGeneratorQuery): RelatedTweetScribeMetadata =
    RelatedTweetScribeMetadata(
      requestUUID = query.requestUUID,
      internalId = query.internalId,
      clientContext = query.clientContext,
      product = query.product
    )
}

View File

@ -1,22 +0,0 @@
package com.twitter.cr_mixer
package logging
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
import com.twitter.cr_mixer.thriftscala.Product
/**
 * Request attributes kept for top-level DDG metrics: the product plus the optional
 * user id, client application id and country code. The companion's `from` fills the
 * optional fields from the request's client context.
 */
case class TopLevelDdgMetricsMetadata(
userId: Option[Long],
product: Product,
clientApplicationId: Option[Long],
countryCode: Option[String])
object TopLevelDdgMetricsMetadata {

  /**
   * Builds the metadata from a request: the product comes from the request itself,
   * while user id, app id and country code come from its client context.
   */
  def from(request: CrMixerTweetRequest): TopLevelDdgMetricsMetadata = {
    val clientContext = request.clientContext
    // Positional order: userId, product, clientApplicationId, countryCode.
    TopLevelDdgMetricsMetadata(
      clientContext.userId,
      request.product,
      clientContext.appId,
      clientContext.countryCode)
  }
}

View File

@ -1,147 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.GetUtegTweetsScribe
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.UtegTweetResult
import com.twitter.cr_mixer.thriftscala.UtegTweetTopLevelApiResult
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Scribes UTEG tweet results together with their latency, both for the top-level
 * GetUtegTweetRecommendations() call and for the initial-candidates step.
 *
 * Scribing only happens when the wrapped future succeeds and the
 * `upperFunnelPerStepScribeRate` decider is available for the requesting user id.
 */
@Singleton
case class UtegTweetScribeLogger @Inject() (
  decider: CrMixerDecider,
  statsReceiver: StatsReceiver,
  @Named(ModuleNames.UtegTweetsLogger) utegTweetScribeLogger: Logger) {

  private val scopedStats = statsReceiver.scope("UtegTweetScribeLogger")
  private val topLevelApiStats = scopedStats.scope("TopLevelApi")
  private val upperFunnelsStats = scopedStats.scope("UpperFunnels")

  /**
   * Scribes the initial candidates produced by `getResultFn`, counted under the
   * "UpperFunnels" stats scope.
   *
   * @param query       query the candidates were fetched for; supplies the scribe metadata
   * @param getResultFn by-name computation of the candidates, evaluated once after the timer starts
   * @return the future produced by `getResultFn` with the scribing callback attached
   */
  def scribeInitialCandidates(
    query: UtegTweetCandidateGeneratorQuery,
    getResultFn: => Future[Seq[TweetWithScoreAndSocialProof]]
  ): Future[Seq[TweetWithScoreAndSocialProof]] =
    scribeResultsAndPerformanceMetrics(
      ScribeMetadata.from(query),
      getResultFn,
      convertToResultFn = convertFetchCandidatesResult
    )

  /**
   * Scribe Top Level API Request / Response and performance metrics
   * for the GetUtegTweetRecommendations() endpoint, counted under the
   * "TopLevelApi" stats scope.
   *
   * @param request        the incoming request, scribed verbatim
   * @param startTime      written as the `timestamp` of the scribed result
   * @param scribeMetadata request UUID, user id and product of the request
   * @param getResultFn    by-name computation of the response, evaluated once after the timer starts
   * @return the future produced by `getResultFn` with the scribing callback attached
   */
  def scribeGetUtegTweetRecommendations(
    request: UtegTweetRequest,
    startTime: Long,
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[UtegTweetResponse]
  ): Future[UtegTweetResponse] =
    scribeOnSuccess(scribeMetadata, topLevelApiStats, getResultFn) { response =>
      convertTopLevelAPIResult(request, response, startTime)
    }

  /**
   * Scribe Per-step intermediate results and performance metrics
   * for each step: fetch candidates, filters.
   */
  private def scribeResultsAndPerformanceMetrics[T](
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[T],
    convertToResultFn: (T, UserId) => UtegTweetResult
  ): Future[T] =
    scribeOnSuccess(scribeMetadata, upperFunnelsStats, getResultFn) { input =>
      convertToResultFn(input, scribeMetadata.userId)
    }

  /**
   * Shared scribing path: starts a stopwatch, evaluates `getResultFn`, and on success
   * (when the decider allows it for this user) bumps the per-product counter on `stats`,
   * converts the value with `toResult` and publishes the scribe message.
   */
  private def scribeOnSuccess[T](
    scribeMetadata: ScribeMetadata,
    stats: StatsReceiver,
    getResultFn: => Future[T]
  )(
    toResult: T => UtegTweetResult
  ): Future[T] = {
    // The stopwatch must start before the by-name result is evaluated.
    val elapsed = Stopwatch.start()
    getResultFn.onSuccess { value =>
      val shouldScribe = decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.upperFunnelPerStepScribeRate)
      if (shouldScribe) {
        stats.counter(scribeMetadata.product.originalName).incr()
        val latencyMs = elapsed().inMilliseconds
        val result = toResult(value)
        val traceId = Trace.id.traceId.toLong
        scribeResult(buildScribeMessage(result, scribeMetadata, latencyMs, traceId))
      }
    }
  }

  /** Wraps the request/response pair, stamped with `startTime`, as a top-level API result. */
  private def convertTopLevelAPIResult(
    request: UtegTweetRequest,
    response: UtegTweetResponse,
    startTime: Long
  ): UtegTweetResult = {
    val topLevelResult = UtegTweetTopLevelApiResult(
      timestamp = startTime,
      request = request,
      response = response
    )
    UtegTweetResult.UtegTweetTopLevelApiResult(topLevelResult)
  }

  /**
   * Wraps the candidates' tweet ids as a fetch-candidates result.
   * `requestUserId` is not used; the parameter keeps the method compatible with the
   * `(T, UserId) => UtegTweetResult` converter shape.
   */
  private def convertFetchCandidatesResult(
    candidates: Seq[TweetWithScoreAndSocialProof],
    requestUserId: UserId
  ): UtegTweetResult = {
    // do not hydrate candidateGenerationKey to save cost
    val scribedCandidates = candidates.map { candidate =>
      TweetCandidateWithMetadata(tweetId = candidate.tweetId, candidateGenerationKey = None)
    }
    val fetchResult = FetchCandidatesResult(Some(scribedCandidates))
    UtegTweetResult.FetchCandidatesResult(fetchResult)
  }

  /** Assembles the scribe record, adding trace id, latency and the impressed buckets. */
  private def buildScribeMessage(
    utegTweetResult: UtegTweetResult,
    scribeMetadata: ScribeMetadata,
    latencyMs: Long,
    traceId: Long
  ): GetUtegTweetsScribe = {
    val metrics = PerformanceMetrics(Some(latencyMs))
    GetUtegTweetsScribe(
      uuid = scribeMetadata.requestUUID,
      userId = scribeMetadata.userId,
      utegTweetResult = utegTweetResult,
      traceId = Some(traceId),
      performanceMetrics = Some(metrics),
      impressedBuckets = getImpressedBuckets(scopedStats)
    )
  }

  /** Publishes the record on the injected UTEG scribe logger. */
  private def scribeResult(
    scribeMsg: GetUtegTweetsScribe
  ): Unit =
    publish(logger = utegTweetScribeLogger, codec = GetUtegTweetsScribe, message = scribeMsg)
}

View File

@ -1,16 +0,0 @@
# Library target built from every `*.scala` file in this directory.
# Its dependencies are listed explicitly and `strict_deps` is enabled.
# NOTE(review): "fatal_warnings" presumably turns compiler warnings into build errors - confirm.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"configapi/configapi-core",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/recos:recos-common-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -1,200 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.thriftscala.LineItemInfo
import com.twitter.simclusters_v2.common.TweetId
/**
 * Base type of the tweet candidate models in this file; every candidate exposes
 * the id of the tweet it refers to.
 */
sealed trait Candidate {
val tweetId: TweetId
// The hash is the tweet id truncated to an Int, so only the id's low 32 bits contribute.
// NOTE(review): as a concrete override in a base trait, this should replace the
// field-based hashCode that the case classes extending Candidate would otherwise
// generate - confirm if hash distribution matters to a caller.
override def hashCode: Int = tweetId.toInt
}
/** A tweet id paired with the candidate-generation info that produced it. */
case class TweetWithCandidateGenerationInfo(
  tweetId: TweetId,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /** Score reported by the similarity engine, or 0.0 when it reported none. */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = candidateGenerationInfo.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }
}
/** A candidate as first generated: tweet id, tweet info and the CG info that produced it. */
case class InitialCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /**
   * Similarity score of the tweet, read from its CG info; 0.0 when no score is set.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted
   * combined score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = candidateGenerationInfo.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /**
   * Promotes this candidate to a [[BlendedCandidate]], using its own CG info as the
   * chosen reason.
   *
   * The same candidate can be generated by multiple algorithms and is deduped during
   * blending. To retain the candidateGenerationInfo from the different algorithms,
   * they are attached as the list of `potentialReasons`.
   */
  def toBlendedCandidate(
    potentialReasons: Seq[CandidateGenerationInfo]
  ): BlendedCandidate =
    BlendedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = potentialReasons
    )

  // for experimental purposes only when bypassing interleave / ranking
  def toRankedCandidate(): RankedCandidate =
    RankedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      // prediction score is default to 0.0 to help differentiate that it is a no-op
      predictionScore = 0.0,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = Seq(candidateGenerationInfo)
    )
}
/** An ads candidate as first generated: tweet id, its line items and the CG info that produced it. */
case class InitialAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /**
   * Similarity score of the tweet, read from its CG info; 0.0 when no score is set.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted
   * combined score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = candidateGenerationInfo.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /**
   * Promotes this candidate to a [[BlendedAdsCandidate]], using its own CG info as
   * the chosen reason.
   *
   * The same candidate can be generated by multiple algorithms and is deduped during
   * blending. To retain the candidateGenerationInfo from the different algorithms,
   * they are attached as the list of `potentialReasons`.
   */
  def toBlendedAdsCandidate(
    potentialReasons: Seq[CandidateGenerationInfo]
  ): BlendedAdsCandidate =
    BlendedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = potentialReasons
    )

  // for experimental purposes only when bypassing interleave / ranking
  def toRankedAdsCandidate(): RankedAdsCandidate =
    RankedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      // prediction score is default to 0.0 to help differentiate that it is a no-op
      predictionScore = 0.0,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = Seq(candidateGenerationInfo)
    )
}
/**
 * A candidate after blending: the reason it was chosen plus every reason it could
 * have been chosen. Construction asserts that `reasonChosen` is one of
 * `potentialReasons`.
 */
case class BlendedCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Checked once, when the instance is constructed.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score of the tweet, read from the chosen reason; 0.0 when no score is set.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted
   * combined score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /** Attaches `predictionScore`, keeping the chosen and potential reasons unchanged. */
  def toRankedCandidate(predictionScore: Double): RankedCandidate =
    RankedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      predictionScore = predictionScore,
      reasonChosen = reasonChosen,
      potentialReasons = potentialReasons
    )
}
/**
 * An ads candidate after blending: the reason it was chosen plus every reason it
 * could have been chosen. Construction asserts that `reasonChosen` is one of
 * `potentialReasons`.
 */
case class BlendedAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Checked once, when the instance is constructed.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score of the tweet, read from the chosen reason; 0.0 when no score is set.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted
   * combined score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /** Attaches `predictionScore`, keeping the chosen and potential reasons unchanged. */
  def toRankedAdsCandidate(predictionScore: Double): RankedAdsCandidate =
    RankedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      predictionScore = predictionScore,
      reasonChosen = reasonChosen,
      potentialReasons = potentialReasons
    )
}
/**
 * A candidate with a prediction score attached. Construction asserts that
 * `reasonChosen` is one of `potentialReasons`.
 */
case class RankedCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  predictionScore: Double,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Checked once, when the instance is constructed.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score of the tweet, read from the chosen reason; 0.0 when no score is set.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted
   * combined score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }
}
/**
 * An ads candidate with a prediction score attached. Construction asserts that
 * `reasonChosen` is one of `potentialReasons`.
 */
case class RankedAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  predictionScore: Double,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Checked once, when the instance is constructed.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score of the tweet, read from the chosen reason; 0.0 when no score is set.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted
   * combined score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }
}
/** A [[Candidate]] that carries only a tweet id and a score. */
case class TripTweetWithScore(tweetId: TweetId, score: Double) extends Candidate

View File

@ -1,67 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Time
/**
 * Tweet-level attributes. Represents the source used in candidate generation.
 * For legacy reasons, SourceType used to represent both SourceType and SimilarityEngineType.
 * Moving forward, SourceType will be used for SourceType ONLY, e.g., TweetFavorite, UserFollow, TwiceUserId.
 * At the same time, a new SimilarityEngineType was created to separate them, e.g., SimClustersANN.
 *
 * Currently, one special case is that we have TwiceUserId as a source, which is not necessarily a "signal".
 * @param sourceType e.g., SourceType.TweetFavorite, SourceType.UserFollow, SourceType.TwiceUserId
 * @param internalId e.g., UserId(0L), TweetId(0L)
 * @param sourceEventTime optional time of the source event (NOTE(review): inferred from the name - confirm)
 */
case class SourceInfo(
sourceType: SourceType,
internalId: InternalId,
sourceEventTime: Option[Time])
/**
 * Tweet-level attributes. Represents the source User Graph used in candidate generation.
 * It is an intermediate product, and will not be stored, unlike SourceInfo.
 * Essentially, CrMixer queries a graph, and the graph returns a list of users to be used as sources.
 * For instance, RealGraph, EarlyBird, FRS, Stp, etc. The underlying similarity engines such as
 * UTG or UTEG will leverage these sources to build candidates.
 *
 * The definition of SourceType was extended to cover both "Source Signal" and "Source Graph".
 * See [CrMixer] Graph Based Source Fetcher Abstraction Proposal:
 *
 * Consider making both SourceInfo and GraphSourceInfo extend the same trait to
 * have a unified interface.
 */
case class GraphSourceInfo(
sourceType: SourceType,
seedWithScores: Map[UserId, Double])
/**
 * Tweet-level attributes. Represents the similarity engine (the algorithm) used for
 * candidate generation, along with its metadata.
 * @param similarityEngineType e.g., SimClustersANN, UserTweetGraph
 * @param modelId e.g., UserTweetGraphConsumerEmbedding_ALL_20210708
 * @param score a score generated by this sim engine
 */
case class SimilarityEngineInfo(
similarityEngineType: SimilarityEngineType,
modelId: Option[String], // ModelId can be None, e.g., for UTEG, UnifiedTweetBasedSE, etc.
score: Option[Double])
/**
 * Tweet-level attributes. A combination of both SourceInfo and SimilarityEngineInfo.
 * A SimilarityEngine is a composition, and it can be composed of many leaf Similarity Engines.
 * For instance, the TweetBasedUnified SE could be a composition of both the UserTweetGraph SE and the SimClustersANN SE.
 * Note that a SimilarityEngine (Composite) may call other SimilarityEngines (Atomic, Contributing)
 * to contribute to its final candidate list. These Contributing SEs are tracked in the contributingSimilarityEngines list.
 *
 * @param sourceInfoOpt this is optional, as many consumerBased CGs do not have a source
 * @param similarityEngineInfo the similarity engine used in Candidate Generation (e.g., TweetBasedUnifiedSE). It can be an atomic SE or a composite SE
 * @param contributingSimilarityEngines only a composite SE will have it (e.g., SANN, UTG). Otherwise it is an empty Seq. All contributing SEs must be atomic
 */
case class CandidateGenerationInfo(
sourceInfoOpt: Option[SourceInfo],
similarityEngineInfo: SimilarityEngineInfo,
contributingSimilarityEngines: Seq[SimilarityEngineInfo])

View File

@ -1,96 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.cr_mixer.thriftscala.Product
import com.twitter.product_mixer.core.thriftscala.ClientContext
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.timelines.configapi.Params
/**
 * Fields shared by the candidate-generator query types in this file that extend it.
 * The trait is sealed, so all of its implementations are declared in this file.
 * Note that [[AdsCandidateGeneratorQuery]] does not extend it.
 */
sealed trait CandidateGeneratorQuery {
val product: Product
val maxNumResults: Int
val impressedTweetList: Set[TweetId]
val params: Params
val requestUUID: Long
}
/**
 * Mixin for query types that expose a `userId`. In this file it is mixed into
 * [[CrCandidateGeneratorQuery]] and [[UtegTweetCandidateGeneratorQuery]] only.
 */
sealed trait HasUserId {
val userId: UserId
}
/**
 * User-keyed [[CandidateGeneratorQuery]]; adds the user state and an optional
 * language code (None by default) to the shared query fields.
 */
case class CrCandidateGeneratorQuery(
userId: UserId,
product: Product,
userState: UserState,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long,
languageCode: Option[String] = None)
extends CandidateGeneratorQuery
with HasUserId
/**
 * User-keyed [[CandidateGeneratorQuery]] for UTEG tweet candidates; adds the user
 * state to the shared query fields.
 */
case class UtegTweetCandidateGeneratorQuery(
userId: UserId,
product: Product,
userState: UserState,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long)
extends CandidateGeneratorQuery
with HasUserId
/**
 * [[CandidateGeneratorQuery]] keyed by an `internalId` instead of a user id; it also
 * carries the request's client context.
 */
case class RelatedTweetCandidateGeneratorQuery(
internalId: InternalId,
clientContext: ClientContext, // To scribe LogIn/LogOut requests
product: Product,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long)
extends CandidateGeneratorQuery
/**
 * [[CandidateGeneratorQuery]] with the same fields as
 * [[RelatedTweetCandidateGeneratorQuery]], declared as a distinct type for
 * related-video tweet requests.
 */
case class RelatedVideoTweetCandidateGeneratorQuery(
internalId: InternalId,
clientContext: ClientContext, // To scribe LogIn/LogOut requests
product: Product,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long)
extends CandidateGeneratorQuery
/**
 * [[CandidateGeneratorQuery]] for FRS tweet candidates. Besides the shared fields it
 * carries a set of impressed users and optional language and country codes (both
 * None by default). It has a `userId` but does not mix in [[HasUserId]].
 * Because `requestUUID` follows the defaulted parameters, it has to be passed by
 * name whenever those defaults are relied on.
 */
case class FrsTweetCandidateGeneratorQuery(
userId: UserId,
product: Product,
maxNumResults: Int,
impressedUserList: Set[UserId],
impressedTweetList: Set[TweetId],
params: Params,
languageCodeOpt: Option[String] = None,
countryCodeOpt: Option[String] = None,
requestUUID: Long)
extends CandidateGeneratorQuery
/**
 * Query for ads candidate generation. It is a standalone case class: it has no
 * `impressedTweetList` and extends neither [[CandidateGeneratorQuery]] nor
 * [[HasUserId]].
 */
case class AdsCandidateGeneratorQuery(
userId: UserId,
product: Product,
userState: UserState,
maxNumResults: Int,
params: Params,
requestUUID: Long)
/**
 * [[CandidateGeneratorQuery]] for topic tweet candidates; adds the set of topic ids
 * and an `isVideoOnly` flag to the shared query fields. It has a `userId` but does
 * not mix in [[HasUserId]].
 */
case class TopicTweetCandidateGeneratorQuery(
userId: UserId,
topicIds: Set[TopicId],
product: Product,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long,
isVideoOnly: Boolean)
extends CandidateGeneratorQuery

View File

@ -1,6 +0,0 @@
package com.twitter.cr_mixer.model
/**
 * Closed set of Earlybird similarity engine variants: recency based, model based
 * and Tensorflow based. The variants are plain objects (not case objects), so they
 * keep the default `toString`.
 */
sealed trait EarlybirdSimilarityEngineType
object EarlybirdSimilarityEngineType_RecencyBased extends EarlybirdSimilarityEngineType
object EarlybirdSimilarityEngineType_ModelBased extends EarlybirdSimilarityEngineType
object EarlybirdSimilarityEngineType_TensorflowBased extends EarlybirdSimilarityEngineType

View File

@ -1,11 +0,0 @@
package com.twitter.cr_mixer.model
/** Health threshold levels, exposed as the nested [[HealthThreshold.Enum]] enumeration. */
object HealthThreshold {

  /**
   * Levels with explicit ids 1 to 5, from `Off` up to `StricterPlus`.
   * Each value is given its name explicitly; the names equal the val identifiers.
   */
  object Enum extends Enumeration {
    val Off: Value = Value(1, "Off")
    val Moderate: Value = Value(2, "Moderate")
    val Strict: Value = Value(3, "Strict")
    val Stricter: Value = Value(4, "Stricter")
    val StricterPlus: Value = Value(5, "StricterPlus")
  }
}

View File

@ -1,77 +0,0 @@
package com.twitter.cr_mixer.model
/**
 * Model ids for all Model Based Candidate Sources.
 *
 * Model name guideline: please name your modelId as "Algorithm_Product_Date".
 * If your model is used for multiple product surfaces, name the product as "all".
 * Don't name your algorithm MBCG. All the algorithms here are MBCG =.=
 *
 * Don't forget to add your new models to the allHnswANNSimilarityEngineModelIds list.
 */
object ModelConfig {

  // Offline SimClusters CG Experiment related Model Ids
  val OfflineInterestedInFromKnownFor2020: String = "OfflineIIKF_ALL_20220414"
  val OfflineInterestedInFromKnownFor2020Hl0El15: String = "OfflineIIKF_ALL_20220414_Hl0_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El15: String = "OfflineIIKF_ALL_20220414_Hl2_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El50: String = "OfflineIIKF_ALL_20220414_Hl2_El50"
  val OfflineInterestedInFromKnownFor2020Hl8El50: String = "OfflineIIKF_ALL_20220414_Hl8_El50"
  val OfflineMTSConsumerEmbeddingsFav90P20M: String =
    "OfflineMTSConsumerEmbeddingsFav90P20M_ALL_20220414"

  // Twhin Model Ids
  val ConsumerBasedTwHINRegularUpdateAll20221024: String =
    "ConsumerBasedTwHINRegularUpdate_All_20221024"

  // Averaged Twhin Model Ids
  val TweetBasedTwHINRegularUpdateAll20221024: String =
    "TweetBasedTwHINRegularUpdate_All_20221024"

  // Collaborative Filtering Twhin Model Ids
  val TwhinCollabFilterForFollow: String = "TwhinCollabFilterForFollow"
  val TwhinCollabFilterForEngagement: String = "TwhinCollabFilterForEngagement"
  val TwhinMultiClusterForFollow: String = "TwhinMultiClusterForFollow"
  val TwhinMultiClusterForEngagement: String = "TwhinMultiClusterForEngagement"

  // Two Tower model Ids
  val TwoTowerFavALL20220808: String = "TwoTowerFav_ALL_20220808"

  // Debugger Demo-Only Model Ids
  val DebuggerDemo: String = "DebuggerDemo"

  // ColdStartLookalike - this is not really a model name; it is a placeholder to
  // indicate the ColdStartLookalike candidate source, which is currently plugged into
  // CustomizedRetrievalCandidateGeneration temporarily.
  val ColdStartLookalikeModelName: String = "ConsumersBasedUtgColdStartLookalike20220707"

  // consumersBasedUTG-RealGraphOon Model Id
  val ConsumersBasedUtgRealGraphOon20220705: String = "ConsumersBasedUtgRealGraphOon_All_20220705"

  // consumersBasedUAG-RealGraphOon Model Id
  val ConsumersBasedUagRealGraphOon20221205: String = "ConsumersBasedUagRealGraphOon_All_20221205"

  // FTR
  val OfflineFavDecayedSum: String = "OfflineFavDecayedSum"
  val OfflineFtrAt5Pop1000RnkDcy11: String = "OfflineFtrAt5Pop1000RnkDcy11"
  val OfflineFtrAt5Pop10000RnkDcy11: String = "OfflineFtrAt5Pop10000RnkDcy11"

  // All Model Ids of HnswANNSimilarityEngines
  val allHnswANNSimilarityEngineModelIds: Seq[String] = Seq(
    ConsumerBasedTwHINRegularUpdateAll20221024,
    TwoTowerFavALL20220808,
    DebuggerDemo
  )

  val ConsumerLogFavBasedInterestedInEmbedding: String =
    "ConsumerLogFavBasedInterestedIn_ALL_20221228"
  val ConsumerFollowBasedInterestedInEmbedding: String =
    "ConsumerFollowBasedInterestedIn_ALL_20221228"

  val RetweetBasedDiffusion: String = "RetweetBasedDiffusion"
}

View File

@ -1,122 +0,0 @@
package com.twitter.cr_mixer.model
/**
 * String names used for name-annotated (`@Named`) injection bindings.
 *
 * Keep every entry a `final val` without a type ascription: that makes it a
 * constant value definition, which is what allows it to be used as an annotation
 * argument, e.g. `@Named(ModuleNames.UnifiedCache)`.
 */
object ModuleNames {
final val FrsStore = "FrsStore"
final val UssStore = "UssStore"
final val UssStratoColumn = "UssStratoColumn"
final val RsxStore = "RsxStore"
final val RmsTweetLogFavLongestL2EmbeddingStore = "RmsTweetLogFavLongestL2EmbeddingStore"
final val RmsUserFavBasedProducerEmbeddingStore = "RmsUserFavBasedProducerEmbeddingStore"
final val RmsUserLogFavInterestedInEmbeddingStore = "RmsUserLogFavInterestedInEmbeddingStore"
final val RmsUserFollowInterestedInEmbeddingStore = "RmsUserFollowInterestedInEmbeddingStore"
final val StpStore = "StpStore"
final val TwiceClustersMembersStore = "TwiceClustersMembersStore"
final val TripCandidateStore = "TripCandidateStore"
final val ConsumerEmbeddingBasedTripSimilarityEngine =
"ConsumerEmbeddingBasedTripSimilarityEngine"
final val ConsumerEmbeddingBasedTwHINANNSimilarityEngine =
"ConsumerEmbeddingBasedTwHINANNSimilarityEngine"
final val ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine =
"ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine"
final val ConsumersBasedUserAdGraphSimilarityEngine =
"ConsumersBasedUserAdGraphSimilarityEngine"
final val ConsumersBasedUserVideoGraphSimilarityEngine =
"ConsumersBasedUserVideoGraphSimilarityEngine"
final val ConsumerBasedWalsSimilarityEngine = "ConsumerBasedWalsSimilarityEngine"
final val TweetBasedTwHINANNSimilarityEngine = "TweetBasedTwHINANNSimilarityEngine"
final val SimClustersANNSimilarityEngine = "SimClustersANNSimilarityEngine"
final val ProdSimClustersANNServiceClientName = "ProdSimClustersANNServiceClient"
final val ExperimentalSimClustersANNServiceClientName = "ExperimentalSimClustersANNServiceClient"
final val SimClustersANNServiceClientName1 = "SimClustersANNServiceClient1"
final val SimClustersANNServiceClientName2 = "SimClustersANNServiceClient2"
final val SimClustersANNServiceClientName3 = "SimClustersANNServiceClient3"
final val SimClustersANNServiceClientName5 = "SimClustersANNServiceClient5"
final val SimClustersANNServiceClientName4 = "SimClustersANNServiceClient4"
// Cache names; unlike most entries, these values start with a lower-case letter.
final val UnifiedCache = "unifiedCache"
final val MLScoreCache = "mlScoreCache"
final val TweetRecommendationResultsCache = "tweetRecommendationResultsCache"
final val EarlybirdTweetsCache = "earlybirdTweetsCache"
final val EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache =
"earlybirdTweetsWithoutRetweetsRepliesCacheStore"
final val EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache =
"earlybirdTweetsWithRetweetsRepliesCacheStore"
// Logger names.
final val AbDeciderLogger = "abDeciderLogger"
final val TopLevelApiDdgMetricsLogger = "topLevelApiDdgMetricsLogger"
final val TweetRecsLogger = "tweetRecsLogger"
final val BlueVerifiedTweetRecsLogger = "blueVerifiedTweetRecsLogger"
final val RelatedTweetsLogger = "relatedTweetsLogger"
final val UtegTweetsLogger = "utegTweetsLogger"
// The value below is singular ("adsRecommendationLogger") although the identifier is plural.
final val AdsRecommendationsLogger = "adsRecommendationLogger"
final val OfflineSimClustersANNInterestedInSimilarityEngine =
"OfflineSimClustersANNInterestedInSimilarityEngine"
final val RealGraphOonStore = "RealGraphOonStore"
final val RealGraphInStore = "RealGraphInStore"
final val OfflineTweet2020CandidateStore = "OfflineTweet2020CandidateStore"
final val OfflineTweet2020Hl0El15CandidateStore = "OfflineTweet2020Hl0El15CandidateStore"
final val OfflineTweet2020Hl2El15CandidateStore = "OfflineTweet2020Hl2El15CandidateStore"
final val OfflineTweet2020Hl2El50CandidateStore = "OfflineTweet2020Hl2El50CandidateStore"
final val OfflineTweet2020Hl8El50CandidateStore = "OfflineTweet2020Hl8El50CandidateStore"
final val OfflineTweetMTSCandidateStore = "OfflineTweetMTSCandidateStore"
final val OfflineFavDecayedSumCandidateStore = "OfflineFavDecayedSumCandidateStore"
final val OfflineFtrAt5Pop1000RankDecay11CandidateStore =
"OfflineFtrAt5Pop1000RankDecay11CandidateStore"
final val OfflineFtrAt5Pop10000RankDecay11CandidateStore =
"OfflineFtrAt5Pop10000RankDecay11CandidateStore"
final val TwhinCollabFilterStratoStoreForFollow = "TwhinCollabFilterStratoStoreForFollow"
final val TwhinCollabFilterStratoStoreForEngagement = "TwhinCollabFilterStratoStoreForEngagement"
final val TwhinMultiClusterStratoStoreForFollow = "TwhinMultiClusterStratoStoreForFollow"
final val TwhinMultiClusterStratoStoreForEngagement = "TwhinMultiClusterStratoStoreForEngagement"
final val ProducerBasedUserAdGraphSimilarityEngine =
"ProducerBasedUserAdGraphSimilarityEngine"
final val ProducerBasedUserTweetGraphSimilarityEngine =
"ProducerBasedUserTweetGraphSimilarityEngine"
final val ProducerBasedUnifiedSimilarityEngine = "ProducerBasedUnifiedSimilarityEngine"
final val TweetBasedUserAdGraphSimilarityEngine = "TweetBasedUserAdGraphSimilarityEngine"
final val TweetBasedUserTweetGraphSimilarityEngine = "TweetBasedUserTweetGraphSimilarityEngine"
final val TweetBasedUserVideoGraphSimilarityEngine = "TweetBasedUserVideoGraphSimilarityEngine"
final val TweetBasedQigSimilarityEngine = "TweetBasedQigSimilarityEngine"
final val TweetBasedUnifiedSimilarityEngine = "TweetBasedUnifiedSimilarityEngine"
final val TwhinCollabFilterSimilarityEngine = "TwhinCollabFilterSimilarityEngine"
final val ConsumerBasedUserTweetGraphStore = "ConsumerBasedUserTweetGraphStore"
final val ConsumerBasedUserVideoGraphStore = "ConsumerBasedUserVideoGraphStore"
final val ConsumerBasedUserAdGraphStore = "ConsumerBasedUserAdGraphStore"
final val UserTweetEntityGraphSimilarityEngine =
"UserTweetEntityGraphSimilarityEngine"
final val CertoTopicTweetSimilarityEngine = "CertoTopicTweetSimilarityEngine"
final val CertoStratoStoreName = "CertoStratoStore"
final val SkitTopicTweetSimilarityEngine = "SkitTopicTweetSimilarityEngine"
final val SkitHighPrecisionTopicTweetSimilarityEngine =
"SkitHighPrecisionTopicTweetSimilarityEngine"
final val SkitStratoStoreName = "SkitStratoStore"
final val HomeNaviGRPCClient = "HomeNaviGRPCClient"
final val AdsFavedNaviGRPCClient = "AdsFavedNaviGRPCClient"
final val AdsMonetizableNaviGRPCClient = "AdsMonetizableNaviGRPCClient"
final val RetweetBasedDiffusionRecsMhStore = "RetweetBasedDiffusionRecsMhStore"
final val DiffusionBasedSimilarityEngine = "DiffusionBasedSimilarityEngine"
final val BlueVerifiedAnnotationStore = "BlueVerifiedAnnotationStore"
}

View File

@ -1,13 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.simclusters_v2.common.TweetId
/**
 * Binds a tweetId with a raw score generated from one single Similarity Engine.
 * @param tweetId the tweet the score belongs to
 * @param score the raw score
 * @param similarityEngineType which underlying topic source the topic tweet is from
 */
case class TopicTweetWithScore(
tweetId: TweetId,
score: Double,
similarityEngineType: SimilarityEngineType)

View File

@ -1,6 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
/** Pairs a tweet id with the user id of its author. */
case class TweetWithAuthor(tweetId: TweetId, authorId: UserId)

View File

@ -1,8 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.simclusters_v2.common.TweetId
/**
 * Binds a tweetId with a raw score generated from one single Similarity Engine.
 */
case class TweetWithScore(tweetId: TweetId, score: Double)

View File

@ -1,12 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.recos.recos_common.thriftscala.SocialProofType
/**
 * Binds a tweetId with a raw score and its social proofs, grouped by type.
 * @param socialProofByType ids for each social proof type
 *                          (NOTE(review): presumably user ids - confirm against the producer)
 */
case class TweetWithScoreAndSocialProof(
tweetId: TweetId,
score: Double,
socialProofByType: Map[SocialProofType, Seq[Long]])

View File

@ -1,135 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.thrift.CompactThriftCodec
import com.twitter.ads.entities.db.thriftscala.LineItemObjective
import com.twitter.bijection.Injection
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.thriftscala.LineItemInfo
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.ml.api.DataRecord
import com.twitter.ml.api.DataType
import com.twitter.ml.api.Feature
import com.twitter.ml.api.GeneralTensor
import com.twitter.ml.api.RichDataRecord
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.manhattan.Revenue
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.util.Future
import javax.inject.Named
import scala.collection.JavaConverters._
/**
 * Guice module wiring the store that resolves a tweet id to the line items promoting it.
 */
object ActivePromotedTweetStoreModule extends TwitterModule {

  /**
   * Adapts a store of `DataRecord`s, keyed by the tweet id rendered as a string, into a store
   * of [[LineItemInfo]] sequences keyed by tweet id.
   *
   * @param activePromotedTweetMHStore backing store, queried with `tweetId.toString`
   * @param statsReceiver              accepted by the constructor but not read in this class
   */
  case class ActivePromotedTweetStore(
    activePromotedTweetMHStore: ReadableStore[String, DataRecord],
    statsReceiver: StatsReceiver)
      extends ReadableStore[TweetId, Seq[LineItemInfo]] {

    /**
     * Looks up the record stored for `tweetId` and decodes it.
     *
     * @return `None` when the backing store has no record; otherwise the decoded line items,
     *         which is an empty sequence when the id and objective lists differ in length
     */
    override def get(tweetId: TweetId): Future[Option[Seq[LineItemInfo]]] =
      activePromotedTweetMHStore
        .get(tweetId.toString)
        .map(maybeRecord => maybeRecord.map(toLineItemInfos))

    /** Decodes the two parallel INT64 tensor features of a record into line item infos. */
    private def toLineItemInfos(dataRecord: DataRecord): Seq[LineItemInfo] = {
      val record = new RichDataRecord(dataRecord)
      val idsFeature: Feature[GeneralTensor] =
        new Feature.Tensor("active_promoted_tweets.line_item_ids", DataType.INT64)
      val objectivesFeature: Feature[GeneralTensor] =
        new Feature.Tensor("active_promoted_tweets.line_item_objectives", DataType.INT64)

      // NOTE(review): both tensors are dereferenced without a null check; this assumes
      // getFeatureValue never returns null for these features -- confirm.
      val idsTensor: GeneralTensor = record.getFeatureValue(idsFeature)
      val objectivesTensor: GeneralTensor = record.getFeatureValue(objectivesFeature)

      val lineItemIds: Seq[Long] = int64Values(idsTensor)
      val lineItemObjectives: Seq[LineItemObjective] =
        int64Values(objectivesTensor).map(rawObjective => LineItemObjective(rawObjective.toInt))

      // The two lists are parallel: element i of one describes element i of the other.
      // A length mismatch means the record cannot be paired up, so nothing is returned.
      if (lineItemIds.size != lineItemObjectives.size) Seq.empty
      else
        lineItemIds.zip(lineItemObjectives).map {
          case (lineItemId, lineItemObjective) =>
            LineItemInfo(
              lineItemId = lineItemId,
              lineItemObjective = lineItemObjective
            )
        }
    }

    /** Returns the longs held by `tensor`, or an empty sequence unless it is an INT64 tensor with longs set. */
    private def int64Values(tensor: GeneralTensor): Seq[Long] =
      if (tensor.getSetField == GeneralTensor._Fields.INT64_TENSOR && tensor.getInt64Tensor.isSetLongs) {
        tensor.getInt64Tensor.getLongs.asScala.map(_.toLong)
      } else Seq.empty
  }

  /**
   * Builds the tweet id -> line items store as three layers, outermost first:
   *  - an in-memory cache (30 minute ttl, at most 250000 keys),
   *  - a memcached layer (60 minute ttl, keys of the form `apt/<tweetId>`),
   *  - [[ActivePromotedTweetStore]] reading the `active_promoted_tweets` Manhattan dataset.
   */
  @Provides
  @Singleton
  def providesActivePromotedTweetStore(
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    crMixerStatsReceiver: StatsReceiver
  ): ReadableStore[TweetId, Seq[LineItemInfo]] = {
    val manhattanConfig = new ManhattanROConfig {
      val hdfsPath = HDFSPath("")
      val applicationID = ApplicationID("ads_bigquery_features")
      val datasetName = DatasetName("active_promoted_tweets")
      val cluster = Revenue
      override def statsReceiver: StatsReceiver =
        crMixerStatsReceiver.scope("active_promoted_tweets_mh")
    }

    val dataRecordStore: ReadableStore[String, DataRecord] =
      ManhattanRO.getReadableStoreWithMtls[String, DataRecord](
        manhattanConfig,
        manhattanKVClientMtlsParams
      )(
        implicitly[Injection[String, Array[Byte]]],
        CompactThriftCodec[DataRecord]
      )

    val lineItemStore =
      ActivePromotedTweetStore(
        dataRecordStore,
        crMixerStatsReceiver.scope("ActivePromotedTweetStore"))

    val memcacheBackedStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = lineItemStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 60.minutes,
      asyncUpdate = false
    )(
      valueInjection = LZ4Injection.compose(SeqObjectInjection[LineItemInfo]()),
      statsReceiver = crMixerStatsReceiver.scope("memCachedActivePromotedTweetStore"),
      keyToString = { tweetId: TweetId => s"apt/$tweetId" }
    )

    ObservedCachedReadableStore.from(
      memcacheBackedStore,
      ttl = 30.minutes,
      maxKeys = 250000, // size of promoted tweet is around 200,000
      windowSize = 10000L,
      cacheName = "active_promoted_tweet_cache",
      maxMultiGetSize = 20
    )(crMixerStatsReceiver.scope("inMemoryCachedActivePromotedTweetStore"))
  }
}

View File

@ -1,130 +0,0 @@
# Scala library target for this directory. It compiles every `*.scala` file here and in the
# core/, grpc_client/, similarity_engine/, source_signal/ and thrift_client/ subdirectories.
# The `fatal_warnings` compiler option set and `strict_deps = True` are both enabled;
# presumably this means warnings fail the build and every directly used dependency must be
# listed below -- confirm against the build tool documentation.
scala_library(
sources = [
"*.scala",
"core/*.scala",
"grpc_client/*.scala",
"similarity_engine/*.scala",
"source_signal/*.scala",
"thrift_client/*.scala",
],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/twitter/bijection:core",
"3rdparty/jvm/com/twitter/bijection:scrooge",
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/jvm/com/twitter/storehaus:memcache",
"3rdparty/jvm/io/grpc:grpc-api",
"3rdparty/jvm/io/grpc:grpc-auth",
"3rdparty/jvm/io/grpc:grpc-core",
"3rdparty/jvm/io/grpc:grpc-netty",
"3rdparty/jvm/io/grpc:grpc-protobuf",
"3rdparty/jvm/io/grpc:grpc-stub",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/jvm/org/scalanlp:breeze",
# NOTE(review): differs from "3rdparty/jvm/com/twitter/storehaus:core" above only by the
# extra `src/` path segment -- confirm both entries are really needed.
"3rdparty/src/jvm/com/twitter/storehaus:core",
"abdecider/src/main/scala",
"ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
"configapi/configapi-abdecider",
"configapi/configapi-core",
"configapi/configapi-featureswitches:v2",
"content-recommender/server/src/main/scala/com/twitter/contentrecommender:cr-mixer-deps",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
"featureswitches/featureswitches-core",
"featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
"finagle-internal/finagle-grpc/src/main/scala",
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
"finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/consumers",
"finatra-internal/mtls-thriftmux/src/main/scala",
"finatra/inject/inject-core/src/main/scala",
"finatra/inject/inject-modules/src/main/scala",
"finatra/inject/inject-thrift-client",
"follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
"frigate/frigate-common:util",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"hydra/partition/thrift/src/main/thrift:thrift-scala",
"hydra/root/thrift/src/main/thrift:thrift-scala",
"mediaservices/commons/src/main/scala:futuretracker",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"qig-ranker/thrift/src/main/thrift:thrift-scala",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/health_store",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
"relevance-platform/thrift/src/main/thrift:thrift-scala",
"representation-manager/client/src/main/scala/com/twitter/representation_manager",
"representation-manager/client/src/main/scala/com/twitter/representation_manager/config",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/migration",
"representation-manager/server/src/main/thrift:thrift-scala",
"representation-scorer/server/src/main/thrift:thrift-scala",
"servo/decider",
"servo/util/src/main/scala",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/java/com/twitter/ml/api:api-base",
"src/java/com/twitter/search/queryparser/query:core-query-nodes",
"src/java/com/twitter/search/queryparser/query/search:search-query-nodes",
"src/scala/com/twitter/algebird_internal/injection",
"src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
"src/scala/com/twitter/ml/api/embedding",
"src/scala/com/twitter/ml/featurestore/lib",
"src/scala/com/twitter/scalding_internal/multiformat/format",
"src/scala/com/twitter/simclusters_v2/candidate_source",
"src/scala/com/twitter/simclusters_v2/common",
"src/scala/com/twitter/storehaus_internal/manhattan",
"src/scala/com/twitter/storehaus_internal/manhattan/config",
"src/scala/com/twitter/storehaus_internal/memcache",
"src/scala/com/twitter/storehaus_internal/memcache/config",
"src/scala/com/twitter/storehaus_internal/offline",
"src/scala/com/twitter/storehaus_internal/util",
"src/scala/com/twitter/topic_recos/stores",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
"src/thrift/com/twitter/frigate:frigate-thrift-scala",
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
"src/thrift/com/twitter/hermit/stp:hermit-stp-scala",
"src/thrift/com/twitter/ml/api:data-java",
"src/thrift/com/twitter/ml/api:embedding-scala",
"src/thrift/com/twitter/ml/featurestore:ml-feature-store-embedding-scala",
"src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
"src/thrift/com/twitter/recos:recos-common-scala",
"src/thrift/com/twitter/recos/user_ad_graph:user_ad_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_entity_graph:user_tweet_entity_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
"src/thrift/com/twitter/recos/user_video_graph:user_video_graph-scala",
"src/thrift/com/twitter/search:earlybird-scala",
"src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/topic_recos:topic_recos-thrift-scala",
"src/thrift/com/twitter/trends/trip_v1:trip-tweets-thrift-scala",
"src/thrift/com/twitter/tweetypie:service-scala",
"src/thrift/com/twitter/twistly:twistly-scala",
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
"stitch/stitch-storehaus",
"stitch/stitch-tweetypie/src/main/scala",
"strato/src/main/scala/com/twitter/strato/client",
"user-signal-service/thrift/src/main/thrift:thrift-scala",
"util-internal/scribe/src/main/scala/com/twitter/logging",
"util/util-hashing",
],
)

View File

@ -1,52 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.inject.TwitterModule
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Athena
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.hermit.store.common.ObservedCachedReadableStore
/**
 * Guice module providing the store of blue-verified annotations, read from Manhattan and
 * fronted by an in-memory cache.
 */
object BlueVerifiedAnnotationStoreModule extends TwitterModule {

  /**
   * Reads the `blue_verified_annotations` dataset and caches entries in memory for 24 hours
   * (at most 100000 keys).
   *
   * @param statsReceiver               scoped to `inMemoryCachedBlueVerifiedAnnotationStore` for the cache layer
   * @param manhattanKVClientMtlsParams mTLS parameters handed to the Manhattan client
   */
  @Provides
  @Singleton
  @Named(ModuleNames.BlueVerifiedAnnotationStore)
  def providesBlueVerifiedAnnotationStore(
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams
  ): ReadableStore[String, BlueVerifiedAnnotationsV2] = {
    // Picked up implicitly as the value codec by getReadableStoreWithMtls below.
    implicit val valueCodec = new BinaryScalaCodec(BlueVerifiedAnnotationsV2)

    val athenaConfig = ManhattanROConfig(
      HDFSPath(""),
      ApplicationID("content_recommender_athena"),
      DatasetName("blue_verified_annotations"),
      Athena)

    val manhattanStore =
      ManhattanRO.getReadableStoreWithMtls[String, BlueVerifiedAnnotationsV2](
        athenaConfig,
        manhattanKVClientMtlsParams
      )

    ObservedCachedReadableStore.from(
      manhattanStore,
      ttl = 24.hours,
      maxKeys = 100000,
      windowSize = 10000L,
      cacheName = "blue_verified_annotation_cache"
    )(statsReceiver.scope("inMemoryCachedBlueVerifiedAnnotationStore"))
  }
}

View File

@ -1,57 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.Client
import com.twitter.topic_recos.stores.CertoTopicTopKTweetsStore
import com.twitter.topic_recos.thriftscala.TweetWithScores
/**
 * Guice module providing the topic -> top tweets store backed by the Certo Strato column.
 */
object CertoStratoStoreModule extends TwitterModule {

  /**
   * Builds the store as three layers, outermost first:
   *  - an in-memory cache (5 minute ttl, at most 100000 keys),
   *  - a memcached layer (10 minute ttl, keys of the form `certo:<hash of topicId.toString>`),
   *  - the observed Certo store, projected onto
   *    `topTweetsByFollowerL2NormalizedCosineSimilarityScore`.
   */
  @Provides
  @Singleton
  @Named(ModuleNames.CertoStratoStoreName)
  def providesCertoStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicId, Seq[TweetWithScores]] = {
    val topTweetsStore =
      ObservedReadableStore(CertoTopicTopKTweetsStore.prodStore(stratoClient))(
        statsReceiver.scope(ModuleNames.CertoStratoStoreName)
      ).mapValues { topK =>
        topK.topTweetsByFollowerL2NormalizedCosineSimilarityScore
      }

    val memcacheBackedStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = topTweetsStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 10.minutes
    )(
      valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScores]()),
      statsReceiver = statsReceiver.scope("memcached_certo_store"),
      keyToString = { topicId => s"certo:${keyHasher.hashKey(topicId.toString.getBytes)}" }
    )

    ObservedCachedReadableStore.from[TopicId, Seq[TweetWithScores]](
      memcacheBackedStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      cacheName = "certo_in_memory_cache",
      windowSize = 10000L
    )(statsReceiver.scope("certo_in_memory_cache"))
  }
}

View File

@ -1,30 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest
import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse
import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module exposing the UserAdGraph `consumersBasedRelatedAds` endpoint as a ReadableStore.
 */
object ConsumersBasedUserAdGraphStoreModule extends TwitterModule {

  /**
   * Wraps the thrift client in a store whose `get` forwards the request to the service.
   * A successful response is always wrapped in `Some`; this adapter never yields `None`.
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserAdGraphStore)
  def providesConsumerBasedUserAdGraphStore(
    userAdGraphService: UserAdGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] =
    new ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] {
      override def get(
        request: ConsumersBasedRelatedAdRequest
      ): Future[Option[RelatedAdResponse]] =
        userAdGraphService
          .consumersBasedRelatedAds(request)
          .map(response => Some(response))
    }
}

View File

@ -1,30 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_tweet_graph.thriftscala.ConsumersBasedRelatedTweetRequest
import com.twitter.recos.user_tweet_graph.thriftscala.RelatedTweetResponse
import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module exposing the UserTweetGraph `consumersBasedRelatedTweets` endpoint as a
 * ReadableStore.
 */
object ConsumersBasedUserTweetGraphStoreModule extends TwitterModule {

  /**
   * Wraps the thrift client in a store whose `get` forwards the request to the service.
   * A successful response is always wrapped in `Some`; this adapter never yields `None`.
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserTweetGraphStore)
  def providesConsumerBasedUserTweetGraphStore(
    userTweetGraphService: UserTweetGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] =
    new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
      override def get(
        request: ConsumersBasedRelatedTweetRequest
      ): Future[Option[RelatedTweetResponse]] =
        userTweetGraphService
          .consumersBasedRelatedTweets(request)
          .map(response => Some(response))
    }
}

View File

@ -1,30 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest
import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse
import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module exposing the UserVideoGraph `consumersBasedRelatedTweets` endpoint as a
 * ReadableStore.
 */
object ConsumersBasedUserVideoGraphStoreModule extends TwitterModule {

  /**
   * Wraps the thrift client in a store whose `get` forwards the request to the service.
   * A successful response is always wrapped in `Some`; this adapter never yields `None`.
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserVideoGraphStore)
  def providesConsumerBasedUserVideoGraphStore(
    userVideoGraphService: UserVideoGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] =
    new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
      override def get(
        request: ConsumersBasedRelatedTweetRequest
      ): Future[Option[RelatedTweetResponse]] =
        userVideoGraphService
          .consumersBasedRelatedTweets(request)
          .map(response => Some(response))
    }
}

View File

@ -1,16 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.timelines.configapi.Config
import com.twitter.cr_mixer.param.CrMixerParamConfig
import com.twitter.inject.TwitterModule
import javax.inject.Singleton
/**
 * Guice module binding the configapi `Config` to the one defined by `CrMixerParamConfig`.
 */
object CrMixerParamConfigModule extends TwitterModule {

  /** Returns `CrMixerParamConfig.config` as the singleton `Config` binding. */
  @Provides
  @Singleton
  def provideConfig(): Config = CrMixerParamConfig.config
}

View File

@ -1,54 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.TweetsWithScore
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module providing Manhattan-backed stores of diffusion-based tweet recommendations.
 */
object DiffusionStoreModule extends TwitterModule {
  type UserId = Long

  // NOTE(review): deliberately left without an explicit type. With an annotation, the
  // `implicitly` call on the right-hand side would likely resolve to this very val (and so
  // read it before it is initialised) -- verify before adding one.
  implicit val longCodec = implicitly[Injection[Long, Array[Byte]]]

  implicit val tweetRecsInjection: Injection[TweetsWithScore, Array[Byte]] =
    BinaryScalaCodec(TweetsWithScore)

  /**
   * Store over the `diffusion_retweet_tweet_recs` dataset of the `cr_mixer_apollo` application.
   *
   * @param serviceIdentifier identity used to build the Manhattan mTLS parameters
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RetweetBasedDiffusionRecsMhStore)
  def retweetBasedDiffusionRecsMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[Long, TweetsWithScore] =
    buildTweetRecsStore(
      serviceIdentifier,
      ManhattanROConfig(
        HDFSPath(""), // not needed
        ApplicationID("cr_mixer_apollo"),
        DatasetName("diffusion_retweet_tweet_recs"),
        Apollo
      )
    )

  /** Opens a read-only Manhattan store for `manhattanROConfig`, using the codecs declared above. */
  private def buildTweetRecsStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[Long, TweetsWithScore] = {
    val mtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)
    ManhattanRO.getReadableStoreWithMtls[Long, TweetsWithScore](
      manhattanROConfig,
      mtlsParams
    )(longCodec, tweetRecsInjection)
  }
}

View File

@ -1,189 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.EarlybirdClientId
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.FacetsToFetch
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetCollectorTerminationParams
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetEarlybirdQuery
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.MetadataOptions
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.SeqLongInjection
import com.twitter.hashing.KeyHasher
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.search.common.query.thriftjava.thriftscala.CollectorParams
import com.twitter.search.earlybird.thriftscala.EarlybirdRequest
import com.twitter.search.earlybird.thriftscala.EarlybirdResponseCode
import com.twitter.search.earlybird.thriftscala.EarlybirdService
import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery
import com.twitter.search.earlybird.thriftscala.ThriftSearchRankingMode
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Duration
import com.twitter.util.Future
import javax.inject.Named
/**
 * Guice module providing two memcache-backed stores that map a user id to the ids of that
 * user's tweets, fetched from Earlybird in recency ranking mode. The two stores differ only
 * in whether retweets and replies are filtered out of the search query.
 */
object EarlybirdRecencyBasedCandidateStoreModule extends TwitterModule {

  /**
   * Store whose Earlybird query filters out retweets and replies.
   *
   * @param statsReceiver                    root receiver; scoped separately for the search results and the memcache layer
   * @param earlybirdSearchClient            Earlybird thrift client used on cache misses
   * @param earlybirdRecencyBasedTweetsCache memcached client holding the cached tweet id lists
   * @param timeoutConfig                    supplies `earlybirdServerTimeout` for the request
   */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        // Home based EB filters out retweets and replies
        val earlybirdRequest =
          buildEarlybirdRequest(
            userId,
            FilterOutRetweetsAndReplies,
            DefaultMaxNumTweetPerUser,
            timeoutConfig.earlybirdServerTimeout)
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_home_memcache"),
      keyToString = { k =>
        f"uEBRBHM:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedHoMe
      }
    )
  }

  /**
   * Store whose Earlybird query keeps retweets and replies.
   *
   * @param statsReceiver                    root receiver; scoped separately for the search results and the memcache layer
   * @param earlybirdSearchClient            Earlybird thrift client used on cache misses
   * @param earlybirdRecencyBasedTweetsCache memcached client holding the cached tweet id lists
   * @param timeoutConfig                    supplies `earlybirdServerTimeout` for the request
   */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        val earlybirdRequest = buildEarlybirdRequest(
          userId,
          // Notifications based EB keeps retweets and replies
          NotFilterOutRetweetsAndReplies,
          DefaultMaxNumTweetPerUser,
          processingTimeout = timeoutConfig.earlybirdServerTimeout
        )
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_notifications_memcache"),
      keyToString = { k =>
        f"uEBRBN:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedNotifications
      }
    )
  }

  // Hashes the stringified user id into the memcache key suffix.
  private val keyHasher: KeyHasher = KeyHasher.FNV1A_64

  /**
   * Note the DefaultMaxNumTweetPerUser is used to adjust the result size per cache entry.
   * If the value changes, it will increase the size of the memcache.
   */
  private val DefaultMaxNumTweetPerUser: Int = 100
  private val FilterOutRetweetsAndReplies = true
  private val NotFilterOutRetweetsAndReplies = false
  private val MemcacheKeyTimeToLiveDuration: Duration = Duration.fromMinutes(15)

  /**
   * Builds the Earlybird request for one seed user.
   *
   * @param seedUserId                  user whose tweets are requested
   * @param filterOutRetweetsAndReplies forwarded to the query builder
   * @param maxNumTweetsPerSeedUser     upper bound on the number of results
   * @param processingTimeout           used both as the request timeout (in milliseconds) and
   *                                    for the collector termination parameters
   */
  private def buildEarlybirdRequest(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): EarlybirdRequest =
    EarlybirdRequest(
      searchQuery = getThriftSearchQuery(
        seedUserId = seedUserId,
        filterOutRetweetsAndReplies = filterOutRetweetsAndReplies,
        maxNumTweetsPerSeedUser = maxNumTweetsPerSeedUser,
        processingTimeout = processingTimeout
      ),
      clientId = Some(EarlybirdClientId),
      timeoutMs = processingTimeout.inMilliseconds.intValue(),
      getOlderResults = Some(false),
      adjustedProtectedRequestParams = None,
      adjustedFullArchiveRequestParams = None,
      getProtectedTweetsOnly = Some(false),
      skipVeryRecentTweets = true,
    )

  /** Builds the recency-ranked search query restricted to tweets authored by `seedUserId`. */
  private def getThriftSearchQuery(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): ThriftSearchQuery = ThriftSearchQuery(
    serializedQuery = GetEarlybirdQuery(
      None,
      None,
      Set.empty,
      filterOutRetweetsAndReplies
    ).map(_.serialize),
    fromUserIDFilter64 = Some(Seq(seedUserId)),
    numResults = maxNumTweetsPerSeedUser,
    rankingMode = ThriftSearchRankingMode.Recency,
    collectorParams = Some(
      CollectorParams(
        // numResultsToReturn defines how many results each EB shard will return to search root
        numResultsToReturn = maxNumTweetsPerSeedUser,
        // terminationParams.maxHitsToProcess is used for early terminating per shard results fetching.
        terminationParams =
          GetCollectorTerminationParams(maxNumTweetsPerSeedUser, processingTimeout)
      )),
    facetFieldNames = Some(FacetsToFetch),
    resultMetadataOptions = Some(MetadataOptions),
    searchStatusIds = None
  )

  /**
   * Runs the search and extracts the tweet ids from the response.
   *
   * @return on a `Success` response code, the ids of the returned results (`None` when the
   *         response carries no `searchResults`); on any other response code, a failure
   *         counter named after the code's class is incremented and `Some(Seq.empty)` is returned
   */
  private def getEarlybirdSearchResult(
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    request: EarlybirdRequest,
    statsReceiver: StatsReceiver
  ): Future[Option[Seq[TweetId]]] = earlybirdSearchClient
    .search(request)
    .map { response =>
      response.responseCode match {
        case EarlybirdResponseCode.Success =>
          val tweetIds =
            response.searchResults
              .map {
                _.results
                  .map(searchResult => searchResult.id)
              }
          // Record how many tweets came back. `tweetIds` is an Option, so calling `.size`
          // on it directly would only ever report 0 or 1.
          val numTweets: Int = tweetIds.map(_.size).getOrElse(0)
          statsReceiver.scope("result").stat("size").add(numTweets)
          tweetIds
        case e =>
          statsReceiver.scope("failures").counter(e.getClass.getSimpleName).incr()
          Some(Seq.empty)
      }
    }
}

View File

@ -1,195 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.ml.api.{thriftscala => api}
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton
object EmbeddingStoreModule extends TwitterModule {
type UserId = Long
implicit val mbcgUserEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
CompactScalaCodec(api.Embedding)
implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
CompactScalaCodec(CandidateTweetsList)
final val TwHINEmbeddingRegularUpdateMhStoreName = "TwHINEmbeddingRegularUpdateMhStore"
@Provides
@Singleton
@Named(TwHINEmbeddingRegularUpdateMhStoreName)
def twHINEmbeddingRegularUpdateMhStore(
serviceIdentifier: ServiceIdentifier
): ReadableStore[InternalId, api.Embedding] = {
val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
BinaryScalaCodec(api.Embedding)
val longCodec = implicitly[Injection[Long, Array[Byte]]]
ManhattanRO
.getReadableStoreWithMtls[TweetId, api.Embedding](
ManhattanROConfig(
HDFSPath(""), // not needed
ApplicationID("cr_mixer_apollo"),
DatasetName("twhin_regular_update_tweet_embedding_apollo"),
Apollo
),
ManhattanKVClientMtlsParams(serviceIdentifier)
)(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
case InternalId.TweetId(tweetId) =>
tweetId
case _ =>
throw new UnsupportedOperationException("Invalid Internal Id")
}
}
final val ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName =
"ConsumerBasedTwHINEmbeddingRegularUpdateMhStore"
@Provides
@Singleton
@Named(ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName)
def consumerBasedTwHINEmbeddingRegularUpdateMhStore(
serviceIdentifier: ServiceIdentifier
): ReadableStore[InternalId, api.Embedding] = {
val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
BinaryScalaCodec(api.Embedding)
val longCodec = implicitly[Injection[Long, Array[Byte]]]
ManhattanRO
.getReadableStoreWithMtls[UserId, api.Embedding](
ManhattanROConfig(
HDFSPath(""), // not needed
ApplicationID("cr_mixer_apollo"),
DatasetName("twhin_user_embedding_regular_update_apollo"),
Apollo
),
ManhattanKVClientMtlsParams(serviceIdentifier)
)(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
case InternalId.UserId(userId) =>
userId
case _ =>
throw new UnsupportedOperationException("Invalid Internal Id")
}
}
final val TwoTowerFavConsumerEmbeddingMhStoreName = "TwoTowerFavConsumerEmbeddingMhStore"
@Provides
@Singleton
@Named(TwoTowerFavConsumerEmbeddingMhStoreName)
def twoTowerFavConsumerEmbeddingMhStore(
serviceIdentifier: ServiceIdentifier
): ReadableStore[InternalId, api.Embedding] = {
val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
BinaryScalaCodec(api.Embedding)
val longCodec = implicitly[Injection[Long, Array[Byte]]]
ManhattanRO
.getReadableStoreWithMtls[UserId, api.Embedding](
ManhattanROConfig(
HDFSPath(""), // not needed
ApplicationID("cr_mixer_apollo"),
DatasetName("two_tower_fav_user_embedding_apollo"),
Apollo
),
ManhattanKVClientMtlsParams(serviceIdentifier)
)(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
case InternalId.UserId(userId) =>
userId
case _ =>
throw new UnsupportedOperationException("Invalid Internal Id")
}
}
final val DebuggerDemoUserEmbeddingMhStoreName = "DebuggerDemoUserEmbeddingMhStoreName"
@Provides
@Singleton
@Named(DebuggerDemoUserEmbeddingMhStoreName)
def debuggerDemoUserEmbeddingStore(
serviceIdentifier: ServiceIdentifier
): ReadableStore[InternalId, api.Embedding] = {
// This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalUserEmbeddingScalaDataset.sql
// Change the above sql if you want to use a diff embedding
val manhattanROConfig = ManhattanROConfig(
HDFSPath(""), // not needed
ApplicationID("cr_mixer_apollo"),
DatasetName("experimental_user_embedding"),
Apollo
)
buildUserEmbeddingStore(serviceIdentifier, manhattanROConfig)
}
final val DebuggerDemoTweetEmbeddingMhStoreName = "DebuggerDemoTweetEmbeddingMhStore"

/**
 * Provides the store bound under [[DebuggerDemoTweetEmbeddingMhStoreName]].
 *
 * The dataset comes from
 * src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalTweetEmbeddingScalaDataset.sql;
 * change that sql if you want to use a different embedding.
 *
 * @param serviceIdentifier identity used to build the Manhattan mTLS params
 * @return a store from `InternalId` to `api.Embedding`, built by `buildTweetEmbeddingStore`
 */
@Provides
@Singleton
@Named(DebuggerDemoTweetEmbeddingMhStoreName)
def debuggerDemoTweetEmbeddingStore(
  serviceIdentifier: ServiceIdentifier
): ReadableStore[InternalId, api.Embedding] =
  buildTweetEmbeddingStore(
    serviceIdentifier,
    ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("cr_mixer_apollo"),
      DatasetName("experimental_tweet_embedding"),
      Apollo
    )
  )
/**
 * Builds a `UserId`-keyed embedding store through `ManhattanRO.getReadableStoreWithMtls`
 * and re-keys it so callers can look up by `InternalId`.
 *
 * @param serviceIdentifier identity used to build the Manhattan mTLS params
 * @param manhattanROConfig dataset / application configuration to read from
 * @return a store from `InternalId` to `api.Embedding`
 * @throws UnsupportedOperationException at lookup time when the key is not an `InternalId.UserId`
 */
private def buildUserEmbeddingStore(
  serviceIdentifier: ServiceIdentifier,
  manhattanROConfig: ManhattanROConfig
): ReadableStore[InternalId, api.Embedding] = {
  val embeddingCodec: Injection[api.Embedding, Array[Byte]] = BinaryScalaCodec(api.Embedding)
  val keyCodec = implicitly[Injection[Long, Array[Byte]]]
  val mtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)

  val userKeyedStore = ManhattanRO
    .getReadableStoreWithMtls[UserId, api.Embedding](manhattanROConfig, mtlsParams)(
      keyCodec,
      embeddingCodec)

  // Only user ids are valid keys for this store; everything else is rejected.
  userKeyedStore.composeKeyMapping[InternalId] {
    case InternalId.UserId(id) => id
    case _ => throw new UnsupportedOperationException("Invalid Internal Id")
  }
}
/**
 * Builds a `TweetId`-keyed embedding store through `ManhattanRO.getReadableStoreWithMtls`
 * and re-keys it so callers can look up by `InternalId`.
 *
 * @param serviceIdentifier identity used to build the Manhattan mTLS params
 * @param manhattanROConfig dataset / application configuration to read from
 * @return a store from `InternalId` to `api.Embedding`
 * @throws UnsupportedOperationException at lookup time when the key is not an `InternalId.TweetId`
 */
private def buildTweetEmbeddingStore(
  serviceIdentifier: ServiceIdentifier,
  manhattanROConfig: ManhattanROConfig
): ReadableStore[InternalId, api.Embedding] = {
  val embeddingCodec: Injection[api.Embedding, Array[Byte]] = BinaryScalaCodec(api.Embedding)
  val keyCodec = implicitly[Injection[Long, Array[Byte]]]
  val mtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)

  val tweetKeyedStore = ManhattanRO
    .getReadableStoreWithMtls[TweetId, api.Embedding](manhattanROConfig, mtlsParams)(
      keyCodec,
      embeddingCodec)

  // Only tweet ids are valid keys for this store; everything else is rejected.
  tweetKeyedStore.composeKeyMapping[InternalId] {
    case InternalId.TweetId(id) => id
    case _ => throw new UnsupportedOperationException("Invalid Internal Id")
  }
}
}

View File

@ -1,29 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.source_signal.FrsStore
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
/** Guice module that binds the [[FrsStore]] under `ModuleNames.FrsStore`. */
object FrsStoreModule extends TwitterModule {

  /**
   * Builds an [[FrsStore]] from the given client, stats receiver and decider, and wraps it
   * in an `ObservedReadableStore` scoped to "follow_recommendations_store".
   *
   * @param frsClient     thrift client handed to the FrsStore
   * @param statsReceiver stats receiver used by the store and by the observing wrapper
   * @param decider       decider handed to the FrsStore
   */
  @Provides
  @Singleton
  @Named(ModuleNames.FrsStore)
  def providesFrsStore(
    frsClient: FollowRecommendationsThriftService.MethodPerEndpoint,
    statsReceiver: StatsReceiver,
    decider: CrMixerDecider
  ): ReadableStore[FrsStore.Query, Seq[FrsQueryResult]] = {
    val frsStore = FrsStore(frsClient, statsReceiver, decider)
    val observedStats = statsReceiver.scope("follow_recommendations_store")
    ObservedReadableStore(frsStore)(observedStats)
  }
}

View File

@ -1,17 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import javax.inject.Singleton
/** Guice module that exposes `ManhattanKVClientMtlsParams` for injection. */
object MHMtlsParamsModule extends TwitterModule {

  /**
   * Wraps the injected service identifier in `ManhattanKVClientMtlsParams`.
   *
   * @param serviceIdentifier identity passed straight to the params constructor
   */
  @Provides
  @Singleton
  def providesManhattanMtlsParams(
    serviceIdentifier: ServiceIdentifier
  ): ManhattanKVClientMtlsParams =
    ManhattanKVClientMtlsParams(serviceIdentifier)
}

View File

@ -1,150 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module binding one `ReadableStore[UserId, CandidateTweetsList]` per offline
 * candidate dataset. Every provider differs only in its binding name and dataset name;
 * the actual store construction lives in `buildOfflineCandidateStore`.
 */
object OfflineCandidateStoreModule extends TwitterModule {
  type UserId = Long

  /** Value codec picked up implicitly when the Manhattan stores are built. */
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  /** Store over "offline_tweet_recommendations_from_interestedin_2020". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020CandidateStore)
  def offlineTweet2020CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020")

  /** Store over "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl0El15CandidateStore)
  def offlineTweet2020Hl0El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15")

  /** Store over "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El15CandidateStore)
  def offlineTweet2020Hl2El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15")

  /** Store over "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El50CandidateStore)
  def offlineTweet2020Hl2El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50")

  /** Store over "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl8El50CandidateStore)
  def offlineTweet2020Hl8El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50")

  /** Store over "offline_tweet_recommendations_from_mts_consumer_embeddings". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweetMTSCandidateStore)
  def offlineTweetMTSCandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_mts_consumer_embeddings")

  /** Store over "offline_tweet_recommendations_from_decayed_sum". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFavDecayedSumCandidateStore)
  def offlineFavDecayedSumCandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_decayed_sum")

  /** Store over "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop1000RankDecay11CandidateStore)
  def offlineFtrAt5Pop1000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1")

  /** Store over "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop10000RankDecay11CandidateStore)
  def offlineFtrAt5Pop10000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1")

  /**
   * Shared builder for every provider above.
   *
   * @param serviceIdentifier identity used to build the Manhattan mTLS params
   * @param datasetName       dataset to read under the "multi_type_simclusters" application id
   */
  private def buildOfflineCandidateStore(
    serviceIdentifier: ServiceIdentifier,
    datasetName: String
  ): ReadableStore[UserId, CandidateTweetsList] = {
    val roConfig = ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("multi_type_simclusters"),
      DatasetName(datasetName),
      Apollo
    )
    val mtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)
    ManhattanRO.getReadableStoreWithMtls[Long, CandidateTweetsList](roConfig, mtlsParams)
  }
}

View File

@ -1,39 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
import javax.inject.Singleton
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
/** Guice module binding the Strato-backed store named `ModuleNames.RealGraphOonStore`. */
object RealGraphOonStoreModule extends TwitterModule {

  // Column path is overridable through the "crMixer.userRealGraphOonColumnPath" flag.
  private val userRealGraphOonColumnPath: Flag[String] = flag[String](
    name = "crMixer.userRealGraphOonColumnPath",
    default = "recommendations/twistly/userRealgraphOon",
    help = "Strato column path for user real graph OON Store"
  )

  /**
   * Builds a unit-view `StratoFetchableStore` on the flag-configured column and wraps it in
   * an `ObservedReadableStore` scoped to "user_real_graph_oon_store".
   *
   * @param stratoClient  client used to fetch from the Strato column
   * @param statsReceiver parent stats receiver for the observing wrapper
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphOonStore)
  def providesRealGraphOonStore(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver
  ): ReadableStore[UserId, CandidateSeq] = {
    val columnPath = userRealGraphOonColumnPath()
    val stratoStore =
      StratoFetchableStore.withUnitView[UserId, CandidateSeq](stratoClient, columnPath)
    ObservedReadableStore(stratoStore)(statsReceiver.scope("user_real_graph_oon_store"))
  }
}

View File

@ -1,67 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
/** Guice module binding the store named `ModuleNames.RealGraphInStore`. */
object RealGraphStoreMhModule extends TwitterModule {

  /**
   * Builds the real-graph store in three layers:
   *  1. a `ManhattanRO` store over the "real_graph_scores_apollo" dataset,
   *  2. an `ObservedMemcachedReadableStore` in front of it (24 hour ttl, keys "uRGraph/<userId>"),
   *  3. a `DeciderableReadableStore` gated on `DeciderKey.enableRealGraphMhStoreDeciderKey`.
   *
   * @param decider                     source of the decider gate for the outermost wrapper
   * @param statsReceiver               parent stats receiver for the cache and gate scopes
   * @param manhattanKVClientMtlsParams mTLS params for the Manhattan store
   * @param crMixerUnifiedCacheClient   cache client used by the memcached layer
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphInStore)
  def providesRealGraphStoreMh(
    decider: CrMixerDecider,
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
  ): ReadableStore[UserId, CandidateSeq] = {
    // Explicit type on the implicit: same type the compiler inferred before, but no longer
    // dependent on inference (untyped implicits are a lint warning in Scala 2 and an error
    // in Scala 3).
    implicit val valueCodec: BinaryScalaCodec[CandidateSeq] = new BinaryScalaCodec(CandidateSeq)

    val underlyingStore = ManhattanRO
      .getReadableStoreWithMtls[UserId, CandidateSeq](
        ManhattanROConfig(
          HDFSPath(""),
          ApplicationID("cr_mixer_apollo"),
          DatasetName("real_graph_scores_apollo"),
          Apollo),
        manhattanKVClientMtlsParams
      )

    val memCachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = underlyingStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 24.hours,
      )(
        valueInjection = valueCodec,
        statsReceiver = statsReceiver.scope("memCachedUserRealGraphMh"),
        keyToString = { k: UserId => s"uRGraph/$k" }
      )

    DeciderableReadableStore(
      memCachedStore,
      decider.deciderGateBuilder.idGate(DeciderKey.enableRealGraphMhStoreDeciderKey),
      statsReceiver.scope("RealGraphMh")
    )
  }
}

View File

@ -1,107 +0,0 @@
package com.twitter.cr_mixer.module
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.google.inject.Provides
import com.google.inject.Singleton
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
/**
 * Guice module binding SimClusters embedding stores served from the
 * "recommendations/representation_manager/" Strato columns.
 *
 * All providers share `buildSimClustersEmbeddingStore`; they differ only in column path,
 * embedding type and stats scope.
 */
object RepresentationManagerModule extends TwitterModule {
  private val ColPathPrefix = "recommendations/representation_manager/"
  private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet"
  private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User"

  /** Tweet store for `EmbeddingType.LogFavLongestL2EmbeddingTweet`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsTweetLogFavLongestL2EmbeddingStore)
  def providesRepresentationManagerTweetStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, SimClustersEmbedding] =
    buildSimClustersEmbeddingStore(
      stratoClient,
      statsReceiver,
      columnPath = SimclustersTweetColPath,
      embeddingType = EmbeddingType.LogFavLongestL2EmbeddingTweet,
      statsScope = "rms_tweet_log_fav_longest_l2_store"
    )

  /** User store for `EmbeddingType.FavBasedProducer`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFavBasedProducerEmbeddingStore)
  def providesRepresentationManagerUserFavBasedProducerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] =
    buildSimClustersEmbeddingStore(
      stratoClient,
      statsReceiver,
      columnPath = SimclustersUserColPath,
      embeddingType = EmbeddingType.FavBasedProducer,
      statsScope = "rms_user_fav_based_producer_store"
    )

  /** User store for `EmbeddingType.LogFavBasedUserInterestedIn`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserLogFavInterestedInEmbeddingStore)
  def providesRepresentationManagerUserLogFavConsumerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] =
    buildSimClustersEmbeddingStore(
      stratoClient,
      statsReceiver,
      columnPath = SimclustersUserColPath,
      embeddingType = EmbeddingType.LogFavBasedUserInterestedIn,
      statsScope = "rms_user_log_fav_interestedin_store"
    )

  /** User store for `EmbeddingType.FollowBasedUserInterestedIn`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFollowInterestedInEmbeddingStore)
  def providesRepresentationManagerUserFollowInterestedInEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] =
    buildSimClustersEmbeddingStore(
      stratoClient,
      statsReceiver,
      columnPath = SimclustersUserColPath,
      embeddingType = EmbeddingType.FollowBasedUserInterestedIn,
      statsScope = "rms_user_follow_interestedin_store"
    )

  /**
   * Shared builder: fetches thrift embeddings from `columnPath` with a
   * `SimClustersEmbeddingView(embeddingType, ModelVersion.Model20m145k2020)`, converts each
   * value with `SimClustersEmbedding(_)`, and wraps the result in an `ObservedReadableStore`.
   *
   * @param stratoClient  client used to fetch from the Strato column
   * @param statsReceiver parent stats receiver; scoped with `statsScope` for the wrapper
   * @param columnPath    Strato column to read
   * @param embeddingType embedding type placed in the view
   * @param statsScope    stats scope name for the observing wrapper
   */
  private def buildSimClustersEmbeddingStore(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver,
    columnPath: String,
    embeddingType: EmbeddingType,
    statsScope: String
  ): ReadableStore[Long, SimClustersEmbedding] = {
    val thriftStore = StratoFetchableStore
      .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        columnPath,
        SimClustersEmbeddingView(embeddingType, ModelVersion.Model20m145k2020)
      )
    ObservedReadableStore(thriftStore.mapValues(SimClustersEmbedding(_)))(
      statsReceiver.scope(statsScope))
  }
}

View File

@ -1,56 +0,0 @@
package com.twitter.cr_mixer.module
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.storehaus.ReadableStore
import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.hermit.store.common.ObservedReadableStore
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.representationscorer.thriftscala.ListScoreId
/** Guice module binding the representation-scorer store named `ModuleNames.RsxStore`. */
object RepresentationScorerModule extends TwitterModule {
  private val rsxColumnPath = "recommendations/representation_scorer/listScore"
  private final val SimClusterModelVersion = ModelVersion.Model20m145k2020
  private final val TweetEmbeddingType = EmbeddingType.LogFavBasedTweet

  /**
   * Builds a unit-view `StratoFetchableStore` keyed by `ListScoreId`, re-keys it by an id
   * pair, and wraps it in an `ObservedReadableStore` scoped to "rsx_store".
   *
   * NOTE(review): the key type is declared as `(UserId, TweetId)`, yet the mapping below
   * wraps BOTH elements as `InternalId.TweetId` with a tweet embedding type — confirm
   * whether the first element is really a user id.
   *
   * @param statsReceiver parent stats receiver for the observing wrapper
   * @param stratoClient  client used to fetch from the Strato column
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RsxStore)
  def providesRepresentationScorerStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[(UserId, TweetId), Double] = {
    val listScoreStore =
      StratoFetchableStore.withUnitView[ListScoreId, Double](stratoClient, rsxColumnPath)
    val pairKeyedStore = listScoreStore.composeKeyMapping[(UserId, TweetId)] {
      case (targetId, candidateId) =>
        representationScorerStoreKeyMapping(targetId, candidateId)
    }
    ObservedReadableStore(pairKeyedStore)(statsReceiver.scope("rsx_store"))
  }

  /**
   * Converts an id pair into the `ListScoreId` used as the Strato key: `t1` is the target,
   * `t2` the single candidate, both scored with `PairEmbeddingLogCosineSimilarity`.
   */
  private def representationScorerStoreKeyMapping(t1: TweetId, t2: TweetId): ListScoreId = {
    val target = InternalId.TweetId(t1)
    val candidates = Seq(InternalId.TweetId(t2))
    ListScoreId(
      algorithm = ScoringAlgorithm.PairEmbeddingLogCosineSimilarity,
      modelVersion = SimClusterModelVersion,
      targetEmbeddingType = TweetEmbeddingType,
      targetId = target,
      candidateEmbeddingType = TweetEmbeddingType,
      candidateIds = candidates
    )
  }
}

View File

@ -1,90 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Singleton
/**
 * Example module: wraps a hard-coded in-memory store in a [[StandardSimilarityEngine]].
 */
object SimpleSimilarityEngineModule extends TwitterModule {

  /**
   * Builds the example engine with no decider or feature-switch gating.
   *
   * @param timeoutConfig supplies `similarityEngineTimeout` for the engine config
   * @param globalStats   stats receiver handed to the engine
   */
  @Provides
  @Singleton
  def providesSimpleSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): StandardSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    val dummyCandidates = Map(
      1L -> Seq((100L, 1.0), (101L, 1.0)),
      2L -> Seq((200L, 2.0), (201L, 2.0)),
      3L -> Seq((300L, 3.0), (301L, 3.0))
    )
    val dummyStore = ReadableStore.fromMap(dummyCandidates)

    val ungated = GatingConfig(deciderConfig = None, enableFeatureSwitch = None)
    val config = SimilarityEngineConfig(
      timeout = timeoutConfig.similarityEngineTimeout,
      gatingConfig = ungated
    )

    new StandardSimilarityEngine[UserId, (TweetId, Double)](
      implementingStore = dummyStore,
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9997),
      globalStats = globalStats,
      engineConfig = config
    )
  }
}
/**
 * Example module: wraps two hard-coded in-memory stores, registered as versions "V1" and
 * "V2", in a [[LookupSimilarityEngine]].
 */
object LookupSimilarityEngineModule extends TwitterModule {

  /**
   * Builds the example engine with no decider or feature-switch gating.
   *
   * @param timeoutConfig supplies `similarityEngineTimeout` for the engine config
   * @param globalStats   stats receiver handed to the engine
   */
  @Provides
  @Singleton
  def providesLookupSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): LookupSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    val dummyStoreV1 = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0))
      ))
    val dummyStoreV2 = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0))
      ))
    val storesByVersion = Map("V1" -> dummyStoreV1, "V2" -> dummyStoreV2)

    val ungated = GatingConfig(deciderConfig = None, enableFeatureSwitch = None)
    val config = SimilarityEngineConfig(
      timeout = timeoutConfig.similarityEngineTimeout,
      gatingConfig = ungated
    )

    new LookupSimilarityEngine[UserId, (TweetId, Double)](
      versionedStoreMap = storesByVersion,
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9998),
      globalStats = globalStats,
      engineConfig = config
    )
  }
}

View File

@ -1,33 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import javax.inject.Named
/** Guice module exposing the injected SimClustersANN clients as a name-to-client map. */
object SimClustersANNServiceNameToClientMapper extends TwitterModule {

  /**
   * Collects the named SimClustersANN clients into a map keyed by service-name string
   * ("simclusters-ann", "simclusters-ann-experimental", "simclusters-ann-1" .. "-5").
   */
  @Provides
  @Singleton
  def providesSimClustersANNServiceNameToClientMapping(
    @Named(ModuleNames.ProdSimClustersANNServiceClientName) simClustersANNServiceProd: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.ExperimentalSimClustersANNServiceClientName) simClustersANNServiceExperimental: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName1) simClustersANNService1: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName2) simClustersANNService2: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName3) simClustersANNService3: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName5) simClustersANNService5: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName4) simClustersANNService4: SimClustersANNService.MethodPerEndpoint
  ): Map[String, SimClustersANNService.MethodPerEndpoint] = {
    val clientsByServiceName: Map[String, SimClustersANNService.MethodPerEndpoint] = Map(
      "simclusters-ann" -> simClustersANNServiceProd,
      "simclusters-ann-experimental" -> simClustersANNServiceExperimental,
      "simclusters-ann-1" -> simClustersANNService1,
      "simclusters-ann-2" -> simClustersANNService2,
      "simclusters-ann-3" -> simClustersANNService3,
      "simclusters-ann-4" -> simClustersANNService4,
      "simclusters-ann-5" -> simClustersANNService5
    )
    clientsByServiceName
  }
}

View File

@ -1,65 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.Client
import com.twitter.topic_recos.thriftscala.TopicTopTweets
import com.twitter.topic_recos.thriftscala.TopicTweet
import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey
/**
 * Strato store that wraps the topic top tweets pipeline indexed from a Summingbird job
 */
object SkitStratoStoreModule extends TwitterModule {
  val column = "recommendations/topic_recos/topicTopTweets"

  /**
   * Builds the store in three layers:
   *  1. an observed unit-view `StratoFetchableStore` on [[column]], mapped to `topTweets`,
   *  2. an `ObservedMemcachedReadableStore` in front of it (10 minute ttl),
   *  3. an `ObservedCachedReadableStore` in front of that (5 minute ttl, 100000 keys max).
   *
   * @param crMixerUnifiedCacheClient cache client used by the memcached layer
   * @param stratoClient              client used to fetch from the Strato column
   * @param statsReceiver             parent stats receiver for every layer's scope
   */
  @Provides
  @Singleton
  @Named(ModuleNames.SkitStratoStoreName)
  def providesSkitStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicTweetPartitionFlatKey, Seq[TopicTweet]] = {
    val skitStore = ObservedReadableStore(
      StratoFetchableStore
        .withUnitView[TopicTweetPartitionFlatKey, TopicTopTweets](stratoClient, column))(
      statsReceiver.scope(ModuleNames.SkitStratoStoreName)).mapValues { topicTopTweets =>
      topicTopTweets.topTweets
    }

    val memCachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = skitStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 10.minutes
      )(
        valueInjection = LZ4Injection.compose(SeqObjectInjection[TopicTweet]()),
        statsReceiver = statsReceiver.scope("memcached_skit_store"),
        // Encode with an explicit charset: the no-arg getBytes uses the platform default,
        // which would make cache keys depend on each host's JVM settings.
        keyToString = { k =>
          s"skit:${keyHasher.hashKey(k.toString.getBytes(java.nio.charset.StandardCharsets.UTF_8))}"
        }
      )

    ObservedCachedReadableStore.from[TopicTweetPartitionFlatKey, Seq[TopicTweet]](
      memCachedStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      cacheName = "skit_in_memory_cache",
      windowSize = 10000L
    )(statsReceiver.scope("skit_in_memory_cache"))
  }
}

View File

@ -1,39 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.hermit.stp.thriftscala.STPResult
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import javax.inject.Named
/** Guice module binding the Strato-backed store named `ModuleNames.StpStore`. */
object StrongTiePredictionStoreModule extends TwitterModule {

  // Column path is overridable through the "crMixer.strongTiePredictionColumnPath" flag.
  private val strongTiePredictionColumnPath: Flag[String] = flag[String](
    name = "crMixer.strongTiePredictionColumnPath",
    default = "onboarding/userrecs/strong_tie_prediction_big",
    help = "Strato column path for StrongTiePredictionStore"
  )

  /**
   * Builds a unit-view `StratoFetchableStore` on the flag-configured column and wraps it in
   * an `ObservedReadableStore` scoped to "strong_tie_prediction_big_store".
   *
   * @param statsReceiver parent stats receiver for the observing wrapper
   * @param stratoClient  client used to fetch from the Strato column
   */
  @Provides
  @Singleton
  @Named(ModuleNames.StpStore)
  def providesStrongTiePredictionStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, STPResult] = {
    val columnPath = strongTiePredictionColumnPath()
    val stratoStore =
      StratoFetchableStore.withUnitView[UserId, STPResult](stratoClient, columnPath)
    ObservedReadableStore(stratoStore)(statsReceiver.scope("strong_tie_prediction_big_store"))
  }
}

View File

@ -1,34 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain
import javax.inject.Named
/** Guice module binding the Strato-backed store named `ModuleNames.TripCandidateStore`. */
object TripCandidateStoreModule extends TwitterModule {
  private val stratoColumn = "trends/trip/tripTweetsDataflowProd"

  /**
   * Builds a unit-view `StratoFetchableStore` on [[stratoColumn]], maps each value to its
   * `tweets`, and wraps the result in an `ObservedReadableStore` scoped to
   * "simclusters_trip_candidate_store".
   *
   * Scoped as a singleton so a single store instance is shared by all injection points
   * instead of a new one being built per injection.
   *
   * @param statsReceiver parent stats receiver for the observing wrapper
   * @param stratoClient  client used to fetch from the Strato column
   */
  @Provides
  @javax.inject.Singleton
  @Named(ModuleNames.TripCandidateStore)
  def providesSimClustersTripCandidateStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient
  ): ReadableStore[TripDomain, Seq[TripTweet]] = {
    val tripCandidateStratoFetchableStore =
      StratoFetchableStore
        .withUnitView[TripDomain, TripTweets](stratoClient, stratoColumn)
        .mapValues(_.tweets)

    ObservedReadableStore(
      tripCandidateStratoFetchableStore
    )(statsReceiver.scope("simclusters_trip_candidate_store"))
  }
}

View File

@ -1,205 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.frigate.common.store.health.TweetHealthModelStore
import com.twitter.frigate.common.store.health.TweetHealthModelStore.TweetHealthModelStoreConfig
import com.twitter.frigate.common.store.health.UserHealthModelStore
import com.twitter.frigate.thriftscala.TweetHealthScores
import com.twitter.frigate.thriftscala.UserAgathaScores
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.contentrecommender.store.TweetInfoStore
import com.twitter.contentrecommender.store.TweetyPieFieldsStore
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
import com.twitter.recos.user_tweet_graph_plus.thriftscala.UserTweetGraphPlus
import com.twitter.recos.user_tweet_graph_plus.thriftscala.TweetEngagementScores
import com.twitter.relevance_platform.common.health_store.UserMediaRepresentationHealthStore
import com.twitter.relevance_platform.common.health_store.MagicRecsRealTimeAggregatesStore
import com.twitter.relevance_platform.thriftscala.MagicRecsRealTimeAggregatesScores
import com.twitter.relevance_platform.thriftscala.UserMediaRepresentationScores
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.tweetypie.thriftscala.TweetService
import com.twitter.util.Future
import com.twitter.util.JavaTimer
import com.twitter.util.Timer
import javax.inject.Named
/**
 * Guice module that assembles the `ReadableStore[TweetId, TweetInfo]` binding.
 *
 * The provider wires several signal stores (each wrapped in a decider gate), feeds a subset
 * of them into `TweetInfoStore`, and then layers a memcached cache and a second cache built
 * with `ObservedCachedReadableStore` in front of the result.
 */
object TweetInfoStoreModule extends TwitterModule {

  // Implicit Timer kept in scope for the store constructors used below.
  implicit val timer: Timer = new JavaTimer(true)

  // The unified memcached client is supplied by UnifiedCacheClient.
  override def modules: Seq[Module] = Seq(UnifiedCacheClient)

  /**
   * Builds the cached tweet-info store.
   *
   * @param statsReceiver               root stats receiver; every store gets its own scope
   * @param serviceIdentifier           passed to the MagicRecs store builder; its environment
   *                                    also decides whether memcached async update is enabled
   * @param stratoClient                Strato client used by the health-model stores
   * @param crMixerUnifiedCacheClient   memcached client shared by all memcached layers here
   * @param manhattanKVClientMtlsParams params for the user-media-representation store
   * @param tweetyPieService            TweetyPie endpoint backing the tweet fields store
   * @param userTweetGraphPlusService   endpoint queried for tweet engagement scores
   * @param blueVerifiedAnnotationStore store handed straight to `TweetInfoStore`
   * @param decider                     source of the per-store decider gates
   * @return the tweet-info store fronted by both cache layers
   */
  @Provides
  @Singleton
  def providesTweetInfoStore(
    statsReceiver: StatsReceiver,
    serviceIdentifier: ServiceIdentifier,
    stratoClient: StratoClient,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    tweetyPieService: TweetService.MethodPerEndpoint,
    userTweetGraphPlusService: UserTweetGraphPlus.MethodPerEndpoint,
    @Named(ModuleNames.BlueVerifiedAnnotationStore) blueVerifiedAnnotationStore: ReadableStore[
      String,
      BlueVerifiedAnnotationsV2
    ],
    decider: CrMixerDecider
  ): ReadableStore[TweetId, TweetInfo] = {

    // Engagement scores fetched from UserTweetGraphPlus; not cached, only observed and gated.
    val tweetEngagementScoreStore: ReadableStore[TweetId, TweetEngagementScores] = {
      val utgPlusStore = new ReadableStore[TweetId, TweetEngagementScores] {
        override def get(k: TweetId): Future[Option[TweetEngagementScores]] =
          userTweetGraphPlusService.tweetEngagementScore(k).map(Some(_))
      }
      val observedUtgPlusStore =
        ObservedReadableStore(utgPlusStore)(
          statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore"))

      DeciderableReadableStore(
        observedUtgPlusStore,
        decider.deciderGateBuilder.idGate(
          DeciderKey.enableUtgRealTimeTweetEngagementScoreDeciderKey),
        statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore")
      )
    }

    // NOTE(review): tweetHealthModelStore and userHealthModelStore are constructed here but
    // are not passed to TweetInfoStore further down — confirm whether they are still needed.

    // Tweet health scores from Strato, memcached for 2 hours, decider-gated.
    val tweetHealthModelStore: ReadableStore[TweetId, TweetHealthScores] = {
      val allSignalsEnabled = TweetHealthModelStoreConfig(
        enablePBlock = true,
        enableToxicity = true,
        enablePSpammy = true,
        enablePReported = true,
        enableSpammyTweetContent = true,
        enablePNegMultimodal = true
      )
      val stratoHealthStore = TweetHealthModelStore.buildReadableStore(
        stratoClient,
        Some(allSignalsEnabled)
      )(statsReceiver.scope("UnderlyingTweetHealthModelStore"))

      // The key prefix is s"tHMS/$k" rather than s"tweetHealthModelStore/$k" to
      // differentiate these entries from the CR cache.
      val memcachedHealthStore = ObservedMemcachedReadableStore.fromCacheClient(
        backingStore = stratoHealthStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 2.hours
      )(
        valueInjection = BinaryScalaCodec(TweetHealthScores),
        statsReceiver = statsReceiver.scope("memCachedTweetHealthModelStore"),
        keyToString = { k: TweetId => s"tHMS/$k" }
      )

      DeciderableReadableStore(
        memcachedHealthStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableHealthSignalsScoreDeciderKey),
        statsReceiver.scope("TweetHealthModelStore")
      )
    }

    // User health (Agatha) scores from Strato, memcached for 18 hours, decider-gated.
    val userHealthModelStore: ReadableStore[UserId, UserAgathaScores] = {
      val stratoUserHealthStore =
        UserHealthModelStore.buildReadableStore(stratoClient)(
          statsReceiver.scope("UnderlyingUserHealthModelStore"))

      val memcachedUserHealthStore = ObservedMemcachedReadableStore.fromCacheClient(
        backingStore = stratoUserHealthStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 18.hours
      )(
        valueInjection = BinaryScalaCodec(UserAgathaScores),
        statsReceiver = statsReceiver.scope("memCachedUserHealthModelStore"),
        keyToString = { k: UserId => s"uHMS/$k" }
      )

      DeciderableReadableStore(
        memcachedUserHealthStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableUserAgathaScoreDeciderKey),
        statsReceiver.scope("UserHealthModelStore")
      )
    }

    // User media-representation scores, memcached for 12 hours, decider-gated.
    val userMediaRepresentationHealthStore: ReadableStore[UserId, UserMediaRepresentationScores] = {
      val manhattanBackedStore = UserMediaRepresentationHealthStore.buildReadableStore(
        manhattanKVClientMtlsParams,
        statsReceiver.scope("UnderlyingUserMediaRepresentationHealthStore")
      )

      val memcachedMediaStore = ObservedMemcachedReadableStore.fromCacheClient(
        backingStore = manhattanBackedStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 12.hours
      )(
        valueInjection = BinaryScalaCodec(UserMediaRepresentationScores),
        statsReceiver = statsReceiver.scope("memCacheUserMediaRepresentationHealthStore"),
        keyToString = { k: UserId => s"uMRHS/$k" }
      )

      DeciderableReadableStore(
        memcachedMediaStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableUserMediaRepresentationStoreDeciderKey),
        statsReceiver.scope("UserMediaRepresentationHealthStore")
      )
    }

    // MagicRecs real-time aggregates; no cache layer, only the decider gate.
    val magicRecsRealTimeAggregatesStore: ReadableStore[
      TweetId,
      MagicRecsRealTimeAggregatesScores
    ] = {
      val aggregatesStore = MagicRecsRealTimeAggregatesStore.buildReadableStore(
        serviceIdentifier,
        statsReceiver.scope("UnderlyingMagicRecsRealTimeAggregatesScores")
      )

      DeciderableReadableStore(
        aggregatesStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableMagicRecsRealTimeAggregatesStore),
        statsReceiver.scope("MagicRecsRealTimeAggregatesStore")
      )
    }

    // Combine the per-signal stores into the store that hydrates TweetInfo.
    val hydratingTweetInfoStore = TweetInfoStore(
      TweetyPieFieldsStore.getStoreFromTweetyPie(tweetyPieService),
      userMediaRepresentationHealthStore,
      magicRecsRealTimeAggregatesStore,
      tweetEngagementScoreStore,
      blueVerifiedAnnotationStore
    )(statsReceiver.scope("tweetInfoStore"))

    // First cache layer: memcached with a 15 minute TTL.
    val memcachedTweetInfoStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = hydratingTweetInfoStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 15.minutes,
      // Hydrating tweetInfo is now a required step for all candidates,
      // hence we needed to tune these thresholds.
      asyncUpdate = serviceIdentifier.environment == "prod"
    )(
      valueInjection = BinaryScalaCodec(TweetInfo),
      statsReceiver = statsReceiver.scope("memCachedTweetInfoStore"),
      keyToString = { k: TweetId => s"tIS/$k" }
    )

    // Second cache layer in front of memcached; this is the store that gets bound.
    ObservedCachedReadableStore.from(
      memcachedTweetInfoStore,
      ttl = 15.minutes,
      maxKeys = 8388607, // Check TweetInfo definition. size~92b. Around 736 MB
      windowSize = 10000L,
      cacheName = "tweet_info_cache",
      maxMultiGetSize = 20
    )(statsReceiver.scope("inMemoryCachedTweetInfoStore"))
  }
}

View File

@ -1,42 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers
/**
 * Guice module binding a `ReadableStore[TweetId, TweetRecentEngagedUsers]` backed by a
 * Strato column whose key is a `(tweetId, version)` pair.
 */
object TweetRecentEngagedUserStoreModule extends TwitterModule {

  private type Version = Long

  // DefaultVersion for tweetEngagedUsersStore, whose key = (tweetId, DefaultVersion)
  private val tweetRecentEngagedUsersStoreDefaultVersion: Version = 0L

  private val tweetRecentEngagedUsersColumnPath: Flag[String] = flag[String](
    name = "crMixer.tweetRecentEngagedUsersColumnPath",
    default = "recommendations/twistly/tweetRecentEngagedUsers",
    help = "Strato column path for TweetRecentEngagedUsersStore"
  )

  /**
   * Builds the store by fetching from the configured Strato column and adapting its
   * `(TweetId, Version)` key so callers only supply a `TweetId`.
   *
   * @param statsReceiver root stats receiver; scoped to "tweet_recent_engaged_users_store"
   * @param stratoClient  Strato client used to fetch the column
   * @return an observed store keyed by tweet id
   */
  @Provides
  @Singleton
  def providesTweetRecentEngagedUserStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, TweetRecentEngagedUsers] = {
    // The flag is read here, at provider invocation time, not at object initialisation.
    val versionedStore =
      StratoFetchableStore.withUnitView[(TweetId, Version), TweetRecentEngagedUsers](
        stratoClient,
        tweetRecentEngagedUsersColumnPath())

    // Every lookup is pinned to the default version.
    val tweetKeyedStore = versionedStore.composeKeyMapping[TweetId] { tweetId =>
      (tweetId, tweetRecentEngagedUsersStoreDefaultVersion)
    }

    ObservedReadableStore(tweetKeyedStore)(
      statsReceiver.scope("tweet_recent_engaged_users_store"))
  }
}

View File

@ -1,32 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.hermit.store.common.ReadableWritableStore
import com.twitter.hermit.store.common.ObservedReadableWritableMemcacheStore
import com.twitter.simclusters_v2.common.UserId
import javax.inject.Named
/**
 * Guice module binding the memcache-backed read/write store of `CrMixerTweetResponse`
 * values keyed by `UserId`.
 */
object TweetRecommendationResultsStoreModule extends TwitterModule {

  /**
   * Builds the store on top of the named tweet-recommendation-results memcached client.
   * Entries are written with a 24 hour TTL and the cache key is the user id's string form.
   *
   * @param tweetRecommendationResultsCacheClient memcached client for this cache
   * @param statsReceiver root stats receiver; scoped to
   *                      "TweetRecommendationResultsMemcacheStore"
   * @return the observed read/write memcache store
   */
  @Provides
  @Singleton
  def providesTweetRecommendationResultsStore(
    @Named(ModuleNames.TweetRecommendationResultsCache) tweetRecommendationResultsCacheClient: MemcachedClient,
    statsReceiver: StatsReceiver
  ): ReadableWritableStore[UserId, CrMixerTweetResponse] = {
    val storeStats = statsReceiver.scope("TweetRecommendationResultsMemcacheStore")
    val userIdToCacheKey = (userId: UserId) => userId.toString

    ObservedReadableWritableMemcacheStore.fromCacheClient(
      cacheClient = tweetRecommendationResultsCacheClient,
      ttl = 24.hours
    )(
      valueInjection = BinaryScalaCodec(CrMixerTweetResponse),
      statsReceiver = storeStats,
      keyToString = userIdToCacheKey
    )
  }
}

View File

@ -1,67 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.inject.TwitterModule
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine.TwhinCollabFilterView
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
/**
 * Guice module binding four named `ReadableStore[Long, Seq[TweetId]]` instances. All four
 * read the same Strato column and differ only in the string given to
 * `TwhinCollabFilterView`.
 */
object TwhinCollabFilterStratoStoreModule extends TwitterModule {

  val stratoColumnPath: String = "cuad/twhin/getCollabFilterTweetCandidatesProd.User"

  // Shared construction for every provider below: same column, caller-chosen view string.
  private def collabFilterStore(
    stratoClient: StratoClient,
    viewName: String
  ): ReadableStore[Long, Seq[TweetId]] =
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView(viewName)
    )

  /** Store using the "follow_2022_03_10_c_500K" view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForFollow)
  def providesTwhinCollabFilterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, "follow_2022_03_10_c_500K")

  /** Store using the "engagement_2022_04_10_c_500K" view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForEngagement)
  def providesTwhinCollabFilterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, "engagement_2022_04_10_c_500K")

  /** Store using the "multiclusterFollow20220921" view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForFollow)
  def providesTwhinMultiClusterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, "multiclusterFollow20220921")

  /** Store using the "multiclusterEng20220921" view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForEngagement)
  def providesTwhinMultiClusterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, "multiclusterEng20220921")
}

View File

@ -1,42 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.simclusters_v2.thriftscala.OrderedClustersAndMembers
import javax.inject.Named
/**
 * Guice module binding the named `ReadableStore[UserId, OrderedClustersAndMembers]` that
 * reads the TwiceClustersMembers Strato column.
 */
object TwiceClustersMembersStoreModule extends TwitterModule {

  // Strato column to read; overridable via the flag below.
  private val twiceClustersMembersColumnPath: Flag[String] = flag[String](
    name = "crMixer.twiceClustersMembersColumnPath",
    default =
      "recommendations/simclusters_v2/embeddings/TwiceClustersMembersLargestDimApeSimilarity",
    // The help text previously named TweetRecentEngagedUsersStore, a copy-paste leftover.
    help = "Strato column path for TwiceClustersMembersStore"
  )

  /**
   * Builds the observed store over the configured Strato column.
   *
   * NOTE(review): the method name looks copied from TweetRecentEngagedUserStoreModule; it is
   * left unchanged here so the module's interface stays the same.
   *
   * @param statsReceiver root stats receiver; scoped to
   *                      "twice_clusters_members_largestDimApe_similarity_store"
   * @param stratoClient  Strato client used to fetch the column
   * @return an observed store keyed by user id
   */
  @Provides
  @Singleton
  @Named(ModuleNames.TwiceClustersMembersStore)
  def providesTweetRecentEngagedUserStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, OrderedClustersAndMembers] = {
    // The flag is read here, at provider invocation time.
    val twiceClustersMembersStratoFetchableStore = StratoFetchableStore
      .withUnitView[UserId, OrderedClustersAndMembers](
        stratoClient,
        twiceClustersMembersColumnPath())

    ObservedReadableStore(
      twiceClustersMembersStratoFetchableStore
    )(statsReceiver.scope("twice_clusters_members_largestDimApe_similarity_store"))
  }
}

View File

@ -1,83 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.memcached.Client
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.storehaus_internal.memcache.MemcacheStore
import com.twitter.storehaus_internal.util.ClientName
import com.twitter.storehaus_internal.util.ZkEndPoint
import javax.inject.Named
/**
 * Guice module binding three named memcached `Client`s. Each client has its own
 * flag-configurable destination; all share the same timeout and the same "cache_client"
 * stats scope.
 */
object UnifiedCacheClient extends TwitterModule {

  // Timeout applied to every memcached client created by this module.
  private val ClientTimeout = 20.milliseconds

  val crMixerUnifiedCacheDest: Flag[String] = flag[String](
    name = "crMixer.unifiedCacheDest",
    default = "/s/cache/content_recommender_unified_v2",
    help = "Wily path to Content Recommender unified cache"
  )

  val tweetRecommendationResultsCacheDest: Flag[String] = flag[String](
    name = "tweetRecommendationResults.CacheDest",
    default = "/s/cache/tweet_recommendation_results",
    help = "Wily path to CrMixer getTweetRecommendations() results cache"
  )

  val earlybirdTweetsCacheDest: Flag[String] = flag[String](
    name = "earlybirdTweets.CacheDest",
    default = "/s/cache/crmixer_earlybird_tweets",
    help = "Wily path to CrMixer Earlybird Recency Based Similarity Engine result cache"
  )

  // Shared construction for the providers below; only client name and destination vary.
  private def buildClient(
    clientName: String,
    dest: String,
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver
  ): Client =
    MemcacheStore.memcachedClient(
      name = ClientName(clientName),
      dest = ZkEndPoint(dest),
      statsReceiver = statsReceiver.scope("cache_client"),
      serviceIdentifier = serviceIdentifier,
      timeout = ClientTimeout
    )

  /** Client for the destination configured by `crMixer.unifiedCacheDest`. */
  @Provides
  @Singleton
  @Named(ModuleNames.UnifiedCache)
  def provideUnifiedCacheClient(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    buildClient(
      "memcache-content-recommender-unified",
      crMixerUnifiedCacheDest(),
      serviceIdentifier,
      statsReceiver
    )

  /** Client for the destination configured by `tweetRecommendationResults.CacheDest`. */
  @Provides
  @Singleton
  @Named(ModuleNames.TweetRecommendationResultsCache)
  def providesTweetRecommendationResultsCache(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    buildClient(
      "memcache-tweet-recommendation-results",
      tweetRecommendationResultsCacheDest(),
      serviceIdentifier,
      statsReceiver
    )

  /** Client for the destination configured by `earlybirdTweets.CacheDest`. */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdTweetsCache)
  def providesEarlybirdTweetsCache(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    buildClient(
      "memcache-crmixer-earlybird-tweets",
      earlybirdTweetsCacheDest(),
      serviceIdentifier,
      statsReceiver
    )
}

View File

@ -1,30 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.usersignalservice.thriftscala.BatchSignalRequest
import com.twitter.usersignalservice.thriftscala.BatchSignalResponse
import javax.inject.Named
/**
 * Guice module binding the named store that maps a `BatchSignalRequest` to a
 * `BatchSignalResponse` by fetching the user-signal-service Strato column.
 */
object UserSignalServiceColumnModule extends TwitterModule {

  private val UssColumnPath = "recommendations/user-signal-service/signals"

  /**
   * Builds the observed store over the user-signal-service column.
   *
   * @param statsReceiver root stats receiver; scoped to "user_signal_service_store"
   * @param stratoClient  Strato client used to fetch the column
   * @return an observed store from batch request to batch response
   */
  @Provides
  @Singleton
  @Named(ModuleNames.UssStratoColumn)
  def providesUserSignalServiceStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[BatchSignalRequest, BatchSignalResponse] = {
    val ussColumnStore =
      StratoFetchableStore.withUnitView[BatchSignalRequest, BatchSignalResponse](
        stratoClient,
        UssColumnPath)

    ObservedReadableStore(ussColumnStore)(statsReceiver.scope("user_signal_service_store"))
  }
}

Some files were not shown because too many files have changed in this diff Show More