mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-11-13 23:25:10 +01:00
[docx] split commit for file 600
Signed-off-by: Ari Archer <ari.web.xyz@gmail.com>
This commit is contained in:
parent
3c586de8ec
commit
78b3118da4
Binary file not shown.
@ -1,139 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialAdsCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRecommendationTopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRecommendationsResult
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRequest
|
||||
import com.twitter.cr_mixer.thriftscala.AdsResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetAdsRecommendationsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class AdsRecommendationsScribeLogger @Inject() (
|
||||
@Named(ModuleNames.AdsRecommendationsLogger) adsRecommendationsScribeLogger: Logger,
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver) {
|
||||
|
||||
private val scopedStats = statsReceiver.scope(this.getClass.getCanonicalName)
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
|
||||
/**
 * Scribe first step results after fetching initial ads candidates.
 *
 * The scribe work runs in the `onSuccess` callback of the future produced by `getResultFn`,
 * so nothing is scribed when that future fails. The candidates are converted to thrift only
 * once both gates (feature switch and decider) have passed.
 *
 * @param query        request the candidates were fetched for; scribe metadata is derived from it
 * @param getResultFn  by-name computation that fetches the candidates; evaluated once
 * @param enableScribe controlled by feature switch so that we can scribe for certain DDG
 * @return the future obtained by attaching the scribe callback to `getResultFn`
 */
def scribeInitialAdsCandidates(
  query: AdsCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialAdsCandidate]]],
  enableScribe: Boolean
): Future[Seq[Seq[InitialAdsCandidate]]] = {
  val scribeMetadata = ScribeMetadata.from(query)
  val timer = Stopwatch.start()
  getResultFn.onSuccess { input =>
    // Read the stopwatch first so the reported latency excludes the scribe work below.
    val latencyMs = timer().inMilliseconds
    val shouldScribe = enableScribe && decider.isAvailableForId(
      scribeMetadata.userId,
      DeciderConstants.adsRecommendationsPerExperimentScribeRate)

    // Only pay for the thrift conversion when the message is actually published.
    if (shouldScribe) {
      upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
      val result = convertFetchCandidatesResult(input, scribeMetadata.userId)
      val traceId = Trace.id.traceId.toLong
      scribeResult(buildScribeMessage(result, scribeMetadata, latencyMs, traceId))
    }
  }
}
|
||||
|
||||
/**
 * Scribe top level API results.
 *
 * The request/response pair is wrapped into an `AdsRecommendationTopLevelApiResult` and
 * published only when `enableScribe` is set and the decider admits the user. The work runs
 * in the `onSuccess` callback, so failed requests are not scribed.
 *
 * @param request        the incoming ads request
 * @param startTime      stored as the `timestamp` of the scribed result
 * @param scribeMetadata request-level metadata (user id, product, request UUID)
 * @param getResultFn    by-name computation producing the response; evaluated once
 * @param enableScribe   caller-supplied switch; when false nothing is scribed
 * @return the future obtained by attaching the scribe callback to `getResultFn`
 */
def scribeGetAdsRecommendations(
  request: AdsRequest,
  startTime: Long,
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[AdsResponse],
  enableScribe: Boolean
): Future[AdsResponse] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { response =>
    // Read the stopwatch first so the reported latency excludes the scribe work below.
    val latencyMs = timer().inMilliseconds
    val shouldScribe = enableScribe && decider.isAvailableForId(
      scribeMetadata.userId,
      DeciderConstants.adsRecommendationsPerExperimentScribeRate)

    // Build the message only when it is going to be published.
    if (shouldScribe) {
      topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
      val result = AdsRecommendationsResult.AdsRecommendationTopLevelApiResult(
        AdsRecommendationTopLevelApiResult(
          timestamp = startTime,
          request = request,
          response = response
        ))
      val traceId = Trace.id.traceId.toLong
      scribeResult(buildScribeMessage(result, scribeMetadata, latencyMs, traceId))
    }
  }
}
|
||||
|
||||
/**
 * Flattens the per-source candidate groups into one list of thrift
 * `TweetCandidateWithMetadata` and wraps it as a `FetchCandidatesResult`.
 *
 * @param candidatesSeq candidate groups, flattened in order
 * @param requestUserId passed to `CandidateGenerationKeyUtil.toThrift` for every candidate
 */
private def convertFetchCandidatesResult(
  candidatesSeq: Seq[Seq[InitialAdsCandidate]],
  requestUserId: UserId
): AdsRecommendationsResult = {
  val flattened = for {
    group <- candidatesSeq
    ad <- group
  } yield {
    val generationKey =
      CandidateGenerationKeyUtil.toThrift(ad.candidateGenerationInfo, requestUserId)
    TweetCandidateWithMetadata(
      tweetId = ad.tweetId,
      candidateGenerationKey = Some(generationKey),
      score = Some(ad.getSimilarityScore),
      numCandidateGenerationKeys = None // not populated yet
    )
  }
  AdsRecommendationsResult.FetchCandidatesResult(FetchCandidatesResult(Some(flattened)))
}
|
||||
|
||||
/**
 * Assembles the `GetAdsRecommendationsScribe` thrift message for one result.
 *
 * @param result         payload to scribe
 * @param scribeMetadata supplies the request UUID and user id
 * @param latencyMs      latency recorded in the performance metrics
 * @param traceId        trace id attached to the message
 */
private def buildScribeMessage(
  result: AdsRecommendationsResult,
  scribeMetadata: ScribeMetadata,
  latencyMs: Long,
  traceId: Long
): GetAdsRecommendationsScribe = {
  val metrics = PerformanceMetrics(Some(latencyMs))
  val buckets = getImpressedBuckets(scopedStats)
  GetAdsRecommendationsScribe(
    uuid = scribeMetadata.requestUUID,
    userId = scribeMetadata.userId,
    result = result,
    traceId = Some(traceId),
    performanceMetrics = Some(metrics),
    impressedBuckets = buckets
  )
}
|
||||
|
||||
/** Publishes `scribeMsg` through the injected ads recommendations scribe logger. */
private def scribeResult(scribeMsg: GetAdsRecommendationsScribe): Unit =
  publish(
    logger = adsRecommendationsScribeLogger,
    codec = GetAdsRecommendationsScribe,
    message = scribeMsg
  )
|
||||
|
||||
}
|
@ -1,34 +0,0 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"abdecider/src/main/scala",
|
||||
"content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"decider/src/main/scala",
|
||||
"featureswitches/featureswitches-core/src/main/scala:experimentation-settings",
|
||||
"finagle/finagle-core/src/main",
|
||||
"frigate/frigate-common:base",
|
||||
"frigate/frigate-common:util",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"kafka/finagle-kafka/finatra-kafka/src/main/scala",
|
||||
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
|
||||
"scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
|
||||
"scribelib/validators/src/main/scala/com/twitter/scribelib/validators",
|
||||
"scrooge/scrooge-serializer/src/main/scala",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/ml/api:data-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"timelines/src/main/scala/com/twitter/timelines/clientevent",
|
||||
"util-internal/scribe/src/main/scala/com/twitter/logging",
|
||||
],
|
||||
)
|
Binary file not shown.
Binary file not shown.
@ -1,489 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.google.common.base.CaseFormat
|
||||
import com.twitter.abdecider.ScribingABDeciderUtil
|
||||
import com.twitter.scribelib.marshallers.ClientDataProvider
|
||||
import com.twitter.scribelib.marshallers.ScribeSerialization
|
||||
import com.twitter.timelines.clientevent.MinimalClientDataProvider
|
||||
import com.twitter.cr_mixer.model.BlendedCandidate
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.model.GraphSourceInfo
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.scribe.ScribeCategories
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.FetchSignalSourcesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetTweetsRecommendationsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.InterleaveResult
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
import com.twitter.cr_mixer.thriftscala.RankResult
|
||||
import com.twitter.cr_mixer.thriftscala.Result
|
||||
import com.twitter.cr_mixer.thriftscala.SourceSignal
|
||||
import com.twitter.cr_mixer.thriftscala.TopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.cr_mixer.thriftscala.VITTweetCandidateScribe
|
||||
import com.twitter.cr_mixer.thriftscala.VITTweetCandidatesScribe
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.SourceInfo
|
||||
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
|
||||
import com.twitter.cr_mixer.util.MetricTagUtil
|
||||
import com.twitter.decider.SimpleRecipient
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.finatra.kafka.producers.KafkaProducerBase
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
import com.twitter.util.Time
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
import scala.util.Random
|
||||
|
||||
@Singleton
|
||||
case class CrMixerScribeLogger @Inject() (
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver,
|
||||
@Named(ModuleNames.TweetRecsLogger) tweetRecsScribeLogger: Logger,
|
||||
@Named(ModuleNames.BlueVerifiedTweetRecsLogger) blueVerifiedTweetRecsScribeLogger: Logger,
|
||||
@Named(ModuleNames.TopLevelApiDdgMetricsLogger) ddgMetricsLogger: Logger,
|
||||
kafkaProducer: KafkaProducerBase[String, GetTweetsRecommendationsScribe]) {
|
||||
|
||||
import CrMixerScribeLogger._
|
||||
|
||||
private val scopedStats = statsReceiver.scope("CrMixerScribeLogger")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
private val kafkaMessagesStats = scopedStats.scope("KafkaMessages")
|
||||
private val topLevelApiDdgMetricsStats = scopedStats.scope("TopLevelApiDdgMetrics")
|
||||
private val blueVerifiedTweetCandidatesStats = scopedStats.scope("BlueVerifiedTweetCandidates")
|
||||
|
||||
private val serialization = new ScribeSerialization {}
|
||||
|
||||
/**
 * Scribes the fetched signal sources for `query`.
 *
 * Delegates to `scribeResultsAndPerformanceMetrics`, converting the result with
 * `convertFetchSignalSourcesResult`.
 *
 * @param getResultFn by-name computation producing the source infos and source graphs
 */
def scribeSignalSources(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])]
): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchSignalSourcesResult
  )
}
|
||||
|
||||
/**
 * Scribes the initial candidates fetched for `query`.
 *
 * Delegates to `scribeResultsAndPerformanceMetrics`, converting the result with
 * `convertFetchCandidatesResult`.
 *
 * @param getResultFn by-name computation producing the candidate groups
 */
def scribeInitialCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchCandidatesResult
  )
}
|
||||
|
||||
/**
 * Scribes the candidates that remain after the pre-rank filters for `query`.
 *
 * Delegates to `scribeResultsAndPerformanceMetrics`, converting the result with
 * `convertPreRankFilterResult`.
 *
 * @param getResultFn by-name computation producing the filtered candidate groups
 */
def scribePreRankFilterCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertPreRankFilterResult
  )
}
|
||||
|
||||
/**
 * Scribes the blended (interleaved) candidates for `query`.
 *
 * Delegates to `scribeResultsAndPerformanceMetrics` with `convertInterleaveResult`. This is
 * the only wrapper here that passes `enableKafkaScribe = true`.
 *
 * @param getResultFn by-name computation producing the blended candidates
 */
def scribeInterleaveCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[BlendedCandidate]]
): Future[Seq[BlendedCandidate]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertInterleaveResult,
    enableKafkaScribe = true
  )
}
|
||||
|
||||
/**
 * Scribes the ranked candidates for `query`.
 *
 * Delegates to `scribeResultsAndPerformanceMetrics`, converting the result with
 * `convertRankResult`.
 *
 * @param getResultFn by-name computation producing the ranked candidates
 */
def scribeRankedCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[RankedCandidate]]
): Future[Seq[RankedCandidate]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertRankResult
  )
}
|
||||
|
||||
/**
 * Scribe Top Level API Request / Response and performance metrics
 * for the getTweetRecommendations() endpoint.
 *
 * Two independent sinks are driven from the `onSuccess` callback:
 *  - the tweet-recs scribe, gated by `upperFunnelPerStepScribeRate`;
 *  - the DDG metrics client event, gated by `topLevelApiDdgMetricsScribeRate`.
 * Nothing is emitted when the future fails.
 *
 * @param request        the incoming request
 * @param startTime      stored as the `timestamp` of the scribed top-level result
 * @param scribeMetadata request-level metadata (user id, product, request UUID)
 * @param getResultFn    by-name computation producing the response; evaluated once
 * @return the future obtained by attaching the scribe callback to `getResultFn`
 */
def scribeGetTweetRecommendations(
  request: CrMixerTweetRequest,
  startTime: Long,
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[CrMixerTweetResponse]
): Future[CrMixerTweetResponse] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { response =>
    // Read the stopwatch first so both sinks report the same latency.
    val latencyMs = timer().inMilliseconds

    // We use upperFunnelPerStepScribeRate to cover TopLevelApi scribe logs
    if (decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.upperFunnelPerStepScribeRate)) {
      topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
      // The message is only needed by this sink, so it is built inside the gate.
      val result = convertTopLevelAPIResult(request, response, startTime)
      val traceId = Trace.id.traceId.toLong
      scribeResult(buildScribeMessage(result, scribeMetadata, latencyMs, traceId))
    }

    if (decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.topLevelApiDdgMetricsScribeRate)) {
      topLevelApiDdgMetricsStats.counter(scribeMetadata.product.originalName).incr()
      publishTopLevelDdgMetrics(
        logger = ddgMetricsLogger,
        topLevelDdgMetricsMetadata = TopLevelDdgMetricsMetadata.from(request),
        latencyMs = latencyMs,
        candidateSize = response.tweets.length)
    }
  }
}
|
||||
|
||||
/**
 * Scribe all of the Blue Verified tweets that are candidates from cr-mixer
 * from the getTweetRecommendations() endpoint for stats tracking/debugging purposes.
 *
 * Nothing happens unless the `enableScribeForBlueVerifiedTweetCandidates` decider is
 * available. When it is, one message is published per request, even if no candidate
 * carries the Blue Verified annotation.
 *
 * NOTE(review): a counter is created per author id (looks like unbounded metric
 * cardinality), and the scope uses `product.name` where the other counters in this class
 * use `product.originalName` — confirm both are intentional.
 *
 * @param scribeMetadata request-level metadata (user id, product, request UUID)
 * @param getResultFn    by-name computation producing the ranked candidates
 */
def scribeGetTweetRecommendationsForBlueVerified(
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[Seq[RankedCandidate]]
): Future[Seq[RankedCandidate]] = {
  def isBlueVerified(candidate: RankedCandidate): Boolean =
    candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)

  getResultFn.onSuccess { rankedCandidates =>
    if (decider.isAvailable(DeciderConstants.enableScribeForBlueVerifiedTweetCandidates)) {
      blueVerifiedTweetCandidatesStats.counter("process_request").incr()

      val verifiedOnly = rankedCandidates.filter(isBlueVerified)
      val buckets = getImpressedBuckets(blueVerifiedTweetCandidatesStats).getOrElse(Nil)

      val candidateScribes = verifiedOnly.map { verified =>
        val authorId = verified.tweetInfo.authorId
        blueVerifiedTweetCandidatesStats
          .scope(scribeMetadata.product.name)
          .counter(authorId.toString)
          .incr()
        VITTweetCandidateScribe(
          tweetId = verified.tweetId,
          authorId = authorId,
          score = verified.predictionScore,
          metricTags = MetricTagUtil.buildMetricTags(verified)
        )
      }

      publish(
        logger = blueVerifiedTweetRecsScribeLogger,
        codec = VITTweetCandidatesScribe,
        message = VITTweetCandidatesScribe(
          uuid = scribeMetadata.requestUUID,
          userId = scribeMetadata.userId,
          candidates = candidateScribes,
          product = scribeMetadata.product,
          impressedBuckets = buckets
        )
      )
    }
  }
}
|
||||
|
||||
/**
 * Scribe Per-step intermediate results and performance metrics
 * for each step: fetch signals, fetch candidates, filters, ranker, etc
 *
 * Two sinks are driven from the `onSuccess` callback:
 *  - the tweet-recs scribe, gated by `upperFunnelPerStepScribeRate`;
 *  - a Kafka fork of the same message, sent only when `enableKafkaScribe` is set and
 *    `shouldScribeKafkaMessage` accepts the user and product.
 * The thrift message is built lazily, at most once, and only if a sink needs it.
 *
 * @param scribeMetadata    request-level metadata (user id, product, request UUID)
 * @param getResultFn       by-name computation for the step; evaluated once
 * @param convertToResultFn converts the step output (plus the user id) into a thrift `Result`
 * @param enableKafkaScribe whether the step is eligible for the Kafka fork (default false)
 * @return the future obtained by attaching the scribe callback to `getResultFn`
 */
private[logging] def scribeResultsAndPerformanceMetrics[T](
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[T],
  convertToResultFn: (T, UserId) => Result,
  enableKafkaScribe: Boolean = false
): Future[T] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { input =>
    // Read the stopwatch first so the reported latency excludes the scribe work below.
    val latencyMs = timer().inMilliseconds
    val traceId = Trace.id.traceId.toLong
    // Shared by both sinks; skipped entirely when neither gate passes.
    lazy val scribeMsg = {
      val result = convertToResultFn(input, scribeMetadata.userId)
      buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
    }

    if (decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.upperFunnelPerStepScribeRate)) {
      upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
      scribeResult(scribeMsg)
    }

    // forks the scribe as a Kafka message for async feature hydration
    if (enableKafkaScribe && shouldScribeKafkaMessage(
        scribeMetadata.userId,
        scribeMetadata.product)) {
      kafkaMessagesStats.counter(scribeMetadata.product.originalName).incr()

      // The value returned by `send` is discarded here, so the send outcome is not observed.
      downsampleKafkaMessage(scribeMsg).foreach { kafkaMessage =>
        kafkaProducer.send(
          topic = ScribeCategories.TweetsRecs.scribeCategory,
          key = traceId.toString,
          value = kafkaMessage,
          timestamp = Time.now.inMilliseconds
        )
      }
    }
  }
}
|
||||
|
||||
/**
 * Wraps a request/response pair into the thrift `Result.TopLevelApiResult` variant.
 *
 * @param startTime stored as the `timestamp` field
 */
private def convertTopLevelAPIResult(
  request: CrMixerTweetRequest,
  response: CrMixerTweetResponse,
  startTime: Long
): Result = {
  val topLevel = TopLevelApiResult(
    timestamp = startTime,
    request = request,
    response = response
  )
  Result.TopLevelApiResult(topLevel)
}
|
||||
|
||||
/**
 * Converts the fetched signal sources into the thrift `Result.FetchSignalSourcesResult`.
 *
 * Each `SourceInfo` contributes a signal with its internal id. Each source-graph entry
 * contributes a placeholder signal carrying the request user id. The combined collection
 * starts from a `Set`, so the identical placeholders presumably collapse into one entry.
 *
 * @param sourceInfoSetTuple the source infos and the source graphs keyed by name
 * @param requestUserId      id used for the source-graph placeholders
 */
private def convertFetchSignalSourcesResult(
  sourceInfoSetTuple: (Set[SourceInfo], Map[String, Option[GraphSourceInfo]]),
  requestUserId: UserId
): Result = {
  val (sourceInfos, sourceGraphsByName) = sourceInfoSetTuple
  val signalsFromSources = sourceInfos.map(info => SourceSignal(id = Some(info.internalId)))
  // For source graphs, we pass in requestUserId as a placeholder
  val graphPlaceholders = sourceGraphsByName.map { _ =>
    SourceSignal(id = Some(InternalId.UserId(requestUserId)))
  }
  val fetched = FetchSignalSourcesResult(signals = Some(signalsFromSources ++ graphPlaceholders))
  Result.FetchSignalSourcesResult(fetched)
}
|
||||
|
||||
/**
 * Flattens candidate groups into thrift `TweetCandidateWithMetadata`, in order.
 *
 * Shared by `convertFetchCandidatesResult` and `convertPreRankFilterResult`, which differ
 * only in the `Result` variant they wrap the list in.
 *
 * @param candidatesSeq candidate groups to flatten
 * @param requestUserId passed to `CandidateGenerationKeyUtil.toThrift` for every candidate
 */
private def toTweetCandidatesWithMetadata(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): Seq[TweetCandidateWithMetadata] =
  candidatesSeq.flatMap { candidates =>
    candidates.map { candidate =>
      TweetCandidateWithMetadata(
        tweetId = candidate.tweetId,
        candidateGenerationKey = Some(
          CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
        score = Some(candidate.getSimilarityScore),
        numCandidateGenerationKeys = None // not populated yet
      )
    }
  }

/** Converts the fetched candidates into the thrift `Result.FetchCandidatesResult` variant. */
private def convertFetchCandidatesResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): Result =
  Result.FetchCandidatesResult(
    FetchCandidatesResult(Some(toTweetCandidatesWithMetadata(candidatesSeq, requestUserId))))

/** Converts the pre-rank-filtered candidates into the thrift `Result.PreRankFilterResult` variant. */
private def convertPreRankFilterResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): Result =
  Result.PreRankFilterResult(
    PreRankFilterResult(Some(toTweetCandidatesWithMetadata(candidatesSeq, requestUserId))))
|
||||
|
||||
/**
 * Converts blended candidates into the thrift `Result.InterleaveResult` variant.
 *
 * We take InterleaveResult for Unconstrained dataset ML ranker training, so the author id
 * and the number of potential reasons are hydrated in addition to the generation key.
 *
 * @param requestUserId passed to `CandidateGenerationKeyUtil.toThrift` for every candidate
 */
private def convertInterleaveResult(
  blendedCandidates: Seq[BlendedCandidate],
  requestUserId: UserId
): Result = {
  // hydrate fields for light ranking training data
  def toThriftCandidate(blended: BlendedCandidate): TweetCandidateWithMetadata = {
    val generationKey = CandidateGenerationKeyUtil.toThrift(blended.reasonChosen, requestUserId)
    TweetCandidateWithMetadata(
      tweetId = blended.tweetId,
      candidateGenerationKey = Some(generationKey),
      authorId = Some(blended.tweetInfo.authorId), // for ML pipeline training
      score = Some(blended.getSimilarityScore),
      numCandidateGenerationKeys = Some(blended.potentialReasons.size)
    )
  }

  Result.InterleaveResult(InterleaveResult(Some(blendedCandidates.map(toThriftCandidate))))
}
|
||||
|
||||
/**
 * Converts ranked candidates into the thrift `Result.RankResult` variant.
 *
 * @param requestUserId passed to `CandidateGenerationKeyUtil.toThrift` for every candidate
 */
private def convertRankResult(
  rankedCandidates: Seq[RankedCandidate],
  requestUserId: UserId
): Result = {
  def toThriftCandidate(ranked: RankedCandidate): TweetCandidateWithMetadata = {
    val generationKey = CandidateGenerationKeyUtil.toThrift(ranked.reasonChosen, requestUserId)
    TweetCandidateWithMetadata(
      tweetId = ranked.tweetId,
      candidateGenerationKey = Some(generationKey),
      score = Some(ranked.getSimilarityScore),
      numCandidateGenerationKeys = Some(ranked.potentialReasons.size)
    )
  }

  Result.RankResult(RankResult(Some(rankedCandidates.map(toThriftCandidate))))
}
|
||||
|
||||
/**
 * Assembles the `GetTweetsRecommendationsScribe` thrift message for one result.
 *
 * @param result         payload to scribe
 * @param scribeMetadata supplies the request UUID and user id
 * @param latencyMs      latency recorded in the performance metrics
 * @param traceId        trace id attached to the message
 */
private def buildScribeMessage(
  result: Result,
  scribeMetadata: ScribeMetadata,
  latencyMs: Long,
  traceId: Long
): GetTweetsRecommendationsScribe = {
  val metrics = PerformanceMetrics(Some(latencyMs))
  val buckets = getImpressedBuckets(scopedStats)
  GetTweetsRecommendationsScribe(
    uuid = scribeMetadata.requestUUID,
    userId = scribeMetadata.userId,
    result = result,
    traceId = Some(traceId),
    performanceMetrics = Some(metrics),
    impressedBuckets = buckets
  )
}
|
||||
|
||||
/** Publishes `scribeMsg` through the injected tweet-recs scribe logger. */
private def scribeResult(scribeMsg: GetTweetsRecommendationsScribe): Unit =
  publish(
    logger = tweetRecsScribeLogger,
    codec = GetTweetsRecommendationsScribe,
    message = scribeMsg
  )
|
||||
|
||||
/**
 * Gate for producing messages to Kafka for async feature hydration.
 *
 * Only `Product.Home` is eligible. The product is checked first so that the decider lookup
 * is skipped for every other product.
 *
 * @param userId  recipient used for the `kafkaMessageScribeSampleRate` decider
 * @param product product of the current request
 * @return true when the product is Home and the decider admits the user
 */
private def shouldScribeKafkaMessage(
  userId: UserId,
  product: Product
): Boolean = {
  val isHomeProduct = product == Product.Home
  isHomeProduct && decider.isAvailable(
    DeciderConstants.kafkaMessageScribeSampleRate,
    Some(SimpleRecipient(userId)))
}
|
||||
|
||||
/**
 * Due to size limits of Strato (see SD-19028), each Kafka message must be downsampled.
 *
 * For an `InterleaveResult`, the tweets are shuffled, capped at `KafkaMaxTweetsPerMessage`
 * and split into batches of `BatchSize`; one message is produced per batch, and none when
 * the tweet list is absent or empty. Any other result type is counted as invalid and
 * produces a single message with no tweets. Performance metrics and impressed buckets are
 * dropped from every produced message.
 *
 * @param scribeMsg the full scribe message to split
 * @return the downsampled messages, sharing the uuid, user id and trace id of `scribeMsg`
 */
private[logging] def downsampleKafkaMessage(
  scribeMsg: GetTweetsRecommendationsScribe
): Seq[GetTweetsRecommendationsScribe] = {
  def withResult(sampled: Result): GetTweetsRecommendationsScribe =
    GetTweetsRecommendationsScribe(
      uuid = scribeMsg.uuid,
      userId = scribeMsg.userId,
      result = sampled,
      traceId = scribeMsg.traceId,
      performanceMetrics = None,
      impressedBuckets = None
    )

  val sampledResults: Seq[Result] = scribeMsg.result match {
    case Result.InterleaveResult(interleaved) =>
      val batches = interleaved.tweets match {
        case Some(tweets) =>
          Random.shuffle(tweets).take(KafkaMaxTweetsPerMessage).grouped(BatchSize).toSeq
        case None =>
          Seq.empty
      }
      batches.map(batch => Result.InterleaveResult(InterleaveResult(Some(batch))))

    // if it's an unrecognized type, err on the side of sending no candidates
    case _ =>
      kafkaMessagesStats.counter("InvalidKafkaMessageResultType").incr()
      Seq(Result.InterleaveResult(InterleaveResult(None)))
  }

  sampledResults.map(withResult)
}
|
||||
|
||||
/**
 * Handles client_event serialization to log data into DDG metrics.
 *
 * The event namespace comes from `getNamespace` with component "tweetrec", an empty element
 * and action "candidates". The payload carries `latency_ms` and `event_value`.
 *
 * @param logger                     logger the serialized event is written to
 * @param topLevelDdgMetricsMetadata supplies the namespace and client data
 * @param candidateSize              logged as `event_value`
 * @param latencyMs                  logged as `latency_ms`
 */
private[logging] def publishTopLevelDdgMetrics(
  logger: Logger,
  topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
  candidateSize: Long,
  latencyMs: Long,
): Unit = {
  val eventData = Map[Any, Any](
    "latency_ms" -> latencyMs,
    "event_value" -> candidateSize
  )
  val componentAndElement: (String, String) = ("tweetrec", "")
  val eventNamespace =
    getNamespace(topLevelDdgMetricsMetadata, componentAndElement) + ("action" -> "candidates")
  val clientData = getClientData(topLevelDdgMetricsMetadata)
  logger.info(serialization.serializeClientEvent(eventNamespace, clientData, eventData))
}
|
||||
|
||||
/**
 * Builds the client data attached to a DDG metrics event from the request metadata.
 * The guest id is always left empty.
 */
private def getClientData(
  topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata
): ClientDataProvider = {
  val metadata = topLevelDdgMetricsMetadata
  MinimalClientDataProvider(
    userId = metadata.userId,
    guestId = None,
    clientApplicationId = metadata.clientApplicationId,
    countryCode = metadata.countryCode
  )
}
|
||||
|
||||
/**
 * Builds the client-event namespace for a DDG metrics event.
 *
 * @param topLevelDdgMetricsMetadata supplies the client application id and the product,
 *                                   whose name is converted from UpperCamel to lower_underscore
 * @param label                      (component, element) pair
 * @return map with the keys client, page ("cr-mixer"), section, component and element
 */
private def getNamespace(
  topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
  label: (String, String)
): Map[String, String] = {
  val (component, element) = label
  val section = CaseFormat.UPPER_CAMEL.to(
    CaseFormat.LOWER_UNDERSCORE,
    topLevelDdgMetricsMetadata.product.originalName)
  val client =
    ScribingABDeciderUtil.clientForAppId(topLevelDdgMetricsMetadata.clientApplicationId)

  Map(
    "client" -> client,
    "page" -> "cr-mixer",
    "section" -> section,
    "component" -> component,
    "element" -> element
  )
}
|
||||
}
|
||||
|
||||
object CrMixerScribeLogger {

  /** Upper bound on the tweets kept from one scribe message when downsampling for Kafka. */
  val KafkaMaxTweetsPerMessage: Int = 200

  /** Number of tweets per Kafka message after the sampled tweets are split into batches. */
  val BatchSize: Int = 20
}
|
Binary file not shown.
@ -1,193 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetRelatedTweetsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetResult
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetTopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class RelatedTweetScribeLogger @Inject() (
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver,
|
||||
@Named(ModuleNames.RelatedTweetsLogger) relatedTweetsScribeLogger: Logger) {
|
||||
|
||||
private val scopedStats = statsReceiver.scope("RelatedTweetsScribeLogger")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
private val topLevelApiNoUserIdStats = scopedStats.scope("TopLevelApiNoUserId")
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
private val upperFunnelsNoUserIdStats = scopedStats.scope("UpperFunnelsNoUserId")
|
||||
|
||||
/**
 * Scribes the initial related-tweet candidates fetched for `query`.
 *
 * Delegates to `scribeResultsAndPerformanceMetrics`, converting the result with
 * `convertFetchCandidatesResult`.
 *
 * @param getResultFn by-name computation producing the candidate groups
 */
def scribeInitialCandidates(
  query: RelatedTweetCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = RelatedTweetScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    relatedTweetScribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchCandidatesResult
  )
}
|
||||
|
||||
/**
 * Scribes the related-tweet candidates that remain after the pre-rank filters.
 *
 * Delegates to `scribeResultsAndPerformanceMetrics`, converting the result with
 * `convertPreRankFilterResult`.
 *
 * @param getResultFn by-name computation producing the filtered candidate groups
 */
def scribePreRankFilterCandidates(
  query: RelatedTweetCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = RelatedTweetScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    relatedTweetScribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertPreRankFilterResult
  )
}
|
||||
|
||||
/**
 * Scribe Top Level API Request / Response and performance metrics
 * for the getRelatedTweets endpoint.
 *
 * Requests without a user id in the client context are only counted, never scribed.
 * Requests with one are scribed when `upperFunnelPerStepScribeRate` admits the user.
 *
 * @param request                    the incoming request
 * @param startTime                  stored as the `timestamp` of the scribed result
 * @param relatedTweetScribeMetadata request-level metadata (client context, product)
 * @param getResultFn                by-name computation producing the response
 */
def scribeGetRelatedTweets(
  request: RelatedTweetRequest,
  startTime: Long,
  relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
  getResultFn: => Future[RelatedTweetResponse]
): Future[RelatedTweetResponse] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { response =>
    val productName = relatedTweetScribeMetadata.product.originalName
    relatedTweetScribeMetadata.clientContext.userId.fold {
      topLevelApiNoUserIdStats.counter(productName).incr()
    } { userId =>
      if (decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate)) {
        topLevelApiStats.counter(productName).incr()
        val latencyMs = timer().inMilliseconds
        val result = convertTopLevelAPIResult(request, response, startTime)
        val traceId = Trace.id.traceId.toLong
        scribeResult(buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId))
      }
    }
  }
}
|
||||
|
||||
/**
 * Scribe Per-step intermediate results and performance metrics
 * for each step: fetch candidates, filters.
 *
 * Requests without a user id in the client context are only counted, never scribed.
 * Requests with one are converted and scribed when `upperFunnelPerStepScribeRate` admits
 * the user.
 *
 * @param relatedTweetScribeMetadata request-level metadata (client context, product)
 * @param getResultFn                by-name computation for the step
 * @param convertToResultFn          converts the step output (plus the user id) to thrift
 */
private def scribeResultsAndPerformanceMetrics[T](
  relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
  getResultFn: => Future[T],
  convertToResultFn: (T, UserId) => RelatedTweetResult
): Future[T] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { input =>
    val productName = relatedTweetScribeMetadata.product.originalName
    relatedTweetScribeMetadata.clientContext.userId.fold {
      upperFunnelsNoUserIdStats.counter(productName).incr()
    } { userId =>
      if (decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate)) {
        upperFunnelsStats.counter(productName).incr()
        val latencyMs = timer().inMilliseconds
        val result = convertToResultFn(input, userId)
        val traceId = Trace.id.traceId.toLong
        scribeResult(buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId))
      }
    }
  }
}
|
||||
|
||||
/**
 * Wraps a request / response pair into the top-level-API variant of `RelatedTweetResult`.
 *
 * @param request   the original request
 * @param response  the response produced for `request`
 * @param startTime value stored in the result's `timestamp` field
 */
private def convertTopLevelAPIResult(
  request: RelatedTweetRequest,
  response: RelatedTweetResponse,
  startTime: Long
): RelatedTweetResult = {
  val topLevelResult = RelatedTweetTopLevelApiResult(
    timestamp = startTime,
    request = request,
    response = response
  )
  RelatedTweetResult.RelatedTweetTopLevelApiResult(topLevelResult)
}
|
||||
|
||||
/**
 * Flattens the fetched candidates into a `FetchCandidatesResult`.
 *
 * Only the tweet id is recorded for each candidate.
 *
 * @param candidatesSeq candidates grouped in nested sequences; the nesting is flattened
 * @param requestUserId unused here; present so the method matches the converter signature
 */
private def convertFetchCandidatesResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): RelatedTweetResult = {
  val flattened =
    for {
      group <- candidatesSeq
      candidate <- group
    } yield {
      // do not hydrate candidateGenerationKey to save cost
      TweetCandidateWithMetadata(
        tweetId = candidate.tweetId,
        candidateGenerationKey = None
      )
    }
  RelatedTweetResult.FetchCandidatesResult(FetchCandidatesResult(Some(flattened)))
}
|
||||
|
||||
/**
 * Flattens the filtered candidates into a `PreRankFilterResult`.
 *
 * Each candidate is recorded with its tweet id, its thrift candidate generation key,
 * its author id and its similarity score.
 *
 * @param candidatesSeq candidates grouped in nested sequences; the nesting is flattened
 * @param requestUserId passed to `CandidateGenerationKeyUtil.toThrift` for every candidate
 */
private def convertPreRankFilterResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): RelatedTweetResult = {
  val flattened =
    for {
      group <- candidatesSeq
      candidate <- group
    } yield {
      val generationKey =
        CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)
      TweetCandidateWithMetadata(
        tweetId = candidate.tweetId,
        candidateGenerationKey = Some(generationKey),
        authorId = Some(candidate.tweetInfo.authorId),
        score = Some(candidate.getSimilarityScore),
        numCandidateGenerationKeys = None
      )
    }
  RelatedTweetResult.PreRankFilterResult(PreRankFilterResult(Some(flattened)))
}
|
||||
|
||||
/**
 * Assembles the `GetRelatedTweetsScribe` message for one scribed result.
 *
 * @param relatedTweetResult         the result payload to scribe
 * @param relatedTweetScribeMetadata supplies uuid, internal id and the client context ids
 * @param latencyMs                  latency stored in the performance metrics
 * @param traceId                    trace id stored on the message
 */
private def buildScribeMessage(
  relatedTweetResult: RelatedTweetResult,
  relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
  latencyMs: Long,
  traceId: Long
): GetRelatedTweetsScribe = {
  val clientContext = relatedTweetScribeMetadata.clientContext
  val metrics = PerformanceMetrics(Some(latencyMs))
  GetRelatedTweetsScribe(
    uuid = relatedTweetScribeMetadata.requestUUID,
    internalId = relatedTweetScribeMetadata.internalId,
    relatedTweetResult = relatedTweetResult,
    requesterId = clientContext.userId,
    guestId = clientContext.guestId,
    traceId = Some(traceId),
    performanceMetrics = Some(metrics),
    impressedBuckets = getImpressedBuckets(scopedStats)
  )
}
|
||||
|
||||
/**
 * Publishes `scribeMsg` to the related-tweets scribe logger via `publish`.
 *
 * @param scribeMsg the fully built scribe message
 */
private def scribeResult(scribeMsg: GetRelatedTweetsScribe): Unit =
  publish(
    logger = relatedTweetsScribeLogger,
    codec = GetRelatedTweetsScribe,
    message = scribeMsg
  )
|
||||
}
|
Binary file not shown.
@ -1,43 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.featureswitch.CrMixerImpressedBuckets
|
||||
import com.twitter.cr_mixer.thriftscala.ImpressesedBucketInfo
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.scrooge.BinaryThriftStructSerializer
|
||||
import com.twitter.scrooge.ThriftStruct
|
||||
import com.twitter.scrooge.ThriftStructCodec
|
||||
|
||||
/**
 * Helpers shared by the scribe loggers in this package.
 */
object ScribeLoggerUtils {

  /**
   * Handles base64-encoding, serialization, and publish.
   *
   * The message is turned into a string by a `BinaryThriftStructSerializer` built from
   * `codec` and written to `logger` at info level.
   *
   * @param logger  destination logger
   * @param codec   thrift codec for `T`
   * @param message the struct to publish
   */
  private[logging] def publish[T <: ThriftStruct](
    logger: Logger,
    codec: ThriftStructCodec[T],
    message: T
  ): Unit = {
    val serializer = BinaryThriftStructSerializer(codec)
    logger.info(serializer.toString(message))
  }

  /**
   * Converts the impressed buckets reported by `CrMixerImpressedBuckets` into thrift
   * `ImpressesedBucketInfo` values.
   *
   * Buckets are de-duplicated through a `Set` before conversion, and the number of distinct
   * buckets is recorded in the `impressed_buckets` stat. The whole computation is wrapped in
   * `StatsUtil.trackNonFutureBlockStats` under the `getImpressedBuckets` scope.
   *
   * @param scopedStats stats receiver used for both the tracking scope and the size stat
   * @return the converted buckets, or `None` when `getAllImpressedBuckets` is empty
   */
  private[logging] def getImpressedBuckets(
    scopedStats: StatsReceiver
  ): Option[List[ImpressesedBucketInfo]] = {
    val trackingStats = scopedStats.scope("getImpressedBuckets")
    StatsUtil.trackNonFutureBlockStats(trackingStats) {
      CrMixerImpressedBuckets.getAllImpressedBuckets.map { listBuckets =>
        val distinctBuckets = listBuckets.toSet
        scopedStats.stat("impressed_buckets").add(distinctBuckets.size)
        // Map over the Set first so that identical thrift infos collapse before toList.
        val bucketInfos = distinctBuckets.map { bucket =>
          val settings = bucket.experiment.settings
          ImpressesedBucketInfo(
            experimentId = settings.experimentId.getOrElse(-1L),
            bucketName = bucket.name,
            version = settings.version
          )
        }
        bucketInfos.toList
      }
    }
  }

}
|
Binary file not shown.
@ -1,45 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
import com.twitter.product_mixer.core.thriftscala.ClientContext
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
|
||||
/**
 * Request-level metadata attached to scribe messages.
 *
 * @param requestUUID request identifier copied from the query
 * @param userId      id of the requesting user
 * @param product     product the request was issued for
 */
case class ScribeMetadata(
  requestUUID: Long,
  userId: UserId,
  product: Product)

/** Factories building [[ScribeMetadata]] from the supported query types. */
object ScribeMetadata {

  /** Copies uuid, user id and product from a [[CrCandidateGeneratorQuery]]. */
  def from(query: CrCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)

  /** Copies uuid, user id and product from a [[UtegTweetCandidateGeneratorQuery]]. */
  def from(query: UtegTweetCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)

  /** Copies uuid, user id and product from an [[AdsCandidateGeneratorQuery]]. */
  def from(query: AdsCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)
}
|
||||
|
||||
/**
 * Request-level metadata attached to related-tweet scribe messages.
 *
 * @param requestUUID   request identifier copied from the query
 * @param internalId    internal id copied from the query
 * @param clientContext client context of the request
 * @param product       product the request was issued for
 */
case class RelatedTweetScribeMetadata(
  requestUUID: Long,
  internalId: InternalId,
  clientContext: ClientContext,
  product: Product)

/** Factory building [[RelatedTweetScribeMetadata]] from a related-tweet query. */
object RelatedTweetScribeMetadata {

  /** Copies uuid, internal id, client context and product from `query`. */
  def from(query: RelatedTweetCandidateGeneratorQuery): RelatedTweetScribeMetadata =
    RelatedTweetScribeMetadata(
      requestUUID = query.requestUUID,
      internalId = query.internalId,
      clientContext = query.clientContext,
      product = query.product)
}
|
Binary file not shown.
@ -1,22 +0,0 @@
|
||||
package com.twitter.cr_mixer
|
||||
package logging
|
||||
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
|
||||
/**
 * Request attributes carried alongside top-level DDG metrics.
 *
 * @param userId              requesting user id, if present in the client context
 * @param product             product the request was issued for
 * @param clientApplicationId client application id, if present in the client context
 * @param countryCode         country code, if present in the client context
 */
case class TopLevelDdgMetricsMetadata(
  userId: Option[Long],
  product: Product,
  clientApplicationId: Option[Long],
  countryCode: Option[String])

/** Factory building [[TopLevelDdgMetricsMetadata]] from a tweet request. */
object TopLevelDdgMetricsMetadata {

  /** Reads user id, app id and country code from the request's client context. */
  def from(request: CrMixerTweetRequest): TopLevelDdgMetricsMetadata = {
    val clientContext = request.clientContext
    TopLevelDdgMetricsMetadata(
      userId = clientContext.userId,
      product = request.product,
      clientApplicationId = clientContext.appId,
      countryCode = clientContext.countryCode
    )
  }
}
|
Binary file not shown.
@ -1,147 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetUtegTweetsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetResult
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetTopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Scribes UTEG tweet results and their latency to the logger bound to
 * `ModuleNames.UtegTweetsLogger`.
 *
 * A message is scribed only for users for which the decider reports
 * `upperFunnelPerStepScribeRate` as available.
 */
@Singleton
case class UtegTweetScribeLogger @Inject() (
  decider: CrMixerDecider,
  statsReceiver: StatsReceiver,
  @Named(ModuleNames.UtegTweetsLogger) utegTweetScribeLogger: Logger) {

  private val scopedStats = statsReceiver.scope("UtegTweetScribeLogger")
  private val topLevelApiStats = scopedStats.scope("TopLevelApi")
  private val upperFunnelsStats = scopedStats.scope("UpperFunnels")

  /**
   * Scribes the outcome of `getResultFn` as a fetch-candidates result.
   *
   * @param query       query from which the scribe metadata is built
   * @param getResultFn by-name computation producing the candidates to scribe
   */
  def scribeInitialCandidates(
    query: UtegTweetCandidateGeneratorQuery,
    getResultFn: => Future[Seq[TweetWithScoreAndSocialProof]]
  ): Future[Seq[TweetWithScoreAndSocialProof]] = {
    val scribeMetadata = ScribeMetadata.from(query)
    scribeResultsAndPerformanceMetrics(
      scribeMetadata = scribeMetadata,
      getResultFn = getResultFn,
      convertToResultFn = convertFetchCandidatesResult
    )
  }

  /**
   * Scribes the top-level API request / response and performance metrics
   * for the GetUtegTweetRecommendations() endpoint.
   *
   * @param request        the incoming request, embedded in the scribed result
   * @param startTime      value written to the scribed result's `timestamp`
   * @param scribeMetadata request-level metadata attached to the message
   * @param getResultFn    by-name computation producing the response
   */
  def scribeGetUtegTweetRecommendations(
    request: UtegTweetRequest,
    startTime: Long,
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[UtegTweetResponse]
  ): Future[UtegTweetResponse] = {
    val elapsed = Stopwatch.start()
    getResultFn.onSuccess { response =>
      if (isSampledForScribing(scribeMetadata.userId)) {
        topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
        val latencyMs = elapsed().inMilliseconds
        val apiResult = convertTopLevelAPIResult(request, response, startTime)
        val traceId = Trace.id.traceId.toLong
        scribeResult(buildScribeMessage(apiResult, scribeMetadata, latencyMs, traceId))
      }
    }
  }

  /**
   * Scribes per-step intermediate results and performance metrics
   * for each step: fetch candidates, filters.
   *
   * @param scribeMetadata    request-level metadata attached to the message
   * @param getResultFn       by-name computation producing the step result
   * @param convertToResultFn converts the step result and the user id into the thrift result
   */
  private def scribeResultsAndPerformanceMetrics[T](
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[T],
    convertToResultFn: (T, UserId) => UtegTweetResult
  ): Future[T] = {
    val elapsed = Stopwatch.start()
    getResultFn.onSuccess { stepOutput =>
      if (isSampledForScribing(scribeMetadata.userId)) {
        upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
        val latencyMs = elapsed().inMilliseconds
        val stepResult = convertToResultFn(stepOutput, scribeMetadata.userId)
        val traceId = Trace.id.traceId.toLong
        scribeResult(buildScribeMessage(stepResult, scribeMetadata, latencyMs, traceId))
      }
    }
  }

  /** True when the decider reports `upperFunnelPerStepScribeRate` as available for `userId`. */
  private def isSampledForScribing(userId: UserId): Boolean =
    decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate)

  /** Wraps a request / response pair into the top-level-API variant of `UtegTweetResult`. */
  private def convertTopLevelAPIResult(
    request: UtegTweetRequest,
    response: UtegTweetResponse,
    startTime: Long
  ): UtegTweetResult = {
    val topLevelResult = UtegTweetTopLevelApiResult(
      timestamp = startTime,
      request = request,
      response = response
    )
    UtegTweetResult.UtegTweetTopLevelApiResult(topLevelResult)
  }

  /**
   * Converts candidates into a `FetchCandidatesResult`, recording only the tweet ids.
   * `requestUserId` is unused; it is present so the method matches the converter signature.
   */
  private def convertFetchCandidatesResult(
    candidates: Seq[TweetWithScoreAndSocialProof],
    requestUserId: UserId
  ): UtegTweetResult = {
    val tweetCandidates =
      for (candidate <- candidates) yield {
        // do not hydrate candidateGenerationKey to save cost
        TweetCandidateWithMetadata(
          tweetId = candidate.tweetId,
          candidateGenerationKey = None
        )
      }
    UtegTweetResult.FetchCandidatesResult(FetchCandidatesResult(Some(tweetCandidates)))
  }

  /** Assembles the `GetUtegTweetsScribe` message for one scribed result. */
  private def buildScribeMessage(
    utegTweetResult: UtegTweetResult,
    scribeMetadata: ScribeMetadata,
    latencyMs: Long,
    traceId: Long
  ): GetUtegTweetsScribe = {
    val metrics = PerformanceMetrics(Some(latencyMs))
    GetUtegTweetsScribe(
      uuid = scribeMetadata.requestUUID,
      userId = scribeMetadata.userId,
      utegTweetResult = utegTweetResult,
      traceId = Some(traceId),
      performanceMetrics = Some(metrics),
      impressedBuckets = getImpressedBuckets(scopedStats)
    )
  }

  /** Publishes `scribeMsg` to the injected UTEG scribe logger via `publish`. */
  private def scribeResult(scribeMsg: GetUtegTweetsScribe): Unit =
    publish(
      logger = utegTweetScribeLogger,
      codec = GetUtegTweetsScribe,
      message = scribeMsg
    )
}
|
@ -1,16 +0,0 @@
|
||||
# Build target compiling every Scala source in this directory.
scala_library(
    sources = ["*.scala"],
    # Compiler warnings are treated as errors.
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "configapi/configapi-core",
        "content-recommender/thrift/src/main/thrift:thrift-scala",
        "cr-mixer/thrift/src/main/thrift:thrift-scala",
        "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
        "src/scala/com/twitter/simclusters_v2/common",
        "src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
        "src/thrift/com/twitter/recos:recos-common-scala",
        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
    ],
)
|
Binary file not shown.
Binary file not shown.
@ -1,200 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.thriftscala.LineItemInfo
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
|
||||
/**
 * Base type of every candidate in this file; each candidate is identified by a tweet id.
 */
sealed trait Candidate {

  /** Id of the candidate tweet. */
  val tweetId: TweetId

  /**
   * Hash derived from the tweet id only.
   * NOTE(review): presumably TweetId is a Long alias, in which case `toInt` keeps only the
   * low-order 32 bits - confirm.
   */
  override def hashCode: Int = tweetId.toInt
}
|
||||
|
||||
/**
 * A tweet id paired with the candidate generation info that produced it.
 *
 * @param tweetId                 id of the candidate tweet
 * @param candidateGenerationInfo source and similarity engine information for the candidate
 */
case class TweetWithCandidateGenerationInfo(
  tweetId: TweetId,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /** Score reported by the similarity engine, or 0.0 when the engine supplied none. */
  def getSimilarityScore: Double = {
    val engineInfo = candidateGenerationInfo.similarityEngineInfo
    engineInfo.score.getOrElse(0.0)
  }
}
|
||||
|
||||
/**
 * A candidate carrying its tweet info and the candidate generation info that produced it.
 * It can be turned into a [[BlendedCandidate]] or a [[RankedCandidate]].
 *
 * @param tweetId                 id of the candidate tweet
 * @param tweetInfo               tweet attributes
 * @param candidateGenerationInfo source and similarity engine information for the candidate
 */
case class InitialCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /**
   * Get the Similarity Score of a Tweet from its CG Info. For instance,
   * if it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted
   * combined score, and if it is from a SimClustersANNSimilarityEngine, the score will be
   * the SANN score. Falls back to 0.0 when no score is present.
   */
  def getSimilarityScore: Double = {
    val engineInfo = candidateGenerationInfo.similarityEngineInfo
    engineInfo.score.getOrElse(0.0)
  }

  /**
   * The same candidate can be generated by multiple algorithms.
   * During blending, candidate deduping happens. In order to retain the
   * candidateGenerationInfo from different algorithms, we attach them to a list of
   * potentialReasons.
   *
   * @param potentialReasons every candidate generation info that produced this tweet
   */
  def toBlendedCandidate(
    potentialReasons: Seq[CandidateGenerationInfo],
  ): BlendedCandidate =
    BlendedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = potentialReasons
    )

  // for experimental purposes only when bypassing interleave / ranking
  def toRankedCandidate(): RankedCandidate =
    RankedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      // prediction score is default to 0.0 to help differentiate that it is a no-op
      predictionScore = 0.0,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = Seq(candidateGenerationInfo)
    )
}
|
||||
|
||||
/**
 * An ads candidate carrying its line items and the candidate generation info that produced
 * it. It can be turned into a [[BlendedAdsCandidate]] or a [[RankedAdsCandidate]].
 *
 * @param tweetId                 id of the candidate tweet
 * @param lineItemInfo            line items attached to the candidate
 * @param candidateGenerationInfo source and similarity engine information for the candidate
 */
case class InitialAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /**
   * Get the Similarity Score of a Tweet from its CG Info. For instance,
   * if it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted
   * combined score, and if it is from a SimClustersANNSimilarityEngine, the score will be
   * the SANN score. Falls back to 0.0 when no score is present.
   */
  def getSimilarityScore: Double = {
    val engineInfo = candidateGenerationInfo.similarityEngineInfo
    engineInfo.score.getOrElse(0.0)
  }

  /**
   * The same candidate can be generated by multiple algorithms.
   * During blending, candidate deduping happens. In order to retain the
   * candidateGenerationInfo from different algorithms, we attach them to a list of
   * potentialReasons.
   *
   * @param potentialReasons every candidate generation info that produced this tweet
   */
  def toBlendedAdsCandidate(
    potentialReasons: Seq[CandidateGenerationInfo],
  ): BlendedAdsCandidate =
    BlendedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = potentialReasons
    )

  // for experimental purposes only when bypassing interleave / ranking
  def toRankedAdsCandidate(): RankedAdsCandidate =
    RankedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      // prediction score is default to 0.0 to help differentiate that it is a no-op
      predictionScore = 0.0,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = Seq(candidateGenerationInfo)
    )
}
|
||||
|
||||
/**
 * A candidate after blending: one chosen reason plus every reason that produced the tweet.
 *
 * @param tweetId          id of the candidate tweet
 * @param tweetInfo        tweet attributes
 * @param reasonChosen     the candidate generation info selected for this candidate
 * @param potentialReasons all candidate generation infos; must contain `reasonChosen`
 */
case class BlendedCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Construction-time invariant: the chosen reason is one of the potential reasons.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Get the Similarity Score of a Tweet from its CG Info. For instance,
   * if it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted
   * combined score, and if it is from a SimClustersANNSimilarityEngine, the score will be
   * the SANN score. Falls back to 0.0 when no score is present.
   */
  def getSimilarityScore: Double = {
    val engineInfo = reasonChosen.similarityEngineInfo
    engineInfo.score.getOrElse(0.0)
  }

  /**
   * Builds the ranked form of this candidate.
   *
   * @param predictionScore score to attach to the ranked candidate
   */
  def toRankedCandidate(predictionScore: Double): RankedCandidate =
    RankedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      predictionScore = predictionScore,
      reasonChosen = reasonChosen,
      potentialReasons = potentialReasons
    )
}
|
||||
|
||||
/**
 * An ads candidate after blending: one chosen reason plus every reason that produced it.
 *
 * @param tweetId          id of the candidate tweet
 * @param lineItemInfo     line items attached to the candidate
 * @param reasonChosen     the candidate generation info selected for this candidate
 * @param potentialReasons all candidate generation infos; must contain `reasonChosen`
 */
case class BlendedAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Construction-time invariant: the chosen reason is one of the potential reasons.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Get the Similarity Score of a Tweet from its CG Info. For instance,
   * if it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted
   * combined score, and if it is from a SimClustersANNSimilarityEngine, the score will be
   * the SANN score. Falls back to 0.0 when no score is present.
   */
  def getSimilarityScore: Double = {
    val engineInfo = reasonChosen.similarityEngineInfo
    engineInfo.score.getOrElse(0.0)
  }

  /**
   * Builds the ranked form of this ads candidate.
   *
   * @param predictionScore score to attach to the ranked candidate
   */
  def toRankedAdsCandidate(predictionScore: Double): RankedAdsCandidate =
    RankedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      predictionScore = predictionScore,
      reasonChosen = reasonChosen,
      potentialReasons = potentialReasons
    )
}
|
||||
|
||||
/**
 * A candidate with a prediction score attached.
 *
 * @param tweetId          id of the candidate tweet
 * @param tweetInfo        tweet attributes
 * @param predictionScore  score attached to this candidate
 * @param reasonChosen     the candidate generation info selected for this candidate
 * @param potentialReasons all candidate generation infos; must contain `reasonChosen`
 */
case class RankedCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  predictionScore: Double,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Construction-time invariant: the chosen reason is one of the potential reasons.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Get the Similarity Score of a Tweet from its CG Info. For instance,
   * if it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted
   * combined score, and if it is from a SimClustersANNSimilarityEngine, the score will be
   * the SANN score. Falls back to 0.0 when no score is present.
   */
  def getSimilarityScore: Double = {
    val engineInfo = reasonChosen.similarityEngineInfo
    engineInfo.score.getOrElse(0.0)
  }
}
|
||||
|
||||
/**
 * An ads candidate with a prediction score attached.
 *
 * @param tweetId          id of the candidate tweet
 * @param lineItemInfo     line items attached to the candidate
 * @param predictionScore  score attached to this candidate
 * @param reasonChosen     the candidate generation info selected for this candidate
 * @param potentialReasons all candidate generation infos; must contain `reasonChosen`
 */
case class RankedAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  predictionScore: Double,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Construction-time invariant: the chosen reason is one of the potential reasons.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Get the Similarity Score of a Tweet from its CG Info. For instance,
   * if it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted
   * combined score, and if it is from a SimClustersANNSimilarityEngine, the score will be
   * the SANN score. Falls back to 0.0 when no score is present.
   */
  def getSimilarityScore: Double = {
    val engineInfo = reasonChosen.similarityEngineInfo
    engineInfo.score.getOrElse(0.0)
  }
}
|
||||
|
||||
/**
 * A candidate consisting of a tweet id and a score.
 *
 * @param tweetId id of the candidate tweet
 * @param score   score attached to the tweet
 */
case class TripTweetWithScore(
  tweetId: TweetId,
  score: Double)
    extends Candidate
|
Binary file not shown.
@ -1,67 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.cr_mixer.thriftscala.SourceType
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.util.Time
|
||||
|
||||
/**
 * Tweet-level attributes. Represents the source used in candidate generation.
 *
 * For legacy reasons, SourceType used to represent both SourceType and SimilarityEngineType.
 * Moving forward, SourceType is used for SourceType ONLY, e.g., TweetFavorite, UserFollow,
 * TwiceUserId. A separate SimilarityEngineType (e.g., SimClustersANN) now covers the engines.
 *
 * Currently, one special case is TwiceUserId, which is a source but not necessarily a
 * "signal".
 *
 * @param sourceType      e.g., SourceType.TweetFavorite, SourceType.UserFollow,
 *                        SourceType.TwiceUserId
 * @param internalId      e.g., UserId(0L), TweetId(0L)
 * @param sourceEventTime optional time associated with the source
 */
case class SourceInfo(
  sourceType: SourceType,
  internalId: InternalId,
  sourceEventTime: Option[Time])
|
||||
|
||||
/**
 * Tweet-level attributes. Represents the source User Graph used in candidate generation.
 *
 * It is an intermediate product and, unlike SourceInfo, will not be stored.
 * Essentially, CrMixer queries a graph, and the graph returns a list of users to be used as
 * sources, for instance RealGraph, EarlyBird, FRS, Stp, etc. The underlying similarity
 * engines such as UTG or UTEG leverage these sources to build candidates.
 *
 * The definition of SourceType was extended to cover both "Source Signal" and
 * "Source Graph"; see the "[CrMixer] Graph Based Source Fetcher Abstraction Proposal".
 *
 * TODO: consider making both SourceInfo and GraphSourceInfo extend the same trait to
 * have a unified interface.
 *
 * @param sourceType     type of the graph source
 * @param seedWithScores seed user ids mapped to their scores
 */
case class GraphSourceInfo(
  sourceType: SourceType,
  seedWithScores: Map[UserId, Double])
|
||||
|
||||
/**
 * Tweet-level attributes. Represents the similarity engine (the algorithm) used for
 * candidate generation, along with its metadata.
 *
 * @param similarityEngineType e.g., SimClustersANN, UserTweetGraph
 * @param modelId              e.g., UserTweetGraphConsumerEmbedding_ALL_20210708
 * @param score                a score generated by this similarity engine
 */
case class SimilarityEngineInfo(
  similarityEngineType: SimilarityEngineType,
  // ModelId can be None, e.g., for UTEG, UnifiedTweetBasedSE, etc.
  modelId: Option[String],
  score: Option[Double])
|
||||
|
||||
/**
 * Tweet-level attributes. A combination of both SourceInfo and SimilarityEngineInfo.
 *
 * A SimilarityEngine is a composition and can be composed of many leaf Similarity Engines.
 * For instance, the TweetBasedUnified SE could be a composition of both the UserTweetGraph SE
 * and the SimClustersANN SE. A composite SimilarityEngine may call other (atomic,
 * contributing) SimilarityEngines to contribute to its final candidate list; these are
 * tracked in the contributingSimilarityEngines list.
 *
 * @param sourceInfoOpt                 optional, as many consumer-based CGs do not have a
 *                                      source
 * @param similarityEngineInfo          the similarity engine used in candidate generation
 *                                      (e.g., TweetBasedUnifiedSE). It can be an atomic SE
 *                                      or a composite SE
 * @param contributingSimilarityEngines only a composite SE has these (e.g., SANN, UTG);
 *                                      otherwise it is an empty Seq. All contributing SEs
 *                                      must be atomic
 */
case class CandidateGenerationInfo(
  sourceInfoOpt: Option[SourceInfo],
  similarityEngineInfo: SimilarityEngineInfo,
  contributingSimilarityEngines: Seq[SimilarityEngineInfo])
|
Binary file not shown.
@ -1,96 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.core_workflows.user_model.thriftscala.UserState
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
import com.twitter.product_mixer.core.thriftscala.ClientContext
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.timelines.configapi.Params
|
||||
|
||||
/**
 * Fields shared by the candidate generator queries that extend this trait.
 */
sealed trait CandidateGeneratorQuery {

  /** Product the request was issued for. */
  val product: Product

  /** Maximum number of results requested. */
  val maxNumResults: Int

  /** Ids of tweets listed as impressed for this request. */
  val impressedTweetList: Set[TweetId]

  /** Parameters attached to this request. */
  val params: Params

  /** Identifier of the request. */
  val requestUUID: Long
}
|
||||
|
||||
/** Mixed into queries that expose the requesting user's id. */
sealed trait HasUserId {

  /** Id of the requesting user. */
  val userId: UserId
}
|
||||
|
||||
/**
 * Candidate generator query keyed by user.
 *
 * @param userId             id of the requesting user
 * @param product            product the request was issued for
 * @param userState          state of the requesting user
 * @param maxNumResults      maximum number of results requested
 * @param impressedTweetList ids of tweets listed as impressed
 * @param params             parameters attached to this request
 * @param requestUUID        identifier of the request
 * @param languageCode       optional language code; defaults to None
 */
case class CrCandidateGeneratorQuery(
  userId: UserId,
  product: Product,
  userState: UserState,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long,
  languageCode: Option[String] = None)
    extends CandidateGeneratorQuery
    with HasUserId
|
||||
|
||||
/**
 * Candidate generator query for UTEG tweets, keyed by user.
 *
 * @param userId             id of the requesting user
 * @param product            product the request was issued for
 * @param userState          state of the requesting user
 * @param maxNumResults      maximum number of results requested
 * @param impressedTweetList ids of tweets listed as impressed
 * @param params             parameters attached to this request
 * @param requestUUID        identifier of the request
 */
case class UtegTweetCandidateGeneratorQuery(
  userId: UserId,
  product: Product,
  userState: UserState,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long)
    extends CandidateGeneratorQuery
    with HasUserId
|
||||
|
||||
/**
 * Candidate generator query for related tweets, keyed by an internal id.
 *
 * @param internalId         internal id the query is about
 * @param clientContext      client context, kept to scribe LogIn/LogOut requests
 * @param product            product the request was issued for
 * @param maxNumResults      maximum number of results requested
 * @param impressedTweetList ids of tweets listed as impressed
 * @param params             parameters attached to this request
 * @param requestUUID        identifier of the request
 */
case class RelatedTweetCandidateGeneratorQuery(
  internalId: InternalId,
  clientContext: ClientContext,
  product: Product,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long)
    extends CandidateGeneratorQuery
|
||||
|
||||
/**
 * Candidate generator query for related video tweets, keyed by an internal id.
 *
 * @param internalId         internal id the query is about
 * @param clientContext      client context, kept to scribe LogIn/LogOut requests
 * @param product            product the request was issued for
 * @param maxNumResults      maximum number of results requested
 * @param impressedTweetList ids of tweets listed as impressed
 * @param params             parameters attached to this request
 * @param requestUUID        identifier of the request
 */
case class RelatedVideoTweetCandidateGeneratorQuery(
  internalId: InternalId,
  clientContext: ClientContext,
  product: Product,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long)
    extends CandidateGeneratorQuery
|
||||
|
||||
/**
 * Candidate generator query for FRS tweets.
 *
 * @param userId             id of the requesting user
 * @param product            product the request was issued for
 * @param maxNumResults      maximum number of results requested
 * @param impressedUserList  ids of users listed as impressed
 * @param impressedTweetList ids of tweets listed as impressed
 * @param params             parameters attached to this request
 * @param languageCodeOpt    optional language code; defaults to None
 * @param countryCodeOpt     optional country code; defaults to None
 * @param requestUUID        identifier of the request
 */
case class FrsTweetCandidateGeneratorQuery(
  userId: UserId,
  product: Product,
  maxNumResults: Int,
  impressedUserList: Set[UserId],
  impressedTweetList: Set[TweetId],
  params: Params,
  languageCodeOpt: Option[String] = None,
  countryCodeOpt: Option[String] = None,
  requestUUID: Long)
    extends CandidateGeneratorQuery
|
||||
|
||||
/**
 * Candidate generator query for ads. It does not extend `CandidateGeneratorQuery`.
 *
 * @param userId        id of the requesting user
 * @param product       product the request was issued for
 * @param userState     state of the requesting user
 * @param maxNumResults maximum number of results requested
 * @param params        parameters attached to this request
 * @param requestUUID   identifier of the request
 */
case class AdsCandidateGeneratorQuery(
  userId: UserId,
  product: Product,
  userState: UserState,
  maxNumResults: Int,
  params: Params,
  requestUUID: Long)
|
||||
|
||||
/**
 * Candidate generator query for topic tweets.
 *
 * @param userId             id of the requesting user
 * @param topicIds           topics to generate candidates for
 * @param product            product the request was issued for
 * @param maxNumResults      maximum number of results requested
 * @param impressedTweetList ids of tweets listed as impressed
 * @param params             parameters attached to this request
 * @param requestUUID        identifier of the request
 * @param isVideoOnly        video-only flag of the request
 */
case class TopicTweetCandidateGeneratorQuery(
  userId: UserId,
  topicIds: Set[TopicId],
  product: Product,
  maxNumResults: Int,
  impressedTweetList: Set[TweetId],
  params: Params,
  requestUUID: Long,
  isVideoOnly: Boolean)
    extends CandidateGeneratorQuery
|
Binary file not shown.
@ -1,6 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
/** Closed set of Earlybird similarity engine variants. */
sealed trait EarlybirdSimilarityEngineType

/** Recency-based variant. */
object EarlybirdSimilarityEngineType_RecencyBased extends EarlybirdSimilarityEngineType

/** Model-based variant. */
object EarlybirdSimilarityEngineType_ModelBased extends EarlybirdSimilarityEngineType

/** Tensorflow-based variant. */
object EarlybirdSimilarityEngineType_TensorflowBased extends EarlybirdSimilarityEngineType
|
Binary file not shown.
@ -1,11 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
/** Namespace for the health threshold levels. */
object HealthThreshold {

  /**
   * Threshold levels with explicit ids 1 through 5, declared from `Off` to `StricterPlus`.
   */
  object Enum extends Enumeration {
    val Off: Value = Value(1)
    val Moderate: Value = Value(2)
    val Strict: Value = Value(3)
    val Stricter: Value = Value(4)
    val StricterPlus: Value = Value(5)
  }
}
|
Binary file not shown.
@ -1,77 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
/**
 * A configuration object for all Model Based Candidate Sources.
 *
 * Model name guideline: please name your modelId as "Algorithm_Product_Date".
 * If your model is used for multiple product surfaces, use "ALL" as the product.
 * Don't name your algorithm MBCG; all the algorithms here are MBCG.
 *
 * Don't forget to add your new models to the allHnswANNSimilarityEngineModelIds list.
 */
object ModelConfig {
  // Offline SimClusters CG Experiment related Model Ids
  val OfflineInterestedInFromKnownFor2020: String = "OfflineIIKF_ALL_20220414"
  val OfflineInterestedInFromKnownFor2020Hl0El15: String = "OfflineIIKF_ALL_20220414_Hl0_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El15: String = "OfflineIIKF_ALL_20220414_Hl2_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El50: String = "OfflineIIKF_ALL_20220414_Hl2_El50"
  val OfflineInterestedInFromKnownFor2020Hl8El50: String = "OfflineIIKF_ALL_20220414_Hl8_El50"
  val OfflineMTSConsumerEmbeddingsFav90P20M: String =
    "OfflineMTSConsumerEmbeddingsFav90P20M_ALL_20220414"

  // Twhin Model Ids
  val ConsumerBasedTwHINRegularUpdateAll20221024: String =
    "ConsumerBasedTwHINRegularUpdate_All_20221024"

  // Averaged Twhin Model Ids
  val TweetBasedTwHINRegularUpdateAll20221024: String =
    "TweetBasedTwHINRegularUpdate_All_20221024"

  // Collaborative Filtering Twhin Model Ids
  val TwhinCollabFilterForFollow: String =
    "TwhinCollabFilterForFollow"
  val TwhinCollabFilterForEngagement: String =
    "TwhinCollabFilterForEngagement"
  val TwhinMultiClusterForFollow: String =
    "TwhinMultiClusterForFollow"
  val TwhinMultiClusterForEngagement: String =
    "TwhinMultiClusterForEngagement"

  // Two Tower model Ids
  val TwoTowerFavALL20220808: String =
    "TwoTowerFav_ALL_20220808"

  // Debugger Demo-Only Model Ids
  val DebuggerDemo: String = "DebuggerDemo"

  // ColdStartLookalike - this is not really a model name; it is a placeholder to
  // indicate the ColdStartLookalike candidate source, which is currently being plugged into
  // CustomizedRetrievalCandidateGeneration temporarily.
  val ColdStartLookalikeModelName: String = "ConsumersBasedUtgColdStartLookalike20220707"

  // consumersBasedUTG-RealGraphOon Model Id
  val ConsumersBasedUtgRealGraphOon20220705: String = "ConsumersBasedUtgRealGraphOon_All_20220705"
  // consumersBasedUAG-RealGraphOon Model Id
  val ConsumersBasedUagRealGraphOon20221205: String = "ConsumersBasedUagRealGraphOon_All_20221205"

  // FTR
  val OfflineFavDecayedSum: String = "OfflineFavDecayedSum"
  val OfflineFtrAt5Pop1000RnkDcy11: String = "OfflineFtrAt5Pop1000RnkDcy11"
  val OfflineFtrAt5Pop10000RnkDcy11: String = "OfflineFtrAt5Pop10000RnkDcy11"

  // All Model Ids of HnswANNSimilarityEngines.
  // The element type is spelled out so this public member's type does not rely on inference.
  val allHnswANNSimilarityEngineModelIds: Seq[String] = Seq(
    ConsumerBasedTwHINRegularUpdateAll20221024,
    TwoTowerFavALL20220808,
    DebuggerDemo
  )

  val ConsumerLogFavBasedInterestedInEmbedding: String =
    "ConsumerLogFavBasedInterestedIn_ALL_20221228"
  val ConsumerFollowBasedInterestedInEmbedding: String =
    "ConsumerFollowBasedInterestedIn_ALL_20221228"

  val RetweetBasedDiffusion: String =
    "RetweetBasedDiffusion"

}
|
Binary file not shown.
@ -1,122 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
/**
 * String constants used as the names of name-annotated bindings, e.g.
 * `@Named(ModuleNames.UnifiedCache)`.
 *
 * Every member is a `final val` WITHOUT a type ascription on purpose: that makes
 * it a compile-time constant, which is required for use as an annotation argument.
 */
object ModuleNames {

  final val FrsStore = "FrsStore"
  final val UssStore = "UssStore"
  final val UssStratoColumn = "UssStratoColumn"
  final val RsxStore = "RsxStore"
  final val RmsTweetLogFavLongestL2EmbeddingStore = "RmsTweetLogFavLongestL2EmbeddingStore"
  final val RmsUserFavBasedProducerEmbeddingStore = "RmsUserFavBasedProducerEmbeddingStore"
  final val RmsUserLogFavInterestedInEmbeddingStore = "RmsUserLogFavInterestedInEmbeddingStore"
  final val RmsUserFollowInterestedInEmbeddingStore = "RmsUserFollowInterestedInEmbeddingStore"
  final val StpStore = "StpStore"
  final val TwiceClustersMembersStore = "TwiceClustersMembersStore"
  final val TripCandidateStore = "TripCandidateStore"

  final val ConsumerEmbeddingBasedTripSimilarityEngine =
    "ConsumerEmbeddingBasedTripSimilarityEngine"
  final val ConsumerEmbeddingBasedTwHINANNSimilarityEngine =
    "ConsumerEmbeddingBasedTwHINANNSimilarityEngine"
  final val ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine =
    "ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine"
  final val ConsumersBasedUserAdGraphSimilarityEngine =
    "ConsumersBasedUserAdGraphSimilarityEngine"
  final val ConsumersBasedUserVideoGraphSimilarityEngine =
    "ConsumersBasedUserVideoGraphSimilarityEngine"

  final val ConsumerBasedWalsSimilarityEngine = "ConsumerBasedWalsSimilarityEngine"

  final val TweetBasedTwHINANNSimilarityEngine = "TweetBasedTwHINANNSimilarityEngine"

  final val SimClustersANNSimilarityEngine = "SimClustersANNSimilarityEngine"

  // SimClusters ANN service client names
  final val ProdSimClustersANNServiceClientName = "ProdSimClustersANNServiceClient"
  final val ExperimentalSimClustersANNServiceClientName = "ExperimentalSimClustersANNServiceClient"
  final val SimClustersANNServiceClientName1 = "SimClustersANNServiceClient1"
  final val SimClustersANNServiceClientName2 = "SimClustersANNServiceClient2"
  final val SimClustersANNServiceClientName3 = "SimClustersANNServiceClient3"
  final val SimClustersANNServiceClientName4 = "SimClustersANNServiceClient4"
  final val SimClustersANNServiceClientName5 = "SimClustersANNServiceClient5"

  // Cache names
  final val UnifiedCache = "unifiedCache"
  final val MLScoreCache = "mlScoreCache"
  final val TweetRecommendationResultsCache = "tweetRecommendationResultsCache"
  final val EarlybirdTweetsCache = "earlybirdTweetsCache"
  final val EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache =
    "earlybirdTweetsWithoutRetweetsRepliesCacheStore"
  final val EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache =
    "earlybirdTweetsWithRetweetsRepliesCacheStore"

  // Logger names
  final val AbDeciderLogger = "abDeciderLogger"
  final val TopLevelApiDdgMetricsLogger = "topLevelApiDdgMetricsLogger"
  final val TweetRecsLogger = "tweetRecsLogger"
  final val BlueVerifiedTweetRecsLogger = "blueVerifiedTweetRecsLogger"
  final val RelatedTweetsLogger = "relatedTweetsLogger"
  final val UtegTweetsLogger = "utegTweetsLogger"
  final val AdsRecommendationsLogger = "adsRecommendationLogger"

  final val OfflineSimClustersANNInterestedInSimilarityEngine =
    "OfflineSimClustersANNInterestedInSimilarityEngine"

  final val RealGraphOonStore = "RealGraphOonStore"
  final val RealGraphInStore = "RealGraphInStore"

  // Offline candidate store names
  final val OfflineTweet2020CandidateStore = "OfflineTweet2020CandidateStore"
  final val OfflineTweet2020Hl0El15CandidateStore = "OfflineTweet2020Hl0El15CandidateStore"
  final val OfflineTweet2020Hl2El15CandidateStore = "OfflineTweet2020Hl2El15CandidateStore"
  final val OfflineTweet2020Hl2El50CandidateStore = "OfflineTweet2020Hl2El50CandidateStore"
  final val OfflineTweet2020Hl8El50CandidateStore = "OfflineTweet2020Hl8El50CandidateStore"
  final val OfflineTweetMTSCandidateStore = "OfflineTweetMTSCandidateStore"

  final val OfflineFavDecayedSumCandidateStore = "OfflineFavDecayedSumCandidateStore"
  final val OfflineFtrAt5Pop1000RankDecay11CandidateStore =
    "OfflineFtrAt5Pop1000RankDecay11CandidateStore"
  final val OfflineFtrAt5Pop10000RankDecay11CandidateStore =
    "OfflineFtrAt5Pop10000RankDecay11CandidateStore"

  final val TwhinCollabFilterStratoStoreForFollow = "TwhinCollabFilterStratoStoreForFollow"
  final val TwhinCollabFilterStratoStoreForEngagement = "TwhinCollabFilterStratoStoreForEngagement"
  final val TwhinMultiClusterStratoStoreForFollow = "TwhinMultiClusterStratoStoreForFollow"
  final val TwhinMultiClusterStratoStoreForEngagement = "TwhinMultiClusterStratoStoreForEngagement"

  final val ProducerBasedUserAdGraphSimilarityEngine =
    "ProducerBasedUserAdGraphSimilarityEngine"
  final val ProducerBasedUserTweetGraphSimilarityEngine =
    "ProducerBasedUserTweetGraphSimilarityEngine"
  final val ProducerBasedUnifiedSimilarityEngine = "ProducerBasedUnifiedSimilarityEngine"

  final val TweetBasedUserAdGraphSimilarityEngine = "TweetBasedUserAdGraphSimilarityEngine"
  final val TweetBasedUserTweetGraphSimilarityEngine = "TweetBasedUserTweetGraphSimilarityEngine"
  final val TweetBasedUserVideoGraphSimilarityEngine = "TweetBasedUserVideoGraphSimilarityEngine"
  final val TweetBasedQigSimilarityEngine = "TweetBasedQigSimilarityEngine"
  final val TweetBasedUnifiedSimilarityEngine = "TweetBasedUnifiedSimilarityEngine"

  final val TwhinCollabFilterSimilarityEngine = "TwhinCollabFilterSimilarityEngine"

  final val ConsumerBasedUserTweetGraphStore = "ConsumerBasedUserTweetGraphStore"
  final val ConsumerBasedUserVideoGraphStore = "ConsumerBasedUserVideoGraphStore"
  final val ConsumerBasedUserAdGraphStore = "ConsumerBasedUserAdGraphStore"

  final val UserTweetEntityGraphSimilarityEngine =
    "UserTweetEntityGraphSimilarityEngine"

  final val CertoTopicTweetSimilarityEngine = "CertoTopicTweetSimilarityEngine"
  final val CertoStratoStoreName = "CertoStratoStore"

  final val SkitTopicTweetSimilarityEngine = "SkitTopicTweetSimilarityEngine"
  final val SkitHighPrecisionTopicTweetSimilarityEngine =
    "SkitHighPrecisionTopicTweetSimilarityEngine"
  final val SkitStratoStoreName = "SkitStratoStore"

  final val HomeNaviGRPCClient = "HomeNaviGRPCClient"
  final val AdsFavedNaviGRPCClient = "AdsFavedNaviGRPCClient"
  final val AdsMonetizableNaviGRPCClient = "AdsMonetizableNaviGRPCClient"

  final val RetweetBasedDiffusionRecsMhStore = "RetweetBasedDiffusionRecsMhStore"
  final val DiffusionBasedSimilarityEngine = "DiffusionBasedSimilarityEngine"

  final val BlueVerifiedAnnotationStore = "BlueVerifiedAnnotationStore"

}
|
Binary file not shown.
@ -1,13 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
|
||||
/**
 * Binds a tweetId to the raw score generated by one single Similarity Engine.
 *
 * @param tweetId              id of the scored tweet
 * @param score                raw score for the tweet
 * @param similarityEngineType which underlying topic source the topic tweet is from
 */
case class TopicTweetWithScore(
  tweetId: TweetId,
  score: Double,
  similarityEngineType: SimilarityEngineType
)
|
Binary file not shown.
@ -1,6 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
|
||||
/**
 * Pairs a tweet id with an author id.
 *
 * @param tweetId  id of the tweet
 * @param authorId user id stored as the tweet's author
 */
case class TweetWithAuthor(
  tweetId: TweetId,
  authorId: UserId
)
|
Binary file not shown.
@ -1,8 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
|
||||
/**
 * Binds a tweetId to the raw score generated by one single Similarity Engine.
 *
 * @param tweetId id of the scored tweet
 * @param score   raw score for the tweet
 */
case class TweetWithScore(
  tweetId: TweetId,
  score: Double
)
|
Binary file not shown.
@ -1,12 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.recos.recos_common.thriftscala.SocialProofType
|
||||
|
||||
/**
 * Binds a tweetId to a raw score and to its social proofs, grouped by type.
 *
 * @param tweetId           id of the scored tweet
 * @param score             raw score for the tweet
 * @param socialProofByType social proofs keyed by [[SocialProofType]]; the Long values
 *                          are presumably ids of the proving entities (TODO confirm)
 */
case class TweetWithScoreAndSocialProof(
  tweetId: TweetId,
  score: Double,
  socialProofByType: Map[SocialProofType, Seq[Long]]
)
|
Binary file not shown.
@ -1,135 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.bijection.thrift.CompactThriftCodec
|
||||
import com.twitter.ads.entities.db.thriftscala.LineItemObjective
|
||||
import com.twitter.bijection.Injection
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.thriftscala.LineItemInfo
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.DataType
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.GeneralTensor
|
||||
import com.twitter.ml.api.RichDataRecord
|
||||
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.manhattan.Revenue
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object ActivePromotedTweetStoreModule extends TwitterModule {

  /**
   * Adapts a `String -> DataRecord` store into a `TweetId -> Seq[LineItemInfo]` store.
   *
   * The tweet id is converted with `toString` to form the lookup key, and the
   * returned record is decoded from two INT64 tensor features:
   * `active_promoted_tweets.line_item_ids` and
   * `active_promoted_tweets.line_item_objectives`.
   *
   * @param activePromotedTweetMHStore backing store holding the raw data records
   * @param statsReceiver              accepted for wiring; not read by this class
   */
  case class ActivePromotedTweetStore(
    activePromotedTweetMHStore: ReadableStore[String, DataRecord],
    statsReceiver: StatsReceiver)
      extends ReadableStore[TweetId, Seq[LineItemInfo]] {

    /**
     * Looks up the record for `tweetId` and decodes it.
     *
     * @return `None` when the backing store has no record; otherwise the decoded
     *         line items, which are empty when the two tensors disagree in length
     *         or are not populated INT64 tensors.
     */
    override def get(tweetId: TweetId): Future[Option[Seq[LineItemInfo]]] =
      activePromotedTweetMHStore
        .get(tweetId.toString)
        .map(maybeRecord => maybeRecord.map(decodeLineItems))

    /** Decodes the line item ids and objectives out of one data record. */
    private def decodeLineItems(dataRecord: DataRecord): Seq[LineItemInfo] = {
      val record = new RichDataRecord(dataRecord)

      val idsFeature: Feature[GeneralTensor] =
        new Feature.Tensor("active_promoted_tweets.line_item_ids", DataType.INT64)
      val objectivesFeature: Feature[GeneralTensor] =
        new Feature.Tensor("active_promoted_tweets.line_item_objectives", DataType.INT64)

      val idsTensor: GeneralTensor = record.getFeatureValue(idsFeature)
      val objectivesTensor: GeneralTensor = record.getFeatureValue(objectivesFeature)

      val ids: Seq[Long] = int64Values(idsTensor)
      val objectives: Seq[LineItemObjective] =
        int64Values(objectivesTensor).map(raw => LineItemObjective(raw.toInt))

      // Ids and objectives are paired positionally, so a length mismatch means the
      // record cannot be interpreted and nothing is returned for it.
      if (ids.size != objectives.size) Seq.empty
      else
        ids.zip(objectives).map {
          case (id, objective) =>
            LineItemInfo(lineItemId = id, lineItemObjective = objective)
        }
    }

    /** Values of an INT64 tensor whose longs are set; empty for anything else. */
    private def int64Values(tensor: GeneralTensor): Seq[Long] = {
      val populated =
        tensor.getSetField == GeneralTensor._Fields.INT64_TENSOR &&
          tensor.getInt64Tensor.isSetLongs
      if (populated) tensor.getInt64Tensor.getLongs.asScala.map(_.toLong)
      else Seq.empty
    }
  }

  /**
   * Provides the `TweetId -> Seq[LineItemInfo]` store as a three-layer stack:
   * the Manhattan-backed [[ActivePromotedTweetStore]], wrapped by
   * `ObservedMemcachedReadableStore` (ttl 60 minutes), wrapped by
   * `ObservedCachedReadableStore` (ttl 30 minutes).
   */
  @Provides
  @Singleton
  def providesActivePromotedTweetStore(
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    crMixerStatsReceiver: StatsReceiver
  ): ReadableStore[TweetId, Seq[LineItemInfo]] = {

    val manhattanConfig = new ManhattanROConfig {
      val hdfsPath = HDFSPath("")
      val applicationID = ApplicationID("ads_bigquery_features")
      val datasetName = DatasetName("active_promoted_tweets")
      val cluster = Revenue

      override def statsReceiver: StatsReceiver =
        crMixerStatsReceiver.scope("active_promoted_tweets_mh")
    }

    val manhattanStore: ReadableStore[String, DataRecord] =
      ManhattanRO.getReadableStoreWithMtls[String, DataRecord](
        manhattanConfig,
        manhattanKVClientMtlsParams
      )(
        implicitly[Injection[String, Array[Byte]]],
        CompactThriftCodec[DataRecord]
      )

    val promotedTweetStore =
      ActivePromotedTweetStore(
        manhattanStore,
        crMixerStatsReceiver.scope("ActivePromotedTweetStore"))

    val memcachedBackedStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = promotedTweetStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 60.minutes,
      asyncUpdate = false
    )(
      valueInjection = LZ4Injection.compose(SeqObjectInjection[LineItemInfo]()),
      statsReceiver = crMixerStatsReceiver.scope("memCachedActivePromotedTweetStore"),
      keyToString = { k: TweetId => s"apt/$k" }
    )

    ObservedCachedReadableStore.from(
      memcachedBackedStore,
      ttl = 30.minutes,
      maxKeys = 250000, // size of promoted tweet is around 200,000
      windowSize = 10000L,
      cacheName = "active_promoted_tweet_cache",
      maxMultiGetSize = 20
    )(crMixerStatsReceiver.scope("inMemoryCachedActivePromotedTweetStore"))
  }

}
|
@ -1,130 +0,0 @@
|
||||
# Build target for the Scala sources in this directory and the listed
# sub-directories. Warnings are fatal and dependencies are strict, so every
# directly used library must appear in `dependencies`.
scala_library(
    sources = [
        "*.scala",
        "core/*.scala",
        "grpc_client/*.scala",
        "similarity_engine/*.scala",
        "source_signal/*.scala",
        "thrift_client/*.scala",
    ],
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        # Third-party
        "3rdparty/jvm/com/twitter/bijection:core",
        "3rdparty/jvm/com/twitter/bijection:scrooge",
        "3rdparty/jvm/com/twitter/storehaus:core",
        "3rdparty/jvm/com/twitter/storehaus:memcache",
        "3rdparty/jvm/io/grpc:grpc-api",
        "3rdparty/jvm/io/grpc:grpc-auth",
        "3rdparty/jvm/io/grpc:grpc-core",
        "3rdparty/jvm/io/grpc:grpc-netty",
        "3rdparty/jvm/io/grpc:grpc-protobuf",
        "3rdparty/jvm/io/grpc:grpc-stub",
        "3rdparty/jvm/javax/inject:javax.inject",
        "3rdparty/jvm/org/scalanlp:breeze",
        "3rdparty/src/jvm/com/twitter/storehaus:core",
        # In-repo
        "abdecider/src/main/scala",
        "ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
        "configapi/configapi-abdecider",
        "configapi/configapi-core",
        "configapi/configapi-featureswitches:v2",
        "content-recommender/server/src/main/scala/com/twitter/contentrecommender:cr-mixer-deps",
        "content-recommender/thrift/src/main/thrift:thrift-scala",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
        "cr-mixer/thrift/src/main/thrift:thrift-scala",
        "decider/src/main/scala",
        "discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
        "featureswitches/featureswitches-core",
        "featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
        "finagle-internal/finagle-grpc/src/main/scala",
        "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
        "finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/consumers",
        "finatra-internal/mtls-thriftmux/src/main/scala",
        "finatra/inject/inject-core/src/main/scala",
        "finatra/inject/inject-modules/src/main/scala",
        "finatra/inject/inject-thrift-client",
        "follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
        "frigate/frigate-common:util",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
        "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
        "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
        "hydra/partition/thrift/src/main/thrift:thrift-scala",
        "hydra/root/thrift/src/main/thrift:thrift-scala",
        "mediaservices/commons/src/main/scala:futuretracker",
        "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
        "qig-ranker/thrift/src/main/thrift:thrift-scala",
        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/health_store",
        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
        "relevance-platform/thrift/src/main/thrift:thrift-scala",
        "representation-manager/client/src/main/scala/com/twitter/representation_manager",
        "representation-manager/client/src/main/scala/com/twitter/representation_manager/config",
        "representation-manager/server/src/main/scala/com/twitter/representation_manager/migration",
        "representation-manager/server/src/main/thrift:thrift-scala",
        "representation-scorer/server/src/main/thrift:thrift-scala",
        "servo/decider",
        "servo/util/src/main/scala",
        "simclusters-ann/thrift/src/main/thrift:thrift-scala",
        "snowflake/src/main/scala/com/twitter/snowflake/id",
        "src/java/com/twitter/ml/api:api-base",
        "src/java/com/twitter/search/queryparser/query:core-query-nodes",
        "src/java/com/twitter/search/queryparser/query/search:search-query-nodes",
        "src/scala/com/twitter/algebird_internal/injection",
        "src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
        "src/scala/com/twitter/ml/api/embedding",
        "src/scala/com/twitter/ml/featurestore/lib",
        "src/scala/com/twitter/scalding_internal/multiformat/format",
        "src/scala/com/twitter/simclusters_v2/candidate_source",
        "src/scala/com/twitter/simclusters_v2/common",
        "src/scala/com/twitter/storehaus_internal/manhattan",
        "src/scala/com/twitter/storehaus_internal/manhattan/config",
        "src/scala/com/twitter/storehaus_internal/memcache",
        "src/scala/com/twitter/storehaus_internal/memcache/config",
        "src/scala/com/twitter/storehaus_internal/offline",
        "src/scala/com/twitter/storehaus_internal/util",
        "src/scala/com/twitter/topic_recos/stores",
        "src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
        "src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
        "src/thrift/com/twitter/frigate:frigate-thrift-scala",
        "src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
        "src/thrift/com/twitter/hermit/stp:hermit-stp-scala",
        "src/thrift/com/twitter/ml/api:data-java",
        "src/thrift/com/twitter/ml/api:embedding-scala",
        "src/thrift/com/twitter/ml/featurestore:ml-feature-store-embedding-scala",
        "src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
        "src/thrift/com/twitter/recos:recos-common-scala",
        "src/thrift/com/twitter/recos/user_ad_graph:user_ad_graph-scala",
        "src/thrift/com/twitter/recos/user_tweet_entity_graph:user_tweet_entity_graph-scala",
        "src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
        "src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
        "src/thrift/com/twitter/recos/user_video_graph:user_video_graph-scala",
        "src/thrift/com/twitter/search:earlybird-scala",
        "src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
        "src/thrift/com/twitter/topic_recos:topic_recos-thrift-scala",
        "src/thrift/com/twitter/trends/trip_v1:trip-tweets-thrift-scala",
        "src/thrift/com/twitter/tweetypie:service-scala",
        "src/thrift/com/twitter/twistly:twistly-scala",
        "src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
        "stitch/stitch-storehaus",
        "stitch/stitch-tweetypie/src/main/scala",
        "strato/src/main/scala/com/twitter/strato/client",
        "user-signal-service/thrift/src/main/thrift:thrift-scala",
        "util-internal/scribe/src/main/scala/com/twitter/logging",
        "util/util-hashing",
    ],
)
|
Binary file not shown.
Binary file not shown.
@ -1,52 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Athena
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
|
||||
object BlueVerifiedAnnotationStoreModule extends TwitterModule {

  /**
   * Provides the store bound under `ModuleNames.BlueVerifiedAnnotationStore`.
   *
   * Reads `BlueVerifiedAnnotationsV2` values by String key from the Manhattan
   * dataset `blue_verified_annotations` (application `content_recommender_athena`,
   * cluster `Athena`), wrapped in an `ObservedCachedReadableStore` with a
   * 24 hour ttl and at most 100000 keys.
   */
  @Provides
  @Singleton
  @Named(ModuleNames.BlueVerifiedAnnotationStore)
  def providesBlueVerifiedAnnotationStore(
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams
  ): ReadableStore[String, BlueVerifiedAnnotationsV2] = {

    // Picked up implicitly by getReadableStoreWithMtls to decode stored values.
    implicit val valueCodec = new BinaryScalaCodec(BlueVerifiedAnnotationsV2)

    val manhattanConfig = ManhattanROConfig(
      HDFSPath(""),
      ApplicationID("content_recommender_athena"),
      DatasetName("blue_verified_annotations"),
      Athena)

    val manhattanStore =
      ManhattanRO.getReadableStoreWithMtls[String, BlueVerifiedAnnotationsV2](
        manhattanConfig,
        manhattanKVClientMtlsParams
      )

    ObservedCachedReadableStore.from(
      manhattanStore,
      ttl = 24.hours,
      maxKeys = 100000,
      windowSize = 10000L,
      cacheName = "blue_verified_annotation_cache"
    )(statsReceiver.scope("inMemoryCachedBlueVerifiedAnnotationStore"))
  }
}
|
Binary file not shown.
@ -1,57 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.Client
|
||||
import com.twitter.topic_recos.stores.CertoTopicTopKTweetsStore
|
||||
import com.twitter.topic_recos.thriftscala.TweetWithScores
|
||||
|
||||
object CertoStratoStoreModule extends TwitterModule {

  /**
   * Provides the store bound under `ModuleNames.CertoStratoStoreName`.
   *
   * Layers, innermost first:
   *  1. `CertoTopicTopKTweetsStore.prodStore`, observed, with each value reduced to
   *     its `topTweetsByFollowerL2NormalizedCosineSimilarityScore` field;
   *  2. `ObservedMemcachedReadableStore` on the unified cache client, ttl 10 minutes;
   *  3. `ObservedCachedReadableStore`, ttl 5 minutes, at most 100000 keys.
   */
  @Provides
  @Singleton
  @Named(ModuleNames.CertoStratoStoreName)
  def providesCertoStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicId, Seq[TweetWithScores]] = {

    val observedCertoStore =
      ObservedReadableStore(CertoTopicTopKTweetsStore.prodStore(stratoClient))(
        statsReceiver.scope(ModuleNames.CertoStratoStoreName))

    val certoStore =
      observedCertoStore.mapValues(_.topTweetsByFollowerL2NormalizedCosineSimilarityScore)

    val memcachedBackedStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = certoStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 10.minutes
    )(
      valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScores]()),
      statsReceiver = statsReceiver.scope("memcached_certo_store"),
      // The memcached key is the hash of the topic id's string form, prefixed with "certo:".
      keyToString = { topicId => s"certo:${keyHasher.hashKey(topicId.toString.getBytes)}" }
    )

    ObservedCachedReadableStore.from[TopicId, Seq[TweetWithScores]](
      memcachedBackedStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      cacheName = "certo_in_memory_cache",
      windowSize = 10000L
    )(statsReceiver.scope("certo_in_memory_cache"))
  }
}
|
Binary file not shown.
@ -1,30 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest
|
||||
import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse
|
||||
import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
object ConsumersBasedUserAdGraphStoreModule extends TwitterModule {

  /**
   * Exposes `UserAdGraph.consumersBasedRelatedAds` as a `ReadableStore`, bound
   * under `ModuleNames.ConsumerBasedUserAdGraphStore`.
   *
   * Every successful service response is wrapped in `Some`, so `get` never
   * yields `None`; a failed call surfaces as a failed `Future`.
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserAdGraphStore)
  def providesConsumerBasedUserAdGraphStore(
    userAdGraphService: UserAdGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] =
    new ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] {
      override def get(
        request: ConsumersBasedRelatedAdRequest
      ): Future[Option[RelatedAdResponse]] =
        userAdGraphService
          .consumersBasedRelatedAds(request)
          .map(response => Some(response))
    }
}
|
Binary file not shown.
@ -1,30 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.recos.user_tweet_graph.thriftscala.ConsumersBasedRelatedTweetRequest
|
||||
import com.twitter.recos.user_tweet_graph.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
object ConsumersBasedUserTweetGraphStoreModule extends TwitterModule {

  /**
   * Exposes `UserTweetGraph.consumersBasedRelatedTweets` as a `ReadableStore`,
   * bound under `ModuleNames.ConsumerBasedUserTweetGraphStore`.
   *
   * Every successful service response is wrapped in `Some`, so `get` never
   * yields `None`; a failed call surfaces as a failed `Future`.
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserTweetGraphStore)
  def providesConsumerBasedUserTweetGraphStore(
    userTweetGraphService: UserTweetGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] =
    new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
      override def get(
        request: ConsumersBasedRelatedTweetRequest
      ): Future[Option[RelatedTweetResponse]] =
        userTweetGraphService
          .consumersBasedRelatedTweets(request)
          .map(response => Some(response))
    }
}
|
Binary file not shown.
@ -1,30 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest
|
||||
import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Guice module that exposes the UserVideoGraph `consumersBasedRelatedTweets` endpoint
 * as a [[ReadableStore]] keyed by the thrift request.
 */
object ConsumersBasedUserVideoGraphStoreModule extends TwitterModule {

  /**
   * Wraps the thrift client in a store.
   *
   * @param userVideoGraphService thrift client every lookup is forwarded to
   * @return a store whose `get` issues the request and wraps the response in `Some`
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserVideoGraphStore)
  def providesConsumerBasedUserVideoGraphStore(
    userVideoGraphService: UserVideoGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] =
    new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
      // A successful call is always surfaced as Some(response); this adapter itself
      // never produces None.
      override def get(
        k: ConsumersBasedRelatedTweetRequest
      ): Future[Option[RelatedTweetResponse]] =
        for (response <- userVideoGraphService.consumersBasedRelatedTweets(k))
          yield Some(response)
    }
}
|
Binary file not shown.
@ -1,16 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.timelines.configapi.Config
|
||||
import com.twitter.cr_mixer.param.CrMixerParamConfig
|
||||
import com.twitter.inject.TwitterModule
|
||||
import javax.inject.Singleton
|
||||
|
||||
/** Guice module that binds the configapi [[Config]] to `CrMixerParamConfig.config`. */
object CrMixerParamConfigModule extends TwitterModule {

  /** @return the `config` value held by `CrMixerParamConfig` */
  @Provides
  @Singleton
  def provideConfig(): Config = CrMixerParamConfig.config
}
|
Binary file not shown.
@ -1,54 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.bijection.Injection
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.thriftscala.TweetsWithScore
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Apollo
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Guice module providing the read-only Manhattan store of retweet-based diffusion
 * recommendations (`Long` key to [[TweetsWithScore]]).
 */
object DiffusionStoreModule extends TwitterModule {
  type UserId = Long

  // NOTE(review): this val is intentionally left without a type annotation. With an
  // explicit type it would become a candidate for its own `implicitly` lookup.
  implicit val longCodec = implicitly[Injection[Long, Array[Byte]]]
  implicit val tweetRecsInjection: Injection[TweetsWithScore, Array[Byte]] =
    BinaryScalaCodec(TweetsWithScore)

  /**
   * Store over the "diffusion_retweet_tweet_recs" dataset of the "cr_mixer_apollo"
   * application on the Apollo cluster.
   *
   * @param serviceIdentifier identity used to build the Manhattan mTLS parameters
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RetweetBasedDiffusionRecsMhStore)
  def retweetBasedDiffusionRecsMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[Long, TweetsWithScore] =
    buildTweetRecsStore(
      serviceIdentifier,
      ManhattanROConfig(
        HDFSPath(""), // not needed
        ApplicationID("cr_mixer_apollo"),
        DatasetName("diffusion_retweet_tweet_recs"),
        Apollo
      )
    )

  /**
   * Builds an mTLS-enabled read-only Manhattan store for the given config, passing the
   * module's key and value injections explicitly.
   */
  private def buildTweetRecsStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[Long, TweetsWithScore] = {
    val mtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)
    ManhattanRO.getReadableStoreWithMtls[Long, TweetsWithScore](manhattanROConfig, mtlsParams)(
      longCodec,
      tweetRecsInjection)
  }
}
|
Binary file not shown.
@ -1,189 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.cr_mixer.config.TimeoutConfig
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.EarlybirdClientId
|
||||
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.FacetsToFetch
|
||||
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetCollectorTerminationParams
|
||||
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetEarlybirdQuery
|
||||
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.MetadataOptions
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.SeqLongInjection
|
||||
import com.twitter.hashing.KeyHasher
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.search.common.query.thriftjava.thriftscala.CollectorParams
|
||||
import com.twitter.search.earlybird.thriftscala.EarlybirdRequest
|
||||
import com.twitter.search.earlybird.thriftscala.EarlybirdResponseCode
|
||||
import com.twitter.search.earlybird.thriftscala.EarlybirdService
|
||||
import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery
|
||||
import com.twitter.search.earlybird.thriftscala.ThriftSearchRankingMode
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module providing two memcache-backed stores that map a user id to that user's
 * tweet ids as returned by an Earlybird recency-ranked search: one that filters out
 * retweets and replies, and one that keeps them.
 */
object EarlybirdRecencyBasedCandidateStoreModule extends TwitterModule {

  /**
   * Store that excludes retweets and replies.
   *
   * Lookups go through `earlybirdRecencyBasedTweetsCache` first; entries are written with
   * a TTL of [[MemcacheKeyTimeToLiveDuration]] under keys prefixed with "uEBRBHM:".
   *
   * @param statsReceiver root stats receiver; scoped separately for search and cache stats
   * @param earlybirdSearchClient thrift client used on the backing-store path
   * @param earlybirdRecencyBasedTweetsCache memcached client holding the cached id lists
   * @param timeoutConfig supplies `earlybirdServerTimeout`, used as the request timeout
   */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        // Home based EB filters out retweets and replies
        val earlybirdRequest =
          buildEarlybirdRequest(
            userId,
            FilterOutRetweetsAndReplies,
            DefaultMaxNumTweetPerUser,
            timeoutConfig.earlybirdServerTimeout)
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_home_memcache"),
      keyToString = { k =>
        f"uEBRBHM:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedHoMe
      }
    )
  }

  /**
   * Store that keeps retweets and replies.
   *
   * Same shape as the store above, but the search does not filter retweets/replies and
   * cache keys are prefixed with "uEBRBN:" so the two stores do not collide in the shared
   * cache client.
   *
   * @param statsReceiver root stats receiver; scoped separately for search and cache stats
   * @param earlybirdSearchClient thrift client used on the backing-store path
   * @param earlybirdRecencyBasedTweetsCache memcached client holding the cached id lists
   * @param timeoutConfig supplies `earlybirdServerTimeout`, used as the request timeout
   */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        val earlybirdRequest = buildEarlybirdRequest(
          userId,
          // Notifications based EB keeps retweets and replies
          NotFilterOutRetweetsAndReplies,
          DefaultMaxNumTweetPerUser,
          processingTimeout = timeoutConfig.earlybirdServerTimeout
        )
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_notifications_memcache"),
      keyToString = { k =>
        f"uEBRBN:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedNotifications
      }
    )
  }

  // Hash applied to the stringified user id when building memcache keys.
  private val keyHasher: KeyHasher = KeyHasher.FNV1A_64

  /**
   * Note the DefaultMaxNumTweetPerUser is used to adjust the result size per cache entry.
   * If the value changes, it will increase the size of the memcache.
   */
  private val DefaultMaxNumTweetPerUser: Int = 100
  private val FilterOutRetweetsAndReplies = true
  private val NotFilterOutRetweetsAndReplies = false
  private val MemcacheKeyTimeToLiveDuration: Duration = Duration.fromMinutes(15)

  /**
   * Builds the Earlybird request for one seed user.
   *
   * @param seedUserId user whose tweets are searched
   * @param filterOutRetweetsAndReplies forwarded to the query builder
   * @param maxNumTweetsPerSeedUser result-count limit forwarded to the search query
   * @param processingTimeout used both as `timeoutMs` and for collector termination
   */
  private def buildEarlybirdRequest(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): EarlybirdRequest =
    EarlybirdRequest(
      searchQuery = getThriftSearchQuery(
        seedUserId = seedUserId,
        filterOutRetweetsAndReplies = filterOutRetweetsAndReplies,
        maxNumTweetsPerSeedUser = maxNumTweetsPerSeedUser,
        processingTimeout = processingTimeout
      ),
      clientId = Some(EarlybirdClientId),
      timeoutMs = processingTimeout.inMilliseconds.intValue(),
      getOlderResults = Some(false),
      adjustedProtectedRequestParams = None,
      adjustedFullArchiveRequestParams = None,
      getProtectedTweetsOnly = Some(false),
      skipVeryRecentTweets = true
    )

  /**
   * Builds the recency-ranked search query restricted to tweets authored by `seedUserId`.
   */
  private def getThriftSearchQuery(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): ThriftSearchQuery = ThriftSearchQuery(
    serializedQuery = GetEarlybirdQuery(
      None,
      None,
      Set.empty,
      filterOutRetweetsAndReplies
    ).map(_.serialize),
    fromUserIDFilter64 = Some(Seq(seedUserId)),
    numResults = maxNumTweetsPerSeedUser,
    rankingMode = ThriftSearchRankingMode.Recency,
    collectorParams = Some(
      CollectorParams(
        // numResultsToReturn defines how many results each EB shard will return to search root
        numResultsToReturn = maxNumTweetsPerSeedUser,
        // terminationParams.maxHitsToProcess is used for early terminating per shard results fetching.
        terminationParams =
          GetCollectorTerminationParams(maxNumTweetsPerSeedUser, processingTimeout)
      )),
    facetFieldNames = Some(FacetsToFetch),
    resultMetadataOptions = Some(MetadataOptions),
    searchStatusIds = None
  )

  /**
   * Runs the search and extracts the tweet ids.
   *
   * @return on a `Success` response code, the ids of `searchResults.results` (or `None`
   *         when the response carries no `searchResults`); on any other response code,
   *         `Some(Seq.empty)` after incrementing a counter under the "failures" scope
   */
  private def getEarlybirdSearchResult(
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    request: EarlybirdRequest,
    statsReceiver: StatsReceiver
  ): Future[Option[Seq[TweetId]]] = earlybirdSearchClient
    .search(request)
    .map { response =>
      response.responseCode match {
        case EarlybirdResponseCode.Success =>
          val earlybirdSearchResult =
            response.searchResults
              .map {
                _.results
                  .map(searchResult => searchResult.id)
              }
          // Record how many tweet ids came back. Calling `.size` on the Option itself
          // would only ever report 0 or 1, regardless of the number of results.
          val numTweetIds = earlybirdSearchResult.map(_.size).getOrElse(0)
          statsReceiver.scope("result").stat("size").add(numTweetIds)
          earlybirdSearchResult
        case e =>
          statsReceiver.scope("failures").counter(e.getClass.getSimpleName).incr()
          Some(Seq.empty)
      }
    }

}
|
Binary file not shown.
@ -1,195 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.bijection.Injection
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.bijection.scrooge.CompactScalaCodec
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.ml.api.{thriftscala => api}
|
||||
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Apollo
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Guice module providing read-only Manhattan stores of `api.Embedding` values keyed by
 * [[InternalId]]. Every store reads from the "cr_mixer_apollo" application on the Apollo
 * cluster and decodes values with `BinaryScalaCodec(api.Embedding)`.
 */
object EmbeddingStoreModule extends TwitterModule {
  type UserId = Long
  implicit val mbcgUserEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
    CompactScalaCodec(api.Embedding)
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  // The binding names below are deliberately un-annotated `final val`s so they stay
  // constant expressions usable inside the `@Named(...)` annotations.
  final val TwHINEmbeddingRegularUpdateMhStoreName = "TwHINEmbeddingRegularUpdateMhStore"

  /** Tweet embeddings from the "twhin_regular_update_tweet_embedding_apollo" dataset. */
  @Provides
  @Singleton
  @Named(TwHINEmbeddingRegularUpdateMhStoreName)
  def twHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    buildTweetEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("twhin_regular_update_tweet_embedding_apollo"))

  final val ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName =
    "ConsumerBasedTwHINEmbeddingRegularUpdateMhStore"

  /** User embeddings from the "twhin_user_embedding_regular_update_apollo" dataset. */
  @Provides
  @Singleton
  @Named(ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName)
  def consumerBasedTwHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    buildUserEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("twhin_user_embedding_regular_update_apollo"))

  final val TwoTowerFavConsumerEmbeddingMhStoreName = "TwoTowerFavConsumerEmbeddingMhStore"

  /** User embeddings from the "two_tower_fav_user_embedding_apollo" dataset. */
  @Provides
  @Singleton
  @Named(TwoTowerFavConsumerEmbeddingMhStoreName)
  def twoTowerFavConsumerEmbeddingMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    buildUserEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("two_tower_fav_user_embedding_apollo"))

  final val DebuggerDemoUserEmbeddingMhStoreName = "DebuggerDemoUserEmbeddingMhStoreName"

  /** User embeddings from the "experimental_user_embedding" dataset. */
  @Provides
  @Singleton
  @Named(DebuggerDemoUserEmbeddingMhStoreName)
  def debuggerDemoUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalUserEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    buildUserEmbeddingStore(serviceIdentifier, crMixerApolloConfig("experimental_user_embedding"))

  final val DebuggerDemoTweetEmbeddingMhStoreName = "DebuggerDemoTweetEmbeddingMhStore"

  /** Tweet embeddings from the "experimental_tweet_embedding" dataset. */
  @Provides
  @Singleton
  @Named(DebuggerDemoTweetEmbeddingMhStoreName)
  def debuggerDemoTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalTweetEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    buildTweetEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("experimental_tweet_embedding"))

  /** Read-only config for `datasetName` in the "cr_mixer_apollo" application on Apollo. */
  private def crMixerApolloConfig(datasetName: String): ManhattanROConfig =
    ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("cr_mixer_apollo"),
      DatasetName(datasetName),
      Apollo
    )

  /**
   * Store keyed by `InternalId.UserId`; a lookup with any other kind of id throws
   * `UnsupportedOperationException`.
   */
  private def buildUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] =
    buildLongKeyedEmbeddingStore(serviceIdentifier, manhattanROConfig) {
      case InternalId.UserId(userId) =>
        userId
      case _ =>
        throw new UnsupportedOperationException("Invalid Internal Id")
    }

  /**
   * Store keyed by `InternalId.TweetId`; a lookup with any other kind of id throws
   * `UnsupportedOperationException`.
   */
  private def buildTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] =
    buildLongKeyedEmbeddingStore(serviceIdentifier, manhattanROConfig) {
      case InternalId.TweetId(tweetId) =>
        tweetId
      case _ =>
        throw new UnsupportedOperationException("Invalid Internal Id")
    }

  /**
   * Builds the underlying `Long`-keyed Manhattan store and re-keys it by [[InternalId]].
   *
   * The injections are passed explicitly so the binary codec is used for values rather
   * than the compact-codec implicit declared at the top of this module.
   *
   * @param toKey extracts the `Long` key from an [[InternalId]]
   */
  private def buildLongKeyedEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  )(
    toKey: InternalId => Long
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]

    ManhattanRO
      .getReadableStoreWithMtls[Long, api.Embedding](
        manhattanROConfig,
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId](toKey)
  }
}
|
Binary file not shown.
@ -1,29 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.source_signal.FrsStore
|
||||
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import javax.inject.Named
|
||||
|
||||
/** Guice module providing the follow-recommendations (FRS) store wrapped with stats. */
object FrsStoreModule extends TwitterModule {

  /**
   * Builds an `FrsStore` and wraps it in an `ObservedReadableStore`.
   *
   * @param frsClient thrift client handed to `FrsStore`
   * @param statsReceiver handed to `FrsStore` as-is, and scoped to
   *                      "follow_recommendations_store" for the observing wrapper
   * @param decider handed to `FrsStore`
   */
  @Provides
  @Singleton
  @Named(ModuleNames.FrsStore)
  def providesFrsStore(
    frsClient: FollowRecommendationsThriftService.MethodPerEndpoint,
    statsReceiver: StatsReceiver,
    decider: CrMixerDecider
  ): ReadableStore[FrsStore.Query, Seq[FrsQueryResult]] = {
    val frsStore = FrsStore(frsClient, statsReceiver, decider)
    val observedStats = statsReceiver.scope("follow_recommendations_store")
    ObservedReadableStore(frsStore)(observedStats)
  }
}
|
Binary file not shown.
@ -1,17 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import javax.inject.Singleton
|
||||
|
||||
/** Guice module binding [[ManhattanKVClientMtlsParams]] for the service's identity. */
object MHMtlsParamsModule extends TwitterModule {

  /**
   * @param serviceIdentifier identity the mTLS parameters are built from
   * @return `ManhattanKVClientMtlsParams(serviceIdentifier)`
   */
  @Singleton
  @Provides
  def providesManhattanMtlsParams(
    serviceIdentifier: ServiceIdentifier
  ): ManhattanKVClientMtlsParams =
    ManhattanKVClientMtlsParams(serviceIdentifier)
}
|
Binary file not shown.
@ -1,150 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.bijection.Injection
|
||||
import com.twitter.bijection.scrooge.CompactScalaCodec
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Apollo
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Guice module providing read-only Manhattan stores of offline tweet recommendations
 * (`UserId` to [[CandidateTweetsList]]). Each provider differs only in the dataset it
 * reads; all of them are built by [[buildOfflineCandidateStore]].
 */
object OfflineCandidateStoreModule extends TwitterModule {
  type UserId = Long

  // Value injection picked up implicitly by `buildOfflineCandidateStore`.
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  /** Dataset: "offline_tweet_recommendations_from_interestedin_2020". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020CandidateStore)
  def offlineTweet2020CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020")

  /** Dataset: "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl0El15CandidateStore)
  def offlineTweet2020Hl0El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15")

  /** Dataset: "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El15CandidateStore)
  def offlineTweet2020Hl2El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15")

  /** Dataset: "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El50CandidateStore)
  def offlineTweet2020Hl2El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50")

  /** Dataset: "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl8El50CandidateStore)
  def offlineTweet2020Hl8El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50")

  /** Dataset: "offline_tweet_recommendations_from_mts_consumer_embeddings". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweetMTSCandidateStore)
  def offlineTweetMTSCandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_mts_consumer_embeddings")

  /** Dataset: "offline_tweet_recommendations_from_decayed_sum". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFavDecayedSumCandidateStore)
  def offlineFavDecayedSumCandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_decayed_sum")

  /** Dataset: "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop1000RankDecay11CandidateStore)
  def offlineFtrAt5Pop1000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1")

  /** Dataset: "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop10000RankDecay11CandidateStore)
  def offlineFtrAt5Pop10000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1")

  /**
   * Builds the mTLS-enabled read-only store for `datasetName` in the
   * "multi_type_simclusters" application on the Apollo cluster.
   *
   * @param serviceIdentifier identity used to build the Manhattan mTLS parameters
   * @param datasetName Manhattan dataset to read
   */
  private def buildOfflineCandidateStore(
    serviceIdentifier: ServiceIdentifier,
    datasetName: String
  ): ReadableStore[UserId, CandidateTweetsList] = {
    val roConfig = ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("multi_type_simclusters"),
      DatasetName(datasetName),
      Apollo
    )
    val mtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)
    ManhattanRO.getReadableStoreWithMtls[Long, CandidateTweetsList](roConfig, mtlsParams)
  }

}
|
Binary file not shown.
@ -1,39 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.app.Flag
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
|
||||
|
||||
/**
 * Guice module providing the Strato-backed store behind `ModuleNames.RealGraphOonStore`
 * (`UserId` to [[CandidateSeq]]).
 */
object RealGraphOonStoreModule extends TwitterModule {

  // Strato column to fetch from; overridable through the flag below.
  private val userRealGraphOonColumnPath: Flag[String] = flag[String](
    name = "crMixer.userRealGraphOonColumnPath",
    default = "recommendations/twistly/userRealgraphOon",
    help = "Strato column path for user real graph OON Store"
  )

  /**
   * Builds the fetchable store for the configured column and wraps it with stats scoped
   * to "user_real_graph_oon_store".
   *
   * @param stratoClient client used to fetch from the column
   * @param statsReceiver root stats receiver
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphOonStore)
  def providesRealGraphOonStore(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver
  ): ReadableStore[UserId, CandidateSeq] = {
    val columnPath = userRealGraphOonColumnPath()
    val fetchableStore =
      StratoFetchableStore.withUnitView[UserId, CandidateSeq](stratoClient, columnPath)
    val observedStats = statsReceiver.scope("user_real_graph_oon_store")

    ObservedReadableStore(fetchableStore)(observedStats)
  }
}
|
Binary file not shown.
@ -1,67 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Apollo
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.cr_mixer.param.decider.DeciderKey
|
||||
import com.twitter.hermit.store.common.DeciderableReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
|
||||
|
||||
/**
 * Guice module providing the store behind `ModuleNames.RealGraphInStore`
 * (`UserId` to [[CandidateSeq]]): a Manhattan read-only store, fronted by memcache and
 * gated by a decider.
 */
object RealGraphStoreMhModule extends TwitterModule {

  /**
   * Layers, innermost first:
   *  1. Manhattan dataset "real_graph_scores_apollo" ("cr_mixer_apollo" application, Apollo);
   *  2. memcache with a 24 hour TTL and keys of the form "uRGraph/<userId>";
   *  3. a decider gate on `DeciderKey.enableRealGraphMhStoreDeciderKey`.
   *
   * @param decider source of the gate for the outermost layer
   * @param statsReceiver root stats receiver, scoped per layer
   * @param manhattanKVClientMtlsParams mTLS parameters for the Manhattan client
   * @param crMixerUnifiedCacheClient memcached client used for the cache layer
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphInStore)
  def providesRealGraphStoreMh(
    decider: CrMixerDecider,
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient
  ): ReadableStore[UserId, CandidateSeq] = {

    // Picked up implicitly by the Manhattan store and passed explicitly to the cache layer.
    implicit val candidateSeqCodec = new BinaryScalaCodec(CandidateSeq)

    val realGraphConfig = ManhattanROConfig(
      HDFSPath(""),
      ApplicationID("cr_mixer_apollo"),
      DatasetName("real_graph_scores_apollo"),
      Apollo)
    val manhattanStore = ManhattanRO
      .getReadableStoreWithMtls[UserId, CandidateSeq](realGraphConfig, manhattanKVClientMtlsParams)

    val cachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = manhattanStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 24.hours
      )(
        valueInjection = candidateSeqCodec,
        statsReceiver = statsReceiver.scope("memCachedUserRealGraphMh"),
        keyToString = { userId: UserId => s"uRGraph/$userId" }
      )

    val gate = decider.deciderGateBuilder.idGate(DeciderKey.enableRealGraphMhStoreDeciderKey)
    DeciderableReadableStore(cachedStore, gate, statsReceiver.scope("RealGraphMh"))
  }
}
|
Binary file not shown.
@ -1,107 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import javax.inject.Named
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
|
||||
|
||||
/**
 * Guice module exposing SimClusters embedding stores read from the representation-manager
 * Strato columns.
 *
 * Every provider builds the same pipeline (Strato view fetch -> thrift-to-common embedding
 * conversion -> observed store) and differs only in column path, embedding type and stats
 * scope, so the construction lives in one private helper.
 */
object RepresentationManagerModule extends TwitterModule {
  private val ColPathPrefix = "recommendations/representation_manager/"
  private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet"
  private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User"

  /**
   * Builds an observed embedding store for one (column, embedding type) pair using model
   * version `Model20m145k2020`.
   *
   * @param statsReceiver root receiver; scoped with `statsScope` for the observed wrapper
   * @param stratoClient  client used to fetch from the Strato column
   * @param columnPath    Strato column to read
   * @param embeddingType embedding type placed in the [[SimClustersEmbeddingView]]
   * @param statsScope    name of the stats scope for this store
   */
  private def observedEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
    columnPath: String,
    embeddingType: EmbeddingType,
    statsScope: String
  ): ReadableStore[Long, SimClustersEmbedding] = {
    val view = SimClustersEmbeddingView(embeddingType, ModelVersion.Model20m145k2020)
    val embeddingStore = StratoFetchableStore
      .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
        stratoClient,
        columnPath,
        view)
      .mapValues(SimClustersEmbedding(_))

    ObservedReadableStore(embeddingStore)(statsReceiver.scope(statsScope))
  }

  /** Tweet embeddings of type `LogFavLongestL2EmbeddingTweet`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsTweetLogFavLongestL2EmbeddingStore)
  def providesRepresentationManagerTweetStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, SimClustersEmbedding] =
    observedEmbeddingStore(
      statsReceiver,
      stratoClient,
      SimclustersTweetColPath,
      EmbeddingType.LogFavLongestL2EmbeddingTweet,
      "rms_tweet_log_fav_longest_l2_store"
    )

  /** User embeddings of type `FavBasedProducer`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFavBasedProducerEmbeddingStore)
  def providesRepresentationManagerUserFavBasedProducerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] =
    observedEmbeddingStore(
      statsReceiver,
      stratoClient,
      SimclustersUserColPath,
      EmbeddingType.FavBasedProducer,
      "rms_user_fav_based_producer_store"
    )

  /** User embeddings of type `LogFavBasedUserInterestedIn`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserLogFavInterestedInEmbeddingStore)
  def providesRepresentationManagerUserLogFavConsumerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] =
    observedEmbeddingStore(
      statsReceiver,
      stratoClient,
      SimclustersUserColPath,
      EmbeddingType.LogFavBasedUserInterestedIn,
      "rms_user_log_fav_interestedin_store"
    )

  /** User embeddings of type `FollowBasedUserInterestedIn`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFollowInterestedInEmbeddingStore)
  def providesRepresentationManagerUserFollowInterestedInEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] =
    observedEmbeddingStore(
      statsReceiver,
      stratoClient,
      SimclustersUserColPath,
      EmbeddingType.FollowBasedUserInterestedIn,
      "rms_user_follow_interestedin_store"
    )
}
|
Binary file not shown.
@ -1,56 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.ModelVersion
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import javax.inject.Named
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.representationscorer.thriftscala.ListScoreId
|
||||
|
||||
/**
 * Guice module providing a pairwise score store backed by the representation-scorer
 * `listScore` Strato column.
 */
object RepresentationScorerModule extends TwitterModule {

  private val rsxColumnPath = "recommendations/representation_scorer/listScore"

  private final val SimClusterModelVersion = ModelVersion.Model20m145k2020
  private final val TweetEmbeddingType = EmbeddingType.LogFavBasedTweet

  /**
   * Builds the store bound under [[ModuleNames.RsxStore]].
   *
   * Each lookup key pair is translated into a single-candidate [[ListScoreId]] before the
   * Strato fetch; the resulting store is wrapped with stats under `rsx_store`.
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RsxStore)
  def providesRepresentationScorerStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[(UserId, TweetId), Double] = {
    val pairScoreStore = StratoFetchableStore
      .withUnitView[ListScoreId, Double](stratoClient, rsxColumnPath)
      .composeKeyMapping[(UserId, TweetId)] {
        case (targetId, candidateId) => toListScoreId(targetId, candidateId)
      }

    ObservedReadableStore(pairScoreStore)(statsReceiver.scope("rsx_store"))
  }

  /**
   * Builds the Strato key: log-cosine similarity between the `LogFavBasedTweet` embeddings of
   * `t1` (target) and `t2` (the only candidate), using model `Model20m145k2020`.
   *
   * NOTE(review): the public store key is typed `(UserId, TweetId)`, yet both halves are sent
   * as `InternalId.TweetId` with a tweet embedding type. Confirm whether the first half is
   * really a tweet id and the store's key type is simply mislabeled.
   */
  private def toListScoreId(t1: TweetId, t2: TweetId): ListScoreId =
    ListScoreId(
      algorithm = ScoringAlgorithm.PairEmbeddingLogCosineSimilarity,
      modelVersion = SimClusterModelVersion,
      targetEmbeddingType = TweetEmbeddingType,
      targetId = InternalId.TweetId(t1),
      candidateEmbeddingType = TweetEmbeddingType,
      candidateIds = Seq(InternalId.TweetId(t2))
    )
}
|
Binary file not shown.
@ -1,90 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.config.TimeoutConfig
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig
|
||||
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Example module: wires a [[StandardSimilarityEngine]] around a hard-coded in-memory store.
 * The engine has no decider and no feature-switch gating.
 */
object SimpleSimilarityEngineModule extends TwitterModule {

  /**
   * @param timeoutConfig supplies `similarityEngineTimeout` for the engine
   * @param globalStats   stats receiver handed to the engine
   * @return an engine over three sample users, each with two (tweet id, score) candidates
   */
  @Provides
  @Singleton
  def providesSimpleSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): StandardSimilarityEngine[UserId, (TweetId, Double)] = {
    // Replace this sample data with a real ReadableStore implementation.
    val sampleCandidates = Map(
      1L -> Seq((100L, 1.0), (101L, 1.0)),
      2L -> Seq((200L, 2.0), (201L, 2.0)),
      3L -> Seq((300L, 3.0), (301L, 3.0))
    )
    val sampleStore = ReadableStore.fromMap(sampleCandidates)

    val ungated = GatingConfig(deciderConfig = None, enableFeatureSwitch = None)
    val config = SimilarityEngineConfig(
      timeout = timeoutConfig.similarityEngineTimeout,
      gatingConfig = ungated
    )

    new StandardSimilarityEngine[UserId, (TweetId, Double)](
      implementingStore = sampleStore,
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9997),
      globalStats = globalStats,
      engineConfig = config
    )
  }
}
|
||||
|
||||
/**
 * Example module: wires a [[LookupSimilarityEngine]] around two hard-coded in-memory stores
 * registered under the version names "V1" and "V2". The engine has no decider and no
 * feature-switch gating.
 */
object LookupSimilarityEngineModule extends TwitterModule {

  /**
   * @param timeoutConfig supplies `similarityEngineTimeout` for the engine
   * @param globalStats   stats receiver handed to the engine
   * @return an engine whose two versions currently serve identical sample data
   */
  @Provides
  @Singleton
  def providesLookupSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): LookupSimilarityEngine[UserId, (TweetId, Double)] = {
    // Replace this sample data with real ReadableStore implementations.
    // Each call yields a fresh store instance, so the two versions stay independent objects.
    def newSampleStore() =
      ReadableStore.fromMap(
        Map(
          1L -> Seq((100L, 1.0), (101L, 1.0)),
          2L -> Seq((200L, 2.0), (201L, 2.0))
        ))

    val storeV1 = newSampleStore()
    val storeV2 = newSampleStore()

    val ungated = GatingConfig(deciderConfig = None, enableFeatureSwitch = None)
    val config = SimilarityEngineConfig(
      timeout = timeoutConfig.similarityEngineTimeout,
      gatingConfig = ungated
    )

    new LookupSimilarityEngine[UserId, (TweetId, Double)](
      versionedStoreMap = Map("V1" -> storeV1, "V2" -> storeV2),
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9998),
      globalStats = globalStats,
      engineConfig = config
    )
  }

}
|
Binary file not shown.
@ -1,33 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclustersann.thriftscala.SimClustersANNService
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module that gathers the individually named SimClusters ANN clients into a single
 * lookup table keyed by service name.
 */
object SimClustersANNServiceNameToClientMapper extends TwitterModule {

  /**
   * @return a map from service name ("simclusters-ann", "simclusters-ann-experimental",
   *         "simclusters-ann-1" through "simclusters-ann-5") to the injected client
   */
  @Provides
  @Singleton
  def providesSimClustersANNServiceNameToClientMapping(
    @Named(ModuleNames.ProdSimClustersANNServiceClientName) simClustersANNServiceProd: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.ExperimentalSimClustersANNServiceClientName) simClustersANNServiceExperimental: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName1) simClustersANNService1: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName2) simClustersANNService2: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName3) simClustersANNService3: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName5) simClustersANNService5: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName4) simClustersANNService4: SimClustersANNService.MethodPerEndpoint
  ): Map[String, SimClustersANNService.MethodPerEndpoint] =
    Map(
      "simclusters-ann" -> simClustersANNServiceProd,
      "simclusters-ann-experimental" -> simClustersANNServiceExperimental,
      "simclusters-ann-1" -> simClustersANNService1,
      "simclusters-ann-2" -> simClustersANNService2,
      "simclusters-ann-3" -> simClustersANNService3,
      "simclusters-ann-4" -> simClustersANNService4,
      "simclusters-ann-5" -> simClustersANNService5
    )
}
|
Binary file not shown.
@ -1,65 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.Client
|
||||
import com.twitter.topic_recos.thriftscala.TopicTopTweets
|
||||
import com.twitter.topic_recos.thriftscala.TopicTweet
|
||||
import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey
|
||||
|
||||
/**
 * Strato store that wraps the topic top tweets pipeline indexed from a Summingbird job.
 *
 * Reads go through three layers: an in-process cache (5 minute TTL), memcached (10 minute
 * TTL), and finally the Strato column itself.
 */
object SkitStratoStoreModule extends TwitterModule {

  val column = "recommendations/topic_recos/topicTopTweets"

  /**
   * Builds the store bound under [[ModuleNames.SkitStratoStoreName]].
   *
   * @param crMixerUnifiedCacheClient memcached client used for the middle caching layer
   * @param stratoClient              client used to fetch from the Strato column
   * @param statsReceiver             root receiver; scoped per layer below
   * @return a cached store from topic partition key to that topic's top tweets
   */
  @Provides
  @Singleton
  @Named(ModuleNames.SkitStratoStoreName)
  def providesSkitStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicTweetPartitionFlatKey, Seq[TopicTweet]] = {
    // Observed Strato fetch, unwrapped to just the list of top tweets.
    val skitStore = ObservedReadableStore(
      StratoFetchableStore
        .withUnitView[TopicTweetPartitionFlatKey, TopicTopTweets](stratoClient, column))(
      statsReceiver.scope(ModuleNames.SkitStratoStoreName)).mapValues { topicTopTweets =>
      topicTopTweets.topTweets
    }

    val memCachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = skitStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 10.minutes
      )(
        valueInjection = LZ4Injection.compose(SeqObjectInjection[TopicTweet]()),
        statsReceiver = statsReceiver.scope("memcached_skit_store"),
        // The key is hashed from its string form. The charset is pinned to UTF-8 so the bytes,
        // and therefore the memcached key, do not depend on the JVM's default charset.
        keyToString = { k =>
          s"skit:${keyHasher.hashKey(k.toString.getBytes(java.nio.charset.StandardCharsets.UTF_8))}"
        }
      )

    ObservedCachedReadableStore.from[TopicTweetPartitionFlatKey, Seq[TopicTweet]](
      memCachedStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      cacheName = "skit_in_memory_cache",
      windowSize = 10000L
    )(statsReceiver.scope("skit_in_memory_cache"))
  }
}
|
Binary file not shown.
@ -1,39 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.app.Flag
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.hermit.stp.thriftscala.STPResult
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module providing the strong-tie-prediction store, read from a Strato column whose
 * path is configurable via the `crMixer.strongTiePredictionColumnPath` flag.
 */
object StrongTiePredictionStoreModule extends TwitterModule {

  private val strongTiePredictionColumnPath: Flag[String] = flag[String](
    name = "crMixer.strongTiePredictionColumnPath",
    default = "onboarding/userrecs/strong_tie_prediction_big",
    help = "Strato column path for StrongTiePredictionStore"
  )

  /**
   * Builds the store bound under [[ModuleNames.StpStore]].
   *
   * @param statsReceiver root receiver; scoped to `strong_tie_prediction_big_store`
   * @param stratoClient  client used to fetch from the Strato column
   * @return an observed store from user id to [[STPResult]]
   */
  @Provides
  @Singleton
  @Named(ModuleNames.StpStore)
  def providesStrongTiePredictionStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, STPResult] = {
    // The flag value is read when the provider runs, not when the module is constructed.
    val columnPath = strongTiePredictionColumnPath()
    val stpStore = StratoFetchableStore.withUnitView[UserId, STPResult](stratoClient, columnPath)

    ObservedReadableStore(stpStore)(statsReceiver.scope("strong_tie_prediction_big_store"))
  }
}
|
Binary file not shown.
@ -1,34 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet
|
||||
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets
|
||||
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module providing the trip candidate store, read from the
 * `trends/trip/tripTweetsDataflowProd` Strato column.
 */
object TripCandidateStoreModule extends TwitterModule {
  private val stratoColumn = "trends/trip/tripTweetsDataflowProd"

  /**
   * Builds the store bound under [[ModuleNames.TripCandidateStore]].
   *
   * Scoped as a singleton, like the other store providers in this package, so that one
   * observed store is shared instead of a new wrapper being built per injection. The
   * annotation is fully qualified because this file does not import it.
   *
   * @param statsReceiver root receiver; scoped to `simclusters_trip_candidate_store`
   * @param stratoClient  client used to fetch from the Strato column
   * @return an observed store from [[TripDomain]] to the tweets of the fetched [[TripTweets]]
   */
  @Provides
  @com.google.inject.Singleton
  @Named(ModuleNames.TripCandidateStore)
  def providesSimClustersTripCandidateStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient
  ): ReadableStore[TripDomain, Seq[TripTweet]] = {
    val tripCandidateStratoFetchableStore =
      StratoFetchableStore
        .withUnitView[TripDomain, TripTweets](stratoClient, stratoColumn)
        .mapValues(_.tweets)

    ObservedReadableStore(
      tripCandidateStratoFetchableStore
    )(statsReceiver.scope("simclusters_trip_candidate_store"))
  }
}
|
Binary file not shown.
@ -1,205 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Module
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.frigate.common.store.health.TweetHealthModelStore
|
||||
import com.twitter.frigate.common.store.health.TweetHealthModelStore.TweetHealthModelStoreConfig
|
||||
import com.twitter.frigate.common.store.health.UserHealthModelStore
|
||||
import com.twitter.frigate.thriftscala.TweetHealthScores
|
||||
import com.twitter.frigate.thriftscala.UserAgathaScores
|
||||
import com.twitter.hermit.store.common.DeciderableReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.contentrecommender.store.TweetInfoStore
|
||||
import com.twitter.contentrecommender.store.TweetyPieFieldsStore
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderKey
|
||||
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
|
||||
import com.twitter.recos.user_tweet_graph_plus.thriftscala.UserTweetGraphPlus
|
||||
import com.twitter.recos.user_tweet_graph_plus.thriftscala.TweetEngagementScores
|
||||
import com.twitter.relevance_platform.common.health_store.UserMediaRepresentationHealthStore
|
||||
import com.twitter.relevance_platform.common.health_store.MagicRecsRealTimeAggregatesStore
|
||||
import com.twitter.relevance_platform.thriftscala.MagicRecsRealTimeAggregatesScores
|
||||
import com.twitter.relevance_platform.thriftscala.UserMediaRepresentationScores
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.tweetypie.thriftscala.TweetService
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.JavaTimer
|
||||
import com.twitter.util.Timer
|
||||
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module that assembles the [[TweetInfo]] store from several signal stores and puts a
 * memcached layer and an in-process cache in front of it.
 */
object TweetInfoStoreModule extends TwitterModule {
  implicit val timer: Timer = new JavaTimer(true)
  override def modules: Seq[Module] = Seq(UnifiedCacheClient)

  /**
   * Builds the tweet-info store.
   *
   * @param statsReceiver               root receiver; scoped per store below
   * @param serviceIdentifier           used for the real-time aggregates store and to decide
   *                                    whether memcached updates are asynchronous
   * @param stratoClient                client for the Strato-backed health model stores
   * @param crMixerUnifiedCacheClient   memcached client shared by all memcached layers here
   * @param manhattanKVClientMtlsParams mTLS parameters for the media-representation store
   * @param tweetyPieService            backs the TweetyPie fields store
   * @param userTweetGraphPlusService   source of real-time tweet engagement scores
   * @param blueVerifiedAnnotationStore passed through to the underlying [[TweetInfoStore]]
   * @param decider                     supplies the gates guarding individual signal stores
   * @return tweet info by tweet id, served through in-memory cache -> memcached -> source
   */
  @Provides
  @Singleton
  def providesTweetInfoStore(
    statsReceiver: StatsReceiver,
    serviceIdentifier: ServiceIdentifier,
    stratoClient: StratoClient,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    tweetyPieService: TweetService.MethodPerEndpoint,
    userTweetGraphPlusService: UserTweetGraphPlus.MethodPerEndpoint,
    @Named(ModuleNames.BlueVerifiedAnnotationStore) blueVerifiedAnnotationStore: ReadableStore[
      String,
      BlueVerifiedAnnotationsV2
    ],
    decider: CrMixerDecider
  ): ReadableStore[TweetId, TweetInfo] = {
    val gates = decider.deciderGateBuilder

    // ---- Real-time engagement scores from UserTweetGraphPlus (decider-gated, uncached) ----
    val observedEngagementStore =
      ObservedReadableStore(new ReadableStore[TweetId, TweetEngagementScores] {
        override def get(tweetId: TweetId): Future[Option[TweetEngagementScores]] =
          userTweetGraphPlusService.tweetEngagementScore(tweetId).map(scores => Some(scores))
      })(statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore"))

    val tweetEngagementScoreStore: ReadableStore[TweetId, TweetEngagementScores] =
      DeciderableReadableStore(
        observedEngagementStore,
        gates.idGate(DeciderKey.enableUtgRealTimeTweetEngagementScoreDeciderKey),
        statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore")
      )

    // ---- Tweet health model scores (memcached for 2 hours, decider-gated) ----
    // NOTE(review): this store is built but not handed to TweetInfoStore below; confirm
    // whether it is still needed.
    val healthModelConfig = TweetHealthModelStoreConfig(
      enablePBlock = true,
      enableToxicity = true,
      enablePSpammy = true,
      enablePReported = true,
      enableSpammyTweetContent = true,
      enablePNegMultimodal = true
    )
    val rawTweetHealthStore =
      TweetHealthModelStore.buildReadableStore(stratoClient, Some(healthModelConfig))(
        statsReceiver.scope("UnderlyingTweetHealthModelStore"))

    // use s"tHMS/$k" instead of s"tweetHealthModelStore/$k" to differentiate from CR cache
    val cachedTweetHealthStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = rawTweetHealthStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 2.hours
    )(
      valueInjection = BinaryScalaCodec(TweetHealthScores),
      statsReceiver = statsReceiver.scope("memCachedTweetHealthModelStore"),
      keyToString = (tweetId: TweetId) => s"tHMS/$tweetId"
    )
    val tweetHealthModelStore: ReadableStore[TweetId, TweetHealthScores] =
      DeciderableReadableStore(
        cachedTweetHealthStore,
        gates.idGate(DeciderKey.enableHealthSignalsScoreDeciderKey),
        statsReceiver.scope("TweetHealthModelStore")
      )

    // ---- User health model scores (memcached for 18 hours, decider-gated) ----
    // NOTE(review): this store is built but not handed to TweetInfoStore below; confirm
    // whether it is still needed.
    val rawUserHealthStore = UserHealthModelStore.buildReadableStore(stratoClient)(
      statsReceiver.scope("UnderlyingUserHealthModelStore"))
    val cachedUserHealthStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = rawUserHealthStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 18.hours
    )(
      valueInjection = BinaryScalaCodec(UserAgathaScores),
      statsReceiver = statsReceiver.scope("memCachedUserHealthModelStore"),
      keyToString = (userId: UserId) => s"uHMS/$userId"
    )
    val userHealthModelStore: ReadableStore[UserId, UserAgathaScores] =
      DeciderableReadableStore(
        cachedUserHealthStore,
        gates.idGate(DeciderKey.enableUserAgathaScoreDeciderKey),
        statsReceiver.scope("UserHealthModelStore")
      )

    // ---- User media representation scores (memcached for 12 hours, decider-gated) ----
    val rawMediaRepresentationStore = UserMediaRepresentationHealthStore.buildReadableStore(
      manhattanKVClientMtlsParams,
      statsReceiver.scope("UnderlyingUserMediaRepresentationHealthStore")
    )
    val cachedMediaRepresentationStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = rawMediaRepresentationStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 12.hours
    )(
      valueInjection = BinaryScalaCodec(UserMediaRepresentationScores),
      statsReceiver = statsReceiver.scope("memCacheUserMediaRepresentationHealthStore"),
      keyToString = (userId: UserId) => s"uMRHS/$userId"
    )
    val userMediaRepresentationHealthStore: ReadableStore[UserId, UserMediaRepresentationScores] =
      DeciderableReadableStore(
        cachedMediaRepresentationStore,
        gates.idGate(DeciderKey.enableUserMediaRepresentationStoreDeciderKey),
        statsReceiver.scope("UserMediaRepresentationHealthStore")
      )

    // ---- MagicRecs real-time aggregates (decider-gated, uncached) ----
    val rawRealTimeAggregatesStore = MagicRecsRealTimeAggregatesStore.buildReadableStore(
      serviceIdentifier,
      statsReceiver.scope("UnderlyingMagicRecsRealTimeAggregatesScores")
    )
    val magicRecsRealTimeAggregatesStore: ReadableStore[
      TweetId,
      MagicRecsRealTimeAggregatesScores
    ] =
      DeciderableReadableStore(
        rawRealTimeAggregatesStore,
        gates.idGate(DeciderKey.enableMagicRecsRealTimeAggregatesStore),
        statsReceiver.scope("MagicRecsRealTimeAggregatesStore")
      )

    // ---- TweetInfo: source store -> memcached (15 min) -> in-memory cache (15 min) ----
    val sourceTweetInfoStore = TweetInfoStore(
      TweetyPieFieldsStore.getStoreFromTweetyPie(tweetyPieService),
      userMediaRepresentationHealthStore,
      magicRecsRealTimeAggregatesStore,
      tweetEngagementScoreStore,
      blueVerifiedAnnotationStore
    )(statsReceiver.scope("tweetInfoStore"))

    val memcachedTweetInfoStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = sourceTweetInfoStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 15.minutes,
      // Hydrating tweetInfo is now a required step for all candidates,
      // hence we needed to tune these thresholds.
      asyncUpdate = serviceIdentifier.environment == "prod"
    )(
      valueInjection = BinaryScalaCodec(TweetInfo),
      statsReceiver = statsReceiver.scope("memCachedTweetInfoStore"),
      keyToString = (tweetId: TweetId) => s"tIS/$tweetId"
    )

    ObservedCachedReadableStore.from(
      memcachedTweetInfoStore,
      ttl = 15.minutes,
      maxKeys = 8388607, // Check TweetInfo definition. size~92b. Around 736 MB
      windowSize = 10000L,
      cacheName = "tweet_info_cache",
      maxMultiGetSize = 20
    )(statsReceiver.scope("inMemoryCachedTweetInfoStore"))
  }
}
|
Binary file not shown.
@ -1,42 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.app.Flag
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers
|
||||
|
||||
/**
 * Guice module providing the store of users who recently engaged with a tweet, read from a
 * Strato column whose path is configurable via `crMixer.tweetRecentEngagedUsersColumnPath`.
 */
object TweetRecentEngagedUserStoreModule extends TwitterModule {

  private type Version = Long

  // DefaultVersion for tweetEngagedUsersStore, whose key = (tweetId, DefaultVersion)
  private val tweetRecentEngagedUsersStoreDefaultVersion: Version = 0L

  private val tweetRecentEngagedUsersColumnPath: Flag[String] = flag[String](
    name = "crMixer.tweetRecentEngagedUsersColumnPath",
    default = "recommendations/twistly/tweetRecentEngagedUsers",
    help = "Strato column path for TweetRecentEngagedUsersStore"
  )

  /**
   * @param statsReceiver root receiver; scoped to `tweet_recent_engaged_users_store`
   * @param stratoClient  client used to fetch from the Strato column
   * @return an observed store keyed by tweet id alone; the column's version component is
   *         always filled in with the default version
   */
  @Provides
  @Singleton
  def providesTweetRecentEngagedUserStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, TweetRecentEngagedUsers] = {
    val versionedStore = StratoFetchableStore
      .withUnitView[(TweetId, Version), TweetRecentEngagedUsers](
        stratoClient,
        tweetRecentEngagedUsersColumnPath())

    val byTweetIdStore = versionedStore.composeKeyMapping[TweetId] { tweetId =>
      (tweetId, tweetRecentEngagedUsersStoreDefaultVersion)
    }

    ObservedReadableStore(byTweetIdStore)(
      statsReceiver.scope("tweet_recent_engaged_users_store"))
  }
}
|
Binary file not shown.
@ -1,32 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.hermit.store.common.ReadableWritableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableWritableMemcacheStore
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module providing a memcached-backed read/write store of tweet recommendation
 * responses, keyed by user id.
 */
object TweetRecommendationResultsStoreModule extends TwitterModule {

  /**
   * @param tweetRecommendationResultsCacheClient memcached client dedicated to this store
   * @param statsReceiver                         root receiver; scoped to
   *                                              `TweetRecommendationResultsMemcacheStore`
   * @return a store whose entries use a 24 hour TTL and the plain user id string as key
   */
  @Provides
  @Singleton
  def providesTweetRecommendationResultsStore(
    @Named(ModuleNames.TweetRecommendationResultsCache) tweetRecommendationResultsCacheClient: MemcachedClient,
    statsReceiver: StatsReceiver
  ): ReadableWritableStore[UserId, CrMixerTweetResponse] = {
    val responseCodec = BinaryScalaCodec(CrMixerTweetResponse)
    val scopedStats = statsReceiver.scope("TweetRecommendationResultsMemcacheStore")

    ObservedReadableWritableMemcacheStore.fromCacheClient(
      cacheClient = tweetRecommendationResultsCacheClient,
      ttl = 24.hours
    )(
      valueInjection = responseCodec,
      statsReceiver = scopedStats,
      keyToString = (userId: UserId) => userId.toString
    )
  }
}
|
Binary file not shown.
@ -1,67 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine.TwhinCollabFilterView
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module exposing TwHIN collaborative-filter candidate stores.
 *
 * All four stores read the same Strato column and differ only in the string given to
 * [[TwhinCollabFilterView]], so the construction lives in one private helper.
 */
object TwhinCollabFilterStratoStoreModule extends TwitterModule {

  val stratoColumnPath: String = "cuad/twhin/getCollabFilterTweetCandidatesProd.User"

  /**
   * Builds a store over `stratoColumnPath` for one view.
   *
   * @param stratoClient client used to fetch from the Strato column
   * @param viewName     string passed verbatim to [[TwhinCollabFilterView]]
   * @return a store from a `Long` key to candidate tweet ids
   */
  private def collabFilterStore(
    stratoClient: StratoClient,
    viewName: String
  ): ReadableStore[Long, Seq[TweetId]] =
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView(viewName)
    )

  /** Store for the `follow_2022_03_10_c_500K` view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForFollow)
  def providesTwhinCollabFilterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, "follow_2022_03_10_c_500K")

  /** Store for the `engagement_2022_04_10_c_500K` view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForEngagement)
  def providesTwhinCollabFilterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, "engagement_2022_04_10_c_500K")

  /** Store for the `multiclusterFollow20220921` view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForFollow)
  def providesTwhinMultiClusterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, "multiclusterFollow20220921")

  /** Store for the `multiclusterEng20220921` view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForEngagement)
  def providesTwhinMultiClusterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, "multiclusterEng20220921")
}
|
Binary file not shown.
@ -1,42 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.app.Flag
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.simclusters_v2.thriftscala.OrderedClustersAndMembers
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module providing the Strato-backed store bound under
 * `ModuleNames.TwiceClustersMembersStore`.
 */
object TwiceClustersMembersStoreModule extends TwitterModule {

  // Column path is flag-configurable. The help text previously named an unrelated store
  // (copy-paste leftover); it now describes the store this flag actually configures.
  private val twiceClustersMembersColumnPath: Flag[String] = flag[String](
    name = "crMixer.twiceClustersMembersColumnPath",
    default =
      "recommendations/simclusters_v2/embeddings/TwiceClustersMembersLargestDimApeSimilarity",
    help = "Strato column path for TwiceClustersMembersStore"
  )

  /**
   * Builds a unit-view [[StratoFetchableStore]] over the flag-configured column and wraps it
   * in an [[ObservedReadableStore]] reporting under the
   * "twice_clusters_members_largestDimApe_similarity_store" stats scope.
   *
   * NOTE(review): the method name mentions "TweetRecentEngagedUser" although the binding is
   * `ModuleNames.TwiceClustersMembersStore`; the name is kept so the module's interface is
   * unchanged.
   *
   * @param statsReceiver receiver from which the store's stats scope is derived
   * @param stratoClient  Strato client used to fetch from the column
   * @return a readable store from user id to [[OrderedClustersAndMembers]]
   */
  @Provides
  @Singleton
  @Named(ModuleNames.TwiceClustersMembersStore)
  def providesTweetRecentEngagedUserStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, OrderedClustersAndMembers] = {
    val twiceClustersMembersStratoFetchableStore = StratoFetchableStore
      .withUnitView[UserId, OrderedClustersAndMembers](
        stratoClient,
        twiceClustersMembersColumnPath())

    ObservedReadableStore(
      twiceClustersMembersStratoFetchableStore
    )(statsReceiver.scope("twice_clusters_members_largestDimApe_similarity_store"))
  }
}
|
Binary file not shown.
@ -1,83 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.app.Flag
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.memcached.Client
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.storehaus_internal.memcache.MemcacheStore
|
||||
import com.twitter.storehaus_internal.util.ClientName
|
||||
import com.twitter.storehaus_internal.util.ZkEndPoint
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module providing the memcached clients used by CrMixer.
 *
 * Each provider resolves its destination from a flag and builds the client through
 * [[buildClient]], so all clients share the same timeout and stats scope.
 */
object UnifiedCacheClient extends TwitterModule {

  // Request timeout applied to every client built by this module.
  private val CacheClientTimeout = 20.milliseconds

  val crMixerUnifiedCacheDest: Flag[String] = flag[String](
    name = "crMixer.unifiedCacheDest",
    default = "/s/cache/content_recommender_unified_v2",
    help = "Wily path to Content Recommender unified cache"
  )

  val tweetRecommendationResultsCacheDest: Flag[String] = flag[String](
    name = "tweetRecommendationResults.CacheDest",
    default = "/s/cache/tweet_recommendation_results",
    help = "Wily path to CrMixer getTweetRecommendations() results cache"
  )

  val earlybirdTweetsCacheDest: Flag[String] = flag[String](
    name = "earlybirdTweets.CacheDest",
    default = "/s/cache/crmixer_earlybird_tweets",
    help = "Wily path to CrMixer Earlybird Recency Based Similarity Engine result cache"
  )

  /**
   * Shared construction path for the providers below.
   *
   * NOTE(review): every client reports under the same "cache_client" stats scope, exactly
   * as the original providers did; confirm whether per-client scopes were intended.
   *
   * @param clientName        name wrapped in [[ClientName]]
   * @param dest              destination path wrapped in [[ZkEndPoint]]
   * @param serviceIdentifier service identifier forwarded to the client builder
   * @param statsReceiver     receiver from which the "cache_client" scope is derived
   */
  private def buildClient(
    clientName: String,
    dest: String,
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver
  ): Client =
    MemcacheStore.memcachedClient(
      name = ClientName(clientName),
      dest = ZkEndPoint(dest),
      statsReceiver = statsReceiver.scope("cache_client"),
      serviceIdentifier = serviceIdentifier,
      timeout = CacheClientTimeout
    )

  /** Client bound under `ModuleNames.UnifiedCache`, using [[crMixerUnifiedCacheDest]]. */
  @Provides
  @Singleton
  @Named(ModuleNames.UnifiedCache)
  def provideUnifiedCacheClient(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    buildClient(
      clientName = "memcache-content-recommender-unified",
      dest = crMixerUnifiedCacheDest(),
      serviceIdentifier = serviceIdentifier,
      statsReceiver = statsReceiver
    )

  /**
   * Client bound under `ModuleNames.TweetRecommendationResultsCache`, using
   * [[tweetRecommendationResultsCacheDest]].
   */
  @Provides
  @Singleton
  @Named(ModuleNames.TweetRecommendationResultsCache)
  def providesTweetRecommendationResultsCache(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    buildClient(
      clientName = "memcache-tweet-recommendation-results",
      dest = tweetRecommendationResultsCacheDest(),
      serviceIdentifier = serviceIdentifier,
      statsReceiver = statsReceiver
    )

  /** Client bound under `ModuleNames.EarlybirdTweetsCache`, using [[earlybirdTweetsCacheDest]]. */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdTweetsCache)
  def providesEarlybirdTweetsCache(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver,
  ): Client =
    buildClient(
      clientName = "memcache-crmixer-earlybird-tweets",
      dest = earlybirdTweetsCacheDest(),
      serviceIdentifier = serviceIdentifier,
      statsReceiver = statsReceiver
    )
}
|
Binary file not shown.
@ -1,30 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.{Client => StratoClient}
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.frigate.common.store.strato.StratoFetchableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.usersignalservice.thriftscala.BatchSignalRequest
|
||||
import com.twitter.usersignalservice.thriftscala.BatchSignalResponse
|
||||
import javax.inject.Named
|
||||
|
||||
/**
 * Guice module providing the Strato-backed store bound under `ModuleNames.UssStratoColumn`.
 */
object UserSignalServiceColumnModule extends TwitterModule {
  private val UssColumnPath = "recommendations/user-signal-service/signals"

  /**
   * Builds a unit-view [[StratoFetchableStore]] over [[UssColumnPath]] and wraps it in an
   * [[ObservedReadableStore]] reporting under the "user_signal_service_store" stats scope.
   *
   * @param statsReceiver receiver from which the store's stats scope is derived
   * @param stratoClient  Strato client used to fetch from the column
   * @return a readable store from [[BatchSignalRequest]] to [[BatchSignalResponse]]
   */
  @Provides
  @Singleton
  @Named(ModuleNames.UssStratoColumn)
  def providesUserSignalServiceStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[BatchSignalRequest, BatchSignalResponse] = {
    val ussStratoStore =
      StratoFetchableStore.withUnitView[BatchSignalRequest, BatchSignalResponse](
        stratoClient,
        UssColumnPath
      )
    val storeStats = statsReceiver.scope("user_signal_service_store")

    ObservedReadableStore(ussStratoStore)(storeStats)
  }
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user