mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-11-16 00:25:11 +01:00
Delete follow-recommendations-service directory
This commit is contained in:
parent
d0717e339b
commit
14f78e176b
@ -1,48 +0,0 @@
|
||||
# Without this alias, library :follow-recommendations-service_lib would conflict with :bin
|
||||
alias(
|
||||
name = "follow-recommendations-service",
|
||||
target = ":follow-recommendations-service_lib",
|
||||
)
|
||||
|
||||
target(
|
||||
name = "follow-recommendations-service_lib",
|
||||
dependencies = [
|
||||
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations",
|
||||
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/models",
|
||||
],
|
||||
)
|
||||
|
||||
jvm_binary(
|
||||
name = "bin",
|
||||
basename = "follow-recommendations-service",
|
||||
main = "com.twitter.follow_recommendations.FollowRecommendationsServiceThriftServerMain",
|
||||
runtime_platform = "java11",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
":follow-recommendations-service",
|
||||
"3rdparty/jvm/ch/qos/logback:logback-classic",
|
||||
"finagle/finagle-zipkin-scribe/src/main/scala",
|
||||
"finatra/inject/inject-logback/src/main/scala",
|
||||
"loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback",
|
||||
"twitter-server-internal/src/main/scala",
|
||||
"twitter-server/logback-classic/src/main/scala",
|
||||
],
|
||||
)
|
||||
|
||||
# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app
|
||||
jvm_app(
|
||||
name = "follow-recommendations-service-app",
|
||||
archive = "zip",
|
||||
binary = ":bin",
|
||||
bundles = [
|
||||
bundle(
|
||||
fileset = [
|
||||
"server/src/main/resources/*",
|
||||
"server/src/main/resources/**/*",
|
||||
],
|
||||
owning_target = "follow-recommendations-service/server/src/main/resources:frs_resources",
|
||||
relative_to = "server/src/main/resources",
|
||||
),
|
||||
],
|
||||
tags = ["bazel-compatible"],
|
||||
)
|
@ -1,24 +0,0 @@
|
||||
[code-coverage]
|
||||
package = com.twitter.follow_recommendations
|
||||
|
||||
[docbird]
|
||||
project_name = follow-recommendations-service
|
||||
project_type = service
|
||||
; example settings:
|
||||
;
|
||||
; project_name = fluffybird
|
||||
; description = fluffybird is a service for fluffing up feathers.
|
||||
; tags = python,documentation,fluffybird
|
||||
; project_type = service
|
||||
; - allowed options: essay, library, service, hub, cookbook, styleguide, policy
|
||||
; owner_links = roster
|
||||
; - allowed options: roster, find, email
|
||||
; scrolling_tocs = yes
|
||||
; comments = yes
|
||||
; verifications = yes
|
||||
; support_widget = yes
|
||||
; health_score = yes
|
||||
; sticky_sidebar = no
|
||||
|
||||
[jira]
|
||||
project = CJREL
|
Binary file not shown.
Before Width: | Height: | Size: 178 KiB |
@ -1,40 +0,0 @@
|
||||
# Follow Recommendations Service
|
||||
|
||||
## Introduction to the Follow Recommendations Service (FRS)
|
||||
The Follow Recommendations Service (FRS) is a robust recommendation engine designed to provide users with personalized suggestions for accounts to follow. At present, FRS supports Who-To-Follow (WTF) module recommendations across a variety of Twitter product interfaces. Additionally, by suggesting tweet authors, FRS also delivers FutureGraph tweet recommendations, which consist of tweets from accounts that users may be interested in following in the future.
|
||||
|
||||
## Design
|
||||
The system is tailored to accommodate diverse use cases, such as Post New-User-Experience (NUX), advertisements, FutureGraph tweets, and more. Each use case features a unique display location identifier. To view all display locations, refer to the following path: `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models/DisplayLocation.scala`.
|
||||
|
||||
Recommendation steps are customized according to each display location. Common and high-level steps are encapsulated within the "RecommendationFlow," which includes operations like candidate generation, ranker selection, filtering, transformation, and beyond. To explore all flows, refer to this path: `follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/flows`.
|
||||
|
||||
For each product (corresponding to a display location), one or multiple flows can be selected to generate candidates based on code and configurations. Products live under `follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/products`; for an example, refer to the following path: `follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/products/home_timeline_tweet_recs`.
|
||||
|
||||
The FRS overview diagram is depicted below:
|
||||
|
||||
![FRS_architecture.png](FRS_architecture.png)
|
||||
|
||||
|
||||
### Candidate Generation
|
||||
During this step, FRS utilizes various user signals and algorithms to identify candidates from all Twitter accounts. The candidate source folder is located at `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/`, with a README file provided within each candidate source folder.
|
||||
|
||||
### Filtering
|
||||
In this phase, FRS applies different filtering logic after generating account candidates to improve quality and health. Filtering may occur before and/or after the ranking step, with heavier filtering logic (e.g., higher latency) typically applied after the ranking step. The filters' folder is located at `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/predicates`.
|
||||
|
||||
### Ranking
|
||||
During this step, FRS employs both Machine Learning (ML) and heuristic rule-based candidate ranking. For the ML ranker, ML features are fetched beforehand (i.e., feature hydration),
|
||||
and a DataRecord (the Twitter-standard Machine Learning data format used to represent feature data, labels, and predictions when training or serving) is constructed for each <user, candidate> pair.
|
||||
These pairs are then sent to a separate ML prediction service, which houses the ML model trained offline.
|
||||
The ML prediction service returns a prediction score, reflecting how likely a user is to follow the candidate and then engage positively with them.
|
||||
This score is a weighted sum of p(follow|recommendation) and p(positive engagement|follow), and FRS uses this score to rank the candidates.
|
||||
|
||||
The rankers' folder is located at `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/rankers`.
|
||||
|
||||
### Transform
|
||||
In this phase, the sequence of candidates undergoes necessary transformations, such as deduplication, attaching social proof (e.g., "followed by XX user"), adding tracking tokens, and more.
|
||||
The transformers' folder can be found at `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/transforms`.
|
||||
|
||||
### Truncation
|
||||
During this final step, FRS trims the candidate pool to a specified size. This process ensures that only the most relevant and engaging candidates are presented to users while maintaining an optimal user experience.
|
||||
|
||||
By implementing these comprehensive steps and adapting to various use cases, the Follow Recommendations Service (FRS) effectively curates tailored suggestions for Twitter users, enhancing their overall experience and promoting meaningful connections within the platform.
|
@ -1,18 +0,0 @@
|
||||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/guava",
|
||||
"configapi/configapi-core/src/main/scala/com/twitter/timelines/configapi",
|
||||
"finagle/finagle-core/src/main",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
|
||||
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/candidate_source",
|
||||
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/recommendation",
|
||||
"stitch/stitch-core",
|
||||
],
|
||||
exports = [
|
||||
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/recommendation",
|
||||
],
|
||||
)
|
@ -1,36 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.base.EnrichedCandidateSource.toEnriched
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
|
||||
/**
 * Registry that indexes candidate sources by identifier and selects them on demand.
 *
 * Each registered source is wrapped with `observe(statsReceiver)` before it is stored.
 */
trait CandidateSourceRegistry[Target, Candidate] {

  val statsReceiver: StatsReceiver

  /** The candidate sources to register. */
  def sources: Set[CandidateSource[Target, Candidate]]

  /**
   * Observed candidate sources keyed by their identifier, built on first access.
   * Throws IllegalArgumentException when two registered sources share an identifier.
   */
  final lazy val candidateSources: Map[
    CandidateSourceIdentifier,
    CandidateSource[Target, Candidate]
  ] = {
    val byIdentifier = sources.map { source =>
      (source.identifier, source.observe(statsReceiver))
    }.toMap

    // toMap keeps a single entry per key, so a smaller map means identifiers collided
    val hasDuplicates = byIdentifier.size != sources.size
    if (hasDuplicates) {
      throw new IllegalArgumentException("Duplicate Candidate Source Identifiers")
    }

    byIdentifier
  }

  /**
   * Looks up the observed source for every requested identifier.
   * The map lookup throws for an identifier that was never registered.
   */
  def select(
    identifiers: Set[CandidateSourceIdentifier]
  ): Set[CandidateSource[Target, Candidate]] =
    identifiers.map(id => candidateSources(id))
}
|
@ -1,164 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.util.TimeoutException
|
||||
import scala.language.implicitConversions
|
||||
|
||||
/**
 * Wraps a CandidateSource with combinators for gating, stats observation, target and
 * candidate re-mapping, and timeout handling. Every combinator returns a new candidate
 * source that keeps the identifier of the wrapped one.
 */
class EnrichedCandidateSource[Target, Candidate](original: CandidateSource[Target, Candidate]) {

  /**
   * Gate the candidate source based on the Predicate of target.
   * It returns results only if the predicate returns Valid; otherwise the wrapped source
   * is not called and an empty result is returned.
   *
   * @param predicate predicate evaluated against the target before candidates are fetched
   * @return a candidate source with the same identifier as the wrapped one
   */
  def gate(predicate: Predicate[Target]): CandidateSource[Target, Candidate] = {
    new CandidateSource[Target, Candidate] {
      val identifier = original.identifier
      override def apply(target: Target): Stitch[Seq[Candidate]] = {
        predicate.apply(target).flatMap { result =>
          // `value` is true only for a Valid predicate result
          if (result.value) original(target) else Stitch.Nil
        }
      }
    }
  }

  /**
   * Profiles every call of the wrapped source through StatsUtil.profileStitchSeqResults,
   * under a stats scope named after the source identifier.
   */
  def observe(statsReceiver: StatsReceiver): CandidateSource[Target, Candidate] = {
    val originalIdentifier = original.identifier
    val stats = statsReceiver.scope(originalIdentifier.name)
    new CandidateSource[Target, Candidate] {
      val identifier = originalIdentifier
      override def apply(target: Target): Stitch[Seq[Candidate]] = {
        StatsUtil.profileStitchSeqResults[Candidate](original(target), stats)
      }
    }
  }

  /**
   * Map target type into new target type (1 to optional mapping).
   * A target that maps to None yields no candidates.
   */
  def stitchMapKey[Target2](
    targetMapper: Target2 => Stitch[Option[Target]]
  ): CandidateSource[Target2, Candidate] = {
    val targetsMapper: Target2 => Stitch[Seq[Target]] = { target =>
      targetMapper(target).map(_.toSeq)
    }
    stitchMapKeys(targetsMapper)
  }

  /**
   * Map target type into new target type (1 to many mapping).
   * The wrapped source is called once per mapped target and the results are concatenated.
   */
  def stitchMapKeys[Target2](
    targetMapper: Target2 => Stitch[Seq[Target]]
  ): CandidateSource[Target2, Candidate] = {
    new CandidateSource[Target2, Candidate] {
      val identifier = original.identifier
      override def apply(target: Target2): Stitch[Seq[Candidate]] = {
        for {
          mappedTargets <- targetMapper(target)
          results <- Stitch.traverse(mappedTargets)(original(_))
        } yield results.flatten
      }
    }
  }

  /**
   * Map target type into new target type (1 to many mapping), using a synchronous mapper.
   */
  def mapKeys[Target2](
    targetMapper: Target2 => Seq[Target]
  ): CandidateSource[Target2, Candidate] = {
    val stitchMapper: Target2 => Stitch[Seq[Target]] = { target =>
      Stitch.value(targetMapper(target))
    }
    stitchMapKeys(stitchMapper)
  }

  /**
   * Map candidate types to new type based on candidateMapper.
   * Candidates for which the mapper returns None are dropped.
   */
  def mapValues[Candidate2](
    candidateMapper: Candidate => Stitch[Option[Candidate2]]
  ): CandidateSource[Target, Candidate2] = {
    new CandidateSource[Target, Candidate2] {
      val identifier = original.identifier
      override def apply(target: Target): Stitch[Seq[Candidate2]] = {
        original(target).flatMap { candidates =>
          Stitch.traverse(candidates)(candidateMapper(_)).map(_.flatten)
        }
      }
    }
  }

  /**
   * Map candidate types to new type based on a synchronous candidateMapper.
   * Every candidate is kept.
   */
  def mapValue[Candidate2](
    candidateMapper: Candidate => Candidate2
  ): CandidateSource[Target, Candidate2] = {
    val stitchMapper: Candidate => Stitch[Option[Candidate2]] = { c =>
      Stitch.value(Some(candidateMapper(c)))
    }
    mapValues(stitchMapper)
  }

  /**
   * This method wraps the candidate source in a designated timeout so that a single candidate
   * source does not result in a timeout for the entire flow.
   *
   * On timeout the "timeout" counter for this source is incremented and an empty result is
   * returned. Any other failure is propagated.
   */
  def within(
    candidateTimeout: Duration,
    statsReceiver: StatsReceiver
  ): CandidateSource[Target, Candidate] = {
    val originalIdentifier = original.identifier
    val timeoutCounter =
      statsReceiver.counter(originalIdentifier.name, "timeout")

    new CandidateSource[Target, Candidate] {
      val identifier = originalIdentifier
      override def apply(target: Target): Stitch[Seq[Candidate]] = {
        original
          .apply(target)
          .within(candidateTimeout)(com.twitter.finagle.util.DefaultTimer)
          .rescue {
            case _: TimeoutException =>
              timeoutCounter.incr()
              Stitch.Nil
          }
      }
    }
  }

  /**
   * Same timeout wrapping as `within`, but additionally fails open on any Exception:
   * the failure is counted under "candidate_source_error" / source name / exception
   * simple class name, and an empty result is returned.
   */
  def failOpenWithin(
    candidateTimeout: Duration,
    statsReceiver: StatsReceiver
  ): CandidateSource[Target, Candidate] = {
    val originalIdentifier = original.identifier
    val timeoutCounter =
      statsReceiver.counter(originalIdentifier.name, "timeout")
    // Scope is built once here; the per-exception counter is still resolved on failure.
    val errorStats =
      statsReceiver.scope("candidate_source_error").scope(originalIdentifier.name)

    new CandidateSource[Target, Candidate] {
      val identifier = originalIdentifier
      override def apply(target: Target): Stitch[Seq[Candidate]] = {
        original
          .apply(target)
          .within(candidateTimeout)(com.twitter.finagle.util.DefaultTimer)
          .handle {
            // TimeoutException must be matched before the generic Exception case
            case _: TimeoutException =>
              timeoutCounter.incr()
              Seq.empty
            case e: Exception =>
              errorStats.counter(e.getClass.getSimpleName).incr()
              Seq.empty
          }
      }
    }
  }
}
|
||||
|
||||
/**
 * Companion holding the implicit conversion that makes the EnrichedCandidateSource
 * combinators available directly on a CandidateSource.
 */
object EnrichedCandidateSource {

  /** Wraps `original` in an [[EnrichedCandidateSource]]. */
  implicit def toEnriched[K, V](
    original: CandidateSource[K, V]
  ): EnrichedCandidateSource[K, V] = {
    new EnrichedCandidateSource(original)
  }
}
|
@ -1,17 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
|
||||
import com.twitter.follow_recommendations.common.models.FilterReason.ParamReason
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.timelines.configapi.Param
|
||||
|
||||
/**
 * Predicate driven by a boolean param of the request: Valid when the param is true,
 * otherwise Invalid with a ParamReason carrying the param's stat name.
 */
case class ParamPredicate[Request <: HasParams](param: Param[Boolean]) extends Predicate[Request] {

  def apply(request: Request): Stitch[PredicateResult] = {
    val paramEnabled = request.params(param)
    val result: PredicateResult =
      if (paramEnabled) PredicateResult.Valid
      else PredicateResult.Invalid(Set(ParamReason(param.statName)))
    Stitch.value(result)
  }
}
|
@ -1,282 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
|
||||
import com.twitter.finagle.stats.NullStatsReceiver
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.models.FilterReason
|
||||
import com.twitter.stitch.Arrow
|
||||
import com.twitter.stitch.Stitch
|
||||
|
||||
/**
 * An asynchronous check over items of type Q that yields a PredicateResult
 * (Valid, or Invalid with a set of filter reasons).
 */
trait Predicate[-Q] {

  /** Evaluates the predicate for a single item. */
  def apply(item: Q): Stitch[PredicateResult]

  /** Arrow form of the single-item `apply`. */
  def arrow: Arrow[Q, PredicateResult] = Arrow.apply(apply)

  /** Adapts this predicate to items of type K by first converting them with `mapper`. */
  def map[K](mapper: K => Q): Predicate[K] = {
    val mappedArrow = arrow.contramap(mapper)
    Predicate(mappedArrow)
  }

  /**
   * Convenience for evaluating the predicate over a batch of items.
   *
   * Declared final to avoid potential abuse usage.
   */
  final def batch(items: Seq[Q]): Stitch[Seq[PredicateResult]] =
    this.arrow.traverse(items)

  /**
   * Syntax sugar for functions which take in 2 inputs as a tuple.
   */
  def apply[Q1, Q2](item1: Q1, item2: Q2)(implicit ev: ((Q1, Q2)) => Q): Stitch[PredicateResult] = {
    apply((item1, item2))
  }

  /**
   * Logical AND, evaluated in sequence: `p` is only run when this predicate is Valid.
   * When this predicate is Invalid its reasons are returned and `p` is skipped.
   *
   * @param p predicate to run in sequence
   *
   * @return a new predicate object that represents the logical AND of both predicates
   */
  def andThen[Q1 <: Q](p: Predicate[Q1]): Predicate[Q1] = {
    Predicate { query: Q1 =>
      apply(query).flatMap {
        case invalid: PredicateResult.Invalid => Stitch.value(invalid)
        case PredicateResult.Valid => p(query)
      }
    }
  }

  /**
   * Logical OR, evaluated in sequence: `p` is only run when this predicate is Invalid.
   *
   * @param p predicate to run in sequence
   *
   * @return new predicate object that represents the logical OR of both predicates.
   *         if both are invalid, the reason would be the set of all invalid reasons.
   */
  def or[Q1 <: Q](p: Predicate[Q1]): Predicate[Q1] = {
    Predicate { query: Q1 =>
      apply(query).flatMap {
        case PredicateResult.Invalid(firstReasons) =>
          p(query).map {
            case PredicateResult.Invalid(secondReasons) =>
              PredicateResult.Invalid(firstReasons ++ secondReasons)
            case PredicateResult.Valid => PredicateResult.Valid
          }
        case PredicateResult.Valid => Stitch.value(PredicateResult.Valid)
      }
    }
  }

  /**
   * Runs this predicate only if the provided gating predicate is valid,
   * otherwise returns valid without evaluating this predicate.
   */
  def gate[Q1 <: Q](gatingPredicate: Predicate[Q1]): Predicate[Q1] = {
    Predicate { query: Q1 =>
      gatingPredicate(query).flatMap {
        case PredicateResult.Valid => apply(query)
        case _ => Stitch.value(PredicateResult.Valid)
      }
    }
  }

  /** Wraps this predicate's arrow with StatsUtil.profilePredicateResult. */
  def observe(statsReceiver: StatsReceiver): Predicate[Q] = {
    val profiled = StatsUtil.profilePredicateResult(this.arrow, statsReceiver)
    Predicate(profiled)
  }

  /**
   * Returns a predicate that yields `resultType` instead of failing
   * when evaluation raises an Exception.
   */
  def convertToFailOpenWithResultType(resultType: PredicateResult): Predicate[Q] = {
    Predicate { query: Q =>
      apply(query).handle { case _: Exception => resultType }
    }
  }

}
|
||||
|
||||
/** Predicate that is Valid for every item; returns the shared Predicate.AlwaysTrueStitch. */
class TruePredicate[Q] extends Predicate[Q] {
  override def apply(item: Q): Stitch[PredicateResult] = {
    Predicate.AlwaysTrueStitch
  }
}
|
||||
|
||||
/** Predicate that is Invalid for every item, always with the single given reason. */
class FalsePredicate[Q](reason: FilterReason) extends Predicate[Q] {

  // Built once and returned for every item
  val InvalidResult = Stitch.value(PredicateResult.Invalid(Set(reason)))

  override def apply(item: Q): Stitch[PredicateResult] = {
    InvalidResult
  }
}
|
||||
|
||||
/**
 * Factory methods and collection-level helpers for Predicate.
 */
object Predicate {

  // Shared pre-built Stitch holding a Valid result
  val AlwaysTrueStitch = Stitch.value(PredicateResult.Valid)

  // Names of the stat and the counter recorded by batchFilterTake
  val NumBatchesStat = "num_batches_stats"
  val NumBatchesCount = "num_batches"

  /** Builds a predicate from a plain function; its arrow is derived from that function. */
  def apply[Q](func: Q => Stitch[PredicateResult]): Predicate[Q] = new Predicate[Q] {
    override val arrow: Arrow[Q, PredicateResult] = Arrow(func)

    override def apply(item: Q): Stitch[PredicateResult] = func(item)
  }

  /** Builds a predicate from an arrow; single-item evaluation delegates to the arrow. */
  def apply[Q](outerArrow: Arrow[Q, PredicateResult]): Predicate[Q] = new Predicate[Q] {
    override val arrow: Arrow[Q, PredicateResult] = outerArrow

    override def apply(item: Q): Stitch[PredicateResult] = arrow(item)
  }

  /**
   * Given some items, this function
   * 1. chunks them up in groups of batchSize
   * 2. lazily applies the predicate on each group
   * 3. keeps the items of the group that pass the predicate
   * 4. stops once numToTake items have been collected.
   *
   * Groups after the one that satisfies numToTake are never evaluated. The number of
   * evaluated groups is recorded on both the NumBatchesStat stat and the NumBatchesCount
   * counter.
   *
   * @param items     items of type Q
   * @param predicate predicate that determines whether an item is acceptable
   * @param batchSize batch size to call the predicate with
   * @param numToTake max number of items to return
   * @param stats     stats receiver
   * @tparam Q type of item
   *
   * @return a Stitch of at most numToTake items that passed the predicate, in input order
   */
  def batchFilterTake[Q](
    items: Seq[Q],
    predicate: Predicate[Q],
    batchSize: Int,
    numToTake: Int,
    stats: StatsReceiver
  ): Stitch[Seq[Q]] = {

    // Pulls one batch at a time; the iterator is only advanced while more items are needed.
    def take(
      pending: Iterator[Stitch[Seq[Q]]],
      collected: Seq[Q],
      stillNeeded: Int,
      batchesDone: Int
    ): Stitch[(Seq[Q], Int)] = {
      if (!pending.hasNext) {
        Stitch.value((collected, batchesDone))
      } else {
        pending.next().flatMap { passed =>
          val taken = passed.take(stillNeeded)
          val combined = collected ++ taken
          val batchesNow = batchesDone + 1
          if (taken.size < stillNeeded) {
            take(pending, combined, stillNeeded - taken.size, batchesNow)
          } else {
            Stitch.value((combined, batchesNow))
          }
        }
      }
    }

    val batchedItems = items.view.grouped(batchSize)
    val batchedFutures = batchedItems.map { batch =>
      Stitch.traverse(batch)(predicate.apply).map { conds =>
        (batch.zip(conds)).withFilter(_._2.value).map(_._1)
      }
    }

    take(batchedFutures, Nil, numToTake, 0).map {
      case (filtered, numOfBatch) =>
        stats.stat(NumBatchesStat).add(numOfBatch)
        stats.counter(NumBatchesCount).incr(numOfBatch)
        filtered
    }
  }

  /**
   * filter a list of items based on the predicate
   *
   * @param items     a list of items
   * @param predicate predicate of the item
   * @tparam Q item type
   * @return the list of items that satisfy the predicate
   */
  def filter[Q](items: Seq[Q], predicate: Predicate[Q]): Stitch[Seq[Q]] = {
    predicate.batch(items).map { results =>
      items
        .zip(results)
        .filter { case (_, result) => result == PredicateResult.Valid }
        .map { case (item, _) => item }
    }
  }

  /**
   * filter a list of items based on the predicate given the target
   *
   * @param target    target item
   * @param items     a list of items
   * @param predicate predicate of the (target, item) pair
   * @tparam Q item type
   * @return the list of items that satisfy the predicate given the target
   */
  def filter[T, Q](target: T, items: Seq[Q], predicate: Predicate[(T, Q)]): Stitch[Seq[Q]] = {
    val pairs = items.map(item => (target, item))
    predicate.batch(pairs).map { results =>
      items
        .zip(results)
        .filter { case (_, result) => result == PredicateResult.Valid }
        .map { case (item, _) => item }
    }
  }

  /**
   * Returns a predicate that is Valid iff every input predicate is Valid for the element,
   * ie. it is an AND operation. When it is Invalid, the reasons of all invalid predicates
   * are merged.
   *
   * This is done concurrently.
   *
   * @param predicates list of predicates
   * @tparam Q Type parameter
   *
   * @return new predicate object that is the logical "and" of the input predicates
   */
  def andConcurrently[Q](predicates: Seq[Predicate[Q]]): Predicate[Q] = {
    Predicate { query: Q =>
      Stitch.traverse(predicates)(p => p(query)).map { predicateResults =>
        val reasonSets = predicateResults.collect {
          case PredicateResult.Invalid(reasons) => reasons
        }
        if (reasonSets.nonEmpty) {
          PredicateResult.Invalid(reasonSets.reduce(_ ++ _))
        } else {
          PredicateResult.Valid
        }
      }
    }
  }
}
|
||||
|
||||
/**
 * Applies the underlying predicate only when `gate` returns true for the item;
 * otherwise the item is reported as Valid without evaluating the underlying predicate.
 *
 * Counters recorded on `stats`:
 *  - "underlying_total":   the underlying predicate was evaluated
 *  - "underlying_valid":   the underlying predicate returned Valid
 *  - "underlying_invalid": the underlying predicate returned Invalid
 *  - "not_gated":          `gate` was false and the underlying predicate was skipped
 *
 * @param underlyingPredicate predicate to run for gated items
 * @param stats               receiver for the counters above
 */
abstract class GatedPredicateBase[Q](
  underlyingPredicate: Predicate[Q],
  stats: StatsReceiver = NullStatsReceiver)
    extends Predicate[Q] {

  /** Decides whether the underlying predicate should run for this item. */
  def gate(item: Q): Boolean

  val underlyingPredicateTotal = stats.counter("underlying_total")
  val underlyingPredicateValid = stats.counter("underlying_valid")
  val underlyingPredicateInvalid = stats.counter("underlying_invalid")
  val notGatedCounter = stats.counter("not_gated")

  val ValidStitch: Stitch[PredicateResult.Valid.type] = Stitch.value(PredicateResult.Valid)

  override def apply(item: Q): Stitch[PredicateResult] = {
    if (gate(item)) {
      underlyingPredicateTotal.incr()
      underlyingPredicate(item).map { result =>
        // Record the outcome; the valid/invalid counters were previously never incremented.
        if (result.value) underlyingPredicateValid.incr()
        else underlyingPredicateInvalid.incr()
        result
      }
    } else {
      notGatedCounter.incr()
      ValidStitch
    }
  }

}
|
@ -1,18 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
|
||||
import com.twitter.follow_recommendations.common.models.FilterReason
|
||||
|
||||
/** Outcome of evaluating a predicate: either Valid or Invalid with filter reasons. */
sealed trait PredicateResult {

  /** Boolean view of the result: true for Valid, false for Invalid. */
  def value: Boolean
}

object PredicateResult {

  /** The item passed the predicate. */
  case object Valid extends PredicateResult {
    override val value: Boolean = true
  }

  /**
   * The item did not pass the predicate.
   *
   * @param reasons reasons for the rejection; empty by default
   */
  case class Invalid(
    reasons: Set[FilterReason] = Set.empty[FilterReason])
      extends PredicateResult {
    override val value: Boolean = false
  }
}
|
@ -1,90 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.util.TimeoutException
|
||||
|
||||
/**
 * Ranker is a special kind of transform that would only change the order of a list of items.
 * If a single item is given, it "may" attach additional scoring information to the item.
 *
 * @tparam Target target to recommend the candidates
 * @tparam Candidate candidate type to rank
 */
trait Ranker[Target, Candidate] extends Transform[Target, Candidate] { ranker =>

  /** Orders the candidates for the given target. */
  def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]]

  /** Transform contract of a ranker: delegates to `rank`. */
  override def transform(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] =
    rank(target, candidates)

  /**
   * Returns a ranker that records the input size on the Transform.InputCandidatesCount
   * counter and the Transform.InputCandidatesStat stat, then profiles the ranking call
   * through StatsUtil.profileStitchSeqResults.
   */
  override def observe(statsReceiver: StatsReceiver): Ranker[Target, Candidate] =
    new Ranker[Target, Candidate] {
      override def rank(target: Target, items: Seq[Candidate]): Stitch[Seq[Candidate]] = {
        val inputSize = items.size
        statsReceiver.counter(Transform.InputCandidatesCount).incr(inputSize)
        statsReceiver.stat(Transform.InputCandidatesStat).add(inputSize)
        StatsUtil.profileStitchSeqResults(ranker.rank(target, items), statsReceiver)
      }
    }

  /** Returns a ranker producing this ranker's output in reverse order. */
  def reverse: Ranker[Target, Candidate] = new Ranker[Target, Candidate] {
    def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] = {
      val ranked = ranker.rank(target, candidates)
      ranked.map(_.reverse)
    }
  }

  /** Returns a ranker that feeds this ranker's output into `other`. */
  def andThen(other: Ranker[Target, Candidate]): Ranker[Target, Candidate] =
    new Ranker[Target, Candidate] {
      def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] =
        ranker.rank(target, candidates).flatMap(other.rank(target, _))
    }

  /**
   * This method wraps the Ranker in a designated timeout.
   * If the ranker timeouts, the "timeout" counter is incremented and the original
   * candidates are returned directly, instead of failing the whole recommendation flow.
   */
  def within(timeout: Duration, statsReceiver: StatsReceiver): Ranker[Target, Candidate] = {
    val timeoutCounter = statsReceiver.counter("timeout")
    new Ranker[Target, Candidate] {
      override def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] = {
        val bounded = ranker
          .rank(target, candidates)
          .within(timeout)(com.twitter.finagle.util.DefaultTimer)

        bounded.rescue {
          case _: TimeoutException =>
            timeoutCounter.incr()
            Stitch.value(candidates)
        }
      }
    }
  }
}
|
||||
|
||||
object Ranker {

  /**
   * Builds a ranker that first applies `transformer` to the candidates and then
   * ranks the transformed candidates with `ranker`.
   */
  def chain[Target, Candidate](
    transformer: Transform[Target, Candidate],
    ranker: Ranker[Target, Candidate]
  ): Ranker[Target, Candidate] =
    new Ranker[Target, Candidate] {
      def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] = {
        val transformed = transformer.transform(target, candidates)
        transformed.flatMap(ranker.rank(target, _))
      }
    }
}
|
||||
|
||||
/** Ranker that returns the candidates unchanged, in their original order. */
class IdentityRanker[Target, Candidate] extends Ranker[Target, Candidate] {
  def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] = {
    Stitch.value(candidates)
  }
}
|
@ -1,250 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.UniversalNoun
|
||||
import com.twitter.product_mixer.core.model.common.identifier.RecommendationPipelineIdentifier
|
||||
import com.twitter.product_mixer.core.pipeline.recommendation.RecommendationPipelineResult
|
||||
import com.twitter.product_mixer.core.quality_factor.QualityFactorObserver
|
||||
import com.twitter.stitch.Stitch
|
||||
|
||||
/**
 * Result settings for a recommendation flow.
 *
 * @param desiredCandidateCount   num of desired candidates to return
 * @param batchForCandidatesCheck batch size for candidates check
 */
case class RecommendationResultsConfig(
  desiredCandidateCount: Int,
  batchForCandidatesCheck: Int)
|
||||
|
||||
/**
 * Minimal contract of a recommendation flow: turn a request of type Target into a
 * recommendation pipeline result over candidates of type Candidate.
 */
trait BaseRecommendationFlow[Target, Candidate <: UniversalNoun[Long]] {

  val identifier = RecommendationPipelineIdentifier("RecommendationFlow")

  /** Runs the flow for the given request. */
  def process(
    pipelineRequest: Target
  ): Stitch[RecommendationPipelineResult[Candidate, Seq[Candidate]]]

  /**
   * Adapts this flow to requests of type Target2 by converting each request with `fn`
   * before delegating to this flow's `process`.
   */
  def mapKey[Target2](fn: Target2 => Target): BaseRecommendationFlow[Target2, Candidate] = {
    val underlying = this
    new BaseRecommendationFlow[Target2, Candidate] {
      override def process(
        pipelineRequest: Target2
      ): Stitch[RecommendationPipelineResult[Candidate, Seq[Candidate]]] = {
        val mappedRequest = fn(pipelineRequest)
        underlying.process(mappedRequest)
      }
    }
  }
}
|
||||
|
||||
/**
 * Template for a typical recommendation flow that fetches, filters, ranks and transforms
 * candidates. Implementations supply the individual steps:
 *
 * 1. targetEligibility: decide whether the target request is eligible at all
 * 2. candidateSources: fetch candidates from the sources chosen for the target
 * 3. preRankerCandidateFilter: light filtering of the candidates
 * 4. selectRanker: ranking of the candidates (may itself be composed of several stages)
 * 5. postRankerTransform: e.g. deduping, grouping, rule based promotions / demotions
 * 6. validateCandidates: heavier filters, applied batch by batch until enough candidates
 *    have been collected (see resultsConfig)
 * 7. transformResults: bring the individual candidates into the desired final form
 *
 * A step that is not needed can be filled with an identity implementation
 * (e.g. IdentityRanker when no ranking is required).
 *
 * An implementation may override the flow to add further steps, but it is recommended to
 * add a well justified step to this base flow instead, or to merge a minor change into an
 * existing step.
 *
 * @tparam Target type of target request
 * @tparam Candidate type of candidate to return
 */
trait RecommendationFlow[Target, Candidate <: UniversalNoun[Long]]
    extends BaseRecommendationFlow[Target, Candidate]
    with SideEffectsUtil[Target, Candidate] {

  /**
   * Hook to update or enrich the request before the flow runs.
   * The default returns the target unchanged.
   */
  protected def updateTarget(target: Target): Stitch[Target] = Stitch.value(target)

  /** Predicate deciding whether the target is eligible for this flow. */
  protected def targetEligibility: Predicate[Target]

  /** Candidate sources to query for the given target. */
  protected def candidateSources(target: Target): Seq[CandidateSource[Target, Candidate]]

  /** Filter applied to the merged candidates before ranking. */
  protected def preRankerCandidateFilter: Predicate[(Target, Candidate)]

  /** Ranker to use for the given target. */
  protected def selectRanker(target: Target): Ranker[Target, Candidate]

  /** Transform applied to the ranked candidates (e.g. deduping, grouping). */
  protected def postRankerTransform: Transform[Target, Candidate]

  /**
   * Filter applied to the transformed candidates before results are returned.
   *
   * Heavy filters (e.g. the SGS filter) can be applied in this step.
   */
  protected def validateCandidates: Predicate[(Target, Candidate)]

  /** Final transform turning the truncated candidates into the returned results. */
  protected def transformResults: Transform[Target, Candidate]

  /** Result configuration (desired count and check batch size) for the given target. */
  protected def resultsConfig(target: Target): RecommendationResultsConfig

  /**
   * Optional observer tracking the quality factor of the pipeline.
   * None by default, in which case no quality factor is recorded.
   */
  protected def qualityFactorObserver: Option[QualityFactorObserver] = None

  def statsReceiver: StatsReceiver

  // High level monitoring for the whole flow; every stage reports under its own scope.
  // Monitoring for the individual components has to be added by the implementations.
  import RecommendationFlow._

  lazy val additionalCandidatesStats: StatsReceiver =
    statsReceiver.scope(AdditionalCandidatesStats)
  lazy val targetEligibilityStats: StatsReceiver =
    statsReceiver.scope(TargetEligibilityStats)
  lazy val candidateGenerationStats: StatsReceiver =
    statsReceiver.scope(CandidateGenerationStats)
  lazy val preRankerFilterStats: StatsReceiver =
    statsReceiver.scope(PreRankerFilterStats)
  lazy val rankerStats: StatsReceiver =
    statsReceiver.scope(RankerStats)
  lazy val postRankerTransformStats: StatsReceiver =
    statsReceiver.scope(PostRankerTransformStats)
  lazy val filterAndTakeStats: StatsReceiver =
    statsReceiver.scope(FilterAndTakeStats)
  lazy val transformResultsStats: StatsReceiver =
    statsReceiver.scope(TransformResultsStats)

  lazy val overallStats: StatsReceiver = statsReceiver.scope(OverallStats)

  import StatsUtil._

  /**
   * Entry point of the flow.
   *
   * Updates the target, checks its eligibility and, when it is valid, runs the candidate
   * pipeline; an invalid target yields an empty candidate list. The candidates are wrapped
   * into a pipeline result, and the whole run is profiled under the overall scope and
   * reported to the quality factor observer when one is configured.
   */
  override def process(
    pipelineRequest: Target
  ): Stitch[RecommendationPipelineResult[Candidate, Seq[Candidate]]] = {
    val eligibleCandidates: Stitch[Seq[Candidate]] =
      updateTarget(pipelineRequest).flatMap { target =>
        profilePredicateResult(targetEligibility(target), targetEligibilityStats).flatMap {
          case PredicateResult.Valid => processValidTarget(target, Seq.empty)
          case PredicateResult.Invalid(_) => Stitch.Nil
        }
      }

    val pipelineResult: Stitch[RecommendationPipelineResult[Candidate, Seq[Candidate]]] =
      profileStitchSeqResults(eligibleCandidates, overallStats).map { candidates =>
        RecommendationPipelineResult.empty.withResult(candidates)
      }

    observeStitchQualityFactor(pipelineResult, qualityFactorObserver, overallStats)
  }

  /**
   * Runs the candidate pipeline for a target that passed the eligibility check:
   *
   * 1. fetch candidates from all selected candidate sources
   * 2. prepend the additional candidates to the fetched ones
   * 3. apply the pre-ranker filter
   * 4. rank
   * 5. apply the post-ranker transform
   * 6. validate and truncate according to the results config
   * 7. transform the remaining candidates into results
   * 8. apply side effects with all intermediate results
   *
   * Each stage is profiled under its own stats scope.
   *
   * @param target the (already updated) target
   * @param additionalCandidates candidates to blend in ahead of the fetched ones
   */
  protected def processValidTarget(
    target: Target,
    additionalCandidates: Seq[Candidate]
  ): Stitch[Seq[Candidate]] = {
    val sources = candidateSources(target)

    for {
      fetched <- profileStitchSeqResults(
        Stitch.traverse(sources)(source => source(target)).map(_.flatten),
        candidateGenerationStats
      )
      profiledAdditional = profileSeqResults(additionalCandidates, additionalCandidatesStats)
      merged = profiledAdditional ++ fetched
      preFiltered <- profileStitchSeqResults(
        Predicate.filter(target, merged, preRankerCandidateFilter),
        preRankerFilterStats
      )
      ranked <- profileStitchSeqResults(
        selectRanker(target).rank(target, preFiltered),
        rankerStats
      )
      postRanked <- profileStitchSeqResults(
        postRankerTransform.transform(target, ranked),
        postRankerTransformStats
      )
      taken <- profileStitchSeqResults(
        take(target, postRanked, resultsConfig(target)),
        filterAndTakeStats
      )
      finalResults <- profileStitchSeqResults(
        transformResults.transform(target, taken),
        transformResultsStats
      )
      _ <- applySideEffects(
        target,
        sources,
        fetched,
        merged,
        preFiltered,
        ranked,
        postRanked,
        taken,
        finalResults
      )
    } yield finalResults
  }

  /**
   * Pairs every candidate with the target and hands the pairs to Predicate.batchFilterTake
   * together with validateCandidates, the configured batch size and the desired candidate
   * count; only the candidate part of the returned pairs is kept.
   *
   * Note that the unscoped statsReceiver is passed to batchFilterTake.
   */
  private[this] def take(
    target: Target,
    candidates: Seq[Candidate],
    config: RecommendationResultsConfig
  ): Stitch[Seq[Candidate]] = {
    val targetCandidatePairs = candidates.map(candidate => (target, candidate))
    Predicate
      .batchFilterTake(
        targetCandidatePairs,
        validateCandidates,
        config.batchForCandidatesCheck,
        config.desiredCandidateCount,
        statsReceiver
      ).map { validPairs =>
        validPairs.map { case (_, candidate) => candidate }
      }
  }
}
|
||||
|
||||
/**
 * Names of the stats scopes used by the RecommendationFlow trait, one per stage plus one
 * for the flow as a whole.
 */
object RecommendationFlow {

  // scopes of the individual stages
  val AdditionalCandidatesStats = "additional_candidates"
  val TargetEligibilityStats = "target_eligibility"
  val CandidateGenerationStats = "candidate_generation"
  val PreRankerFilterStats = "pre_ranker_filter"
  val RankerStats = "ranker"
  val PostRankerTransformStats = "post_ranker_transform"
  val FilterAndTakeStats = "filter_and_take"
  val TransformResultsStats = "transform_results"

  // scope covering the complete flow
  val OverallStats = "overall"
}
|
@ -1,24 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.stitch.Stitch
|
||||
|
||||
/**
 * Hook for applying side effects to the intermediate candidate results of a
 * recommendation flow pipeline.
 *
 * @tparam Target target to recommend the candidates
 * @tparam Candidate candidate type to rank
 */
trait SideEffectsUtil[Target, Candidate] {

  /**
   * Receives the target, the candidate sources and the candidate list of every pipeline
   * stage. The default implementation has no effect and returns `Stitch.Unit`;
   * implementations override it to act on the intermediate results.
   */
  def applySideEffects(
    target: Target,
    candidateSources: Seq[CandidateSource[Target, Candidate]],
    candidatesFromCandidateSources: Seq[Candidate],
    mergedCandidates: Seq[Candidate],
    filteredCandidates: Seq[Candidate],
    rankedCandidates: Seq[Candidate],
    transformedCandidates: Seq[Candidate],
    truncatedCandidates: Seq[Candidate],
    results: Seq[Candidate]
  ): Stitch[Unit] = {
    Stitch.Unit
  }
}
|
@ -1,272 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
import com.twitter.finagle.stats.Stat
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.product_mixer.core.quality_factor.QualityFactorObserver
|
||||
import com.twitter.stitch.Arrow
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.util.Stopwatch
|
||||
import java.util.concurrent.TimeUnit
|
||||
import scala.util.control.NonFatal
|
||||
|
||||
/**
 * Helpers for recording request / latency / success / failure / result-size stats around
 * stitches and arrows.
 */
object StatsUtil {
  val LatencyName = "latency_ms"
  val RequestName = "requests"
  val SuccessName = "success"
  val FailureName = "failures"
  val ResultsName = "results"
  val ResultsStat = "results_stat"
  val EmptyResultsName = "empty"
  val NonEmptyResultsName = "non_empty"
  val ValidCount = "valid"
  val InvalidCount = "invalid"
  val InvalidHasReasons = "has_reasons"
  val Reasons = "reasons"
  val QualityFactorStat = "quality_factor_stat"
  val QualityFactorCounts = "quality_factor_counts"

  /**
   * Counts a failure, both in total and per (clean) exception class name.
   */
  private def recordFailure(stat: StatsReceiver, e: Throwable): Unit = {
    stat.counter(FailureName).incr()
    stat.scope(FailureName).counter(getCleanClassName(e)).incr()
  }

  /**
   * Counts a single predicate result: valid / invalid, and for invalid results one
   * `has_reasons` increment plus one per-reason increment for every filter reason.
   */
  private def recordPredicateResult(
    statsReceiver: StatsReceiver,
    result: PredicateResult
  ): Unit = result match {
    case PredicateResult.Valid =>
      statsReceiver.counter(ValidCount).incr()
    case PredicateResult.Invalid(reasons) =>
      statsReceiver.counter(InvalidCount).incr()
      reasons.foreach { filterReason =>
        statsReceiver.counter(InvalidHasReasons).incr()
        statsReceiver.scope(Reasons).counter(filterReason.reason).incr()
      }
  }

  /**
   * Helper function for timing a stitch, returning the original stitch.
   *
   * Records a request count, the latency in milliseconds and a success or failure count
   * (failures additionally per exception class).
   */
  def profileStitch[T](stitch: Stitch[T], stat: StatsReceiver): Stitch[T] = {
    Stitch
      .time(stitch)
      .map {
        case (response, stitchRunDuration) =>
          stat.counter(RequestName).incr()
          stat.stat(LatencyName).add(stitchRunDuration.inMilliseconds)
          response
            .onSuccess { _ => stat.counter(SuccessName).incr() }
            .onFailure { e => recordFailure(stat, e) }
      }
      .lowerFromTry
  }

  /**
   * Helper function for timing an arrow, returning the original arrow.
   *
   * Records the same stats as the stitch variant.
   */
  def profileArrow[T, U](arrow: Arrow[T, U], stat: StatsReceiver): Arrow[T, U] = {
    Arrow
      .time(arrow)
      .map {
        case (response, stitchRunDuration) =>
          stat.counter(RequestName).incr()
          stat.stat(LatencyName).add(stitchRunDuration.inMilliseconds)
          response
            .onSuccess { _ => stat.counter(SuccessName).incr() }
            .onFailure { e => recordFailure(stat, e) }
      }
      .lowerFromTry
  }

  /**
   * Helper function to count and track the distribution of results.
   *
   * The result counter is always increased by the number of results. Empty results only
   * bump the `empty` counter; non-empty results are added to the size distribution and
   * bump the `non_empty` counter.
   *
   * @param size function computing the number of results
   * @return the unchanged `results`
   */
  def profileResults[T](results: T, stat: StatsReceiver, size: T => Int): T = {
    val numResults = size(results)
    stat.counter(ResultsName).incr(numResults)
    if (numResults == 0) {
      stat.counter(EmptyResultsName).incr()
    } else {
      stat.stat(ResultsStat).add(numResults)
      stat.counter(NonEmptyResultsName).incr()
    }
    results
  }

  /**
   * Helper function to count and track the distribution of a list of results
   */
  def profileSeqResults[T](results: Seq[T], stat: StatsReceiver): Seq[T] = {
    profileResults[Seq[T]](results, stat, _.size)
  }

  /**
   * Helper function for timing a stitch and count the number of results, returning the original stitch.
   */
  def profileStitchResults[T](stitch: Stitch[T], stat: StatsReceiver, size: T => Int): Stitch[T] = {
    profileStitch(stitch, stat).onSuccess { results => profileResults(results, stat, size) }
  }

  /**
   * Helper function for timing an arrow and count the number of results, returning the original arrow.
   */
  def profileArrowResults[T, U](
    arrow: Arrow[T, U],
    stat: StatsReceiver,
    size: U => Int
  ): Arrow[T, U] = {
    profileArrow(arrow, stat).onSuccess { results => profileResults(results, stat, size) }
  }

  /**
   * Helper function for timing a stitch and count a seq of results, returning the original stitch.
   */
  def profileStitchSeqResults[T](stitch: Stitch[Seq[T]], stat: StatsReceiver): Stitch[Seq[T]] = {
    profileStitchResults[Seq[T]](stitch, stat, _.size)
  }

  /**
   * Helper function for timing a stitch and count optional results, returning the original stitch.
   */
  def profileStitchOptionalResults[T](
    stitch: Stitch[Option[T]],
    stat: StatsReceiver
  ): Stitch[Option[T]] = {
    profileStitchResults[Option[T]](stitch, stat, _.size)
  }

  /**
   * Helper function for timing a stitch and count a map of results, returning the original stitch.
   */
  def profileStitchMapResults[K, V](
    stitch: Stitch[Map[K, V]],
    stat: StatsReceiver
  ): Stitch[Map[K, V]] = {
    profileStitchResults[Map[K, V]](stitch, stat, _.size)
  }

  /**
   * Short class name of `obj` without a trailing `$`, used as a counter name.
   *
   * Anonymous classes have an empty simple name; in that case the last segment of the
   * binary class name is used instead so that the counter never gets an empty name.
   */
  def getCleanClassName(obj: Object): String = {
    val clazz = obj.getClass
    val simpleName = clazz.getSimpleName.stripSuffix("$")
    if (simpleName.nonEmpty) {
      simpleName
    } else {
      val binaryName = clazz.getName
      binaryName.substring(binaryName.lastIndexOf('.') + 1).stripSuffix("$")
    }
  }

  /**
   * Helper function for timing a stitch and count a list of PredicateResult
   */
  def profilePredicateResults(
    predicateResult: Stitch[Seq[PredicateResult]],
    statsReceiver: StatsReceiver
  ): Stitch[Seq[PredicateResult]] = {
    profileStitch[Seq[PredicateResult]](
      predicateResult,
      statsReceiver
    ).onSuccess { results =>
      results.foreach(recordPredicateResult(statsReceiver, _))
    }
  }

  /**
   * Helper function for timing a stitch and count individual PredicateResult
   */
  def profilePredicateResult(
    predicateResult: Stitch[PredicateResult],
    statsReceiver: StatsReceiver
  ): Stitch[PredicateResult] = {
    // wrapped into a one-element Seq above, so `head` is always defined here
    profilePredicateResults(
      predicateResult.map(Seq(_)),
      statsReceiver
    ).map(_.head)
  }

  /**
   * Helper function for timing an arrow and count a list of PredicateResult
   */
  def profilePredicateResults[Q](
    predicateResult: Arrow[Q, Seq[PredicateResult]],
    statsReceiver: StatsReceiver
  ): Arrow[Q, Seq[PredicateResult]] = {
    profileArrow[Q, Seq[PredicateResult]](
      predicateResult,
      statsReceiver
    ).onSuccess { results =>
      results.foreach(recordPredicateResult(statsReceiver, _))
    }
  }

  /**
   * Helper function for timing an arrow and count individual PredicateResult
   */
  def profilePredicateResult[Q](
    predicateResult: Arrow[Q, PredicateResult],
    statsReceiver: StatsReceiver
  ): Arrow[Q, PredicateResult] = {
    // wrapped into a one-element Seq above, so `head` is always defined here
    profilePredicateResults(
      predicateResult.map(Seq(_)),
      statsReceiver
    ).map(_.head)
  }

  /**
   * Helper function for timing a stitch code block
   *
   * Unlike the non-curried overload, this variant adds the result size to the
   * distribution even for empty results, does not record success / result counters, and
   * scopes failures by the fully qualified exception class name.
   */
  def profileStitchSeqResults[T](
    stats: StatsReceiver
  )(
    block: => Stitch[Seq[T]]
  ): Stitch[Seq[T]] = {
    stats.counter(RequestName).incr()
    profileStitch(stats.stat(LatencyName), TimeUnit.MILLISECONDS) {
      block
        .onSuccess { r =>
          if (r.isEmpty) stats.counter(EmptyResultsName).incr()
          stats.stat(ResultsStat).add(r.size)
        }
        .onFailure { e =>
          stats.counter(FailureName).incr()
          stats.scope(FailureName).counter(e.getClass.getName).incr()
        }
    }
  }

  /**
   * Time a given asynchronous `f` using the given `unit`.
   *
   * The elapsed time is recorded when the stitch responds. A non-fatal exception thrown
   * while building the stitch is timed as well and turned into a failed stitch.
   */
  def profileStitch[A](stat: Stat, unit: TimeUnit)(f: => Stitch[A]): Stitch[A] = {
    val start = Stopwatch.timeNanos()
    try {
      f.respond { _ => stat.add(unit.convert(Stopwatch.timeNanos() - start, TimeUnit.NANOSECONDS)) }
    } catch {
      case NonFatal(e) =>
        stat.add(unit.convert(Stopwatch.timeNanos() - start, TimeUnit.NANOSECONDS))
        Stitch.exception(e)
    }
  }

  /**
   * Reports the outcome and run time of `stitch` to the quality factor observer, when one
   * is given, and records the observer's current quality factor (scaled by 10000) as a
   * stat. Without an observer the stitch is returned untouched.
   */
  def observeStitchQualityFactor[T](
    stitch: Stitch[T],
    qualityFactorObserverOption: Option[QualityFactorObserver],
    statsReceiver: StatsReceiver
  ): Stitch[T] = {
    qualityFactorObserverOption
      .map { observer =>
        Stitch
          .time(stitch)
          .map {
            case (response, stitchRunDuration) =>
              observer(response, stitchRunDuration)
              val qfVal = observer.qualityFactor.currentValue.floatValue() * 10000
              statsReceiver.counter(QualityFactorCounts).incr()
              statsReceiver
                .stat(QualityFactorStat)
                .add(qfVal)
              response
          }
          .lowerFromTry
      }.getOrElse(stitch)
  }
}
|
@ -1,85 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.base
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.timelines.configapi.Param
|
||||
|
||||
/**
 * Transforms a single candidate, or a list of candidates, for a target.
 *
 * @tparam T target type
 * @tparam C candidate type
 */
trait Transform[-T, C] {

  /**
   * Transforms a single item. The default wraps the item into a one-element list, runs
   * `transform` and takes the first element of its result; override it when a dedicated
   * single-item implementation exists.
   */
  def transformItem(target: T, item: C): Stitch[C] =
    transform(target, Seq(item)).map(_.head)

  /** Transforms a list of items. */
  def transform(target: T, items: Seq[C]): Stitch[Seq[C]]

  /**
   * Adapts this transform to another target type by converting each incoming target with
   * `mapper` before delegating.
   */
  def mapTarget[T2](mapper: T2 => T): Transform[T2, C] = {
    val underlying = this
    new Transform[T2, C] {
      override def transformItem(target: T2, item: C): Stitch[C] =
        underlying.transformItem(mapper(target), item)

      override def transform(target: T2, items: Seq[C]): Stitch[Seq[C]] =
        underlying.transform(mapper(target), items)
    }
  }

  /**
   * Sequential composition: this transform runs first and its output is fed into `other`.
   */
  def andThen[T1 <: T](other: Transform[T1, C]): Transform[T1, C] = {
    val first = this
    new Transform[T1, C] {
      override def transformItem(target: T1, item: C): Stitch[C] = {
        first.transformItem(target, item).flatMap { transformed =>
          other.transformItem(target, transformed)
        }
      }

      override def transform(target: T1, items: Seq[C]): Stitch[Seq[C]] = {
        first.transform(target, items).flatMap { transformed =>
          other.transform(target, transformed)
        }
      }
    }
  }

  /**
   * Wraps this transform with stats. The list variant records the input size as counter
   * and distribution and profiles the resulting list; the single-item variant increments
   * the input counter and profiles the call.
   */
  def observe(statsReceiver: StatsReceiver): Transform[T, C] = {
    val observed = this
    new Transform[T, C] {
      override def transform(target: T, items: Seq[C]): Stitch[Seq[C]] = {
        statsReceiver.counter(Transform.InputCandidatesCount).incr(items.size)
        statsReceiver.stat(Transform.InputCandidatesStat).add(items.size)
        val output = observed.transform(target, items)
        StatsUtil.profileStitchSeqResults(output, statsReceiver)
      }

      override def transformItem(target: T, item: C): Stitch[C] = {
        statsReceiver.counter(Transform.InputCandidatesCount).incr()
        val output = observed.transformItem(target, item)
        StatsUtil.profileStitch(output, statsReceiver)
      }
    }
  }
}
|
||||
|
||||
/**
 * Transform that can be switched on and off through a boolean param of the target.
 */
trait GatedTransform[T <: HasParams, C] extends Transform[T, C] {

  /**
   * Returns a transform that applies this transform when `param` is true for the target
   * and passes the items through unchanged otherwise.
   */
  def gated(param: Param[Boolean]): Transform[T, C] = {
    val underlying = this
    new Transform[T, C] {
      override def transform(target: T, items: Seq[C]): Stitch[Seq[C]] = {
        val enabled = target.params(param)
        if (enabled) underlying.transform(target, items)
        else Stitch.value(items)
      }
    }
  }
}
|
||||
|
||||
/** Stat names recorded by `Transform.observe` for the transform input. */
object Transform {
  // counter of input candidates
  val InputCandidatesCount = "input_candidates"
  // distribution of the input list size
  val InputCandidatesStat = "input_candidates_stat"
}
|
||||
|
||||
/** Transform that returns the items unchanged. */
class IdentityTransform[T, C] extends Transform[T, C] {
  override def transform(target: T, items: Seq[C]): Stitch[Seq[C]] = {
    Stitch.value(items)
  }
}
|
@ -1,9 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
|
||||
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Feature-switch params of the address book candidate sources. */
object AddressBookParams {

  /**
   * For display locations that should read only from the ABV2 client and ignore Manhattan.
   * At the time of writing only the ABUploadInjection display location does so.
   * Defaults to false.
   */
  object ReadFromABV2Only
      extends FSParam[Boolean]("addressbook_read_only_from_abv2", false)
}
|
@ -1,27 +0,0 @@
|
||||
# Build target for the address book candidate sources.
scala_library(
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/google/inject:guice",
        "3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
        "3rdparty/jvm/net/codingwell:scala-guice",
        "3rdparty/jvm/org/slf4j:slf4j-api",
        "finatra/inject/inject-core/src/main/scala",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/addressbook",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/email_storage_service",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/gizmoduck",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/phone_storage_service",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
        "follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/deciders",
        "src/thrift/com/twitter/hermit/candidate:hermit-candidate-scala",
        "src/thrift/com/twitter/hermit/usercontacts:hermit-usercontacts-scala",
        "strato/config/columns/onboarding/userrecs:userrecs-strato-client",
        "strato/src/main/scala/com/twitter/strato/client",
        "util/util-slf4j-api/src/main/scala",
    ],
)
|
@ -1,74 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
|
||||
|
||||
import com.twitter.finagle.stats.NullStatsReceiver
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.addressbook.AddressBookParams.ReadFromABV2Only
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.AddressbookClient
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.models.EdgeType
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.models.RecordIdentifier
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueWithStats
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.ForwardEmailBookClientColumn
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Candidate source returning users from the requesting user's forward email book.
 */
@Singleton
class ForwardEmailBookSource @Inject() (
  forwardEmailBookClientColumn: ForwardEmailBookClientColumn,
  addressBookClient: AddressbookClient,
  statsReceiver: StatsReceiver = NullStatsReceiver)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = ForwardEmailBookSource.Identifier

  // stats are scoped by the simple name of this class
  private val stats: StatsReceiver = statsReceiver.scope(getClass.getSimpleName)

  /**
   * Generate a list of candidates for the target.
   *
   * Without a user id on the target the result is empty. Otherwise the address book
   * client is queried for the user's forward email edges (the Strato fetcher is left out
   * when ReadFromABV2Only is set), wrapped in rescueWithStats. At most NumEmailBookEntries
   * user ids are kept and turned into candidates with the default score and this
   * source's identifier.
   */
  override def apply(
    target: HasParams with HasClientContext
  ): Stitch[Seq[CandidateUser]] = {
    val contactIds: Stitch[Seq[Long]] = target.getOptionalUserId match {
      case Some(userId) =>
        rescueWithStats(
          addressBookClient.getUsers(
            userId = userId,
            identifiers =
              Seq(RecordIdentifier(userId = Some(userId), email = None, phoneNumber = None)),
            batchSize = AddressbookClient.AddressBook2BatchSize,
            edgeType = ForwardEmailBookSource.DefaultEdgeType,
            fetcherOption =
              if (target.params(ReadFromABV2Only)) None
              else Some(forwardEmailBookClientColumn.fetcher),
            queryOption = AddressbookClient
              .createQueryOption(
                edgeType = ForwardEmailBookSource.DefaultEdgeType,
                isPhone = ForwardEmailBookSource.IsPhone)
          ),
          stats,
          "AddressBookClient"
        )
      case None =>
        Stitch.Nil
    }

    contactIds.map { ids =>
      ids.take(ForwardEmailBookSource.NumEmailBookEntries).map { id =>
        CandidateUser(id, score = Some(CandidateUser.DefaultCandidateScore))
          .withCandidateSource(identifier)
      }
    }
  }
}
|
||||
|
||||
/** Constants of the forward email book candidate source. */
object ForwardEmailBookSource {

  // identifier derived from the ForwardEmailBook algorithm name
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.ForwardEmailBook.toString)

  // upper bound on the number of address book entries turned into candidates
  val NumEmailBookEntries: Int = 1000

  // this source reads email contacts, not phone contacts
  val IsPhone = false

  val DefaultEdgeType: EdgeType = EdgeType.Forward
}
|
@ -1,72 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
|
||||
|
||||
import com.twitter.finagle.stats.NullStatsReceiver
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.addressbook.AddressBookParams.ReadFromABV2Only
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.AddressbookClient
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.models.EdgeType
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.models.RecordIdentifier
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueWithStats
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.ForwardPhoneContactsClientColumn
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Candidate source returning users from the requesting user's forward phone book.
 */
@Singleton
class ForwardPhoneBookSource @Inject() (
  forwardPhoneContactsClientColumn: ForwardPhoneContactsClientColumn,
  addressBookClient: AddressbookClient,
  statsReceiver: StatsReceiver = NullStatsReceiver)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = ForwardPhoneBookSource.Identifier

  // stats are scoped by the simple name of this class
  private val stats: StatsReceiver = statsReceiver.scope(getClass.getSimpleName)

  /**
   * Generate a list of candidates for the target.
   *
   * Without a user id on the target the result is empty. Otherwise the address book
   * client is queried for the user's forward phone edges (the Strato fetcher is left out
   * when ReadFromABV2Only is set), wrapped in rescueWithStats. At most NumPhoneBookEntries
   * user ids are kept and turned into candidates with the default score and this
   * source's identifier.
   */
  override def apply(target: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
    val contactIds: Stitch[Seq[Long]] = target.getOptionalUserId match {
      case Some(userId) =>
        rescueWithStats(
          addressBookClient.getUsers(
            userId,
            identifiers =
              Seq(RecordIdentifier(userId = Some(userId), email = None, phoneNumber = None)),
            batchSize = AddressbookClient.AddressBook2BatchSize,
            edgeType = ForwardPhoneBookSource.DefaultEdgeType,
            fetcherOption =
              if (target.params(ReadFromABV2Only)) None
              else Some(forwardPhoneContactsClientColumn.fetcher),
            queryOption = AddressbookClient
              .createQueryOption(
                edgeType = ForwardPhoneBookSource.DefaultEdgeType,
                isPhone = ForwardPhoneBookSource.IsPhone)
          ),
          stats,
          "AddressBookClient"
        )
      case None =>
        Stitch.Nil
    }

    contactIds.map { ids =>
      ids.take(ForwardPhoneBookSource.NumPhoneBookEntries).map { id =>
        CandidateUser(id, score = Some(CandidateUser.DefaultCandidateScore))
          .withCandidateSource(identifier)
      }
    }
  }
}
|
||||
|
||||
/** Constants of the forward phone book candidate source. */
object ForwardPhoneBookSource {

  // identifier derived from the ForwardPhoneBook algorithm name
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.ForwardPhoneBook.toString)

  // upper bound on the number of address book entries turned into candidates
  val NumPhoneBookEntries: Int = 1000

  // this source reads phone contacts
  val IsPhone = true

  val DefaultEdgeType: EdgeType = EdgeType.Forward
}
|
@ -1,4 +0,0 @@
|
||||
# Address Book Candidate Source
|
||||
Provides the accounts of a given user's forward and reverse phone and email book contacts.
|
||||
It is only available when the user has synced their address book with the service.
|
||||
|
@ -1,78 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
|
||||
|
||||
import com.twitter.cds.contact_consent_state.thriftscala.PurposeOfProcessing
|
||||
import com.twitter.finagle.stats.NullStatsReceiver
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.AddressbookClient
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.models.EdgeType
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.models.RecordIdentifier
|
||||
import com.twitter.follow_recommendations.common.clients.email_storage_service.EmailStorageServiceClient
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueOptionalWithStats
|
||||
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueWithStats
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.ReverseEmailContactsClientColumn
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Candidate source backed by the reverse email address book.
 *
 * The target user's verified email is resolved first and then used as the record
 * identifier for the address book lookup.
 */
@Singleton
class ReverseEmailBookSource @Inject() (
  reverseEmailContactsClientColumn: ReverseEmailContactsClientColumn,
  essClient: EmailStorageServiceClient,
  addressBookClient: AddressbookClient,
  statsReceiver: StatsReceiver = NullStatsReceiver)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = ReverseEmailBookSource.Identifier

  private val rescueStats = statsReceiver.scope("ReverseEmailBookSource")

  /**
   * Generate a list of candidates for the target.
   *
   * Targets without a user id yield no candidates. The result is capped at
   * [[ReverseEmailBookSource.NumEmailBookEntries]] ids.
   */
  override def apply(target: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
    val reverseContactIds: Stitch[Seq[Long]] = target.getOptionalUserId match {
      case None =>
        Stitch.Nil
      case Some(userId) =>
        // NOTE(review): the rescue* helpers presumably turn failures into empty results — confirm.
        val verifiedEmail = rescueOptionalWithStats(
          essClient.getVerifiedEmail(userId, PurposeOfProcessing.ContentRecommendations),
          rescueStats,
          "getVerifiedEmail")

        verifiedEmail.flatMap { maybeEmail =>
          rescueWithStats(
            addressBookClient.getUsers(
              userId = userId,
              // An absent email produces an empty identifier list; the lookup is still issued.
              identifiers = maybeEmail.toSeq.map { address =>
                RecordIdentifier(userId = None, email = Some(address), phoneNumber = None)
              },
              batchSize = ReverseEmailBookSource.NumEmailBookEntries,
              edgeType = ReverseEmailBookSource.DefaultEdgeType,
              fetcherOption = fetcherFor(target)
            ),
            rescueStats,
            "AddressBookClient"
          )
        }
    }

    reverseContactIds.map { ids =>
      ids.take(ReverseEmailBookSource.NumEmailBookEntries).map(toCandidate)
    }
  }

  /** No fetcher is supplied when the request is flagged to read from ABV2 only. */
  private def fetcherFor(target: HasParams) =
    if (target.params(AddressBookParams.ReadFromABV2Only)) None
    else Some(reverseEmailContactsClientColumn.fetcher)

  /** Wraps a contact's user id in a default-scored candidate attributed to this source. */
  private def toCandidate(contactId: Long): CandidateUser =
    CandidateUser(contactId, score = Some(CandidateUser.DefaultCandidateScore))
      .withCandidateSource(identifier)
}
|
||||
|
||||
/** Constants used by the reverse email book candidate source. */
object ReverseEmailBookSource {

  /** Identifier under which candidates from this source are attributed. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.ReverseEmailBookIbis.toString)

  /** Used both as the lookup batch size and as the cap on returned candidates. */
  val NumEmailBookEntries: Int = 500

  val IsPhone: Boolean = false

  /** Edge type used for the address book lookup. */
  val DefaultEdgeType: EdgeType = EdgeType.Reverse
}
|
@ -1,77 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
|
||||
|
||||
import com.twitter.cds.contact_consent_state.thriftscala.PurposeOfProcessing
|
||||
import com.twitter.finagle.stats.NullStatsReceiver
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.AddressbookClient
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.models.EdgeType
|
||||
import com.twitter.follow_recommendations.common.clients.addressbook.models.RecordIdentifier
|
||||
import com.twitter.follow_recommendations.common.clients.phone_storage_service.PhoneStorageServiceClient
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueWithStats
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.ReversePhoneContactsClientColumn
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Candidate source backed by the reverse phone address book.
 *
 * The target user's phone numbers are resolved first and then used as record identifiers
 * for the address book lookup.
 */
@Singleton
class ReversePhoneBookSource @Inject() (
  reversePhoneContactsClientColumn: ReversePhoneContactsClientColumn,
  pssClient: PhoneStorageServiceClient,
  addressBookClient: AddressbookClient,
  statsReceiver: StatsReceiver = NullStatsReceiver)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = ReversePhoneBookSource.Identifier

  // Stats are scoped by the runtime class name of this source.
  private val stats: StatsReceiver = statsReceiver.scope(this.getClass.getSimpleName)

  /**
   * Generate a list of candidates for the target.
   *
   * Targets without a user id yield no candidates. The result is capped at
   * [[ReversePhoneBookSource.NumPhoneBookEntries]] ids.
   */
  override def apply(target: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
    val reverseContactIds: Stitch[Seq[Long]] = target.getOptionalUserId match {
      case None =>
        Stitch.Nil
      case Some(userId) =>
        // The phone number lookup is not wrapped in rescueWithStats; only the
        // address book call is.
        val ownPhoneNumbers =
          pssClient.getPhoneNumbers(userId, PurposeOfProcessing.ContentRecommendations)

        ownPhoneNumbers.flatMap { numbers =>
          rescueWithStats(
            addressBookClient.getUsers(
              userId = userId,
              identifiers = numbers.map { number =>
                RecordIdentifier(userId = None, email = None, phoneNumber = Some(number))
              },
              batchSize = ReversePhoneBookSource.NumPhoneBookEntries,
              edgeType = ReversePhoneBookSource.DefaultEdgeType,
              fetcherOption = fetcherFor(target),
              queryOption = AddressbookClient.createQueryOption(
                edgeType = ReversePhoneBookSource.DefaultEdgeType,
                isPhone = ReversePhoneBookSource.IsPhone)
            ),
            stats,
            "AddressBookClient"
          )
        }
    }

    reverseContactIds.map { ids =>
      ids.take(ReversePhoneBookSource.NumPhoneBookEntries).map(toCandidate)
    }
  }

  /** No fetcher is supplied when the request is flagged to read from ABV2 only. */
  private def fetcherFor(target: HasParams) =
    if (target.params(AddressBookParams.ReadFromABV2Only)) None
    else Some(reversePhoneContactsClientColumn.fetcher)

  /** Wraps a contact's user id in a default-scored candidate attributed to this source. */
  private def toCandidate(contactId: Long): CandidateUser =
    CandidateUser(contactId, score = Some(CandidateUser.DefaultCandidateScore))
      .withCandidateSource(identifier)
}
|
||||
|
||||
/** Constants used by the reverse phone book candidate source. */
object ReversePhoneBookSource {

  /** Identifier under which candidates from this source are attributed. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.ReversePhoneBook.toString)

  /** Used both as the lookup batch size and as the cap on returned candidates. */
  val NumPhoneBookEntries: Int = 500

  /** Passed as `isPhone` when building the address book query option. */
  val IsPhone: Boolean = true

  /** Edge type used for both the lookup and the query option. */
  val DefaultEdgeType: EdgeType = EdgeType.Reverse
}
|
@ -1,23 +0,0 @@
|
||||
# Scala library target for this directory's sources.
# NOTE(review): presumably the candidate_sources/base package that follows — confirm.
# Warnings are fatal for this target ("fatal_warnings" compiler option set).
scala_library(
    compiler_option_sets = ["fatal_warnings"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/google/inject:guice",
        "3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
        "3rdparty/jvm/net/codingwell:scala-guice",
        "3rdparty/jvm/org/slf4j:slf4j-api",
        "content-recommender/thrift/src/main/thrift:thrift-scala",
        "escherbird/src/scala/com/twitter/escherbird/util/stitchcache",
        "finatra/inject/inject-core/src/main/scala",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
        "follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/transforms/modify_social_proof",
        "follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
        "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/candidate_source",
        "src/scala/com/twitter/onboarding/relevance/features/ymbii",
        "strato/src/main/scala/com/twitter/strato/client",
        "util/util-slf4j-api/src/main/scala",
    ],
)
|
@ -1,26 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.escherbird.util.stitchcache.StitchCache
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.util.Duration
|
||||
|
||||
/**
 * Wraps a [[CandidateSource]] with a read-through [[StitchCache]] keyed on the request.
 *
 * @param candidateSource source that is invoked through the cache's underlying call
 * @param maxCacheSize    passed to the cache as its maximum size
 * @param cacheTTL        passed to the cache as its time-to-live
 * @param statsReceiver   cache stats are reported under `<identifier.name>/cache`
 * @param identifier      identifier exposed by this wrapper
 * @tparam K request (cache key) type
 * @tparam V candidate type
 */
class CachedCandidateSource[K <: Object, V <: Object](
  candidateSource: CandidateSource[K, V],
  maxCacheSize: Int,
  cacheTTL: Duration,
  statsReceiver: StatsReceiver,
  override val identifier: CandidateSourceIdentifier)
    extends CandidateSource[K, V] {

  private val cache = StitchCache[K, Seq[V]](
    maxCacheSize = maxCacheSize,
    ttl = cacheTTL,
    statsReceiver = statsReceiver.scope(identifier.name, "cache"),
    underlyingCall = (key: K) => candidateSource.apply(key)
  )

  /** Returns the candidates for `target`, reading through the cache. */
  override def apply(target: K): Stitch[Seq[V]] = {
    cache.readThrough(target)
  }
}
|
@ -1,66 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.timelines.configapi.Param
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
|
||||
/**
 * A wrapper around a [[CandidateSource]] that makes it easier to experiment with new
 * candidate generation algorithms.
 *
 * The base source is only queried when `darkreadAlgorithmParam` is enabled. Its results are
 * considered only when there are at least `resultCountThresholdParam` of them, and they are
 * returned only when `keepCandidatesParam` is enabled; in every other case the wrapper
 * yields an empty result.
 *
 * @param baseSource base candidate source
 * @param darkreadAlgorithmParam controls whether or not to darkread candidates (fetch them even if they will not be included)
 * @param keepCandidatesParam controls whether or not to keep candidates from the base source
 * @param resultCountThresholdParam controls how many results the source must return to bucket the user and return results (greater-than-or-equal-to)
 * @param baseStatsReceiver counters are reported under `Experimental/<identifier name>`
 * @tparam T request type. it must extend HasParams
 * @tparam V value type
 */
class ExperimentalCandidateSource[T <: HasParams, V](
  baseSource: CandidateSource[T, V],
  darkreadAlgorithmParam: Param[Boolean],
  keepCandidatesParam: Param[Boolean],
  resultCountThresholdParam: Param[Int],
  baseStatsReceiver: StatsReceiver)
    extends CandidateSource[T, V] {

  override val identifier: CandidateSourceIdentifier = baseSource.identifier

  private[base] val statsReceiver =
    baseStatsReceiver.scope(s"Experimental/${identifier.name}")
  private[base] val requestsCounter = statsReceiver.counter("requests")
  private[base] val resultCountGreaterThanThresholdCounter =
    statsReceiver.counter("with_results_at_or_above_count_threshold")
  private[base] val keepResultsCounter = statsReceiver.counter("keep_results")
  private[base] val discardResultsCounter = statsReceiver.counter("discard_results")

  /** Queries the base source only when dark reads are enabled for this request. */
  override def apply(request: T): Stitch[Seq[V]] =
    if (!request.params(darkreadAlgorithmParam)) {
      Stitch.Nil
    } else {
      requestsCounter.incr()
      fetchFromCandidateSourceAndProcessResults(request)
    }

  /** Fetches from the base source and drops the results when they are below the threshold. */
  private def fetchFromCandidateSourceAndProcessResults(request: T): Stitch[Seq[V]] =
    baseSource(request).map {
      case results if results.length >= request.params(resultCountThresholdParam) =>
        processResults(results, request.params(keepCandidatesParam))
      case _ =>
        Nil
    }

  /** Records the keep/discard decision and returns the results only when they are kept. */
  private def processResults(results: Seq[V], keepResults: Boolean): Seq[V] = {
    resultCountGreaterThanThresholdCounter.incr()
    val decisionCounter = if (keepResults) keepResultsCounter else discardResultsCounter
    decisionCounter.incr()
    if (keepResults) results else Nil
  }
}
|
@ -1,208 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.stats.NullStatsReceiver
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.util.DefaultTimer
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.RealGraphExpansionRepository.DefaultScore
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.RealGraphExpansionRepository.MaxNumIntermediateNodesToKeep
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.RealGraphExpansionRepository.FirstDegreeCandidatesTimeout
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models._
|
||||
import com.twitter.onboarding.relevance.features.ymbii.ExpansionCandidateScores
|
||||
import com.twitter.onboarding.relevance.features.ymbii.RawYMBIICandidateFeatures
|
||||
import com.twitter.onboarding.relevance.store.thriftscala.CandidatesFollowedV1
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.client.Fetcher
|
||||
import com.twitter.util.Duration
|
||||
import scala.collection.immutable
|
||||
import scala.util.control.NonFatal
|
||||
|
||||
/**
 * An expansion candidate produced by [[RealGraphExpansionRepository]].
 *
 * @param userID   id of the expansion candidate
 * @param score    total score summed over all intermediate candidates leading to this user
 * @param features edge count plus the retained intermediate-candidate scores
 */
private final case class InterestExpansionCandidate(
  userID: Long,
  score: Double,
  features: RawYMBIICandidateFeatures
)
|
||||
|
||||
/**
 * Candidate source that expands first-degree candidates through the real-graph expansion
 * store and re-ranks the resulting second-degree candidates.
 *
 * @param realgraphExpansionStore        fetcher keyed by candidate id returning the accounts it leads to
 * @param identifier                     identifier attributed to the produced candidates
 * @param statsReceiver                  stats are scoped by runtime class name and identifier name
 * @param maxUnderlyingCandidatesToQuery number of top first-degree candidates to expand
 * @param maxCandidatesToReturn          cap on the number of returned candidates
 * @param overrideUnderlyingTimeout      timeout for each underlying source; a default is used when absent
 * @param appendSocialProof              whether to attach the intermediate candidates as follow proof
 */
abstract class RealGraphExpansionRepository[Request](
  realgraphExpansionStore: Fetcher[Long, Unit, CandidatesFollowedV1],
  override val identifier: CandidateSourceIdentifier,
  statsReceiver: StatsReceiver = NullStatsReceiver,
  maxUnderlyingCandidatesToQuery: Int = 50,
  maxCandidatesToReturn: Int = 40,
  overrideUnderlyingTimeout: Option[Duration] = None,
  appendSocialProof: Boolean = false)
    extends CandidateSource[Request, CandidateUser] {

  /** First-degree candidate sources, in priority order (the first non-empty one is used). */
  val underlyingCandidateSource: Seq[CandidateSource[Request, CandidateUser]]

  private val stats = statsReceiver.scope(this.getClass.getSimpleName).scope(identifier.name)
  private val underlyingCandidateSourceFailureStats =
    stats.scope("underlying_candidate_source_failure")

  def apply(request: Request): Stitch[Seq[CandidateUser]] = {
    val firstDegreeFetches: Seq[Stitch[Seq[CandidateUser]]] =
      underlyingCandidateSource.map(source => fetchFirstDegreeCandidates(request, source))

    Stitch.collect(firstDegreeFetches).flatMap { resultsPerSource =>
      // The first algorithm in the list has the highest priority. If it returned nothing,
      // fall back to the next ones. Once an algorithm is chosen, only its top candidates
      // are taken for expansion.
      val chosen: Option[(Seq[CandidateUser], CandidateSource[Request, CandidateUser])] =
        resultsPerSource
          .zip(underlyingCandidateSource)
          .find { case (found, _) => found.nonEmpty }

      val underlyingAlgorithmUsed: Option[CandidateSourceIdentifier] =
        chosen.map { case (_, source) => source.identifier }

      // Take maxUnderlyingCandidatesToQuery to query realgraphExpansionStore
      val seedCandidates: Seq[CandidateUser] = chosen match {
        case Some((found, source)) =>
          stats.scope("underlyingAlgorithmUsedScope").counter(source.identifier.name).incr()
          found
            .sortBy(_.score.getOrElse(DefaultScore))(Ordering.Double.reverse)
            .take(maxUnderlyingCandidatesToQuery)
        case None =>
          Seq.empty
      }

      val seedScoresById: Map[Long, Double] =
        seedCandidates.map(seed => seed.id -> seed.score.getOrElse(DefaultScore)).toMap

      val expansionLookups =
        Stitch.traverse(seedScoresById.keySet.toSeq) { seedId =>
          Stitch.join(Stitch.value(seedId), realgraphExpansionStore.fetch(seedId).map(_.v))
        }

      expansionLookups.map { fetched =>
        val expansionsBySeed: Map[Long, Option[CandidatesFollowedV1]] = fetched.toMap
        rerankCandidateExpansions(seedScoresById, expansionsBySeed)
          .take(maxCandidatesToReturn)
          .map(expanded => toCandidateUser(expanded, underlyingAlgorithmUsed))
      }
    }
  }

  /**
   * Queries one underlying source under the configured timeout. Any non-fatal failure is
   * counted per source and exception class and replaced by an empty result.
   */
  private def fetchFirstDegreeCandidates(
    request: Request,
    candidateSource: CandidateSource[Request, CandidateUser]
  ): Stitch[Seq[CandidateUser]] = {
    val timeout = overrideUnderlyingTimeout.getOrElse(FirstDegreeCandidatesTimeout)
    candidateSource
      .apply(request)
      .within(timeout)(DefaultTimer)
      .handle {
        case NonFatal(e) =>
          underlyingCandidateSourceFailureStats
            .counter(candidateSource.identifier.name, e.getClass.getSimpleName).incr()
          Seq.empty
      }
  }

  /** Converts a re-ranked expansion candidate into the outgoing [[CandidateUser]]. */
  private def toCandidateUser(
    candidate: InterestExpansionCandidate,
    underlyingAlgorithmUsed: Option[CandidateSourceIdentifier]
  ): CandidateUser = {
    val socialProofReason =
      if (appendSocialProof) {
        val socialProofIds =
          candidate.features.expansionCandidateScores.map(_.intermediateCandidateId)
        val followProof = FollowProof(socialProofIds, socialProofIds.size)
        Some(Reason(Some(AccountProof(followProof = Some(followProof)))))
      } else {
        None
      }

    val sourceDetails = UserCandidateSourceDetails(
      primaryCandidateSource = Some(identifier),
      candidateSourceFeatures = Map(identifier -> Seq(candidate.features))
    )

    CandidateUser(
      id = candidate.userID,
      score = Some(candidate.score),
      reason = socialProofReason,
      userCandidateSourceDetails = Some(sourceDetails)
    ).addAddressBookMetadataIfAvailable(underlyingAlgorithmUsed.toSeq)
  }

  /**
   * Expands underlying candidates, returning them in sorted order.
   *
   * @param underlyingCandidatesMap A map from underlying candidate id to score
   * @param expansionCandidateMap A map from underlying candidate id to optional expansion candidates
   * @return A sorted sequence of expansion candidates and associated scores
   */
  private def rerankCandidateExpansions(
    underlyingCandidatesMap: Map[Long, Double],
    expansionCandidateMap: Map[Long, Option[CandidatesFollowedV1]]
  ): Seq[InterestExpansionCandidate] = {

    // One (expansion candidate id -> edge scores) pair per intermediate/expansion edge.
    val edges: Seq[(Long, ExpansionCandidateScores)] =
      underlyingCandidatesMap.toSeq.flatMap {
        case (intermediateId, intermediateScore) =>
          val followed =
            expansionCandidateMap
              .get(intermediateId)
              .flatten
              .map(_.candidatesFollowed)
              .getOrElse(Seq.empty)
          followed.map { expansion =>
            expansion.candidateID -> ExpansionCandidateScores(
              intermediateId,
              Some(intermediateScore),
              Some(expansion.score)
            )
          }
      }

    // Merge the intermediate nodes that lead to the same expansion candidate.
    val edgesByCandidate: Seq[(Long, Seq[ExpansionCandidateScores])] =
      edges.groupBy(_._1).toSeq.map {
        case (candidateId, pairs) =>
          candidateId -> pairs.map(_._2).sortBy(_.intermediateCandidateId)
      }

    // Total score = sum over edges of (user -> intermediate) * (intermediate -> expansion).
    val scored: Seq[((Long, Seq[ExpansionCandidateScores]), Double)] =
      edgesByCandidate.map { entry =>
        val total = entry._2.map { edge =>
          edge.scoreFromUserToIntermediateCandidate.getOrElse(DefaultScore) *
            edge.scoreFromIntermediateToExpansionCandidate.getOrElse(DefaultScore)
        }.sum
        (entry, total)
      }

    // Highest total score first.
    scored.sortBy(_._2)(Ordering[Double].reverse).map {
      case ((candidateId, candidateEdges), total) =>
        InterestExpansionCandidate(
          candidateId,
          total,
          RawYMBIICandidateFeatures(
            candidateEdges.size,
            candidateEdges.take(MaxNumIntermediateNodesToKeep).to[immutable.Seq])
        )
    }
  }

}
|
||||
|
||||
/** Defaults shared by all [[RealGraphExpansionRepository]] instances. */
object RealGraphExpansionRepository {

  /** Timeout for each underlying first-degree source when no override is supplied. */
  private val FirstDegreeCandidatesTimeout: Duration = 250.milliseconds

  /** Maximum number of intermediate-candidate edges kept in a candidate's features. */
  private val MaxNumIntermediateNodesToKeep: Int = 20

  /** Score assumed when a candidate or edge carries none. */
  private val DefaultScore: Double = 0.0d

}
|
@ -1,31 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Feature-switch params shared by the similar-user expander candidate sources. */
object SimilarUserExpanderParams {

  /** When on, the backup first-degree source (if any) is queried as well. */
  case object EnableNonDirectFollowExpansion
      extends FSParam[Boolean]("similar_user_enable_non_direct_follow_expansion", true)

  /** When on, the combined first-degree candidates are sorted by descending score. */
  case object EnableSimsExpandSeedAccountsSort
      extends FSParam[Boolean]("similar_user_enable_sims_expander_seed_account_sort", false)

  /** Default cap on the number of first-degree candidates used as expansion input. */
  case object DefaultExpansionInputCount
      extends FSBoundedParam[Int](
        name = "similar_user_default_expansion_input_count",
        default = Int.MaxValue,
        min = 0,
        max = Int.MaxValue
      )

  /** Default cap on the number of candidates returned after aggregation. */
  case object DefaultFinalCandidatesReturnedCount
      extends FSBoundedParam[Int](
        name = "similar_user_default_final_candidates_returned_count",
        default = Int.MaxValue,
        min = 0,
        max = Int.MaxValue
      )

  /** When off, only first-degree candidates with an explicit engagement are expanded. */
  case object DefaultEnableImplicitEngagedExpansion
      extends FSParam[Boolean]("similar_user_enable_implicit_engaged_expansion", true)

}
|
@ -1,313 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.DefaultEnableImplicitEngagedExpansion
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.DefaultExpansionInputCount
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.DefaultFinalCandidatesReturnedCount
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.EnableNonDirectFollowExpansion
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.EnableSimsExpandSeedAccountsSort
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderRepository.DefaultCandidateBuilder
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderRepository.DefaultScore
|
||||
import com.twitter.follow_recommendations.common.models.AccountProof
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.EngagementType
|
||||
import com.twitter.follow_recommendations.common.models.FollowProof
|
||||
import com.twitter.follow_recommendations.common.models.Reason
|
||||
import com.twitter.follow_recommendations.common.models.SimilarToProof
|
||||
import com.twitter.follow_recommendations.common.models.UserCandidateSourceDetails
|
||||
import com.twitter.hermit.candidate.thriftscala.Candidates
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.client.Fetcher
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.timelines.configapi.Params
|
||||
|
||||
/**
 * A candidate reached by expanding a first-degree candidate.
 *
 * @param userId      id of the second-degree candidate
 * @param score       score reported by the similar-to store for this candidate
 * @param socialProof optional ids reported by the store as social proof
 */
case class SecondDegreeCandidate(
  userId: Long,
  score: Double,
  socialProof: Option[Seq[Long]]
)
|
||||
|
||||
abstract class SimilarUserExpanderRepository[-Request <: HasParams](
|
||||
override val identifier: CandidateSourceIdentifier,
|
||||
similarToCandidatesFetcher: Fetcher[
|
||||
Long,
|
||||
Unit,
|
||||
Candidates
|
||||
],
|
||||
expansionInputSizeParam: FSBoundedParam[Int] = DefaultExpansionInputCount,
|
||||
candidatesReturnedSizeParam: FSBoundedParam[Int] = DefaultFinalCandidatesReturnedCount,
|
||||
enableImplicitEngagedExpansion: FSParam[Boolean] = DefaultEnableImplicitEngagedExpansion,
|
||||
thresholdToAvoidExpansion: Int = 30,
|
||||
maxExpansionPerCandidate: Option[Int] = None,
|
||||
includingOriginalCandidates: Boolean = false,
|
||||
scorer: (Double, Double) => Double = SimilarUserExpanderRepository.DefaultScorer,
|
||||
aggregator: (Seq[Double]) => Double = ScoreAggregator.Max,
|
||||
candidateBuilder: (Long, CandidateSourceIdentifier, Double, CandidateUser) => CandidateUser =
|
||||
DefaultCandidateBuilder)
|
||||
extends TwoHopExpansionCandidateSource[
|
||||
Request,
|
||||
CandidateUser,
|
||||
SecondDegreeCandidate,
|
||||
CandidateUser
|
||||
] {
|
||||
|
||||
val originalCandidateSource: CandidateSource[Request, CandidateUser]
|
||||
val backupOriginalCandidateSource: Option[CandidateSource[Request, CandidateUser]] = None
|
||||
|
||||
override def firstDegreeNodes(request: Request): Stitch[Seq[CandidateUser]] = {
|
||||
|
||||
val originalCandidatesStitch: Stitch[Seq[CandidateUser]] =
|
||||
originalCandidateSource(request)
|
||||
|
||||
val backupCandidatesStitch: Stitch[Seq[CandidateUser]] =
|
||||
if (request.params(EnableNonDirectFollowExpansion)) {
|
||||
backupOriginalCandidateSource.map(_.apply(request)).getOrElse(Stitch.Nil)
|
||||
} else {
|
||||
Stitch.Nil
|
||||
}
|
||||
|
||||
val firstDegreeCandidatesCombinedStitch: Stitch[Seq[CandidateUser]] =
|
||||
Stitch
|
||||
.join(originalCandidatesStitch, backupCandidatesStitch).map {
|
||||
case (firstDegreeOrigCandidates, backupFirstDegreeCandidates) =>
|
||||
if (request.params(EnableSimsExpandSeedAccountsSort)) {
|
||||
firstDegreeOrigCandidates ++ backupFirstDegreeCandidates sortBy {
|
||||
-_.score.getOrElse(DefaultScore)
|
||||
}
|
||||
} else {
|
||||
firstDegreeOrigCandidates ++ backupFirstDegreeCandidates
|
||||
}
|
||||
}
|
||||
|
||||
val candidatesAfterImplicitEngagementsRemovalStitch: Stitch[Seq[CandidateUser]] =
|
||||
getCandidatesAfterImplicitEngagementFiltering(
|
||||
request.params,
|
||||
firstDegreeCandidatesCombinedStitch)
|
||||
|
||||
val firstDegreeCandidatesCombinedTrimmed = candidatesAfterImplicitEngagementsRemovalStitch.map {
|
||||
candidates: Seq[CandidateUser] =>
|
||||
candidates.take(request.params(expansionInputSizeParam))
|
||||
}
|
||||
|
||||
firstDegreeCandidatesCombinedTrimmed.map { firstDegreeResults: Seq[CandidateUser] =>
|
||||
if (firstDegreeResults.nonEmpty && firstDegreeResults.size < thresholdToAvoidExpansion) {
|
||||
firstDegreeResults
|
||||
.groupBy(_.id).mapValues(
|
||||
_.maxBy(_.score)
|
||||
).values.toSeq
|
||||
} else {
|
||||
Nil
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
override def secondaryDegreeNodes(
|
||||
request: Request,
|
||||
firstDegreeCandidate: CandidateUser
|
||||
): Stitch[Seq[SecondDegreeCandidate]] = {
|
||||
similarToCandidatesFetcher.fetch(firstDegreeCandidate.id).map(_.v).map { candidateListOption =>
|
||||
candidateListOption
|
||||
.map { candidatesList =>
|
||||
candidatesList.candidates.map(candidate =>
|
||||
SecondDegreeCandidate(candidate.userId, candidate.score, candidate.socialProof))
|
||||
}.getOrElse(Nil)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
override def aggregateAndScore(
|
||||
req: Request,
|
||||
firstDegreeToSecondDegreeNodesMap: Map[CandidateUser, Seq[SecondDegreeCandidate]]
|
||||
): Stitch[Seq[CandidateUser]] = {
|
||||
|
||||
val similarExpanderResults = firstDegreeToSecondDegreeNodesMap.flatMap {
|
||||
case (firstDegreeCandidate, seqOfSecondDegreeCandidates) =>
|
||||
val sourceScore = firstDegreeCandidate.score.getOrElse(DefaultScore)
|
||||
val results: Seq[CandidateUser] = seqOfSecondDegreeCandidates.map { secondDegreeCandidate =>
|
||||
val score = scorer(sourceScore, secondDegreeCandidate.score)
|
||||
candidateBuilder(secondDegreeCandidate.userId, identifier, score, firstDegreeCandidate)
|
||||
}
|
||||
maxExpansionPerCandidate match {
|
||||
case None => results
|
||||
case Some(limit) => results.sortBy(-_.score.getOrElse(DefaultScore)).take(limit)
|
||||
}
|
||||
}.toSeq
|
||||
|
||||
val allCandidates = {
|
||||
if (includingOriginalCandidates)
|
||||
firstDegreeToSecondDegreeNodesMap.keySet.toSeq
|
||||
else
|
||||
Nil
|
||||
} ++ similarExpanderResults
|
||||
|
||||
val groupedCandidates: Seq[CandidateUser] = allCandidates
|
||||
.groupBy(_.id)
|
||||
.flatMap {
|
||||
case (_, candidates) =>
|
||||
val finalScore = aggregator(candidates.map(_.score.getOrElse(DefaultScore)))
|
||||
val candidateSourceDetailsCombined = aggregateCandidateSourceDetails(candidates)
|
||||
val accountSocialProofcombined = aggregateAccountSocialProof(candidates)
|
||||
|
||||
candidates.headOption.map(
|
||||
_.copy(
|
||||
score = Some(finalScore),
|
||||
reason = accountSocialProofcombined,
|
||||
userCandidateSourceDetails = candidateSourceDetailsCombined)
|
||||
.withCandidateSource(identifier))
|
||||
}
|
||||
.toSeq
|
||||
|
||||
Stitch.value(
|
||||
groupedCandidates
|
||||
.sortBy { -_.score.getOrElse(DefaultScore) }.take(req.params(candidatesReturnedSizeParam))
|
||||
)
|
||||
}
|
||||
|
||||
def aggregateCandidateSourceDetails(
|
||||
candidates: Seq[CandidateUser]
|
||||
): Option[UserCandidateSourceDetails] = {
|
||||
candidates
|
||||
.map { candidate =>
|
||||
candidate.userCandidateSourceDetails.map(_.candidateSourceScores).getOrElse(Map.empty)
|
||||
}.reduceLeftOption { (scoreMap1, scoreMap2) =>
|
||||
scoreMap1 ++ scoreMap2
|
||||
}.map {
|
||||
UserCandidateSourceDetails(primaryCandidateSource = None, _)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Combines the account social proof of several candidates into one [[Reason]].
 *
 * The "similar to" ids and the "followed by" ids are concatenated in candidate order
 * (no de-duplication) and the follow-proof `numIds` counters are summed. Candidates
 * without a given proof contribute an empty list / zero.
 *
 * @param candidates candidates whose reasons should be merged
 * @return None when `candidates` is empty; otherwise a Reason whose similar-to proof is
 *         always present and whose follow proof is present only if any followed-by id exists
 */
def aggregateAccountSocialProof(candidates: Seq[CandidateUser]): Option[Reason] = {
  val proofsPerCandidate = candidates.map { candidate =>
    val accountProof = candidate.reason.flatMap(_.accountProof)
    val followProof = accountProof.flatMap(_.followProof)
    (
      accountProof.flatMap(_.similarToProof.map(_.similarTo)).getOrElse(Nil),
      followProof.map(_.followedBy).getOrElse(Nil),
      followProof.map(_.numIds).getOrElse(0)
    )
  }

  proofsPerCandidate
    .reduceLeftOption { (left, right) =>
      val (leftSimilarTo, leftFollowedBy, leftNumIds) = left
      val (rightSimilarTo, rightFollowedBy, rightNumIds) = right
      (
        leftSimilarTo ++ rightSimilarTo,
        leftFollowedBy ++ rightFollowedBy,
        leftNumIds + rightNumIds)
    }
    .map {
      case (similarToIds, followedByIds, numIds) =>
        // A follow proof is only reported when at least one follower id was collected.
        val mergedFollowProof =
          if (followedByIds.isEmpty) None else Some(FollowProof(followedByIds, numIds))
        Reason(
          accountProof = Some(
            AccountProof(
              similarToProof = Some(SimilarToProof(similarToIds)),
              followProof = mergedFollowProof
            )))
    }
}
|
||||
|
||||
/**
 * Optionally drops first-degree candidates that only have implicit engagements.
 *
 * When `enableImplicitEngagedExpansion` is on, the candidates are returned untouched.
 * Otherwise only candidates with at least one explicit engagement (Like, Retweet or
 * Mention) are kept, so that candidates known only through implicit engagements
 * (e.g. Profile View, Tweet Click) are not expanded.
 *
 * @param params                      request params holding the feature switch
 * @param firstDegreeCandidatesStitch the first-degree candidates to filter
 * @return the (possibly filtered) first-degree candidates
 */
def getCandidatesAfterImplicitEngagementFiltering(
  params: Params,
  firstDegreeCandidatesStitch: Stitch[Seq[CandidateUser]]
): Stitch[Seq[CandidateUser]] = {
  if (params(enableImplicitEngagedExpansion)) {
    firstDegreeCandidatesStitch
  } else {
    firstDegreeCandidatesStitch.map(
      _.filter(
        _.engagements.exists { engagement =>
          engagement == EngagementType.Like ||
          engagement == EngagementType.Retweet ||
          engagement == EngagementType.Mention
        }))
  }
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Scoring functions, the default score and the candidate builders used when expanding
 * a source candidate into its similar users.
 */
object SimilarUserExpanderRepository {

  /** Scores an expanded candidate with the similarity score only. */
  val DefaultScorer: (Double, Double) => Double = (_, similarScore) => similarScore

  /** Scores an expanded candidate with the product of source score and similarity score. */
  val MultiplyScorer: (Double, Double) => Double = (sourceScore, similarScore) =>
    sourceScore * similarScore

  /** Scores an expanded candidate with the source candidate's score only. */
  val SourceScorer: (Double, Double) => Double = (sourceScore, _) => sourceScore

  /** Score assumed for candidates that carry no score. */
  val DefaultScore = 0.0d

  /**
   * Builds the source details inherited by an expanded candidate: the primary candidate
   * source of the original candidate is recorded in the score map together with the
   * original candidate's score, and the primary source itself is cleared. Yields None
   * when the original candidate has no details or no primary source.
   */
  private def inheritedSourceDetails(
    candidate: CandidateUser
  ): Option[UserCandidateSourceDetails] =
    for {
      details <- candidate.userCandidateSourceDetails
      primarySource <- details.primaryCandidateSource
    } yield UserCandidateSourceDetails(
      primaryCandidateSource = None,
      candidateSourceScores = Map(primarySource -> candidate.score))

  /**
   * Builds an expanded candidate whose reason is "similar to <original candidate>".
   * The candidate source identifier argument is ignored.
   */
  val DefaultCandidateBuilder: (
    Long,
    CandidateSourceIdentifier,
    Double,
    CandidateUser
  ) => CandidateUser =
    (userId, _, score, candidate) =>
      CandidateUser(
        id = userId,
        score = Some(score),
        userCandidateSourceDetails = inheritedSourceDetails(candidate),
        reason =
          Some(Reason(Some(AccountProof(similarToProof = Some(SimilarToProof(Seq(candidate.id)))))))
      )

  /**
   * Same as [[DefaultCandidateBuilder]], but additionally carries over the original
   * candidate's "followed by" ids as the follow proof, with `numIds` set to their count.
   */
  val FollowClusterCandidateBuilder: (
    Long,
    CandidateSourceIdentifier,
    Double,
    CandidateUser
  ) => CandidateUser =
    (userId, _, score, candidate) => {
      val inheritedFollowProof =
        for {
          reason <- candidate.reason
          accountProof <- reason.accountProof
          followProof <- accountProof.followProof
        } yield {
          val followers = followProof.followedBy
          FollowProof(followers, followers.size)
        }

      CandidateUser(
        id = userId,
        score = Some(score),
        userCandidateSourceDetails = inheritedSourceDetails(candidate),
        reason = Some(
          Reason(
            Some(
              AccountProof(
                similarToProof = Some(SimilarToProof(Seq(candidate.id))),
                followProof = inheritedFollowProof)))
        )
      )
    }
}
|
||||
|
||||
/**
 * Strategies for collapsing the scores of several candidates that share the same id
 * into one score.
 */
object ScoreAggregator {

  // Result for an empty input. Matches what `Sum` yields for an empty Seq, so both
  // aggregators agree instead of `Max` throwing UnsupportedOperationException.
  private val EmptyScore = 0.0d

  /**
   * Aggregates candidates with the same id by taking the largest score.
   * Returns 0.0 when no scores are given.
   */
  val Max: Seq[Double] => Double = (candidateScores: Seq[Double]) =>
    if (candidateScores.isEmpty) EmptyScore else candidateScores.max

  /**
   * Aggregates candidates with the same id by taking the sum of the scores.
   * Returns 0.0 when no scores are given.
   */
  val Sum: Seq[Double] => Double = (candidateScores: Seq[Double]) => candidateScores.sum
}
|
@ -1,86 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.transforms.modify_social_proof.ModifySocialProof
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.util.Duration
|
||||
|
||||
/**
 * Decorates another candidate source so that only candidates with at least
 * `minNumSocialProofsRequired` "followed by" entries are returned.
 *
 * Candidates from the wrapped source that already satisfy the requirement pass through
 * unchanged. The remaining ones (capped by the `MaxNumCandidatesToAnnotate` param) are
 * sent to `modifySocialProof.hydrateSocialProof`; an annotated candidate is kept only if
 * it then satisfies the requirement. Every returned candidate is tagged with this
 * source's identifier.
 *
 * @param candidateSource            the wrapped source
 * @param modifySocialProof          hydrator used to fetch missing social proof
 * @param minNumSocialProofsRequired minimum number of followed-by entries required
 * @param identifier                 identifier of this source
 * @param baseStatsReceiver          stats receiver, scoped by the identifier name
 */
abstract class SocialProofEnforcedCandidateSource(
  candidateSource: CandidateSource[HasClientContext with HasParams, CandidateUser],
  modifySocialProof: ModifySocialProof,
  minNumSocialProofsRequired: Int,
  override val identifier: CandidateSourceIdentifier,
  baseStatsReceiver: StatsReceiver)
    extends CandidateSource[HasClientContext with HasParams, CandidateUser] {

  val statsReceiver = baseStatsReceiver.scope(identifier.name)

  /** True when the candidate already carries enough followed-by entries. */
  private def hasEnoughSocialProof(candidate: CandidateUser): Boolean =
    candidate.followedBy.exists(_.size >= minNumSocialProofsRequired)

  override def apply(target: HasClientContext with HasParams): Stitch[Seq[CandidateUser]] = {
    // All tunables are read from the request params up front.
    val mustCallSgs: Boolean = target.params(SocialProofEnforcedCandidateSourceParams.MustCallSgs)
    val callSgsCachedColumn: Boolean =
      target.params(SocialProofEnforcedCandidateSourceParams.CallSgsCachedColumn)
    val queryIntersectionIdsNum: Int =
      target.params(SocialProofEnforcedCandidateSourceParams.QueryIntersectionIdsNum)
    val maxNumCandidatesToAnnotate: Int =
      target.params(SocialProofEnforcedCandidateSourceParams.MaxNumCandidatesToAnnotate)
    val gfsIntersectionIdsNum: Int =
      target.params(SocialProofEnforcedCandidateSourceParams.GfsIntersectionIdsNum)
    val sgsIntersectionIdsNum: Int =
      target.params(SocialProofEnforcedCandidateSourceParams.SgsIntersectionIdsNum)
    val gfsLagDuration: Duration =
      target.params(SocialProofEnforcedCandidateSourceParams.GfsLagDurationInDays)

    candidateSource(target).flatMap { candidates =>
      val lackingSocialProof = candidates.filterNot(hasEnoughSocialProof)
      statsReceiver
        .stat("candidates_with_no_social_proofs").add(lackingSocialProof.size)

      val candidatesToAnnotate = lackingSocialProof.take(maxNumCandidatesToAnnotate)
      statsReceiver.stat("candidates_to_annotate").add(candidatesToAnnotate.size)

      // Without a user id nothing can be hydrated, so the lookup table stays empty.
      val annotatedByIdStitch = target.getOptionalUserId match {
        case Some(userId) =>
          modifySocialProof
            .hydrateSocialProof(
              userId,
              candidatesToAnnotate,
              Some(queryIntersectionIdsNum),
              mustCallSgs,
              callSgsCachedColumn,
              gfsLagDuration = gfsLagDuration,
              gfsIntersectionIds = gfsIntersectionIdsNum,
              sgsIntersectionIds = sgsIntersectionIdsNum
            ).map { annotatedCandidates =>
              annotatedCandidates.map(annotated => (annotated.id, annotated)).toMap
            }
        case None =>
          Stitch.value(Map.empty[Long, CandidateUser])
      }

      annotatedByIdStitch.map { annotatedById =>
        candidates
          .flatMap { candidate =>
            if (hasEnoughSocialProof(candidate)) Some(candidate)
            else annotatedById.get(candidate.id).filter(hasEnoughSocialProof)
          }
          .map(_.withCandidateSource(identifier))
      }
    }
  }
}
|
@ -1,30 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSName
|
||||
import com.twitter.timelines.configapi.HasDurationConversion
|
||||
import com.twitter.timelines.configapi.Param
|
||||
import com.twitter.util.Duration
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Registers the feature-switch params declared in
 * [[SocialProofEnforcedCandidateSourceParams]], grouped by value type.
 */
@Singleton
class SocialProofEnforcedCandidateSourceFSConfig @Inject() () extends FeatureSwitchConfig {
  import SocialProofEnforcedCandidateSourceParams._

  /** Boolean feature switches. */
  override val booleanFSParams: Seq[Param[Boolean] with FSName] =
    Seq(MustCallSgs, CallSgsCachedColumn)

  /** Bounded integer feature switches. */
  override val intFSParams: Seq[FSBoundedParam[Int]] =
    Seq(
      QueryIntersectionIdsNum,
      MaxNumCandidatesToAnnotate,
      GfsIntersectionIdsNum,
      SgsIntersectionIdsNum)

  /** Bounded duration feature switches. */
  override val durationFSParams: Seq[FSBoundedParam[Duration] with HasDurationConversion] =
    Seq(GfsLagDurationInDays)
}
|
@ -1,56 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.timelines.configapi.DurationConversion
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
import com.twitter.timelines.configapi.HasDurationConversion
|
||||
import com.twitter.util.Duration
|
||||
|
||||
/**
 * Feature-switch params consumed by `SocialProofEnforcedCandidateSource`.
 * All integer params are bounded to the range [0, Int.MaxValue].
 */
object SocialProofEnforcedCandidateSourceParams {

  /** Boolean switch, on by default. */
  case object MustCallSgs
      extends FSParam[Boolean]("social_proof_enforced_candidate_source_must_call_sgs", true)

  /** Boolean switch, off by default. */
  case object CallSgsCachedColumn
      extends FSParam[Boolean](
        "social_proof_enforced_candidate_source_call_sgs_cached_column",
        false)

  /** Defaults to 3. */
  case object QueryIntersectionIdsNum
      extends FSBoundedParam[Int](
        name = "social_proof_enforced_candidate_source_query_intersection_ids_num",
        default = 3,
        min = 0,
        max = Int.MaxValue)

  /** Upper bound on how many candidates get annotated per request; defaults to 50. */
  case object MaxNumCandidatesToAnnotate
      extends FSBoundedParam[Int](
        name = "social_proof_enforced_candidate_source_max_num_candidates_to_annotate",
        default = 50,
        min = 0,
        max = Int.MaxValue)

  /** Defaults to 3. */
  case object GfsIntersectionIdsNum
      extends FSBoundedParam[Int](
        name = "social_proof_enforced_candidate_source_gfs_intersection_ids_num",
        default = 3,
        min = 0,
        max = Int.MaxValue)

  /** Defaults to 10. */
  case object SgsIntersectionIdsNum
      extends FSBoundedParam[Int](
        name = "social_proof_enforced_candidate_source_sgs_intersection_ids_num",
        default = 10,
        min = 0,
        max = Int.MaxValue)

  /** Duration param expressed in days: default 14, allowed range 1 to 60. */
  case object GfsLagDurationInDays
      extends FSBoundedParam[Duration](
        name = "social_proof_enforced_candidate_source_gfs_lag_duration_in_days",
        default = 14.days,
        min = 1.days,
        max = 60.days)
      with HasDurationConversion {
    // The raw feature-switch value is interpreted as a number of days.
    override val durationConversion: DurationConversion = DurationConversion.FromDays
  }
}
|
@ -1,27 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.client.Fetcher
|
||||
|
||||
/**
 * Candidate source backed by a Strato [[Fetcher]].
 *
 * @param fetcher    fetcher queried with the target as key
 * @param view       view passed to every fetch
 * @param identifier identifier stamped on every returned candidate
 * @tparam K key (and target) type
 * @tparam U view type
 * @tparam V value type returned by the fetcher
 */
abstract class StratoFetcherSource[K, U, V](
  fetcher: Fetcher[K, U, V],
  view: U,
  override val identifier: CandidateSourceIdentifier)
    extends CandidateSource[K, CandidateUser] {

  /** Converts the value fetched for `user` into candidates. */
  def map(user: K, v: V): Seq[CandidateUser]

  /**
   * Fetches the value for `target` and converts it with [[map]]; a missing value
   * yields no candidates.
   */
  override def apply(target: K): Stitch[Seq[CandidateUser]] =
    fetcher.fetch(target, view).map { fetched =>
      val converted = fetched.v match {
        case Some(value) => map(target, value)
        case None => Nil
      }
      converted.map(_.withCandidateSource(identifier))
    }
}
|
@ -1,9 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.strato.client.Fetcher
|
||||
|
||||
/**
 * A [[StratoFetcherSource]] for fetchers whose view type is `Unit`.
 *
 * @param fetcher    fetcher that takes no view
 * @param identifier identifier stamped on every returned candidate
 * @tparam K key (and target) type
 * @tparam V value type returned by the fetcher
 */
abstract class StratoFetcherWithUnitViewSource[K, V](
  fetcher: Fetcher[K, Unit, V],
  override val identifier: CandidateSourceIdentifier)
    // Pass the unit value `()`. The bare name `Unit` denotes the companion object and
    // only type-checks because the compiler discards it and substitutes `()`.
    extends StratoFetcherSource[K, Unit, V](fetcher, (), identifier)
|
@ -1,71 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.follow_recommendations.common.models.TweetCandidate
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.stitch.Stitch
|
||||
|
||||
/**
 * Base trait for algorithms that recommend the authors of tweets, e.g. topical tweet
 * authors, twistly, ...
 *
 * @tparam Target target type
 * @tparam Candidate output candidate type
 */
trait TweetAuthorsCandidateSource[-Target, +Candidate] extends CandidateSource[Target, Candidate] {

  /** Fetches the tweet candidates for the target. */
  def getTweetCandidates(target: Target): Stitch[Seq[TweetCandidate]]

  /** Fetches the author id of a tweet candidate, if there is one. */
  def getTweetAuthorId(tweetCandidate: TweetCandidate): Stitch[Option[Long]]

  /** Wraps the author id and its supporting tweet ids (the tweet-author proof) in a Candidate. */
  def toCandidate(authorId: Long, tweetIds: Seq[Long], score: Option[Double]): Candidate

  /**
   * Aggregates scores. By default the first score is used, or
   * `TweetAuthorsCandidateSource.DefaultScore` when there is none.
   */
  def aggregator(scores: Seq[Double]): Double =
    scores.headOption match {
      case Some(firstScore) => firstScore
      case None => TweetAuthorsCandidateSource.DefaultScore
    }

  /** Turns a group of author-hydrated tweet candidates into output candidates. */
  def aggregateAndScore(
    target: Target,
    tweetCandidates: Seq[TweetCandidate]
  ): Seq[Candidate]

  /**
   * Generates the candidates for the target: fetches the tweet candidates, resolves
   * the author of each one, drops tweets whose author is unknown, and hands the rest
   * to [[aggregateAndScore]].
   */
  def build(
    target: Target
  ): Stitch[Seq[Candidate]] = {
    val tweetsWithAuthors: Stitch[Seq[TweetCandidate]] =
      getTweetCandidates(target).flatMap { tweets =>
        Stitch.collect(tweets.map(getTweetAuthorId(_))).map { authorIdOpts =>
          // Author ids come back in the same order as the tweets they belong to.
          authorIdOpts.zip(tweets).collect {
            case (Some(authorId), tweet) => tweet.copy(authorId = authorId)
          }
        }
      }

    tweetsWithAuthors.map(hydrated => aggregateAndScore(target, hydrated))
  }

  /** Entry point of the candidate source; delegates to [[build]]. */
  def apply(target: Target): Stitch[Seq[Candidate]] =
    build(target)
}

object TweetAuthorsCandidateSource {

  /** Score used by the default aggregator when no score is available. */
  final val DefaultScore: Double = 0.0
}
|
@ -1,46 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.base
|
||||
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.stitch.Stitch
|
||||
|
||||
/**
 * Base trait for two-hop expansion algorithms, e.g. online_stp, phonebook_prediction,
 * recent following sims, recent engagement sims, ...
 *
 * @tparam Target target type
 * @tparam FirstDegree type of first degree nodes
 * @tparam SecondaryDegree type of secondary degree nodes
 * @tparam Candidate output candidate type
 */
trait TwoHopExpansionCandidateSource[-Target, FirstDegree, SecondaryDegree, +Candidate]
    extends CandidateSource[Target, Candidate] {

  /** Fetches the first degree nodes for the request. */
  def firstDegreeNodes(req: Target): Stitch[Seq[FirstDegree]]

  /** Fetches the secondary degree nodes reachable from one first degree node. */
  def secondaryDegreeNodes(req: Target, node: FirstDegree): Stitch[Seq[SecondaryDegree]]

  /** Aggregates and scores the expanded nodes to produce the final candidates. */
  def aggregateAndScore(
    req: Target,
    firstDegreeToSecondDegreeNodesMap: Map[FirstDegree, Seq[SecondaryDegree]]
  ): Stitch[Seq[Candidate]]

  /**
   * Generates the candidates for the target: expands every first degree node into its
   * secondary degree nodes and passes the resulting map to [[aggregateAndScore]].
   *
   * The map is keyed by first degree node, so if the same node appears more than once
   * only the expansion of its last occurrence is retained.
   */
  def apply(target: Target): Stitch[Seq[Candidate]] =
    firstDegreeNodes(target).flatMap { firstHops =>
      Stitch.traverse(firstHops)(secondaryDegreeNodes(target, _)).flatMap { secondHops =>
        val expansionByFirstHop = firstHops.zip(secondHops).toMap
        aggregateAndScore(target, expansionByFirstHop)
      }
    }
}
|
@ -1,22 +0,0 @@
|
||||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/inject:guice",
|
||||
"escherbird/src/scala/com/twitter/escherbird/util/stitchcache",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/geoduck",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
|
||||
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/candidate_source",
|
||||
"src/thrift/com/twitter/onboarding/relevance/crowd_search_accounts:crowd_search_accounts-scala",
|
||||
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"util/util-core/src/main/scala/com/twitter/conversions",
|
||||
],
|
||||
)
|
@ -1,18 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts
|
||||
|
||||
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSName
|
||||
import com.twitter.timelines.configapi.Param
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Registers the boolean and double feature-switch params declared in
 * [[CrowdSearchAccountsParams]].
 */
@Singleton
class CrowdSearchAccountsFSConfig @Inject() () extends FeatureSwitchConfig {
  import CrowdSearchAccountsParams._

  /** Boolean feature switches. */
  override val booleanFSParams: Seq[Param[Boolean] with FSName] =
    Seq(CandidateSourceEnabled)

  /** Bounded double feature switches. */
  override val doubleFSParams: Seq[FSBoundedParam[Double]] =
    Seq(CandidateSourceWeight)
}
|
@ -1,32 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts
|
||||
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSEnumSeqParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Feature-switch params for the CrowdSearchAccounts candidate source. */
object CrowdSearchAccountsParams {

  /** Whether or not to fetch CrowdSearchAccounts candidates; off by default. */
  case object CandidateSourceEnabled
      extends FSParam[Boolean]("crowd_search_accounts_candidate_source_enabled", false)

  /**
   * The logics used for account filtering and ranking. There are four of them, declared
   * in the enumeration `AccountsFilteringAndRankingLogicId`:
   *  - NewSearchesDaily: top searched accounts with max daily searches based on new users
   *  - NewSearchesWeekly: top searched accounts with max weekly searches based on new users
   *  - SearchesDaily: top searched accounts with max daily searches
   *  - SearchesWeekly: top searched accounts with max weekly searches
   *
   * Defaults to SearchesWeekly only.
   */
  case object AccountsFilteringAndRankingLogics
      extends FSEnumSeqParam[AccountsFilteringAndRankingLogicId.type](
        name = "crowd_search_accounts_filtering_and_ranking_logic_ids",
        default = Seq(AccountsFilteringAndRankingLogicId.SearchesWeekly),
        enum = AccountsFilteringAndRankingLogicId)

  /** Weight of this candidate source: default 1200, allowed range 0.001 to 2000. */
  case object CandidateSourceWeight
      extends FSBoundedParam[Double](
        "crowd_search_accounts_candidate_source_weight",
        default = 1200.0,
        min = 0.001,
        max = 2000.0)
}
|
@ -1,111 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts
|
||||
|
||||
import com.twitter.escherbird.util.stitchcache.StitchCache
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts.CrowdSearchAccountsParams.AccountsFilteringAndRankingLogics
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts.CrowdSearchAccountsParams.CandidateSourceEnabled
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.HasGeohashAndCountryCode
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.onboarding.relevance.crowd_search_accounts.thriftscala.CrowdSearchAccounts
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.CrowdSearchAccountsClientColumn
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.util.logging.Logging
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Identifiers of the account filtering and ranking logics. The string form of each
 * value is the suffix appended to the country code when building a cache key.
 */
object AccountsFilteringAndRankingLogicId extends Enumeration {
  type AccountsFilteringAndRankingLogicId = Value

  // Ids are spelled out explicitly; they equal the ones auto-assigned by declaration order.
  val NewSearchesDaily: Value = Value(0, "new_searches_daily")
  val NewSearchesWeekly: Value = Value(1, "new_searches_weekly")
  val SearchesDaily: Value = Value(2, "searches_daily")
  val SearchesWeekly: Value = Value(3, "searches_weekly")
}
|
||||
|
||||
/** Constants and type aliases shared by the CrowdSearchAccounts candidate source. */
object CrowdSearchAccountsSource {

  /** Request shape accepted by the source. */
  type Target = HasParams with HasClientContext with HasGeohashAndCountryCode

  /** Maximum number of entries kept in the lookup cache. */
  val MaxCacheSize = 500

  /** Time-to-live of a cache entry: 24 hours. */
  val CacheTTL: Duration = Duration.fromHours(24)

  /** Identifier derived from the CrowdSearchAccounts algorithm name. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.CrowdSearchAccounts.toString)
}
|
||||
|
||||
/**
 * Candidate source returning the accounts stored in the CrowdSearchAccounts Strato
 * column for the requester's country.
 *
 * Lookups are keyed by "<UPPER-CASED COUNTRY CODE>-<logic>" and go through a read-through
 * cache, with one lookup per logic configured in `AccountsFilteringAndRankingLogics`.
 *
 * @param crowdSearchAccountsClientColumn Strato column holding the accounts
 * @param statsReceiver                   stats receiver, scoped by the identifier name
 */
@Singleton
class CrowdSearchAccountsSource @Inject() (
  crowdSearchAccountsClientColumn: CrowdSearchAccountsClientColumn,
  statsReceiver: StatsReceiver
) extends CandidateSource[CrowdSearchAccountsSource.Target, CandidateUser]
    with Logging {

  /** @see [[CandidateSourceIdentifier]] */
  override val identifier: CandidateSourceIdentifier =
    CrowdSearchAccountsSource.Identifier

  private val stats = statsReceiver.scope(identifier.name)
  private val requestsStats = stats.counter("requests")
  private val noCountryCodeStats = stats.counter("no_country_code")
  private val successStats = stats.counter("success")
  private val errorStats = stats.counter("error")

  // Read-through cache in front of the Strato column; a missing row is cached as None.
  private val cache = StitchCache[String, Option[CrowdSearchAccounts]](
    maxCacheSize = CrowdSearchAccountsSource.MaxCacheSize,
    ttl = CrowdSearchAccountsSource.CacheTTL,
    statsReceiver = statsReceiver.scope(identifier.name, "cache"),
    underlyingCall = (k: String) => {
      crowdSearchAccountsClientColumn.fetcher
        .fetch(k)
        .map { result => result.v }
    }
  )

  /**
   * Returns the crowd-search accounts for the target's country.
   *
   * Yields no candidates when the source is disabled through `CandidateSourceEnabled`
   * or when no country code can be determined from the target. The "requests" counter
   * is only incremented for enabled requests.
   */
  override def apply(
    target: CrowdSearchAccountsSource.Target
  ): Stitch[Seq[CandidateUser]] = {
    if (!target.params(CandidateSourceEnabled)) {
      Stitch.value(Seq[CandidateUser]())
    } else {
      requestsStats.incr()
      val countryCodeOpt =
        target.getCountryCode.orElse(target.geohashAndCountryCode.flatMap(_.countryCode))

      countryCodeOpt match {
        case Some(countryCode) =>
          // Locale.ROOT keeps the key independent of the JVM default locale; a plain
          // toUpperCase() would turn "it" into "İT" under a Turkish locale and miss.
          val keyPrefix = countryCode.toUpperCase(java.util.Locale.ROOT)
          val lookups = target
            .params(AccountsFilteringAndRankingLogics)
            .map(logic => cache.readThrough(keyPrefix + "-" + logic))

          Stitch
            .collect(lookups)
            .onSuccess(_ => successStats.incr())
            .onFailure { t =>
              debug("candidate source failed identifier = %s".format(identifier), t)
              errorStats.incr()
            }
            .map(transformCrowdSearchAccountsToCandidateSource)

        case None =>
          noCountryCodeStats.incr()
          Stitch.value(Seq[CandidateUser]())
      }
    }
  }

  /**
   * Flattens the fetched rows into candidates scored by `searchActivityScore` and
   * tagged with this source's identifier. Missing rows contribute nothing.
   */
  private def transformCrowdSearchAccountsToCandidateSource(
    crowdSearchAccounts: Seq[Option[CrowdSearchAccounts]]
  ): Seq[CandidateUser] =
    for {
      rowOpt <- crowdSearchAccounts
      row <- rowOpt.toSeq
      account <- row.accounts
    } yield CandidateUser(
      id = account.accountId,
      score = Some(account.searchActivityScore)
    ).withCandidateSource(identifier)
}
|
@ -1,4 +0,0 @@
|
||||
# Crowd Search Candidate Source
|
||||
Provides the most searched accounts within a specific country over the past 1 and 7 days.
|
||||
* "Most searched accounts" are the accounts that users click on most frequently after seeing search results, in both the typeahead and the search results page.
|
||||
* The results returned by the service have undergone health filters.
|
@ -1,23 +0,0 @@
|
||||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/inject:guice",
|
||||
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
|
||||
"3rdparty/jvm/net/codingwell:scala-guice",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-api",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/geoduck",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
|
||||
"src/thrift/com/twitter/hermit/pop_geo:hermit-pop-geo-scala",
|
||||
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"util/util-slf4j-api/src/main/scala",
|
||||
],
|
||||
)
|
@ -1,74 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.finagle.stats.Counter
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.HasGeohashAndCountryCode
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Candidate source that looks up popular accounts by geohash prefix.
 *
 * The request's geohash is truncated to every length between `minGeohashLength` and
 * `maxGeohashLength` (capped at the geohash's own length) and each prefix is looked up
 * in `popGeoSource` under the key "geohash_<prefix>", longest prefix first.
 *
 * @param popGeoSource  source queried with the geohash keys
 * @param statsReceiver receiver for the found/missing geohash counters
 */
@Singleton
class BasePopGeohashSource @Inject() (
  popGeoSource: CandidateSource[String, CandidateUser],
  statsReceiver: StatsReceiver)
    extends CandidateSource[
      HasParams with HasClientContext with HasGeohashAndCountryCode,
      CandidateUser
    ]
    with BasePopGeohashSourceConfig {

  val stats: StatsReceiver = statsReceiver

  // Incremented when the request carries a geohash value.
  val foundGeohashCounter: Counter = stats.counter("found_geohash_value")
  // Incremented when the request carries no geohash value.
  val missingGeohashCounter: Counter = stats.counter("missing_geohash_value")

  /** @see [[CandidateSourceIdentifier]] */
  override val identifier: CandidateSourceIdentifier = CandidateSourceIdentifier(
    "BasePopGeohashSource")

  /**
   * Returns nothing when the source is disabled or the request has no geohash.
   * Otherwise returns either the concatenated results of all precisions, or the first
   * `maxResults` candidates of the most precise prefix that has any result.
   */
  override def apply(
    target: HasParams with HasClientContext with HasGeohashAndCountryCode
  ): Stitch[Seq[CandidateUser]] = {
    if (!candidateSourceEnabled(target)) {
      Stitch.Nil
    } else {
      target.geohashAndCountryCode.flatMap(_.geohash) match {
        case Some(geohash) =>
          foundGeohashCounter.incr()
          val shortestPrefix = minGeohashLength(target)
          val longestPrefix = math.min(maxGeohashLength(target), geohash.length)
          // Longest (most precise) prefix first.
          val keys = (shortestPrefix to longestPrefix).reverse
            .map(prefixLength => "geohash_" + geohash.take(prefixLength))

          if (returnResultFromAllPrecision(target)) {
            Stitch.collect(keys.map(popGeoSource.apply)).map { resultsPerKey =>
              resultsPerKey.flatten.map(_.withCandidateSource(identifier))
            }
          } else {
            Stitch.collect(keys.map(popGeoSource.apply)).map { resultsPerKey =>
              val mostPrecise = resultsPerKey.find(_.nonEmpty).getOrElse(Nil)
              mostPrecise.take(maxResults(target)).map(_.withCandidateSource(identifier))
            }
          }

        case None =>
          missingGeohashCounter.incr()
          Stitch.Nil
      }
    }
  }
}
|
||||
|
||||
/**
 * Tunables of [[BasePopGeohashSource]]. Every setting receives the request, and the
 * defaults below ignore it.
 */
trait BasePopGeohashSourceConfig {
  type Target = HasParams with HasClientContext

  /** Cap on the returned candidates when only one precision is used; 200 by default. */
  def maxResults(target: Target): Int = 200

  /** Shortest geohash prefix length that is looked up; 2 by default. */
  def minGeohashLength(target: Target): Int = 2

  /** Longest geohash prefix length that is looked up; 4 by default. */
  def maxGeohashLength(target: Target): Int = 4

  /** Whether results of all precisions are merged; false by default. */
  def returnResultFromAllPrecision(target: Target): Boolean = false

  /** Whether the source is enabled at all; false by default. */
  def candidateSourceEnabled(target: Target): Boolean = false
}
|
@ -1,33 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Back-fill source that serves the popular accounts stored under
 * [[PopCountryBackFillSource.DefaultKey]] to any request that carries a user id.
 */
@Singleton
class PopCountryBackFillSource @Inject() (popGeoSource: PopGeoSource)
    extends CandidateSource[HasClientContext with HasParams, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = PopCountryBackFillSource.Identifier

  /**
   * @param target the request; only the presence of a user id is inspected
   * @return at most [[PopCountryBackFillSource.MaxResults]] candidates tagged with this
   *         source's identifier, or an empty result when there is no user id
   */
  override def apply(target: HasClientContext with HasParams): Stitch[Seq[CandidateUser]] =
    target.getOptionalUserId match {
      case Some(_) =>
        popGeoSource(PopCountryBackFillSource.DefaultKey).map { candidates =>
          candidates
            .take(PopCountryBackFillSource.MaxResults)
            .map(_.withCandidateSource(identifier))
        }
      case None =>
        Stitch.Nil
    }
}
|
||||
|
||||
/** Constants used by the [[PopCountryBackFillSource]] class. */
object PopCountryBackFillSource {

  /** Identifier derived from the `PopCountryBackFill` algorithm name. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PopCountryBackFill.toString)

  /** Upper bound on the number of candidates returned per request. */
  val MaxResults = 40

  /** The single key that is always looked up in the pop-geo source. */
  val DefaultKey = "country_US"
}
|
@ -1,63 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.core_workflows.user_model.thriftscala.UserState
|
||||
import com.twitter.finagle.stats.Counter
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.HasGeohashAndCountryCode
|
||||
import com.twitter.follow_recommendations.common.models.HasUserState
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Serves popular accounts for the country code found on the request.
 *
 * Targets whose user state is listed in [[PopCountrySource.BlacklistedTargetUserStates]]
 * receive no candidates.
 */
@Singleton
class PopCountrySource @Inject() (
  popGeoSource: PopGeoSource,
  statsReceiver: StatsReceiver)
    extends CandidateSource[
      HasClientContext with HasParams with HasUserState with HasGeohashAndCountryCode,
      CandidateUser
    ] {

  override val identifier: CandidateSourceIdentifier = PopCountrySource.Identifier

  val stats: StatsReceiver = statsReceiver.scope("PopCountrySource")

  /** Incremented for every request that carries a country code. */
  val foundCountryCodeCounter: Counter = stats.counter("found_country_code_value")

  /** Incremented for every request that carries no country code. */
  val missingCountryCodeCounter: Counter = stats.counter("missing_country_code_value")

  /**
   * @param target request carrying the optional country code and user state
   * @return at most [[PopCountrySource.MaxResults]] candidates stored under
   *         `country_<countryCode>`, tagged with this source's identifier; empty when
   *         the country code is missing or the user state is blacklisted
   */
  override def apply(
    target: HasClientContext with HasParams with HasUserState with HasGeohashAndCountryCode
  ): Stitch[Seq[CandidateUser]] =
    target.geohashAndCountryCode.flatMap(_.countryCode) match {
      case None =>
        missingCountryCodeCounter.incr()
        Stitch.Nil
      case Some(countryCode) =>
        // The "found" counter is bumped even when the user state check rejects the target.
        foundCountryCodeCounter.incr()
        val isBlacklisted =
          target.userState.exists(PopCountrySource.BlacklistedTargetUserStates.contains)
        if (isBlacklisted) {
          Stitch.Nil
        } else {
          popGeoSource(s"country_$countryCode").map { candidates =>
            candidates.take(PopCountrySource.MaxResults).map(_.withCandidateSource(identifier))
          }
        }
    }
}
|
||||
|
||||
/** Constants used by the [[PopCountrySource]] class. */
object PopCountrySource {

  /** Identifier derived from the `PopCountry` algorithm name. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PopCountry.toString)

  /** Upper bound on the number of candidates returned per request. */
  val MaxResults = 40

  /** User states for which the source returns no candidates. */
  val BlacklistedTargetUserStates: Set[UserState] =
    Set(
      UserState.HeavyTweeter,
      UserState.HeavyNonTweeter,
      UserState.MediumTweeter,
      UserState.MediumNonTweeter
    )
}
|
@ -1,99 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.escherbird.util.stitchcache.StitchCache
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.models.AccountProof
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.PopularInGeoProof
|
||||
import com.twitter.follow_recommendations.common.models.Reason
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.hermit.pop_geo.thriftscala.PopUsersInPlace
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.UniquePopQualityFollowUsersInPlaceClientColumn
|
||||
import com.twitter.util.Duration
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Geohash source backed by [[PopGeoQualityFollowSource]]; every setting is read from
 * [[PopGeoQualityFollowSourceParams]] on the target's params.
 */
@Singleton
class PopGeohashQualityFollowSource @Inject() (
  popGeoSource: PopGeoQualityFollowSource,
  statsReceiver: StatsReceiver)
    extends BasePopGeohashSource(
      popGeoSource = popGeoSource,
      statsReceiver = statsReceiver.scope("PopGeohashQualityFollowSource")
    ) {

  override val identifier: CandidateSourceIdentifier = PopGeohashQualityFollowSource.Identifier

  /** Enabled flag resolved from the target's params. */
  override def candidateSourceEnabled(target: Target): Boolean =
    target.params(PopGeoQualityFollowSourceParams.CandidateSourceEnabled)

  /** Shortest geohash prefix length, resolved from the target's params. */
  override def minGeohashLength(target: Target): Int =
    target.params(PopGeoQualityFollowSourceParams.PopGeoSourceGeoHashMinPrecision)

  /** Longest geohash prefix length, resolved from the target's params. */
  override def maxGeohashLength(target: Target): Int =
    target.params(PopGeoQualityFollowSourceParams.PopGeoSourceGeoHashMaxPrecision)

  /** Whether all prefix lengths contribute results, resolved from the target's params. */
  override def returnResultFromAllPrecision(target: Target): Boolean =
    target.params(PopGeoQualityFollowSourceParams.PopGeoSourceReturnFromAllPrecisions)

  /** Result cap, resolved from the target's params. */
  override def maxResults(target: Target): Int =
    target.params(PopGeoQualityFollowSourceParams.PopGeoSourceMaxResultsPerPrecision)
}
|
||||
|
||||
/** Constants used by the [[PopGeohashQualityFollowSource]] class. */
object PopGeohashQualityFollowSource {

  /** Identifier derived from the `PopGeohashQualityFollow` algorithm name. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PopGeohashQualityFollow.toString)
}
|
||||
|
||||
/** Cache and result limits used by the [[PopGeoQualityFollowSource]] class. */
object PopGeoQualityFollowSource {

  /** Maximum number of keys held by the read-through cache. */
  val MaxCacheSize = 20000

  /** How long a cached entry is kept. */
  val CacheTTL: Duration = Duration.fromHours(24)

  /** Upper bound on the number of candidates returned for one key. */
  val MaxResults = 200
}
|
||||
|
||||
/**
 * Candidate source keyed by a place string that reads popular users from the
 * `UniquePopQualityFollowUsersInPlace` Strato column through a read-through cache.
 */
@Singleton
class PopGeoQualityFollowSource @Inject() (
  popGeoQualityFollowClientColumn: UniquePopQualityFollowUsersInPlaceClientColumn,
  statsReceiver: StatsReceiver,
) extends CandidateSource[String, CandidateUser] {

  /** @see [[CandidateSourceIdentifier]] */
  override val identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier("PopGeoQualityFollowSource")

  // Caches the raw column value (including misses, stored as None) per key.
  private val cache = StitchCache[String, Option[PopUsersInPlace]](
    maxCacheSize = PopGeoQualityFollowSource.MaxCacheSize,
    ttl = PopGeoQualityFollowSource.CacheTTL,
    statsReceiver = statsReceiver.scope(identifier.name, "cache"),
    underlyingCall = (key: String) => popGeoQualityFollowClientColumn.fetcher.fetch(key).map(_.v)
  )

  /**
   * @param target the key to look up
   * @return the stored users ordered by descending score, capped at
   *         [[PopGeoQualityFollowSource.MaxResults]], each carrying a popular-in-geo proof
   *         for the stored place; empty when nothing is stored under the key
   */
  override def apply(target: String): Stitch[Seq[CandidateUser]] =
    cache.readThrough(target).map {
      case Some(popUsersInPlace) =>
        val topUsers = popUsersInPlace.popUsers
          .sortBy(popUser => -popUser.score)
          .take(PopGeoQualityFollowSource.MaxResults)
        topUsers.map { popUser =>
          val geoProof = AccountProof(
            popularInGeoProof = Some(PopularInGeoProof(location = popUsersInPlace.place))
          )
          CandidateUser(
            id = popUser.userId,
            score = Some(popUser.score),
            reason = Some(Reason(Some(geoProof)))
          )
        }
      case None =>
        Nil
    }
}
|
@ -1,24 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
|
||||
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSName
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/** Registers the [[PopGeoQualityFollowSourceParams]] feature switches, grouped by value type. */
@Singleton
class PopGeoQualityFollowSourceFSConfig @Inject() () extends FeatureSwitchConfig {

  /** Integer valued feature switches. */
  override val intFSParams: Seq[FSBoundedParam[Int] with FSName] =
    Seq(
      PopGeoQualityFollowSourceParams.PopGeoSourceGeoHashMaxPrecision,
      PopGeoQualityFollowSourceParams.PopGeoSourceGeoHashMinPrecision,
      PopGeoQualityFollowSourceParams.PopGeoSourceMaxResultsPerPrecision
    )

  /** Double valued feature switches. */
  override val doubleFSParams: Seq[FSBoundedParam[Double] with FSName] =
    Seq(PopGeoQualityFollowSourceParams.CandidateSourceWeight)

  /** Boolean valued feature switches. */
  override val booleanFSParams: Seq[FSParam[Boolean] with FSName] =
    Seq(
      PopGeoQualityFollowSourceParams.CandidateSourceEnabled,
      PopGeoQualityFollowSourceParams.PopGeoSourceReturnFromAllPrecisions
    )
}
|
@ -1,42 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Feature switch definitions for the quality-follow geohash source. */
object PopGeoQualityFollowSourceParams {

  /** Turns the source on or off; off by default. */
  case object CandidateSourceEnabled
      extends FSParam[Boolean]("pop_geo_quality_follow_source_enabled", false)

  /** Shortest geohash prefix length; default 2, allowed range 0 to 10. */
  case object PopGeoSourceGeoHashMinPrecision
      extends FSBoundedParam[Int](
        "pop_geo_quality_follow_source_geo_hash_min_precision",
        default = 2,
        min = 0,
        max = 10
      )

  /** Longest geohash prefix length; default 3, allowed range 0 to 10. */
  case object PopGeoSourceGeoHashMaxPrecision
      extends FSBoundedParam[Int](
        "pop_geo_quality_follow_source_geo_hash_max_precision",
        default = 3,
        min = 0,
        max = 10
      )

  /** Whether results of every prefix length are returned together; false by default. */
  case object PopGeoSourceReturnFromAllPrecisions
      extends FSParam[Boolean](
        "pop_geo_quality_follow_source_return_from_all_precisions",
        default = false
      )

  /** Result cap; default 200, allowed range 0 to 1000. */
  case object PopGeoSourceMaxResultsPerPrecision
      extends FSBoundedParam[Int](
        "pop_geo_quality_follow_source_max_results_per_precision",
        default = 200,
        min = 0,
        max = 1000
      )

  /** Weight of this source; default 200, allowed range 0.001 to 2000. */
  case object CandidateSourceWeight
      extends FSBoundedParam[Double](
        "pop_geo_quality_follow_source_weight",
        default = 200,
        min = 0.001,
        max = 2000
      )
}
|
@ -1,69 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.CachedCandidateSource
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.StratoFetcherWithUnitViewSource
|
||||
import com.twitter.follow_recommendations.common.constants.GuiceNamedConstants
|
||||
import com.twitter.follow_recommendations.common.models.AccountProof
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.PopularInGeoProof
|
||||
import com.twitter.follow_recommendations.common.models.Reason
|
||||
import com.twitter.hermit.pop_geo.thriftscala.PopUsersInPlace
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.strato.client.Fetcher
|
||||
import com.twitter.util.Duration
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Strato-backed source that fetches the [[PopUsersInPlace]] stored under a key and
 * converts it with [[BasePopGeoSource.map]].
 */
@Singleton
class BasePopGeoSource @Inject() (
  @Named(GuiceNamedConstants.POP_USERS_IN_PLACE_FETCHER)
  fetcher: Fetcher[String, Unit, PopUsersInPlace])
    extends StratoFetcherWithUnitViewSource[String, PopUsersInPlace](
      fetcher,
      BasePopGeoSource.Identifier
    ) {

  /** Delegates to the companion's conversion so it can be reused without an instance. */
  override def map(target: String, candidates: PopUsersInPlace): Seq[CandidateUser] = {
    BasePopGeoSource.map(target, candidates)
  }
}
|
||||
|
||||
/** Identifier, limits and the value conversion used by the [[BasePopGeoSource]] class. */
object BasePopGeoSource {

  val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier("BasePopGeoSource")

  /** Upper bound on the number of candidates produced from one stored value. */
  val MaxResults = 200

  /**
   * Converts a stored value into candidates.
   *
   * @param target     the key that was fetched; not used by the conversion
   * @param candidates the stored popular users together with their place
   * @return a lazy view over the highest scoring users (descending score, at most
   *         [[MaxResults]]), each carrying a popular-in-geo proof for the stored place
   */
  def map(target: String, candidates: PopUsersInPlace): Seq[CandidateUser] = {
    val topUsers = candidates.popUsers
      .sortBy(popUser => -popUser.score)
      .take(BasePopGeoSource.MaxResults)
    // The view keeps the conversion lazy, exactly as callers have always received it.
    topUsers.view.map { popUser =>
      val geoProof = AccountProof(
        popularInGeoProof = Some(PopularInGeoProof(location = candidates.place))
      )
      CandidateUser(
        id = popUser.userId,
        score = Some(popUser.score),
        reason = Some(Reason(Some(geoProof)))
      )
    }
  }
}
|
||||
|
||||
/**
 * Cached wrapper around [[BasePopGeoSource]], configured with the size and TTL declared
 * on the companion object.
 */
@Singleton
class PopGeoSource @Inject() (
  basePopGeoSource: BasePopGeoSource,
  statsReceiver: StatsReceiver)
    extends CachedCandidateSource[String, CandidateUser](
      basePopGeoSource,
      PopGeoSource.MaxCacheSize,
      PopGeoSource.CacheTTL,
      statsReceiver,
      PopGeoSource.Identifier
    )
|
||||
|
||||
/** Identifier and cache settings used by the [[PopGeoSource]] class. */
object PopGeoSource {

  val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier("PopGeoSource")

  /** Cache size handed to the cached candidate source. */
  val MaxCacheSize = 20000

  /** Cache TTL handed to the cached candidate source. */
  val CacheTTL: Duration = 1.hours
}
|
@ -1,20 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
|
||||
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSName
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/** Registers the [[PopGeoSourceParams]] feature switches, grouped by value type. */
@Singleton
class PopGeoSourceFSConfig @Inject() () extends FeatureSwitchConfig {

  /** Integer valued feature switches. */
  override val intFSParams: Seq[FSBoundedParam[Int] with FSName] =
    Seq(
      PopGeoSourceParams.PopGeoSourceGeoHashMaxPrecision,
      PopGeoSourceParams.PopGeoSourceMaxResultsPerPrecision,
      PopGeoSourceParams.PopGeoSourceGeoHashMinPrecision
    )

  /** Boolean valued feature switches. */
  override val booleanFSParams: Seq[FSParam[Boolean] with FSName] =
    Seq(PopGeoSourceParams.PopGeoSourceReturnFromAllPrecisions)
}
|
@ -1,30 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Feature switch definitions for the pop-geo geohash source. */
object PopGeoSourceParams {

  /** Shortest geohash prefix length; default 2, allowed range 0 to 10. */
  case object PopGeoSourceGeoHashMinPrecision
      extends FSBoundedParam[Int](
        "pop_geo_source_geo_hash_min_precision",
        default = 2,
        min = 0,
        max = 10
      )

  /** Longest geohash prefix length; default 4, allowed range 0 to 10. */
  case object PopGeoSourceGeoHashMaxPrecision
      extends FSBoundedParam[Int](
        "pop_geo_source_geo_hash_max_precision",
        default = 4,
        min = 0,
        max = 10
      )

  /** Whether results of every prefix length are returned together; false by default. */
  case object PopGeoSourceReturnFromAllPrecisions
      extends FSParam[Boolean](
        "pop_geo_source_return_from_all_precisions",
        default = false
      )

  /** Result cap; default 200, allowed range 0 to 1000. */
  case object PopGeoSourceMaxResultsPerPrecision
      extends FSBoundedParam[Int](
        "pop_geo_source_max_results_per_precision",
        default = 200,
        min = 0,
        max = 1000
      )
}
|
@ -1,36 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.geo
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Geohash source backed by [[PopGeoSource]]. It is always enabled; the remaining
 * settings are read from [[PopGeoSourceParams]] on the target's params.
 */
@Singleton
class PopGeohashSource @Inject() (
  popGeoSource: PopGeoSource,
  statsReceiver: StatsReceiver)
    extends BasePopGeohashSource(
      popGeoSource = popGeoSource,
      statsReceiver = statsReceiver.scope("PopGeohashSource")
    ) {

  override val identifier: CandidateSourceIdentifier = PopGeohashSource.Identifier

  /** This source runs for every target. */
  override def candidateSourceEnabled(target: Target): Boolean = true

  /** Shortest geohash prefix length, resolved from the target's params. */
  override def minGeohashLength(target: Target): Int =
    target.params(PopGeoSourceParams.PopGeoSourceGeoHashMinPrecision)

  /** Longest geohash prefix length, resolved from the target's params. */
  override def maxGeohashLength(target: Target): Int =
    target.params(PopGeoSourceParams.PopGeoSourceGeoHashMaxPrecision)

  /** Whether all prefix lengths contribute results, resolved from the target's params. */
  override def returnResultFromAllPrecision(target: Target): Boolean =
    target.params(PopGeoSourceParams.PopGeoSourceReturnFromAllPrecisions)

  /** Result cap, resolved from the target's params. */
  override def maxResults(target: Target): Int =
    target.params(PopGeoSourceParams.PopGeoSourceMaxResultsPerPrecision)
}
|
||||
|
||||
/** Constants used by the [[PopGeohashSource]] class. */
object PopGeohashSource {

  /** Identifier derived from the `PopGeohash` algorithm name. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PopGeohash.toString)
}
|
@ -1,4 +0,0 @@
|
||||
# Pop Geo Candidate Source
|
||||
Provides the most followed / quality followed accounts in a specific country and geolocation within the past 2 weeks.
|
||||
* A "quality follow" refers to any follow that leads to visible engagement, such as favorites, mentions, retweets, direct messages, replies, and quote tweets. The engagement must be allowed in either direction, and must occur on the day of the follow or within one subsequent day. Additionally, there must be no unfollowing, blocking, muting, or reporting of the account in the same time period.
|
||||
* The minimum geolocation precision used is ±20 km (12 mi), and precise user geolocation is not utilized.
|
@ -1,23 +0,0 @@
|
||||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/inject:guice",
|
||||
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
|
||||
"3rdparty/jvm/net/codingwell:scala-guice",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-api",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
|
||||
"src/thrift/com/twitter/hermit/candidate:hermit-candidate-scala",
|
||||
"strato/config/columns/onboarding:onboarding-strato-client",
|
||||
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"util/util-slf4j-api/src/main/scala",
|
||||
],
|
||||
)
|
@ -1,84 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow.PPMILocaleFollowSourceParams.CandidateSourceEnabled
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow.PPMILocaleFollowSourceParams.LocaleToExcludeFromRecommendation
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
import com.twitter.strato.generated.client.onboarding.UserPreferredLanguagesOnUserClientColumn
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.LocaleFollowPpmiClientColumn
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
|
||||
/**
 * Fetches candidates based on the Positive Pointwise Mutual Information (PPMI) statistic
 * for a set of locales.
 *
 * The locales are built as `<countryCode>-<language>` (lower-cased) from the request's
 * country code and the user's preferred languages, minus any locale listed in the
 * `LocaleToExcludeFromRecommendation` param.
 */
@Singleton
class PPMILocaleFollowSource @Inject() (
  userPreferredLanguagesOnUserClientColumn: UserPreferredLanguagesOnUserClientColumn,
  localeFollowPpmiClientColumn: LocaleFollowPpmiClientColumn,
  statsReceiver: StatsReceiver)
    extends CandidateSource[HasClientContext with HasParams, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = PPMILocaleFollowSource.Identifier

  private val stats = statsReceiver.scope("PPMILocaleFollowSource")

  // Per-request sizes are recorded on stats created once. The previous code registered a
  // new gauge on every request and dropped the returned handle, so nothing kept the
  // gauges alive and registrations were repeated for each call.
  private val allLocaleStat = stats.stat("allLocale")
  private val postFilterLocaleStat = stats.stat("postFilterLocale")

  /**
   * @param target request carrying the client context and params
   * @return up to [[PPMILocaleFollowSource.DefaultMaxCandidatesToReturn]] candidates
   *         ordered by descending score; empty when the source is disabled or when the
   *         request has no country code or no user id
   */
  override def apply(target: HasClientContext with HasParams): Stitch[Seq[CandidateUser]] = {
    if (!target.params(CandidateSourceEnabled)) {
      // Checked first so a disabled source does not fetch the user's languages at all.
      Stitch.Nil
    } else {
      val candidates = for {
        countryCode <- target.getCountryCode
        userId <- target.getOptionalUserId
      } yield {
        getPreferredLocales(userId, countryCode.toLowerCase())
          .flatMap { locales =>
            allLocaleStat.add(locales.length.toFloat)
            // Resolved once instead of once per locale.
            val excludedLocales = target.params(LocaleToExcludeFromRecommendation)
            val eligibleLocales = locales.filterNot(locale => excludedLocales.contains(locale))
            postFilterLocaleStat.add(eligibleLocales.length.toFloat)
            getPPMILocaleFollowCandidates(eligibleLocales)
          }
          .map(
            _.sortBy(_.score)(Ordering[Option[Double]].reverse)
              .take(PPMILocaleFollowSource.DefaultMaxCandidatesToReturn))
      }
      candidates.getOrElse(Stitch.Nil)
    }
  }

  /**
   * Fetches the PPMI candidates of every locale and concatenates them.
   *
   * @param locales locale keys to look up
   * @return all candidates found, tagged with this source's identifier; a locale with no
   *         stored value contributes nothing
   */
  private def getPPMILocaleFollowCandidates(
    locales: Seq[String]
  ): Stitch[Seq[CandidateUser]] = {
    Stitch
      .traverse(locales) { locale =>
        // Get PPMI candidates for each locale
        localeFollowPpmiClientColumn.fetcher
          .fetch(locale)
          .map(_.v
            .map(_.candidates).getOrElse(Nil).map { candidate =>
              CandidateUser(id = candidate.userId, score = Some(candidate.score))
            }.map(_.withCandidateSource(identifier)))
      }.map(_.flatten)
  }

  /**
   * Builds the locale keys for a user.
   *
   * @param userId      the user whose preferred languages are fetched
   * @param countryCode the country part of each locale key
   * @return one lower-cased `<countryCode>-<language>` key per preferred language; empty
   *         when no languages are stored for the user
   */
  private def getPreferredLocales(userId: Long, countryCode: String): Stitch[Seq[String]] = {
    userPreferredLanguagesOnUserClientColumn.fetcher
      .fetch(userId)
      .map(_.v.map(_.languages).getOrElse(Nil).map { lang =>
        s"$countryCode-$lang".toLowerCase
      })
  }
}
|
||||
|
||||
/** Constants used by the [[PPMILocaleFollowSource]] class. */
object PPMILocaleFollowSource {

  /** Identifier derived from the `PPMILocaleFollow` algorithm name. */
  val Identifier = CandidateSourceIdentifier(
    Algorithm.PPMILocaleFollow.toString
  )

  /** Upper bound on the number of candidates returned per request. */
  val DefaultMaxCandidatesToReturn = 100
}
|
@ -1,24 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow
|
||||
|
||||
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSName
|
||||
import com.twitter.timelines.configapi.Param
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/** Registers the [[PPMILocaleFollowSourceParams]] feature switches, grouped by value type. */
@Singleton
class PPMILocaleFollowSourceFSConfig @Inject() () extends FeatureSwitchConfig {

  /** Boolean valued feature switches. */
  override val booleanFSParams: Seq[Param[Boolean] with FSName] =
    Seq(PPMILocaleFollowSourceParams.CandidateSourceEnabled)

  /** String-sequence valued feature switches. */
  override val stringSeqFSParams: Seq[Param[Seq[String]] with FSName] =
    Seq(PPMILocaleFollowSourceParams.LocaleToExcludeFromRecommendation)

  /** Double valued feature switches. */
  override val doubleFSParams: Seq[FSBoundedParam[Double]] =
    Seq(PPMILocaleFollowSourceParams.CandidateSourceWeight)
}
|
@ -1,22 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow
|
||||
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Empty companion class; the feature switches live on the object below. */
class PPMILocaleFollowSourceParams {}

/** Feature switch definitions for the PPMI locale follow source. */
object PPMILocaleFollowSourceParams {

  /** Locales that are removed before candidates are fetched; empty by default. */
  case object LocaleToExcludeFromRecommendation
      extends FSParam[Seq[String]](
        "ppmilocale_follow_source_locales_to_exclude_from_recommendation",
        default = Seq.empty
      )

  /** Turns the source on or off; on by default. */
  case object CandidateSourceEnabled
      extends FSParam[Boolean]("ppmilocale_follow_source_enabled", true)

  /** Weight of this source; default 1, allowed range 0.001 to 2000. */
  case object CandidateSourceWeight
      extends FSBoundedParam[Double](
        "ppmilocale_follow_source_candidate_source_weight",
        default = 1,
        min = 0.001,
        max = 2000
      )
}
|
@ -1,6 +0,0 @@
|
||||
# PPMI Locale Follow Candidate Source
|
||||
Provides accounts based on PPMI ([Positive Pointwise Mutual Information](https://en.wikipedia.org/wiki/Pointwise_mutual_information#Positive_PMI)) using follow actions as a feature for a specific locale (language + country) within a week. In simpler terms, it provides a list of the most followed accounts for a given country and language input, based on the PPMI algorithm.
|
||||
|
||||
PPMI is a statistical measure of the association between two events. In this case, it measures the association between the follow actions and the accounts being followed.
|
||||
|
||||
In summary, the service utilizes PPMI and follow actions to provide a list of the most followed accounts for a specific country and language input.
|
@ -1,11 +0,0 @@
|
||||
scala_library(
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/adserver",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/socialgraph",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"src/thrift/com/twitter/socialgraph:thrift-scala",
|
||||
],
|
||||
)
|
@ -1,111 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.promoted_accounts
|
||||
|
||||
import com.twitter.adserver.thriftscala.AdServerException
|
||||
import com.twitter.adserver.{thriftscala => adthrift}
|
||||
import com.twitter.finagle.TimeoutException
|
||||
import com.twitter.finagle.stats.Counter
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.clients.adserver.AdRequest
|
||||
import com.twitter.follow_recommendations.common.clients.adserver.AdserverClient
|
||||
import com.twitter.follow_recommendations.common.clients.socialgraph.SocialGraphClient
|
||||
import com.twitter.follow_recommendations.common.models.FollowProof
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.inject.Logging
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * A promoted account returned by the ad server.
 *
 * @param id                     the promoted account's id
 * @param position               the insertion position for the account
 * @param adImpression           the ad impression the account came from
 * @param followProof            follow proof attached to the account
 * @param primaryCandidateSource identifier of the source that produced the candidate
 */
case class PromotedCandidateUser(
  id: Long,
  position: Int,
  adImpression: adthrift.AdImpression,
  followProof: FollowProof,
  primaryCandidateSource: Option[CandidateSourceIdentifier]
)
|
||||
|
||||
/**
 * Fetches ad impressions from the ad server and turns the ones that name a promoted
 * account into [[PromotedCandidateUser]]s, attaching follow proof obtained from the
 * social graph client.
 */
@Singleton
class PromotedAccountsCandidateSource @Inject() (
  adserverClient: AdserverClient,
  sgsClient: SocialGraphClient,
  statsReceiver: StatsReceiver)
    extends CandidateSource[AdRequest, PromotedCandidateUser]
    with Logging {

  override val identifier: CandidateSourceIdentifier =
    PromotedAccountsCandidateSource.Identifier

  val stats: StatsReceiver = statsReceiver.scope(identifier.name)
  val failureStat: StatsReceiver = stats.scope("failures")
  val adServerExceptionsCounter: Counter = failureStat.counter("AdServerException")
  val timeoutCounter: Counter = failureStat.counter("TimeoutException")

  /**
   * @param request the ad request; its client context supplies the viewing user id
   * @return one candidate per distinct promoted account id; empty when the ad server
   *         times out or raises an `AdServerException`, or when the request has no
   *         user id
   */
  def apply(request: AdRequest): Stitch[Seq[PromotedCandidateUser]] = {
    adserverClient
      .getAdImpressions(request)
      .rescue {
        // Timeouts and ad server errors are counted, logged and turned into "no ads".
        case e: TimeoutException =>
          timeoutCounter.incr()
          logger.warn("Timeout on Adserver", e)
          Stitch.Nil
        case e: AdServerException =>
          adServerExceptionsCounter.incr()
          logger.warn("Failed to fetch ads", e)
          Stitch.Nil
      }
      .flatMap { adImpressions: Seq[adthrift.AdImpression] =>
        profileNumResults(adImpressions.size, "results_from_ad_server")
        // Impressions without a promoted account id are dropped; later duplicates win.
        val impressionsByAccountId = adImpressions.flatMap { impression =>
          impression.promotedAccountId.map(accountId => accountId -> impression)
        }.toMap
        request.clientContext.userId match {
          case Some(userId) =>
            val socialContextAccountIds =
              adImpressions.filter(shouldShowSocialContext).flatMap(_.promotedAccountId)
            sgsClient
              .getIntersections(
                userId,
                socialContextAccountIds,
                PromotedAccountsCandidateSource.NumIntersections
              )
              .map { followProofByAccountId =>
                impressionsByAccountId.map {
                  case (promotedAccountId, impression) =>
                    val position = impression.insertionPosition
                      .map(_.toInt)
                      .getOrElse(getInsertionPositionDefaultValue(request.isTest.getOrElse(false)))
                    val followProof =
                      followProofByAccountId.getOrElse(promotedAccountId, FollowProof(Nil, 0))
                    PromotedCandidateUser(
                      promotedAccountId,
                      position,
                      impression,
                      followProof,
                      Some(identifier)
                    )
                }.toSeq
              }
              .onSuccess(result => profileNumResults(result.size, "final_results"))
          case None =>
            Stitch.Nil
        }
      }
  }

  /** True when the impression's experiment values ask for the social context style. */
  private def shouldShowSocialContext(imp: adthrift.AdImpression): Boolean =
    imp.experimentValues.exists(
      _.get("display.display_style").contains("show_social_context")
    )

  /** Position used when the impression carries none: 0 for test requests, -1 otherwise. */
  private def getInsertionPositionDefaultValue(isTest: Boolean): Int =
    if (isTest) 0 else -1

  /**
   * Counts a result size under `statName`, using the size itself as the counter name up
   * to 5 and a shared `more_than_5` counter above that.
   */
  private def profileNumResults(resultsSize: Int, statName: String): Unit = {
    val bucket = if (resultsSize <= 5) resultsSize.toString else "more_than_5"
    stats.scope(statName).counter(bucket).incr()
  }
}
|
||||
|
||||
/** Constants used by the [[PromotedAccountsCandidateSource]] class. */
object PromotedAccountsCandidateSource {

  /** Identifier derived from the `PromotedAccount` algorithm name. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PromotedAccount.toString)

  /** Number of intersections requested from the social graph client. */
  val NumIntersections = 3
}
|
@ -1,2 +0,0 @@
|
||||
# Promoted Accounts Candidate Source
|
||||
Promoted accounts returned from Ads server.
|
@ -1,24 +0,0 @@
|
||||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/inject:guice",
|
||||
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
|
||||
"3rdparty/jvm/net/codingwell:scala-guice",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-api",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/real_time_real_graph",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/stores",
|
||||
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
|
||||
"strato/config/columns/onboarding/realGraph:realGraph-strato-client",
|
||||
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
|
||||
"strato/config/columns/recommendations/twistly:twistly-strato-client",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"util/util-slf4j-api/src/main/scala",
|
||||
],
|
||||
)
|
@ -1,6 +0,0 @@
|
||||
# RealGraph Candidate Source
|
||||
Provides out-of-network RealGraph candidates for a given user. RealGraph is a user-user graph dataset that aims to measure the strength of the relationship between two users.
|
||||
|
||||
RealGraph comprises two components: a real-time pipeline that tracks various counts and relationships between user-user edges (such as the number of favorites, replies, retweets, clicks, whether followed, muted, or blocked), and an offline pipeline of a larger set of such user-user edge counts and relationships. Currently, the top k in-network scores have been exported for use by various teams.
|
||||
|
||||
The RealGraph dataset is used to predict user interactions at Twitter, and is based on the paper "[Realgraph: User interaction prediction at Twitter](http://www.ueo-workshop.com/wp-content/uploads/2014/04/sig-alternate.pdf)", presented at the UEO workshop at KDD'14.
|
@ -1,27 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.real_graph
|
||||
|
||||
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSName
|
||||
import com.twitter.timelines.configapi.Param
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Feature-switch registration for the RealGraph out-of-network candidate source.
 *
 * Lists every [[RealGraphOonParams]] entry, grouped by value type.
 */
@Singleton
class RealGraphOonFSConfig @Inject() () extends FeatureSwitchConfig {

  // Integer-valued feature switches.
  override val intFSParams: Seq[FSBoundedParam[Int]] = Seq(
    RealGraphOonParams.RealGraphOonResultCountThreshold,
    RealGraphOonParams.MaxResults
  )

  // Double-valued feature switches.
  override val doubleFSParams: Seq[FSBoundedParam[Double]] = Seq(
    RealGraphOonParams.ScoreThreshold
  )

  // Boolean feature switches.
  override val booleanFSParams: Seq[Param[Boolean] with FSName] = Seq(
    RealGraphOonParams.IncludeRealGraphOonCandidates,
    RealGraphOonParams.TryToReadRealGraphOonCandidates,
    RealGraphOonParams.UseV2
  )
}
|
@ -1,47 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.real_graph
|
||||
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Feature-switch parameters for the RealGraph out-of-network candidate source. */
object RealGraphOonParams {

  /** Boolean switch "real_graph_oon_include_candidates"; off by default. */
  case object IncludeRealGraphOonCandidates
      extends FSParam[Boolean](name = "real_graph_oon_include_candidates", default = false)

  /** Boolean switch "real_graph_oon_try_to_read_candidates"; off by default. */
  case object TryToReadRealGraphOonCandidates
      extends FSParam[Boolean](name = "real_graph_oon_try_to_read_candidates", default = false)

  /** Non-negative result-count threshold; defaults to 1. */
  case object RealGraphOonResultCountThreshold
      extends FSBoundedParam[Int](
        name = "real_graph_oon_result_count_threshold",
        default = 1,
        min = 0,
        max = Int.MaxValue)

  /** Boolean switch "real_graph_oon_use_v2"; off by default. */
  case object UseV2 extends FSParam[Boolean](name = "real_graph_oon_use_v2", default = false)

  /** Minimum candidate score accepted by RealGraphOonV2Source; bounded to [0, 1], default 0.26. */
  case object ScoreThreshold
      extends FSBoundedParam[Double](
        name = "real_graph_oon_score_threshold",
        default = 0.26,
        min = 0.0,
        max = 1.0)

  /** Maximum number of candidates RealGraphOonV2Source returns; bounded to [0, 1000], default 200. */
  case object MaxResults
      extends FSBoundedParam[Int](
        name = "real_graph_oon_max_results",
        default = 200,
        min = 0,
        max = 1000)
}
|
@ -1,58 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.real_graph
|
||||
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.generated.client.onboarding.realGraph.UserRealgraphOonV2ClientColumn
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Candidate source backed by the `UserRealgraphOonV2ClientColumn` Strato column.
 *
 * For a request with a user id, fetches that user's candidate list, drops candidates scoring
 * below [[RealGraphOonParams.ScoreThreshold]], keeps at most [[RealGraphOonParams.MaxResults]]
 * of them and tags each with this source's identifier. Requests without a user id yield no
 * candidates.
 */
@Singleton
class RealGraphOonV2Source @Inject() (
  realGraphClientColumn: UserRealgraphOonV2ClientColumn)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier =
    RealGraphOonV2Source.Identifier

  /**
   * @param request carries the params and the (optional) requesting user id
   * @return the filtered, truncated candidates, or an empty result when there is no user id
   *         or the column has no value for the user
   */
  override def apply(request: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
    request.getOptionalUserId
      .map { userId =>
        // Read once per request rather than inside the fetch callback chain.
        val maxResults = request.params(RealGraphOonParams.MaxResults)
        realGraphClientColumn.fetcher
          .fetch(userId)
          .map { result =>
            result.v
              .map { candidates => parseStratoResults(request, candidates) }
              .getOrElse(Nil)
              // returned candidates are sorted by score in descending order
              .take(maxResults)
              .map(_.withCandidateSource(identifier))
          }
      }.getOrElse(Stitch.Nil)
  }

  /**
   * Converts the thrift candidates into [[CandidateUser]]s, keeping only those whose score is
   * at or above the configured score threshold.
   */
  private def parseStratoResults(
    request: HasParams with HasClientContext,
    candidateSeqThrift: CandidateSeq
  ): Seq[CandidateUser] = {
    // The threshold is constant for the request, so look it up once instead of per candidate.
    val scoreThreshold = request.params(RealGraphOonParams.ScoreThreshold)
    candidateSeqThrift.candidates.collect {
      case candidate if candidate.score >= scoreThreshold =>
        CandidateUser(
          candidate.userId,
          Some(candidate.score)
        )
    }
  }

}
|
||||
|
||||
/** Companion holding the identifier used by the RealGraph OON V2 candidate source. */
object RealGraphOonV2Source {
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RealGraphOonV2.toString)
}
|
@ -1,40 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.real_graph
|
||||
|
||||
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Candidate source that turns the real graph weights of the requesting user into candidates:
 * every (user id, score) entry returned by the client becomes a scored [[CandidateUser]]
 * tagged with this source's identifier. Requests without a user id produce no candidates.
 */
@Singleton
class RealGraphSource @Inject() (
  realGraph: RealTimeRealGraphClient)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = RealGraphSource.Identifier

  override def apply(request: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] =
    request.getOptionalUserId match {
      case Some(userId) =>
        realGraph.getRealGraphWeights(userId).map { scoreMap =>
          scoreMap.toSeq.map {
            case (candidateId, realGraphScore) =>
              val candidate = CandidateUser(id = candidateId, score = Some(realGraphScore))
              candidate.withCandidateSource(identifier)
          }
        }
      case None =>
        Stitch.Nil
    }
}
|
||||
|
||||
/** Companion holding the identifier used by [[RealGraphSource]]. */
object RealGraphSource {
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RealGraphFollowed.toString)
}
|
@ -1,29 +0,0 @@
|
||||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/inject:guice",
|
||||
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
|
||||
"3rdparty/jvm/net/codingwell:scala-guice",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-api",
|
||||
"discovery-ds/src/main/thrift/com/twitter/dds/jobs/repeated_profile_visits:profile_visit-scala",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/sims",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/sims_expansion",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/real_time_real_graph",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
|
||||
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/candidate_source",
|
||||
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/model/common/identifier",
|
||||
"src/thrift/com/twitter/experiments/general_metrics:general_metrics-scala",
|
||||
"strato/config/columns/rux:rux-strato-client",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"util/util-slf4j-api/src/main/scala",
|
||||
],
|
||||
)
|
@ -1,4 +0,0 @@
|
||||
# Recent Engagement Candidate Source
|
||||
Provides recently engaged accounts for a given user:
|
||||
* Explicit engagements: like, retweet, reply
|
||||
* Implicit engagements: profile visit
|
@ -1,38 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
|
||||
|
||||
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Candidate source returning users the target recently engaged with, restricted to
 * direct-follow candidates.
 */
@Singleton
class RecentEngagementDirectFollowSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient)
    extends CandidateSource[Long, CandidateUser] {

  val identifier: CandidateSourceIdentifier =
    RecentEngagementDirectFollowSource.Identifier

  /**
   * Asks the RealTimeRealGraphClient for recently engaged users (direct-follow candidates
   * only), tags each with this source's identifier and orders them by descending score.
   * Candidates without a score are ordered as if their score were 0.0.
   */
  override def apply(targetUserId: Long): Stitch[Seq[CandidateUser]] = {
    val recentlyEngaged = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
      userId = targetUserId,
      engagementScoreMap = RealTimeRealGraphClient.EngagementScoreMap,
      includeDirectFollowCandidates = true,
      includeNonDirectFollowCandidates = false
    )
    recentlyEngaged.map { candidates =>
      candidates
        .map(candidate => candidate.withCandidateSource(identifier))
        .sortBy(candidate => -candidate.score.getOrElse(0.0))
    }
  }
}
|
||||
|
||||
/** Companion holding the identifier used by [[RecentEngagementDirectFollowSource]]. */
object RecentEngagementDirectFollowSource {
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RecentEngagementDirectFollow.toString)
}
|
@ -1,38 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
|
||||
|
||||
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Candidate source returning users the target recently engaged with, restricted to
 * non-direct-follow candidates.
 */
@Singleton
class RecentEngagementNonDirectFollowSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient)
    extends CandidateSource[Long, CandidateUser] {

  val identifier: CandidateSourceIdentifier =
    RecentEngagementNonDirectFollowSource.Identifier

  /**
   * Asks the RealTimeRealGraphClient for recently engaged users (non-direct-follow candidates
   * only), tags each with this source's identifier and orders them by descending score.
   * Candidates without a score are ordered as if their score were 0.0.
   */
  override def apply(targetUserId: Long): Stitch[Seq[CandidateUser]] = {
    val recentlyEngaged = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
      userId = targetUserId,
      engagementScoreMap = RealTimeRealGraphClient.EngagementScoreMap,
      includeDirectFollowCandidates = false,
      includeNonDirectFollowCandidates = true
    )
    recentlyEngaged.map { candidates =>
      candidates
        .map(candidate => candidate.withCandidateSource(identifier))
        .sortBy(candidate => -candidate.score.getOrElse(0.0))
    }
  }
}
|
||||
|
||||
/** Companion holding the identifier used by [[RecentEngagementNonDirectFollowSource]]. */
object RecentEngagementNonDirectFollowSource {
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RecentEngagementNonDirectFollow.toString)
}
|
@ -1,22 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
|
||||
|
||||
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSName
|
||||
import com.twitter.timelines.configapi.Param
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Feature-switch registration for the repeated profile visits candidate source.
 *
 * Lists every [[RepeatedProfileVisitsParams]] entry, grouped by value type.
 */
@Singleton
class RepeatedProfileVisitsFSConfig @Inject() () extends FeatureSwitchConfig {

  // Integer-valued feature switches.
  override val intFSParams: Seq[FSBoundedParam[Int]] = Seq(
    RepeatedProfileVisitsParams.RecommendationThreshold,
    RepeatedProfileVisitsParams.BucketingThreshold
  )

  // Boolean feature switches.
  override val booleanFSParams: Seq[Param[Boolean] with FSName] = Seq(
    RepeatedProfileVisitsParams.IncludeCandidates,
    RepeatedProfileVisitsParams.UseOnlineDataset
  )
}
|
@ -1,37 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
|
||||
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Feature-switch parameters for the repeated profile visits candidate source. */
object RepeatedProfileVisitsParams {

  /**
   * When RepeatedProfileVisitsSource runs and has candidates for the target user, decides
   * whether those candidates are actually included in the output. Used to split users into
   * control and treatment buckets.
   */
  case object IncludeCandidates
      extends FSParam[Boolean](
        name = "repeated_profile_visits_include_candidates",
        default = false)

  /**
   * Minimum visit count (inclusive) a profile needs before it is recommended to the target
   * user.
   */
  case object RecommendationThreshold
      extends FSBoundedParam[Int](
        name = "repeated_profile_visits_recommendation_threshold",
        default = 3,
        min = 0,
        max = Int.MaxValue)

  /**
   * Minimum visit count (inclusive) a profile needs to be kept when deciding whether the user
   * has any repeatedly visited profiles at all. RepeatedProfileVisitsSource returns nothing,
   * without consulting [[IncludeCandidates]], when no profile reaches this threshold.
   */
  case object BucketingThreshold
      extends FSBoundedParam[Int](
        name = "repeated_profile_visits_bucketing_threshold",
        default = 3,
        min = 0,
        max = Int.MaxValue)

  /**
   * Selects the online dataset (repeated profile visit information updated to within minutes)
   * instead of the offline dataset (updated by offline jobs, which can lag by hours to days).
   */
  case object UseOnlineDataset
      extends FSParam[Boolean](
        name = "repeated_profile_visits_use_online_dataset",
        default = true)
}
|
@ -1,157 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
|
||||
|
||||
import com.google.inject.Inject
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.dds.jobs.repeated_profile_visits.thriftscala.ProfileVisitorInfo
|
||||
import com.twitter.experiments.general_metrics.thriftscala.IdType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.Engagement
|
||||
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.timelines.configapi.Params
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.inject.Logging
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.generated.client.rux.RepeatedProfileVisitsAggregateClientColumn
|
||||
|
||||
/**
 * Candidate source recommending accounts whose profiles the requesting user visited repeatedly.
 *
 * Visit counts come either from the online dataset (via RealTimeRealGraphClient) or from the
 * offline dataset (via RepeatedProfileVisitsAggregateClientColumn), selected by
 * [[RepeatedProfileVisitsParams.UseOnlineDataset]]. Counters under the
 * "repeated_profile_visits_source" scope record fetch outcomes and each bucketing decision.
 */
@Singleton
class RepeatedProfileVisitsSource @Inject() (
  repeatedProfileVisitsAggregateClientColumn: RepeatedProfileVisitsAggregateClientColumn,
  realTimeRealGraphClient: RealTimeRealGraphClient,
  statsReceiver: StatsReceiver)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser]
    with Logging {

  val identifier: CandidateSourceIdentifier =
    RepeatedProfileVisitsSource.Identifier

  val sourceStatsReceiver = statsReceiver.scope("repeated_profile_visits_source")
  val offlineFetchErrorCounter = sourceStatsReceiver.counter("offline_fetch_error")
  val offlineFetchSuccessCounter = sourceStatsReceiver.counter("offline_fetch_success")
  val onlineFetchErrorCounter = sourceStatsReceiver.counter("online_fetch_error")
  val onlineFetchSuccessCounter = sourceStatsReceiver.counter("online_fetch_success")
  val noRepeatedProfileVisitsAboveBucketingThresholdCounter =
    sourceStatsReceiver.counter("no_repeated_profile_visits_above_bucketing_threshold")
  val hasRepeatedProfileVisitsAboveBucketingThresholdCounter =
    sourceStatsReceiver.counter("has_repeated_profile_visits_above_bucketing_threshold")
  val noRepeatedProfileVisitsAboveRecommendationsThresholdCounter =
    sourceStatsReceiver.counter("no_repeated_profile_visits_above_recommendations_threshold")
  val hasRepeatedProfileVisitsAboveRecommendationsThresholdCounter =
    sourceStatsReceiver.counter("has_repeated_profile_visits_above_recommendations_threshold")
  val includeCandidatesCounter = sourceStatsReceiver.counter("include_candidates")
  val noIncludeCandidatesCounter = sourceStatsReceiver.counter("no_include_candidates")

  /**
   * Returns visited user -> visit count, via the offline dataset.
   *
   * Only visits with a positive 14-day count to accounts the user does not already follow are
   * kept. A failed fetch is logged, counted and treated as "no data" (empty map).
   */
  def applyWithOfflineDataset(targetUserId: Long): Stitch[Map[Long, Int]] = {
    repeatedProfileVisitsAggregateClientColumn.fetcher
      .fetch(ProfileVisitorInfo(id = targetUserId, idType = IdType.User)).map(_.v)
      // Count successes BEFORE `handle`: once `handle` has converted a failure into `None`
      // the Stitch is successful, so counting afterwards would also count every error.
      .onSuccess { _ =>
        offlineFetchSuccessCounter.incr()
      }
      .handle {
        case e: Throwable =>
          logger.error("Strato fetch for RepeatedProfileVisitsAggregateClientColumn failed: " + e)
          offlineFetchErrorCounter.incr()
          None
      }.map { resultOption =>
        resultOption
          .flatMap { result =>
            result.profileVisitSet.map { profileVisitSet =>
              profileVisitSet
                .filter(profileVisit => profileVisit.totalTargetVisitsInLast14Days.getOrElse(0) > 0)
                .filter(profileVisit => !profileVisit.doesSourceIdFollowTargetId.getOrElse(false))
                .flatMap { profileVisit =>
                  (profileVisit.targetId, profileVisit.totalTargetVisitsInLast14Days) match {
                    case (Some(targetId), Some(totalVisitsInLast14Days)) =>
                      Some(targetId -> totalVisitsInLast14Days)
                    case _ => None
                  }
                }.toMap[Long, Int]
            }
          }.getOrElse(Map.empty)
      }
  }

  /**
   * Returns visited user -> visit count, via the online dataset. The count is the number of
   * recent profile-view engagements recorded for the visited user.
   */
  def applyWithOnlineData(targetUserId: Long): Stitch[Map[Long, Int]] = {
    val visitedUserToEngagementsStitch: Stitch[Map[Long, Seq[Engagement]]] =
      realTimeRealGraphClient.getRecentProfileViewEngagements(targetUserId)
    visitedUserToEngagementsStitch
      .onFailure { _ =>
        onlineFetchErrorCounter.incr()
      }.onSuccess { _ =>
        onlineFetchSuccessCounter.incr()
      }.map { visitedUserToEngagements =>
        // Strict `map` instead of `mapValues`, which is a lazy view on Scala 2.12 and
        // deprecated on 2.13.
        visitedUserToEngagements.map {
          case (visitedUserId, engagements) => visitedUserId -> engagements.size
        }
      }
  }

  /**
   * Fetches visit counts from the dataset selected by
   * [[RepeatedProfileVisitsParams.UseOnlineDataset]] and drops entries with a zero count.
   */
  def getRepeatedVisitedAccounts(params: Params, targetUserId: Long): Stitch[Map[Long, Int]] = {
    val results: Stitch[Map[Long, Int]] =
      if (params.getBoolean(RepeatedProfileVisitsParams.UseOnlineDataset)) {
        applyWithOnlineData(targetUserId)
      } else {
        applyWithOfflineDataset(targetUserId)
      }
    // Only keep users that had non-zero engagement counts.
    results.map(_.filter { case (_, visitCount) => visitCount > 0 })
  }

  /**
   * Produces the recommendations for `userId`.
   *
   * Returns no candidates when no visited account reaches the bucketing threshold, or when
   * [[RepeatedProfileVisitsParams.IncludeCandidates]] is off. Otherwise returns the accounts
   * at or above the recommendation threshold, scored by visit count.
   */
  def getRecommendations(params: Params, userId: Long): Stitch[Seq[CandidateUser]] = {
    val recommendationThreshold = params.getInt(RepeatedProfileVisitsParams.RecommendationThreshold)
    val bucketingThreshold = params.getInt(RepeatedProfileVisitsParams.BucketingThreshold)

    // Get the repeatedly visited profiles. Only keep accounts with >= bucketingThreshold visits.
    val repeatedVisitedAccountsStitch: Stitch[Map[Long, Int]] =
      getRepeatedVisitedAccounts(params, userId).map(_.filter(kv => kv._2 >= bucketingThreshold))

    repeatedVisitedAccountsStitch.map { candidates =>
      // Now check if we should includeCandidates (e.g. whether user is in control bucket or treatment buckets).
      if (candidates.isEmpty) {
        // User has not visited any accounts above bucketing threshold. We will not bucket user into experiment. Just
        // return no candidates.
        noRepeatedProfileVisitsAboveBucketingThresholdCounter.incr()
        Seq.empty
      } else {
        hasRepeatedProfileVisitsAboveBucketingThresholdCounter.incr()
        if (!params.getBoolean(RepeatedProfileVisitsParams.IncludeCandidates)) {
          // User has reached bucketing criteria. We check whether to include candidates (e.g. checking which bucket
          // the user is in for the experiment). In this case the user is in a bucket to not include any candidates.
          noIncludeCandidatesCounter.incr()
          Seq.empty
        } else {
          includeCandidatesCounter.incr()
          // We should include candidates. Include any candidates above recommendation thresholds.
          val outputCandidatesSeq = candidates
            .filter(kv => kv._2 >= recommendationThreshold).map { kv =>
              val user = kv._1
              val visitCount = kv._2
              CandidateUser(user, Some(visitCount.toDouble))
                .withCandidateSource(RepeatedProfileVisitsSource.Identifier)
            }.toSeq
          if (outputCandidatesSeq.isEmpty) {
            noRepeatedProfileVisitsAboveRecommendationsThresholdCounter.incr()
          } else {
            hasRepeatedProfileVisitsAboveRecommendationsThresholdCounter.incr()
          }
          outputCandidatesSeq
        }
      }
    }
  }

  /** Returns recommendations for the requesting user, or nothing when the request has no user id. */
  override def apply(request: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
    request.getOptionalUserId
      .map { userId =>
        getRecommendations(request.params, userId)
      }.getOrElse(Stitch.Nil)
  }
}
|
||||
|
||||
/** Companion holding the identifier used by [[RepeatedProfileVisitsSource]]. */
object RepeatedProfileVisitsSource {
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RepeatedProfileVisits.toString)
}
|
@ -1,21 +0,0 @@
|
||||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/inject:guice",
|
||||
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
|
||||
"3rdparty/jvm/net/codingwell:scala-guice",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-api",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/real_time_real_graph",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"src/thrift/com/twitter/onboarding/relevance/candidates:candidates-scala",
|
||||
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"util/util-slf4j-api/src/main/scala",
|
||||
],
|
||||
)
|
@ -1,10 +0,0 @@
|
||||
# SALSA Candidate Source
|
||||
Provides an account expansion based on the SALSA PYMK (People You May Know) algorithm for a given account. The algorithm focuses on the mutual follow and address book graph, making it highly effective at providing good mutual follow recommendations.
|
||||
|
||||
The SALSA algorithm constructs a local graph and performs personalized random walks to identify the best recommendations for the user. The local graph represents the community of users that are most similar to or most relevant to the user, while the personalized random walk identifies the most popular interests among them.
|
||||
|
||||
For each target user, the local graph is a bipartite graph with a left-hand side (LHS) and a right-hand side (RHS). The LHS is built from several sources, including the target user, forward and reverse address books, mutual follows, recent followings, and recent followers. We choose a specified number of top candidates from these sources for each target user with different weights assigned to each source to favor the corresponding source, and build the LHS using the target user and those top candidates. The RHS consists of two parts: the top candidates from the sources mentioned above for the target user and the mutual follows of the other entries in the LHS.
|
||||
|
||||
The random walk starts from the target user in the LHS and adopts a restarting strategy to realize personalization.
|
||||
|
||||
In summary, the SALSA Candidate Source provides an account expansion based on the SALSA PYMK algorithm, utilizing a bipartite graph with personalized random walks to identify the most relevant and interesting recommendations for the user.
|
@ -1,40 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.salsa
|
||||
|
||||
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * SALSA expansion source seeded with the users the target recently engaged with
 * (direct-follow candidates only).
 */
@Singleton
class RecentEngagementDirectFollowSalsaExpansionSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient,
  salsaExpander: SalsaExpander)
    extends SalsaExpansionBasedCandidateSource[Long](salsaExpander) {

  override val identifier: CandidateSourceIdentifier =
    RecentEngagementDirectFollowSalsaExpansionSource.Identifier

  /**
   * Ids of the first `NumFirstDegreeNodesToRetrieve` recently engaged users returned by the
   * RealTimeRealGraphClient for `target`.
   */
  override def firstDegreeNodes(target: Long): Stitch[Seq[Long]] = {
    val recentlyEngaged = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
      target,
      RealTimeRealGraphClient.EngagementScoreMap,
      includeDirectFollowCandidates = true,
      includeNonDirectFollowCandidates = false
    )
    recentlyEngaged.map { candidates =>
      val seeds =
        candidates.take(RecentEngagementDirectFollowSalsaExpansionSource.NumFirstDegreeNodesToRetrieve)
      seeds.map(candidate => candidate.id)
    }
  }

  /** Fixed output size, independent of the target. */
  override def maxResults(target: Long): Int =
    RecentEngagementDirectFollowSalsaExpansionSource.OutputSize
}
|
||||
|
||||
/** Constants for [[RecentEngagementDirectFollowSalsaExpansionSource]]. */
object RecentEngagementDirectFollowSalsaExpansionSource {

  /** Identifier attached to candidates produced by this source. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RecentEngagementSarusOcCur.toString)

  /** How many recently engaged users are used as first-degree nodes. */
  val NumFirstDegreeNodesToRetrieve: Int = 10

  /** Value returned by `maxResults`. */
  val OutputSize: Int = 200
}
|
@ -1,117 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.salsa
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.SalsaFirstDegreeOnUserClientColumn
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.SalsaSecondDegreeOnUserClientColumn
|
||||
import com.twitter.follow_recommendations.common.models.AccountProof
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.FollowProof
|
||||
import com.twitter.follow_recommendations.common.models.Reason
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.wtf.candidate.thriftscala.Candidate
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * A candidate produced by the SALSA expansion together with the users that connect to it.
 *
 * @param candidateId         id of the expanded candidate
 * @param numberOfConnections number of connections recorded for the candidate
 * @param totalScore          aggregate score of the candidate
 * @param connectingUsers     users reported as the follow proof for the candidate
 */
case class SalsaExpandedCandidate(
  candidateId: Long,
  numberOfConnections: Int,
  totalScore: Double,
  connectingUsers: Seq[Long]) {

  /** Converts to a [[CandidateUser]] scored with `totalScore` and carrying a follow proof. */
  def toCandidateUser: CandidateUser = {
    val followProof = FollowProof(connectingUsers, connectingUsers.size)
    val accountProof = AccountProof(followProof = Some(followProof))
    CandidateUser(
      id = candidateId,
      score = Some(totalScore),
      reason = Some(Reason(Some(accountProof)))
    )
  }
}
|
||||
|
||||
/**
 * One (candidate, input user) edge returned by the salsa columns.
 *
 * @param candidateId        id of the neighbor that was returned
 * @param score              score attached to that neighbor
 * @param similarToCandidate input user id whose neighbor list contained the candidate
 */
case class SimilarUserCandidate(
  candidateId: Long,
  score: Double,
  similarToCandidate: Long)
|
||||
|
||||
/**
 * Salsa expander uses pre-computed lists of candidates for each input user id and returns the
 * highest scored candidates in the pre-computed lists as the expansion for the corresponding
 * input id.
 *
 * Candidates reached from several input users are merged into one [[SalsaExpandedCandidate]]
 * whose score is the sum of the individual scores.
 */
@Singleton
class SalsaExpander @Inject() (
  statsReceiver: StatsReceiver,
  firstDegreeClient: SalsaFirstDegreeOnUserClientColumn,
  secondDegreeClient: SalsaSecondDegreeOnUserClientColumn,
) {

  val stats = statsReceiver.scope("salsa_expander")

  // Resolved once at construction instead of on every request.
  private val firstDegreeNeighborsStat = stats.stat("first_degree_neighbors")
  private val secondDegreeNeighborsStat = stats.stat("second_degree_neighbors")

  /**
   * Merges the per-input neighbor lists into one aggregated candidate per neighbor id.
   *
   * @param input     input user ids, positionally aligned with `neighbors`
   * @param neighbors neighbor list fetched for each input id (`None` when nothing was found)
   * @return candidates connected to at least
   *         `min(SalsaExpander.MinConnectingUsersThreshold, input.size)` input users
   */
  private def similarUsers(
    input: Seq[Long],
    neighbors: Seq[Option[Seq[Candidate]]]
  ): Seq[SalsaExpandedCandidate] = {
    // With fewer inputs than the threshold, every input must connect to the candidate.
    val minConnections = math.min(SalsaExpander.MinConnectingUsersThreshold, input.size)

    input
      .zip(neighbors).flatMap {
        case (recId, Some(recNeighbors)) =>
          recNeighbors.map(neighbor => SimilarUserCandidate(neighbor.userId, neighbor.score, recId))
        case _ => Nil
      }.groupBy(_.candidateId).map {
        case (candidateId, connections) =>
          val scores = connections.map(_.score)
          // Only the strongest connections are surfaced as proof.
          val connectingUsers = connections
            .sortBy(-_.score)
            .take(SalsaExpander.MaxConnectingUsersToOutputPerExpandedCandidate)
            .map(_.similarToCandidate)

          SalsaExpandedCandidate(candidateId, scores.size, scores.sum, connectingUsers)
      }
      .filter(_.numberOfConnections >= minConnections)
      .toSeq
  }

  /**
   * Expands the given input users into scored candidates.
   *
   * @param firstDegreeInput           ids looked up in the first-degree column
   * @param secondDegreeInput          ids looked up in the second-degree column
   * @param maxNumOfCandidatesToReturn cap on the number of returned candidates
   * @return candidates sorted by descending total score
   */
  def apply(
    firstDegreeInput: Seq[Long],
    secondDegreeInput: Seq[Long],
    maxNumOfCandidatesToReturn: Int
  ): Stitch[Seq[CandidateUser]] = {

    // NOTE(review): `flatten` here only drops the `None`s, so the recorded value is the number of
    // inputs that returned a list, not the number of neighbors - confirm this is intended.
    val firstDegreeNeighborsStitch =
      Stitch
        .collect(firstDegreeInput.map { userId =>
          firstDegreeClient.fetcher
            .fetch(userId)
            .map(_.v.map(_.candidates.take(SalsaExpander.MaxDirectNeighbors)))
        })
        .onSuccess { firstDegreeNeighbors =>
          firstDegreeNeighborsStat.add(firstDegreeNeighbors.flatten.size)
        }

    val secondDegreeNeighborsStitch =
      Stitch
        .collect(secondDegreeInput.map { userId =>
          secondDegreeClient.fetcher
            .fetch(userId)
            .map(_.v.map(_.candidates.take(SalsaExpander.MaxIndirectNeighbors)))
        })
        .onSuccess { secondDegreeNeighbors =>
          secondDegreeNeighborsStat.add(secondDegreeNeighbors.flatten.size)
        }

    // Concatenated in the same order as the inputs below so that `zip` pairs them correctly.
    val neighborStitches =
      Stitch.join(firstDegreeNeighborsStitch, secondDegreeNeighborsStitch).map {
        case (first, second) => first ++ second
      }

    val similarUsersToInput = neighborStitches.map { neighbors =>
      similarUsers(firstDegreeInput ++ secondDegreeInput, neighbors)
    }

    similarUsersToInput.map {
      // Rank the candidate users by the combined weights from the connecting users. This is the
      // default original implementation. It is unlikely to have weight ties and thus a second
      // ranking function is not necessary.
      _.sortBy(-_.totalScore)
        .take(maxNumOfCandidatesToReturn)
        .map(_.toCandidateUser)
    }
  }
}
|
||||
|
||||
/** Tuning constants for [[SalsaExpander]]. */
object SalsaExpander {

  /** Neighbors kept per input id from the first-degree column. */
  val MaxDirectNeighbors: Int = 2000

  /** Neighbors kept per input id from the second-degree column. */
  val MaxIndirectNeighbors: Int = 2000

  /** Connections a candidate needs to be kept (capped by the number of inputs). */
  val MinConnectingUsersThreshold: Int = 2

  /** Connecting users reported per expanded candidate. */
  val MaxConnectingUsersToOutputPerExpandedCandidate: Int = 3
}
|
@ -1,32 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.salsa
|
||||
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.stitch.Stitch
|
||||
|
||||
/**
 * Base class for candidate sources that feed a set of seed users through a [[SalsaExpander]].
 *
 * Subclasses supply the seed users via `firstDegreeNodes` and/or `secondDegreeNodes`; both default
 * to an empty sequence so that a subclass only needs to override the one it uses.
 */
abstract class SalsaExpansionBasedCandidateSource[Target](salsaExpander: SalsaExpander)
    extends CandidateSource[Target, CandidateUser] {

  /** Seed users looked up as first-degree nodes; empty unless overridden. */
  def firstDegreeNodes(target: Target): Stitch[Seq[Long]] = Stitch.value(Seq.empty[Long])

  /** Seed users looked up as second-degree nodes; empty unless overridden. */
  def secondDegreeNodes(target: Target): Stitch[Seq[Long]] = Stitch.value(Seq.empty[Long])

  /** Maximum number of results to request from the expander. */
  def maxResults(target: Target): Int

  /**
   * Fetches both seed sets, expands them, tags every candidate with this source's identifier and
   * orders the result by descending score (a missing score counts as 0.0).
   */
  override def apply(target: Target): Stitch[Seq[CandidateUser]] =
    Stitch.join(firstDegreeNodes(target), secondDegreeNodes(target)).flatMap {
      case (firstDegreeSeeds, secondDegreeSeeds) =>
        val expanded = salsaExpander(firstDegreeSeeds, secondDegreeSeeds, maxResults(target))
        expanded.map { candidates =>
          candidates
            .map(_.withCandidateSource(identifier))
            .sortBy(candidate => -candidate.score.getOrElse(0.0))
        }
    }
}
|
@ -1,24 +0,0 @@
|
||||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/inject:guice",
|
||||
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
|
||||
"3rdparty/jvm/net/codingwell:scala-guice",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-api",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
|
||||
"src/thrift/com/twitter/hermit/candidate:hermit-candidate-scala",
|
||||
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
|
||||
"strato/config/columns/recommendations/follow2vec:follow2vec-strato-client",
|
||||
"strato/config/columns/recommendations/similarity:similarity-strato-client",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"util/util-slf4j-api/src/main/scala",
|
||||
],
|
||||
)
|
@ -1,50 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.twitter.escherbird.util.stitchcache.StitchCache
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.HasSimilarToContext
|
||||
import com.twitter.hermit.candidate.thriftscala.Candidates
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.client.Fetcher
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.util.Duration
|
||||
|
||||
import java.lang.{Long => JLong}
|
||||
|
||||
/**
 * Sims candidate source that reads similar users through a [[StitchCache]] placed in front of a
 * strato fetcher.
 *
 * @param id            identifier reported by this source and attached to every candidate
 * @param fetcher       fetcher queried for a user id when the cache needs to load a value
 * @param maxCacheSize  passed to the cache as `maxCacheSize`
 * @param cacheTtl      passed to the cache as `ttl`
 * @param statsReceiver passed to the cache as `statsReceiver`
 */
class CacheBasedSimsStore(
  id: CandidateSourceIdentifier,
  fetcher: Fetcher[Long, Unit, Candidates],
  maxCacheSize: Int,
  cacheTtl: Duration,
  statsReceiver: StatsReceiver)
    extends CandidateSource[HasParams with HasSimilarToContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = id

  /** Uncached lookup used as the cache's underlying call. */
  private def getUsersFromSimsSource(userId: JLong): Stitch[Option[Candidates]] =
    fetcher.fetch(userId).map(_.v)

  private val simsCache = StitchCache[JLong, Option[Candidates]](
    maxCacheSize = maxCacheSize,
    ttl = cacheTtl,
    statsReceiver = statsReceiver,
    underlyingCall = getUsersFromSimsSource
  )

  /**
   * Looks up every id in `request.similarToUserIds`, converts what was found into candidates,
   * removes duplicates and tags each candidate with this source's identifier.
   */
  override def apply(request: HasParams with HasSimilarToContext): Stitch[Seq[CandidateUser]] = {
    val candidatesPerUser = Stitch.traverse(request.similarToUserIds) { userId =>
      simsCache.readThrough(userId).map {
        case Some(candidates) => StratoBasedSimsCandidateSource.map(userId, candidates)
        case None => Nil
      }
    }
    candidatesPerUser.map { perUser =>
      perUser.flatten.distinct.map(_.withCandidateSource(identifier))
    }
  }
}
|
@ -1,35 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.strato.generated.client.onboarding.userrecs.NewSimsRefreshOnUserClientColumn
|
||||
import com.twitter.util.Duration
|
||||
|
||||
import javax.inject.Inject
|
||||
|
||||
/** Sims source reading directly from the fetcher of [[NewSimsRefreshOnUserClientColumn]]. */
@Singleton
class DBV2SimsRefreshStore @Inject() (
  newSimsRefreshOnUserClientColumn: NewSimsRefreshOnUserClientColumn)
    extends StratoBasedSimsCandidateSourceWithUnitView(
      fetcher = newSimsRefreshOnUserClientColumn.fetcher,
      identifier = DBV2SimsRefreshStore.Identifier
    )

/** Same column as [[DBV2SimsRefreshStore]], read through a [[CacheBasedSimsStore]]. */
@Singleton
class CachedDBV2SimsRefreshStore @Inject() (
  newSimsRefreshOnUserClientColumn: NewSimsRefreshOnUserClientColumn,
  statsReceiver: StatsReceiver)
    extends CacheBasedSimsStore(
      id = DBV2SimsRefreshStore.Identifier,
      fetcher = newSimsRefreshOnUserClientColumn.fetcher,
      maxCacheSize = DBV2SimsRefreshStore.MaxCacheSize,
      cacheTtl = DBV2SimsRefreshStore.CacheTTL,
      statsReceiver = statsReceiver.scope("CachedDBV2SimsRefreshStore", "cache")
    )

/** Identifier and cache settings shared by the two stores above. */
object DBV2SimsRefreshStore {

  /** Built from the name of `Algorithm.Sims`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.Sims.toString)

  val MaxCacheSize: Int = 5000

  val CacheTTL: Duration = Duration.fromHours(24)
}
|
@ -1,38 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.constants.GuiceNamedConstants
|
||||
import com.twitter.hermit.candidate.thriftscala.Candidates
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.strato.client.Fetcher
|
||||
import com.twitter.util.Duration
|
||||
|
||||
import javax.inject.Inject
|
||||
|
||||
/** Sims source reading directly from the fetcher bound to `GuiceNamedConstants.DBV2_SIMS_FETCHER`. */
@Singleton
class DBV2SimsStore @Inject() (
  @Named(GuiceNamedConstants.DBV2_SIMS_FETCHER) fetcher: Fetcher[Long, Unit, Candidates])
    extends StratoBasedSimsCandidateSourceWithUnitView(
      fetcher = fetcher,
      identifier = DBV2SimsStore.Identifier
    )

/** Same fetcher as [[DBV2SimsStore]], read through a [[CacheBasedSimsStore]]. */
@Singleton
class CachedDBV2SimsStore @Inject() (
  @Named(GuiceNamedConstants.DBV2_SIMS_FETCHER) fetcher: Fetcher[Long, Unit, Candidates],
  statsReceiver: StatsReceiver)
    extends CacheBasedSimsStore(
      id = DBV2SimsStore.Identifier,
      fetcher = fetcher,
      maxCacheSize = DBV2SimsStore.MaxCacheSize,
      cacheTtl = DBV2SimsStore.CacheTTL,
      statsReceiver = statsReceiver.scope("CachedDBV2SimsStore", "cache")
    )

/** Identifier and cache settings shared by the two stores above. */
object DBV2SimsStore {

  /** Built from the name of `Algorithm.Sims`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.Sims.toString)

  val MaxCacheSize: Int = 1000

  val CacheTTL: Duration = Duration.fromHours(24)
}
|
@ -1,69 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.sims.Follow2vecNearestNeighborsStore.NearestNeighborParamsType
|
||||
import com.twitter.hermit.candidate.thriftscala.Candidate
|
||||
import com.twitter.hermit.candidate.thriftscala.Candidates
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.strato.catalog.Fetch
|
||||
import com.twitter.strato.client.Fetcher
|
||||
import com.twitter.strato.generated.client.recommendations.follow2vec.LinearRegressionFollow2vecNearestNeighborsClientColumn
|
||||
import com.twitter.util.Return
|
||||
import com.twitter.util.Throw
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Sims-style candidate source over the linear-regression Follow2vec nearest-neighbor column.
 *
 * The column's fetcher is adapted with `Follow2vecNearestNeighborsStore.convertFetcher` and
 * queried with `Follow2vecNearestNeighborsStore.defaultSearchParams` as the view.
 */
@Singleton
class LinearRegressionFollow2vecNearestNeighborsStore @Inject() (
  linearRegressionFollow2vecNearestNeighborsClientColumn: LinearRegressionFollow2vecNearestNeighborsClientColumn)
    extends StratoBasedSimsCandidateSource[NearestNeighborParamsType](
      fetcher = Follow2vecNearestNeighborsStore.convertFetcher(
        linearRegressionFollow2vecNearestNeighborsClientColumn.fetcher),
      view = Follow2vecNearestNeighborsStore.defaultSearchParams,
      identifier = Follow2vecNearestNeighborsStore.IdentifierF2vLinearRegression
    )
|
||||
|
||||
/** Type aliases, defaults and fetcher adaptation for the Follow2vec nearest-neighbor columns. */
object Follow2vecNearestNeighborsStore {
  // (userid, feature store version for data)
  type NearestNeighborKeyType = (Long, Long)
  // (neighbors to be returned, ef value: accuracy / latency tradeoff, distance for filtering)
  type NearestNeighborParamsType = (Option[Int], Option[Int], Option[Double])
  // (seq(found neighbor id, score), distance for filtering)
  type NearestNeighborValueType = (Seq[(Long, Option[Double])], Option[Double])

  val IdentifierF2vLinearRegression: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.LinearRegressionFollow2VecNearestNeighbors.toString)

  val defaultFeatureStoreVersion: Long = 20210708
  val defaultSearchParams: NearestNeighborParamsType = (None, None, None)

  /**
   * Adapts a nearest-neighbor fetcher so that it can be used like a Sims fetcher.
   *
   * The returned fetcher is keyed by user id only: each lookup pairs the id with
   * `defaultFeatureStoreVersion`, forwards the view unchanged and repackages the found neighbors
   * as [[Candidates]]. A neighbor without a score gets a score of 0. Failures of the underlying
   * fetch are propagated as-is.
   *
   * @param fetcher fetcher keyed by (user id, feature store version)
   * @return fetcher keyed by user id that yields [[Candidates]]
   */
  def convertFetcher(
    fetcher: Fetcher[NearestNeighborKeyType, NearestNeighborParamsType, NearestNeighborValueType]
  ): Fetcher[Long, NearestNeighborParamsType, Candidates] = {
    (key: Long, view: NearestNeighborParamsType) =>
      {
        // The second tuple element (distance for filtering) is not carried over.
        def toCandidates(results: Option[NearestNeighborValueType]): Option[Candidates] =
          results.map {
            case (neighbors, _) =>
              val converted = neighbors.map {
                case (neighborId, score) => Candidate(neighborId, score.getOrElse(0.0))
              }
              Candidates(key, converted)
          }

        fetcher
          .fetch(key = (key, defaultFeatureStoreVersion), view = view)
          .transform {
            case Return(fetched) => Stitch.value(Fetch.Result(toCandidates(fetched.v)))
            case Throw(error) => Stitch.exception(error)
          }
      }
  }
}
|
@ -1,32 +0,0 @@
|
||||
# Sims Candidate Source
|
||||
Offers various online sources for finding similar accounts based on a given user, whether it is the target user or an account candidate.
|
||||
|
||||
## Sims
|
||||
The objective is to identify a list of K users who are similar to a given user. In this scenario, we primarily focus on finding similar users as "producers" rather than "consumers." Sims has two steps: candidate generation and ranking.
|
||||
|
||||
### Sims Candidate Generation
|
||||
|
||||
With over 700 million users to consider, there are multiple ways to define similarities. Currently, we have three candidate sources for Sims:
|
||||
|
||||
**CosineFollow** (based on user-user follow graph): The similarity between two users is defined as the cosine similarity between their followers. Despite sounding simple, computing all-pair similarity on the entire follow graph is computationally challenging. We are currently using the WHIMP algorithm to find the top 1000 similar users for each user ID. This candidate source has the largest coverage, as it can find similar user candidates for more than 700 million users.
|
||||
|
||||
**CosineList** (based on user-list membership graph): The similarity between two users is defined as the cosine similarity between the sets of lists they are members of (e.g., [here](https://twitter.com/jack/lists/memberships) are the lists that @jack is on). The same algorithm as CosineFollow is used.
|
||||
|
||||
**Follow2Vec** (essentially Word2Vec on user-user follow graph): We first train the Word2Vec model on follow sequence data to obtain users' embeddings and then find the most similar users based on the similarity of the embeddings. However, we need enough data for each user to learn a meaningful embedding for them, so we can only obtain embeddings for the top 10 million users (currently in production, testing 30 million users). Furthermore, Word2Vec model training is limited by memory and computation as it is trained on a single machine.
|
||||
|
||||
#### Cosine Similarity
|
||||
A crucial component in Sims is calculating cosine similarities between users based on a user-X (X can be a user, list, or other entities) bipartite graph. This problem is technically challenging and took several years of effort to solve.
|
||||
|
||||
The current implementation uses the algorithm proposed in [When hashes met wedges: A distributed algorithm for finding high similarity vectors. WWW 2017](https://arxiv.org/pdf/1703.01054.pdf)
|
||||
|
||||
### Sims Ranking
|
||||
After the candidate generation step, we can obtain dozens to hundreds of similar user candidates for each user. However, since these candidates come from different algorithms, we need a way to rank them. To do this, we collect user feedback.
|
||||
|
||||
We use the "Profile Sidebar Impressions & Follow" (a module with follow suggestions displayed when a user visits a profile page and scrolls down) to collect training data. To alleviate any system bias, we use 4% of traffic to show randomly shuffled candidates to users and collect positive (followed impression) and negative (impression only) data from this traffic. This data is used as an evaluation set. We use a portion of the remaining 96% of traffic for training data, filtering only for sets of impressions that had at least one follow, ensuring that the user taking action was paying attention to the impressions.
|
||||
|
||||
The examples are in the format of (profile_user, candidate_user, label). We add features for profile_users and candidate_users based on some high-level aggregated statistics in a feature dataset provided by the Customer Journey team, as well as features that represent the similarity between the profile_user and candidate_user.
|
||||
|
||||
We employ a multi-tower MLP model and optimize the logistic loss. The model is refreshed weekly using an ML workflow.
|
||||
|
||||
We recompute the candidates and rank them daily. The ranked results are published to the Manhattan dataset.
|
||||
|
@ -1,36 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.strato.generated.client.recommendations.similarity.SimilarUsersBySimsExperimentalOnUserClientColumn
|
||||
import com.twitter.util.Duration
|
||||
|
||||
import javax.inject.Inject
|
||||
|
||||
/** Sims source reading directly from the fetcher of [[SimilarUsersBySimsExperimentalOnUserClientColumn]]. */
@Singleton
class SimsExperimentalStore @Inject() (
  simsExperimentalOnUserClientColumn: SimilarUsersBySimsExperimentalOnUserClientColumn)
    extends StratoBasedSimsCandidateSourceWithUnitView(
      fetcher = simsExperimentalOnUserClientColumn.fetcher,
      identifier = SimsExperimentalStore.Identifier
    )

/** Same column as [[SimsExperimentalStore]], read through a [[CacheBasedSimsStore]]. */
@Singleton
class CachedSimsExperimentalStore @Inject() (
  simsExperimentalOnUserClientColumn: SimilarUsersBySimsExperimentalOnUserClientColumn,
  statsReceiver: StatsReceiver)
    extends CacheBasedSimsStore(
      id = SimsExperimentalStore.Identifier,
      fetcher = simsExperimentalOnUserClientColumn.fetcher,
      maxCacheSize = SimsExperimentalStore.MaxCacheSize,
      cacheTtl = SimsExperimentalStore.CacheTTL,
      statsReceiver = statsReceiver.scope("CachedSimsExperimentalStore", "cache")
    )

/** Identifier and cache settings shared by the two stores above. */
object SimsExperimentalStore {

  /** Built from the name of `Algorithm.Sims`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.Sims.toString)

  val MaxCacheSize: Int = 1000

  val CacheTTL: Duration = Duration.fromHours(12)
}
|
@ -1,14 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
|
||||
import com.twitter.timelines.configapi.FSName
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/** Feature-switch configuration listing the boolean params of the Sims source. */
@Singleton
class SimsSourceFSConfig @Inject() () extends FeatureSwitchConfig {

  // NOTE(review): only DisableHeavyRanker is listed; the Enable* params declared in
  // SimsSourceParams are not - confirm that this is intentional.
  override val booleanFSParams: Seq[FSParam[Boolean] with FSName] =
    Seq(SimsSourceParams.DisableHeavyRanker)
}
|
@ -1,16 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Boolean feature-switch params for the Sims source; every one of them defaults to false. */
object SimsSourceParams {

  case object EnableDBV2SimsStore
      extends FSParam[Boolean]("sims_source_enable_dbv2_source", default = false)

  case object EnableDBV2SimsRefreshStore
      extends FSParam[Boolean]("sims_source_enable_dbv2_refresh_source", default = false)

  case object EnableExperimentalSimsStore
      extends FSParam[Boolean]("sims_source_enable_experimental_source", default = false)

  case object DisableHeavyRanker
      extends FSParam[Boolean]("sims_source_disable_heavy_ranker", default = false)
}
|
@ -1,36 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.constants.GuiceNamedConstants
|
||||
import com.twitter.hermit.candidate.thriftscala.Candidates
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.strato.client.Fetcher
|
||||
import com.twitter.util.Duration
|
||||
|
||||
import javax.inject.Inject
|
||||
|
||||
/** Sims source reading directly from the fetcher bound to `GuiceNamedConstants.SIMS_FETCHER`. */
@Singleton
class SimsStore @Inject() (
  @Named(GuiceNamedConstants.SIMS_FETCHER) fetcher: Fetcher[Long, Unit, Candidates])
    extends StratoBasedSimsCandidateSourceWithUnitView(
      fetcher = fetcher,
      identifier = SimsStore.Identifier
    )

/** Same fetcher as [[SimsStore]], read through a [[CacheBasedSimsStore]]. */
@Singleton
class CachedSimsStore @Inject() (
  @Named(GuiceNamedConstants.SIMS_FETCHER) fetcher: Fetcher[Long, Unit, Candidates],
  statsReceiver: StatsReceiver)
    extends CacheBasedSimsStore(
      id = SimsStore.Identifier,
      fetcher = fetcher,
      maxCacheSize = SimsStore.MaxCacheSize,
      cacheTtl = SimsStore.CacheTTL,
      statsReceiver = statsReceiver.scope("CachedSimsStore", "cache")
    )

/** Identifier and cache settings shared by the two stores above. */
object SimsStore {

  /** Built from the name of `Algorithm.Sims`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.Sims.toString)

  val MaxCacheSize: Int = 50000

  val CacheTTL: Duration = Duration.fromHours(24)
}
|
@ -1,40 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.base.StratoFetcherSource
|
||||
import com.twitter.follow_recommendations.common.models.AccountProof
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.Reason
|
||||
import com.twitter.follow_recommendations.common.models.SimilarToProof
|
||||
import com.twitter.hermit.candidate.thriftscala.Candidates
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.strato.client.Fetcher
|
||||
|
||||
/**
 * Strato-backed candidate source keyed by user id whose values are [[Candidates]].
 *
 * @param fetcher    fetcher handed to the underlying [[StratoFetcherSource]]
 * @param view       view handed to the underlying [[StratoFetcherSource]]
 * @param identifier identifier of this candidate source
 * @tparam U view type of the fetcher
 */
abstract class StratoBasedSimsCandidateSource[U](
  fetcher: Fetcher[Long, U, Candidates],
  view: U,
  override val identifier: CandidateSourceIdentifier)
    extends StratoFetcherSource[Long, U, Candidates](fetcher, view, identifier) {

  /** Delegates to the shared conversion in the companion object. */
  override def map(target: Long, candidates: Candidates): Seq[CandidateUser] =
    StratoBasedSimsCandidateSource.map(target, candidates)
}

object StratoBasedSimsCandidateSource {

  /**
   * Converts fetched [[Candidates]] into [[CandidateUser]]s.
   *
   * Each result keeps the candidate's id and score and carries a similar-to proof that names
   * `target` as the user it is similar to.
   *
   * @param target     user id the candidates were fetched for
   * @param candidates fetched value
   */
  def map(target: Long, candidates: Candidates): Seq[CandidateUser] =
    candidates.candidates.map { candidate =>
      val proof = AccountProof(similarToProof = Some(SimilarToProof(Seq(target))))
      CandidateUser(
        id = candidate.userId,
        score = Some(candidate.score),
        reason = Some(Reason(Some(proof)))
      )
    }
}
|
@ -1,10 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.twitter.hermit.candidate.thriftscala.Candidates
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.strato.client.Fetcher
|
||||
|
||||
/**
 * [[StratoBasedSimsCandidateSource]] specialised to fetchers whose view type is `Unit`.
 *
 * @param fetcher    fetcher keyed by user id with a `Unit` view
 * @param identifier identifier of this candidate source
 */
abstract class StratoBasedSimsCandidateSourceWithUnitView(
  fetcher: Fetcher[Long, Unit, Candidates],
  override val identifier: CandidateSourceIdentifier)
    // `()` is the unit value. The previous argument `Unit` named the companion object and only
    // type-checked because the compiler discards it and substitutes `()`.
    extends StratoBasedSimsCandidateSource[Unit](fetcher, (), identifier)
|
@ -1,55 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims
|
||||
|
||||
import com.twitter.finagle.stats.NullStatsReceiver
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.HasSimilarToContext
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Sims candidate source that picks one of four cached stores per request.
 *
 * The params in [[SimsSourceParams]] are checked in this order and the first enabled one wins:
 * `EnableDBV2SimsStore`, `EnableDBV2SimsRefreshStore`, `EnableExperimentalSimsStore`. When none
 * is enabled the request goes to `cachedSimsStore`.
 */
@Singleton
class SwitchingSimsSource @Inject() (
  cachedDBV2SimsStore: CachedDBV2SimsStore,
  cachedDBV2SimsRefreshStore: CachedDBV2SimsRefreshStore,
  cachedSimsExperimentalStore: CachedSimsExperimentalStore,
  cachedSimsStore: CachedSimsStore,
  statsReceiver: StatsReceiver = NullStatsReceiver)
    extends CandidateSource[HasParams with HasSimilarToContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = SwitchingSimsSource.Identifier

  private val stats = statsReceiver.scope("SwitchingSimsSource")
  private val dbV2SimsStoreCounter = stats.counter("DBV2SimsStore")
  private val dbV2SimsRefreshStoreCounter = stats.counter("DBV2SimsRefreshStore")
  private val simsExperimentalStoreCounter = stats.counter("SimsExperimentalStore")
  private val simsStoreCounter = stats.counter("SimsStore")
  // Resolved once like the per-store counters above, not looked up on every request.
  private val totalCounter = stats.counter("total")

  /**
   * Routes the request to the selected store, counting both the selection and the total.
   *
   * @param request request carrying the params and the similar-to user ids
   * @return whatever the selected store returns for the request
   */
  override def apply(request: HasParams with HasSimilarToContext): Stitch[Seq[CandidateUser]] = {
    val selectedSimsStore =
      if (request.params(SimsSourceParams.EnableDBV2SimsStore)) {
        dbV2SimsStoreCounter.incr()
        cachedDBV2SimsStore
      } else if (request.params(SimsSourceParams.EnableDBV2SimsRefreshStore)) {
        dbV2SimsRefreshStoreCounter.incr()
        cachedDBV2SimsRefreshStore
      } else if (request.params(SimsSourceParams.EnableExperimentalSimsStore)) {
        simsExperimentalStoreCounter.incr()
        cachedSimsExperimentalStore
      } else {
        simsStoreCounter.incr()
        cachedSimsStore
      }
    totalCounter.incr()
    selectedSimsStore(request)
  }
}

object SwitchingSimsSource {

  /** Built from the name of `Algorithm.Sims`. */
  val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier(Algorithm.Sims.toString)
}
|
@ -1,23 +0,0 @@
|
||||
scala_library(
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/inject:guice",
|
||||
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
|
||||
"3rdparty/jvm/net/codingwell:scala-guice",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-api",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/sims",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/real_time_real_graph",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/socialgraph",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
|
||||
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
|
||||
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"util/util-slf4j-api/src/main/scala",
|
||||
],
|
||||
)
|
@ -1,22 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
|
||||
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/** Feature-switch params for the DBv2 Sims expansion candidate sources. */
object DBV2SimsExpansionParams {

  // These divisors are used to calibrate DBv2Sims expansion candidate scores.

  /** Divisor for recent-following similar users: default 1.0, bounded to [0.1, 100]. */
  case object RecentFollowingSimilarUsersDBV2CalibrateDivisor
      extends FSBoundedParam[Double](
        "sims_expansion_recent_following_similar_users_dbv2_divisor",
        default = 1.0d,
        min = 0.1d,
        max = 100d
      )

  /** Divisor for recent-engagement similar users: default 1.0, bounded to [0.1, 100]. */
  case object RecentEngagementSimilarUsersDBV2CalibrateDivisor
      extends FSBoundedParam[Double](
        "sims_expansion_recent_engagement_similar_users_dbv2_divisor",
        default = 1.0d,
        min = 0.1d,
        max = 100d
      )

  /** Boolean switch, false by default. */
  case object DisableHeavyRanker
      extends FSParam[Boolean]("sims_expansion_disable_heavy_ranker", default = false)
}
|
@ -1,6 +0,0 @@
|
||||
# Sims Expansion Candidate Source
|
||||
Provides similar accounts based on the Sims algorithm for a given set of accounts.
|
||||
|
||||
This is a 2nd-hop expansion, meaning that the input accounts could be a user's recently engaged, followed, or algorithm-generated (such as RealGraph) accounts.
|
||||
|
||||
For more information on Sims and how it is utilized in the Follow Recommendations Service, please refer to the `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/sims/README.md` file.
|
@ -1,14 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
|
||||
|
||||
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * [[FeatureSwitchConfig]] that exposes the boolean parameters of the recent-engagement
 * similar-users source.
 */
@Singleton
class RecentEngagementSimilarUsersFSConfig @Inject() () extends FeatureSwitchConfig {

  /** Boolean params contributed by this config: only `FirstDegreeSortEnabled`. */
  override val booleanFSParams: Seq[FSParam[Boolean]] =
    Seq(RecentEngagementSimilarUsersParams.FirstDegreeSortEnabled)
}
|
@ -1,17 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
|
||||
|
||||
import com.twitter.timelines.configapi.FSEnumParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/**
 * Config-API parameters for the recent-engagement similar-users candidate source.
 */
object RecentEngagementSimilarUsersParams {

  /** Boolean switch for sorting first-degree nodes; on by default. */
  case object FirstDegreeSortEnabled
      extends FSParam[Boolean](
        name = "sims_expansion_recent_engagement_first_degree_sort",
        default = true
      )

  /** Selects how per-candidate scores are aggregated; defaults to `Sum`. */
  case object Aggregator
      extends FSEnumParam[SimsExpansionSourceAggregatorId.type](
        name = "sims_expansion_recent_engagement_aggregator_id",
        default = SimsExpansionSourceAggregatorId.Sum,
        enum = SimsExpansionSourceAggregatorId
      )
}
|
@ -1,113 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.sims.SwitchingSimsSource
|
||||
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
|
||||
import com.twitter.follow_recommendations.common.models.AccountProof
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.Reason
|
||||
import com.twitter.follow_recommendations.common.models.SimilarToProof
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Sims-expansion candidate source whose first-degree nodes are the accounts the requesting
 * user recently engaged with, as returned by [[RealTimeRealGraphClient]].
 */
@Singleton
class RecentEngagementSimilarUsersSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient,
  switchingSimsSource: SwitchingSimsSource,
  statsReceiver: StatsReceiver)
    extends SimsExpansionBasedCandidateSource[HasClientContext with HasParams](
      switchingSimsSource) {

  override val identifier: CandidateSourceIdentifier = RecentEngagementSimilarUsersSource.Identifier

  private val stats = statsReceiver.scope(identifier.name)
  private val calibratedScoreCounter = stats.counter("calibrated_scores_counter")

  /** Second-degree expansion is not capped for this source. */
  override def maxSecondaryDegreeNodes(req: HasClientContext with HasParams): Int = Int.MaxValue

  /** Fixed result cap taken from the companion object. */
  override def maxResults(req: HasClientContext with HasParams): Int =
    RecentEngagementSimilarUsersSource.MaxResults

  /** A candidate's score is the product of the seed's score and the similarity score. */
  override def scoreCandidate(sourceScore: Double, similarToScore: Double): Double =
    sourceScore * similarToScore

  /** Reads the calibration divisor for this source from the request params. */
  override def calibrateDivisor(req: HasClientContext with HasParams): Double =
    req.params(DBV2SimsExpansionParams.RecentEngagementSimilarUsersDBV2CalibrateDivisor)

  /** Divides the candidate score by the calibration divisor and counts the call. */
  override def calibrateScore(
    candidateScore: Double,
    req: HasClientContext with HasParams
  ): Double = {
    calibratedScoreCounter.incr()
    candidateScore / calibrateDivisor(req)
  }

  /**
   * Fetches the first-degree nodes for the request: the recently engaged users, highest score
   * first (a missing score counts as 0), truncated to `MaxFirstDegreeNodes`.
   * Yields `Stitch.Nil` when the request carries no user id.
   */
  override def firstDegreeNodes(
    target: HasClientContext with HasParams
  ): Stitch[Seq[CandidateUser]] =
    target.getOptionalUserId match {
      case Some(userId) =>
        val recentlyEngaged = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
          userId,
          RealTimeRealGraphClient.EngagementScoreMap,
          includeDirectFollowCandidates = true,
          includeNonDirectFollowCandidates = true
        )
        recentlyEngaged.map { engagedUsers =>
          engagedUsers
            .sortBy(user => -user.score.getOrElse(0.0d))
            .take(RecentEngagementSimilarUsersSource.MaxFirstDegreeNodes)
        }
      case None =>
        Stitch.Nil
    }

  /**
   * Merges the second-degree nodes of all seeds into one ranked candidate list.
   *
   * Second-degree nodes that are themselves seeds are dropped. The rest are grouped by
   * candidate id, their scores are combined with the aggregator selected by the `Aggregator`
   * param, and the distinct `similarTo` ids are attached as proof. The result is sorted by
   * descending score and truncated to `maxResults`.
   */
  override def aggregateAndScore(
    request: HasClientContext with HasParams,
    firstDegreeToSecondDegreeNodesMap: Map[CandidateUser, Seq[SimilarUser]]
  ): Stitch[Seq[CandidateUser]] = {
    val seedIds = firstDegreeToSecondDegreeNodesMap.keysIterator.map(_.id).toSet

    val combineScores = request.params(RecentEngagementSimilarUsersParams.Aggregator) match {
      case SimsExpansionSourceAggregatorId.Max =>
        SimsExpansionBasedCandidateSource.ScoreAggregator.Max
      case SimsExpansionSourceAggregatorId.Sum =>
        SimsExpansionBasedCandidateSource.ScoreAggregator.Sum
      case SimsExpansionSourceAggregatorId.MultiDecay =>
        SimsExpansionBasedCandidateSource.ScoreAggregator.MultiDecay
    }

    // Every second-degree node that is not already one of the seeds.
    val expansions = for {
      similarUsers <- firstDegreeToSecondDegreeNodesMap.values
      similarUser <- similarUsers
      if !seedIds.contains(similarUser.candidateId)
    } yield similarUser

    val rankedCandidates = expansions
      .groupBy(_.candidateId)
      .toSeq
      .map {
        case (candidateId, matches) =>
          // Different aggregators for final score
          val aggregatedScore = combineScores(matches.map(_.score).toSeq)
          val similarToIds = matches.map(_.similarTo).toSet
          val proof = AccountProof(similarToProof = Some(SimilarToProof(similarToIds.toSeq)))

          CandidateUser(
            id = candidateId,
            score = Some(aggregatedScore),
            reason = Some(Reason(Some(proof)))
          ).withCandidateSource(identifier)
      }
      .sortBy(candidate => -candidate.score.getOrElse(0.0d))
      .take(maxResults(request))

    Stitch.value(rankedCandidates)
  }
}
|
||||
|
||||
/** Constants for [[RecentEngagementSimilarUsersSource]]. */
object RecentEngagementSimilarUsersSource {

  /** Candidate source identifier derived from `Algorithm.RecentEngagementSimilarUser`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RecentEngagementSimilarUser.toString)

  /** Number of recently engaged users kept as first-degree nodes. */
  val MaxFirstDegreeNodes: Int = 10

  /** Maximum number of candidates returned by the source. */
  val MaxResults: Int = 200
}
|
@ -1,29 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
|
||||
|
||||
import com.twitter.timelines.configapi.FSBoundedParam
|
||||
import com.twitter.timelines.configapi.FSParam
|
||||
|
||||
/**
 * Config-API parameters for the recent-following similar-users candidate source.
 */
object RecentFollowingSimilarUsersParams {

  /** Number of recently followed users used as first-degree nodes (0 to 200, default 10). */
  case object MaxFirstDegreeNodes
      extends FSBoundedParam[Int](
        name = "sims_expansion_recent_following_max_first_degree_nodes",
        default = 10,
        min = 0,
        max = 200
      )

  /** Cap on second-degree expansion per first-degree node (0 to 200, default 40). */
  case object MaxSecondaryDegreeExpansionPerNode
      extends FSBoundedParam[Int](
        name = "sims_expansion_recent_following_max_secondary_degree_nodes",
        default = 40,
        min = 0,
        max = 200
      )

  /** Maximum number of candidates returned (0 to 200, default 200). */
  case object MaxResults
      extends FSBoundedParam[Int](
        name = "sims_expansion_recent_following_max_results",
        default = 200,
        min = 0,
        max = 200
      )

  /** Boolean switch for the timestamp-integrated first-degree path; off by default. */
  case object TimestampIntegrated
      extends FSParam[Boolean](
        name = "sims_expansion_recent_following_integ_timestamp",
        default = false
      )
}
|
@ -1,99 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.sims.SwitchingSimsSource
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.follow_recommendations.common.models.HasRecentFollowedUserIds
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.follow_recommendations.common.clients.socialgraph.SocialGraphClient
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import javax.inject.Inject
|
||||
|
||||
/** Constants for [[RecentFollowingSimilarUsersSource]]. */
object RecentFollowingSimilarUsersSource {

  /** Candidate source identifier derived from `Algorithm.NewFollowingSimilarUser`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.NewFollowingSimilarUser.toString)
}
|
||||
|
||||
/**
 * Sims-expansion candidate source whose first-degree nodes are the accounts the requesting
 * user recently followed.
 *
 * The first-degree nodes come either from the request itself (`recentFollowedUserIds`) or,
 * when `TimestampIntegrated` is enabled, from [[SocialGraphClient]] together with follow
 * timestamps that are turned into recency scores.
 */
@Singleton
class RecentFollowingSimilarUsersSource @Inject() (
  socialGraph: SocialGraphClient,
  switchingSimsSource: SwitchingSimsSource,
  statsReceiver: StatsReceiver)
    extends SimsExpansionBasedCandidateSource[
      HasParams with HasRecentFollowedUserIds with HasClientContext
    ](switchingSimsSource) {

  val identifier: CandidateSourceIdentifier = RecentFollowingSimilarUsersSource.Identifier

  private val stats = statsReceiver.scope(identifier.name)
  private val maxResultsStats = stats.scope("max_results")
  private val calibratedScoreCounter = stats.counter("calibrated_scores_counter")

  /**
   * Fetches the first-degree nodes for the request.
   *
   * With `TimestampIntegrated` enabled, the most recent follows (at most `MaxFirstDegreeNodes`)
   * are read from the social graph and each is scored as `timeInMs / newest timeInMs`, so the
   * newest follow scores 1.0. The result is empty, rather than a thrown exception, when the
   * request carries no user id or when there are no recent follows to score.
   *
   * Otherwise the first `MaxFirstDegreeNodes` ids of `request.recentFollowedUserIds` are used,
   * each with a score of 1.0.
   */
  override def firstDegreeNodes(
    request: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Stitch[Seq[CandidateUser]] = {
    if (request.params(RecentFollowingSimilarUsersParams.TimestampIntegrated)) {
      request.clientContext.userId match {
        case Some(userId) =>
          socialGraph.getRecentFollowedUserIdsWithTime(userId).map { results =>
            val mostRecentFollows = results
              .sortBy(-_.timeInMs)
              .take(request.params(RecentFollowingSimilarUsersParams.MaxFirstDegreeNodes))

            // After the descending sort the head is the newest follow; `headOption` keeps an
            // empty result (no follows, or MaxFirstDegreeNodes = 0) from throwing.
            mostRecentFollows.headOption match {
              case Some(newestFollow) =>
                val maxTimestamp = newestFollow.timeInMs
                mostRecentFollows.map { follow =>
                  CandidateUser(
                    follow.userId,
                    score = Some(follow.timeInMs.toDouble / maxTimestamp))
                }
              case None =>
                Seq.empty[CandidateUser]
            }
          }
        case None =>
          // No user id on the request: there is nobody to look recent follows up for.
          Stitch.Nil
      }
    } else {
      val maxFirstDegreeNodes =
        request.params(RecentFollowingSimilarUsersParams.MaxFirstDegreeNodes)
      Stitch.value(
        request.recentFollowedUserIds
          .getOrElse(Nil)
          .take(maxFirstDegreeNodes)
          .map(CandidateUser(_, score = Some(1.0)))
      )
    }
  }

  /** Per-node second-degree expansion cap, read from the request params. */
  override def maxSecondaryDegreeNodes(
    req: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Int = {
    req.params(RecentFollowingSimilarUsersParams.MaxSecondaryDegreeExpansionPerNode)
  }

  /**
   * Result cap read from the request params. Also records one sample on a stat whose name
   * encodes the configured first-degree-node and max-results values.
   */
  override def maxResults(
    req: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Int = {
    val firstDegreeNodes = req.params(RecentFollowingSimilarUsersParams.MaxFirstDegreeNodes)
    val maxResultsNum = req.params(RecentFollowingSimilarUsersParams.MaxResults)
    maxResultsStats
      .stat(
        s"RecentFollowingSimilarUsersSource_firstDegreeNodes_${firstDegreeNodes}_maxResults_${maxResultsNum}")
      .add(1)
    maxResultsNum
  }

  /** A candidate's score is the product of the seed's score and the similarity score. */
  override def scoreCandidate(sourceScore: Double, similarToScore: Double): Double = {
    sourceScore * similarToScore
  }

  /** Reads the calibration divisor for this source from the request params. */
  override def calibrateDivisor(
    req: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Double = {
    req.params(DBV2SimsExpansionParams.RecentFollowingSimilarUsersDBV2CalibrateDivisor)
  }

  /** Divides the candidate score by the calibration divisor and counts the call. */
  override def calibrateScore(
    candidateScore: Double,
    req: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Double = {
    calibratedScoreCounter.incr()
    candidateScore / calibrateDivisor(req)
  }
}
|
@ -1,53 +0,0 @@
|
||||
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
|
||||
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.follow_recommendations.common.candidate_sources.sims.SwitchingSimsSource
|
||||
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
|
||||
import com.twitter.follow_recommendations.common.models.CandidateUser
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
|
||||
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
|
||||
import com.twitter.stitch.Stitch
|
||||
import com.twitter.timelines.configapi.HasParams
|
||||
|
||||
import javax.inject.Inject
|
||||
|
||||
/**
 * Sims-expansion candidate source whose first-degree nodes are the direct-follow accounts the
 * requesting user recently had strong engagements with, as returned by
 * [[RealTimeRealGraphClient]].
 */
@Singleton
class RecentStrongEngagementDirectFollowSimilarUsersSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient,
  switchingSimsSource: SwitchingSimsSource)
    extends SimsExpansionBasedCandidateSource[HasClientContext with HasParams](
      switchingSimsSource) {

  val identifier = RecentStrongEngagementDirectFollowSimilarUsersSource.Identifier

  /**
   * Fetches the recently engaged users using the strong-engagement score map, restricted to
   * direct-follow candidates, and keeps the first `MaxFirstDegreeNodes` in the order the client
   * returned them. Yields `Stitch.Nil` when the request carries no user id.
   */
  override def firstDegreeNodes(
    request: HasClientContext with HasParams
  ): Stitch[Seq[CandidateUser]] =
    request.getOptionalUserId match {
      case Some(userId) =>
        val stronglyEngaged = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
          userId,
          RealTimeRealGraphClient.StrongEngagementScoreMap,
          includeDirectFollowCandidates = true,
          includeNonDirectFollowCandidates = false
        )
        stronglyEngaged.map { engagedUsers =>
          engagedUsers.take(RecentStrongEngagementDirectFollowSimilarUsersSource.MaxFirstDegreeNodes)
        }
      case None =>
        Stitch.Nil
    }

  /** Second-degree expansion is not capped for this source. */
  override def maxSecondaryDegreeNodes(request: HasClientContext with HasParams): Int =
    Int.MaxValue

  /** Fixed result cap taken from the companion object. */
  override def maxResults(request: HasClientContext with HasParams): Int =
    RecentStrongEngagementDirectFollowSimilarUsersSource.MaxResults

  /** A candidate's score is the product of the seed's score and the similarity score. */
  override def scoreCandidate(sourceScore: Double, similarToScore: Double): Double =
    sourceScore * similarToScore

  /** This source always uses a calibration divisor of 1.0. */
  override def calibrateDivisor(req: HasClientContext with HasParams): Double = 1.0d
}
|
||||
|
||||
/** Constants for [[RecentStrongEngagementDirectFollowSimilarUsersSource]]. */
object RecentStrongEngagementDirectFollowSimilarUsersSource {

  /** Candidate source identifier derived from `Algorithm.RecentStrongEngagementSimilarUser`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RecentStrongEngagementSimilarUser.toString)

  /** Number of recently engaged users kept as first-degree nodes. */
  val MaxFirstDegreeNodes: Int = 10

  /** Maximum number of candidates returned by the source. */
  val MaxResults: Int = 200
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user