Delete follow-recommendations-service directory

This commit is contained in:
dogemanttv 2024-01-10 17:06:57 -06:00 committed by GitHub
parent d0717e339b
commit 14f78e176b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
553 changed files with 0 additions and 27180 deletions

View File

@ -1,48 +0,0 @@
# Without this alias, library :follow-recommendations-service_lib would conflict with :bin
alias(
name = "follow-recommendations-service",
target = ":follow-recommendations-service_lib",
)
# Aggregate target: depends on the server package and its models package.
target(
name = "follow-recommendations-service_lib",
dependencies = [
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/models",
],
)
# Runnable binary for the service. `main` names the Thrift server entry point; the
# dependencies add the service library (through the alias above) plus the logback,
# zipkin-scribe, loglens and twitter-server targets listed below.
jvm_binary(
name = "bin",
basename = "follow-recommendations-service",
main = "com.twitter.follow_recommendations.FollowRecommendationsServiceThriftServerMain",
runtime_platform = "java11",
tags = ["bazel-compatible"],
dependencies = [
":follow-recommendations-service",
"3rdparty/jvm/ch/qos/logback:logback-classic",
"finagle/finagle-zipkin-scribe/src/main/scala",
"finatra/inject/inject-logback/src/main/scala",
"loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback",
"twitter-server-internal/src/main/scala",
"twitter-server/logback-classic/src/main/scala",
],
)
# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app
# This app packages :bin as a zip archive together with the files under
# server/src/main/resources (paths inside the bundle are relative to that directory).
jvm_app(
name = "follow-recommendations-service-app",
archive = "zip",
binary = ":bin",
bundles = [
bundle(
fileset = [
"server/src/main/resources/*",
"server/src/main/resources/**/*",
],
owning_target = "follow-recommendations-service/server/src/main/resources:frs_resources",
relative_to = "server/src/main/resources",
),
],
tags = ["bazel-compatible"],
)

View File

@ -1,24 +0,0 @@
[code-coverage]
package = com.twitter.follow_recommendations
[docbird]
project_name = follow-recommendations-service
project_type = service
; example settings:
;
; project_name = fluffybird
; description = fluffybird is a service for fluffing up feathers.
; tags = python,documentation,fluffybird
; project_type = service
; - allowed options: essay, library, service, hub, cookbook, styleguide, policy
; owner_links = roster
; - allowed options: roster, find, email
; scrolling_tocs = yes
; comments = yes
; verifications = yes
; support_widget = yes
; health_score = yes
; sticky_sidebar = no
[jira]
project = CJREL

Binary file not shown.

Before

Width:  |  Height:  |  Size: 178 KiB

View File

@ -1,40 +0,0 @@
# Follow Recommendations Service
## Introduction to the Follow Recommendations Service (FRS)
The Follow Recommendations Service (FRS) is a robust recommendation engine designed to provide users with personalized suggestions for accounts to follow. At present, FRS supports Who-To-Follow (WTF) module recommendations across a variety of Twitter product interfaces. Additionally, by suggesting tweet authors, FRS also delivers FutureGraph tweet recommendations, which consist of tweets from accounts that users may be interested in following in the future.
## Design
The system is tailored to accommodate diverse use cases, such as Post New-User-Experience (NUX), advertisements, FutureGraph tweets, and more. Each use case features a unique display location identifier. To view all display locations, refer to the following path: `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models/DisplayLocation.scala`.
Recommendation steps are customized according to each display location. Common and high-level steps are encapsulated within the "RecommendationFlow," which includes operations like candidate generation, ranker selection, filtering, transformation, and beyond. To explore all flows, refer to this path: `follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/flows`.
For each product (corresponding to a display location), one or multiple flows can be selected to generate candidates based on code and configurations. To view all products, browse the subdirectories of `follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/products` (for example, `home_timeline_tweet_recs`).
The FRS overview diagram is depicted below:
![FRS_architecture.png](FRS_architecture.png)
### Candidate Generation
During this step, FRS utilizes various user signals and algorithms to identify candidates from all Twitter accounts. The candidate source folder is located at `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/`, with a README file provided within each candidate source folder.
### Filtering
In this phase, FRS applies different filtering logic after generating account candidates to improve quality and health. Filtering may occur before and/or after the ranking step, with heavier filtering logic (e.g., higher latency) typically applied after the ranking step. The filters' folder is located at `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/predicates`.
### Ranking
During this step, FRS employs both Machine Learning (ML) and heuristic rule-based candidate ranking. For the ML ranker, ML features are fetched beforehand (i.e., feature hydration),
and a DataRecord (the Twitter-standard Machine Learning data format used to represent feature data, labels, and predictions when training or serving) is constructed for each <user, candidate> pair.
These pairs are then sent to a separate ML prediction service, which houses the ML model trained offline.
The ML prediction service returns a prediction score, representing the probability that a user will follow and engage with the candidate.
This score is a weighted sum of p(follow|recommendation) and p(positive engagement|follow), and FRS uses this score to rank the candidates.
The rankers' folder is located at `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/rankers`.
### Transform
In this phase, the sequence of candidates undergoes necessary transformations, such as deduplication, attaching social proof (i.e., "followed by XX user"), adding tracking tokens, and more.
The transformers' folder can be found at `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/transforms`.
### Truncation
During this final step, FRS trims the candidate pool to a specified size. This process ensures that only the most relevant and engaging candidates are presented to users while maintaining an optimal user experience.
By implementing these comprehensive steps and adapting to various use cases, the Follow Recommendations Service (FRS) effectively curates tailored suggestions for Twitter users, enhancing their overall experience and promoting meaningful connections within the platform.

View File

@ -1,18 +0,0 @@
# Scala library target for the sources in this directory (no explicit `name` is given).
# It is built with the "fatal_warnings" compiler option set.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/guava",
"configapi/configapi-core/src/main/scala/com/twitter/timelines/configapi",
"finagle/finagle-core/src/main",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/candidate_source",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/recommendation",
"stitch/stitch-core",
],
# The recommendation pipeline package is also listed as an export, i.e. it is
# re-exposed to targets that depend on this library.
exports = [
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/recommendation",
],
)

View File

@ -1,36 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.base.EnrichedCandidateSource.toEnriched
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
/**
 * Helper structure that registers candidate sources and selects them by identifier.
 *
 * @tparam Target    request type accepted by the registered sources
 * @tparam Candidate candidate type produced by the registered sources
 */
trait CandidateSourceRegistry[Target, Candidate] {

  /** Stats receiver passed to `observe` for every registered source. */
  val statsReceiver: StatsReceiver

  /** The candidate sources to register. */
  def sources: Set[CandidateSource[Target, Candidate]]

  /**
   * Registered sources keyed by their identifier, each wrapped with `observe(statsReceiver)`.
   *
   * Built on first access. Throws IllegalArgumentException when two sources share an
   * identifier (detected because the map ends up smaller than the source set).
   */
  final lazy val candidateSources: Map[
    CandidateSourceIdentifier,
    CandidateSource[Target, Candidate]
  ] = {
    val byIdentifier =
      (for (source <- sources) yield source.identifier -> source.observe(statsReceiver)).toMap

    if (byIdentifier.size == sources.size) byIdentifier
    else throw new IllegalArgumentException("Duplicate Candidate Source Identifiers")
  }

  /**
   * Looks up the registered source for each identifier.
   *
   * Fails loudly: the map lookup throws if an identifier has not been registered.
   */
  def select(
    identifiers: Set[CandidateSourceIdentifier]
  ): Set[CandidateSource[Target, Candidate]] =
    for (identifier <- identifiers) yield candidateSources(identifier)
}

View File

@ -1,164 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.stitch.Stitch
import com.twitter.util.Duration
import com.twitter.util.TimeoutException
import scala.language.implicitConversions
/**
 * Adds combinators (stats observation, target/candidate mapping, timeouts) on top of an
 * existing candidate source. Every combinator returns a new candidate source that keeps
 * the identifier of the wrapped one.
 *
 * @param original the candidate source being wrapped
 */
class EnrichedCandidateSource[Target, Candidate](original: CandidateSource[Target, Candidate]) {

  /** Calls the wrapped source and bounds the call with `timeout` on finagle's DefaultTimer. */
  private def applyWithTimeout(target: Target, timeout: Duration): Stitch[Seq[Candidate]] =
    original
      .apply(target)
      .within(timeout)(com.twitter.finagle.util.DefaultTimer)

  /**
   * Gate the candidate source based on the Predicate of target; the intended contract is
   * that results are returned only if the predicate returns Valid.
   *
   * Not implemented: every call throws UnsupportedOperationException.
   *
   * @param predicate predicate meant to decide whether the source runs for a target
   */
  def gate(predicate: Predicate[Target]): CandidateSource[Target, Candidate] =
    throw new UnsupportedOperationException()

  /**
   * Profiles every call of the wrapped source via StatsUtil.profileStitchSeqResults,
   * under a scope named after the source identifier.
   */
  def observe(statsReceiver: StatsReceiver): CandidateSource[Target, Candidate] = {
    val sourceIdentifier = original.identifier
    val scopedStats = statsReceiver.scope(sourceIdentifier.name)
    new CandidateSource[Target, Candidate] {
      val identifier = sourceIdentifier
      override def apply(target: Target): Stitch[Seq[Candidate]] =
        StatsUtil.profileStitchSeqResults[Candidate](original(target), scopedStats)
    }
  }

  /**
   * Map target type into new target type (1 to optional mapping): when the mapper yields
   * None the wrapped source is not called and no candidates are returned.
   */
  def stitchMapKey[Target2](
    targetMapper: Target2 => Stitch[Option[Target]]
  ): CandidateSource[Target2, Candidate] =
    stitchMapKeys[Target2](target => targetMapper(target).map(_.toSeq))

  /**
   * Map target type into new target type (1 to many mapping): the wrapped source is called
   * for each mapped target and the results are concatenated.
   */
  def stitchMapKeys[Target2](
    targetMapper: Target2 => Stitch[Seq[Target]]
  ): CandidateSource[Target2, Candidate] =
    new CandidateSource[Target2, Candidate] {
      val identifier = original.identifier
      override def apply(target: Target2): Stitch[Seq[Candidate]] =
        targetMapper(target).flatMap { mappedTargets =>
          Stitch.traverse(mappedTargets)(original(_)).map(_.flatten)
        }
    }

  /**
   * Same as stitchMapKeys, for a mapper that does not need a Stitch.
   */
  def mapKeys[Target2](
    targetMapper: Target2 => Seq[Target]
  ): CandidateSource[Target2, Candidate] =
    stitchMapKeys[Target2](target => Stitch.value(targetMapper(target)))

  /**
   * Map candidates to a new type with candidateMapper; candidates mapped to None are dropped.
   */
  def mapValues[Candidate2](
    candidateMapper: Candidate => Stitch[Option[Candidate2]]
  ): CandidateSource[Target, Candidate2] =
    new CandidateSource[Target, Candidate2] {
      val identifier = original.identifier
      override def apply(target: Target): Stitch[Seq[Candidate2]] =
        for {
          candidates <- original(target)
          mapped <- Stitch.traverse(candidates)(candidateMapper(_))
        } yield mapped.flatten
    }

  /**
   * Map every candidate to a new type with candidateMapper (no candidate is dropped).
   */
  def mapValue[Candidate2](
    candidateMapper: Candidate => Candidate2
  ): CandidateSource[Target, Candidate2] =
    mapValues[Candidate2](candidate => Stitch.value(Some(candidateMapper(candidate))))

  /**
   * This method wraps the candidate source in a designated timeout so that a single candidate
   * source does not result in a timeout for the entire flow.
   *
   * On TimeoutException the "timeout" counter is incremented and an empty result is returned;
   * any other failure is propagated.
   */
  def within(
    candidateTimeout: Duration,
    statsReceiver: StatsReceiver
  ): CandidateSource[Target, Candidate] = {
    val sourceIdentifier = original.identifier
    val timeoutCounter = statsReceiver.counter(sourceIdentifier.name, "timeout")
    new CandidateSource[Target, Candidate] {
      val identifier = sourceIdentifier
      override def apply(target: Target): Stitch[Seq[Candidate]] =
        applyWithTimeout(target, candidateTimeout).rescue {
          case _: TimeoutException =>
            timeoutCounter.incr()
            Stitch.Nil
        }
    }
  }

  /**
   * Like `within`, but fails open: a timeout and any other Exception both yield an empty
   * result. Timeouts bump the "timeout" counter; other exceptions bump a counter named after
   * the exception class under candidate_source_error/<source name>.
   */
  def failOpenWithin(
    candidateTimeout: Duration,
    statsReceiver: StatsReceiver
  ): CandidateSource[Target, Candidate] = {
    val sourceIdentifier = original.identifier
    val timeoutCounter = statsReceiver.counter(sourceIdentifier.name, "timeout")
    new CandidateSource[Target, Candidate] {
      val identifier = sourceIdentifier
      override def apply(target: Target): Stitch[Seq[Candidate]] =
        applyWithTimeout(target, candidateTimeout).handle {
          case _: TimeoutException =>
            timeoutCounter.incr()
            Seq.empty
          case e: Exception =>
            val errorStats =
              statsReceiver.scope("candidate_source_error").scope(sourceIdentifier.name)
            errorStats.counter(e.getClass.getSimpleName).incr()
            Seq.empty
        }
    }
  }
}
/** Companion providing the implicit conversion that exposes the enrichment methods. */
object EnrichedCandidateSource {

  /** Wraps a candidate source so the EnrichedCandidateSource combinators can be called on it. */
  implicit def toEnriched[K, V](original: CandidateSource[K, V]): EnrichedCandidateSource[K, V] = {
    val enriched = new EnrichedCandidateSource[K, V](original)
    enriched
  }
}

View File

@ -1,17 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.follow_recommendations.common.models.FilterReason.ParamReason
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import com.twitter.timelines.configapi.Param
/**
 * Predicate driven by a boolean param of the request: Valid when the param is true,
 * otherwise Invalid with a ParamReason carrying the param's stat name.
 *
 * @param param boolean param looked up in the request's params
 */
case class ParamPredicate[Request <: HasParams](param: Param[Boolean]) extends Predicate[Request] {
  def apply(request: Request): Stitch[PredicateResult] = {
    val result: PredicateResult =
      if (request.params(param)) PredicateResult.Valid
      else PredicateResult.Invalid(Set(ParamReason(param.statName)))
    Stitch.value(result)
  }
}

View File

@ -1,282 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.models.FilterReason
import com.twitter.stitch.Arrow
import com.twitter.stitch.Stitch
/**
 * An asynchronous check on items of type Q that yields a PredicateResult
 * (Valid, or Invalid with a set of reasons).
 */
trait Predicate[-Q] {

  /** Evaluates the predicate for one item. */
  def apply(item: Q): Stitch[PredicateResult]

  /** This predicate expressed as an Arrow. */
  def arrow: Arrow[Q, PredicateResult] = Arrow.apply(apply)

  /** Predicate over K obtained by converting each input with `mapper` first. */
  def map[K](mapper: K => Q): Predicate[K] = Predicate(arrow.contramap(mapper))

  /**
   * Convenience to evaluate the predicate for a batch of items.
   *
   * Final so that implementations cannot change its meaning.
   */
  final def batch(items: Seq[Q]): Stitch[Seq[PredicateResult]] =
    this.arrow.traverse(items)

  /**
   * Syntax sugar for predicates whose input is a pair: takes the two parts separately.
   */
  def apply[Q1, Q2](item1: Q1, item2: Q2)(implicit ev: ((Q1, Q2)) => Q): Stitch[PredicateResult] =
    apply((item1, item2))

  /**
   * Logical AND, evaluated in sequence with short-circuiting: `p` is only run when this
   * predicate is Valid; otherwise this predicate's Invalid reasons are returned.
   *
   * @param p predicate to run after this one
   * @return a predicate that is Valid iff both predicates are Valid
   */
  def andThen[Q1 <: Q](p: Predicate[Q1]): Predicate[Q1] = {
    val both: Q1 => Stitch[PredicateResult] = { query =>
      apply(query).flatMap {
        case PredicateResult.Invalid(reasons) => Stitch.value(PredicateResult.Invalid(reasons))
        case PredicateResult.Valid => p(query)
      }
    }
    Predicate(both)
  }

  /**
   * Logical OR, evaluated in sequence: `p` is only run when this predicate is Invalid.
   *
   * @param p predicate to fall back to
   * @return a predicate that is Valid if either predicate is Valid; when both are Invalid
   *         the result carries the union of both sets of reasons
   */
  def or[Q1 <: Q](p: Predicate[Q1]): Predicate[Q1] = {
    val either: Q1 => Stitch[PredicateResult] = { query =>
      apply(query).flatMap {
        case PredicateResult.Valid =>
          Stitch.value(PredicateResult.Valid)
        case PredicateResult.Invalid(reasons) =>
          p(query).map {
            case PredicateResult.Valid => PredicateResult.Valid
            case PredicateResult.Invalid(newReasons) =>
              PredicateResult.Invalid(reasons ++ newReasons)
          }
      }
    }
    Predicate(either)
  }

  /**
   * Runs this predicate only if the gating predicate is Valid; otherwise returns Valid.
   */
  def gate[Q1 <: Q](gatingPredicate: Predicate[Q1]): Predicate[Q1] = {
    val gated: Q1 => Stitch[PredicateResult] = { query =>
      gatingPredicate(query).flatMap {
        case PredicateResult.Valid => apply(query)
        case _ => Stitch.value(PredicateResult.Valid)
      }
    }
    Predicate(gated)
  }

  /** Same predicate with its arrow profiled by StatsUtil.profilePredicateResult. */
  def observe(statsReceiver: StatsReceiver): Predicate[Q] = {
    val observedArrow = StatsUtil.profilePredicateResult(this.arrow, statsReceiver)
    Predicate(observedArrow)
  }

  /**
   * Fail-open variant: when evaluation fails with an Exception, `resultType` is returned
   * instead of the failure.
   */
  def convertToFailOpenWithResultType(resultType: PredicateResult): Predicate[Q] = {
    val failOpen: Q => Stitch[PredicateResult] = { query =>
      apply(query).handle { case _: Exception => resultType }
    }
    Predicate(failOpen)
  }
}
/** Predicate that reports every item as Valid, using the shared Predicate.AlwaysTrueStitch. */
class TruePredicate[Q] extends Predicate[Q] {
  override def apply(item: Q): Stitch[PredicateResult] = {
    Predicate.AlwaysTrueStitch
  }
}
/**
 * Predicate that reports every item as Invalid with the single given reason.
 *
 * @param reason filter reason attached to every result
 */
class FalsePredicate[Q](reason: FilterReason) extends Predicate[Q] {

  /** Pre-built result shared by all calls. */
  val InvalidResult = {
    val reasons = Set(reason)
    Stitch.value(PredicateResult.Invalid(reasons))
  }

  override def apply(item: Q): Stitch[PredicateResult] = {
    InvalidResult
  }
}
/**
 * Factory methods and batch/filter/combine helpers for Predicate.
 */
object Predicate {
// Shared, pre-built Stitch holding PredicateResult.Valid (TruePredicate returns it).
val AlwaysTrueStitch = Stitch.value(PredicateResult.Valid)
// Metric names used by batchFilterTake: a stat and a counter, both fed the number of
// batches that were evaluated.
val NumBatchesStat = "num_batches_stats"
val NumBatchesCount = "num_batches"
/**
 * Builds a predicate from a function; its arrow is Arrow(func).
 */
def apply[Q](func: Q => Stitch[PredicateResult]): Predicate[Q] = new Predicate[Q] {
override def apply(item: Q): Stitch[PredicateResult] = func(item)
override val arrow: Arrow[Q, PredicateResult] = Arrow(func)
}
/**
 * Builds a predicate from an arrow; apply delegates to that arrow.
 */
def apply[Q](outerArrow: Arrow[Q, PredicateResult]): Predicate[Q] = new Predicate[Q] {
override def apply(item: Q): Stitch[PredicateResult] = arrow(item)
override val arrow: Arrow[Q, PredicateResult] = outerArrow
}
/**
* Given some items, this function
* 1. chunks them up in groups
* 2. lazily applies a predicate on each group
* 3. filters based on the predicate
* 4. takes first numToTake items.
*
* If numToTake is satisfied, then any later predicates are not called.
*
* NOTE(review): when items is non-empty the first batch is always evaluated, even if
* numToTake <= 0 (the result is then empty and one batch is reported).
*
* @param items items of type Q
* @param predicate predicate that determines whether an item is acceptable
* @param batchSize batch size to call the predicate with
* @param numToTake max number of items to return
* @param stats stats receiver that gets the number of evaluated batches
* @tparam Q type of item
*
* @return a Stitch of at most numToTake items that satisfied the predicate, in input order
*/
def batchFilterTake[Q](
items: Seq[Q],
predicate: Predicate[Q],
batchSize: Int,
numToTake: Int,
stats: StatsReceiver
): Stitch[Seq[Q]] = {
// Pulls batches one at a time from `input`, accumulating accepted items in `prev`, until
// `takeSize` more items were found or the input is exhausted. Returns the accumulated
// items together with the number of batches that were pulled.
def take(
input: Iterator[Stitch[Seq[Q]]],
prev: Seq[Q],
takeSize: Int,
numOfBatch: Int
): Stitch[(Seq[Q], Int)] = {
if (input.hasNext) {
val currFut = input.next()
currFut.flatMap { curr =>
val taken = curr.take(takeSize)
val combined = prev ++ taken
// still short: continue with the next batch, asking only for the remainder
if (taken.size < takeSize)
take(input, combined, takeSize - taken.size, numOfBatch + 1)
else Stitch.value((combined, numOfBatch + 1))
}
} else {
Stitch.value((prev, numOfBatch))
}
}
// grouped(...) yields an Iterator and Iterator.map is lazy, so the predicate Stitch for a
// batch is only created when `take` asks for that batch.
val batchedItems = items.view.grouped(batchSize)
val batchedFutures = batchedItems.map { batch =>
Stitch.traverse(batch)(predicate.apply).map { conds =>
// keep the items whose PredicateResult.value is true
(batch.zip(conds)).withFilter(_._2.value).map(_._1)
}
}
take(batchedFutures, Nil, numToTake, 0).map {
case (filtered: Seq[Q], numOfBatch: Int) =>
stats.stat(NumBatchesStat).add(numOfBatch)
stats.counter(NumBatchesCount).incr(numOfBatch)
filtered
}
}
/**
* filter a list of items based on the predicate
*
* @param items a list of items
* @param predicate predicate of the item
* @tparam Q item type
* @return the list of items that satisfy the predicate (result is PredicateResult.Valid)
*/
def filter[Q](items: Seq[Q], predicate: Predicate[Q]): Stitch[Seq[Q]] = {
predicate.batch(items).map { results =>
items.zip(results).collect {
case (item, PredicateResult.Valid) => item
}
}
}
/**
* filter a list of items based on the predicate given the target
*
* @param target target item
* @param items a list of items
* @param predicate predicate of the (target, item) pair
* @tparam T target type
* @tparam Q item type
* @return the list of items that satisfy the predicate given the target
*/
def filter[T, Q](target: T, items: Seq[Q], predicate: Predicate[(T, Q)]): Stitch[Seq[Q]] = {
predicate.batch(items.map(i => (target, i))).map { results =>
items.zip(results).collect {
case (item, PredicateResult.Valid) => item
}
}
}
/**
* Returns a predicate, where an element is true iff that element is true for all input predicates.
* ie. it is an AND operation
*
* All predicates are evaluated through a single Stitch.traverse (no short-circuiting); when
* any of them is Invalid, the result is Invalid with the union of all invalid reasons.
*
* @param predicates list of predicates
* @tparam Q Type parameter
*
* @return new predicate object that is the logical "and" of the input predicates
*/
def andConcurrently[Q](predicates: Seq[Predicate[Q]]): Predicate[Q] = {
Predicate { query: Q =>
Stitch.traverse(predicates)(p => p(query)).map { predicateResults =>
// one Set of reasons per Invalid result
val allInvalid = predicateResults
.collect {
case PredicateResult.Invalid(reason) =>
reason
}
if (allInvalid.isEmpty) {
PredicateResult.Valid
} else {
// safe: allInvalid is non-empty in this branch
val allInvalidReasons = allInvalid.reduce(_ ++ _)
PredicateResult.Invalid(allInvalidReasons)
}
}
}
}
}
/**
 * Applies the underlying predicate only to items for which `gate` returns true; every other
 * item is reported as Valid without consulting the underlying predicate.
 *
 * Counters (on `stats`):
 *  - underlying_total:   items passed on to the underlying predicate
 *  - underlying_valid:   of those, results that were Valid
 *  - underlying_invalid: of those, results that were Invalid
 *  - not_gated:          items for which `gate` returned false
 *
 * A failed evaluation of the underlying predicate is counted in underlying_total only.
 *
 * @param underlyingPredicate predicate to apply when the gate is open
 * @param stats               receiver for the counters above (defaults to NullStatsReceiver)
 */
abstract class GatedPredicateBase[Q](
  underlyingPredicate: Predicate[Q],
  stats: StatsReceiver = NullStatsReceiver)
    extends Predicate[Q] {

  /** Whether the underlying predicate should be applied to this item. */
  def gate(item: Q): Boolean

  val underlyingPredicateTotal = stats.counter("underlying_total")
  val underlyingPredicateValid = stats.counter("underlying_valid")
  val underlyingPredicateInvalid = stats.counter("underlying_invalid")
  val notGatedCounter = stats.counter("not_gated")

  /** Pre-built Valid result returned for items that are not gated. */
  val ValidStitch: Stitch[PredicateResult.Valid.type] = Stitch.value(PredicateResult.Valid)

  override def apply(item: Q): Stitch[PredicateResult] = {
    if (gate(item)) {
      underlyingPredicateTotal.incr()
      underlyingPredicate(item).map { result =>
        // Record the outcome; previously these two counters were created but never incremented.
        if (result.value) underlyingPredicateValid.incr()
        else underlyingPredicateInvalid.incr()
        result
      }
    } else {
      notGatedCounter.incr()
      ValidStitch
    }
  }
}

View File

@ -1,18 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.follow_recommendations.common.models.FilterReason
/** Outcome of evaluating a predicate: either Valid, or Invalid with the reasons for it. */
sealed trait PredicateResult {

  /** true for Valid, false for Invalid. */
  def value: Boolean
}

object PredicateResult {

  /** The item passed the predicate. */
  case object Valid extends PredicateResult {
    override val value: Boolean = true
  }

  /**
   * The item did not pass the predicate.
   *
   * @param reasons why the item was rejected (empty by default)
   */
  case class Invalid(reasons: Set[FilterReason] = Set.empty[FilterReason])
      extends PredicateResult {
    override val value: Boolean = false
  }
}

View File

@ -1,90 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.stitch.Stitch
import com.twitter.util.Duration
import com.twitter.util.TimeoutException
/**
 * Ranker is a special kind of transform that would only change the order of a list of items.
 * If a single item is given, it "may" attach additional scoring information to the item.
 *
 * @tparam Target target to recommend the candidates
 * @tparam Candidate candidate type to rank
 */
trait Ranker[Target, Candidate] extends Transform[Target, Candidate] { ranker =>

  /** Ranks the candidates for the given target. */
  def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]]

  /** A ranker used as a Transform simply ranks. */
  override def transform(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] =
    rank(target, candidates)

  /**
   * Ranker that records the input size (counter and stat) and profiles the ranked results
   * with StatsUtil.profileStitchSeqResults on the given receiver.
   */
  override def observe(statsReceiver: StatsReceiver): Ranker[Target, Candidate] =
    new Ranker[Target, Candidate] {
      override def rank(target: Target, items: Seq[Candidate]): Stitch[Seq[Candidate]] = {
        val inputSize = items.size
        statsReceiver.counter(Transform.InputCandidatesCount).incr(inputSize)
        statsReceiver.stat(Transform.InputCandidatesStat).add(inputSize)
        StatsUtil.profileStitchSeqResults(ranker.rank(target, items), statsReceiver)
      }
    }

  /** Ranker producing this ranker's output in reverse order. */
  def reverse: Ranker[Target, Candidate] =
    new Ranker[Target, Candidate] {
      def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] =
        ranker.rank(target, candidates).map(ranked => ranked.reverse)
    }

  /** Ranker that feeds the output of this ranker into `other`. */
  def andThen(other: Ranker[Target, Candidate]): Ranker[Target, Candidate] =
    new Ranker[Target, Candidate] {
      def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] =
        ranker.rank(target, candidates).flatMap(other.rank(target, _))
    }

  /**
   * This method wraps the Ranker in a designated timeout.
   * If the ranker times out, the "timeout" counter is incremented and the original
   * candidates are returned unchanged, instead of failing the whole recommendation flow.
   */
  def within(timeout: Duration, statsReceiver: StatsReceiver): Ranker[Target, Candidate] = {
    val timeoutCounter = statsReceiver.counter("timeout")
    new Ranker[Target, Candidate] {
      override def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] = {
        val bounded =
          ranker
            .rank(target, candidates)
            .within(timeout)(com.twitter.finagle.util.DefaultTimer)
        bounded.rescue {
          case _: TimeoutException =>
            timeoutCounter.incr()
            Stitch.value(candidates)
        }
      }
    }
  }
}
object Ranker {

  /**
   * Ranker that first applies `transformer` to the candidates and then ranks the
   * transformed candidates with `ranker`.
   */
  def chain[Target, Candidate](
    transformer: Transform[Target, Candidate],
    ranker: Ranker[Target, Candidate]
  ): Ranker[Target, Candidate] =
    new Ranker[Target, Candidate] {
      def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] =
        transformer.transform(target, candidates).flatMap(ranker.rank(target, _))
    }
}
/** Ranker that returns the candidates unchanged, in their original order. */
class IdentityRanker[Target, Candidate] extends Ranker[Target, Candidate] {
  def rank(target: Target, candidates: Seq[Candidate]): Stitch[Seq[Candidate]] = {
    Stitch.value(candidates)
  }
}

View File

@ -1,250 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.UniversalNoun
import com.twitter.product_mixer.core.model.common.identifier.RecommendationPipelineIdentifier
import com.twitter.product_mixer.core.pipeline.recommendation.RecommendationPipelineResult
import com.twitter.product_mixer.core.quality_factor.QualityFactorObserver
import com.twitter.stitch.Stitch
/**
* Configuration for the results generated by a recommendation flow.
*
* @param desiredCandidateCount number of candidates the flow should return
* @param batchForCandidatesCheck batch size used when checking (validating) candidates
*/
case class RecommendationResultsConfig(desiredCandidateCount: Int, batchForCandidatesCheck: Int)
/**
 * Minimal contract of a recommendation flow: turn a request into a pipeline result.
 *
 * @tparam Target    type of the request
 * @tparam Candidate type of the returned candidates
 */
trait BaseRecommendationFlow[Target, Candidate <: UniversalNoun[Long]] { outer =>

  val identifier = RecommendationPipelineIdentifier("RecommendationFlow")

  /** Runs the flow for the given request. */
  def process(
    pipelineRequest: Target
  ): Stitch[RecommendationPipelineResult[Candidate, Seq[Candidate]]]

  /**
   * Flow over a different request type: each request is converted with `fn` and then
   * processed by this flow.
   */
  def mapKey[Target2](fn: Target2 => Target): BaseRecommendationFlow[Target2, Candidate] =
    new BaseRecommendationFlow[Target2, Candidate] {
      override def process(
        pipelineRequest: Target2
      ): Stitch[RecommendationPipelineResult[Candidate, Seq[Candidate]]] = {
        val mappedRequest = fn(pipelineRequest)
        outer.process(mappedRequest)
      }
    }
}
/**
* Defines a typical recommendation flow to fetch, filter, rank and transform candidates.
*
* 1. targetEligibility: determine the eligibility of target request
* 2. candidateSources: fetch candidates from candidate sources based on target type
* 3. preRankerCandidateFilter: light filtering of candidates
* 4. ranker: ranking of candidates (could be composed of multiple stages, light ranking, heavy ranking and etc)
* 5. postRankerTransform: deduping, grouping, rule based promotion / demotions and etc
* 6. validateCandidates: heavy filters to determine the eligibility of the candidates.
* will only be applied to candidates that we expect to return.
* 7. transformResults: transform the individual candidates into desired format (e.g. hydrate social proof)
*
* Note that the actual implementations may not need to implement all the steps if not needed
* (could just leave to IdentityRanker if ranking is not needed).
*
* Theoretically, the actual implementation could override the above flow to add
* more steps (e.g. add a transform step before ranking).
* But it is recommended to add the additional steps into this base flow if the step proves
* to have significant justification, or merge it into an existing step if it is a minor change.
*
* @tparam Target type of target request
* @tparam Candidate type of candidate to return
*/
trait RecommendationFlow[Target, Candidate <: UniversalNoun[Long]]
extends BaseRecommendationFlow[Target, Candidate]
with SideEffectsUtil[Target, Candidate] {
/**
* optionally update or enrich the request before executing the flows
*/
protected def updateTarget(target: Target): Stitch[Target] = Stitch.value(target)
/**
* check if the target is eligible for the flow
*/
protected def targetEligibility: Predicate[Target]
/**
* define the candidate sources that should be used for the given target
*/
protected def candidateSources(target: Target): Seq[CandidateSource[Target, Candidate]]
/**
* filter invalid candidates before the ranking phase.
*/
protected def preRankerCandidateFilter: Predicate[(Target, Candidate)]
/**
* rank the candidates
*/
protected def selectRanker(target: Target): Ranker[Target, Candidate]
/**
* transform the candidates after ranking (e.g. dedupping, grouping and etc)
*/
protected def postRankerTransform: Transform[Target, Candidate]
/**
* filter invalid candidates before returning the results.
*
* Some heavy filters e.g. SGS filter could be applied in this step
*/
protected def validateCandidates: Predicate[(Target, Candidate)]
/**
* transform the candidates into results and return
*/
protected def transformResults: Transform[Target, Candidate]
/**
* configuration for recommendation results
*/
protected def resultsConfig(target: Target): RecommendationResultsConfig
/**
* track the quality factor the recommendation pipeline
*/
protected def qualityFactorObserver: Option[QualityFactorObserver] = None
def statsReceiver: StatsReceiver
/**
* high level monitoring for the whole flow
* (make sure to add monitoring for each individual component by yourself)
*
* additional candidates: count, stats, non_empty_count
* target eligibility: latency, success, failures, request, count, valid_count, invalid_count, invalid_reasons
* candidate generation: latency, success, failures, request, count, non_empty_count, results_stat
* pre ranker filter: latency, success, failures, request, count, non_empty_count, results_stat
* ranker: latency, success, failures, request, count, non_empty_count, results_stat
* post ranker: latency, success, failures, request, count, non_empty_count, results_stat
* filter and take: latency, success, failures, request, count, non_empty_count, results_stat, batch count
* transform results: latency, success, failures, request, count, non_empty_count, results_stat
*/
import RecommendationFlow._
lazy val additionalCandidatesStats = statsReceiver.scope(AdditionalCandidatesStats)
lazy val targetEligibilityStats = statsReceiver.scope(TargetEligibilityStats)
lazy val candidateGenerationStats = statsReceiver.scope(CandidateGenerationStats)
lazy val preRankerFilterStats = statsReceiver.scope(PreRankerFilterStats)
lazy val rankerStats = statsReceiver.scope(RankerStats)
lazy val postRankerTransformStats = statsReceiver.scope(PostRankerTransformStats)
lazy val filterAndTakeStats = statsReceiver.scope(FilterAndTakeStats)
lazy val transformResultsStats = statsReceiver.scope(TransformResultsStats)
lazy val overallStats = statsReceiver.scope(OverallStats)
import StatsUtil._
override def process(
pipelineRequest: Target
): Stitch[RecommendationPipelineResult[Candidate, Seq[Candidate]]] = {
observeStitchQualityFactor(
profileStitchSeqResults(
updateTarget(pipelineRequest).flatMap { target =>
profilePredicateResult(targetEligibility(target), targetEligibilityStats).flatMap {
case PredicateResult.Valid => processValidTarget(target, Seq.empty)
case PredicateResult.Invalid(_) => Stitch.Nil
}
},
overallStats
).map { candidates =>
RecommendationPipelineResult.empty.withResult(candidates)
},
qualityFactorObserver,
overallStats
)
}
/**
 * Runs the candidate pipeline for a target that already passed the eligibility check.
 *
 * @param target the (updated) target to recommend candidates for
 * @param additionalCandidates candidates supplied by the caller; they are placed in front of
 *                             the candidates fetched from the candidate sources
 * @return the candidates produced by the final transformResults step
 */
protected def processValidTarget(
target: Target,
additionalCandidates: Seq[Candidate]
): Stitch[Seq[Candidate]] = {
/**
 * The flow implemented below:
 *
 * 1. fetch candidates from every candidate source selected for the target
 * 2. merge them behind the caller-supplied additional candidates
 * 3. filter the merged candidates (light filters) before ranking
 * 4. rank with the ranker selected for the target
 * 5. apply the post-ranker transform
 * 6. filter and truncate the candidates via take / validateCandidates
 * 7. transform the candidates based on product requirement
 * 8. apply side effects with every intermediate result
 *
 * Every stage except the side effects is profiled under its own stats scope.
 */
val candidateSourcesToFetch = candidateSources(target)
for {
// All sources are fetched through Stitch.traverse and their results flattened into one Seq.
candidates <- profileStitchSeqResults(
Stitch.traverse(candidateSourcesToFetch)(_(target)).map(_.flatten),
candidateGenerationStats
)
// profileSeqResults only records counts for the additional candidates and returns them unchanged.
mergedCandidates =
profileSeqResults(additionalCandidates, additionalCandidatesStats) ++
candidates
filteredCandidates <- profileStitchSeqResults(
Predicate.filter(target, mergedCandidates, preRankerCandidateFilter),
preRankerFilterStats
)
rankedCandidates <- profileStitchSeqResults(
selectRanker(target).rank(target, filteredCandidates),
rankerStats
)
transformed <- profileStitchSeqResults(
postRankerTransform.transform(target, rankedCandidates),
postRankerTransformStats
)
truncated <- profileStitchSeqResults(
take(target, transformed, resultsConfig(target)),
filterAndTakeStats
)
results <- profileStitchSeqResults(
transformResults.transform(target, truncated),
transformResultsStats
)
// Side effects run last and their Unit result is discarded; `results` is what gets returned.
_ <- applySideEffects(
target,
candidateSourcesToFetch,
candidates,
mergedCandidates,
filteredCandidates,
rankedCandidates,
transformed,
truncated,
results)
} yield results
}
/**
 * Validates candidates in batches and keeps up to the desired number of them.
 *
 * Each candidate is paired with the target, the pairs are handed to
 * Predicate.batchFilterTake together with validateCandidates and the batch size /
 * desired count from `config`, and the target is dropped again from the surviving pairs.
 */
private[this] def take(
  target: Target,
  candidates: Seq[Candidate],
  config: RecommendationResultsConfig
): Stitch[Seq[Candidate]] = {
  val targetCandidatePairs = candidates.map(candidate => (target, candidate))
  val keptPairs = Predicate.batchFilterTake(
    targetCandidatePairs,
    validateCandidates,
    config.batchForCandidatesCheck,
    config.desiredCandidateCount,
    statsReceiver
  )
  keptPairs.map(pairs => pairs.map(pair => pair._2))
}
}
/**
 * Stats scope names, one per stage of the recommendation flow plus one for the
 * pipeline as a whole.
 */
object RecommendationFlow {
  // Per-stage scopes, listed in pipeline order.
  val AdditionalCandidatesStats: String = "additional_candidates"
  val TargetEligibilityStats: String = "target_eligibility"
  val CandidateGenerationStats: String = "candidate_generation"
  val PreRankerFilterStats: String = "pre_ranker_filter"
  val RankerStats: String = "ranker"
  val PostRankerTransformStats: String = "post_ranker_transform"
  val FilterAndTakeStats: String = "filter_and_take"
  val TransformResultsStats: String = "transform_results"

  // Scope covering the whole pipeline.
  val OverallStats: String = "overall"
}

View File

@ -1,24 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.stitch.Stitch
/**
* SideEffectsUtil applies side effects to the intermediate candidate results from a recommendation flow pipeline.
*
* @tparam Target target to recommend the candidates
* @tparam Candidate candidate type to rank
*/
trait SideEffectsUtil[Target, Candidate] {
/**
 * Hook that receives the target, the candidate sources and the candidate list from every
 * stage of a flow. The default implementation does nothing and returns Stitch.Unit;
 * implementors override it to add side effects.
 *
 * @param target the target the candidates were generated for
 * @param candidateSources the candidate sources that were queried
 * @param candidatesFromCandidateSources candidates returned by those sources
 * @param mergedCandidates candidates after merging
 * @param filteredCandidates candidates after filtering
 * @param rankedCandidates candidates after ranking
 * @param transformedCandidates candidates after transformation
 * @param truncatedCandidates candidates after truncation
 * @param results the final results
 */
def applySideEffects(
target: Target,
candidateSources: Seq[CandidateSource[Target, Candidate]],
candidatesFromCandidateSources: Seq[Candidate],
mergedCandidates: Seq[Candidate],
filteredCandidates: Seq[Candidate],
rankedCandidates: Seq[Candidate],
transformedCandidates: Seq[Candidate],
truncatedCandidates: Seq[Candidate],
results: Seq[Candidate]
): Stitch[Unit] = Stitch.Unit
}

View File

@ -1,272 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.finagle.stats.Stat
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.product_mixer.core.quality_factor.QualityFactorObserver
import com.twitter.stitch.Arrow
import com.twitter.stitch.Stitch
import com.twitter.util.Stopwatch
import java.util.concurrent.TimeUnit
import scala.util.control.NonFatal
object StatsUtil {
// Metric names recorded by the timing helpers (profileStitch / profileArrow).
val LatencyName = "latency_ms"
val RequestName = "requests"
val SuccessName = "success"
val FailureName = "failures"
// Metric names recorded by profileResults for result counts and their distribution.
val ResultsName = "results"
val ResultsStat = "results_stat"
val EmptyResultsName = "empty"
val NonEmptyResultsName = "non_empty"
// Metric names recorded by the profilePredicateResults helpers.
val ValidCount = "valid"
val InvalidCount = "invalid"
val InvalidHasReasons = "has_reasons"
val Reasons = "reasons"
// Metric names recorded by observeStitchQualityFactor.
val QualityFactorStat = "quality_factor_stat"
val QualityFactorCounts = "quality_factor_counts"
/**
 * Times a stitch and records request, latency, success and failure metrics on `stat`.
 * Failures are additionally counted per exception class under the failure scope.
 * The returned stitch carries the same outcome as the input stitch.
 */
def profileStitch[T](stitch: Stitch[T], stat: StatsReceiver): Stitch[T] = {
  val timedAndRecorded = Stitch.time(stitch).map {
    case (outcome, elapsed) =>
      stat.counter(RequestName).incr()
      stat.stat(LatencyName).add(elapsed.inMilliseconds)
      outcome.onSuccess { _ => stat.counter(SuccessName).incr() }
      outcome.onFailure { error =>
        stat.counter(FailureName).incr()
        stat.scope(FailureName).counter(getCleanClassName(error)).incr()
      }
      // Hand the Try back so lowerFromTry can restore the original success or failure.
      outcome
  }
  timedAndRecorded.lowerFromTry
}
/**
 * Times an arrow and records request, latency, success and failure metrics on `stat`.
 * Failures are additionally counted per exception class under the failure scope.
 * The returned arrow carries the same outcome as the input arrow.
 */
def profileArrow[T, U](arrow: Arrow[T, U], stat: StatsReceiver): Arrow[T, U] = {
  val timedAndRecorded = Arrow.time(arrow).map {
    case (outcome, elapsed) =>
      stat.counter(RequestName).incr()
      stat.stat(LatencyName).add(elapsed.inMilliseconds)
      outcome.onSuccess { _ => stat.counter(SuccessName).incr() }
      outcome.onFailure { error =>
        stat.counter(FailureName).incr()
        stat.scope(FailureName).counter(getCleanClassName(error)).incr()
      }
      // Hand the Try back so lowerFromTry can restore the original success or failure.
      outcome
  }
  timedAndRecorded.lowerFromTry
}
/**
 * Records how many results were produced and returns `results` unchanged.
 *
 * The total result counter is always incremented by the size. An empty result bumps the
 * empty counter; a non-empty one adds the size to the distribution stat and bumps the
 * non-empty counter.
 *
 * @param size function that extracts the number of results from `results`
 */
def profileResults[T](results: T, stat: StatsReceiver, size: T => Int): T = {
  val resultCount = size(results)
  stat.counter(ResultsName).incr(resultCount)
  resultCount match {
    case 0 =>
      stat.counter(EmptyResultsName).incr()
    case nonZero =>
      stat.stat(ResultsStat).add(nonZero)
      stat.counter(NonEmptyResultsName).incr()
  }
  results
}
/**
 * Seq flavour of profileResults: the number of results is the length of the sequence.
 */
def profileSeqResults[T](results: Seq[T], stat: StatsReceiver): Seq[T] =
  profileResults[Seq[T]](results, stat, (items: Seq[T]) => items.size)
/**
 * Times a stitch with profileStitch and, when it succeeds, also records result counts
 * through profileResults. Returns a stitch with the same outcome as the input.
 */
def profileStitchResults[T](stitch: Stitch[T], stat: StatsReceiver, size: T => Int): Stitch[T] = {
  val timed = profileStitch(stitch, stat)
  timed.onSuccess { value => profileResults(value, stat, size) }
}
/**
 * Times an arrow with profileArrow and, when it succeeds, also records result counts
 * through profileResults. Returns an arrow with the same outcome as the input.
 */
def profileArrowResults[T, U](
  arrow: Arrow[T, U],
  stat: StatsReceiver,
  size: U => Int
): Arrow[T, U] = {
  val timed = profileArrow(arrow, stat)
  timed.onSuccess { value => profileResults(value, stat, size) }
}
/**
 * Times a stitch that yields a sequence and counts the elements of a successful result.
 */
def profileStitchSeqResults[T](stitch: Stitch[Seq[T]], stat: StatsReceiver): Stitch[Seq[T]] =
  profileStitchResults[Seq[T]](stitch, stat, (items: Seq[T]) => items.size)
/**
 * Times a stitch that yields an Option and counts a successful result by the option's size.
 */
def profileStitchOptionalResults[T](
  stitch: Stitch[Option[T]],
  stat: StatsReceiver
): Stitch[Option[T]] =
  profileStitchResults[Option[T]](stitch, stat, maybeValue => maybeValue.size)
/**
 * Times a stitch that yields a Map and counts the entries of a successful result.
 */
def profileStitchMapResults[K, V](
  stitch: Stitch[Map[K, V]],
  stat: StatsReceiver
): Stitch[Map[K, V]] =
  profileStitchResults[Map[K, V]](stitch, stat, (entries: Map[K, V]) => entries.size)
/**
 * Returns a short, metric-friendly name for the runtime class of `obj`.
 *
 * Normally this is the simple class name with a trailing "$" (as carried by Scala
 * objects) removed. `Class.getSimpleName` is empty for anonymous classes, which would
 * yield an empty counter name, so in that case the last dot-separated segment of the
 * binary class name is used instead.
 *
 * @param obj the object whose class should be named
 * @return a non-empty class name without a trailing "$"
 */
def getCleanClassName(obj: Object): String = {
  val runtimeClass = obj.getClass
  val simpleName = runtimeClass.getSimpleName
  val baseName =
    if (simpleName.nonEmpty) simpleName
    else {
      // Anonymous class: fall back to the unqualified part of the binary name.
      val binaryName = runtimeClass.getName
      binaryName.substring(binaryName.lastIndexOf('.') + 1)
    }
  baseName.stripSuffix("$")
}
/**
 * Times a stitch of predicate results and, on success, counts every result.
 *
 * A Valid result increments the valid counter. An Invalid result increments the invalid
 * counter and, for each of its reasons, increments the has-reasons counter and a counter
 * named after the reason under the reasons scope.
 *
 * @param predicateResult stitch producing the predicate results to count
 * @param statsReceiver receiver for both the timing metrics and the counters above
 * @return a stitch with the same outcome as `predicateResult`
 */
def profilePredicateResults(
  predicateResult: Stitch[Seq[PredicateResult]],
  statsReceiver: StatsReceiver
): Stitch[Seq[PredicateResult]] = {
  profileStitch[Seq[PredicateResult]](
    predicateResult,
    statsReceiver
  ).onSuccess { results =>
    // foreach, not map: the callbacks run only for their side effects.
    results.foreach {
      case PredicateResult.Valid =>
        statsReceiver.counter(ValidCount).incr()
      case PredicateResult.Invalid(reasons) =>
        statsReceiver.counter(InvalidCount).incr()
        reasons.foreach { filterReason =>
          statsReceiver.counter(InvalidHasReasons).incr()
          statsReceiver.scope(Reasons).counter(filterReason.reason).incr()
        }
    }
  }
}
/**
 * Single-result flavour of profilePredicateResults for stitches: the result is wrapped
 * in a one-element Seq, profiled, and unwrapped again.
 */
def profilePredicateResult(
  predicateResult: Stitch[PredicateResult],
  statsReceiver: StatsReceiver
): Stitch[PredicateResult] = {
  val asSingletonSeq = predicateResult.map(result => Seq(result))
  // The Seq always holds exactly the one element wrapped above, so head is safe here.
  profilePredicateResults(asSingletonSeq, statsReceiver).map(profiled => profiled.head)
}
/**
 * Times an arrow of predicate results and, on success, counts every result.
 *
 * A Valid result increments the valid counter. An Invalid result increments the invalid
 * counter and, for each of its reasons, increments the has-reasons counter and a counter
 * named after the reason under the reasons scope.
 *
 * @param predicateResult arrow producing the predicate results to count
 * @param statsReceiver receiver for both the timing metrics and the counters above
 * @return an arrow with the same outcome as `predicateResult`
 */
def profilePredicateResults[Q](
  predicateResult: Arrow[Q, Seq[PredicateResult]],
  statsReceiver: StatsReceiver
): Arrow[Q, Seq[PredicateResult]] = {
  profileArrow[Q, Seq[PredicateResult]](
    predicateResult,
    statsReceiver
  ).onSuccess { results =>
    // foreach, not map: the callbacks run only for their side effects.
    results.foreach {
      case PredicateResult.Valid =>
        statsReceiver.counter(ValidCount).incr()
      case PredicateResult.Invalid(reasons) =>
        statsReceiver.counter(InvalidCount).incr()
        reasons.foreach { filterReason =>
          statsReceiver.counter(InvalidHasReasons).incr()
          statsReceiver.scope(Reasons).counter(filterReason.reason).incr()
        }
    }
  }
}
/**
 * Single-result flavour of profilePredicateResults for arrows: the result is wrapped
 * in a one-element Seq, profiled, and unwrapped again.
 */
def profilePredicateResult[Q](
  predicateResult: Arrow[Q, PredicateResult],
  statsReceiver: StatsReceiver
): Arrow[Q, PredicateResult] = {
  val asSingletonSeq = predicateResult.map(result => Seq(result))
  // The Seq always holds exactly the one element wrapped above, so head is safe here.
  profilePredicateResults(asSingletonSeq, statsReceiver).map(profiled => profiled.head)
}
/**
 * Profiles a by-name block that builds a stitch of a sequence.
 *
 * The request counter is incremented up front and the block is evaluated inside the
 * latency timer. On success the result size is added to the distribution stat (and the
 * empty counter is bumped for an empty result); on failure the failure counter and a
 * counter named after the full exception class name are incremented.
 */
def profileStitchSeqResults[T](
  stats: StatsReceiver
)(
  block: => Stitch[Seq[T]]
): Stitch[Seq[T]] = {
  stats.counter(RequestName).incr()
  profileStitch(stats.stat(LatencyName), TimeUnit.MILLISECONDS) {
    // `block` must be evaluated here so that its construction is part of the timed section.
    block
      .onSuccess { results =>
        if (results.isEmpty) stats.counter(EmptyResultsName).incr()
        stats.stat(ResultsStat).add(results.size)
      }
      .onFailure { error =>
        stats.counter(FailureName).incr()
        stats.scope(FailureName).counter(error.getClass.getName).incr()
      }
  }
}
/**
 * Times the asynchronous computation `f` and adds the elapsed time, converted to `unit`,
 * to `stat`.
 *
 * The time is recorded when the stitch responds. If evaluating `f` itself throws a
 * non-fatal exception, the time is recorded immediately and the exception is returned
 * as a failed stitch.
 */
def profileStitch[A](stat: Stat, unit: TimeUnit)(f: => Stitch[A]): Stitch[A] = {
  val startNanos = Stopwatch.timeNanos()

  def recordElapsed(): Unit =
    stat.add(unit.convert(Stopwatch.timeNanos() - startNanos, TimeUnit.NANOSECONDS))

  try {
    f.respond { _ => recordElapsed() }
  } catch {
    case NonFatal(error) =>
      recordElapsed()
      Stitch.exception(error)
  }
}
/**
 * Feeds the outcome and run time of `stitch` to the quality-factor observer, if one is given.
 *
 * After the observer has been updated, its current quality factor multiplied by 10000 is
 * added to the quality-factor stat and the quality-factor counter is incremented. Without
 * an observer the stitch is returned untouched.
 */
def observeStitchQualityFactor[T](
  stitch: Stitch[T],
  qualityFactorObserverOption: Option[QualityFactorObserver],
  statsReceiver: StatsReceiver
): Stitch[T] = {
  qualityFactorObserverOption match {
    case None =>
      stitch
    case Some(observer) =>
      val observed = Stitch.time(stitch).map {
        case (outcome, elapsed) =>
          observer(outcome, elapsed)
          val scaledQualityFactor = observer.qualityFactor.currentValue.floatValue() * 10000
          statsReceiver.counter(QualityFactorCounts).incr()
          statsReceiver.stat(QualityFactorStat).add(scaledQualityFactor)
          outcome
      }
      observed.lowerFromTry
  }
}
}

View File

@ -1,85 +0,0 @@
package com.twitter.follow_recommendations.common.base
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import com.twitter.timelines.configapi.Param
/**
 * Transforms one candidate, or a list of candidates, for a target.
 *
 * @tparam T target type
 * @tparam C candidate type
 */
trait Transform[-T, C] {

  /**
   * Transforms a single item. The default implementation delegates to [[transform]] with
   * a one-element list and returns the first element of the result.
   *
   * Implementations whose `transform` may return fewer items than it was given should
   * override this method: with the default, an empty result fails the stitch with a
   * NoSuchElementException.
   */
  def transformItem(target: T, item: C): Stitch[C] = {
    transform(target, Seq(item)).map { transformed =>
      // headOption instead of head so the failure is explicit and independent of the Seq type.
      transformed.headOption.getOrElse(
        throw new NoSuchElementException(
          "Transform.transform returned no items for a single-item input"))
    }
  }

  /** Transforms a list of items. This is the only abstract method and must be implemented. */
  def transform(target: T, items: Seq[C]): Stitch[Seq[C]]

  /**
   * Adapts this transform to another target type by converting the target with `mapper`
   * before delegating.
   */
  def mapTarget[T2](mapper: T2 => T): Transform[T2, C] = {
    val original = this
    new Transform[T2, C] {
      override def transformItem(target: T2, item: C): Stitch[C] = {
        original.transformItem(mapper(target), item)
      }
      override def transform(target: T2, items: Seq[C]): Stitch[Seq[C]] = {
        original.transform(mapper(target), items)
      }
    }
  }

  /**
   * sequential composition. we execute this' transform first, followed by the other's transform
   */
  def andThen[T1 <: T](other: Transform[T1, C]): Transform[T1, C] = {
    val original = this
    new Transform[T1, C] {
      override def transformItem(target: T1, item: C): Stitch[C] =
        original.transformItem(target, item).flatMap(other.transformItem(target, _))
      override def transform(target: T1, items: Seq[C]): Stitch[Seq[C]] =
        original.transform(target, items).flatMap(other.transform(target, _))
    }
  }

  /**
   * Wraps this transform with stats. `transform` records the input size as both a counter
   * and a stat and profiles the result list; `transformItem` increments the input counter
   * and profiles the call, but does not record the input stat.
   */
  def observe(statsReceiver: StatsReceiver): Transform[T, C] = {
    val originalTransform = this
    new Transform[T, C] {
      override def transform(target: T, items: Seq[C]): Stitch[Seq[C]] = {
        statsReceiver.counter(Transform.InputCandidatesCount).incr(items.size)
        statsReceiver.stat(Transform.InputCandidatesStat).add(items.size)
        StatsUtil.profileStitchSeqResults(originalTransform.transform(target, items), statsReceiver)
      }
      override def transformItem(target: T, item: C): Stitch[C] = {
        statsReceiver.counter(Transform.InputCandidatesCount).incr()
        StatsUtil.profileStitch(originalTransform.transformItem(target, item), statsReceiver)
      }
    }
  }
}
/**
 * A transform that can be switched on or off per target through a boolean param.
 */
trait GatedTransform[T <: HasParams, C] extends Transform[T, C] {

  /**
   * Returns a transform that applies this transform only when `param` is true for the
   * target, and otherwise returns the items unchanged.
   */
  def gated(param: Param[Boolean]): Transform[T, C] = {
    val underlying = this
    new Transform[T, C] {
      override def transform(target: T, items: Seq[C]): Stitch[Seq[C]] =
        if (target.params(param)) underlying.transform(target, items)
        else Stitch.value(items)
    }
  }
}
/** Metric names for the number of candidates handed to a transform. */
object Transform {
  // Counter name.
  val InputCandidatesCount: String = "input_candidates"
  // Distribution stat name.
  val InputCandidatesStat: String = "input_candidates_stat"
}
/**
 * A no-op transform: returns the given items unchanged, wrapped in an already-completed stitch.
 *
 * @tparam T target type (ignored)
 * @tparam C candidate type
 */
class IdentityTransform[T, C] extends Transform[T, C] {
override def transform(target: T, items: Seq[C]): Stitch[Seq[C]] = Stitch.value(items)
}

View File

@ -1,9 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
import com.twitter.timelines.configapi.FSParam
/** Params shared by the address book candidate sources. */
object AddressBookParams {
// Used by display locations that want only to read from the ABV2 Client and ignore Manhattan
// Currently the only display location that does this is the ABUploadInjection DisplayLocation
// Boolean param named "addressbook_read_only_from_abv2", constructed with `false`
// (presumably the default value - confirm against FSParam).
object ReadFromABV2Only extends FSParam[Boolean]("addressbook_read_only_from_abv2", false)
}

View File

@ -1,27 +0,0 @@
# Scala library target, built with the "fatal_warnings" compiler option set on the java8 platform.
# Its dependencies include the common base and candidate-source base packages, the address book,
# email/phone storage, gizmoduck and strato clients, and the hermit thrift definitions.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/addressbook",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/email_storage_service",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/gizmoduck",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/phone_storage_service",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/deciders",
"src/thrift/com/twitter/hermit/candidate:hermit-candidate-scala",
"src/thrift/com/twitter/hermit/usercontacts:hermit-usercontacts-scala",
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,74 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.candidate_sources.addressbook.AddressBookParams.ReadFromABV2Only
import com.twitter.follow_recommendations.common.clients.addressbook.AddressbookClient
import com.twitter.follow_recommendations.common.clients.addressbook.models.EdgeType
import com.twitter.follow_recommendations.common.clients.addressbook.models.RecordIdentifier
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueWithStats
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.strato.generated.client.onboarding.userrecs.ForwardEmailBookClientColumn
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Candidate source backed by the target user's forward email address book edges.
 *
 * Requests without a user id yield no candidates. Otherwise the address book client is
 * queried for the user, the call is wrapped in rescueWithStats, and at most
 * NumEmailBookEntries of the returned user ids are turned into candidates with the
 * default candidate score and this source's identifier.
 */
@Singleton
class ForwardEmailBookSource @Inject() (
  forwardEmailBookClientColumn: ForwardEmailBookClientColumn,
  addressBookClient: AddressbookClient,
  statsReceiver: StatsReceiver = NullStatsReceiver)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier =
    ForwardEmailBookSource.Identifier

  private val stats: StatsReceiver = statsReceiver.scope(this.getClass.getSimpleName)

  /**
   * Generate a list of candidates for the target
   */
  override def apply(
    target: HasParams with HasClientContext
  ): Stitch[Seq[CandidateUser]] = {
    val contactUserIds: Stitch[Seq[Long]] = target.getOptionalUserId match {
      case None =>
        Stitch.Nil
      case Some(userId) =>
        rescueWithStats(
          addressBookClient.getUsers(
            userId = userId,
            identifiers =
              Seq(RecordIdentifier(userId = Some(userId), email = None, phoneNumber = None)),
            batchSize = AddressbookClient.AddressBook2BatchSize,
            edgeType = ForwardEmailBookSource.DefaultEdgeType,
            // No fetcher is passed when the target asks to read from ABV2 only.
            fetcherOption =
              if (target.params.apply(ReadFromABV2Only)) None
              else Some(forwardEmailBookClientColumn.fetcher),
            queryOption = AddressbookClient
              .createQueryOption(
                edgeType = ForwardEmailBookSource.DefaultEdgeType,
                isPhone = ForwardEmailBookSource.IsPhone)
          ),
          stats,
          "AddressBookClient"
        )
    }

    contactUserIds.map { userIds =>
      userIds.take(ForwardEmailBookSource.NumEmailBookEntries).map { contactId =>
        CandidateUser(contactId, score = Some(CandidateUser.DefaultCandidateScore))
          .withCandidateSource(identifier)
      }
    }
  }
}
/** Constants used by the ForwardEmailBookSource class. */
object ForwardEmailBookSource {
// Identifier derived from the ForwardEmailBook algorithm name.
val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier(
Algorithm.ForwardEmailBook.toString)
// Upper bound on the number of user ids turned into candidates.
val NumEmailBookEntries: Int = 1000
// Passed as `isPhone` when building the address book query option.
val IsPhone = false
val DefaultEdgeType: EdgeType = EdgeType.Forward
}

View File

@ -1,72 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.candidate_sources.addressbook.AddressBookParams.ReadFromABV2Only
import com.twitter.follow_recommendations.common.clients.addressbook.AddressbookClient
import com.twitter.follow_recommendations.common.clients.addressbook.models.EdgeType
import com.twitter.follow_recommendations.common.clients.addressbook.models.RecordIdentifier
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueWithStats
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.strato.generated.client.onboarding.userrecs.ForwardPhoneContactsClientColumn
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Candidate source backed by the target user's forward phone book edges.
 *
 * Requests without a user id yield no candidates. Otherwise the address book client is
 * queried for the user, the call is wrapped in rescueWithStats, and at most
 * NumPhoneBookEntries of the returned user ids are turned into candidates with the
 * default candidate score and this source's identifier.
 */
@Singleton
class ForwardPhoneBookSource @Inject() (
  forwardPhoneContactsClientColumn: ForwardPhoneContactsClientColumn,
  addressBookClient: AddressbookClient,
  statsReceiver: StatsReceiver = NullStatsReceiver)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier =
    ForwardPhoneBookSource.Identifier

  private val stats: StatsReceiver = statsReceiver.scope(this.getClass.getSimpleName)

  /**
   * Generate a list of candidates for the target
   */
  override def apply(target: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
    val contactUserIds: Stitch[Seq[Long]] = target.getOptionalUserId match {
      case None =>
        Stitch.Nil
      case Some(userId) =>
        rescueWithStats(
          addressBookClient.getUsers(
            userId,
            identifiers =
              Seq(RecordIdentifier(userId = Some(userId), email = None, phoneNumber = None)),
            batchSize = AddressbookClient.AddressBook2BatchSize,
            edgeType = ForwardPhoneBookSource.DefaultEdgeType,
            // No fetcher is passed when the target asks to read from ABV2 only.
            fetcherOption =
              if (target.params.apply(ReadFromABV2Only)) None
              else Some(forwardPhoneContactsClientColumn.fetcher),
            queryOption = AddressbookClient
              .createQueryOption(
                edgeType = ForwardPhoneBookSource.DefaultEdgeType,
                isPhone = ForwardPhoneBookSource.IsPhone)
          ),
          stats,
          "AddressBookClient"
        )
    }

    contactUserIds.map { userIds =>
      userIds.take(ForwardPhoneBookSource.NumPhoneBookEntries).map { contactId =>
        CandidateUser(contactId, score = Some(CandidateUser.DefaultCandidateScore))
          .withCandidateSource(identifier)
      }
    }
  }
}
/** Constants used by the ForwardPhoneBookSource class. */
object ForwardPhoneBookSource {
// Identifier derived from the ForwardPhoneBook algorithm name.
val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier(
Algorithm.ForwardPhoneBook.toString)
// Upper bound on the number of user ids turned into candidates.
val NumPhoneBookEntries: Int = 1000
// Passed as `isPhone` when building the address book query option.
val IsPhone = true
val DefaultEdgeType: EdgeType = EdgeType.Forward
}

View File

@ -1,4 +0,0 @@
# Address Book Candidate Source
Provides the accounts of a given user's forward and reverse phone and email book contacts.
It is only available when the user has synced their address book with the service.

View File

@ -1,78 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
import com.twitter.cds.contact_consent_state.thriftscala.PurposeOfProcessing
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.clients.addressbook.AddressbookClient
import com.twitter.follow_recommendations.common.clients.addressbook.models.EdgeType
import com.twitter.follow_recommendations.common.clients.addressbook.models.RecordIdentifier
import com.twitter.follow_recommendations.common.clients.email_storage_service.EmailStorageServiceClient
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueOptionalWithStats
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueWithStats
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.strato.generated.client.onboarding.userrecs.ReverseEmailContactsClientColumn
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Candidate source backed by reverse email address book edges.
 *
 * For a request with a user id, the user's verified email is looked up first and then used
 * as the record identifier for the address book query. Requests without a user id yield
 * no candidates.
 */
@Singleton
class ReverseEmailBookSource @Inject() (
reverseEmailContactsClientColumn: ReverseEmailContactsClientColumn,
essClient: EmailStorageServiceClient,
addressBookClient: AddressbookClient,
statsReceiver: StatsReceiver = NullStatsReceiver)
extends CandidateSource[HasParams with HasClientContext, CandidateUser] {
override val identifier: CandidateSourceIdentifier = ReverseEmailBookSource.Identifier
// Stats scope handed to both rescue helpers below.
private val rescueStats = statsReceiver.scope("ReverseEmailBookSource")
/**
 * Generate a list of candidates for the target
 */
override def apply(target: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
val reverseCandidatesFromEmail = target.getOptionalUserId
.map { userId =>
// The verified email is requested for the ContentRecommendations purpose of processing.
val verifiedEmailStitchOpt =
rescueOptionalWithStats(
essClient.getVerifiedEmail(userId, PurposeOfProcessing.ContentRecommendations),
rescueStats,
"getVerifiedEmail")
verifiedEmailStitchOpt.flatMap { emailOpt =>
rescueWithStats(
addressBookClient.getUsers(
userId = userId,
// With no verified email the identifier list is empty.
identifiers = emailOpt
.map(email =>
RecordIdentifier(userId = None, email = Some(email), phoneNumber = None)).toSeq,
// NOTE(review): batchSize reuses NumEmailBookEntries, whereas the forward sources pass
// AddressbookClient.AddressBook2BatchSize - confirm this is intended.
batchSize = ReverseEmailBookSource.NumEmailBookEntries,
edgeType = ReverseEmailBookSource.DefaultEdgeType,
// No fetcher is passed when the target asks to read from ABV2 only.
fetcherOption =
if (target.params(AddressBookParams.ReadFromABV2Only)) None
else Some(reverseEmailContactsClientColumn.fetcher)
// NOTE(review): no queryOption is passed here, unlike the other address book sources;
// presumably getUsers has a default for it - verify against AddressbookClient.
),
rescueStats,
"AddressBookClient"
)
}
}.getOrElse(Stitch.Nil)
// Keep at most NumEmailBookEntries ids and tag each candidate with this source's identifier.
reverseCandidatesFromEmail.map(
_.take(ReverseEmailBookSource.NumEmailBookEntries)
.map(
CandidateUser(_, score = Some(CandidateUser.DefaultCandidateScore))
.withCandidateSource(identifier))
)
}
}
/** Constants used by the ReverseEmailBookSource class. */
object ReverseEmailBookSource {
// Identifier derived from the ReverseEmailBookIbis algorithm name.
val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier(
Algorithm.ReverseEmailBookIbis.toString)
// Used both as the address book batch size and as the cap on returned candidates.
val NumEmailBookEntries: Int = 500
// NOTE(review): not referenced by the ReverseEmailBookSource class itself; check for other users.
val IsPhone = false
val DefaultEdgeType: EdgeType = EdgeType.Reverse
}

View File

@ -1,77 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.addressbook
import com.twitter.cds.contact_consent_state.thriftscala.PurposeOfProcessing
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.clients.addressbook.AddressbookClient
import com.twitter.follow_recommendations.common.clients.addressbook.models.EdgeType
import com.twitter.follow_recommendations.common.clients.addressbook.models.RecordIdentifier
import com.twitter.follow_recommendations.common.clients.phone_storage_service.PhoneStorageServiceClient
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.utils.RescueWithStatsUtils.rescueWithStats
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.strato.generated.client.onboarding.userrecs.ReversePhoneContactsClientColumn
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Candidate source backed by reverse phone book edges.
 *
 * For a request with a user id, the user's phone numbers are looked up first and then used
 * as record identifiers for the address book query. Requests without a user id yield
 * no candidates.
 */
@Singleton
class ReversePhoneBookSource @Inject() (
reversePhoneContactsClientColumn: ReversePhoneContactsClientColumn,
pssClient: PhoneStorageServiceClient,
addressBookClient: AddressbookClient,
statsReceiver: StatsReceiver = NullStatsReceiver)
extends CandidateSource[HasParams with HasClientContext, CandidateUser] {
override val identifier: CandidateSourceIdentifier = ReversePhoneBookSource.Identifier
private val stats: StatsReceiver = statsReceiver.scope(this.getClass.getSimpleName)
/**
 * Generate a list of candidates for the target
 */
override def apply(target: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
val reverseCandidatesFromPhones: Stitch[Seq[Long]] = target.getOptionalUserId
.map { userId =>
// NOTE(review): unlike the getUsers call below, this lookup is not wrapped in a rescue
// helper - confirm that a failure here is meant to propagate to the caller.
pssClient
.getPhoneNumbers(userId, PurposeOfProcessing.ContentRecommendations)
.flatMap { phoneNumbers =>
rescueWithStats(
addressBookClient.getUsers(
userId = userId,
// One record identifier per phone number returned for the user.
identifiers = phoneNumbers.map(phoneNumber =>
RecordIdentifier(userId = None, email = None, phoneNumber = Some(phoneNumber))),
batchSize = ReversePhoneBookSource.NumPhoneBookEntries,
edgeType = ReversePhoneBookSource.DefaultEdgeType,
// No fetcher is passed when the target asks to read from ABV2 only.
fetcherOption =
if (target.params(AddressBookParams.ReadFromABV2Only)) None
else Some(reversePhoneContactsClientColumn.fetcher),
queryOption = AddressbookClient.createQueryOption(
edgeType = ReversePhoneBookSource.DefaultEdgeType,
isPhone = ReversePhoneBookSource.IsPhone)
),
stats,
"AddressBookClient"
)
}
}.getOrElse(Stitch.Nil)
// Keep at most NumPhoneBookEntries ids and tag each candidate with this source's identifier.
reverseCandidatesFromPhones.map(
_.take(ReversePhoneBookSource.NumPhoneBookEntries)
.map(
CandidateUser(_, score = Some(CandidateUser.DefaultCandidateScore))
.withCandidateSource(identifier))
)
}
}
/** Constants used by the ReversePhoneBookSource class. */
object ReversePhoneBookSource {
// Identifier derived from the ReversePhoneBook algorithm name.
val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier(
Algorithm.ReversePhoneBook.toString)
// Used both as the address book batch size and as the cap on returned candidates.
val NumPhoneBookEntries: Int = 500
// Passed as `isPhone` when building the address book query option.
val IsPhone = true
val DefaultEdgeType: EdgeType = EdgeType.Reverse
}

View File

@ -1,23 +0,0 @@
# Scala library target, built with the "fatal_warnings" compiler option set on the java8 platform.
# Its dependencies include the common base and models packages, the strato client, the
# stitchcache utility, the product-mixer candidate_source package and the ymbii features.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"escherbird/src/scala/com/twitter/escherbird/util/stitchcache",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/transforms/modify_social_proof",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/candidate_source",
"src/scala/com/twitter/onboarding/relevance/features/ymbii",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,26 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.escherbird.util.stitchcache.StitchCache
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.util.Duration
/**
 * Wraps a candidate source with a read-through StitchCache keyed by the request.
 *
 * @param candidateSource the source that is called to populate the cache
 * @param maxCacheSize maximum cache size handed to the StitchCache
 * @param cacheTTL time-to-live handed to the StitchCache
 * @param statsReceiver cache stats are reported under the scope (identifier name, "cache")
 * @param identifier identifier exposed by this wrapper
 */
class CachedCandidateSource[K <: Object, V <: Object](
  candidateSource: CandidateSource[K, V],
  maxCacheSize: Int,
  cacheTTL: Duration,
  statsReceiver: StatsReceiver,
  override val identifier: CandidateSourceIdentifier)
    extends CandidateSource[K, V] {

  private val cacheStats = statsReceiver.scope(identifier.name, "cache")

  private val cache = StitchCache[K, Seq[V]](
    maxCacheSize = maxCacheSize,
    ttl = cacheTTL,
    statsReceiver = cacheStats,
    underlyingCall = (key: K) => candidateSource(key)
  )

  /** Looks the target up through the cache. */
  override def apply(target: K): Stitch[Seq[V]] = {
    cache.readThrough(target)
  }
}

View File

@ -1,66 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import com.twitter.timelines.configapi.Param
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
/**
 * Wraps a CandidateSource so that new candidate generation algorithms can be
 * experimented with through params.
 *
 * @param baseSource base candidate source
 * @param darkreadAlgorithmParam when false the base source is not called at all; when true
 *                               candidates are fetched even if they end up being discarded
 * @param keepCandidatesParam whether fetched candidates are returned or discarded
 * @param resultCountThresholdParam minimum number of results (greater-than-or-equal-to) the
 *                                  base source must return before results are processed
 * @param baseStatsReceiver stats are reported under "Experimental/" + the source identifier name
 * @tparam T request type. it must extend HasParams
 * @tparam V value type
 */
class ExperimentalCandidateSource[T <: HasParams, V](
  baseSource: CandidateSource[T, V],
  darkreadAlgorithmParam: Param[Boolean],
  keepCandidatesParam: Param[Boolean],
  resultCountThresholdParam: Param[Int],
  baseStatsReceiver: StatsReceiver)
    extends CandidateSource[T, V] {

  override val identifier: CandidateSourceIdentifier = baseSource.identifier

  private[base] val statsReceiver =
    baseStatsReceiver.scope(s"Experimental/${identifier.name}")
  private[base] val requestsCounter = statsReceiver.counter("requests")
  private[base] val resultCountGreaterThanThresholdCounter =
    statsReceiver.counter("with_results_at_or_above_count_threshold")
  private[base] val keepResultsCounter = statsReceiver.counter("keep_results")
  private[base] val discardResultsCounter = statsReceiver.counter("discard_results")

  /** Calls the base source only when dark reading is enabled for the request. */
  override def apply(request: T): Stitch[Seq[V]] =
    if (!request.params(darkreadAlgorithmParam)) {
      Stitch.Nil
    } else {
      requestsCounter.incr()
      fetchFromCandidateSourceAndProcessResults(request)
    }

  // Results below the count threshold are dropped without touching the keep/discard counters.
  private def fetchFromCandidateSourceAndProcessResults(request: T): Stitch[Seq[V]] =
    baseSource(request).map { fetched =>
      val meetsThreshold = fetched.length >= request.params(resultCountThresholdParam)
      if (meetsThreshold) processResults(fetched, request.params(keepCandidatesParam))
      else Nil
    }

  // Counts the threshold hit, then either keeps or discards the results.
  private def processResults(results: Seq[V], keepResults: Boolean): Seq[V] = {
    resultCountGreaterThanThresholdCounter.incr()
    val outcomeCounter = if (keepResults) keepResultsCounter else discardResultsCounter
    outcomeCounter.incr()
    if (keepResults) results else Nil
  }
}

View File

@ -1,208 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.util.DefaultTimer
import com.twitter.follow_recommendations.common.candidate_sources.base.RealGraphExpansionRepository.DefaultScore
import com.twitter.follow_recommendations.common.candidate_sources.base.RealGraphExpansionRepository.MaxNumIntermediateNodesToKeep
import com.twitter.follow_recommendations.common.candidate_sources.base.RealGraphExpansionRepository.FirstDegreeCandidatesTimeout
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models._
import com.twitter.onboarding.relevance.features.ymbii.ExpansionCandidateScores
import com.twitter.onboarding.relevance.features.ymbii.RawYMBIICandidateFeatures
import com.twitter.onboarding.relevance.store.thriftscala.CandidatesFollowedV1
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.strato.client.Fetcher
import com.twitter.util.Duration
import scala.collection.immutable
import scala.util.control.NonFatal
/**
 * An expansion candidate produced by re-ranking, before conversion to a CandidateUser.
 *
 * @param userID id of the expansion candidate
 * @param score total score assigned to the candidate during re-ranking
 * @param features raw features recording the intermediate candidates that led to this candidate
 */
private final case class InterestExpansionCandidate(
userID: Long,
score: Double,
features: RawYMBIICandidateFeatures)
/**
 * Candidate source that expands "underlying" (first degree) candidates into further candidates
 * using a real-graph expansion store.
 *
 * [[apply]] queries every source in [[underlyingCandidateSource]], keeps the candidates of the
 * first source (in list order) that returned a non-empty result, looks the top-scoring ones up
 * in `realgraphExpansionStore`, and ranks the resulting expansion candidates by the sum, over
 * all intermediate candidates, of (underlying score * expansion score).
 *
 * @param realgraphExpansionStore fetcher keyed by an underlying candidate id; its value carries `candidatesFollowed`
 * @param identifier identifier set as the primary candidate source of returned candidates
 * @param statsReceiver receiver for counters; scoped by this class' simple name and the identifier name
 * @param maxUnderlyingCandidatesToQuery number of top-scoring underlying candidates looked up in the store
 * @param maxCandidatesToReturn maximum number of re-ranked candidates returned
 * @param overrideUnderlyingTimeout timeout applied to each underlying source; FirstDegreeCandidatesTimeout is used when None
 * @param appendSocialProof whether to attach a follow-proof reason built from the intermediate candidate ids
 * @tparam Request request type accepted by this source and by the underlying sources
 */
abstract class RealGraphExpansionRepository[Request](
realgraphExpansionStore: Fetcher[
Long,
Unit,
CandidatesFollowedV1
],
override val identifier: CandidateSourceIdentifier,
statsReceiver: StatsReceiver = NullStatsReceiver,
maxUnderlyingCandidatesToQuery: Int = 50,
maxCandidatesToReturn: Int = 40,
overrideUnderlyingTimeout: Option[Duration] = None,
appendSocialProof: Boolean = false)
extends CandidateSource[
Request,
CandidateUser
] {
// Underlying sources in priority order: the first one with a non-empty result is used.
val underlyingCandidateSource: Seq[
CandidateSource[
Request,
CandidateUser
]
]
private val stats = statsReceiver.scope(this.getClass.getSimpleName).scope(identifier.name)
private val underlyingCandidateSourceFailureStats =
stats.scope("underlying_candidate_source_failure")
/**
 * Fetches underlying candidates, expands them through the store and returns the re-ranked
 * expansion candidates as CandidateUsers.
 */
def apply(
request: Request,
): Stitch[Seq[CandidateUser]] = {
// Query every underlying source with a timeout. A non-fatal failure of a source is counted
// (by source name and exception class) and treated as an empty result.
val candidatesFromUnderlyingSourcesStitch: Seq[Stitch[Seq[CandidateUser]]] =
underlyingCandidateSource.map { candidateSource =>
candidateSource
.apply(request)
.within(overrideUnderlyingTimeout.getOrElse(FirstDegreeCandidatesTimeout))(
DefaultTimer
)
.handle {
case NonFatal(e) =>
underlyingCandidateSourceFailureStats
.counter(candidateSource.identifier.name, e.getClass.getSimpleName).incr()
Seq.empty
}
}
for {
underlyingCandidatesFromEachAlgo <- Stitch.collect(candidatesFromUnderlyingSourcesStitch)
// The first algorithm in the list has the highest priority. If it returned no candidates,
// fall back to the next algorithms in order. Once a particular algorithm is chosen, only
// take the top few candidates from the underlying store for expansion.
underlyingCandidatesTuple =
underlyingCandidatesFromEachAlgo
.zip(underlyingCandidateSource)
.find(_._1.nonEmpty)
// Identifier of the chosen algorithm; None when every source came back empty.
underlyingAlgorithmUsed: Option[CandidateSourceIdentifier] = underlyingCandidatesTuple.map {
case (_, candidateSource) => candidateSource.identifier
}
// Take maxUnderlyingCandidatesToQuery to query realgraphExpansionStore
// (highest score first; candidates without a score are sorted as DefaultScore).
underlyingCandidates =
underlyingCandidatesTuple
.map {
case (candidates, candidateSource) =>
stats
.scope("underlyingAlgorithmUsedScope").counter(
candidateSource.identifier.name).incr()
candidates
}
.getOrElse(Seq.empty)
.sortBy(_.score.getOrElse(DefaultScore))(Ordering.Double.reverse)
.take(maxUnderlyingCandidatesToQuery)
// Underlying candidate id -> score. Duplicate ids collapse to a single entry here.
underlyingCandidateMap: Map[Long, Double] = underlyingCandidates.map { candidate =>
(candidate.id, candidate.score.getOrElse(DefaultScore))
}.toMap
// Look every underlying candidate up in the expansion store.
// NOTE(review): unlike the underlying sources above, fetch failures are not handled here,
// so presumably a single failed fetch fails the whole request - confirm this is intended.
expansionCandidates <-
Stitch
.traverse(underlyingCandidateMap.keySet.toSeq) { candidateId =>
Stitch.join(
Stitch.value(candidateId),
realgraphExpansionStore.fetch(candidateId).map(_.v))
}.map(_.toMap)
rerankedCandidates: Seq[InterestExpansionCandidate] =
rerankCandidateExpansions(underlyingCandidateMap, expansionCandidates)
rerankedCandidatesFiltered = rerankedCandidates.take(maxCandidatesToReturn)
} yield {
rerankedCandidatesFiltered.map { candidate =>
// Optional social proof: the intermediate candidates kept in the features
// (at most MaxNumIntermediateNodesToKeep, see rerankCandidateExpansions).
val socialProofReason = if (appendSocialProof) {
val socialProofIds = candidate.features.expansionCandidateScores
.map(_.intermediateCandidateId)
Some(
Reason(Some(
AccountProof(followProof = Some(FollowProof(socialProofIds, socialProofIds.size))))))
} else {
None
}
CandidateUser(
id = candidate.userID,
score = Some(candidate.score),
reason = socialProofReason,
userCandidateSourceDetails = Some(
UserCandidateSourceDetails(
primaryCandidateSource = Some(identifier),
candidateSourceFeatures = Map(identifier -> Seq(candidate.features))
))
).addAddressBookMetadataIfAvailable(underlyingAlgorithmUsed.toSeq)
}
}
}
/**
* Expands underlying candidates, returning them in sorted order (highest total score first).
*
* The total score of an expansion candidate is the sum, over every underlying candidate that
* leads to it, of (underlying candidate score * expansion score).
*
* @param underlyingCandidatesMap A map from underlying candidate id to score
* @param expansionCandidateMap A map from underlying candidate id to optional expansion candidates
* @return A sorted sequence of expansion candidates and associated scores
*/
private def rerankCandidateExpansions(
underlyingCandidatesMap: Map[Long, Double],
expansionCandidateMap: Map[Long, Option[CandidatesFollowedV1]]
): Seq[InterestExpansionCandidate] = {
// extract features: one (expansion candidate id -> edge scores) pair per
// (underlying candidate, expansion candidate) edge; underlying candidates with no
// store entry contribute nothing
val candidates: Seq[(Long, ExpansionCandidateScores)] = for {
(underlyingCandidateId, underlyingCandidateScore) <- underlyingCandidatesMap.toSeq
expansionCandidates =
expansionCandidateMap
.get(underlyingCandidateId)
.flatten
.map(_.candidatesFollowed)
.getOrElse(Seq.empty)
expansionCandidate <- expansionCandidates
} yield expansionCandidate.candidateID -> ExpansionCandidateScores(
underlyingCandidateId,
Some(underlyingCandidateScore),
Some(expansionCandidate.score)
)
// merge intermediate nodes for the same candidate (edges ordered by intermediate candidate id)
val dedupedCandidates: Seq[(Long, Seq[ExpansionCandidateScores])] =
candidates.groupBy(_._1).mapValues(_.map(_._2).sortBy(_.intermediateCandidateId)).toSeq
// score the candidate: sum of the per-edge score products
val candidatesWithTotalScore: Seq[((Long, Seq[ExpansionCandidateScores]), Double)] =
dedupedCandidates.map { candidate: (Long, Seq[ExpansionCandidateScores]) =>
(
candidate,
candidate._2.map { ieScore: ExpansionCandidateScores =>
ieScore.scoreFromUserToIntermediateCandidate.getOrElse(DefaultScore) *
ieScore.scoreFromIntermediateToExpansionCandidate.getOrElse(DefaultScore)
}.sum)
}
// sort candidate by score, descending; the features record the total number of edges but
// keep only the first MaxNumIntermediateNodesToKeep of them
for {
((candidate, edges), score) <- candidatesWithTotalScore.sortBy(_._2)(Ordering[Double].reverse)
} yield InterestExpansionCandidate(
candidate,
score,
RawYMBIICandidateFeatures(
edges.size,
edges.take(MaxNumIntermediateNodesToKeep).to[immutable.Seq])
)
}
}
/** Constants shared with the [[RealGraphExpansionRepository]] class. */
object RealGraphExpansionRepository {

  // Timeout for each underlying candidate source when the caller supplies no override.
  private val FirstDegreeCandidatesTimeout: Duration = 250.milliseconds

  // Upper bound on the intermediate nodes recorded in a candidate's features.
  private val MaxNumIntermediateNodesToKeep: Int = 20

  // Score assumed wherever a candidate or an edge carries no score.
  private val DefaultScore: Double = 0.0
}

View File

@ -1,31 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
/** Feature-switch params that serve as defaults for the similar-user expander. */
object SimilarUserExpanderParams {

  // Whether candidates from the backup (non direct follow) source are also used as seeds.
  case object EnableNonDirectFollowExpansion
      extends FSParam[Boolean](
        name = "similar_user_enable_non_direct_follow_expansion",
        default = true)

  // Whether the combined seed accounts are sorted by score before expansion.
  case object EnableSimsExpandSeedAccountsSort
      extends FSParam[Boolean](
        name = "similar_user_enable_sims_expander_seed_account_sort",
        default = false)

  // Default cap on the number of seed candidates fed into the expansion (unbounded by default).
  case object DefaultExpansionInputCount
      extends FSBoundedParam[Int](
        name = "similar_user_default_expansion_input_count",
        default = Int.MaxValue,
        min = 0,
        max = Int.MaxValue)

  // Default cap on the number of candidates returned after expansion (unbounded by default).
  case object DefaultFinalCandidatesReturnedCount
      extends FSBoundedParam[Int](
        name = "similar_user_default_final_candidates_returned_count",
        default = Int.MaxValue,
        min = 0,
        max = Int.MaxValue)

  // Whether seeds that only carry implicit engagements are still expanded.
  case object DefaultEnableImplicitEngagedExpansion
      extends FSParam[Boolean](
        name = "similar_user_enable_implicit_engaged_expansion",
        default = true)
}

View File

@ -1,313 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.DefaultEnableImplicitEngagedExpansion
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.DefaultExpansionInputCount
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.DefaultFinalCandidatesReturnedCount
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.EnableNonDirectFollowExpansion
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderParams.EnableSimsExpandSeedAccountsSort
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderRepository.DefaultCandidateBuilder
import com.twitter.follow_recommendations.common.candidate_sources.base.SimilarUserExpanderRepository.DefaultScore
import com.twitter.follow_recommendations.common.models.AccountProof
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.EngagementType
import com.twitter.follow_recommendations.common.models.FollowProof
import com.twitter.follow_recommendations.common.models.Reason
import com.twitter.follow_recommendations.common.models.SimilarToProof
import com.twitter.follow_recommendations.common.models.UserCandidateSourceDetails
import com.twitter.hermit.candidate.thriftscala.Candidates
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.strato.client.Fetcher
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
import com.twitter.timelines.configapi.HasParams
import com.twitter.timelines.configapi.Params
/**
 * A second-degree node produced by expanding a first-degree candidate.
 *
 * @param userId id of the second-degree user
 * @param score score of this user as returned by the similar-to fetcher
 * @param socialProof optional user ids returned alongside the candidate as social proof
 */
case class SecondDegreeCandidate(userId: Long, score: Double, socialProof: Option[Seq[Long]])
/**
 * Two-hop expansion source: takes first-degree candidates from `originalCandidateSource`
 * (and optionally `backupOriginalCandidateSource`), expands each of them through
 * `similarToCandidatesFetcher`, then scores, merges and trims the expanded candidates.
 *
 * @param identifier identifier attached to the candidates returned by this source
 * @param similarToCandidatesFetcher fetcher returning the candidates similar to a given user id
 * @param expansionInputSizeParam param capping how many first-degree candidates are considered
 * @param candidatesReturnedSizeParam param capping how many candidates are returned
 * @param enableImplicitEngagedExpansion param; when false, only first-degree candidates with a Like, Retweet or Mention engagement are expanded
 * @param thresholdToAvoidExpansion expansion only happens when there are fewer first-degree candidates than this
 * @param maxExpansionPerCandidate optional cap on expanded candidates kept per first-degree candidate (highest score first)
 * @param includingOriginalCandidates whether the first-degree candidates themselves are part of the result
 * @param scorer combines (first-degree score, second-degree score) into the score of an expanded candidate
 * @param aggregator combines the scores of all occurrences of the same candidate id into one score
 * @param candidateBuilder builds a CandidateUser from (expanded user id, identifier, score, first-degree candidate)
 * @tparam Request request type; must extend HasParams
 */
abstract class SimilarUserExpanderRepository[-Request <: HasParams](
override val identifier: CandidateSourceIdentifier,
similarToCandidatesFetcher: Fetcher[
Long,
Unit,
Candidates
],
expansionInputSizeParam: FSBoundedParam[Int] = DefaultExpansionInputCount,
candidatesReturnedSizeParam: FSBoundedParam[Int] = DefaultFinalCandidatesReturnedCount,
enableImplicitEngagedExpansion: FSParam[Boolean] = DefaultEnableImplicitEngagedExpansion,
thresholdToAvoidExpansion: Int = 30,
maxExpansionPerCandidate: Option[Int] = None,
includingOriginalCandidates: Boolean = false,
scorer: (Double, Double) => Double = SimilarUserExpanderRepository.DefaultScorer,
aggregator: (Seq[Double]) => Double = ScoreAggregator.Max,
candidateBuilder: (Long, CandidateSourceIdentifier, Double, CandidateUser) => CandidateUser =
DefaultCandidateBuilder)
extends TwoHopExpansionCandidateSource[
Request,
CandidateUser,
SecondDegreeCandidate,
CandidateUser
] {
// Primary source of first-degree (seed) candidates.
val originalCandidateSource: CandidateSource[Request, CandidateUser]
// Optional extra seed source, only queried when EnableNonDirectFollowExpansion is on.
val backupOriginalCandidateSource: Option[CandidateSource[Request, CandidateUser]] = None
/**
 * Builds the list of seed candidates to expand: original (+ backup) candidates, optionally
 * sorted by score, filtered on engagements, trimmed to the input size and de-duplicated by id.
 * Returns no seeds when there are none or when there are already at least
 * `thresholdToAvoidExpansion` of them.
 */
override def firstDegreeNodes(request: Request): Stitch[Seq[CandidateUser]] = {
val originalCandidatesStitch: Stitch[Seq[CandidateUser]] =
originalCandidateSource(request)
val backupCandidatesStitch: Stitch[Seq[CandidateUser]] =
if (request.params(EnableNonDirectFollowExpansion)) {
backupOriginalCandidateSource.map(_.apply(request)).getOrElse(Stitch.Nil)
} else {
Stitch.Nil
}
// Original candidates come first; with the sort param on, the combined list is ordered by
// descending score instead (missing scores count as DefaultScore).
val firstDegreeCandidatesCombinedStitch: Stitch[Seq[CandidateUser]] =
Stitch
.join(originalCandidatesStitch, backupCandidatesStitch).map {
case (firstDegreeOrigCandidates, backupFirstDegreeCandidates) =>
if (request.params(EnableSimsExpandSeedAccountsSort)) {
firstDegreeOrigCandidates ++ backupFirstDegreeCandidates sortBy {
-_.score.getOrElse(DefaultScore)
}
} else {
firstDegreeOrigCandidates ++ backupFirstDegreeCandidates
}
}
val candidatesAfterImplicitEngagementsRemovalStitch: Stitch[Seq[CandidateUser]] =
getCandidatesAfterImplicitEngagementFiltering(
request.params,
firstDegreeCandidatesCombinedStitch)
val firstDegreeCandidatesCombinedTrimmed = candidatesAfterImplicitEngagementsRemovalStitch.map {
candidates: Seq[CandidateUser] =>
candidates.take(request.params(expansionInputSizeParam))
}
firstDegreeCandidatesCombinedTrimmed.map { firstDegreeResults: Seq[CandidateUser] =>
if (firstDegreeResults.nonEmpty && firstDegreeResults.size < thresholdToAvoidExpansion) {
// De-duplicate by id, keeping the occurrence with the highest score. The grouping goes
// through a Map, so the order of the trimmed list is not carried over to the result.
firstDegreeResults
.groupBy(_.id).mapValues(
_.maxBy(_.score)
).values.toSeq
} else {
Nil
}
}
}
/**
 * Fetches the candidates similar to one first-degree candidate; empty when the fetcher has
 * no value for that id.
 */
override def secondaryDegreeNodes(
request: Request,
firstDegreeCandidate: CandidateUser
): Stitch[Seq[SecondDegreeCandidate]] = {
similarToCandidatesFetcher.fetch(firstDegreeCandidate.id).map(_.v).map { candidateListOption =>
candidateListOption
.map { candidatesList =>
candidatesList.candidates.map(candidate =>
SecondDegreeCandidate(candidate.userId, candidate.score, candidate.socialProof))
}.getOrElse(Nil)
}
}
/**
 * Turns the first-degree -> second-degree map into the final candidate list: scores and
 * builds every expanded candidate, optionally adds the first-degree candidates, merges
 * candidates sharing an id, then returns the highest-scored ones up to the configured size.
 */
override def aggregateAndScore(
req: Request,
firstDegreeToSecondDegreeNodesMap: Map[CandidateUser, Seq[SecondDegreeCandidate]]
): Stitch[Seq[CandidateUser]] = {
val similarExpanderResults = firstDegreeToSecondDegreeNodesMap.flatMap {
case (firstDegreeCandidate, seqOfSecondDegreeCandidates) =>
val sourceScore = firstDegreeCandidate.score.getOrElse(DefaultScore)
val results: Seq[CandidateUser] = seqOfSecondDegreeCandidates.map { secondDegreeCandidate =>
val score = scorer(sourceScore, secondDegreeCandidate.score)
candidateBuilder(secondDegreeCandidate.userId, identifier, score, firstDegreeCandidate)
}
// Optionally keep only the best `limit` expansions of this first-degree candidate.
maxExpansionPerCandidate match {
case None => results
case Some(limit) => results.sortBy(-_.score.getOrElse(DefaultScore)).take(limit)
}
}.toSeq
val allCandidates = {
if (includingOriginalCandidates)
firstDegreeToSecondDegreeNodesMap.keySet.toSeq
else
Nil
} ++ similarExpanderResults
// Merge all occurrences of the same id: aggregated score, merged source details and merged
// social proof; every other field is taken from the first occurrence in the group.
val groupedCandidates: Seq[CandidateUser] = allCandidates
.groupBy(_.id)
.flatMap {
case (_, candidates) =>
val finalScore = aggregator(candidates.map(_.score.getOrElse(DefaultScore)))
val candidateSourceDetailsCombined = aggregateCandidateSourceDetails(candidates)
val accountSocialProofcombined = aggregateAccountSocialProof(candidates)
candidates.headOption.map(
_.copy(
score = Some(finalScore),
reason = accountSocialProofcombined,
userCandidateSourceDetails = candidateSourceDetailsCombined)
.withCandidateSource(identifier))
}
.toSeq
Stitch.value(
groupedCandidates
.sortBy { -_.score.getOrElse(DefaultScore) }.take(req.params(candidatesReturnedSizeParam))
)
}
/**
 * Merges the candidateSourceScores maps of the given candidates into a single details object
 * with no primary candidate source. On a key collision the later candidate's entry wins.
 * Returns None only when `candidates` is empty.
 */
def aggregateCandidateSourceDetails(
candidates: Seq[CandidateUser]
): Option[UserCandidateSourceDetails] = {
candidates
.map { candidate =>
candidate.userCandidateSourceDetails.map(_.candidateSourceScores).getOrElse(Map.empty)
}.reduceLeftOption { (scoreMap1, scoreMap2) =>
scoreMap1 ++ scoreMap2
}.map {
UserCandidateSourceDetails(primaryCandidateSource = None, _)
}
}
/**
 * Merges the account proofs of the given candidates: similar-to ids and followed-by ids are
 * concatenated (no de-duplication) and the follow-proof numIds are summed.
 * The similar-to proof is always set on the result (possibly with no ids); the follow proof
 * is only set when at least one followed-by id exists. Returns None only when `candidates`
 * is empty.
 */
def aggregateAccountSocialProof(candidates: Seq[CandidateUser]): Option[Reason] = {
candidates
.map { candidate =>
// (similarTo ids, followedBy ids, numIds) of one candidate, with empty defaults
(
candidate.reason
.flatMap(_.accountProof.flatMap(_.similarToProof.map(_.similarTo))).getOrElse(Nil),
candidate.reason
.flatMap(_.accountProof.flatMap(_.followProof.map(_.followedBy))).getOrElse(Nil),
candidate.reason
.flatMap(_.accountProof.flatMap(_.followProof.map(_.numIds))).getOrElse(0)
)
}.reduceLeftOption { (accountProofOne, accountProofTwo) =>
(
// merge similarToIds
accountProofOne._1 ++ accountProofTwo._1,
// merge followedByIds
accountProofOne._2 ++ accountProofTwo._2,
// add numIds
accountProofOne._3 + accountProofTwo._3)
}.map { proofs =>
Reason(accountProof = Some(
AccountProof(
similarToProof = Some(SimilarToProof(proofs._1)),
followProof = if (proofs._2.nonEmpty) Some(FollowProof(proofs._2, proofs._3)) else None
)))
}
}
/**
 * Applies the implicit-engagement filter to the first-degree candidates when
 * `enableImplicitEngagedExpansion` is off; returns them untouched otherwise.
 */
def getCandidatesAfterImplicitEngagementFiltering(
params: Params,
firstDegreeCandidatesStitch: Stitch[Seq[CandidateUser]]
): Stitch[Seq[CandidateUser]] = {
if (!params(enableImplicitEngagedExpansion)) {
/**
* Remove candidates whose engagement types only contain implicit engagements
* (e.g. Profile View, Tweet Click) and only expand those candidates who contain explicit
* engagements.
*/
firstDegreeCandidatesStitch.map { candidates =>
candidates.filter { cand =>
cand.engagements.exists(engage =>
engage == EngagementType.Like || engage == EngagementType.Retweet || engage == EngagementType.Mention)
}
}
} else {
firstDegreeCandidatesStitch
}
}
}
/** Scorers, candidate builders and defaults for [[SimilarUserExpanderRepository]]. */
object SimilarUserExpanderRepository {

  // Scores an expanded candidate with its similarity score only.
  val DefaultScorer: (Double, Double) => Double = (_, similarScore) => similarScore

  // Scores an expanded candidate with (seed score * similarity score).
  val MultiplyScorer: (Double, Double) => Double =
    (sourceScore, similarScore) => sourceScore * similarScore

  // Scores an expanded candidate with the score of the seed it was expanded from.
  val SourceScorer: (Double, Double) => Double = (sourceScore, _) => sourceScore

  // Score assumed for candidates that carry none.
  val DefaultScore: Double = 0.0

  /**
   * Builds an expanded candidate whose reason is a similar-to proof pointing at the seed
   * candidate it was expanded from. The identifier argument is ignored.
   */
  val DefaultCandidateBuilder: (
    Long,
    CandidateSourceIdentifier,
    Double,
    CandidateUser
  ) => CandidateUser =
    (userId, _, score, candidate) =>
      CandidateUser(
        id = userId,
        score = Some(score),
        userCandidateSourceDetails = seedSourceDetails(candidate),
        reason =
          Some(Reason(Some(AccountProof(similarToProof = Some(SimilarToProof(Seq(candidate.id)))))))
      )

  /**
   * Same as [[DefaultCandidateBuilder]], and additionally carries over the seed candidate's
   * followed-by ids (when it has a follow proof) as the follow proof of the expanded candidate.
   * The identifier argument is ignored.
   */
  val FollowClusterCandidateBuilder: (
    Long,
    CandidateSourceIdentifier,
    Double,
    CandidateUser
  ) => CandidateUser =
    (userId, _, score, candidate) => {
      val seedFollowedBy = for {
        reason <- candidate.reason
        accountProof <- reason.accountProof
        followProof <- accountProof.followProof
      } yield followProof.followedBy

      val proof = AccountProof(
        similarToProof = Some(SimilarToProof(Seq(candidate.id))),
        followProof = seedFollowedBy.map(ids => FollowProof(ids, ids.size)))

      CandidateUser(
        id = userId,
        score = Some(score),
        userCandidateSourceDetails = seedSourceDetails(candidate),
        reason = Some(Reason(Some(proof)))
      )
    }

  /**
   * Source details for an expanded candidate: the seed's primary candidate source mapped to
   * the seed's score, with no primary source of its own. None when the seed has no details
   * or no primary candidate source.
   */
  private def seedSourceDetails(seed: CandidateUser): Option[UserCandidateSourceDetails] =
    for {
      details <- seed.userCandidateSourceDetails
      primarySource <- details.primaryCandidateSource
    } yield UserCandidateSourceDetails(
      primaryCandidateSource = None,
      candidateSourceScores = Map(primarySource -> seed.score))
}
/**
 * Strategies for collapsing the scores of several candidates that share an id into one score.
 *
 * Both aggregators are total: an empty score list aggregates to 0.0.
 */
object ScoreAggregator {
  // Result for an empty score list; equal to what Sum yields for an empty list.
  private val EmptyScore: Double = 0.0

  // aggregate the same candidates with same id by taking the one with largest score.
  // `Seq.max` throws on an empty collection, so the empty case is handled explicitly.
  val Max: Seq[Double] => Double = (candidateScores: Seq[Double]) =>
    if (candidateScores.isEmpty) EmptyScore else candidateScores.max

  // aggregate the same candidates with same id by taking the sum of the scores
  val Sum: Seq[Double] => Double = (candidateScores: Seq[Double]) => candidateScores.sum
}

View File

@ -1,86 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.transforms.modify_social_proof.ModifySocialProof
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import com.twitter.util.Duration
/**
 * Wraps a candidate source and only lets through candidates that have at least
 * `minNumSocialProofsRequired` followed-by ids.
 *
 * Candidates from the wrapped source that lack enough social proof are sent (up to a
 * configurable number) to `modifySocialProof` for hydration; a candidate is kept only when it
 * had enough social proof to begin with or when its hydrated version has enough.
 *
 * @param candidateSource the wrapped candidate source
 * @param modifySocialProof used to hydrate social proof for candidates lacking it
 * @param minNumSocialProofsRequired minimum number of followed-by ids a returned candidate must have
 * @param identifier identifier attached to the returned candidates
 * @param baseStatsReceiver receiver under which stats are scoped by the identifier name
 */
abstract class SocialProofEnforcedCandidateSource(
  candidateSource: CandidateSource[HasClientContext with HasParams, CandidateUser],
  modifySocialProof: ModifySocialProof,
  minNumSocialProofsRequired: Int,
  override val identifier: CandidateSourceIdentifier,
  baseStatsReceiver: StatsReceiver)
    extends CandidateSource[HasClientContext with HasParams, CandidateUser] {

  val statsReceiver = baseStatsReceiver.scope(identifier.name)

  override def apply(target: HasClientContext with HasParams): Stitch[Seq[CandidateUser]] = {
    // Read every param up front, before the wrapped source is queried.
    val mustCallSgs: Boolean = target.params(SocialProofEnforcedCandidateSourceParams.MustCallSgs)
    val callSgsCachedColumn: Boolean =
      target.params(SocialProofEnforcedCandidateSourceParams.CallSgsCachedColumn)
    val queryIntersectionIdsNum: Int =
      target.params(SocialProofEnforcedCandidateSourceParams.QueryIntersectionIdsNum)
    val maxNumCandidatesToAnnotate: Int =
      target.params(SocialProofEnforcedCandidateSourceParams.MaxNumCandidatesToAnnotate)
    val gfsIntersectionIdsNum: Int =
      target.params(SocialProofEnforcedCandidateSourceParams.GfsIntersectionIdsNum)
    val sgsIntersectionIdsNum: Int =
      target.params(SocialProofEnforcedCandidateSourceParams.SgsIntersectionIdsNum)
    val gfsLagDuration: Duration =
      target.params(SocialProofEnforcedCandidateSourceParams.GfsLagDurationInDays)

    // True when the candidate already carries the required number of followed-by ids.
    def hasEnoughSocialProof(candidate: CandidateUser): Boolean =
      candidate.followedBy.exists(_.size >= minNumSocialProofsRequired)

    candidateSource(target).flatMap { candidates =>
      val lackingSocialProof = candidates.filterNot(hasEnoughSocialProof)
      statsReceiver
        .stat("candidates_with_no_social_proofs").add(lackingSocialProof.size)

      // Only the first few lacking candidates are hydrated; the remaining ones are dropped.
      val toAnnotate = lackingSocialProof.take(maxNumCandidatesToAnnotate)
      statsReceiver.stat("candidates_to_annotate").add(toAnnotate.size)

      // Hydrated candidates keyed by id; empty when the request carries no user id.
      val annotatedByIdStitch = target.getOptionalUserId match {
        case Some(userId) =>
          modifySocialProof
            .hydrateSocialProof(
              userId,
              toAnnotate,
              Some(queryIntersectionIdsNum),
              mustCallSgs,
              callSgsCachedColumn,
              gfsLagDuration = gfsLagDuration,
              gfsIntersectionIds = gfsIntersectionIdsNum,
              sgsIntersectionIds = sgsIntersectionIdsNum
            ).map { annotated =>
              annotated.map(candidate => (candidate.id, candidate)).toMap
            }
        case None =>
          Stitch.value(Map.empty[Long, CandidateUser])
      }

      annotatedByIdStitch.map { annotatedById =>
        candidates
          .flatMap { candidate =>
            if (hasEnoughSocialProof(candidate)) {
              Some(candidate)
            } else {
              // Fall back to the hydrated version, if any, and re-check the requirement.
              annotatedById.get(candidate.id).filter(hasEnoughSocialProof)
            }
          }.map(_.withCandidateSource(identifier))
      }
    }
  }
}

View File

@ -1,30 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSName
import com.twitter.timelines.configapi.HasDurationConversion
import com.twitter.timelines.configapi.Param
import com.twitter.util.Duration
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Registers the feature-switch params declared in SocialProofEnforcedCandidateSourceParams,
 * grouped by value type.
 */
@Singleton
class SocialProofEnforcedCandidateSourceFSConfig @Inject() () extends FeatureSwitchConfig {
  import SocialProofEnforcedCandidateSourceParams.{
    CallSgsCachedColumn,
    GfsIntersectionIdsNum,
    GfsLagDurationInDays,
    MaxNumCandidatesToAnnotate,
    MustCallSgs,
    QueryIntersectionIdsNum,
    SgsIntersectionIdsNum
  }

  // Boolean switches.
  override val booleanFSParams: Seq[Param[Boolean] with FSName] =
    Seq(MustCallSgs, CallSgsCachedColumn)

  // Bounded integer params.
  override val intFSParams: Seq[FSBoundedParam[Int]] =
    Seq(
      QueryIntersectionIdsNum,
      MaxNumCandidatesToAnnotate,
      GfsIntersectionIdsNum,
      SgsIntersectionIdsNum
    )

  // Bounded duration params.
  override val durationFSParams: Seq[FSBoundedParam[Duration] with HasDurationConversion] =
    Seq(GfsLagDurationInDays)
}

View File

@ -1,56 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.conversions.DurationOps._
import com.twitter.timelines.configapi.DurationConversion
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
import com.twitter.timelines.configapi.HasDurationConversion
import com.twitter.util.Duration
/** Feature-switch params read by SocialProofEnforcedCandidateSource on every request. */
object SocialProofEnforcedCandidateSourceParams {

  // Forwarded to the social proof hydration as its `mustCallSgs` argument.
  case object MustCallSgs
      extends FSParam[Boolean](
        name = "social_proof_enforced_candidate_source_must_call_sgs",
        default = true)

  // Forwarded to the social proof hydration as its `callSgsCachedColumn` argument.
  case object CallSgsCachedColumn
      extends FSParam[Boolean](
        name = "social_proof_enforced_candidate_source_call_sgs_cached_column",
        default = false)

  // Number of intersection ids requested from the social proof hydration.
  case object QueryIntersectionIdsNum
      extends FSBoundedParam[Int](
        name = "social_proof_enforced_candidate_source_query_intersection_ids_num",
        default = 3,
        min = 0,
        max = Int.MaxValue)

  // Maximum number of candidates lacking social proof that are sent for hydration.
  case object MaxNumCandidatesToAnnotate
      extends FSBoundedParam[Int](
        name = "social_proof_enforced_candidate_source_max_num_candidates_to_annotate",
        default = 50,
        min = 0,
        max = Int.MaxValue)

  // Forwarded to the social proof hydration as its `gfsIntersectionIds` argument.
  case object GfsIntersectionIdsNum
      extends FSBoundedParam[Int](
        name = "social_proof_enforced_candidate_source_gfs_intersection_ids_num",
        default = 3,
        min = 0,
        max = Int.MaxValue)

  // Forwarded to the social proof hydration as its `sgsIntersectionIds` argument.
  case object SgsIntersectionIdsNum
      extends FSBoundedParam[Int](
        name = "social_proof_enforced_candidate_source_sgs_intersection_ids_num",
        default = 10,
        min = 0,
        max = Int.MaxValue)

  // Forwarded to the social proof hydration as its `gfsLagDuration` argument; the feature
  // switch value is converted from a number of days.
  case object GfsLagDurationInDays
      extends FSBoundedParam[Duration](
        name = "social_proof_enforced_candidate_source_gfs_lag_duration_in_days",
        default = 14.days,
        min = 1.days,
        max = 60.days)
      with HasDurationConversion {
    override val durationConversion: DurationConversion = DurationConversion.FromDays
  }
}

View File

@ -1,27 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.strato.client.Fetcher
/**
 * Base class for candidate sources backed by a single Strato fetcher.
 *
 * @param fetcher fetcher queried with the request as its key
 * @param view view passed to every fetch
 * @param identifier identifier attached to every returned candidate
 * @tparam K request / fetcher key type
 * @tparam U fetcher view type
 * @tparam V fetcher value type
 */
abstract class StratoFetcherSource[K, U, V](
  fetcher: Fetcher[K, U, V],
  view: U,
  override val identifier: CandidateSourceIdentifier)
    extends CandidateSource[K, CandidateUser] {

  /** Converts the value fetched for `user` into candidates. */
  def map(user: K, v: V): Seq[CandidateUser]

  /**
   * Fetches the value for `target` and converts it into candidates tagged with this source's
   * identifier; yields no candidates when the fetcher has no value for the key.
   */
  override def apply(target: K): Stitch[Seq[CandidateUser]] =
    fetcher.fetch(target, view).map { fetched =>
      val converted = fetched.v match {
        case Some(value) => map(target, value)
        case None => Nil
      }
      converted.map(candidate => candidate.withCandidateSource(identifier))
    }
}

View File

@ -1,9 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.strato.client.Fetcher
/**
 * Convenience base class for a [[StratoFetcherSource]] whose fetcher takes a `Unit` view.
 *
 * The view handed to the parent is the unit value `()`. The `Unit` companion object is not a
 * value of type `Unit`: it only type-checks through value discarding (which evaluates to `()`
 * anyway) and newer Scala compilers reject it, so the literal is used here.
 *
 * @param fetcher fetcher queried with the request as its key and `()` as its view
 * @param identifier identifier attached to every returned candidate
 * @tparam K request / fetcher key type
 * @tparam V fetcher value type
 */
abstract class StratoFetcherWithUnitViewSource[K, V](
  fetcher: Fetcher[K, Unit, V],
  override val identifier: CandidateSourceIdentifier)
    extends StratoFetcherSource[K, Unit, V](fetcher, (), identifier)

View File

@ -1,71 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.follow_recommendations.common.models.TweetCandidate
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.stitch.Stitch
/**
 * Base trait for algorithms that recommend the authors of a set of tweets,
 * e.g. topical tweet authors, twistly, ...
 *
 * @tparam Target target type
 * @tparam Candidate output candidate type
 */
trait TweetAuthorsCandidateSource[-Target, +Candidate] extends CandidateSource[Target, Candidate] {

  /** Fetches the tweet candidates for the target. */
  def getTweetCandidates(target: Target): Stitch[Seq[TweetCandidate]]

  /** Resolves the author id of a tweet candidate, if there is one. */
  def getTweetAuthorId(tweetCandidate: TweetCandidate): Stitch[Option[Long]]

  /** Wraps an author id, the supporting tweet ids and a score into a Candidate. */
  def toCandidate(authorId: Long, tweetIds: Seq[Long], score: Option[Double]): Candidate

  /** Aggregates scores; by default the first score, or DefaultScore when there is none. */
  def aggregator(scores: Seq[Double]): Double =
    scores.headOption.getOrElse(TweetAuthorsCandidateSource.DefaultScore)

  /** Aggregates and scores a group of author-hydrated tweet candidates into candidates. */
  def aggregateAndScore(
    target: Target,
    tweetCandidates: Seq[TweetCandidate]
  ): Seq[Candidate]

  /**
   * Generates the candidates for the target: fetches the tweet candidates, resolves their
   * author ids, drops the tweets without an author, then aggregates and scores the rest.
   */
  def build(
    target: Target
  ): Stitch[Seq[Candidate]] =
    getTweetCandidates(target)
      .flatMap { tweets =>
        // Author ids come back in the same order as the tweets they belong to.
        Stitch.collect(tweets.map(tweet => getTweetAuthorId(tweet))).map { authorIdOpts =>
          authorIdOpts.zip(tweets).collect {
            case (Some(authorId), tweet) => tweet.copy(authorId = authorId)
          }
        }
      }
      .map(hydratedTweets => aggregateAndScore(target, hydratedTweets))

  def apply(target: Target): Stitch[Seq[Candidate]] =
    build(target)
}
object TweetAuthorsCandidateSource {
  // Score used by the default aggregator when there are no scores to aggregate.
  final val DefaultScore: Double = 0.0d
}

View File

@ -1,46 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.stitch.Stitch
/**
 * Base trait for algorithms built on a two-hop expansion, e.g. online_stp,
 * phonebook_prediction, recent following sims, recent engagement sims, ...
 *
 * @tparam Target target type
 * @tparam FirstDegree type of first degree nodes
 * @tparam SecondaryDegree type of secondary degree nodes
 * @tparam Candidate output candidate type
 */
trait TwoHopExpansionCandidateSource[-Target, FirstDegree, SecondaryDegree, +Candidate]
    extends CandidateSource[Target, Candidate] {

  /** Fetches the first degree nodes for the request. */
  def firstDegreeNodes(req: Target): Stitch[Seq[FirstDegree]]

  /** Fetches the secondary degree nodes reachable from one first degree node. */
  def secondaryDegreeNodes(req: Target, node: FirstDegree): Stitch[Seq[SecondaryDegree]]

  /** Aggregates and scores the expanded nodes into the final candidates. */
  def aggregateAndScore(
    req: Target,
    firstDegreeToSecondDegreeNodesMap: Map[FirstDegree, Seq[SecondaryDegree]]
  ): Stitch[Seq[Candidate]]

  /**
   * Generates the candidates for the target: first hop, then one second-hop lookup per first
   * degree node, then aggregation over the (first degree node -> second degree nodes) map.
   * Equal first degree nodes collapse to a single map entry.
   */
  def apply(target: Target): Stitch[Seq[Candidate]] =
    firstDegreeNodes(target).flatMap { firstHop =>
      Stitch
        .traverse(firstHop)(node => secondaryDegreeNodes(target, node))
        .flatMap { secondHop =>
          aggregateAndScore(target, firstHop.zip(secondHop).toMap)
        }
    }
}

View File

@ -1,22 +0,0 @@
# Scala library target for the sources in this directory.
# NOTE(review): presumably this is the crowd_search_accounts candidate source package (it
# depends on the crowd_search_accounts thrift target) - confirm against the BUILD file's path.
scala_library(
# Uses the "fatal_warnings" compiler option set.
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"escherbird/src/scala/com/twitter/escherbird/util/stitchcache",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/geoduck",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/candidate_source",
"src/thrift/com/twitter/onboarding/relevance/crowd_search_accounts:crowd_search_accounts-scala",
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-core/src/main/scala/com/twitter/conversions",
],
)

View File

@ -1,18 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSName
import com.twitter.timelines.configapi.Param
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Feature-switch registration for the crowd-search-accounts candidate source params.
 *
 * NOTE(review): CrowdSearchAccountsParams.AccountsFilteringAndRankingLogics is not listed in
 * either sequence below - confirm it is registered somewhere else.
 */
@Singleton
class CrowdSearchAccountsFSConfig @Inject() () extends FeatureSwitchConfig {

  /** Boolean feature switches owned by this candidate source. */
  override val booleanFSParams: Seq[Param[Boolean] with FSName] =
    Seq(CrowdSearchAccountsParams.CandidateSourceEnabled)

  /** Bounded double feature switches owned by this candidate source. */
  override val doubleFSParams: Seq[FSBoundedParam[Double]] =
    Seq(CrowdSearchAccountsParams.CandidateSourceWeight)
}

View File

@ -1,32 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSEnumSeqParam
import com.twitter.timelines.configapi.FSParam
/** Feature-switch params that control the crowd-search-accounts candidate source. */
object CrowdSearchAccountsParams {

  /** Whether or not to fetch CrowdSearchAccounts candidates; disabled by default. */
  case object CandidateSourceEnabled
      extends FSParam[Boolean]("crowd_search_accounts_candidate_source_enabled", default = false)

  /**
   * Contains the logic keys for account filtering and ranking. Currently there are 4 logic keys:
   *  - new_daily: filtering top searched accounts with max daily searches based on new users
   *  - new_weekly: filtering top searched accounts with max weekly searches based on new users
   *  - daily: filtering top searched accounts with max daily searches
   *  - weekly: filtering top searched accounts with max weekly searches
   *
   * The selectable ids are the values of the AccountsFilteringAndRankingLogicId enumeration;
   * the default is the weekly-searches id only.
   */
  case object AccountsFilteringAndRankingLogics
      extends FSEnumSeqParam[AccountsFilteringAndRankingLogicId.type](
        name = "crowd_search_accounts_filtering_and_ranking_logic_ids",
        default = Seq(AccountsFilteringAndRankingLogicId.SearchesWeekly),
        enum = AccountsFilteringAndRankingLogicId
      )

  /** Weight of this candidate source, bounded to [0.001, 2000] with a default of 1200. */
  case object CandidateSourceWeight
      extends FSBoundedParam[Double](
        "crowd_search_accounts_candidate_source_weight",
        default = 1200,
        min = 0.001,
        max = 2000
      )
}

View File

@ -1,111 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts
import com.twitter.escherbird.util.stitchcache.StitchCache
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts.CrowdSearchAccountsParams.AccountsFilteringAndRankingLogics
import com.twitter.follow_recommendations.common.candidate_sources.crowd_search_accounts.CrowdSearchAccountsParams.CandidateSourceEnabled
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.HasGeohashAndCountryCode
import com.twitter.hermit.model.Algorithm
import com.twitter.onboarding.relevance.crowd_search_accounts.thriftscala.CrowdSearchAccounts
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.strato.generated.client.onboarding.userrecs.CrowdSearchAccountsClientColumn
import com.twitter.timelines.configapi.HasParams
import com.twitter.util.Duration
import com.twitter.util.logging.Logging
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Ids of the filtering/ranking logics that can be requested from the crowd-search-accounts
 * source. The string form of each value is used verbatim when building lookup keys.
 */
object AccountsFilteringAndRankingLogicId extends Enumeration {
  type AccountsFilteringAndRankingLogicId = Value

  // Declaration order fixes the numeric ids (0..3); do not reorder.
  val NewSearchesDaily: Value = Value("new_searches_daily")
  val NewSearchesWeekly: Value = Value("new_searches_weekly")
  val SearchesDaily: Value = Value("searches_daily")
  val SearchesWeekly: Value = Value("searches_weekly")
}
/** Constants and the request type accepted by [[CrowdSearchAccountsSource]]. */
object CrowdSearchAccountsSource {

  /** What a request must provide: params, client context and geohash/country code. */
  type Target = HasParams with HasClientContext with HasGeohashAndCountryCode

  val MaxCacheSize: Int = 500
  val CacheTTL: Duration = Duration.fromHours(24)

  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.CrowdSearchAccounts.toString)
}

/**
 * Candidate source backed by the CrowdSearchAccounts strato column.
 *
 * For every configured filtering/ranking logic it looks up the key
 * "<UPPER-CASED COUNTRY CODE>-<logic>" through a read-through cache and turns the returned
 * accounts into candidates scored by their search activity score.
 */
@Singleton
class CrowdSearchAccountsSource @Inject() (
  crowdSearchAccountsClientColumn: CrowdSearchAccountsClientColumn,
  statsReceiver: StatsReceiver,
) extends CandidateSource[CrowdSearchAccountsSource.Target, CandidateUser]
    with Logging {

  /** @see [[CandidateSourceIdentifier]] */
  override val identifier: CandidateSourceIdentifier =
    CrowdSearchAccountsSource.Identifier

  private val stats = statsReceiver.scope(identifier.name)
  private val requestsStats = stats.counter("requests")
  private val noCountryCodeStats = stats.counter("no_country_code")
  private val successStats = stats.counter("success")
  private val errorStats = stats.counter("error")

  // Read-through cache in front of the strato column; misses are resolved by the fetcher.
  private val cache = StitchCache[String, Option[CrowdSearchAccounts]](
    maxCacheSize = CrowdSearchAccountsSource.MaxCacheSize,
    ttl = CrowdSearchAccountsSource.CacheTTL,
    statsReceiver = statsReceiver.scope(identifier.name, "cache"),
    underlyingCall = (key: String) => crowdSearchAccountsClientColumn.fetcher.fetch(key).map(_.v)
  )

  /**
   * Returns the crowd-search candidates for the target's country.
   *
   * Yields an empty result when the source is switched off or when no country code can be
   * found in either the client context or the geohash/country-code data.
   */
  override def apply(
    target: CrowdSearchAccountsSource.Target
  ): Stitch[Seq[CandidateUser]] =
    if (!target.params(CandidateSourceEnabled)) {
      Stitch.value(Seq[CandidateUser]())
    } else {
      requestsStats.incr()
      // Prefer the client-context country code, fall back to the geo-derived one.
      val countryCodeOpt =
        target.getCountryCode.orElse(target.geohashAndCountryCode.flatMap(_.countryCode))

      countryCodeOpt match {
        case Some(countryCode) =>
          val lookups = target.params(AccountsFilteringAndRankingLogics).map { logic =>
            cache.readThrough(countryCode.toUpperCase() + "-" + logic)
          }
          Stitch
            .collect(lookups)
            .onSuccess { _ => successStats.incr() }
            .onFailure { t =>
              debug("candidate source failed identifier = %s".format(identifier), t)
              errorStats.incr()
            }
            .map(transformCrowdSearchAccountsToCandidateSource)
        case None =>
          noCountryCodeStats.incr()
          Stitch.value(Seq[CandidateUser]())
      }
    }

  /** Flattens the per-logic lookup results into candidates, skipping lookups with no data. */
  private def transformCrowdSearchAccountsToCandidateSource(
    crowdSearchAccounts: Seq[Option[CrowdSearchAccounts]]
  ): Seq[CandidateUser] =
    for {
      found <- crowdSearchAccounts.flatten
      account <- found.accounts
    } yield CandidateUser(
      id = account.accountId,
      score = Some(account.searchActivityScore),
    ).withCandidateSource(identifier)
}

View File

@ -1,4 +0,0 @@
# Crowd Search Candidate Source
Provides the most searched accounts within a specific country over the past 1 day and the past 7 days.
* When we refer to "most searched accounts", we are referring to accounts that have been clicked on the most frequently by users after they see search results in both the typeahead and search results page.
* The results returned by the service have undergone health filters.

View File

@ -1,23 +0,0 @@
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/geoduck",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
"src/thrift/com/twitter/hermit/pop_geo:hermit-pop-geo-scala",
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,74 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.google.inject.Singleton
import com.twitter.finagle.stats.Counter
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.HasGeohashAndCountryCode
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
/**
 * Looks up popular accounts for the target's geohash at several precisions.
 *
 * The geohash is truncated to every length between the configured minimum and maximum
 * (capped at the geohash's own length) and each prefix is fetched from `popGeoSource` under the
 * key "geohash_<prefix>", longest prefix first. Depending on the config, either the results of
 * all precisions are returned, or only those of the first precision that has any results.
 */
@Singleton
class BasePopGeohashSource @Inject() (
  popGeoSource: CandidateSource[String, CandidateUser],
  statsReceiver: StatsReceiver)
    extends CandidateSource[
      HasParams with HasClientContext with HasGeohashAndCountryCode,
      CandidateUser
    ]
    with BasePopGeohashSourceConfig {

  val stats: StatsReceiver = statsReceiver

  // counts requests that carried a geohash value
  val foundGeohashCounter: Counter = stats.counter("found_geohash_value")
  // counts requests that did not carry a geohash value
  val missingGeohashCounter: Counter = stats.counter("missing_geohash_value")

  /** @see [[CandidateSourceIdentifier]] */
  override val identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier("BasePopGeohashSource")

  override def apply(
    target: HasParams with HasClientContext with HasGeohashAndCountryCode
  ): Stitch[Seq[CandidateUser]] =
    if (!candidateSourceEnabled(target)) {
      Stitch.Nil
    } else {
      target.geohashAndCountryCode.flatMap(_.geohash) match {
        case Some(geohash) =>
          foundGeohashCounter.incr()
          val shortest = minGeohashLength(target)
          val longest = math.min(maxGeohashLength(target), geohash.length)
          // Most precise (longest) prefix first; empty when shortest > longest.
          val keys = (longest to shortest by -1).map { precision =>
            "geohash_" + geohash.take(precision)
          }
          val fromAllPrecisions = returnResultFromAllPrecision(target)

          // Every precision is fetched in both modes; the mode only decides what is kept.
          Stitch.collect(keys.map(popGeoSource.apply)).map { perPrecision =>
            if (fromAllPrecisions) {
              perPrecision.flatten.map(_.withCandidateSource(identifier))
            } else {
              perPrecision
                .find(_.nonEmpty)
                .getOrElse(Nil)
                .take(maxResults(target))
                .map(_.withCandidateSource(identifier))
            }
          }
        case None =>
          missingGeohashCounter.incr()
          Stitch.Nil
      }
    }
}
/**
 * Tunable settings mixed into BasePopGeohashSource. Every method receives the request target
 * so that implementations can derive the value from it; the defaults below are constants.
 */
trait BasePopGeohashSourceConfig {
type Target = HasParams with HasClientContext
// Cap on the number of candidates returned when results come from a single precision.
def maxResults(target: Target): Int = 200
// Shortest geohash prefix length that is looked up.
def minGeohashLength(target: Target): Int = 2
// Longest geohash prefix length that is looked up.
def maxGeohashLength(target: Target): Int = 4
// When true, results of all precisions are returned instead of the first non-empty one.
def returnResultFromAllPrecision(target: Target): Boolean = false
// The source is disabled unless an implementation overrides this.
def candidateSourceEnabled(target: Target): Boolean = false
}

View File

@ -1,33 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.google.inject.Singleton
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
/**
 * Back-fill source: for any request that carries a user id it returns the top entries stored
 * under the fixed default key, tagged with this source's identifier. Requests without a user id
 * get no candidates.
 */
@Singleton
class PopCountryBackFillSource @Inject() (popGeoSource: PopGeoSource)
    extends CandidateSource[HasClientContext with HasParams, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = PopCountryBackFillSource.Identifier

  override def apply(target: HasClientContext with HasParams): Stitch[Seq[CandidateUser]] =
    target.getOptionalUserId match {
      case Some(_) =>
        popGeoSource(PopCountryBackFillSource.DefaultKey).map { candidates =>
          candidates
            .take(PopCountryBackFillSource.MaxResults)
            .map(_.withCandidateSource(identifier))
        }
      case None =>
        Stitch.Nil
    }
}

/** Constants for [[PopCountryBackFillSource]]. */
object PopCountryBackFillSource {
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PopCountryBackFill.toString)

  /** Maximum number of candidates returned per request. */
  val MaxResults: Int = 40

  /** Key that is always looked up, regardless of the requester's own country. */
  val DefaultKey: String = "country_US"
}

View File

@ -1,63 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.google.inject.Singleton
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.finagle.stats.Counter
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.HasGeohashAndCountryCode
import com.twitter.follow_recommendations.common.models.HasUserState
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
/**
 * Returns popular accounts for the target's country, looked up under "country_<countryCode>".
 *
 * No candidates are returned when the request has no country code, or when the target's user
 * state is one of [[PopCountrySource.BlacklistedTargetUserStates]].
 */
@Singleton
class PopCountrySource @Inject() (
  popGeoSource: PopGeoSource,
  statsReceiver: StatsReceiver)
    extends CandidateSource[
      HasClientContext with HasParams with HasUserState with HasGeohashAndCountryCode,
      CandidateUser
    ] {

  override val identifier: CandidateSourceIdentifier = PopCountrySource.Identifier

  val stats: StatsReceiver = statsReceiver.scope("PopCountrySource")

  // counts requests that carried a country code value
  val foundCountryCodeCounter: Counter = stats.counter("found_country_code_value")
  // counts requests that did not carry a country code value
  val missingCountryCodeCounter: Counter = stats.counter("missing_country_code_value")

  override def apply(
    target: HasClientContext with HasParams with HasUserState with HasGeohashAndCountryCode
  ): Stitch[Seq[CandidateUser]] =
    target.geohashAndCountryCode.flatMap(_.countryCode) match {
      case Some(countryCode) =>
        // Counted before the user-state check, so blacklisted targets are included in the count.
        foundCountryCodeCounter.incr()
        val isBlacklisted =
          target.userState.exists(PopCountrySource.BlacklistedTargetUserStates.contains)
        if (isBlacklisted) {
          Stitch.Nil
        } else {
          popGeoSource("country_" + countryCode).map { candidates =>
            candidates.take(PopCountrySource.MaxResults).map(_.withCandidateSource(identifier))
          }
        }
      case None =>
        missingCountryCodeCounter.incr()
        Stitch.Nil
    }
}

/** Constants for [[PopCountrySource]]. */
object PopCountrySource {
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PopCountry.toString)

  /** Maximum number of candidates returned per request. */
  val MaxResults: Int = 40

  /** Targets in any of these user states receive no candidates from this source. */
  val BlacklistedTargetUserStates: Set[UserState] = Set(
    UserState.HeavyTweeter,
    UserState.HeavyNonTweeter,
    UserState.MediumTweeter,
    UserState.MediumNonTweeter
  )
}

View File

@ -1,99 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.google.inject.Singleton
import com.twitter.escherbird.util.stitchcache.StitchCache
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.models.AccountProof
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.PopularInGeoProof
import com.twitter.follow_recommendations.common.models.Reason
import com.twitter.hermit.model.Algorithm
import com.twitter.hermit.pop_geo.thriftscala.PopUsersInPlace
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.strato.generated.client.onboarding.userrecs.UniquePopQualityFollowUsersInPlaceClientColumn
import com.twitter.util.Duration
import javax.inject.Inject
/**
 * Geohash-based source over [[PopGeoQualityFollowSource]]. All settings of the base source,
 * including whether it is enabled, are read from [[PopGeoQualityFollowSourceParams]].
 */
@Singleton
class PopGeohashQualityFollowSource @Inject() (
  popGeoSource: PopGeoQualityFollowSource,
  statsReceiver: StatsReceiver)
    extends BasePopGeohashSource(
      popGeoSource = popGeoSource,
      statsReceiver = statsReceiver.scope("PopGeohashQualityFollowSource"),
    ) {

  override val identifier: CandidateSourceIdentifier = PopGeohashQualityFollowSource.Identifier

  override def candidateSourceEnabled(target: Target): Boolean =
    target.params(PopGeoQualityFollowSourceParams.CandidateSourceEnabled)

  override def minGeohashLength(target: Target): Int =
    target.params(PopGeoQualityFollowSourceParams.PopGeoSourceGeoHashMinPrecision)

  override def maxGeohashLength(target: Target): Int =
    target.params(PopGeoQualityFollowSourceParams.PopGeoSourceGeoHashMaxPrecision)

  override def maxResults(target: Target): Int =
    target.params(PopGeoQualityFollowSourceParams.PopGeoSourceMaxResultsPerPrecision)

  override def returnResultFromAllPrecision(target: Target): Boolean =
    target.params(PopGeoQualityFollowSourceParams.PopGeoSourceReturnFromAllPrecisions)
}

/** Constants for [[PopGeohashQualityFollowSource]]. */
object PopGeohashQualityFollowSource {
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PopGeohashQualityFollow.toString)
}
/** Cache sizing and result cap for [[PopGeoQualityFollowSource]]. */
object PopGeoQualityFollowSource {
  val MaxCacheSize: Int = 20000
  val CacheTTL: Duration = Duration.fromHours(24)

  /** Maximum number of candidates returned for a single place key. */
  val MaxResults: Int = 200
}

/**
 * Returns the highest-scored users stored for a place key in the
 * UniquePopQualityFollowUsersInPlace strato column, read through a cache.
 */
@Singleton
class PopGeoQualityFollowSource @Inject() (
  popGeoQualityFollowClientColumn: UniquePopQualityFollowUsersInPlaceClientColumn,
  statsReceiver: StatsReceiver,
) extends CandidateSource[String, CandidateUser] {

  /** @see [[CandidateSourceIdentifier]] */
  override val identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier("PopGeoQualityFollowSource")

  // Read-through cache in front of the strato column; misses are resolved by the fetcher.
  private val cache = StitchCache[String, Option[PopUsersInPlace]](
    maxCacheSize = PopGeoQualityFollowSource.MaxCacheSize,
    ttl = PopGeoQualityFollowSource.CacheTTL,
    statsReceiver = statsReceiver.scope(identifier.name, "cache"),
    underlyingCall = (key: String) => popGeoQualityFollowClientColumn.fetcher.fetch(key).map(_.v)
  )

  /**
   * @param target the place key to look up
   * @return up to MaxResults users for that place, highest score first, each carrying a
   *         popular-in-geo proof for the place; empty when nothing is stored for the key
   */
  override def apply(target: String): Stitch[Seq[CandidateUser]] =
    cache.readThrough(target).map {
      case Some(usersInPlace) =>
        usersInPlace.popUsers
          .sortBy(-_.score)
          .take(PopGeoQualityFollowSource.MaxResults)
          .map { popUser =>
            val proof = AccountProof(
              popularInGeoProof = Some(PopularInGeoProof(location = usersInPlace.place))
            )
            CandidateUser(
              id = popUser.userId,
              score = Some(popUser.score),
              reason = Some(Reason(Some(proof)))
            )
          }
      case None =>
        Nil
    }
}

View File

@ -1,24 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSName
import com.twitter.timelines.configapi.FSParam
import javax.inject.Inject
import javax.inject.Singleton
/** Feature-switch registration for every param in [[PopGeoQualityFollowSourceParams]]. */
@Singleton
class PopGeoQualityFollowSourceFSConfig @Inject() () extends FeatureSwitchConfig {

  /** Bounded integer feature switches: geohash precisions and the per-precision result cap. */
  override val intFSParams: Seq[FSBoundedParam[Int] with FSName] =
    Seq(
      PopGeoQualityFollowSourceParams.PopGeoSourceGeoHashMaxPrecision,
      PopGeoQualityFollowSourceParams.PopGeoSourceGeoHashMinPrecision,
      PopGeoQualityFollowSourceParams.PopGeoSourceMaxResultsPerPrecision
    )

  /** Bounded double feature switches: the candidate source weight. */
  override val doubleFSParams: Seq[FSBoundedParam[Double] with FSName] =
    Seq(PopGeoQualityFollowSourceParams.CandidateSourceWeight)

  /** Boolean feature switches: the enable flag and the all-precisions toggle. */
  override val booleanFSParams: Seq[FSParam[Boolean] with FSName] =
    Seq(
      PopGeoQualityFollowSourceParams.CandidateSourceEnabled,
      PopGeoQualityFollowSourceParams.PopGeoSourceReturnFromAllPrecisions
    )
}

View File

@ -1,42 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
/** Feature-switch params for the pop-geo quality-follow candidate source. */
object PopGeoQualityFollowSourceParams {

  /** Whether the candidate source is enabled; disabled by default. */
  case object CandidateSourceEnabled
      extends FSParam[Boolean]("pop_geo_quality_follow_source_enabled", default = false)

  /** Minimum geohash precision, bounded to [0, 10]. */
  case object PopGeoSourceGeoHashMinPrecision
      extends FSBoundedParam[Int](
        "pop_geo_quality_follow_source_geo_hash_min_precision",
        default = 2,
        min = 0,
        max = 10
      )

  /** Maximum geohash precision, bounded to [0, 10]. */
  case object PopGeoSourceGeoHashMaxPrecision
      extends FSBoundedParam[Int](
        "pop_geo_quality_follow_source_geo_hash_max_precision",
        default = 3,
        min = 0,
        max = 10
      )

  /** Whether results are returned from all precisions; false by default. */
  case object PopGeoSourceReturnFromAllPrecisions
      extends FSParam[Boolean](
        "pop_geo_quality_follow_source_return_from_all_precisions",
        default = false
      )

  /** Maximum number of results per precision, bounded to [0, 1000]. */
  case object PopGeoSourceMaxResultsPerPrecision
      extends FSBoundedParam[Int](
        "pop_geo_quality_follow_source_max_results_per_precision",
        default = 200,
        min = 0,
        max = 1000
      )

  /** Weight of this candidate source, bounded to [0.001, 2000]. */
  case object CandidateSourceWeight
      extends FSBoundedParam[Double](
        "pop_geo_quality_follow_source_weight",
        default = 200,
        min = 0.001,
        max = 2000
      )
}

View File

@ -1,69 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.candidate_sources.base.CachedCandidateSource
import com.twitter.follow_recommendations.common.candidate_sources.base.StratoFetcherWithUnitViewSource
import com.twitter.follow_recommendations.common.constants.GuiceNamedConstants
import com.twitter.follow_recommendations.common.models.AccountProof
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.PopularInGeoProof
import com.twitter.follow_recommendations.common.models.Reason
import com.twitter.hermit.pop_geo.thriftscala.PopUsersInPlace
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.strato.client.Fetcher
import com.twitter.util.Duration
import javax.inject.Inject
/**
 * Strato-backed source of popular users for a place key. The conversion of the fetched
 * PopUsersInPlace into candidates is delegated to the companion object's `map`.
 */
@Singleton
class BasePopGeoSource @Inject() (
  @Named(GuiceNamedConstants.POP_USERS_IN_PLACE_FETCHER) fetcher: Fetcher[
    String,
    Unit,
    PopUsersInPlace
  ]) extends StratoFetcherWithUnitViewSource[String, PopUsersInPlace](
      fetcher,
      BasePopGeoSource.Identifier) {

  override def map(target: String, candidates: PopUsersInPlace): Seq[CandidateUser] =
    BasePopGeoSource.map(target, candidates)
}

object BasePopGeoSource {
  val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier("BasePopGeoSource")

  /** Maximum number of candidates produced for a single place. */
  val MaxResults: Int = 200

  /**
   * Converts the users of a place into candidates: highest score first, capped at MaxResults,
   * each carrying a popular-in-geo proof for the place.
   *
   * The result is a lazy view, so the candidates are built when the sequence is traversed.
   *
   * @param target the key that was fetched (not used by the conversion)
   * @param candidates the fetched users for the place
   */
  def map(target: String, candidates: PopUsersInPlace): Seq[CandidateUser] = {
    val topByScore = candidates.popUsers.sortBy(-_.score).take(BasePopGeoSource.MaxResults)
    topByScore.view.map { popUser =>
      val proof = AccountProof(
        popularInGeoProof = Some(PopularInGeoProof(location = candidates.place))
      )
      CandidateUser(
        id = popUser.userId,
        score = Some(popUser.score),
        reason = Some(Reason(Some(proof)))
      )
    }
  }
}
/** Cached wrapper around [[BasePopGeoSource]]; sizing and TTL come from the companion object. */
@Singleton
class PopGeoSource @Inject() (basePopGeoSource: BasePopGeoSource, statsReceiver: StatsReceiver)
    extends CachedCandidateSource[String, CandidateUser](
      basePopGeoSource,
      PopGeoSource.MaxCacheSize,
      PopGeoSource.CacheTTL,
      statsReceiver,
      PopGeoSource.Identifier
    )

/** Cache settings and identifier for [[PopGeoSource]]. */
object PopGeoSource {
  val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier("PopGeoSource")

  /** Value passed as the cache size of the cached source. */
  val MaxCacheSize: Int = 20000

  /** Value passed as the time-to-live of the cached source. */
  val CacheTTL: Duration = 1.hours
}

View File

@ -1,20 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSName
import com.twitter.timelines.configapi.FSParam
import javax.inject.Inject
import javax.inject.Singleton
/** Feature-switch registration for every param in [[PopGeoSourceParams]]. */
@Singleton
class PopGeoSourceFSConfig @Inject() () extends FeatureSwitchConfig {

  /** Bounded integer feature switches: geohash precisions and the per-precision result cap. */
  override val intFSParams: Seq[FSBoundedParam[Int] with FSName] =
    Seq(
      PopGeoSourceParams.PopGeoSourceGeoHashMaxPrecision,
      PopGeoSourceParams.PopGeoSourceMaxResultsPerPrecision,
      PopGeoSourceParams.PopGeoSourceGeoHashMinPrecision
    )

  /** Boolean feature switches: the all-precisions toggle. */
  override val booleanFSParams: Seq[FSParam[Boolean] with FSName] =
    Seq(PopGeoSourceParams.PopGeoSourceReturnFromAllPrecisions)
}

View File

@ -1,30 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
/** Feature-switch params for the pop-geo (geohash) candidate source. */
object PopGeoSourceParams {

  /** Minimum geohash precision, bounded to [0, 10]. */
  case object PopGeoSourceGeoHashMinPrecision
      extends FSBoundedParam[Int](
        "pop_geo_source_geo_hash_min_precision",
        default = 2,
        min = 0,
        max = 10
      )

  /** Maximum geohash precision, bounded to [0, 10]. */
  case object PopGeoSourceGeoHashMaxPrecision
      extends FSBoundedParam[Int](
        "pop_geo_source_geo_hash_max_precision",
        default = 4,
        min = 0,
        max = 10
      )

  /** Whether results are returned from all precisions; false by default. */
  case object PopGeoSourceReturnFromAllPrecisions
      extends FSParam[Boolean](
        "pop_geo_source_return_from_all_precisions",
        default = false
      )

  /** Maximum number of results per precision, bounded to [0, 1000]. */
  case object PopGeoSourceMaxResultsPerPrecision
      extends FSBoundedParam[Int](
        "pop_geo_source_max_results_per_precision",
        default = 200,
        min = 0,
        max = 1000
      )
}

View File

@ -1,36 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.geo
import com.google.inject.Singleton
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import javax.inject.Inject
/**
 * Geohash-based source over [[PopGeoSource]]. It is always enabled; the precision range, the
 * result cap and the all-precisions toggle are read from [[PopGeoSourceParams]].
 */
@Singleton
class PopGeohashSource @Inject() (
  popGeoSource: PopGeoSource,
  statsReceiver: StatsReceiver)
    extends BasePopGeohashSource(
      popGeoSource = popGeoSource,
      statsReceiver = statsReceiver.scope("PopGeohashSource"),
    ) {

  override val identifier: CandidateSourceIdentifier = PopGeohashSource.Identifier

  override def candidateSourceEnabled(target: Target): Boolean = true

  override def minGeohashLength(target: Target): Int =
    target.params(PopGeoSourceParams.PopGeoSourceGeoHashMinPrecision)

  override def maxGeohashLength(target: Target): Int =
    target.params(PopGeoSourceParams.PopGeoSourceGeoHashMaxPrecision)

  override def maxResults(target: Target): Int =
    target.params(PopGeoSourceParams.PopGeoSourceMaxResultsPerPrecision)

  override def returnResultFromAllPrecision(target: Target): Boolean =
    target.params(PopGeoSourceParams.PopGeoSourceReturnFromAllPrecisions)
}

/** Constants for [[PopGeohashSource]]. */
object PopGeohashSource {
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PopGeohash.toString)
}

View File

@ -1,4 +0,0 @@
# Pop Geo Candidate Source
Provides the most followed / quality followed accounts in a specific country and geolocation within the past 2 weeks.
* A "quality follow" refers to any follow that leads to visible engagement, such as favorites, mentions, retweets, direct messages, replies, and quote tweets. The engagement must be allowed in either direction, and must occur on the day of the follow or within one subsequent day. Additionally, there must be no unfollowing, blocking, muting, or reporting of the account in the same time period.
* The minimum geolocation precision used is ±20 km (12 mi), and precise user geolocation is not utilized.

View File

@ -1,23 +0,0 @@
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
"src/thrift/com/twitter/hermit/candidate:hermit-candidate-scala",
"strato/config/columns/onboarding:onboarding-strato-client",
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,84 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow.PPMILocaleFollowSourceParams.CandidateSourceEnabled
import com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow.PPMILocaleFollowSourceParams.LocaleToExcludeFromRecommendation
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import javax.inject.Inject
import javax.inject.Singleton
import com.twitter.strato.generated.client.onboarding.UserPreferredLanguagesOnUserClientColumn
import com.twitter.strato.generated.client.onboarding.userrecs.LocaleFollowPpmiClientColumn
import com.twitter.timelines.configapi.HasParams
/**
 * Fetches candidates based on the Positive Pointwise Mutual Information (PPMI) statistic
 * for a set of locales.
 *
 * A locale is the lower-cased string "<country code>-<language>", built from the request's
 * country code and each of the user's preferred languages.
 */
@Singleton
class PPMILocaleFollowSource @Inject() (
  userPreferredLanguagesOnUserClientColumn: UserPreferredLanguagesOnUserClientColumn,
  localeFollowPpmiClientColumn: LocaleFollowPpmiClientColumn,
  statsReceiver: StatsReceiver)
    extends CandidateSource[HasClientContext with HasParams, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = PPMILocaleFollowSource.Identifier

  private val stats = statsReceiver.scope("PPMILocaleFollowSource")

  // Per-request locale counts are recorded through stats that are created once. Registering a
  // gauge on every request and dropping the returned handle does not give a stable measurement:
  // the StatsReceiver only holds gauges weakly, so an unretained gauge may be collected.
  private val allLocaleStat = stats.stat("allLocale")
  private val postFilterLocaleStat = stats.stat("postFilterLocale")

  /**
   * Returns the top PPMI candidates for the target's locales.
   *
   * @param target the request; needs both a country code and a user id, otherwise the result
   *               is empty
   * @return at most DefaultMaxCandidatesToReturn candidates, highest score first; empty when
   *         the source is disabled
   */
  override def apply(target: HasClientContext with HasParams): Stitch[Seq[CandidateUser]] = {
    (for {
      countryCode <- target.getCountryCode
      userId <- target.getOptionalUserId
    } yield {
      getPreferredLocales(userId, countryCode.toLowerCase())
        .flatMap { locales =>
          allLocaleStat.add(locales.length.toFloat)
          // Read the exclusion list once instead of once per locale.
          val excludedLocales = target.params(LocaleToExcludeFromRecommendation)
          val filteredLocales = locales.filterNot(locale => excludedLocales.contains(locale))
          postFilterLocaleStat.add(filteredLocales.length.toFloat)
          // The enabled flag is checked after the locale lookup, so the locale stats are
          // recorded even when the source is switched off.
          if (target.params(CandidateSourceEnabled)) {
            getPPMILocaleFollowCandidates(filteredLocales)
          } else Stitch(Seq.empty)
        }
        .map(_.sortBy(_.score)(Ordering[Option[Double]].reverse)
          .take(PPMILocaleFollowSource.DefaultMaxCandidatesToReturn))
    }).getOrElse(Stitch.Nil)
  }

  /** Fetches the PPMI candidates of every locale and concatenates them in locale order. */
  private def getPPMILocaleFollowCandidates(
    locales: Seq[String]
  ): Stitch[Seq[CandidateUser]] = {
    Stitch
      .traverse(locales) { locale =>
        // Get PPMI candidates for each locale; a locale without data contributes nothing
        localeFollowPpmiClientColumn.fetcher
          .fetch(locale)
          .map(_.v
            .map(_.candidates).getOrElse(Nil).map { candidate =>
              CandidateUser(id = candidate.userId, score = Some(candidate.score))
            }.map(_.withCandidateSource(identifier)))
      }.map(_.flatten)
  }

  /** Builds one lower-cased "<countryCode>-<language>" locale per preferred language. */
  private def getPreferredLocales(userId: Long, countryCode: String): Stitch[Seq[String]] = {
    userPreferredLanguagesOnUserClientColumn.fetcher
      .fetch(userId)
      .map(_.v.map(_.languages).getOrElse(Nil).map { lang =>
        s"$countryCode-$lang".toLowerCase
      })
  }
}

/** Constants for [[PPMILocaleFollowSource]]. */
object PPMILocaleFollowSource {
  val Identifier = CandidateSourceIdentifier(Algorithm.PPMILocaleFollow.toString)

  /** Maximum number of candidates returned per request. */
  val DefaultMaxCandidatesToReturn = 100
}

View File

@ -1,24 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSName
import com.twitter.timelines.configapi.Param
import javax.inject.Inject
import javax.inject.Singleton
/** Feature-switch registration for every param in the PPMILocaleFollowSourceParams object. */
@Singleton
class PPMILocaleFollowSourceFSConfig @Inject() () extends FeatureSwitchConfig {

  /** Boolean feature switches: the enable flag. */
  override val booleanFSParams: Seq[Param[Boolean] with FSName] =
    Seq(PPMILocaleFollowSourceParams.CandidateSourceEnabled)

  /** String-sequence feature switches: the locales excluded from recommendation. */
  override val stringSeqFSParams: Seq[Param[Seq[String]] with FSName] =
    Seq(PPMILocaleFollowSourceParams.LocaleToExcludeFromRecommendation)

  /** Bounded double feature switches: the candidate source weight. */
  override val doubleFSParams: Seq[FSBoundedParam[Double]] =
    Seq(PPMILocaleFollowSourceParams.CandidateSourceWeight)
}

View File

@ -1,22 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.ppmi_locale_follow
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
class PPMILocaleFollowSourceParams

/** Feature-switch params for the PPMI locale follow candidate source. */
object PPMILocaleFollowSourceParams {

  /** Locales to exclude from recommendation; empty by default. */
  case object LocaleToExcludeFromRecommendation
      extends FSParam[Seq[String]](
        name = "ppmilocale_follow_source_locales_to_exclude_from_recommendation",
        default = Seq.empty
      )

  /** Whether the candidate source is enabled; on by default. */
  case object CandidateSourceEnabled
      extends FSParam[Boolean](
        name = "ppmilocale_follow_source_enabled",
        default = true
      )

  /** Candidate source weight, bounded to [0.001, 2000] with a default of 1. */
  case object CandidateSourceWeight
      extends FSBoundedParam[Double](
        name = "ppmilocale_follow_source_candidate_source_weight",
        default = 1.0,
        min = 0.001,
        max = 2000.0
      )
}

View File

@ -1,6 +0,0 @@
# PPMI Locale Follow Candidate Source
Provides accounts based on PPMI ([Positive Pointwise Mutual Information](https://en.wikipedia.org/wiki/Pointwise_mutual_information#Positive_PMI)) using follow actions as a feature for a specific locale (language + country) within a week. In simpler terms, it provides a list of the most followed accounts for a given country and language input, based on the PPMI algorithm.
PPMI is a statistical measure of the association between two events. In this case, it measures the association between the follow actions and the accounts being followed.
In summary, the service utilizes PPMI and follow actions to provide a list of the most followed accounts for a specific country and language input.

View File

@ -1,11 +0,0 @@
scala_library(
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/adserver",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/socialgraph",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"src/thrift/com/twitter/socialgraph:thrift-scala",
],
)

View File

@ -1,111 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.promoted_accounts
import com.twitter.adserver.thriftscala.AdServerException
import com.twitter.adserver.{thriftscala => adthrift}
import com.twitter.finagle.TimeoutException
import com.twitter.finagle.stats.Counter
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.clients.adserver.AdRequest
import com.twitter.follow_recommendations.common.clients.adserver.AdserverClient
import com.twitter.follow_recommendations.common.clients.socialgraph.SocialGraphClient
import com.twitter.follow_recommendations.common.models.FollowProof
import com.twitter.hermit.model.Algorithm
import com.twitter.inject.Logging
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import javax.inject.Inject
import javax.inject.Singleton
/**
 * A promoted account candidate built from an ad server impression.
 *
 * @param id                     id of the promoted account
 * @param position               insertion position of the candidate; PromotedAccountsCandidateSource
 *                               fills it from the impression, falling back to a default when absent
 * @param adImpression           the ad impression this candidate was built from
 * @param followProof            follow proof for the promoted account; PromotedAccountsCandidateSource
 *                               uses FollowProof(Nil, 0) when no intersection was returned
 * @param primaryCandidateSource identifier of the candidate source that produced this candidate, if any
 */
case class PromotedCandidateUser(
id: Long,
position: Int,
adImpression: adthrift.AdImpression,
followProof: FollowProof,
primaryCandidateSource: Option[CandidateSourceIdentifier])
/**
 * Candidate source that turns ad server impressions into promoted account candidates,
 * attaching follow proof fetched from the social graph client.
 */
@Singleton
class PromotedAccountsCandidateSource @Inject() (
  adserverClient: AdserverClient,
  sgsClient: SocialGraphClient,
  statsReceiver: StatsReceiver)
    extends CandidateSource[AdRequest, PromotedCandidateUser]
    with Logging {

  override val identifier: CandidateSourceIdentifier =
    PromotedAccountsCandidateSource.Identifier

  val stats: StatsReceiver = statsReceiver.scope(identifier.name)
  val failureStat: StatsReceiver = stats.scope("failures")
  val adServerExceptionsCounter: Counter = failureStat.counter("AdServerException")
  val timeoutCounter: Counter = failureStat.counter("TimeoutException")

  /**
   * Fetches ad impressions for the request and converts every impression that has a
   * promoted account id into a [[PromotedCandidateUser]].
   *
   * Timeouts and AdServerExceptions are counted, logged and treated as "no impressions".
   * When the request has no user id, no candidates are returned.
   */
  def apply(request: AdRequest): Stitch[Seq[PromotedCandidateUser]] = {
    adserverClient
      .getAdImpressions(request)
      .rescue {
        case timeout: TimeoutException =>
          timeoutCounter.incr()
          logger.warn("Timeout on Adserver", timeout)
          Stitch.Nil
        case adServerError: AdServerException =>
          adServerExceptionsCounter.incr()
          logger.warn("Failed to fetch ads", adServerError)
          Stitch.Nil
      }
      .flatMap { adImpressions: Seq[adthrift.AdImpression] =>
        profileNumResults(adImpressions.size, "results_from_ad_server")

        // Impressions without a promoted account id are dropped here.
        val impressionByAccountId = adImpressions.flatMap { impression =>
          impression.promotedAccountId.map(accountId => accountId -> impression)
        }.toMap

        request.clientContext.userId match {
          case Some(userId) =>
            // Intersections are only requested for impressions that show social context.
            val accountsWithSocialContext =
              adImpressions.filter(shouldShowSocialContext).flatMap(_.promotedAccountId)
            sgsClient
              .getIntersections(
                userId,
                accountsWithSocialContext,
                PromotedAccountsCandidateSource.NumIntersections
              )
              .map { followProofByAccountId =>
                impressionByAccountId.map {
                  case (accountId, impression) =>
                    PromotedCandidateUser(
                      accountId,
                      insertionPosition(impression, request),
                      impression,
                      followProofByAccountId.getOrElse(accountId, FollowProof(Nil, 0)),
                      Some(identifier)
                    )
                }.toSeq
              }
              .onSuccess(candidates => profileNumResults(candidates.size, "final_results"))
          case None =>
            Stitch.Nil
        }
      }
  }

  // Position from the impression, or the request-dependent default when the impression has none.
  private def insertionPosition(impression: adthrift.AdImpression, request: AdRequest): Int =
    impression.insertionPosition
      .map(_.toInt)
      .getOrElse(getInsertionPositionDefaultValue(request.isTest.getOrElse(false)))

  // True when the impression's experiment values ask for the "show_social_context" display style.
  private def shouldShowSocialContext(imp: adthrift.AdImpression): Boolean =
    imp.experimentValues.exists(
      _.get("display.display_style").contains("show_social_context"))

  // Default insertion position: 0 for test requests, -1 otherwise.
  private def getInsertionPositionDefaultValue(isTest: Boolean): Int =
    if (isTest) 0 else -1

  // Counts result sizes under `statName`: exact size up to 5, "more_than_5" above that.
  private def profileNumResults(resultsSize: Int, statName: String): Unit = {
    val bucket = if (resultsSize <= 5) resultsSize.toString else "more_than_5"
    stats.scope(statName).counter(bucket).incr()
  }
}
/** Constants for the promoted accounts candidate source. */
object PromotedAccountsCandidateSource {

  /** Candidate source identifier, named after the PromotedAccount algorithm. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.PromotedAccount.toString)

  /** Value passed to the social graph client's getIntersections call. */
  val NumIntersections: Int = 3
}

View File

@ -1,2 +0,0 @@
# Promoted Accounts Candidate Source
Promoted accounts returned from Ads server.

View File

@ -1,24 +0,0 @@
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/real_time_real_graph",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/stores",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
"strato/config/columns/onboarding/realGraph:realGraph-strato-client",
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
"strato/config/columns/recommendations/twistly:twistly-strato-client",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,6 +0,0 @@
# RealGraph Candidate Source
Provides out-of-network RealGraph candidates for a given user. RealGraph is a user-user graph dataset that aims to measure the strength of the relationship between two users.
RealGraph comprises two components: a real-time pipeline that tracks various counts and relationships between user-user edges (such as the number of favorites, replies, retweets, clicks, whether followed, muted, or blocked), and an offline pipeline of a larger set of such user-user edge counts and relationships. Currently, the top k in-network scores have been exported for use by various teams.
The RealGraph dataset is used to predict user interactions at Twitter, and is based on the paper "[Realgraph: User interaction prediction at Twitter](http://www.ueo-workshop.com/wp-content/uploads/2014/04/sig-alternate.pdf)", presented at the UEO workshop at KDD'14.

View File

@ -1,27 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.real_graph
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSName
import com.twitter.timelines.configapi.Param
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Lists the feature-switch params of the RealGraph out-of-network source, grouped by value
 * type through the corresponding [[FeatureSwitchConfig]] overrides.
 */
@Singleton
class RealGraphOonFSConfig @Inject() () extends FeatureSwitchConfig {

  // Boolean params
  override val booleanFSParams: Seq[Param[Boolean] with FSName] = Seq(
    RealGraphOonParams.IncludeRealGraphOonCandidates,
    RealGraphOonParams.TryToReadRealGraphOonCandidates,
    RealGraphOonParams.UseV2
  )

  // Bounded Double params
  override val doubleFSParams: Seq[FSBoundedParam[Double]] = Seq(
    RealGraphOonParams.ScoreThreshold
  )

  // Bounded Int params
  override val intFSParams: Seq[FSBoundedParam[Int]] = Seq(
    RealGraphOonParams.RealGraphOonResultCountThreshold,
    RealGraphOonParams.MaxResults
  )
}

View File

@ -1,47 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.real_graph
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
/** Feature-switch params for the RealGraph out-of-network candidate source. */
object RealGraphOonParams {

  /** Boolean switch, off by default. */
  case object IncludeRealGraphOonCandidates
      extends FSParam[Boolean](name = "real_graph_oon_include_candidates", default = false)

  /** Boolean switch, off by default. */
  case object TryToReadRealGraphOonCandidates
      extends FSParam[Boolean](name = "real_graph_oon_try_to_read_candidates", default = false)

  /** Result count threshold; any non-negative Int, default 1. */
  case object RealGraphOonResultCountThreshold
      extends FSBoundedParam[Int](
        name = "real_graph_oon_result_count_threshold",
        default = 1,
        min = 0,
        max = Int.MaxValue)

  /** Boolean switch, off by default. */
  case object UseV2
      extends FSParam[Boolean](name = "real_graph_oon_use_v2", default = false)

  /** Minimum candidate score kept by RealGraphOonV2Source; in [0, 1], default 0.26. */
  case object ScoreThreshold
      extends FSBoundedParam[Double](
        name = "real_graph_oon_score_threshold",
        default = 0.26,
        min = 0.0,
        max = 1.0)

  /** Maximum number of candidates RealGraphOonV2Source returns; in [0, 1000], default 200. */
  case object MaxResults
      extends FSBoundedParam[Int](
        name = "real_graph_oon_max_results",
        default = 200,
        min = 0,
        max = 1000)
}

View File

@ -1,58 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.real_graph
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.strato.generated.client.onboarding.realGraph.UserRealgraphOonV2ClientColumn
import com.twitter.timelines.configapi.HasParams
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Candidate source backed by the UserRealgraphOonV2 Strato column.
 *
 * Candidates scoring below [[RealGraphOonParams.ScoreThreshold]] are dropped and at most
 * [[RealGraphOonParams.MaxResults]] candidates are returned, each tagged with this source's
 * identifier.
 */
@Singleton
class RealGraphOonV2Source @Inject() (
  realGraphClientColumn: UserRealgraphOonV2ClientColumn)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier =
    RealGraphOonV2Source.Identifier

  /**
   * Fetches the candidates stored for the request's user.
   *
   * @return the filtered, truncated candidates; empty when the request has no user id or
   *         the column has no value for the user
   */
  override def apply(request: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
    request.getOptionalUserId
      .map { userId =>
        realGraphClientColumn.fetcher
          .fetch(userId)
          .map { result =>
            result.v
              .map { candidates => parseStratoResults(request, candidates) }
              .getOrElse(Nil)
              // returned candidates are sorted by score in descending order
              .take(request.params(RealGraphOonParams.MaxResults))
              .map(_.withCandidateSource(identifier))
          }
      }.getOrElse(Stitch.Nil) // same empty result the sibling candidate sources use
  }

  /**
   * Converts the thrift candidates into [[CandidateUser]]s, keeping only those whose score
   * is at or above the configured threshold.
   */
  private def parseStratoResults(
    request: HasParams with HasClientContext,
    candidateSeqThrift: CandidateSeq
  ): Seq[CandidateUser] = {
    // Resolve the threshold once instead of once per candidate.
    val scoreThreshold = request.params(RealGraphOonParams.ScoreThreshold)
    candidateSeqThrift.candidates.collect {
      case candidate if candidate.score >= scoreThreshold =>
        CandidateUser(
          candidate.userId,
          Some(candidate.score)
        )
    }
  }
}
/** Constants for [[RealGraphOonV2Source]]. */
object RealGraphOonV2Source {

  /** Candidate source identifier, named after the RealGraphOonV2 algorithm. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RealGraphOonV2.toString)
}

View File

@ -1,40 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.real_graph
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Candidate source that exposes the real graph weights of a user as scored candidates.
 * Per the original note, these are the already followed edges from the real graph column.
 */
@Singleton
class RealGraphSource @Inject() (
  realGraph: RealTimeRealGraphClient)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = RealGraphSource.Identifier

  /**
   * Returns one candidate per (user id, weight) entry from the real graph client, with the
   * weight as its score. Requests without a user id yield no candidates.
   */
  override def apply(request: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
    request.getOptionalUserId match {
      case Some(userId) =>
        realGraph.getRealGraphWeights(userId).map { weightByCandidateId =>
          weightByCandidateId.map {
            case (candidateId, realGraphScore) =>
              val candidate = CandidateUser(id = candidateId, score = Some(realGraphScore))
              candidate.withCandidateSource(identifier)
          }.toSeq
        }
      case None =>
        Stitch.Nil
    }
  }
}
/** Constants for [[RealGraphSource]]. */
object RealGraphSource {

  /** Candidate source identifier, named after the RealGraphFollowed algorithm. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RealGraphFollowed.toString)
}

View File

@ -1,29 +0,0 @@
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"discovery-ds/src/main/thrift/com/twitter/dds/jobs/repeated_profile_visits:profile_visit-scala",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/sims",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/sims_expansion",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/real_time_real_graph",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/candidate_source",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/model/common/identifier",
"src/thrift/com/twitter/experiments/general_metrics:general_metrics-scala",
"strato/config/columns/rux:rux-strato-client",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,4 +0,0 @@
# Recent Engagement Candidate Source
Provides recently engaged accounts for a given user:
* Explicit engagements: like, retweet, reply
* Implicit engagements: profile visit

View File

@ -1,38 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Candidate source returning the users the target recently engaged with, restricted to
 * direct-follow candidates.
 */
@Singleton
class RecentEngagementDirectFollowSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient)
    extends CandidateSource[Long, CandidateUser] {

  val identifier: CandidateSourceIdentifier =
    RecentEngagementDirectFollowSource.Identifier

  /**
   * Asks the real-time real graph client for the users `targetUserId` recently engaged with
   * (direct-follow candidates only), tags them with this source's identifier and orders
   * them by descending score. A missing score is treated as 0.0.
   */
  override def apply(targetUserId: Long): Stitch[Seq[CandidateUser]] = {
    val recentlyEngaged = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
      userId = targetUserId,
      engagementScoreMap = RealTimeRealGraphClient.EngagementScoreMap,
      includeDirectFollowCandidates = true,
      includeNonDirectFollowCandidates = false
    )
    recentlyEngaged.map { candidates =>
      candidates
        .map(_.withCandidateSource(identifier))
        .sortBy(candidate => -candidate.score.getOrElse(0.0))
    }
  }
}
/** Constants for [[RecentEngagementDirectFollowSource]]. */
object RecentEngagementDirectFollowSource {

  /** Candidate source identifier, named after the RecentEngagementDirectFollow algorithm. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RecentEngagementDirectFollow.toString)
}

View File

@ -1,38 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Candidate source returning the users the target recently engaged with, restricted to
 * non-direct-follow candidates.
 */
@Singleton
class RecentEngagementNonDirectFollowSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient)
    extends CandidateSource[Long, CandidateUser] {

  val identifier: CandidateSourceIdentifier =
    RecentEngagementNonDirectFollowSource.Identifier

  /**
   * Asks the real-time real graph client for the users `targetUserId` recently engaged with
   * (non-direct-follow candidates only), tags them with this source's identifier and orders
   * them by descending score. A missing score is treated as 0.0.
   */
  override def apply(targetUserId: Long): Stitch[Seq[CandidateUser]] = {
    val recentlyEngaged = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
      userId = targetUserId,
      engagementScoreMap = RealTimeRealGraphClient.EngagementScoreMap,
      includeDirectFollowCandidates = false,
      includeNonDirectFollowCandidates = true
    )
    recentlyEngaged.map { candidates =>
      candidates
        .map(_.withCandidateSource(identifier))
        .sortBy(candidate => -candidate.score.getOrElse(0.0))
    }
  }
}
/** Constants for [[RecentEngagementNonDirectFollowSource]]. */
object RecentEngagementNonDirectFollowSource {

  /** Candidate source identifier, named after the RecentEngagementNonDirectFollow algorithm. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RecentEngagementNonDirectFollow.toString)
}

View File

@ -1,22 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSName
import com.twitter.timelines.configapi.Param
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Lists the feature-switch params of the repeated profile visits source, grouped by value
 * type through the corresponding [[FeatureSwitchConfig]] overrides.
 */
@Singleton
class RepeatedProfileVisitsFSConfig @Inject() () extends FeatureSwitchConfig {

  // Boolean params
  override val booleanFSParams: Seq[Param[Boolean] with FSName] = Seq(
    RepeatedProfileVisitsParams.IncludeCandidates,
    RepeatedProfileVisitsParams.UseOnlineDataset
  )

  // Bounded Int params
  override val intFSParams: Seq[FSBoundedParam[Int]] = Seq(
    RepeatedProfileVisitsParams.RecommendationThreshold,
    RepeatedProfileVisitsParams.BucketingThreshold
  )
}

View File

@ -1,37 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
/** Feature-switch params for the repeated profile visits candidate source. */
object RepeatedProfileVisitsParams {

  /**
   * If RepeatedProfileVisitsSource is run and there are recommended candidates for the target
   * user, whether or not to actually include such candidates in the output recommendations.
   * This FS is used to control bucketing of users into control vs treatment buckets.
   */
  case object IncludeCandidates
      extends FSParam[Boolean](
        name = "repeated_profile_visits_include_candidates",
        default = false
      )

  /**
   * The visit count at or above which a profile is considered to have been visited
   * "frequently enough" to recommend it to the target user.
   */
  case object RecommendationThreshold
      extends FSBoundedParam[Int](
        name = "repeated_profile_visits_recommendation_threshold",
        default = 3,
        min = 0,
        max = Int.MaxValue
      )

  /**
   * The visit count at or above which a visited profile counts towards bucketing the target
   * user. RepeatedProfileVisitsSource drops profiles below this count before it checks
   * IncludeCandidates, and returns nothing when no profile reaches it.
   */
  case object BucketingThreshold
      extends FSBoundedParam[Int](
        name = "repeated_profile_visits_bucketing_threshold",
        default = 3,
        min = 0,
        max = Int.MaxValue
      )

  /**
   * Whether or not to use the online dataset (which has repeated profile visits information
   * updated to within minutes) instead of the offline dataset (updated via offline jobs,
   * which can have delays of hours to days).
   */
  case object UseOnlineDataset
      extends FSParam[Boolean](
        name = "repeated_profile_visits_use_online_dataset",
        default = true
      )
}

View File

@ -1,157 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.recent_engagement
import com.google.inject.Inject
import com.google.inject.Singleton
import com.twitter.dds.jobs.repeated_profile_visits.thriftscala.ProfileVisitorInfo
import com.twitter.experiments.general_metrics.thriftscala.IdType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.Engagement
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.timelines.configapi.HasParams
import com.twitter.timelines.configapi.Params
import com.twitter.hermit.model.Algorithm
import com.twitter.inject.Logging
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.strato.generated.client.rux.RepeatedProfileVisitsAggregateClientColumn
/**
 * Candidate source that recommends profiles the target user has visited repeatedly.
 *
 * Visit counts come either from the online dataset (real-time real graph client) or from
 * the offline Strato column, selected by [[RepeatedProfileVisitsParams.UseOnlineDataset]].
 */
@Singleton
class RepeatedProfileVisitsSource @Inject() (
  repeatedProfileVisitsAggregateClientColumn: RepeatedProfileVisitsAggregateClientColumn,
  realTimeRealGraphClient: RealTimeRealGraphClient,
  statsReceiver: StatsReceiver)
    extends CandidateSource[HasParams with HasClientContext, CandidateUser]
    with Logging {

  val identifier: CandidateSourceIdentifier =
    RepeatedProfileVisitsSource.Identifier

  val sourceStatsReceiver = statsReceiver.scope("repeated_profile_visits_source")
  val offlineFetchErrorCounter = sourceStatsReceiver.counter("offline_fetch_error")
  val offlineFetchSuccessCounter = sourceStatsReceiver.counter("offline_fetch_success")
  val onlineFetchErrorCounter = sourceStatsReceiver.counter("online_fetch_error")
  val onlineFetchSuccessCounter = sourceStatsReceiver.counter("online_fetch_success")
  val noRepeatedProfileVisitsAboveBucketingThresholdCounter =
    sourceStatsReceiver.counter("no_repeated_profile_visits_above_bucketing_threshold")
  val hasRepeatedProfileVisitsAboveBucketingThresholdCounter =
    sourceStatsReceiver.counter("has_repeated_profile_visits_above_bucketing_threshold")
  val noRepeatedProfileVisitsAboveRecommendationsThresholdCounter =
    sourceStatsReceiver.counter("no_repeated_profile_visits_above_recommendations_threshold")
  val hasRepeatedProfileVisitsAboveRecommendationsThresholdCounter =
    sourceStatsReceiver.counter("has_repeated_profile_visits_above_recommendations_threshold")
  val includeCandidatesCounter = sourceStatsReceiver.counter("include_candidates")
  val noIncludeCandidatesCounter = sourceStatsReceiver.counter("no_include_candidates")

  /**
   * Returns visited user -> visit count (last 14 days), via the offline dataset.
   *
   * Profiles with no visits, profiles the user is marked as already following, and entries
   * missing a target id or visit count are skipped. A failed fetch is logged, counted as an
   * error and treated as "no data".
   */
  def applyWithOfflineDataset(targetUserId: Long): Stitch[Map[Long, Int]] = {
    repeatedProfileVisitsAggregateClientColumn.fetcher
      .fetch(ProfileVisitorInfo(id = targetUserId, idType = IdType.User))
      .map(_.v)
      // Count success before the failure is recovered below; otherwise a failed fetch
      // would be counted as both an error and a success.
      .onSuccess { _ =>
        offlineFetchSuccessCounter.incr()
      }
      .handle {
        // Only recover from non-fatal errors, and pass the exception as the cause so the
        // stack trace is logged.
        case scala.util.control.NonFatal(e) =>
          logger.error("Strato fetch for RepeatedProfileVisitsAggregateClientColumn failed", e)
          offlineFetchErrorCounter.incr()
          None
      }
      .map { resultOption =>
        resultOption
          .flatMap { result =>
            result.profileVisitSet.map { profileVisitSet =>
              profileVisitSet
                .filter { profileVisit =>
                  profileVisit.totalTargetVisitsInLast14Days.getOrElse(0) > 0 &&
                  !profileVisit.doesSourceIdFollowTargetId.getOrElse(false)
                }
                .flatMap { profileVisit =>
                  for {
                    targetId <- profileVisit.targetId
                    totalVisitsInLast14Days <- profileVisit.totalTargetVisitsInLast14Days
                  } yield targetId -> totalVisitsInLast14Days
                }.toMap[Long, Int]
            }
          }.getOrElse(Map.empty)
      }
  }

  /**
   * Returns visited user -> visit count, via the online dataset. The count is the number of
   * recent profile-view engagements with that user. Failures are counted and propagated.
   */
  def applyWithOnlineData(targetUserId: Long): Stitch[Map[Long, Int]] = {
    val visitedUserToEngagementsStitch: Stitch[Map[Long, Seq[Engagement]]] =
      realTimeRealGraphClient.getRecentProfileViewEngagements(targetUserId)
    visitedUserToEngagementsStitch
      .onFailure { _ =>
        onlineFetchErrorCounter.incr()
      }.onSuccess { _ =>
        onlineFetchSuccessCounter.incr()
      }.map { visitedUserToEngagements =>
        // Strict map instead of mapValues, which yields a lazy view on Scala 2.12.
        visitedUserToEngagements.map {
          case (visitedUserId, engagements) => visitedUserId -> engagements.size
        }
      }
  }

  /**
   * Returns visited user -> visit count from the dataset selected by the UseOnlineDataset
   * param, keeping only users with a non-zero count.
   */
  def getRepeatedVisitedAccounts(params: Params, targetUserId: Long): Stitch[Map[Long, Int]] = {
    val results: Stitch[Map[Long, Int]] =
      if (params.getBoolean(RepeatedProfileVisitsParams.UseOnlineDataset)) {
        applyWithOnlineData(targetUserId)
      } else {
        applyWithOfflineDataset(targetUserId)
      }
    // Only keep users that had non-zero engagement counts.
    results.map(_.filter { case (_, visitCount) => visitCount > 0 })
  }

  /**
   * Returns the repeatedly visited profiles to recommend to `userId`.
   *
   * Nothing is returned when no profile reaches the bucketing threshold or when the
   * IncludeCandidates param is off. Otherwise every profile at or above the recommendation
   * threshold is returned, with its visit count as the score.
   */
  def getRecommendations(params: Params, userId: Long): Stitch[Seq[CandidateUser]] = {
    val recommendationThreshold = params.getInt(RepeatedProfileVisitsParams.RecommendationThreshold)
    val bucketingThreshold = params.getInt(RepeatedProfileVisitsParams.BucketingThreshold)

    // Get the repeatedly visited profiles. Only keep accounts with >= bucketingThreshold visits.
    val repeatedVisitedAccountsStitch: Stitch[Map[Long, Int]] =
      getRepeatedVisitedAccounts(params, userId).map(
        _.filter { case (_, visitCount) => visitCount >= bucketingThreshold })

    repeatedVisitedAccountsStitch.map { candidates =>
      // Now check if we should include candidates (e.g. whether the user is in a control or
      // treatment bucket).
      if (candidates.isEmpty) {
        // User has not visited any accounts above the bucketing threshold. We do not bucket
        // the user into the experiment and return no candidates.
        noRepeatedProfileVisitsAboveBucketingThresholdCounter.incr()
        Seq.empty
      } else {
        hasRepeatedProfileVisitsAboveBucketingThresholdCounter.incr()
        if (!params.getBoolean(RepeatedProfileVisitsParams.IncludeCandidates)) {
          // User has reached the bucketing criteria, but is in a bucket that does not
          // include any candidates.
          noIncludeCandidatesCounter.incr()
          Seq.empty
        } else {
          includeCandidatesCounter.incr()
          // Include any candidates at or above the recommendation threshold.
          val outputCandidatesSeq = candidates.collect {
            case (user, visitCount) if visitCount >= recommendationThreshold =>
              CandidateUser(user, Some(visitCount.toDouble))
                .withCandidateSource(RepeatedProfileVisitsSource.Identifier)
          }.toSeq
          if (outputCandidatesSeq.isEmpty) {
            noRepeatedProfileVisitsAboveRecommendationsThresholdCounter.incr()
          } else {
            hasRepeatedProfileVisitsAboveRecommendationsThresholdCounter.incr()
          }
          outputCandidatesSeq
        }
      }
    }
  }

  /** Returns recommendations for the request's user; empty when the request has no user id. */
  override def apply(request: HasParams with HasClientContext): Stitch[Seq[CandidateUser]] = {
    request.getOptionalUserId
      .map { userId =>
        getRecommendations(request.params, userId)
      }.getOrElse(Stitch.Nil)
  }
}
/** Constants for [[RepeatedProfileVisitsSource]]. */
object RepeatedProfileVisitsSource {

  /** Candidate source identifier, named after the RepeatedProfileVisits algorithm. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RepeatedProfileVisits.toString)
}

View File

@ -1,21 +0,0 @@
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/real_time_real_graph",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"src/thrift/com/twitter/onboarding/relevance/candidates:candidates-scala",
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,10 +0,0 @@
# SALSA Candidate Source
Provides an account expansion based on the SALSA PYMK (People You May Know) algorithm for a given account. The algorithm focuses on the mutual follow and address book graph, making it highly effective at providing good mutual follow recommendations.
The SALSA algorithm constructs a local graph and performs personalized random walks to identify the best recommendations for the user. The local graph represents the community of users that are most similar to or most relevant to the user, while the personalized random walk identifies the most popular interests among them.
For each target user, the local graph is a bipartite graph with a left-hand side (LHS) and a right-hand side (RHS). The LHS is built from several sources, including the target user, forward and reverse address books, mutual follows, recent followings, and recent followers. We choose a specified number of top candidates from these sources for each target user with different weights assigned to each source to favor the corresponding source, and build the LHS using the target user and those top candidates. The RHS consists of two parts: the top candidates from the sources mentioned above for the target user and the mutual follows of the other entries in the LHS.
The random walk starts from the target user in the LHS and adopts a restarting strategy to realize personalization.
In summary, the SALSA Candidate Source provides an account expansion based on the SALSA PYMK algorithm, utilizing a bipartite graph with personalized random walks to identify the most relevant and interesting recommendations for the user.

View File

@ -1,40 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.salsa
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import javax.inject.Inject
import javax.inject.Singleton
/**
 * SALSA expansion source whose first-degree nodes are the direct-follow users the target
 * recently engaged with.
 */
@Singleton
class RecentEngagementDirectFollowSalsaExpansionSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient,
  salsaExpander: SalsaExpander)
    extends SalsaExpansionBasedCandidateSource[Long](salsaExpander) {

  override val identifier: CandidateSourceIdentifier =
    RecentEngagementDirectFollowSalsaExpansionSource.Identifier

  /**
   * Ids of the first NumFirstDegreeNodesToRetrieve direct-follow users returned by the
   * real-time real graph client for `target`.
   */
  override def firstDegreeNodes(target: Long): Stitch[Seq[Long]] = {
    val recentlyEngaged = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
      userId = target,
      engagementScoreMap = RealTimeRealGraphClient.EngagementScoreMap,
      includeDirectFollowCandidates = true,
      includeNonDirectFollowCandidates = false
    )
    val nodeLimit = RecentEngagementDirectFollowSalsaExpansionSource.NumFirstDegreeNodesToRetrieve
    recentlyEngaged.map(candidates => candidates.take(nodeLimit).map(_.id))
  }

  /** Fixed output size, independent of the target. */
  override def maxResults(target: Long): Int =
    RecentEngagementDirectFollowSalsaExpansionSource.OutputSize
}
/**
 * Constants used by the RecentEngagementDirectFollowSalsaExpansionSource class.
 */
object RecentEngagementDirectFollowSalsaExpansionSource {
// Identifier exposed by the class as its candidate source identifier.
// NOTE(review): this uses the Algorithm.RecentEngagementSarusOcCur name rather than a
// Salsa-specific one — confirm that is intentional.
val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier(
Algorithm.RecentEngagementSarusOcCur.toString)
// Number of recently engaged users kept as first-degree nodes.
val NumFirstDegreeNodesToRetrieve = 10
// Value the class returns from maxResults.
val OutputSize = 200
}

View File

@ -1,117 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.salsa
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.strato.generated.client.onboarding.userrecs.SalsaFirstDegreeOnUserClientColumn
import com.twitter.strato.generated.client.onboarding.userrecs.SalsaSecondDegreeOnUserClientColumn
import com.twitter.follow_recommendations.common.models.AccountProof
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.FollowProof
import com.twitter.follow_recommendations.common.models.Reason
import com.twitter.stitch.Stitch
import com.twitter.wtf.candidate.thriftscala.Candidate
import javax.inject.Inject
import javax.inject.Singleton
/**
 * A candidate produced by the Salsa expansion, together with its aggregated score and the
 * input users it was reached through.
 */
case class SalsaExpandedCandidate(
  candidateId: Long,
  numberOfConnections: Int,
  totalScore: Double,
  connectingUsers: Seq[Long]) {

  /**
   * Converts to a [[CandidateUser]] scored with `totalScore`, carrying a follow proof built
   * from `connectingUsers` and their count.
   */
  def toCandidateUser: CandidateUser = {
    val followProof = FollowProof(connectingUsers, connectingUsers.size)
    val accountProof = AccountProof(followProof = Some(followProof))
    CandidateUser(
      id = candidateId,
      score = Some(totalScore),
      reason = Some(Reason(Some(accountProof)))
    )
  }
}
/**
 * One (input user -> neighbor) edge: `candidateId` is the neighbor's user id, `score` the
 * neighbor's score, and `similarToCandidate` the input id the neighbor was fetched for.
 */
case class SimilarUserCandidate(candidateId: Long, score: Double, similarToCandidate: Long)
/**
 * Salsa expander uses pre-computed lists of candidates for each input user id and returns the
 * highest scored candidates in the pre-computed lists as the expansion for the corresponding
 * input id.
 *
 * Neighbors of all inputs are grouped by candidate id; each candidate's score is the sum of
 * its per-input scores.
 */
@Singleton
class SalsaExpander @Inject() (
  statsReceiver: StatsReceiver,
  firstDegreeClient: SalsaFirstDegreeOnUserClientColumn,
  secondDegreeClient: SalsaSecondDegreeOnUserClientColumn,
) {

  val stats: StatsReceiver = statsReceiver.scope("salsa_expander")

  // Resolved once here instead of being looked up by name on every apply() call.
  private val firstDegreeNeighborsStat = stats.stat("first_degree_neighbors")
  private val secondDegreeNeighborsStat = stats.stat("second_degree_neighbors")

  /**
   * Aggregates the neighbors of every input id into one [[SalsaExpandedCandidate]] per
   * distinct neighbor id.
   *
   * @param input     input user ids, positionally aligned with `neighbors`
   * @param neighbors fetched neighbor list for each input id (None when nothing was returned)
   * @return candidates connected to at least min(MinConnectingUsersThreshold, input.size)
   *         inputs, in no particular order
   */
  private def similarUsers(
    input: Seq[Long],
    neighbors: Seq[Option[Seq[Candidate]]]
  ): Seq[SalsaExpandedCandidate] = {
    // With fewer inputs than the threshold, every input must connect to the candidate.
    val minConnections = math.min(SalsaExpander.MinConnectingUsersThreshold, input.size)

    input
      .zip(neighbors).flatMap {
        case (recId, Some(neighborsOfRec)) =>
          neighborsOfRec.map { neighbor =>
            SimilarUserCandidate(neighbor.userId, neighbor.score, recId)
          }
        case _ => Nil
      }.groupBy(_.candidateId).map {
        case (candidateId, connections) =>
          val scores = connections.map(_.score)
          // Keep only the strongest connections as the proof shown for this candidate.
          val connectingUsers = connections
            .sortBy(-_.score)
            .take(SalsaExpander.MaxConnectingUsersToOutputPerExpandedCandidate)
            .map(_.similarToCandidate)
          SalsaExpandedCandidate(candidateId, scores.size, scores.sum, connectingUsers)
      }
      .filter(_.numberOfConnections >= minConnections)
      .toSeq
  }

  /**
   * Expands the given input ids into scored candidates.
   *
   * @param firstDegreeInput           ids looked up through the first-degree column
   * @param secondDegreeInput          ids looked up through the second-degree column
   * @param maxNumOfCandidatesToReturn maximum number of candidates in the result
   * @return candidates sorted by descending total score
   */
  def apply(
    firstDegreeInput: Seq[Long],
    secondDegreeInput: Seq[Long],
    maxNumOfCandidatesToReturn: Int
  ): Stitch[Seq[CandidateUser]] = {
    // NOTE(review): `flatten.size` below counts the inputs for which a neighbor list was
    // returned, not the total number of neighbors — confirm this is what the stat should track.
    val firstDegreeNeighborsStitch =
      Stitch
        .collect(firstDegreeInput.map { id =>
          firstDegreeClient.fetcher
            .fetch(id).map(_.v.map(_.candidates.take(SalsaExpander.MaxDirectNeighbors)))
        }).onSuccess { firstDegreeNeighbors =>
          firstDegreeNeighborsStat.add(firstDegreeNeighbors.flatten.size)
        }

    val secondDegreeNeighborsStitch =
      Stitch
        .collect(secondDegreeInput.map { id =>
          secondDegreeClient.fetcher
            .fetch(id).map(_.v.map(_.candidates.take(SalsaExpander.MaxIndirectNeighbors)))
        }).onSuccess { secondDegreeNeighbors =>
          secondDegreeNeighborsStat.add(secondDegreeNeighbors.flatten.size)
        }

    // Concatenated in the same order as the inputs so that zip() in similarUsers lines up.
    val neighborStitches =
      Stitch.join(firstDegreeNeighborsStitch, secondDegreeNeighborsStitch).map {
        case (first, second) => first ++ second
      }

    val similarUsersToInput = neighborStitches.map { neighbors =>
      similarUsers(firstDegreeInput ++ secondDegreeInput, neighbors)
    }

    similarUsersToInput.map {
      // Rank the candidate users by the combined weights from the connecting users. This is
      // the default original implementation. It is unlikely to have weight ties and thus a
      // second ranking function is not necessary.
      _.sortBy(-_.totalScore)
        .take(maxNumOfCandidatesToReturn)
        .map(_.toCandidateUser)
    }
  }
}
/** Tuning constants for the SalsaExpander class. */
object SalsaExpander {

  /** Maximum candidates kept per first-degree input id. */
  val MaxDirectNeighbors: Int = 2000

  /** Maximum candidates kept per second-degree input id. */
  val MaxIndirectNeighbors: Int = 2000

  /** Minimum number of connecting inputs a candidate needs (capped by the input size). */
  val MinConnectingUsersThreshold: Int = 2

  /** Maximum number of connecting users reported per expanded candidate. */
  val MaxConnectingUsersToOutputPerExpandedCandidate: Int = 3
}

View File

@ -1,32 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.salsa
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.stitch.Stitch
/**
 * Base class for candidate sources that pass first- and second-degree node ids to a
 * [[SalsaExpander]] and return the expanded candidates tagged with this source's identifier.
 */
abstract class SalsaExpansionBasedCandidateSource[Target](salsaExpander: SalsaExpander)
    extends CandidateSource[Target, CandidateUser] {

  // Both node lists default to empty so that a subclass only overrides the one(s) it needs
  // (example: MagicRecs only uses first degree nodes). apply(target) always combines both.
  def firstDegreeNodes(target: Target): Stitch[Seq[Long]] = Stitch.value(Seq())

  def secondDegreeNodes(target: Target): Stitch[Seq[Long]] = Stitch.value(Seq())

  /** Maximum number of results requested from the expander for this target. */
  def maxResults(target: Target): Int

  /**
   * Fetches both node lists, runs the Salsa expansion, then tags each candidate with
   * `identifier` and orders them by descending score (a missing score counts as 0.0).
   */
  override def apply(target: Target): Stitch[Seq[CandidateUser]] =
    Stitch
      .join(firstDegreeNodes(target), secondDegreeNodes(target))
      .flatMap {
        case (firstDegreeIds, secondDegreeIds) =>
          val expansion = salsaExpander(firstDegreeIds, secondDegreeIds, maxResults(target))
          expansion.map { expanded =>
            expanded
              .map(candidate => candidate.withCandidateSource(identifier))
              .sortBy(candidate => -candidate.score.getOrElse(0.0))
          }
      }
}

View File

@ -1,24 +0,0 @@
# Scala library target for the sources in this directory.
# Compiled with the "fatal_warnings" option set, so compiler warnings fail the build.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
"src/thrift/com/twitter/hermit/candidate:hermit-candidate-scala",
# Generated Strato client columns.
"strato/config/columns/onboarding/userrecs:userrecs-strato-client",
"strato/config/columns/recommendations/follow2vec:follow2vec-strato-client",
"strato/config/columns/recommendations/similarity:similarity-strato-client",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,50 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.twitter.escherbird.util.stitchcache.StitchCache
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.HasSimilarToContext
import com.twitter.hermit.candidate.thriftscala.Candidates
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.strato.client.Fetcher
import com.twitter.timelines.configapi.HasParams
import com.twitter.util.Duration
import java.lang.{Long => JLong}
/**
 * Sims candidate source that reads similar users through a [[StitchCache]] placed in front
 * of a Strato fetcher.
 *
 * @param id            identifier reported for candidates from this source
 * @param fetcher       Strato fetcher returning the similar-user list for a user id
 * @param maxCacheSize  maximum cache size passed to the cache
 * @param cacheTtl      TTL passed to the cache
 * @param statsReceiver stats receiver passed to the cache
 */
class CacheBasedSimsStore(
  id: CandidateSourceIdentifier,
  fetcher: Fetcher[Long, Unit, Candidates],
  maxCacheSize: Int,
  cacheTtl: Duration,
  statsReceiver: StatsReceiver)
    extends CandidateSource[HasParams with HasSimilarToContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = id

  /** Uncached lookup; this is the call the cache falls back to. */
  private def getUsersFromSimsSource(userId: JLong): Stitch[Option[Candidates]] =
    fetcher.fetch(userId).map(fetchResult => fetchResult.v)

  private val simsCache = StitchCache[JLong, Option[Candidates]](
    maxCacheSize = maxCacheSize,
    ttl = cacheTtl,
    statsReceiver = statsReceiver,
    underlyingCall = getUsersFromSimsSource
  )

  /**
   * Looks up the similar users of every id in `request.similarToUserIds`, concatenates the
   * results, drops duplicates and tags each candidate with `identifier`.
   */
  override def apply(request: HasParams with HasSimilarToContext): Stitch[Seq[CandidateUser]] = {
    val candidatesPerUser = Stitch.traverse(request.similarToUserIds) { userId =>
      simsCache.readThrough(userId).map {
        case Some(candidates) => StratoBasedSimsCandidateSource.map(userId, candidates)
        case None => Nil
      }
    }
    candidatesPerUser.map { perUser =>
      perUser.flatten.distinct.map(candidate => candidate.withCandidateSource(identifier))
    }
  }
}

View File

@ -1,35 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.google.inject.Singleton
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.strato.generated.client.onboarding.userrecs.NewSimsRefreshOnUserClientColumn
import com.twitter.util.Duration
import javax.inject.Inject
/**
 * Sims candidate source backed by the fetcher of NewSimsRefreshOnUserClientColumn, using the
 * Unit-view Strato base class and DBV2SimsRefreshStore.Identifier.
 */
@Singleton
class DBV2SimsRefreshStore @Inject() (
newSimsRefreshOnUserClientColumn: NewSimsRefreshOnUserClientColumn)
extends StratoBasedSimsCandidateSourceWithUnitView(
fetcher = newSimsRefreshOnUserClientColumn.fetcher,
identifier = DBV2SimsRefreshStore.Identifier)
/**
 * Cached variant of the DBV2 sims refresh store: the same column fetcher wrapped in
 * CacheBasedSimsStore, with cache size and TTL taken from the DBV2SimsRefreshStore object
 * and stats scoped under "CachedDBV2SimsRefreshStore" / "cache".
 */
@Singleton
class CachedDBV2SimsRefreshStore @Inject() (
newSimsRefreshOnUserClientColumn: NewSimsRefreshOnUserClientColumn,
statsReceiver: StatsReceiver)
extends CacheBasedSimsStore(
id = DBV2SimsRefreshStore.Identifier,
fetcher = newSimsRefreshOnUserClientColumn.fetcher,
maxCacheSize = DBV2SimsRefreshStore.MaxCacheSize,
cacheTtl = DBV2SimsRefreshStore.CacheTTL,
statsReceiver = statsReceiver.scope("CachedDBV2SimsRefreshStore", "cache")
)
/** Identifier and cache settings shared by the cached and uncached DBV2 sims refresh stores. */
object DBV2SimsRefreshStore {
// Built from the Algorithm.Sims name.
val Identifier = CandidateSourceIdentifier(Algorithm.Sims.toString)
// Passed as maxCacheSize to the cached store.
val MaxCacheSize = 5000
// Passed as cacheTtl to the cached store (24 hours).
val CacheTTL: Duration = Duration.fromHours(24)
}

View File

@ -1,38 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.constants.GuiceNamedConstants
import com.twitter.hermit.candidate.thriftscala.Candidates
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.strato.client.Fetcher
import com.twitter.util.Duration
import javax.inject.Inject
/**
 * Sims candidate source backed by the fetcher injected under the
 * GuiceNamedConstants.DBV2_SIMS_FETCHER name, using the Unit-view Strato base class and
 * DBV2SimsStore.Identifier.
 */
@Singleton
class DBV2SimsStore @Inject() (
@Named(GuiceNamedConstants.DBV2_SIMS_FETCHER) fetcher: Fetcher[Long, Unit, Candidates])
extends StratoBasedSimsCandidateSourceWithUnitView(
fetcher,
identifier = DBV2SimsStore.Identifier)
/**
 * Cached variant of the DBV2 sims store: the same named fetcher wrapped in
 * CacheBasedSimsStore, with cache size and TTL taken from the DBV2SimsStore object and stats
 * scoped under "CachedDBV2SimsStore" / "cache".
 */
@Singleton
class CachedDBV2SimsStore @Inject() (
@Named(GuiceNamedConstants.DBV2_SIMS_FETCHER) fetcher: Fetcher[Long, Unit, Candidates],
statsReceiver: StatsReceiver)
extends CacheBasedSimsStore(
id = DBV2SimsStore.Identifier,
fetcher = fetcher,
maxCacheSize = DBV2SimsStore.MaxCacheSize,
cacheTtl = DBV2SimsStore.CacheTTL,
statsReceiver = statsReceiver.scope("CachedDBV2SimsStore", "cache")
)
/** Identifier and cache settings shared by the cached and uncached DBV2 sims stores. */
object DBV2SimsStore {
// Built from the Algorithm.Sims name.
val Identifier = CandidateSourceIdentifier(Algorithm.Sims.toString)
// Passed as maxCacheSize to the cached store.
val MaxCacheSize = 1000
// Passed as cacheTtl to the cached store (24 hours).
val CacheTTL: Duration = Duration.fromHours(24)
}

View File

@ -1,69 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.google.inject.Singleton
import com.twitter.follow_recommendations.common.candidate_sources.sims.Follow2vecNearestNeighborsStore.NearestNeighborParamsType
import com.twitter.hermit.candidate.thriftscala.Candidate
import com.twitter.hermit.candidate.thriftscala.Candidates
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.strato.catalog.Fetch
import com.twitter.strato.client.Fetcher
import com.twitter.strato.generated.client.recommendations.follow2vec.LinearRegressionFollow2vecNearestNeighborsClientColumn
import com.twitter.util.Return
import com.twitter.util.Throw
import javax.inject.Inject
/**
 * Sims candidate source backed by the linear-regression follow2vec nearest-neighbors column.
 * The column fetcher is adapted to a user-id keyed fetcher via
 * Follow2vecNearestNeighborsStore.convertFetcher and is always queried with
 * Follow2vecNearestNeighborsStore.defaultSearchParams as the view.
 */
@Singleton
class LinearRegressionFollow2vecNearestNeighborsStore @Inject() (
linearRegressionFollow2vecNearestNeighborsClientColumn: LinearRegressionFollow2vecNearestNeighborsClientColumn)
extends StratoBasedSimsCandidateSource[NearestNeighborParamsType](
Follow2vecNearestNeighborsStore.convertFetcher(
linearRegressionFollow2vecNearestNeighborsClientColumn.fetcher),
view = Follow2vecNearestNeighborsStore.defaultSearchParams,
identifier = Follow2vecNearestNeighborsStore.IdentifierF2vLinearRegression
)
/**
 * Type aliases, defaults and a fetcher adapter for the follow2vec nearest-neighbors column.
 */
object Follow2vecNearestNeighborsStore {
  // (userid, feature store version for data)
  type NearestNeighborKeyType = (Long, Long)
  // (neighbors to be returned, ef value: accuracy / latency tradeoff, distance for filtering)
  type NearestNeighborParamsType = (Option[Int], Option[Int], Option[Double])
  // (seq(found neighbor id, score), distance for filtering)
  type NearestNeighborValueType = (Seq[(Long, Option[Double])], Option[Double])

  val IdentifierF2vLinearRegression: CandidateSourceIdentifier = CandidateSourceIdentifier(
    Algorithm.LinearRegressionFollow2VecNearestNeighbors.toString)

  val defaultFeatureStoreVersion: Long = 20210708
  val defaultSearchParams: NearestNeighborParamsType = (None, None, None)

  /**
   * Adapts the column fetcher to one keyed by user id only. The key is paired with
   * `defaultFeatureStoreVersion`, and the returned neighbors are converted to [[Candidates]]
   * for that user. A neighbor without a score gets score 0; the filtering distance in the
   * column value is dropped. Failures of the underlying fetch are propagated unchanged.
   */
  def convertFetcher(
    fetcher: Fetcher[NearestNeighborKeyType, NearestNeighborParamsType, NearestNeighborValueType]
  ): Fetcher[Long, NearestNeighborParamsType, Candidates] = {
    (key: Long, view: NearestNeighborParamsType) =>
      {
        def toCandidates(
          results: Option[NearestNeighborValueType]
        ): Option[Candidates] =
          results.map {
            case (neighbors, _) =>
              Candidates(
                key,
                neighbors.map {
                  case (neighborId, scoreOpt) => Candidate(neighborId, scoreOpt.getOrElse(0.0))
                }
              )
          }

        val fetched: Stitch[Fetch.Result[NearestNeighborValueType]] =
          fetcher.fetch(key = (key, defaultFeatureStoreVersion), view = view)

        fetched.transform {
          case Return(result) => Stitch.value(Fetch.Result(toCandidates(result.v)))
          case Throw(error) => Stitch.exception(error)
        }
      }
  }
}

View File

@ -1,32 +0,0 @@
# Sims Candidate Source
Offers various online sources for finding similar accounts based on a given user, whether it is the target user or an account candidate.
## Sims
The objective is to identify a list of K users who are similar to a given user. In this scenario, we primarily focus on finding similar users as "producers" rather than "consumers." Sims has two steps: candidate generation and ranking.
### Sims Candidate Generation
With over 700 million users to consider, there are multiple ways to define similarities. Currently, we have three candidate sources for Sims:
**CosineFollow** (based on user-user follow graph): The similarity between two users is defined as the cosine similarity between their followers. Despite sounding simple, computing all-pair similarity on the entire follow graph is computationally challenging. We are currently using the WHIMP algorithm to find the top 1000 similar users for each user ID. This candidate source has the largest coverage, as it can find similar user candidates for more than 700 million users.
**CosineList** (based on user-list membership graph): The similarity between two users is defined as the cosine similarity between the sets of lists that include them as members (e.g., [here](https://twitter.com/jack/lists/memberships) are the lists that @jack is on). The same algorithm as CosineFollow is used.
**Follow2Vec** (essentially Word2Vec on user-user follow graph): We first train the Word2Vec model on follow sequence data to obtain users' embeddings and then find the most similar users based on the similarity of the embeddings. However, we need enough data for each user to learn a meaningful embedding for them, so we can only obtain embeddings for the top 10 million users (currently in production, testing 30 million users). Furthermore, Word2Vec model training is limited by memory and computation as it is trained on a single machine.
##### Cosine Similarity
A crucial component in Sims is calculating cosine similarities between users based on a user-X (X can be a user, list, or other entities) bipartite graph. This problem is technically challenging and took several years of effort to solve.
The current implementation uses the algorithm proposed in [When hashes met wedges: A distributed algorithm for finding high similarity vectors. WWW 2017](https://arxiv.org/pdf/1703.01054.pdf)
### Sims Ranking
After the candidate generation step, we can obtain dozens to hundreds of similar user candidates for each user. However, since these candidates come from different algorithms, we need a way to rank them. To do this, we collect user feedback.
We use the "Profile Sidebar Impressions & Follow" (a module with follow suggestions displayed when a user visits a profile page and scrolls down) to collect training data. To alleviate any system bias, we use 4% of traffic to show randomly shuffled candidates to users and collect positive (followed impression) and negative (impression only) data from this traffic. This data is used as an evaluation set. We use a portion of the remaining 96% of traffic for training data, filtering only for sets of impressions that had at least one follow, ensuring that the user taking action was paying attention to the impressions.
The examples are in the format of (profile_user, candidate_user, label). We add features for profile_users and candidate_users based on some high-level aggregated statistics in a feature dataset provided by the Customer Journey team, as well as features that represent the similarity between the profile_user and candidate_user.
We employ a multi-tower MLP model and optimize the logistic loss. The model is refreshed weekly using an ML workflow.
We recompute the candidates and rank them daily. The ranked results are published to the Manhattan dataset.

View File

@ -1,36 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.google.inject.Singleton
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.strato.generated.client.recommendations.similarity.SimilarUsersBySimsExperimentalOnUserClientColumn
import com.twitter.util.Duration
import javax.inject.Inject
/**
 * Sims candidate source backed by the fetcher of
 * SimilarUsersBySimsExperimentalOnUserClientColumn, using the Unit-view Strato base class and
 * SimsExperimentalStore.Identifier.
 */
@Singleton
class SimsExperimentalStore @Inject() (
simsExperimentalOnUserClientColumn: SimilarUsersBySimsExperimentalOnUserClientColumn)
extends StratoBasedSimsCandidateSourceWithUnitView(
fetcher = simsExperimentalOnUserClientColumn.fetcher,
identifier = SimsExperimentalStore.Identifier
)
/**
 * Cached variant of the experimental sims store: the same column fetcher wrapped in
 * CacheBasedSimsStore, with cache size and TTL taken from the SimsExperimentalStore object
 * and stats scoped under "CachedSimsExperimentalStore" / "cache".
 */
@Singleton
class CachedSimsExperimentalStore @Inject() (
simsExperimentalOnUserClientColumn: SimilarUsersBySimsExperimentalOnUserClientColumn,
statsReceiver: StatsReceiver)
extends CacheBasedSimsStore(
id = SimsExperimentalStore.Identifier,
fetcher = simsExperimentalOnUserClientColumn.fetcher,
maxCacheSize = SimsExperimentalStore.MaxCacheSize,
cacheTtl = SimsExperimentalStore.CacheTTL,
statsReceiver = statsReceiver.scope("CachedSimsExperimentalStore", "cache")
)
/** Identifier and cache settings shared by the cached and uncached experimental sims stores. */
object SimsExperimentalStore {
// Built from the Algorithm.Sims name.
val Identifier = CandidateSourceIdentifier(Algorithm.Sims.toString)
// Passed as maxCacheSize to the cached store.
val MaxCacheSize = 1000
// Passed as cacheTtl to the cached store (12 hours).
val CacheTTL: Duration = Duration.fromHours(12)
}

View File

@ -1,14 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
import com.twitter.timelines.configapi.FSName
import com.twitter.timelines.configapi.FSParam
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Feature-switch config listing the boolean params of SimsSourceParams exposed by this package.
 *
 * NOTE(review): only DisableHeavyRanker is listed; the Enable* params declared in
 * SimsSourceParams are not — confirm they are registered elsewhere.
 */
@Singleton
class SimsSourceFSConfig @Inject() () extends FeatureSwitchConfig {
override val booleanFSParams: Seq[FSParam[Boolean] with FSName] = Seq(
SimsSourceParams.DisableHeavyRanker
)
}

View File

@ -1,16 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.twitter.timelines.configapi.FSParam
/**
 * Boolean feature-switch params for the sims candidate sources. All default to false.
 */
object SimsSourceParams {
// The three Enable* params below are read by SwitchingSimsSource, in this order, to pick
// which cached sims store serves a request.
case object EnableDBV2SimsStore extends FSParam[Boolean]("sims_source_enable_dbv2_source", false)
case object EnableDBV2SimsRefreshStore
extends FSParam[Boolean]("sims_source_enable_dbv2_refresh_source", false)
case object EnableExperimentalSimsStore
extends FSParam[Boolean]("sims_source_enable_experimental_source", false)
// Not read anywhere in this package's sources shown here; it is the only param listed in
// SimsSourceFSConfig.
case object DisableHeavyRanker
extends FSParam[Boolean]("sims_source_disable_heavy_ranker", default = false)
}

View File

@ -1,36 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.constants.GuiceNamedConstants
import com.twitter.hermit.candidate.thriftscala.Candidates
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.strato.client.Fetcher
import com.twitter.util.Duration
import javax.inject.Inject
/**
 * Sims candidate source backed by the fetcher injected under the
 * GuiceNamedConstants.SIMS_FETCHER name, using the Unit-view Strato base class and
 * SimsStore.Identifier.
 */
@Singleton
class SimsStore @Inject() (
@Named(GuiceNamedConstants.SIMS_FETCHER) fetcher: Fetcher[Long, Unit, Candidates])
extends StratoBasedSimsCandidateSourceWithUnitView(fetcher, identifier = SimsStore.Identifier)
/**
 * Cached variant of the sims store: the same named fetcher wrapped in CacheBasedSimsStore,
 * with cache size and TTL taken from the SimsStore object and stats scoped under
 * "CachedSimsStore" / "cache".
 */
@Singleton
class CachedSimsStore @Inject() (
@Named(GuiceNamedConstants.SIMS_FETCHER) fetcher: Fetcher[Long, Unit, Candidates],
statsReceiver: StatsReceiver)
extends CacheBasedSimsStore(
id = SimsStore.Identifier,
fetcher = fetcher,
maxCacheSize = SimsStore.MaxCacheSize,
cacheTtl = SimsStore.CacheTTL,
statsReceiver = statsReceiver.scope("CachedSimsStore", "cache")
)
/** Identifier and cache settings shared by the cached and uncached sims stores. */
object SimsStore {
// Built from the Algorithm.Sims name.
val Identifier = CandidateSourceIdentifier(Algorithm.Sims.toString)
// Passed as maxCacheSize to the cached store.
val MaxCacheSize = 50000
// Passed as cacheTtl to the cached store (24 hours).
val CacheTTL: Duration = Duration.fromHours(24)
}

View File

@ -1,40 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.twitter.follow_recommendations.common.candidate_sources.base.StratoFetcherSource
import com.twitter.follow_recommendations.common.models.AccountProof
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.Reason
import com.twitter.follow_recommendations.common.models.SimilarToProof
import com.twitter.hermit.candidate.thriftscala.Candidates
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.strato.client.Fetcher
/**
 * Base class for sims candidate sources served by a Strato fetcher keyed by user id with a
 * view of type U. Result conversion is delegated to the companion object's `map`.
 */
abstract class StratoBasedSimsCandidateSource[U](
fetcher: Fetcher[Long, U, Candidates],
view: U,
override val identifier: CandidateSourceIdentifier)
extends StratoFetcherSource[Long, U, Candidates](fetcher, view, identifier) {
// Converts the fetched Candidates for `target` into CandidateUsers.
override def map(target: Long, candidates: Candidates): Seq[CandidateUser] =
StratoBasedSimsCandidateSource.map(target, candidates)
}
object StratoBasedSimsCandidateSource {

  /**
   * Converts the similar-user list fetched for `target` into [[CandidateUser]]s.
   *
   * Each candidate keeps its user id and score and carries a similar-to proof that names
   * `target` as the account it is similar to.
   */
  def map(target: Long, candidates: Candidates): Seq[CandidateUser] = {
    // Built per candidate, as each CandidateUser gets its own Reason instance.
    def similarToTarget: Reason =
      Reason(Some(AccountProof(similarToProof = Some(SimilarToProof(Seq(target))))))

    candidates.candidates.map { candidate =>
      CandidateUser(
        id = candidate.userId,
        score = Some(candidate.score),
        reason = Some(similarToTarget)
      )
    }
  }
}

View File

@ -1,10 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.twitter.hermit.candidate.thriftscala.Candidates
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.strato.client.Fetcher
/**
 * Specialization of [[StratoBasedSimsCandidateSource]] for fetchers whose view type is Unit.
 *
 * @param fetcher    Strato fetcher returning the similar-user list for a user id
 * @param identifier identifier reported for candidates from this source
 */
abstract class StratoBasedSimsCandidateSourceWithUnitView(
  fetcher: Fetcher[Long, Unit, Candidates],
  override val identifier: CandidateSourceIdentifier)
    // Pass the unit value `()` explicitly. The bare name `Unit` is the companion object and
    // only type-checks because the compiler discards it and substitutes `()`.
    extends StratoBasedSimsCandidateSource[Unit](fetcher, (), identifier)

View File

@ -1,55 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.HasSimilarToContext
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Sims candidate source that picks one of four cached sims stores per request, based on the
 * feature-switch params in [[SimsSourceParams]].
 *
 * The first enabled param wins, in this order: EnableDBV2SimsStore,
 * EnableDBV2SimsRefreshStore, EnableExperimentalSimsStore. If none is enabled, the default
 * cached sims store is used.
 */
@Singleton
class SwitchingSimsSource @Inject() (
  cachedDBV2SimsStore: CachedDBV2SimsStore,
  cachedDBV2SimsRefreshStore: CachedDBV2SimsRefreshStore,
  cachedSimsExperimentalStore: CachedSimsExperimentalStore,
  cachedSimsStore: CachedSimsStore,
  statsReceiver: StatsReceiver = NullStatsReceiver)
    extends CandidateSource[HasParams with HasSimilarToContext, CandidateUser] {

  override val identifier: CandidateSourceIdentifier = SwitchingSimsSource.Identifier

  private val stats = statsReceiver.scope("SwitchingSimsSource")
  private val dbV2SimsStoreCounter = stats.counter("DBV2SimsStore")
  private val dbV2SimsRefreshStoreCounter = stats.counter("DBV2SimsRefreshStore")
  private val simsExperimentalStoreCounter = stats.counter("SimsExperimentalStore")
  private val simsStoreCounter = stats.counter("SimsStore")
  // Resolved once, like the per-store counters, instead of on every request.
  private val totalCounter = stats.counter("total")

  /**
   * Selects the store for this request, increments that store's counter and the "total"
   * counter, then delegates the request to the selected store.
   */
  override def apply(request: HasParams with HasSimilarToContext): Stitch[Seq[CandidateUser]] = {
    val selectedSimsStore =
      if (request.params(SimsSourceParams.EnableDBV2SimsStore)) {
        dbV2SimsStoreCounter.incr()
        cachedDBV2SimsStore
      } else if (request.params(SimsSourceParams.EnableDBV2SimsRefreshStore)) {
        dbV2SimsRefreshStoreCounter.incr()
        cachedDBV2SimsRefreshStore
      } else if (request.params(SimsSourceParams.EnableExperimentalSimsStore)) {
        simsExperimentalStoreCounter.incr()
        cachedSimsExperimentalStore
      } else {
        simsStoreCounter.incr()
        cachedSimsStore
      }
    totalCounter.incr()
    selectedSimsStore(request)
  }
}
/** Holds the identifier exposed by the SwitchingSimsSource class. */
object SwitchingSimsSource {
// Built from the Algorithm.Sims name, the same name the individual sims stores use.
val Identifier: CandidateSourceIdentifier = CandidateSourceIdentifier(Algorithm.Sims.toString)
}

View File

@ -1,23 +0,0 @@
# Scala library target for the sources in this directory.
# Compiled with the "fatal_warnings" option set, so compiler warnings fail the build.
scala_library(
compiler_option_sets = ["fatal_warnings"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/com/google/inject/extensions:guice-assistedinject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/base",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base",
# Sibling candidate-source package providing the sims sources.
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/sims",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/real_time_real_graph",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/socialgraph",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/clients/strato",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/constants",
"follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/models",
"follow-recommendations-service/server/src/main/scala/com/twitter/follow_recommendations/configapi/common",
"strato/src/main/scala/com/twitter/strato/client",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,22 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
/**
 * Feature-switch params for the sims expansion candidate sources.
 */
object DBV2SimsExpansionParams {
// These divisors are used to calibrate DBv2Sims extension candidates scores.
// Both are bounded to [0.1, 100] and default to 1.0.
case object RecentFollowingSimilarUsersDBV2CalibrateDivisor
extends FSBoundedParam[Double](
"sims_expansion_recent_following_similar_users_dbv2_divisor",
default = 1.0d,
min = 0.1d,
max = 100d)
case object RecentEngagementSimilarUsersDBV2CalibrateDivisor
extends FSBoundedParam[Double](
"sims_expansion_recent_engagement_similar_users_dbv2_divisor",
default = 1.0d,
min = 0.1d,
max = 100d)
// Boolean switch, off by default.
case object DisableHeavyRanker
extends FSParam[Boolean]("sims_expansion_disable_heavy_ranker", default = false)
}

View File

@ -1,6 +0,0 @@
# Sims Expansion Candidate Source
Provides similar accounts based on the Sims algorithm for a given set of accounts.
This is a 2nd-hop expansion, meaning that the input accounts could be a user's recently engaged, followed, or algorithm-generated (such as RealGraph) accounts.
For more information on Sims and how it is utilized in the Follow Recommendations Service, please refer to the `follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/sims/README.md` file.

View File

@ -1,14 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
import com.twitter.follow_recommendations.configapi.common.FeatureSwitchConfig
import com.twitter.timelines.configapi.FSParam
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Feature-switch config listing the boolean params of RecentEngagementSimilarUsersParams.
 * Only FirstDegreeSortEnabled is listed here.
 */
@Singleton
class RecentEngagementSimilarUsersFSConfig @Inject() () extends FeatureSwitchConfig {
override val booleanFSParams: Seq[FSParam[Boolean]] = Seq(
RecentEngagementSimilarUsersParams.FirstDegreeSortEnabled
)
}

View File

@ -1,17 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
import com.twitter.timelines.configapi.FSEnumParam
import com.twitter.timelines.configapi.FSParam
/**
 * Feature-switch params for the recent-engagement similar-users source.
 */
object RecentEngagementSimilarUsersParams {
// Boolean switch, on by default.
case object FirstDegreeSortEnabled
extends FSParam[Boolean](
name = "sims_expansion_recent_engagement_first_degree_sort",
default = true)
// Selects the score aggregator by SimsExpansionSourceAggregatorId; defaults to Sum.
case object Aggregator
extends FSEnumParam[SimsExpansionSourceAggregatorId.type](
name = "sims_expansion_recent_engagement_aggregator_id",
default = SimsExpansionSourceAggregatorId.Sum,
enum = SimsExpansionSourceAggregatorId)
}

View File

@ -1,113 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.candidate_sources.sims.SwitchingSimsSource
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
import com.twitter.follow_recommendations.common.models.AccountProof
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.Reason
import com.twitter.follow_recommendations.common.models.SimilarToProof
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Sims-expansion candidate source whose first degree nodes are the users the requesting
 * user recently engaged with, as returned by [[RealTimeRealGraphClient]].
 *
 * Second degree candidates are grouped per candidate id, their scores are combined by the
 * aggregator selected through [[RecentEngagementSimilarUsersParams.Aggregator]], and the
 * highest scored candidates are returned.
 */
@Singleton
class RecentEngagementSimilarUsersSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient,
  switchingSimsSource: SwitchingSimsSource,
  statsReceiver: StatsReceiver)
    extends SimsExpansionBasedCandidateSource[HasClientContext with HasParams](
      switchingSimsSource) {

  override val identifier: CandidateSourceIdentifier =
    RecentEngagementSimilarUsersSource.Identifier

  // Stats are scoped by the identifier name, so `identifier` must be initialised first.
  private val stats = statsReceiver.scope(identifier.name)
  private val calibratedScoreCounter = stats.counter("calibrated_scores_counter")

  /** No cap on the number of second degree nodes expanded per first degree node. */
  override def maxSecondaryDegreeNodes(req: HasClientContext with HasParams): Int =
    Int.MaxValue

  /** Fixed cap on the number of candidates returned. */
  override def maxResults(req: HasClientContext with HasParams): Int =
    RecentEngagementSimilarUsersSource.MaxResults

  /** Candidate score is the product of the source score and the similarity score. */
  override def scoreCandidate(sourceScore: Double, similarToScore: Double): Double =
    sourceScore * similarToScore

  /** Calibration divisor, read from the request params. */
  override def calibrateDivisor(req: HasClientContext with HasParams): Double =
    req.params(DBV2SimsExpansionParams.RecentEngagementSimilarUsersDBV2CalibrateDivisor)

  /** Divides the score by the calibration divisor, counting every calibration performed. */
  override def calibrateScore(
    candidateScore: Double,
    req: HasClientContext with HasParams
  ): Double = {
    calibratedScoreCounter.incr()
    val divisor = calibrateDivisor(req)
    candidateScore / divisor
  }

  /**
   * Fetches the recently engaged-with users of the requesting user, keeping the
   * highest scored ones. Yields no nodes when the request carries no user id.
   */
  override def firstDegreeNodes(
    target: HasClientContext with HasParams
  ): Stitch[Seq[CandidateUser]] =
    target.getOptionalUserId match {
      case Some(userId) =>
        val engagedUsers = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
          userId,
          RealTimeRealGraphClient.EngagementScoreMap,
          includeDirectFollowCandidates = true,
          includeNonDirectFollowCandidates = true
        )
        engagedUsers.map { users =>
          // Descending by score; users without a score are treated as 0.
          users
            .sortBy(user => -user.score.getOrElse(0.0d))
            .take(RecentEngagementSimilarUsersSource.MaxFirstDegreeNodes)
        }
      case None =>
        Stitch.Nil
    }

  /**
   * Merges the second degree nodes of all first degree nodes into one ranked candidate list.
   *
   * Candidates that are themselves first degree nodes are dropped. The remaining ones are
   * grouped by candidate id, scored with the configured aggregator, and annotated with the
   * distinct accounts they are similar to.
   */
  override def aggregateAndScore(
    request: HasClientContext with HasParams,
    firstDegreeToSecondDegreeNodesMap: Map[CandidateUser, Seq[SimilarUser]]
  ): Stitch[Seq[CandidateUser]] = {
    val firstDegreeIds = firstDegreeToSecondDegreeNodesMap.keys.map(_.id).toSet

    val combineScores = request.params(RecentEngagementSimilarUsersParams.Aggregator) match {
      case SimsExpansionSourceAggregatorId.Max =>
        SimsExpansionBasedCandidateSource.ScoreAggregator.Max
      case SimsExpansionSourceAggregatorId.Sum =>
        SimsExpansionBasedCandidateSource.ScoreAggregator.Sum
      case SimsExpansionSourceAggregatorId.MultiDecay =>
        SimsExpansionBasedCandidateSource.ScoreAggregator.MultiDecay
    }

    val secondDegreeOnly = firstDegreeToSecondDegreeNodesMap.values.flatten
      .filterNot(similarUser => firstDegreeIds.contains(similarUser.candidateId))

    val mergedCandidates = secondDegreeOnly
      .groupBy(_.candidateId)
      .map {
        case (candidateId, similarUsers) =>
          val aggregatedScore = combineScores(similarUsers.map(_.score).toSeq)
          val distinctSimilarTo = similarUsers.map(_.similarTo).toSet
          val proof = AccountProof(similarToProof = Some(SimilarToProof(distinctSimilarTo.toSeq)))
          CandidateUser(
            id = candidateId,
            score = Some(aggregatedScore),
            reason = Some(Reason(Some(proof)))
          ).withCandidateSource(identifier)
      }

    val ranked = mergedCandidates.toSeq
      .sortBy(candidate => -candidate.score.getOrElse(0.0d))
      .take(maxResults(request))

    Stitch.value(ranked)
  }
}
/** Constants for [[RecentEngagementSimilarUsersSource]]. */
object RecentEngagementSimilarUsersSource {

  /** Candidate source identifier, named after `Algorithm.RecentEngagementSimilarUser`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RecentEngagementSimilarUser.toString)

  /** Number of first degree nodes kept after sorting by score. */
  val MaxFirstDegreeNodes: Int = 10

  /** Number of candidates returned by the source. */
  val MaxResults: Int = 200
}

View File

@ -1,29 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
import com.twitter.timelines.configapi.FSBoundedParam
import com.twitter.timelines.configapi.FSParam
/**
 * Feature-switch params for the recent-following sims expansion candidate source.
 */
object RecentFollowingSimilarUsersParams {

  /** Bounded int feature switch in [0, 200], default 10. */
  case object MaxFirstDegreeNodes
      extends FSBoundedParam[Int](
        name = "sims_expansion_recent_following_max_first_degree_nodes",
        default = 10,
        min = 0,
        max = 200
      )

  /** Bounded int feature switch in [0, 200], default 40. */
  case object MaxSecondaryDegreeExpansionPerNode
      extends FSBoundedParam[Int](
        name = "sims_expansion_recent_following_max_secondary_degree_nodes",
        default = 40,
        min = 0,
        max = 200
      )

  /** Bounded int feature switch in [0, 200], default 200. */
  case object MaxResults
      extends FSBoundedParam[Int](
        name = "sims_expansion_recent_following_max_results",
        default = 200,
        min = 0,
        max = 200
      )

  /** Boolean feature switch, `false` by default. */
  case object TimestampIntegrated
      extends FSParam[Boolean](
        name = "sims_expansion_recent_following_integ_timestamp",
        default = false
      )
}

View File

@ -1,99 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
import com.google.inject.Singleton
import com.twitter.follow_recommendations.common.candidate_sources.sims.SwitchingSimsSource
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models.HasRecentFollowedUserIds
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.follow_recommendations.common.clients.socialgraph.SocialGraphClient
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import javax.inject.Inject
/** Constants for [[RecentFollowingSimilarUsersSource]]. */
object RecentFollowingSimilarUsersSource {

  /** Candidate source identifier, named after `Algorithm.NewFollowingSimilarUser`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.NewFollowingSimilarUser.toString)
}
/**
 * Sims-expansion candidate source whose first degree nodes are the users the requesting
 * user recently followed.
 *
 * Depending on [[RecentFollowingSimilarUsersParams.TimestampIntegrated]] the first degree
 * nodes either come from [[SocialGraphClient]] with a follow-time based score, or from the
 * recent followed user ids already present on the request with a constant score of 1.0.
 */
@Singleton
class RecentFollowingSimilarUsersSource @Inject() (
  socialGraph: SocialGraphClient,
  switchingSimsSource: SwitchingSimsSource,
  statsReceiver: StatsReceiver)
    extends SimsExpansionBasedCandidateSource[
      HasParams with HasRecentFollowedUserIds with HasClientContext
    ](switchingSimsSource) {

  val identifier: CandidateSourceIdentifier = RecentFollowingSimilarUsersSource.Identifier

  // Stats are scoped by the identifier name, so `identifier` must be initialised first.
  private val stats = statsReceiver.scope(identifier.name)
  private val maxResultsStats = stats.scope("max_results")
  private val calibratedScoreCounter = stats.counter("calibrated_scores_counter")

  /**
   * Returns the most recently followed users as first degree nodes.
   *
   * With timestamp integration enabled, follows are fetched from the social graph, the
   * most recent ones are kept, and each is scored by its follow time relative to the
   * newest kept follow. Otherwise the ids on the request are used with a score of 1.0.
   *
   * Yields no nodes (instead of throwing) when the request has no user id or when there
   * are no recent follows to normalise against.
   */
  override def firstDegreeNodes(
    request: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Stitch[Seq[CandidateUser]] = {
    val maxFirstDegreeNodes =
      request.params(RecentFollowingSimilarUsersParams.MaxFirstDegreeNodes)

    if (request.params(RecentFollowingSimilarUsersParams.TimestampIntegrated)) {
      // Same missing-user handling as the other sims expansion sources: no user, no nodes.
      request.getOptionalUserId
        .map { userId =>
          socialGraph.getRecentFollowedUserIdsWithTime(userId).map { results =>
            val mostRecentFollows = results.sortBy(-_.timeInMs).take(maxFirstDegreeNodes)
            // The list is sorted newest first, so its head (if any) carries the max timestamp.
            // headOption guards against an empty result or a max of 0 first degree nodes.
            mostRecentFollows.headOption match {
              case Some(newestFollow) =>
                val maxTimestamp = newestFollow.timeInMs
                mostRecentFollows.map { follow =>
                  CandidateUser(
                    follow.userId,
                    score = Some(follow.timeInMs.toDouble / maxTimestamp))
                }
              case None =>
                Seq.empty
            }
          }
        }.getOrElse(Stitch.Nil)
    } else {
      Stitch.value(
        request.recentFollowedUserIds
          .getOrElse(Nil)
          .take(maxFirstDegreeNodes)
          .map(CandidateUser(_, score = Some(1.0)))
      )
    }
  }

  /** Cap on second degree nodes expanded per first degree node, read from params. */
  override def maxSecondaryDegreeNodes(
    req: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Int = {
    req.params(RecentFollowingSimilarUsersParams.MaxSecondaryDegreeExpansionPerNode)
  }

  /**
   * Cap on returned candidates, read from params. Also records a stat whose name embeds
   * the configured first degree node and max result values.
   */
  override def maxResults(
    req: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Int = {
    val firstDegreeNodes = req.params(RecentFollowingSimilarUsersParams.MaxFirstDegreeNodes)
    val maxResultsNum = req.params(RecentFollowingSimilarUsersParams.MaxResults)
    maxResultsStats
      .stat(
        s"RecentFollowingSimilarUsersSource_firstDegreeNodes_${firstDegreeNodes}_maxResults_${maxResultsNum}")
      .add(1)
    maxResultsNum
  }

  /** Candidate score is the product of the source score and the similarity score. */
  override def scoreCandidate(sourceScore: Double, similarToScore: Double): Double = {
    sourceScore * similarToScore
  }

  /** Calibration divisor, read from the request params. */
  override def calibrateDivisor(
    req: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Double = {
    req.params(DBV2SimsExpansionParams.RecentFollowingSimilarUsersDBV2CalibrateDivisor)
  }

  /** Divides the score by the calibration divisor, counting every calibration performed. */
  override def calibrateScore(
    candidateScore: Double,
    req: HasParams with HasRecentFollowedUserIds with HasClientContext
  ): Double = {
    calibratedScoreCounter.incr()
    candidateScore / calibrateDivisor(req)
  }
}

View File

@ -1,53 +0,0 @@
package com.twitter.follow_recommendations.common.candidate_sources.sims_expansion
import com.google.inject.Singleton
import com.twitter.follow_recommendations.common.candidate_sources.sims.SwitchingSimsSource
import com.twitter.follow_recommendations.common.clients.real_time_real_graph.RealTimeRealGraphClient
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.hermit.model.Algorithm
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.product_mixer.core.model.marshalling.request.HasClientContext
import com.twitter.stitch.Stitch
import com.twitter.timelines.configapi.HasParams
import javax.inject.Inject
/**
 * Sims-expansion candidate source whose first degree nodes are the direct-follow users
 * the requesting user recently had strong engagement with, as returned by
 * [[RealTimeRealGraphClient]].
 */
@Singleton
class RecentStrongEngagementDirectFollowSimilarUsersSource @Inject() (
  realTimeRealGraphClient: RealTimeRealGraphClient,
  switchingSimsSource: SwitchingSimsSource)
    extends SimsExpansionBasedCandidateSource[HasClientContext with HasParams](
      switchingSimsSource) {

  val identifier: CandidateSourceIdentifier =
    RecentStrongEngagementDirectFollowSimilarUsersSource.Identifier

  /**
   * Fetches the recently engaged-with direct-follow users, keeping the first
   * `MaxFirstDegreeNodes` in the order the client returns them. Yields no nodes when the
   * request carries no user id.
   */
  override def firstDegreeNodes(
    request: HasClientContext with HasParams
  ): Stitch[Seq[CandidateUser]] =
    request.getOptionalUserId match {
      case Some(userId) =>
        val engagedUsers = realTimeRealGraphClient.getUsersRecentlyEngagedWith(
          userId,
          RealTimeRealGraphClient.StrongEngagementScoreMap,
          includeDirectFollowCandidates = true,
          includeNonDirectFollowCandidates = false
        )
        engagedUsers.map { users =>
          users.take(RecentStrongEngagementDirectFollowSimilarUsersSource.MaxFirstDegreeNodes)
        }
      case None =>
        Stitch.Nil
    }

  /** No cap on the number of second degree nodes expanded per first degree node. */
  override def maxSecondaryDegreeNodes(request: HasClientContext with HasParams): Int =
    Int.MaxValue

  /** Fixed cap on the number of candidates returned. */
  override def maxResults(request: HasClientContext with HasParams): Int =
    RecentStrongEngagementDirectFollowSimilarUsersSource.MaxResults

  /** Candidate score is the product of the source score and the similarity score. */
  override def scoreCandidate(sourceScore: Double, similarToScore: Double): Double =
    sourceScore * similarToScore

  /** Constant divisor of 1.0. */
  override def calibrateDivisor(req: HasClientContext with HasParams): Double = 1.0d
}
/** Constants for [[RecentStrongEngagementDirectFollowSimilarUsersSource]]. */
object RecentStrongEngagementDirectFollowSimilarUsersSource {

  /** Candidate source identifier, named after `Algorithm.RecentStrongEngagementSimilarUser`. */
  val Identifier: CandidateSourceIdentifier =
    CandidateSourceIdentifier(Algorithm.RecentStrongEngagementSimilarUser.toString)

  /** Number of first degree nodes kept from the client response. */
  val MaxFirstDegreeNodes: Int = 10

  /** Number of candidates returned by the source. */
  val MaxResults: Int = 200
}

Some files were not shown because too many files have changed in this diff Show More