mirror of
https://github.com/twitter/the-algorithm.git
synced 2025-01-02 23:51:53 +01:00
Merge 3abe69d1ef
into 72eda9a24f
This commit is contained in:
commit
7506f091cf
@ -40,7 +40,7 @@ import javax.inject.Singleton
|
||||
/**
|
||||
* In this filter, we want to check 4 categories of conditions:
|
||||
* - if candidate is discoverable given that it's from an address-book/phone-book based source
|
||||
* - if candidate is unsuitable based on it's safety sub-fields in gizmoduck
|
||||
* - if candidate is unsuitable based on its safety sub-fields in gizmoduck
|
||||
* - if candidate is withheld because of country-specific take-down policies
|
||||
* - if candidate is marked as bad/worst based on blink labels
|
||||
* We fail closed on the query as this is a product-critical filter
|
||||
|
@ -17,7 +17,7 @@ private[selector] object InsertIntoModule {
|
||||
otherCandidates: Queue[CandidateWithDetails])
|
||||
|
||||
/**
|
||||
* Given a Seq of `candidates`, returns the first module with it's index that matches the
|
||||
* Given a Seq of `candidates`, returns the first module with its index that matches the
|
||||
* `targetModuleCandidatePipeline` with all the [[ItemCandidateWithDetails]] that match the
|
||||
* `candidatePipeline` added to the `itemsToInsert` and the remaining candidates, including the
|
||||
* module, in the `otherCandidates`
|
||||
|
@ -17,7 +17,7 @@ import scala.collection.immutable.Queue
|
||||
* before which the async [[Feature]]s are needed, and a [[Stitch]] of the async [[FeatureMap]].
|
||||
* It's expected that the [[Stitch]] has already been started and is running in the background.
|
||||
*
|
||||
* While not essential to it's core behavior, [[AsyncFeatureMap]] also keeps track of the [[FeatureHydratorIdentifier]]
|
||||
* While not essential to its core behavior, [[AsyncFeatureMap]] also keeps track of the [[FeatureHydratorIdentifier]]
|
||||
* and the Set of [[Feature]]s which will be hydrated for each [[Stitch]] of a [[FeatureMap]] it's given.
|
||||
*
|
||||
* @param asyncFeatureMaps the [[FeatureMap]]s for [[PipelineStepIdentifier]]s which have not been reached yet
|
||||
|
@ -14,7 +14,7 @@ import com.twitter.stitch.Stitch
|
||||
/**
|
||||
* A gate controls if a pipeline or other component is executed
|
||||
*
|
||||
* A gate is mostly controlled by it's `shouldContinue` function - when this function
|
||||
* A gate is mostly controlled by its `shouldContinue` function - when this function
|
||||
* returns true, execution Continues.
|
||||
*
|
||||
* Gates also have an optional `shouldSkip`- When it returns
|
||||
@ -61,7 +61,7 @@ sealed trait BaseGate[-Query <: PipelineQuery] extends Component {
|
||||
* A regular Gate which only has access to the Query typed PipelineQuery. This can be used anywhere
|
||||
* Gates are available.
|
||||
*
|
||||
* A gate is mostly controlled by it's `shouldContinue` function - when this function
|
||||
* A gate is mostly controlled by its `shouldContinue` function - when this function
|
||||
* returns true, execution Continues.
|
||||
*
|
||||
* Gates also have an optional `shouldSkip`- When it returns
|
||||
@ -78,7 +78,7 @@ trait Gate[-Query <: PipelineQuery] extends BaseGate[Query]
|
||||
* list of previously fetched candidates. This can be used on dependent candidate pipelines to
|
||||
* make a decision on whether to enable/disable them based on previous candidates.
|
||||
*
|
||||
* A gate is mostly controlled by it's `shouldContinue` function - when this function
|
||||
* A gate is mostly controlled by its `shouldContinue` function - when this function
|
||||
* returns true, execution Continues.
|
||||
*
|
||||
* Gates also have an optional `shouldSkip`- When it returns
|
||||
|
@ -4,7 +4,7 @@ import com.twitter.product_mixer.core.feature.featuremap.FeatureMap
|
||||
import com.twitter.product_mixer.core.model.common.CandidateWithFeatures
|
||||
import com.twitter.product_mixer.core.model.common.UniversalNoun
|
||||
|
||||
/** A [[Candidate]] and it's [[FeatureMap]] after being processed by a [[Scorer]] */
|
||||
/** A [[Candidate]] and its [[FeatureMap]] after being processed by a [[Scorer]] */
|
||||
case class ScoredCandidateResult[Candidate <: UniversalNoun[Any]](
|
||||
candidate: Candidate,
|
||||
scorerResult: FeatureMap)
|
||||
|
@ -2,7 +2,7 @@ package com.twitter.product_mixer.core.model.common
|
||||
|
||||
import com.twitter.product_mixer.core.feature.featuremap.FeatureMap
|
||||
|
||||
/** [[Candidate]] and it's FeatureMap */
|
||||
/** [[Candidate]] and its FeatureMap */
|
||||
trait CandidateWithFeatures[+Candidate <: UniversalNoun[Any]] {
|
||||
val candidate: Candidate
|
||||
val features: FeatureMap
|
||||
|
@ -14,7 +14,7 @@ import com.twitter.util.Duration
|
||||
/**
|
||||
* Domain model for the URT ShowAlert [[https://docbird.twitter.biz/unified_rich_timelines_urt/gen/com/twitter/timelines/render/thriftscala/ShowAlert.html]]
|
||||
*
|
||||
* @note the text field (id: 2) has been deliberately excluded as it's been deprecated since 2018. Use RichText instead.
|
||||
* @note the text field (id: 2) has been deliberately excluded as it has been deprecated since 2018. Use RichText instead.
|
||||
*/
|
||||
case class ShowAlert(
|
||||
override val id: String,
|
||||
|
@ -47,7 +47,7 @@ class ProductScopeStringCenterModule extends TwitterModule {
|
||||
/* The Guice injector is single threaded, but out of a preponderance of caution we use a concurrent Map.
|
||||
*
|
||||
* We need to ensure that we only build one StringSource, StringCenter client, and External String
|
||||
* Registry for each String Center Project. @ProductScoped doesn't ensure this on it's own as
|
||||
* Registry for each String Center Project. @ProductScoped doesn't ensure this on its own as
|
||||
* two products can have the same String Center Project set.
|
||||
*/
|
||||
val stringSources: concurrent.Map[String, StringSource] = concurrent.TrieMap.empty
|
||||
|
@ -12,7 +12,7 @@ import scala.util.control.NoStackTrace
|
||||
* The reason field should not be displayed to end-users, and is free to change over time.
|
||||
* It should always be free of private user data such that we can log it.
|
||||
*
|
||||
* The pipeline can classify it's own failures into categories (timeouts, invalid arguments,
|
||||
* The pipeline can classify its own failures into categories (timeouts, invalid arguments,
|
||||
* rate limited, etc) such that the caller can choose how to handle it.
|
||||
*
|
||||
* @note [[componentStack]] should only be set by the product mixer framework,
|
||||
|
@ -70,7 +70,7 @@ class ProductPipelineBuilder[TRequest <: Request, Query <: PipelineQuery, Respon
|
||||
* It's a simple, synchronous step that executes the query transformer.
|
||||
*
|
||||
* Since the output of the transformer is used in multiple other steps (Gate, Pipeline Execution),
|
||||
* we've promoted the transformer to a step so that it's outputs can be reused easily.
|
||||
* we've promoted the transformer to a step so that its outputs can be reused easily.
|
||||
*/
|
||||
def pipelineQueryTransformerStep(
|
||||
queryTransformer: (TRequest, Params) => Query,
|
||||
|
@ -72,7 +72,7 @@ trait ScoringPipelineConfig[-Query <: PipelineQuery, Candidate <: UniversalNoun[
|
||||
|
||||
/**
|
||||
* Ranker Function for candidates. Scorers are executed in parallel.
|
||||
* Note: Order does not matter, this could be a Set if Set was covariant over it's type.
|
||||
* Note: Order does not matter, this could be a Set if Set was covariant over its type.
|
||||
*/
|
||||
def scorers: Seq[Scorer[Query, Candidate]]
|
||||
|
||||
|
@ -106,7 +106,7 @@ private[core] trait Executor {
|
||||
def startArrowAsync[In, Out](arrow: Arrow[In, Out]): Arrow[In, Stitch[Out]] = {
|
||||
Arrow
|
||||
.map { arg: In =>
|
||||
// wrap in a `ref` so we only compute it's value once
|
||||
// wrap in a `ref` so we only compute its value once
|
||||
Stitch.ref(arrow(arg))
|
||||
}
|
||||
.andThen(
|
||||
@ -452,7 +452,7 @@ private[core] object Executor {
|
||||
* @note Should __never__ be called directly!
|
||||
*
|
||||
* It's expected that the contained `arrow` will invoke [[recordTraceData]] exactly ONCE
|
||||
* during it's execution.
|
||||
* during its execution.
|
||||
*
|
||||
* @note this does not record any data about the trace, it only sets the [[Trace]] Span
|
||||
* for the execution of `arrow`
|
||||
@ -630,7 +630,7 @@ private[core] object Executor {
|
||||
|
||||
/**
|
||||
* contains the scopes for recording metrics for the component by itself and
|
||||
* the relative scope of that component within it's parent component scope
|
||||
* the relative scope of that component within its parent component scope
|
||||
*
|
||||
* @see [[Executor.buildScopes]]
|
||||
*/
|
||||
|
@ -68,7 +68,7 @@ class ComponentRegistrySnapshot() extends Logging {
|
||||
* these 2 components would be indistinguishable.
|
||||
*
|
||||
* @throws ComponentIdentifierCollisionException if a [[Component]] with the same [[ComponentIdentifier]] is registered
|
||||
* but it's type is not the same as a previously registered [[Component]]
|
||||
* but its type is not the same as a previously registered [[Component]]
|
||||
* with the same [[ComponentIdentifier]]
|
||||
* e.g. if you register 2 [[Component]]s with the same [[ComponentIdentifier]]
|
||||
* such as `new Component` and an instance of
|
||||
|
@ -163,7 +163,7 @@ object Observer {
|
||||
rollupStatsReceiver.counter(Failures)
|
||||
rollupStatsReceiver.counter(Cancelled)
|
||||
|
||||
/** Serialize a throwable and it's causes into a seq of Strings for scoping metrics */
|
||||
/** Serialize a throwable and its causes into a seq of Strings for scoping metrics */
|
||||
protected def serializeThrowable(throwable: Throwable): Seq[String] =
|
||||
Throwables.mkString(throwable)
|
||||
|
||||
|
@ -254,7 +254,7 @@ object ResultsObserver {
|
||||
protected val foundCounter: Counter = statsReceiver.counter(scopes :+ Found: _*)
|
||||
protected val notFoundCounter: Counter = statsReceiver.counter(scopes :+ NotFound: _*)
|
||||
|
||||
/** given a [[T]] returns it's size. */
|
||||
/** given a [[T]] returns its size. */
|
||||
protected val size: T => Int
|
||||
|
||||
/** Records the size of the `results` using [[size]] and return the original value. */
|
||||
|
@ -209,7 +209,7 @@ public enum TweetFeatureType {
|
||||
/**
|
||||
* Constructing an enum for a type. The earlybirdField can be null if it's not prepared, they
|
||||
* can be here as placeholders but they can't be outputted.
|
||||
* The normalizer is null for the timestamp features that do not require normalization
|
||||
* The normalizer is null for the timestamp features that do not require normalization.
|
||||
*/
|
||||
TweetFeatureType(boolean incremental,
|
||||
int typeInt,
|
||||
|
@ -325,7 +325,7 @@ public final class FacetsResultsUtils {
|
||||
|
||||
/**
|
||||
* Replace "p.twimg.com/" part of the native photo (twimg) URL with "pbs.twimg.com/media/".
|
||||
* We need to do this because of blobstore and it's suppose to be a temporary measure. This
|
||||
* We need to do this because of blobstore and it's supposed to be a temporary measure. This
|
||||
* code should be removed once we have verified that all native photo URLs being sent to Search
|
||||
* are prefixed with "pbs.twimg.com/media/" and no native photo URL in our index contains
|
||||
* "p.twimg.com/"
|
||||
@ -357,7 +357,7 @@ public final class FacetsResultsUtils {
|
||||
|
||||
/**
|
||||
* Replace "p.twimg.com/" part of the native photo (twimg) URL with "pbs.twimg.com/media/".
|
||||
* We need to do this because of blobstore and it's suppose to be a temporary measure. This
|
||||
* We need to do this because of blobstore and it's supposed to be a temporary measure. This
|
||||
* code should be removed once we have verified that all native photo URLs being sent to Search
|
||||
* are prefixed with "pbs.twimg.com/media/" and no native photo URL in our index contains
|
||||
* "p.twimg.com/"
|
||||
|
@ -514,7 +514,7 @@ public final class EarlybirdRealtimeIndexSegmentWriter extends EarlybirdIndexSeg
|
||||
// chokes on a given document), then it's
|
||||
// non-aborting and (above) this one document
|
||||
// will be marked as deleted, but still
|
||||
// consume a docID
|
||||
// consume a docID.
|
||||
|
||||
int posIncr = posIncrAttribute.getPositionIncrement();
|
||||
currentPosition += posIncr;
|
||||
|
@ -1307,7 +1307,7 @@ public class EarlybirdSearcher {
|
||||
}
|
||||
if (!searchQuery.getRelevanceOptions().isSetRankingParams()) {
|
||||
searchQuery.getRelevanceOptions().setRankingParams(
|
||||
// this is important, or it's gonna pick DefaultScoringFunction which pretty much
|
||||
// this is important, or it's going to pick DefaultScoringFunction which pretty much
|
||||
// does nothing.
|
||||
new ThriftRankingParams().setType(ThriftScoringFunctionType.TOPTWEETS));
|
||||
}
|
||||
|
@ -321,7 +321,7 @@ public class HashingAndPruningFacetAccumulator extends FacetAccumulator {
|
||||
}
|
||||
|
||||
// Compacts the hashtable entries in place by removing empty hashes. After
|
||||
// this operation it's no longer a hash table but a array of entries.
|
||||
// this operation, it's no longer a hash table but an array of entries.
|
||||
private void copyToSortBuffer() {
|
||||
int upto = 0;
|
||||
|
||||
|
@ -63,7 +63,7 @@ import com.twitter.search.earlybird.thrift.ThriftSocialFilterType;
|
||||
* - computeScore
|
||||
* - generateExplanationForScoring
|
||||
*
|
||||
* They are called for scoring and generating the debug information of the document that it's
|
||||
* They are called for scoring and generating the debug information of the document that is
|
||||
* currently being evaluated. The field 'data' holds the features of the document.
|
||||
*/
|
||||
public abstract class FeatureBasedScoringFunction extends ScoringFunction {
|
||||
@ -709,7 +709,7 @@ public abstract class FeatureBasedScoringFunction extends ScoringFunction {
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the score of the document that it's currently being evaluated.
|
||||
* Computes the score of the document that is currently being evaluated.
|
||||
*
|
||||
* The extracted features from the document are available in the field 'data'.
|
||||
*
|
||||
|
@ -85,7 +85,7 @@ public class QueryTokenizerFilter extends SimpleFilter<EarlybirdRequestContext,
|
||||
public void performExpensiveInitialization() throws QueryParserException {
|
||||
SerializedQueryParser queryParser = new SerializedQueryParser(tokenizationOption);
|
||||
|
||||
// The Korean query parser takes a few seconds on it's own to initialize.
|
||||
// The Korean query parser takes a few seconds on its own to initialize.
|
||||
String koreanQuery = "스포츠";
|
||||
queryParser.parse(koreanQuery);
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ object GetRelatedTweetCandidatesUtil {
|
||||
* for non-tweetBasedRelatedTweet, We don't have a query tweet, to keep scoring function consistent,
|
||||
* scorePreFactor = 1000.0 / LHSuserSize (queryTweetDegree's average is ~10k, 1000 ~= 10k/log(10k))
|
||||
* Though scorePreFactor is applied for all results within a request, it's still useful to make score comparable across requests,
|
||||
* so we can have a unifed min_score and help with downstream score normalization
|
||||
* so we can have a unified min_score and help with downstream score normalization.
|
||||
* **/
|
||||
def getRelatedTweetCandidates(
|
||||
relatedTweetCandidates: Seq[Long],
|
||||
|
@ -80,7 +80,7 @@ object ClusterEvaluation {
|
||||
* Evaluate the quality of a cluster.
|
||||
* @param memberScores A map with the members of the cluster as the keys and their scores
|
||||
* inside the cluster as values. The more central a member is inside the cluster,
|
||||
* the higher it's score is.
|
||||
* the higher its score is.
|
||||
* @param membersAdjLists A map that gives the weighted neighbors of each member in the cluster.
|
||||
*/
|
||||
def evaluateCluster(
|
||||
|
@ -211,7 +211,7 @@ object UpdateKnownFor {
|
||||
*
|
||||
* This function is where all the crucial steps take place. First get the cluster that each
|
||||
* node belongs to, and then broadcast information about this node and cluster membership to each
|
||||
* of it's neighbors. Now bring together all records with the same nodeId as the key and create
|
||||
* of its neighbors. Now bring together all records with the same nodeId as the key and create
|
||||
* the NodeInformation dataset.
|
||||
* @param graph symmetric graph i.e. if u is in v's adj list, then v is in u's adj list.
|
||||
* @param userToClusters current knownFor.
|
||||
|
@ -73,7 +73,7 @@ object UpdateKnownForApps {
|
||||
* For each cluster, get two statistics about it: the number of nodes assigned to it, and the
|
||||
* sum of the membership scores
|
||||
*
|
||||
* @param knownFor TypedPipe from nodeId to the clusters it's been assigned to along with
|
||||
* @param knownFor TypedPipe from nodeId to the clusters it has been assigned to along with
|
||||
* membership scores.
|
||||
* @return Map giving the NeighborhoodInformation for each cluster. The nodeCount and
|
||||
* sumOfMembershipWeights fields in NeighborhoodInformation are populated, others are 0.
|
||||
|
@ -478,7 +478,7 @@ class EarlyStopDuration(tf.train.SessionRunHook):
|
||||
return self.elapsed_time() > self._max_duration
|
||||
|
||||
def elapsed_time(self) -> float:
|
||||
# Recorded elapsed time is 0 unless it's been recorded in a file already
|
||||
# Recorded elapsed time is 0 unless it has been recorded in a file already
|
||||
recorded_elapsed_time = 0
|
||||
if tf.io.gfile.exists(self.elapsed_file_path):
|
||||
with tf.io.gfile.GFile(self.elapsed_file_path, mode="r") as file:
|
||||
|
Loading…
Reference in New Issue
Block a user