the-algorithm/src/thrift/com/twitter/search/common/ranking/ranking.thrift

namespace java com.twitter.search.common.ranking.thriftjava
#@namespace scala com.twitter.search.common.ranking.thriftscala
#@namespace strato com.twitter.search.common.ranking
namespace py gen.twitter.search.common.ranking.ranking

struct ThriftLinearFeatureRankingParams {
  // values below this will set the score to the minimal one
  1: optional double min = -1e+100
  // values above this will set the score to the minimal one
  2: optional double max = 1e+100
  3: optional double weight = 0
}(persisted='true')

struct ThriftAgeDecayRankingParams {
  // the rate in which the score of older tweets decreases
  1: optional double slope = 0.003
  // the age, in minutes, where the age score of a tweet is half of the latest tweet
  2: optional double halflife = 360.0
  // the minimal age decay score a tweet will have
  3: optional double base = 0.6
}(persisted='true')

enum ThriftScoringFunctionType {
  LINEAR = 1,
  MODEL_BASED = 4,
  TENSORFLOW_BASED = 5,

  // deprecated
  TOPTWEETS = 2,
  EXPERIMENTAL = 3,
}

// The struct to define a class that is to be dynamically loaded in earlybird for
// experimentation.
struct ThriftExperimentClass {
  // the fully qualified class name.
  1: required string name
  // data source location (class/jar file) for this dynamic class on HDFS
  2: optional string location
  // parameters in key-value pairs for this experimental class
  3: optional map<string, double> params
}(persisted='true')

// Deprecated!!
struct ThriftQueryEngagementParams {
  // Rate Boosts: given a rate (usually a small fraction), the score will be multiplied by
  //   (1 + rate) ^ boost
  // 0 mean no boost, negative numbers are dampens
  1: optional double retweetRateBoost = 0
  2: optional double replyRateBoost = 0
  3: optional double faveRateBoost = 0
}(persisted='true')

struct ThriftHostQualityParams {
  // Multiplier applied to host score, for tweets that have links.
  // A multiplier of 0 means that this boost is not applied
  1: optional double multiplier = 0.0

  // Do not apply the multiplier to hosts with score above this level.
  // If 0, the multiplier will be applied to any host.
  2: optional double maxScoreToModify = 0.0

  // Do not apply the multiplier to hosts with score below this level.
  // If 0, the multiplier will be applied to any host.
  3: optional double minScoreToModify = 0.0

  // If true, score modification will be applied to hosts that have unknown scores.
  // The host-score used will be lower than the score of any known host.
  4: optional bool applyToUnknownHosts = 0
}(persisted='true')

struct ThriftCardRankingParams {
  1: optional double hasCardBoost          = 1.0
  2: optional double domainMatchBoost      = 1.0
  3: optional double authorMatchBoost      = 1.0
  4: optional double titleMatchBoost       = 1.0
  5: optional double descriptionMatchBoost = 1.0
}(persisted='true')

# The ids are assigned in 'blocks'. For adding a new field, find an unused id in the appropriate
# block. Be sure to mention explicitly which ids have been removed so that they are not used again.
struct ThriftRankingParams {
  1: optional ThriftScoringFunctionType type

  // Dynamically loaded scorer and collector for quick experimentation.
  40: optional ThriftExperimentClass expScorer
  41: optional ThriftExperimentClass expCollector

  // we must set it to a value that fits into a float: otherwise
  // some earlybird classes that convert it to float will interpret
  // it as Float.NEGATIVE_INFINITY, and some comparisons will fail
  2: optional double minScore = -1e+30

  10: optional ThriftLinearFeatureRankingParams parusScoreParams
  11: optional ThriftLinearFeatureRankingParams retweetCountParams
  12: optional ThriftLinearFeatureRankingParams replyCountParams
  15: optional ThriftLinearFeatureRankingParams reputationParams
  16: optional ThriftLinearFeatureRankingParams luceneScoreParams
  18: optional ThriftLinearFeatureRankingParams textScoreParams
  19: optional ThriftLinearFeatureRankingParams urlParams
  20: optional ThriftLinearFeatureRankingParams isReplyParams
  21: optional ThriftLinearFeatureRankingParams directFollowRetweetCountParams
  22: optional ThriftLinearFeatureRankingParams trustedCircleRetweetCountParams
  23: optional ThriftLinearFeatureRankingParams favCountParams
  24: optional ThriftLinearFeatureRankingParams multipleReplyCountParams
  27: optional ThriftLinearFeatureRankingParams embedsImpressionCountParams
  28: optional ThriftLinearFeatureRankingParams embedsUrlCountParams
  29: optional ThriftLinearFeatureRankingParams videoViewCountParams
  66: optional ThriftLinearFeatureRankingParams quotedCountParams

  // A map from MutableFeatureType to linear ranking params
  25: optional map<byte, ThriftLinearFeatureRankingParams> offlineExperimentalFeatureRankingParams

  // if min/max for score or ThriftLinearFeatureRankingParams should always be
  // applied or only to non-follows, non-self, non-verified
  26: optional bool applyFiltersAlways = 0

  // Whether to apply promotion/demotion at all for FeatureBasedScoringFunction
  70: optional bool applyBoosts = 1

  // UI language is english, tweet language is not
  30: optional double langEnglishUIBoost = 0.3
  // tweet language is english, UI language is not
  31: optional double langEnglishTweetBoost = 0.7
  // user language differs from tweet language, and neither is english
  32: optional double langDefaultBoost = 0.1
  // user that produced tweet is marked as spammer by metastore
  33: optional double spamUserBoost = 1.0
  // user that produced tweet is marked as nsfw by metastore
  34: optional double nsfwUserBoost = 1.0
  // user that produced tweet is marked as bot (self similarity) by metastore
  35: optional double botUserBoost = 1.0

  // An alternative way of using lucene score in the ranking function.
  38: optional bool useLuceneScoreAsBoost = 0
  39: optional double maxLuceneScoreBoost = 1.2

  // Use user's consumed and produced languages for scoring
  42: optional bool useUserLanguageInfo = 0

  // Boost (demotion) if the tweet language is not one of user's understandable languages,
  // nor interface language.
  43: optional double unknownLanguageBoost = 0.01

  // Use topic ids for scoring.
  // Deprecated in SEARCH-8616.
  44: optional bool deprecated_useTopicIDsBoost = 0
  // Parameters for topic id scoring.  See TopicIDsBoostScorer (and its test) for details.
  46: optional double deprecated_maxTopicIDsBoost = 3.0
  47: optional double deprecated_topicIDsBoostExponent = 2.0;
  48: optional double deprecated_topicIDsBoostSlope = 2.0;

  // Hit Attribute Demotion
  60: optional bool enableHitDemotion = 0
  61: optional double noTextHitDemotion = 1.0
  62: optional double urlOnlyHitDemotion = 1.0
  63: optional double nameOnlyHitDemotion = 1.0
  64: optional double separateTextAndNameHitDemotion = 1.0
  65: optional double separateTextAndUrlHitDemotion = 1.0

  // multiplicative score boost for results deemed offensive
  100: optional double offensiveBoost = 1
  // multiplicative score boost for results in the searcher's social circle
  101: optional double inTrustedCircleBoost = 1
  // multiplicative score dampen for results with more than one hash tag
  102: optional double multipleHashtagsOrTrendsBoost = 1
  // multiplicative score boost for results in the searcher's direct follows
  103: optional double inDirectFollowBoost = 1
  // multiplicative score boost for results that has trends
  104: optional double tweetHasTrendBoost = 1
  // is tweet from verified account?
  106: optional double tweetFromVerifiedAccountBoost = 1
  // is tweet authored by the searcher? (boost is in addition to social boost)
  107: optional double selfTweetBoost = 1
  // multiplicative score boost for a tweet that has image url.
  108: optional double tweetHasImageUrlBoost = 1
  // multiplicative score boost for a tweet that has video url.
  109: optional double tweetHasVideoUrlBoost = 1
  // multiplicative score boost for a tweet that has news url.
  110: optional double tweetHasNewsUrlBoost = 1
  // is tweet from a blue-verified account?
  111: optional double tweetFromBlueVerifiedAccountBoost = 1 (personalDataType = 'UserVerifiedFlag')

  // subtractive penalty applied after boosts for out-of-network replies.
  120: optional double outOfNetworkReplyPenalty = 10.0

  150: optional ThriftQueryEngagementParams deprecatedQueryEngagementParams

  160: optional ThriftHostQualityParams deprecatedHostQualityParams

  // age decay params for regular tweets
  203: optional ThriftAgeDecayRankingParams ageDecayParams

  // for card ranking: map between card name ordinal (defined in com.twitter.search.common.constants.CardConstants)
  // to ranking params
  400: optional map<byte, ThriftCardRankingParams> cardRankingParams

  // A map from tweet IDs to the score adjustment for that tweet. These are score
  // adjustments that include one or more features that can depend on the query
  // string. These features aren't indexed by Earlybird, and so their total contribution
  // to the scoring function is passed in directly as part of the request. If present,
  // the score adjustment for a tweet is directly added to the linear component of the
  // scoring function. Since this signal can be made up of multiple features, any
  // reweighting or combination of these features is assumed to be done by the caller
  // (hence there is no need for a weight parameter -- the weights of the features
  // included in this signal have already been incorporated by the caller).
  151: optional map<i64, double> querySpecificScoreAdjustments

  // A map from user ID to the score adjustment for tweets from that author.
  // This field provides a way for adjusting the tweets of a specific set of users with a score
  // that is not present in the Earlybird features but has to be passed from the clients, such as
  // real graph weights or a combination of multiple features.
  // This field should be used mainly for experimentation since it increases the size of the thrift
  // requests.
  154: optional map<i64, double> authorSpecificScoreAdjustments

  // -------- Parameters for ThriftScoringFunctionType.MODEL_BASED --------
  // Selected models along with their weights for the linear combination
  152: optional map<string, double> selectedModels
  153: optional bool useLogitScore = false

  // -------- Parameters for ThriftScoringFunctionType.TENSORFLOW_BASED --------
  // Selected tensorflow model
  303: optional string selectedTensorflowModel

  // -------- Deprecated Fields --------
  // ID 303 has been used in the past. Resume additional deprecated fields from 304
  105: optional double deprecatedTweetHasTrendInTrendingQueryBoost = 1
  200: optional double deprecatedAgeDecaySlope = 0.003
  201: optional double deprecatedAgeDecayHalflife = 360.0
  202: optional double deprecatedAgeDecayBase = 0.6
  204: optional ThriftAgeDecayRankingParams deprecatedAgeDecayForTrendsParams
  301: optional double deprecatedNameQueryConfidence = 0.0
  302: optional double deprecatedHashtagQueryConfidence = 0.0
  // Whether to use old-style engagement features (normalized by LogNormalizer)
  // or new ones (normalized by SingleBytePositiveFloatNormalizer)
  50: optional bool useGranularEngagementFeatures = 0  // DEPRECATED!
}(persisted='true')

// This sorting mode is used by earlybird to retrieve the top-n facets that
// are returned to blender
enum ThriftFacetEarlybirdSortingMode {
  SORT_BY_SIMPLE_COUNT = 0,
  SORT_BY_WEIGHTED_COUNT = 1,
}

// This is the final sort order used by blender after all results from
// the earlybirds are merged
enum ThriftFacetFinalSortOrder {
  // using the created_at date of the first tweet that contained the facet
  SCORE = 0,
  SIMPLE_COUNT = 1,
  WEIGHTED_COUNT = 2,
  CREATED_AT = 3
}

struct ThriftFacetRankingOptions {
  // next available field ID = 38

  // ======================================================================
  // EARLYBIRD SETTINGS
  //
  // These parameters primarily affect how earlybird creates the top-k
  // candidate list to be re-ranked by blender
  // ======================================================================
  // Dynamically loaded scorer and collector for quick experimentation.
  26: optional ThriftExperimentClass expScorer
  27: optional ThriftExperimentClass expCollector

  // It should be less than or equal to reputationParams.min, and all
  // tweepcreds between the two get a score of 1.0.
  21: optional i32 minTweepcredFilterThreshold

  // the maximum score a single tweet can contribute to the weightedCount
  22: optional i32 maxScorePerTweet

  15: optional ThriftFacetEarlybirdSortingMode sortingMode
  // The number of top candidates earlybird returns to blender
  16: optional i32 numCandidatesFromEarlybird = 100

  // when to early terminate for facet search, overrides the setting in ThriftSearchQuery
  34: optional i32 maxHitsToProcess = 1000

  // for anti-gaming we want to limit the maximum amount of hits the same user can
  // contribute.  Set to -1 to disable the anti-gaming filter. Overrides the setting in
  // ThriftSearchQuery
  35: optional i32 maxHitsPerUser = 3

  // if the tweepcred of the user is bigger than this value it will not be excluded
  // by the anti-gaming filter. Overrides the setting in ThriftSearchQuery
  36: optional i32 maxTweepcredForAntiGaming = 65

  // these settings affect how earlybird computes the weightedCount
   2: optional ThriftLinearFeatureRankingParams parusScoreParams
   3: optional ThriftLinearFeatureRankingParams reputationParams
  17: optional ThriftLinearFeatureRankingParams favoritesParams
  33: optional ThriftLinearFeatureRankingParams repliesParams
  37: optional map<byte, ThriftLinearFeatureRankingParams> rankingExpScoreParams

  // penalty counter settings
  6: optional i32 offensiveTweetPenalty  // set to -1 to disable the offensive filter
  7: optional i32 antigamingPenalty // set to -1 to disable antigaming filtering
  // weight of penalty counts from all tweets containing a facet, not just the tweets
  // matching the query
  9: optional double queryIndependentPenaltyWeight  // set to 0 to not use query independent penalty weights
  // penalty for keyword stuffing
  60: optional i32 multipleHashtagsOrTrendsPenalty

  // Language related boosts, similar to those in relevance ranking options. By default they are
  // all 1.0 (no-boost).
  // When the user language is english, facet language is not
  11: optional double langEnglishUIBoost = 1.0
  // When the facet language is english, user language is not
  12: optional double langEnglishFacetBoost = 1.0
  // When the user language differs from facet/tweet language, and neither is english
  13: optional double langDefaultBoost = 1.0

  // ======================================================================
  // BLENDER SETTINGS
  //
  // Settings for the facet relevance scoring happening in blender
  // ======================================================================

  // This block of parameters are only used in the FacetsFutureManager.
  // limits to discard facets
  // if a facet has a higher penalty count, it will not be returned
  5: optional i32 maxPenaltyCount
  // if a facet has a lower simple count, it will not be returned
  28: optional i32 minSimpleCount
  // if a facet has a lower weighted count, it will not be returned
  8: optional i32 minCount
  // the maximum allowed value for offensiveCount/facetCount a facet can have in order to be returned
  10: optional double maxPenaltyCountRatio
  // if set to true, then facets with offensive display tweets are excluded from the resultset
  29: optional bool excludePossiblySensitiveFacets
  // if set to true, then only facets that have a display tweet in their ThriftFacetCountMetadata object
  // will be returned to the caller
  30: optional bool onlyReturnFacetsWithDisplayTweet

  // parameters for scoring force-inserted media items
  // Please check FacetReRanker.java computeScoreForInserted() for their usage.
  38: optional double forceInsertedBackgroundExp = 0.3
  39: optional double forceInsertedMinBackgroundCount = 2
  40: optional double forceInsertedMultiplier = 0.01

  // -----------------------------------------------------
  // weights for the facet ranking formula
  18: optional double simpleCountWeight_DEPRECATED
  19: optional double weightedCountWeight_DEPRECATED
  20: optional double backgroundModelBoost_DEPRECATED

  // -----------------------------------------------------
  // Following parameters are used in the FacetsReRanker
  // age decay params
  14: optional ThriftAgeDecayRankingParams ageDecayParams

  // used in the facets reranker
  23: optional double maxNormBoost = 5.0
  24: optional double globalCountExponent = 3.0
  25: optional double simpleCountExponent = 3.0

  31: optional ThriftFacetFinalSortOrder finalSortOrder

  // Run facets search as if they happen at this specific time (ms since epoch).
  32: optional i64 fakeCurrentTimeMs  // not really used anywhere, remove?
}(persisted='true')