mirror of
synced 2025-01-13 12:49:10 +01:00
[docx] split commit for file 4600
Signed-off-by: Ari Archer <ari.web.xyz@gmail.com>
This commit is contained in:
Binary file not shown.
@ -1,205 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Maps;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.search.queryparser.util.IdTimeRanges;
import com.twitter.util.Future;
* A Finagle filter used to filter requests to tiers.
* Parses serialized query on Earlybird request, and extracts since / until / since_id / max_id
* operators. This filter then tests whether the request overlaps with the given tier. If there
* is no overlap, an empty response is returned without actually forwarding the requests to the
* underlying service.
public class EarlybirdTimeRangeFilter extends
SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final Logger LOG = LoggerFactory.getLogger(EarlybirdTimeRangeFilter.class);
private static final EarlybirdResponse ERROR_RESPONSE =
new EarlybirdResponse(EarlybirdResponseCode.PERSISTENT_ERROR, 0)
.setSearchResults(new ThriftSearchResults());
private final ServingRangeProvider servingRangeProvider;
private final Optional<EarlybirdTimeFilterQueryRewriter> queryRewriter;
// Per-request-type counters, exported under the name
// "time_range_filter_<normalized request type>_failed_requests".
private static final Map<EarlybirdRequestType, SearchCounter> FAILED_REQUESTS;
static {
// NOTE(review): the initializer expression of tempMap is not visible in this extract.
final Map<EarlybirdRequestType, SearchCounter> tempMap =
// One counter is exported for every EarlybirdRequestType value.
for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
tempMap.put(requestType, SearchCounter.export(
"time_range_filter_" + requestType.getNormalizedName() + "_failed_requests"));
// Publish a read-only view so the map cannot be modified through FAILED_REQUESTS.
FAILED_REQUESTS = Collections.unmodifiableMap(tempMap);
// Factory: creates a filter that carries an EarlybirdTimeFilterQueryRewriter built from the
// same serving range provider and the given decider.
public static EarlybirdTimeRangeFilter newTimeRangeFilterWithQueryRewriter(
ServingRangeProvider servingRangeProvider,
SearchDecider decider) {
return new EarlybirdTimeRangeFilter(servingRangeProvider,
Optional.of(new EarlybirdTimeFilterQueryRewriter(servingRangeProvider, decider)));
// Factory: creates a filter with an empty rewriter Optional, i.e. no query rewriter attached.
public static EarlybirdTimeRangeFilter newTimeRangeFilterWithoutQueryRewriter(
ServingRangeProvider servingRangeProvider) {
return new EarlybirdTimeRangeFilter(servingRangeProvider, Optional.empty());
* Construct a filter that avoids forwarding requests to unrelated tiers
* based on requests' since / until / since_id / max_id.
* @param provider Holds the boundary information.
ServingRangeProvider provider,
Optional<EarlybirdTimeFilterQueryRewriter> rewriter) {
this.servingRangeProvider = provider;
this.queryRewriter = rewriter;
// Returns the serving range provider held by this filter.
public ServingRangeProvider getServingRangeProvider() {
return servingRangeProvider;
// Decides whether the request reaches the underlying service.
// - No parsed query, no time ranges in the query, or a failure while extracting the ranges:
//   the request is forwarded through issueServiceRequest.
// - Query ranges that do not overlap the serving range: a tier-skipped response is returned
//   immediately and the service is not called.
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
Query parsedQuery = requestContext.getParsedQuery();
if (parsedQuery != null) {
// Only perform filtering if serialized query is set.
try {
IdTimeRanges queryRanges = IdTimeRanges.fromQuery(parsedQuery);
if (queryRanges == null) {
// No time ranges in query.
return issueServiceRequest(service, requestContext);
// NOTE(review): the call that produces servingRange is only partly visible in this extract.
ServingRange servingRange =
requestContext, requestContext.useOverrideTierConfig());
if (queryDoesNotOverlapWithServingRange(queryRanges, servingRange)) {
// Short-circuit: answer locally with an already-completed future.
return Future.value(tierSkippedResponse(requestContext.getEarlybirdRequestType(),
} else {
return issueServiceRequest(service, requestContext);
} catch (QueryParserException e) {
// NOTE(review): the caught exception is not passed to the logger, so its stack trace is
// not recorded by this call.
LOG.warn("Unable to get IdTimeRanges from query: " + parsedQuery.serialize());
// The failure here is not due to a malformed query from the client, since we already
// were able to successfully get a parsed Query from the request.
// If we can't determine the time ranges, pass the query along to the tier, and just
// restrict it to the time ranges of the tier.
return issueServiceRequest(service, requestContext);
} else {
// There's no serialized query. Just pass through like an identity filter.
return issueServiceRequest(service, requestContext);
// Returns true when at least one of the four comparisons below places the query range
// entirely outside the serving range (by id or by time). A bound that is absent from the
// query falls back to the widest value via or(...): 0 for lower bounds, Long.MAX_VALUE /
// Integer.MAX_VALUE for upper bounds, so an absent bound never causes filtering by itself.
private boolean queryDoesNotOverlapWithServingRange(IdTimeRanges queryRanges,
ServingRange servingRange) {
// As long as a query overlaps with the tier serving range on either side,
// the request is not filtered. I.e. we want to be conservative when doing this filtering,
// because it is just an optimization. We ignore the inclusiveness / exclusiveness of the
// boundaries. If the tier boundary and the query boundary happen to be the same, we do not
// filter the request.
return queryRanges.getSinceIDExclusive().or(0L)
> servingRange.getServingRangeMaxId()
|| queryRanges.getMaxIDInclusive().or(Long.MAX_VALUE)
< servingRange.getServingRangeSinceId()
|| queryRanges.getSinceTimeInclusive().or(0)
> servingRange.getServingRangeUntilTimeSecondsFromEpoch()
|| queryRanges.getUntilTimeExclusive().or(Integer.MAX_VALUE)
< servingRange.getServingRangeSinceTimeSecondsFromEpoch();
// Forwards the request to the service, first passing it through the query rewriter when one
// is present. A QueryParserException raised while rewriting is logged and turned into an
// error response instead of propagating to the caller.
private Future<EarlybirdResponse> issueServiceRequest(
Service<EarlybirdRequestContext, EarlybirdResponse> service,
EarlybirdRequestContext requestContext) {
try {
// Start from the original context; it is replaced only if a rewriter is configured.
EarlybirdRequestContext request = requestContext;
if (queryRewriter.isPresent()) {
request = queryRewriter.get().rewriteRequest(requestContext);
return service.apply(request);
} catch (QueryParserException e) {
String msg = "Failed to add time filter operators";
LOG.error(msg, e);
// Note that in this case it is not clear whether the error is the client's fault or our
// fault, so we don't necessarily return a CLIENT_ERROR here.
// Currently this actually returns a PERSISTENT_ERROR.
if (requestContext.getRequest().getDebugMode() > 0) {
// Debug mode: copy the shared response before attaching the request-specific message.
return Future.value(
ERROR_RESPONSE.deepCopy().setDebugString(msg + ": " + e.getMessage()));
} else {
// NOTE(review): this branch hands out the shared static ERROR_RESPONSE instance without
// copying it — confirm that no downstream code mutates the response.
return Future.value(ERROR_RESPONSE);
* Creates a tier skipped response, based on the given request type.
* For recency, relevance, facets and top tweets requests, this method returns a SUCCESS response
* with no search results and the minSearchedStatusID and maxSearchedStatusID appropriately set.
* For term stats response, it returns a TIER_SKIPPED response, but we need to revisit this.
* @param requestType The type of the request.
* @param servingRange The serving range of the tier that we're skipping.
// Builds the response returned for a skipped tier. TERM_STATS requests get a response with
// code TIER_SKIPPED; every other request type is delegated to
// EarlybirdResponseUtil.tierSkippedRootResponse.
// NOTE(review): the trailing arguments / chained calls of both return statements are not
// visible in this extract.
public static EarlybirdResponse tierSkippedResponse(
EarlybirdRequestType requestType,
ServingRange servingRange) {
String debugMessage =
"Tier skipped because it does not intersect with query time boundaries.";
if (requestType == EarlybirdRequestType.TERM_STATS) {
// If it's a term stats request, return a TIER_SKIPPED response for now.
// But we need to figure out the right thing to do here.
return new EarlybirdResponse(EarlybirdResponseCode.TIER_SKIPPED, 0)
} else {
// minIds in ServingRange instances are set to tierLowerBoundary - 1, because the
// since_id operator is exclusive. The max_id operator on the other hand is inclusive,
// so maxIds in ServingRange instances are also set to tierUpperBoundary - 1.
// Here we want both of them to be inclusive, so we need to increment the minId by 1.
return EarlybirdResponseUtil.tierSkippedRootResponse(
servingRange.getServingRangeSinceId() + 1,
Binary file not shown.
@ -1,167 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.List;
import javax.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdDebugInfo;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryNodeUtils;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.search.queryparser.query.search.SearchOperator;
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
import com.twitter.search.queryparser.visitors.DropAllProtectedOperatorVisitor;
import com.twitter.search.queryparser.visitors.QueryTreeIndex;
import com.twitter.util.Future;
* Full archive service filter validates requests with a protected operator, appends the
* '[exclude protected]' operator by default, and appends '[filter protected]' operator instead if
* 'getProtectedTweetsOnly' request param is set. A client error response is returned if any of the
* following rules is violated.
* 1. There is at most one 'protected' operator in the query.
* 2. If there is a 'protected' operator, it must be in the query root node.
* 3. The parent node of the 'protected' operator must not be negated and must be a conjunction.
* 4. If there is a positive 'protected' operator, 'followedUserIds' and 'searcherId' request
* params must be set.
public class FullArchiveProtectedOperatorFilter extends
SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final Logger LOG =
private static final SearchOperator EXCLUDE_PROTECTED_OPERATOR =
new SearchOperator(SearchOperator.Type.EXCLUDE, SearchOperatorConstants.PROTECTED);
private static final SearchOperator FILTER_PROTECTED_OPERATOR =
new SearchOperator(SearchOperator.Type.FILTER, SearchOperatorConstants.PROTECTED);
private static final SearchCounter QUERY_PARSER_FAILURE_COUNT =
private final DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor;
private final SearchDecider decider;
// Stores the visitor used to drop protected operators and the decider in final fields.
public FullArchiveProtectedOperatorFilter(
DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor,
SearchDecider decider) {
this.dropProtectedOperatorVisitor = dropProtectedOperatorVisitor;
this.decider = decider;
// Validates how the 'protected' operator is used in the parsed query and forwards either the
// original request or a copy carrying a rewritten query.
// - No parsed query: the request is passed to the service unchanged.
// - Rule violation: an error response from createErrorResponse is returned and the service
//   is not called.
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
Query query = requestContext.getParsedQuery();
if (query == null) {
return service.apply(requestContext);
QueryTreeIndex queryTreeIndex = QueryTreeIndex.buildFor(query);
List<Query> nodeList = queryTreeIndex.getNodeList();
// try to find a protected operator, returns error response if more than one protected
// operator is detected
SearchOperator protectedOperator = null;
for (Query node : nodeList) {
if (node instanceof SearchOperator) {
SearchOperator searchOp = (SearchOperator) node;
if (SearchOperatorConstants.PROTECTED.equals(searchOp.getOperand())) {
if (protectedOperator == null) {
protectedOperator = searchOp;
} else {
return createErrorResponse("Only one 'protected' operator is expected.");
Query processedQuery;
if (protectedOperator == null) {
// no protected operator is detected, append '[exclude protected]' by default
processedQuery = QueryNodeUtils.appendAsConjunction(query, EXCLUDE_PROTECTED_OPERATOR);
} else {
// protected operator must be in the query root node
if (queryTreeIndex.getParentOf(protectedOperator) != query) {
return createErrorResponse("'protected' operator must be in the query root node");
// the query node that contains protected operator must not be negated
if (query.mustNotOccur()) {
return createErrorResponse("The query node that contains a 'protected' operator must not"
+ " be negated.");
// the query node that contains protected operator must be a conjunction
if (!query.isTypeOf(Query.QueryType.CONJUNCTION)) {
return createErrorResponse("The query node that contains a 'protected' operator must"
+ " be a conjunction.");
// check the existence of 'followedUserIds' and 'searcherId' if it is a positive operator
if (isPositive(protectedOperator)) {
if (!validateRequestParam(requestContext.getRequest())) {
return createErrorResponse("'followedUserIds' and 'searcherId' are required "
+ "by positive 'protected' operator.");
// A valid existing protected operator leaves the query as it is.
processedQuery = query;
// update processedQuery if 'getProtectedTweetsOnly' is set to true, it takes precedence over
// the existing protected operators
if (requestContext.getRequest().isGetProtectedTweetsOnly()) {
if (!validateRequestParam(requestContext.getRequest())) {
return createErrorResponse("'followedUserIds' and 'searcherId' are required "
+ "when 'getProtectedTweetsOnly' is set to true.");
try {
// Remove every existing protected operator before appending the FILTER variant below.
processedQuery = processedQuery.accept(dropProtectedOperatorVisitor);
} catch (QueryParserException e) {
// this should not happen since we already have a parsed query
// NOTE(review): the first line of this logging call is not visible in this extract.
"Failed to drop protected operator for serialized query: " + query.serialize(), e);
processedQuery =
QueryNodeUtils.appendAsConjunction(processedQuery, FILTER_PROTECTED_OPERATOR);
// Reference comparison on purpose: the same object means no rewrite took place, so the
// original request context can be forwarded without copying.
if (processedQuery == query) {
return service.apply(requestContext);
} else {
EarlybirdRequestContext clonedRequestContext =
EarlybirdRequestContext.copyRequestContext(requestContext, processedQuery);
return service.apply(clonedRequestContext);
// Returns true only when followedUserIds is non-null and non-empty AND a searcherId is set
// on the request's search query.
private boolean validateRequestParam(EarlybirdRequest request) {
List<Long> followedUserIds = request.followedUserIds;
// searcherId stays null when there is no search query or the field is unset.
Long searcherId = (request.searchQuery != null && request.searchQuery.isSetSearcherId())
? request.searchQuery.getSearcherId() : null;
return followedUserIds != null && !followedUserIds.isEmpty() && searcherId != null;
// An operator counts as positive in two cases:
//   1. it is negated (mustNotOccur) and of type EXCLUDE, or
//   2. it is not negated and of type INCLUDE or FILTER.
private boolean isPositive(SearchOperator searchOp) {
boolean isNegateExclude = searchOp.mustNotOccur()
&& searchOp.getOperatorType() == SearchOperator.Type.EXCLUDE;
boolean isPositive = !searchOp.mustNotOccur()
&& (searchOp.getOperatorType() == SearchOperator.Type.INCLUDE
|| searchOp.getOperatorType() == SearchOperator.Type.FILTER);
return isNegateExclude || isPositive;
// Builds a CLIENT_ERROR response whose debug info names the host "full_archive_root" and
// wraps it in an already-completed Future.
// NOTE(review): errorMsg is not used in the lines visible here — a statement attaching it to
// the response may be missing from this extract; confirm against the full file.
private Future<EarlybirdResponse> createErrorResponse(String errorMsg) {
EarlybirdResponse response = new EarlybirdResponse(EarlybirdResponseCode.CLIENT_ERROR, 0);
response.setDebugInfo(new EarlybirdDebugInfo().setHost("full_archive_root"));
return Future.value(response);
Binary file not shown.
@ -1,64 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Date;
import java.util.concurrent.TimeUnit;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.common.util.date.DateUtil;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
public class FullArchiveServingRangeProvider implements ServingRangeProvider {
public static final Date FULL_ARCHIVE_START_DATE = DateUtil.toDate(2006, 3, 21);
private static final int DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO = 48;
private final SearchDecider decider;
private final String deciderKey;
// Stores the decider and the decider key; both are read later through
// decider.getAvailability(deciderKey) when a serving range boundary is computed.
public FullArchiveServingRangeProvider(
SearchDecider decider, String deciderKey) {
this.decider = decider;
this.deciderKey = deciderKey;
// Returns an anonymous ServingRange whose upper boundaries are computed on each call from
// the request's creation time minus a number of hours.
// NOTE(review): the condition selecting between decider.getAvailability(deciderKey) and the
// alternative value is not visible in this extract (presumably
// DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO and useBoundaryOverride are involved — confirm).
public ServingRange getServingRange(
final EarlybirdRequestContext requestContext, boolean useBoundaryOverride) {
return new ServingRange() {
public long getServingRangeSinceId() {
// we use 1 instead of 0, because the since_id operator is inclusive in earlybirds.
// NOTE(review): EarlybirdTimeRangeFilter.tierSkippedResponse describes since_id as
// exclusive — confirm which statement is correct.
return 1L;
public long getServingRangeMaxId() {
// Offset, in milliseconds, subtracted from the request creation time.
long servingRangeEndMillis = TimeUnit.HOURS.toMillis(
? decider.getAvailability(deciderKey)
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeEndMillis;
// Convert the boundary timestamp into a status id.
return SnowflakeIdParser.generateValidStatusId(boundaryTime, 0);
public long getServingRangeSinceTimeSecondsFromEpoch() {
// Date.getTime() is in milliseconds; divide to get seconds.
return FULL_ARCHIVE_START_DATE.getTime() / 1000;
public long getServingRangeUntilTimeSecondsFromEpoch() {
// Same boundary computation as getServingRangeMaxId, reported in seconds.
long servingRangeEndMillis = TimeUnit.HOURS.toMillis(
? decider.getAvailability(deciderKey)
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeEndMillis;
return boundaryTime / 1000;
Binary file not shown.
@ -1,66 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import javax.inject.Inject;
import com.google.common.annotations.VisibleForTesting;
import com.twitter.common.util.Clock;
import com.twitter.finagle.Filter;
import com.twitter.finagle.Service;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.common.EarlybirdRequestUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.QueryParsingUtils;
import com.twitter.search.earlybird_root.common.TwitterContextProvider;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.util.Future;
* Creates a new RequestContext from an EarlybirdRequest, and passes the RequestContext down to
* the rest of the filter/service chain.
public class InitializeRequestContextFilter extends
Filter<EarlybirdRequest, EarlybirdResponse, EarlybirdRequestContext, EarlybirdResponse> {
static final SearchCounter FAILED_QUERY_PARSING =
private final SearchDecider decider;
private final TwitterContextProvider twitterContextProvider;
private final Clock clock;
* The constructor of the filter.
// Stores the three collaborators that apply() passes to EarlybirdRequestContext.newContext.
public InitializeRequestContextFilter(SearchDecider decider,
TwitterContextProvider twitterContextProvider,
Clock clock) {
this.decider = decider;
this.twitterContextProvider = twitterContextProvider;
this.clock = clock;
// Builds an EarlybirdRequestContext from the raw request and hands it to the service.
// A QueryParserException thrown while building the context is converted into the response
// produced by QueryParsingUtils.newClientErrorResponse; the service is not called then.
public Future<EarlybirdResponse> apply(
EarlybirdRequest request,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
EarlybirdRequestContext requestContext;
try {
requestContext = EarlybirdRequestContext.newContext(
request, decider, twitterContextProvider.get(), clock);
} catch (QueryParserException e) {
return QueryParsingUtils.newClientErrorResponse(request, e);
return service.apply(requestContext);
Binary file not shown.
@ -1,80 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import com.google.common.annotations.VisibleForTesting;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResultExtraMetadata;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
* Filter tracks the isUserProtected metadata stats returned from Earlybirds.
public class IsUserProtectedMetadataTrackingFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final String COUNTER_PREFIX = "is_user_protected_metadata_count_filter_";
final Map<EarlybirdRequestType, SearchCounter> totalCounterByRequestTypeMap;
final Map<EarlybirdRequestType, SearchCounter> isProtectedCounterByRequestTypeMap;
// Creates the two per-request-type counter maps and exports, for every request type, one
// counter with the suffix "_total" and one with the suffix "_is_protected".
// NOTE(review): the statements that put the exported counters into the maps are only partly
// visible in this extract.
public IsUserProtectedMetadataTrackingFilter() {
this.totalCounterByRequestTypeMap = new EnumMap<>(EarlybirdRequestType.class);
this.isProtectedCounterByRequestTypeMap = new EnumMap<>(EarlybirdRequestType.class);
for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
SearchCounter.export(COUNTER_PREFIX + requestType.getNormalizedName() + "_total"));
SearchCounter.export(COUNTER_PREFIX + requestType.getNormalizedName() + "_is_protected"));
// Calls the service first, then attaches a listener that inspects successful responses.
// The future returned to the caller is the one obtained from the service.
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext request,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
Future<EarlybirdResponse> response = service.apply(request);
EarlybirdRequestType requestType = request.getEarlybirdRequestType();
response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
public void onSuccess(EarlybirdResponse response) {
// Guard for responses without search results or with an empty result list.
// NOTE(review): the body of this guard is not visible in this extract.
if (!response.isSetSearchResults() || response.getSearchResults().getResults().isEmpty()) {
List<ThriftSearchResult> searchResults = response.getSearchResults().getResults();
int totalCount = searchResults.size();
int isUserProtectedCount = 0;
for (ThriftSearchResult searchResult : searchResults) {
// Only results that carry both metadata and extra metadata are examined.
if (searchResult.isSetMetadata() && searchResult.getMetadata().isSetExtraMetadata()) {
ThriftSearchResultExtraMetadata extraMetadata =
if (extraMetadata.isIsUserProtected()) {
// Failures are deliberately ignored by this listener.
public void onFailure(Throwable cause) { }
return response;
Binary file not shown.
@ -1,49 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftTweetSource;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Function;
import com.twitter.util.Future;
public class MarkTweetSourceFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private final SearchCounter searchResultsNotSet;
private final ThriftTweetSource tweetSource;
// Stores the tweet source and exports a counter named
// "<lower-cased source name>_mark_tweet_source_filter_search_results_not_set".
// NOTE(review): toLowerCase() without a Locale uses the JVM default locale; consider
// Locale.ROOT so the exported metric name does not depend on the host's locale.
public MarkTweetSourceFilter(ThriftTweetSource tweetSource) {
this.tweetSource = tweetSource;
searchResultsNotSet = SearchCounter.export(
tweetSource.name().toLowerCase() + "_mark_tweet_source_filter_search_results_not_set");
// Forwards the request and post-processes the response through map(). Only responses with
// code SUCCESS for requests that are not TERM_STATS are inspected; every response is
// returned to the caller.
// NOTE(review): the statements inside the two inner branches (missing search results vs.
// iterating over the results) are not visible in this extract.
public Future<EarlybirdResponse> apply(
final EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
return service.apply(requestContext).map(new Function<EarlybirdResponse, EarlybirdResponse>() {
public EarlybirdResponse apply(EarlybirdResponse response) {
if (response.getResponseCode() == EarlybirdResponseCode.SUCCESS
&& requestContext.getEarlybirdRequestType() != EarlybirdRequestType.TERM_STATS) {
if (!response.isSetSearchResults()) {
} else {
for (ThriftSearchResult searchResult : response.getSearchResults().getResults()) {
return response;
Binary file not shown.
@ -1,119 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.List;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchMovingAverage;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
* Filter that is tracking the engagement stats returned from Earlybirds.
public class MetadataTrackingFilter extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private static final String SCORING_SIGNAL_STAT_PREFIX = "scoring_signal_";
private static final String SCORE_STAT_PATTERN = "client_id_score_tracker_for_%s_x100";
static final SearchMovingAverage SCORING_SIGNAL_FAV_COUNT =
SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "fav_count");
static final SearchMovingAverage SCORING_SIGNAL_REPLY_COUNT =
SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "reply_count");
static final SearchMovingAverage SCORING_SIGNAL_RETWEET_COUNT =
SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "retweet_count");
// Maps a client id to its exported moving average; the loader exports the metric named by
// SCORE_STAT_PATTERN on first lookup of a client id.
// NOTE(review): no size limit or expiry is configured on the builder in the visible code, so
// the cache appears to grow with the number of distinct client ids — confirm this is bounded.
static final LoadingCache<String, SearchMovingAverage> CLIENT_SCORE_METRICS_LOADING_CACHE =
CacheBuilder.newBuilder().build(new CacheLoader<String, SearchMovingAverage>() {
public SearchMovingAverage load(String clientId) {
return SearchMovingAverage.export(String.format(SCORE_STAT_PATTERN, clientId));
// Forwards the request and attaches a listener that records engagement and score statistics.
// Statistics are gathered only for responses with code SUCCESS to RELEVANCE requests that
// have search results set. The future obtained from the service is returned to the caller.
public Future<EarlybirdResponse> apply(final EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
Future<EarlybirdResponse> response = service.apply(request);
response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
public void onSuccess(EarlybirdResponse earlybirdResponse) {
EarlybirdRequestType type = EarlybirdRequestType.of(request);
if (earlybirdResponse.responseCode == EarlybirdResponseCode.SUCCESS
&& type == EarlybirdRequestType.RELEVANCE
&& earlybirdResponse.isSetSearchResults()
&& earlybirdResponse.getSearchResults().isSetResults()) {
List<ThriftSearchResult> searchResults = earlybirdResponse.getSearchResults()
// Running totals over all results of this response.
long totalFavoriteAmount = 0;
long totalReplyAmount = 0;
long totalRetweetAmount = 0;
double totalScoreX100 = 0;
for (ThriftSearchResult result : searchResults) {
// NOTE(review): the body of this guard is not visible in this extract.
if (!result.isSetMetadata()) {
ThriftSearchResultMetadata metadata = result.getMetadata();
if (metadata.isSetFavCount()) {
totalFavoriteAmount += metadata.getFavCount();
if (metadata.isSetReplyCount()) {
totalReplyAmount += metadata.getReplyCount();
if (metadata.isSetRetweetCount()) {
totalRetweetAmount += metadata.getRetweetCount();
if (metadata.isSetScore()) {
// Scale up the score by 100 so that scores are at least 1 and visible on viz graph
totalScoreX100 += metadata.getScore() * 100;
// We only count present engagement counts but report the full size of the search results.
// This means that we consider the missing counts as being 0.
SCORING_SIGNAL_FAV_COUNT.addSamples(totalFavoriteAmount, searchResults.size());
SCORING_SIGNAL_REPLY_COUNT.addSamples(totalReplyAmount, searchResults.size());
SCORING_SIGNAL_RETWEET_COUNT.addSamples(totalRetweetAmount, searchResults.size());
// Export per client id average scores.
String requestClientId = ClientIdUtil.getClientIdFromRequest(request);
String quotaClientId = ClientIdUtil.getQuotaClientId(requestClientId);
// NOTE(review): the receiver of this addSamples call is not visible in this extract.
.addSamples((long) totalScoreX100, searchResults.size());
// Failures are deliberately ignored by this listener.
public void onFailure(Throwable cause) { }
return response;
Binary file not shown.
@ -1,45 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.Percentile;
import com.twitter.search.common.metrics.PercentileUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
public class NamedMultiTermDisjunctionStatsFilter extends
SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private static final String STAT_FORMAT = "named_disjunction_size_client_%s_key_%s";
// ClientID -> disjunction name -> operand count
private static final ConcurrentMap<String, ConcurrentMap<String, Percentile<Integer>>>
// For every named disjunction in the request's search query, looks up (creating on first
// use) the Percentile stat keyed by client id and disjunction name, then forwards the
// request unchanged.
// NOTE(review): request.getSearchQuery() is dereferenced without a null check — this assumes
// every request carries a search query; confirm against callers.
// NOTE(review): the statement that records a sample on `stats` is not visible in this extract.
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
if (request.getSearchQuery().isSetNamedDisjunctionMap()) {
for (Map.Entry<String, List<Long>> entry
: request.getSearchQuery().getNamedDisjunctionMap().entrySet()) {
Map<String, Percentile<Integer>> statsForClient =
request.getClientId(), clientId -> new ConcurrentHashMap<>());
Percentile<Integer> stats = statsForClient.computeIfAbsent(entry.getKey(),
keyName -> PercentileUtil.createPercentile(
String.format(STAT_FORMAT, request.getClientId(), keyName)));
return service.apply(request);
Binary file not shown.
@ -1,81 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.HashSet;
import java.util.Set;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
import com.twitter.search.queryparser.visitors.DetectPositiveOperatorVisitor;
* Filter that is tracking the unexpected nullcast results from Earlybirds.
public class NullcastTrackingFilter extends SensitiveResultsTrackingFilter {
public NullcastTrackingFilter() {
super("unexpected nullcast tweets", true);
private static final Logger LOG = LoggerFactory.getLogger(NullcastTrackingFilter.class);
static final SearchCounter BAD_NULLCAST_QUERY_COUNT =
static final SearchCounter BAD_NULLCAST_RESULT_COUNT =
protected Logger getLogger() {
return LOG;
protected SearchCounter getSensitiveQueryCounter() {
protected SearchCounter getSensitiveResultsCounter() {
// Returns the status ids of unexpected nullcast results for this response.
// When the DetectPositiveOperatorVisitor for NULLCAST returns false for the parsed query,
// the ids are looked up via EarlybirdResponseUtil.findUnexpectedNullcastStatusIds; otherwise
// an empty mutable set is returned.
// NOTE(review): presumably the visitor returns true when the query explicitly asks for
// nullcast tweets — confirm against DetectPositiveOperatorVisitor.
protected Set<Long> getSensitiveResults(EarlybirdRequestContext requestContext,
EarlybirdResponse earlybirdResponse) throws Exception {
if (!requestContext.getParsedQuery().accept(
new DetectPositiveOperatorVisitor(SearchOperatorConstants.NULLCAST))) {
return EarlybirdResponseUtil.findUnexpectedNullcastStatusIds(
earlybirdResponse.getSearchResults(), requestContext.getRequest());
} else {
return new HashSet<>();
* Some Earlybird requests are not searches, instead, they are scoring requests.
* These requests supply a list of IDs to be scored.
* It is OK to return nullcast tweet result if the ID is supplied in the request.
* This extracts the scoring request tweet IDs.
// Returns the status ids supplied in the request's search query. An empty immutable set is
// returned when the request is null, has no search query, or the query lists no status ids.
protected Set<Long> getExceptedResults(EarlybirdRequestContext requestContext) {
EarlybirdRequest request = requestContext.getRequest();
if (request == null
|| !request.isSetSearchQuery()
|| request.getSearchQuery().getSearchStatusIdsSize() == 0) {
return ImmutableSet.of();
return request.getSearchQuery().getSearchStatusIds();
Binary file not shown.
@ -1,10 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import javax.inject.Inject;
public class PostCacheRequestTypeCountFilter extends RequestTypeCountFilter {
public PostCacheRequestTypeCountFilter() {
Binary file not shown.
@ -1,10 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import javax.inject.Inject;
public class PreCacheRequestTypeCountFilter extends RequestTypeCountFilter {
public PreCacheRequestTypeCountFilter() {
Binary file not shown.
@ -1,114 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.inject.Inject;
import javax.inject.Singleton;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.twitter.common.text.language.LocaleUtil;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.constants.thriftjava.ThriftLanguage;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.lang.ThriftLanguageUtil;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
* Export stats for query languages.
// Finagle filter that resolves a language code for each request and keeps one exported
// SearchCounter per language, with a shared overflow counter once the configured cap is hit.
public class QueryLangStatFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
// Carries the cap on how many distinct per-language counters may be created.
public static class Config {
// We put a limit here in case an error in the client is sending us random lang codes.
private int maxNumberOfLangs;
public Config(int maxNumberOfLangs) {
this.maxNumberOfLangs = maxNumberOfLangs;
public int getMaxNumberOfLangs() {
return maxNumberOfLangs;
// Prefix shared by every exported language counter name, including the overflow counter.
protected static final String LANG_STATS_PREFIX = "num_queries_in_lang_";
private final Config config;
// Returned for languages not yet seen once langCounters has reached the configured cap.
private final SearchCounter allCountsForLangsOverMaxNumLang =
SearchCounter.export(LANG_STATS_PREFIX + "overflow");
// Language code -> exported counter; populated lazily by getCounter.
private final ConcurrentHashMap<String, SearchCounter> langCounters =
new ConcurrentHashMap<>();
public QueryLangStatFilter(Config config) {
this.config = config;
// Returns the counter for the given language code, exporting and caching it on first use.
// When the language is unseen and the map already holds getMaxNumberOfLangs() entries,
// the shared overflow counter is returned instead and nothing is added to the map.
private SearchCounter getCounter(String lang) {
SearchCounter counter = langCounters.get(lang);
if (counter == null) {
// The size check runs before taking the lock.
if (langCounters.size() >= config.getMaxNumberOfLangs()) {
return allCountsForLangsOverMaxNumLang;
synchronized (langCounters) { // This double-checked locking is safe,
// since we're using a ConcurrentHashMap
counter = langCounters.get(lang);
if (counter == null) {
counter = SearchCounter.export(LANG_STATS_PREFIX + lang);
langCounters.put(lang, counter);
return counter;
// Resolves the request language in priority order: the query language, then the UI
// language, then the user language with the highest confidence. If no language can be
// resolved, the request is forwarded to the service unchanged.
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
String lang = null;
ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
lang = searchQuery.getQueryLang();
if (lang == null) {
// fallback to ui lang
lang = searchQuery.getUiLang();
if (lang == null && searchQuery.isSetUserLangs()) {
// fallback to the user lang with the highest confidence
// Seed with negative infinity: Double.MIN_VALUE is the smallest POSITIVE double, so using
// it here would make entries with a confidence of 0.0 (or lower) impossible to select.
double maxConfidence = Double.NEGATIVE_INFINITY;
for (Map.Entry<ThriftLanguage, Double> entry : searchQuery.getUserLangs().entrySet()) {
if (entry.getValue() > maxConfidence) {
lang = ThriftLanguageUtil.getLanguageCodeOf(entry.getKey());
maxConfidence = entry.getValue();
if (lang == null) {
return service.apply(requestContext);
Binary file not shown.
@ -1,194 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.EnumSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import scala.runtime.BoxedUnit;
import com.google.common.collect.ImmutableMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimer;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.search.queryparser.query.annotation.Annotation;
import com.twitter.search.queryparser.query.search.SearchOperator;
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
import com.twitter.search.queryparser.visitors.DetectAnnotationVisitor;
import com.twitter.search.queryparser.visitors.DetectVisitor;
import com.twitter.util.Future;
* For a given query, increments counters if that query has a number of search operators or
* annotations applied to it. Used to detect unusual traffic patterns.
// Finagle filter that inspects the parsed query of each request and updates per-operator,
// per-operand and annotation stats after the downstream service call has finished.
public class QueryOperatorStatFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final Logger LOG = LoggerFactory.getLogger(QueryOperatorStatFilter.class);
private final SearchCounter numQueryOperatorDetectionErrors =
private final SearchCounter numQueryOperatorConsideredRequests =
// Timer stats keyed by filter operand string; built in the constructor.
private final ImmutableMap<String, SearchTimerStats> filterOperatorStats;
// Keeps track of the number of queries with a filter applied, whose type we don't care about.
private final SearchCounter numUnknownFilterOperatorRequests =
// Timer stats keyed by include operand string; built in the constructor.
private final ImmutableMap<String, SearchTimerStats> includeOperatorStats;
// Keeps track of the number of queries with an include operator applied, whose type we don't
// know about.
private final SearchCounter numUnknownIncludeOperatorRequests =
// Timer stats keyed by operator type; built in the constructor.
private final ImmutableMap<SearchOperator.Type, SearchTimerStats> operatorTypeStats;
private final SearchCounter numVariantRequests =
* Construct this QueryOperatorStatFilter by getting the complete set of possible filters a query
* might have and associating each with a counter.
public QueryOperatorStatFilter() {
// One entry per operand in SearchOperatorConstants.VALID_FILTER_OPERANDS.
ImmutableMap.Builder<String, SearchTimerStats> filterBuilder = new ImmutableMap.Builder<>();
for (String operand : SearchOperatorConstants.VALID_FILTER_OPERANDS) {
"query_operator_filter_" + operand + "_requests",
filterOperatorStats = filterBuilder.build();
// One entry per operand in SearchOperatorConstants.VALID_INCLUDE_OPERANDS.
ImmutableMap.Builder<String, SearchTimerStats> includeBuilder = new ImmutableMap.Builder<>();
for (String operand : SearchOperatorConstants.VALID_INCLUDE_OPERANDS) {
"query_operator_include_" + operand + "_requests",
includeOperatorStats = includeBuilder.build();
// One entry per SearchOperator.Type value; the stat name uses the lower-cased enum name.
ImmutableMap.Builder<SearchOperator.Type, SearchTimerStats> operatorBuilder =
new ImmutableMap.Builder<>();
for (SearchOperator.Type operatorType : SearchOperator.Type.values()) {
"query_operator_" + operatorType.name().toLowerCase() + "_requests",
operatorTypeStats = operatorBuilder.build();
// Forwards the request; when the context carries a parsed query, a SearchTimer is created
// before the call and the stats update is attached via ensure() on the returned future.
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
Query parsedQuery = requestContext.getParsedQuery();
// No parsed query: nothing to inspect, so pass the request straight through.
if (parsedQuery == null) {
return service.apply(requestContext);
SearchTimer timer = new SearchTimer();
return service.apply(requestContext).ensure(() -> {
try {
updateTimersForOperatorsAndOperands(parsedQuery, timer);
} catch (QueryParserException e) {
// A parser failure during stats collection is logged as a warning.
LOG.warn("Unable to test if query has operators defined", e);
return BoxedUnit.UNIT;
* Tracks request stats for operators and operands.
* @param parsedQuery the query to check.
private void updateTimersForOperatorsAndOperands(Query parsedQuery, SearchTimer timer)
throws QueryParserException {
// The visitor is configured with every SearchOperator.Type value.
final DetectVisitor detectVisitor = new DetectVisitor(false, SearchOperator.Type.values());
// Collects operator types for the per-type loop further down.
Set<SearchOperator.Type> detectedOperatorTypes = EnumSet.noneOf(SearchOperator.Type.class);
for (Query query : detectVisitor.getDetectedQueries()) {
// This detectVisitor only matches on SearchOperators.
SearchOperator operator = (SearchOperator) query;
SearchOperator.Type operatorType = operator.getOperatorType();
// INCLUDE and FILTER operators are each special-cased.
if (operatorType == SearchOperator.Type.INCLUDE) {
if (operatorType == SearchOperator.Type.FILTER) {
for (SearchOperator.Type type : detectedOperatorTypes) {
// Looks up the timer stats registered for the operator's operand and branches on whether
// an entry exists; unknownOperandStat is the counter supplied for unregistered operands.
private void updateOperandStats(
SearchOperator operator,
ImmutableMap<String, SearchTimerStats> operandRequestStats,
SearchTimer timer,
SearchCounter unknownOperandStat) {
String operand = operator.getOperand();
SearchTimerStats stats = operandRequestStats.get(operand);
if (stats != null) {
} else {
// Runs a DetectAnnotationVisitor for Annotation.Type.VARIANT over the parsed query and acts
// when the visitor reports a match.
private void updateCountersIfVariantAnnotation(Query parsedQuery) throws QueryParserException {
DetectAnnotationVisitor visitor = new DetectAnnotationVisitor(Annotation.Type.VARIANT);
if (parsedQuery.accept(visitor)) {
Binary file not shown.
@ -1,92 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import com.twitter.common_internal.text.version.PenguinVersion;
import com.twitter.common_internal.text.version.PenguinVersionConfig;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.finagle.tracing.Trace;
import com.twitter.finagle.tracing.Tracing;
import com.twitter.search.common.metrics.SearchRateCounter;
import com.twitter.search.common.metrics.SearchTimer;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.QueryParsingUtils;
import com.twitter.search.queryparser.parser.SerializedQueryParser;
import com.twitter.search.queryparser.parser.SerializedQueryParser.TokenizationOption;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.util.Duration;
import com.twitter.util.Future;
// Finagle filter that, when the request asks for it, re-parses the serialized query with a
// tokenizing SerializedQueryParser and forwards a copy of the request context carrying the
// newly parsed query.
public class QueryTokenizerFilter extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
// Prefix for every stat exported by this filter and for the trace annotation name.
private static final String PREFIX = "query_tokenizer_";
private static final SearchRateCounter SUCCESS_COUNTER =
SearchRateCounter.export(PREFIX + "success");
private static final SearchRateCounter FAILURE_COUNTER =
SearchRateCounter.export(PREFIX + "error");
private static final SearchRateCounter SKIPPED_COUNTER =
SearchRateCounter.export(PREFIX + "skipped");
// Timer stat exported in milliseconds; apply() starts one timer per re-tokenized request.
private static final SearchTimerStats QUERY_TOKENIZER_TIME =
SearchTimerStats.export(PREFIX + "time", TimeUnit.MILLISECONDS, false);
// Parser option built once from the supported Penguin versions and reused for every parser.
private final TokenizationOption tokenizationOption;
public QueryTokenizerFilter(PenguinVersionConfig penguinversions) {
PenguinVersion[] supportedVersions = penguinversions
.getSupportedVersions().toArray(new PenguinVersion[0]);
tokenizationOption = new TokenizationOption(true, supportedVersions);
// Forwards the request unchanged unless retokenization is requested and a serialized query
// is set. Otherwise re-parses the serialized query; a QueryParserException is converted
// into a client error response instead of being propagated.
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
if (!requestContext.getRequest().isRetokenizeSerializedQuery()
|| !requestContext.getRequest().isSetSearchQuery()
|| !requestContext.getRequest().getSearchQuery().isSetSerializedQuery()) {
return service.apply(requestContext);
SearchTimer timer = QUERY_TOKENIZER_TIME.startNewTimer();
try {
String serializedQuery = requestContext.getRequest().getSearchQuery().getSerializedQuery();
Query parsedQuery = reparseQuery(serializedQuery);
// The original context is not mutated; a copy carrying the new parsed query is forwarded.
return service.apply(EarlybirdRequestContext.copyRequestContext(requestContext, parsedQuery));
} catch (QueryParserException e) {
return QueryParsingUtils.newClientErrorResponse(requestContext.getRequest(), e);
} finally {
// Runs on both the success and the failure path: stop the timer and, if a trace is
// actively being recorded, attach the elapsed time to it.
long elapsed = timer.stop();
Tracing trace = Trace.apply();
if (trace.isActivelyTracing()) {
trace.record(PREFIX + "time", Duration.fromMilliseconds(elapsed));
// Parses the serialized query with a new SerializedQueryParser built from tokenizationOption.
public Query reparseQuery(String serializedQuery) throws QueryParserException {
SerializedQueryParser parser = new SerializedQueryParser(tokenizationOption);
return parser.parse(serializedQuery);
* Initializing the query parser can take many seconds. We initialize it at warmup so that
* requests don't time out after we join the serverset. SEARCH-28801
public void performExpensiveInitialization() throws QueryParserException {
SerializedQueryParser queryParser = new SerializedQueryParser(tokenizationOption);
// The Korean query parser takes a few seconds on its own to initialize.
String koreanQuery = "스포츠";
Binary file not shown.
@ -1,60 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.TimeUnit;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
// ServingRangeProvider whose range is anchored on the request's creation time: the upper
// bound is the creation time itself and the lower bound lies a number of hours before it.
public class RealtimeServingRangeProvider implements ServingRangeProvider {
// Hour count constant; presumably the fallback when the decider value is not used.
// NOTE(review): the selecting condition is not visible here, please confirm.
private static final int DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO = 240;
private final SearchDecider decider;
private final String deciderKey;
public RealtimeServingRangeProvider(SearchDecider decider, String deciderKey) {
this.decider = decider;
this.deciderKey = deciderKey;
// Returns a ServingRange view over the given request context; each bound is recomputed on
// every call from requestContext.getCreatedTimeMillis().
public ServingRange getServingRange(
final EarlybirdRequestContext requestContext, boolean useBoundaryOverride) {
return new ServingRange() {
// Lower bound as a status ID generated from (creation time - boundary hours).
public long getServingRangeSinceId() {
long servingRangeStartMillis = TimeUnit.HOURS.toMillis(
? decider.getAvailability(deciderKey)
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeStartMillis;
return SnowflakeIdParser.generateValidStatusId(boundaryTime, 0);
// Upper bound as a status ID generated from the request creation time.
public long getServingRangeMaxId() {
return SnowflakeIdParser.generateValidStatusId(
requestContext.getCreatedTimeMillis(), 0);
// Lower bound in seconds: same boundary computation as above, divided by 1000.
public long getServingRangeSinceTimeSecondsFromEpoch() {
long servingRangeStartMillis = TimeUnit.HOURS.toMillis(
? decider.getAvailability(deciderKey)
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeStartMillis;
return boundaryTime / 1000;
// Upper bound in seconds: the request creation time divided by 1000.
public long getServingRangeUntilTimeSecondsFromEpoch() {
return requestContext.getCreatedTimeMillis() / 1000;
Binary file not shown.
@ -1,94 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
import javax.inject.Inject;
import com.google.common.annotations.VisibleForTesting;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.constants.thriftjava.ThriftQuerySource;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchRateCounter;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.util.Future;
* Rejects requests based on the query source of the request. Intended to be used at super-root
* or archive-root. If used to reject client request at super-root, the client will get a response
* with empty results and a REQUEST_BLOCKED_ERROR status code. If used at archive-root the client
* will get a response which might contain some results from realtime and protected and the status
* code of the response will depend on how super-root combines responses from the three downstream
* roots.
// Finagle filter that short-circuits requests whose query source is currently blocked by a
// decider, answering with a REQUEST_BLOCKED_ERROR response instead of calling the service.
public class RejectRequestsByQuerySourceFilter extends
SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
protected static final String NUM_REJECTED_REQUESTS_STAT_NAME_PATTERN =
protected static final String REJECT_REQUESTS_DECIDER_KEY_PATTERN =
// Per-query-source rejection counters.
private final Map<ThriftQuerySource, SearchRateCounter> rejectedRequestsCounterPerQuerySource =
new HashMap<>();
// Per-query-source decider keys; read in apply() to decide whether to reject.
private final Map<ThriftQuerySource, String> rejectRequestsDeciderKeyPerQuerySource =
new HashMap<>();
private final SearchDecider searchDecider;
// A null cluster falls back to the SUPERROOT cluster's stats name. Iterates every
// ThriftQuerySource value, using the lower-cased enum name as the query source name.
public RejectRequestsByQuerySourceFilter(
@Nullable EarlybirdCluster cluster,
SearchDecider searchDecider) {
this.searchDecider = searchDecider;
String clusterName = cluster != null
? cluster.getNameForStats()
: EarlybirdCluster.SUPERROOT.getNameForStats();
for (ThriftQuerySource querySource : ThriftQuerySource.values()) {
String querySourceName = querySource.name().toLowerCase();
REJECT_REQUESTS_DECIDER_KEY_PATTERN, clusterName, querySourceName));
// Rejects the request when the decider for its query source is available; otherwise
// forwards it. Requests without a query source are treated as ThriftQuerySource.UNKNOWN.
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
ThriftQuerySource querySource = request.isSetQuerySource()
? request.getQuerySource()
: ThriftQuerySource.UNKNOWN;
String deciderKey = rejectRequestsDeciderKeyPerQuerySource.get(querySource);
// Here "available" means the rejection switch is on for this query source.
if (searchDecider.isAvailable(deciderKey)) {
return Future.value(getRejectedRequestResponse(querySource, deciderKey));
return service.apply(request);
// Builds the blocked response: REQUEST_BLOCKED_ERROR with an empty ThriftSearchResults and
// a message naming the query source and the decider key.
private static EarlybirdResponse getRejectedRequestResponse(
ThriftQuerySource querySource, String deciderKey) {
return new EarlybirdResponse(EarlybirdResponseCode.REQUEST_BLOCKED_ERROR, 0)
.setSearchResults(new ThriftSearchResults())
"Request with query source %s is blocked by decider %s", querySource, deciderKey));
Binary file not shown.
@ -1,33 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.TimeUnit;
import com.twitter.finagle.Filter;
import com.twitter.finagle.Service;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
* A filter for transforming a RequestContext to an EarlybirdRequest.
// Adapts a service that takes EarlybirdRequest so it can be called with an
// EarlybirdRequestContext: the wrapped request is extracted and forwarded.
public class RequestContextToEarlybirdRequestFilter extends
Filter<EarlybirdRequestContext, EarlybirdResponse, EarlybirdRequest, EarlybirdResponse> {
// Timer stat exported in milliseconds under the name "request_context_trip_time".
private static final SearchTimerStats REQUEST_CONTEXT_TRIP_TIME =
SearchTimerStats.export("request_context_trip_time", TimeUnit.MILLISECONDS, false,
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequest, EarlybirdResponse> service) {
// Wall-clock milliseconds elapsed since the request context was created.
long tripTime = System.currentTimeMillis() - requestContext.getCreatedTimeMillis();
return service.apply(requestContext.getRequest());
Binary file not shown.
@ -1,185 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.inject.Inject;
import scala.runtime.BoxedUnit;
import com.twitter.common.util.Clock;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.Percentile;
import com.twitter.search.common.metrics.PercentileUtil;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.query.thriftjava.CollectorParams;
import com.twitter.search.common.query.thriftjava.CollectorTerminationParams;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.snowflake.id.SnowflakeId;
import com.twitter.util.Function;
import com.twitter.util.Future;
// Finagle filter that tracks properties of requests (requested result counts, hit limits,
// timeouts) and of the results returned for them (result counts, hits processed, result age).
public class RequestResultStatsFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
// Source of "now" used when computing the age of the last returned result.
private final Clock clock;
private final RequestResultStats stats;
// Holder for the exported counters and the per-client percentile stats used by the filter.
static class RequestResultStats {
private static final String PREFIX = "request_result_properties_";
private final SearchCounter resultsRequestedCount;
private final SearchCounter resultsReturnedCount;
private final SearchCounter maxHitsToProcessCount;
private final SearchCounter hitsProcessedCount;
private final SearchCounter docsProcessedCount;
private final SearchCounter timeoutMsCount;
// Per-client percentile stats, created lazily by the getters below; the maps are
// ConcurrentHashMaps (see the constructor).
private Map<String, Percentile<Integer>> requestedNumResultsPercentileByClientId;
private Map<String, Percentile<Integer>> returnedNumResultsPercentileByClientId;
private Map<String, Percentile<Long>> oldestResultPercentileByClientId;
RequestResultStats() {
// Request properties
resultsRequestedCount = SearchCounter.export(PREFIX + "results_requested_cnt");
maxHitsToProcessCount = SearchCounter.export(PREFIX + "max_hits_to_process_cnt");
timeoutMsCount = SearchCounter.export(PREFIX + "timeout_ms_cnt");
requestedNumResultsPercentileByClientId = new ConcurrentHashMap<>();
// Result properties
resultsReturnedCount = SearchCounter.export(PREFIX + "results_returned_cnt");
hitsProcessedCount = SearchCounter.export(PREFIX + "hits_processed_cnt");
docsProcessedCount = SearchCounter.export(PREFIX + "docs_processed_cnt");
returnedNumResultsPercentileByClientId = new ConcurrentHashMap<>();
oldestResultPercentileByClientId = new ConcurrentHashMap<>();
SearchCounter getResultsRequestedCount() {
return resultsRequestedCount;
SearchCounter getResultsReturnedCount() {
return resultsReturnedCount;
SearchCounter getMaxHitsToProcessCount() {
return maxHitsToProcessCount;
SearchCounter getHitsProcessedCount() {
return hitsProcessedCount;
SearchCounter getDocsProcessedCount() {
return docsProcessedCount;
SearchCounter getTimeoutMsCount() {
return timeoutMsCount;
// Each getter below returns the percentile stat for the client, creating it on first use
// via computeIfAbsent with a name built by statName.
Percentile<Long> getOldestResultPercentile(String clientId) {
return oldestResultPercentileByClientId.computeIfAbsent(clientId,
key -> PercentileUtil.createPercentile(statName(clientId, "oldest_result_age_seconds")));
Percentile<Integer> getRequestedNumResultsPercentile(String clientId) {
return requestedNumResultsPercentileByClientId.computeIfAbsent(clientId,
key -> PercentileUtil.createPercentile(statName(clientId, "requested_num_results")));
Percentile<Integer> getReturnedNumResultsPercentile(String clientId) {
return returnedNumResultsPercentileByClientId.computeIfAbsent(clientId,
key -> PercentileUtil.createPercentile(statName(clientId, "returned_num_results")));
// Builds "<PREFIX><formatted client id>_<suffix>".
private String statName(String clientId, String suffix) {
return String.format("%s%s_%s", PREFIX, ClientIdUtil.formatClientId(clientId), suffix);
RequestResultStatsFilter(Clock clock, RequestResultStats stats) {
this.clock = clock;
this.stats = stats;
// Inspects the request's parameters. When collector params are present, they (and their
// termination params) are consulted; otherwise the fields on the search query and the
// request itself (numResults, maxHitsToProcess, timeoutMs) are consulted instead.
private void updateRequestStats(EarlybirdRequest request) {
ThriftSearchQuery searchQuery = request.getSearchQuery();
CollectorParams collectorParams = searchQuery.getCollectorParams();
if (collectorParams != null) {
if (request.isSetClientId()) {
CollectorTerminationParams terminationParams = collectorParams.getTerminationParams();
if (terminationParams != null) {
if (terminationParams.isSetMaxHitsToProcess()) {
if (terminationParams.isSetTimeoutMs()) {
} else {
if (searchQuery.isSetNumResults()) {
if (request.isSetClientId()) {
if (searchQuery.isSetMaxHitsToProcess()) {
if (request.isSetTimeoutMs()) {
// Inspects the returned results. For a non-empty result list, the age of the LAST result
// is derived from its ID: Snowflake time in seconds, subtracted from the clock's "now".
private void updateResultsStats(String clientId, ThriftSearchResults results) {
if (results.isSetNumHitsProcessed()) {
if (clientId != null) {
if (results.getResultsSize() > 0) {
List<ThriftSearchResult> resultsList = results.getResults();
long lastId = resultsList.get(resultsList.size() - 1).getId();
long tweetTime = SnowflakeId.timeFromId(lastId).inLongSeconds();
long tweetAge = (clock.nowMillis() / 1000) - tweetTime;
// Forwards the request and, on a successful future, records result stats when the response
// has search results set.
public Future<EarlybirdResponse> apply(
EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
return service.apply(request).onSuccess(
new Function<EarlybirdResponse, BoxedUnit>() {
public BoxedUnit apply(EarlybirdResponse response) {
if (response.isSetSearchResults()) {
updateResultsStats(request.getClientId(), response.searchResults);
return BoxedUnit.UNIT;
Binary file not shown.
@ -1,79 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.root.RequestSuccessStats;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
import static com.twitter.search.common.util.earlybird.EarlybirdResponseUtil.responseConsideredFailed;
* Records cancellations, timeouts, and failures for requests that do not go through
* ScatterGatherService (which also updates these stats, but for different requests).
// Finagle filter that classifies each completed request as success, cancellation, timeout or
// failure and measures its latency for the injected RequestSuccessStats.
public class RequestSuccessStatsFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private final RequestSuccessStats stats;
RequestSuccessStatsFilter(RequestSuccessStats stats) {
this.stats = stats;
public Future<EarlybirdResponse> apply(
EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
// Start time in nanoseconds, captured before the downstream call.
final long startTime = System.nanoTime();
return service.apply(request).addEventListener(
new FutureEventListener<EarlybirdResponse>() {
// The future completed with a response; the response code decides whether it counts
// as a success. Client cancel, server timeout, and any code that
// responseConsideredFailed() flags all mark the request as unsuccessful.
public void onSuccess(EarlybirdResponse response) {
boolean success = true;
if (response.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR) {
success = false;
} else if (response.getResponseCode() == EarlybirdResponseCode.SERVER_TIMEOUT_ERROR) {
success = false;
} else if (responseConsideredFailed(response.getResponseCode())) {
success = false;
// Latency is measured in nanoseconds and converted to milliseconds.
long latencyNanos = System.nanoTime() - startTime;
TimeUnit.NANOSECONDS.toMillis(latencyNanos), 0, success);
// The future failed with an exception: always unsuccessful; the cause is then
// classified as a cancellation, a timeout, or some other failure.
public void onFailure(Throwable cause) {
long latencyNanos = System.nanoTime() - startTime;
TimeUnit.NANOSECONDS.toMillis(latencyNanos), 0, false);
if (FinagleUtil.isCancelException(cause)) {
} else if (FinagleUtil.isTimeoutException(cause)) {
} else {
Binary file not shown.
@ -1,105 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableMap;
import com.twitter.common.util.Clock;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.clientstats.RequestCounters;
import com.twitter.search.common.clientstats.RequestCountersEventListener;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Future;
// Finagle filter that maintains RequestCounters per request type, across all request types,
// and per (request type, client id) pair. Stat names end with the supplied suffix.
public class RequestTypeCountFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
// One RequestCounters per EarlybirdRequestType.
private final ImmutableMap<EarlybirdRequestType, RequestCounters> typeCounters;
// Counters aggregated over every request type.
private final RequestCounters allRequestTypesCounter;
private final ImmutableMap<EarlybirdRequestType, LoadingCache<String, RequestCounters>>
* Constructs the filter.
public RequestTypeCountFilter(final String statSuffix) {
ImmutableMap.Builder<EarlybirdRequestType, RequestCounters> perTypeBuilder =
for (EarlybirdRequestType type : EarlybirdRequestType.values()) {
perTypeBuilder.put(type, new RequestCounters(
"request_type_count_filter_" + type.getNormalizedName() + "_" + statSuffix));
typeCounters = perTypeBuilder.build();
allRequestTypesCounter =
new RequestCounters("request_type_count_filter_all_" + statSuffix, true);
ImmutableMap.Builder<EarlybirdRequestType, LoadingCache<String, RequestCounters>>
perTypePerClientBuilder = ImmutableMap.builder();
// No point in setting any kind of expiration policy for the cache, since the stats will
// continue to be exported, so the objects will not be GCed anyway.
CacheBuilder<Object, Object> cacheBuilder = CacheBuilder.newBuilder();
// One loading cache per request type; each cache creates a client's counters on first
// lookup, named after the client id, the request type and the suffix.
for (final EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
CacheLoader<String, RequestCounters> cacheLoader =
new CacheLoader<String, RequestCounters>() {
public RequestCounters load(String clientId) {
return new RequestCounters("request_type_count_filter_for_" + clientId + "_"
+ requestType.getNormalizedName() + "_" + statSuffix);
perTypePerClientBuilder.put(requestType, cacheBuilder.build(cacheLoader));
perTypePerClientCounters = perTypePerClientBuilder.build();
// Builds one RequestCountersEventListener per counter set (per-type, all-types,
// per-type-per-client) for this request, then forwards the request.
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
EarlybirdRequestType requestType = requestContext.getEarlybirdRequestType();
RequestCounters requestCounters = typeCounters.get(requestType);
// Update the per-type and "all" counters.
RequestCountersEventListener<EarlybirdResponse> requestCountersEventListener =
new RequestCountersEventListener<>(
requestCounters, Clock.SYSTEM_CLOCK, EarlybirdSuccessfulResponseHandler.INSTANCE);
RequestCountersEventListener<EarlybirdResponse> allRequestTypesEventListener =
new RequestCountersEventListener<>(
allRequestTypesCounter, Clock.SYSTEM_CLOCK,
RequestCountersEventListener<EarlybirdResponse> perTypePerClientEventListener =
return service.apply(requestContext)
// Resolves the counters for this request's type and formatted client id (loading them into
// the cache on first use) and wraps them in an event listener.
private RequestCountersEventListener<EarlybirdResponse> updatePerTypePerClientCountersListener(
EarlybirdRequestContext earlybirdRequestContext) {
EarlybirdRequestType requestType = earlybirdRequestContext.getEarlybirdRequestType();
LoadingCache<String, RequestCounters> perClientCounters =
String clientId = ClientIdUtil.formatFinagleClientIdAndClientId(
RequestCounters clientCounters = perClientCounters.getUnchecked(clientId);
return new RequestCountersEventListener<>(
clientCounters, Clock.SYSTEM_CLOCK, EarlybirdSuccessfulResponseHandler.INSTANCE);
Binary file not shown.
@ -1,50 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Map;
import com.google.common.collect.Maps;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
// Finagle filter holding one exported SearchCounter per EarlybirdResponseCode value.
public class ResponseCodeStatFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
// Response code -> counter; backed by an EnumMap and fully populated in the constructor.
private final Map<EarlybirdResponseCode, SearchCounter> responseCodeCounters;
* Create ResponseCodeStatFilter
public ResponseCodeStatFilter() {
responseCodeCounters = Maps.newEnumMap(EarlybirdResponseCode.class);
// Counter names are "response_code_" followed by the lower-cased enum name.
for (EarlybirdResponseCode code : EarlybirdResponseCode.values()) {
SearchCounter stat = SearchCounter.export("response_code_" + code.name().toLowerCase());
responseCodeCounters.put(code, stat);
// Forwards the request and attaches a listener to the returned future.
public Future<EarlybirdResponse> apply(
final EarlybirdRequest request,
final Service<EarlybirdRequest, EarlybirdResponse> service) {
return service.apply(request).addEventListener(
new FutureEventListener<EarlybirdResponse>() {
public void onSuccess(final EarlybirdResponse response) {
// Failed futures carry no response code, so the failure callback is intentionally empty.
public void onFailure(final Throwable cause) { }
Binary file not shown.
@ -1,114 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.NavigableMap;
import javax.inject.Inject;
import javax.inject.Singleton;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSortedMap;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchCustomGauge;
import com.twitter.search.earlybird.config.TierInfo;
import com.twitter.search.earlybird.config.TierInfoSource;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.snowflake.id.SnowflakeId;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
* A filter to count the tier to which the oldest tweet in the results belongs.
public class ResultTierCountFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final String COUNTER_PREFIX = "result_tier_count";
private final long firstTweetTimeSinceEpochSec;
private final NavigableMap<Long, SearchCounter> tierBuckets;
private final SearchCounter allCounter = SearchCounter.export(COUNTER_PREFIX + "_all");
private final SearchCounter noResultsCounter =
SearchCounter.export(COUNTER_PREFIX + "_no_results");
ResultTierCountFilter(TierInfoSource tierInfoSource) {
List<TierInfo> tierInfos = tierInfoSource.getTierInformation();
firstTweetTimeSinceEpochSec = tierInfos.get(0).getServingRangeSinceTimeSecondsFromEpoch();
ImmutableSortedMap.Builder<Long, SearchCounter> builder = ImmutableSortedMap.naturalOrder();
for (TierInfo tierInfo : tierInfos) {
SearchCounter searchCounter = SearchCounter.export(
String.format("%s_%s", COUNTER_PREFIX, tierInfo.getTierName()));
builder.put(tierInfo.getServingRangeSinceTimeSecondsFromEpoch(), searchCounter);
// export cumulative metrics to sum from the latest to a lower tier
Collection<SearchCounter> counters = builder.build().values();
String.format("%s_down_to_%s", COUNTER_PREFIX, tierInfo.getTierName()),
() -> counters.stream()
tierBuckets = builder.build();
/**
 * Forwards the request to the wrapped service and attaches a listener to the resulting future.
 * The listener ignores failures; only successful responses are handled.
 *
 * @param context the request context passed through unchanged to the service
 * @param service the downstream service
 * @return the future produced by addEventListener() on the service's response
 */
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext context,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
return service.apply(context).addEventListener(
new FutureEventListener<EarlybirdResponse>() {
public void onFailure(Throwable cause) {
// do nothing
public void onSuccess(EarlybirdResponse response) {
void record(EarlybirdResponse response) {
if (response.isSetSearchResults()) {
long minResultsStatusId = response.getSearchResults().getResults().stream()
/**
 * Picks the counter to increment for a given status ID.
 *
 * @param statusId the status ID to bucket; a negative value selects the no-results counter
 * @return noResultsCounter for negative IDs, otherwise the tier counter whose key is the
 *     greatest serving-range start that is less than or equal to the tweet's creation time
 */
private SearchCounter getBucket(long statusId) {
if (statusId < 0) {
return noResultsCounter;
// If non-negative statusId is not a SnowflakeId, the tweet must have been created before
// Twepoch (2010-11-04T01:42:54Z) and thus belongs to full1.
long timeSinceEpochSec = firstTweetTimeSinceEpochSec;
if (SnowflakeId.isSnowflakeId(statusId)) {
// Snowflake IDs embed their creation time; use it instead of the default.
timeSinceEpochSec = SnowflakeId.timeFromId(statusId).inSeconds();
// NOTE(review): floorEntry() returns null when no key is <= timeSinceEpochSec, which would
// throw a NullPointerException here -- confirm the oldest tier always covers the input.
return tierBuckets.floorEntry(timeSinceEpochSec).getValue();
Binary file not shown.
@ -1,59 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.finagle.Service;
import com.twitter.search.common.root.ScatterGatherService;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ExperimentCluster;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
public class ScatterGatherWithExperimentRedirectsService
extends Service<EarlybirdRequestContext, EarlybirdResponse> {
private final Service<EarlybirdRequestContext, EarlybirdResponse>
private final Map<ExperimentCluster,
ScatterGatherService<EarlybirdRequestContext, EarlybirdResponse>>
private static final Logger LOG =
public ScatterGatherWithExperimentRedirectsService(
Service<EarlybirdRequestContext, EarlybirdResponse> controlScatterGatherService,
ScatterGatherService<EarlybirdRequestContext, EarlybirdResponse>>
) {
this.controlScatterGatherService = controlScatterGatherService;
this.experimentScatterGatherServices = experimentScatterGatherServices;
/**
 * Routes the request to an experiment cluster when the request names one, and to the control
 * scatter-gather service otherwise.
 *
 * @param request the request context; its underlying request may name an experiment cluster
 * @return the response future from the selected service, or an already-completed future
 *     when the requested experiment cluster is not in the configured map
 */
public Future<EarlybirdResponse> apply(EarlybirdRequestContext request) {
if (request.getRequest().isSetExperimentClusterToUse()) {
ExperimentCluster cluster = request.getRequest().getExperimentClusterToUse();
// An unknown cluster is answered locally instead of being forwarded anywhere.
if (!experimentScatterGatherServices.containsKey(cluster)) {
String error = String.format(
"Received invalid experiment cluster: %s", cluster.name());
LOG.error("{} Request: {}", error, request.getRequest());
return Future.value(new EarlybirdResponse()
return experimentScatterGatherServices.get(cluster).apply(request);
// No experiment cluster requested: use the control path.
return controlScatterGatherService.apply(request);
Binary file not shown.
@ -1,43 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.atomic.AtomicReference;
import scala.Option;
import com.google.common.base.Preconditions;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.finagle.context.Contexts;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.root.SearchPayloadSizeFilter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
* A filter that sets the clientId in the local context, to be used later by SearchPayloadSizeFilter.
public class SearchPayloadSizeLocalContextFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private static final SearchCounter CLIENT_ID_CONTEXT_KEY_NOT_SET_COUNTER = SearchCounter.export(
/**
 * Looks up the client ID holder in the local context before forwarding the request to the
 * wrapped service.
 *
 * @param request the incoming request
 * @param service the downstream service
 * @return the downstream service's response future
 */
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
// In production, the SearchPayloadSizeFilter.CLIENT_ID_CONTEXT_KEY should always be set
// (by ThriftServer). However, it's not set in tests, because tests do not start a ThriftServer.
Option<AtomicReference<String>> clientIdOption =
if (clientIdOption.isDefined()) {
AtomicReference<String> clientIdReference = clientIdOption.get();
// The holder is expected to be empty at this point; a non-null value fails the check with
// an IllegalArgumentException.
Preconditions.checkArgument(clientIdReference.get() == null);
} else {
return service.apply(request);
Binary file not shown.
@ -1,140 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Set;
import com.google.common.base.Joiner;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.thrift.ThriftUtils;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
* The general framework for earlybird root to track sensitive results.
public abstract class SensitiveResultsTrackingFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
* The type name is used to distinguish different kinds of sensitive results in log.
private final String typeName;
* The mark is to control whether to log expensive information.
private final boolean logDetails;
* Constructor helps distinguish different sensitive content trackers.
* @param typeName The sensitive content's name (e.g. nullcast)
* @param logDetails Whether to log details such as serialized requests and responses
public SensitiveResultsTrackingFilter(final String typeName, boolean logDetails) {
this.typeName = typeName;
this.logDetails = logDetails;
* Get the LOG that the sensitive results can write to.
protected abstract Logger getLogger();
* The counter which counts the number of queries with sensitive results.
protected abstract SearchCounter getSensitiveQueryCounter();
* The counter which counts the number of sensitive results.
protected abstract SearchCounter getSensitiveResultsCounter();
* The method defines how the sensitive results are identified.
protected abstract Set<Long> getSensitiveResults(
EarlybirdRequestContext requestContext,
EarlybirdResponse earlybirdResponse) throws Exception;
* Get a set of tweets which should be excluded from the sensitive results set.
protected abstract Set<Long> getExceptedResults(EarlybirdRequestContext requestContext);
/**
 * Forwards the request to the wrapped service and inspects successful responses for sensitive
 * results through a listener. The response future itself is returned to the caller.
 *
 * @param requestContext the request context, also used by the listener for logging
 * @param service the downstream service
 * @return the future returned by the downstream service
 */
public final Future<EarlybirdResponse> apply(
final EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
Future<EarlybirdResponse> response = service.apply(requestContext);
response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
public void onSuccess(EarlybirdResponse earlybirdResponse) {
try {
// Only SUCCESS responses that carry search results, for requests with a parsed query,
// are inspected.
if (earlybirdResponse.responseCode == EarlybirdResponseCode.SUCCESS
&& earlybirdResponse.isSetSearchResults()
&& requestContext.getParsedQuery() != null) {
Set<Long> statusIds = getSensitiveResults(requestContext, earlybirdResponse);
Set<Long> exceptedIds = getExceptedResults(requestContext);
if (statusIds.size() > 0) {
logContent(requestContext, earlybirdResponse, statusIds);
} catch (Exception e) {
// NOTE(review): getParsedQuery() can be null here, because the null check above guards only
// the inspection branch. If it is, this handler itself throws a NullPointerException --
// confirm whether a null-safe message is needed.
getLogger().error("Caught exception while trying to log sensitive results for query: {}",
requestContext.getParsedQuery().serialize(), e);
public void onFailure(Throwable cause) {
return response;
/**
 * Writes an error-level log entry about the sensitive results that were found. The detailed
 * form, used when logDetails is set, also includes a base64 encoding of the request.
 *
 * @param requestContext the request context the results were produced for
 * @param earlybirdResponse the response that contained the results
 * @param statusIds the IDs identified as sensitive
 */
private void logContent(
final EarlybirdRequestContext requestContext,
final EarlybirdResponse earlybirdResponse,
final Set<Long> statusIds) {
if (logDetails) {
String base64Request;
try {
base64Request = ThriftUtils.toBase64EncodedString(requestContext.getRequest());
} catch (TException e) {
// Encoding failure must not prevent logging; substitute a placeholder string.
base64Request = "Failed to parse base 64 request";
getLogger().error("Found " + typeName
+ ": {} | "
+ "parsedQuery: {} | "
+ "request: {} | "
+ "base 64 request: {} | "
+ "response: {}",
} else {
// Short form of the log line, without request and response details.
getLogger().error("Found " + typeName + ": {} for parsedQuery {}",
Binary file not shown.
@ -1,27 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
/** A per-service filter for handling exceptions. */
public class ServiceExceptionHandlingFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
// Handler that every response future from the wrapped service is passed through.
private final EarlybirdResponseExceptionHandler exceptionHandler;
/** Creates a new ServiceExceptionHandlingFilter instance. */
public ServiceExceptionHandlingFilter(EarlybirdCluster cluster) {
// The cluster's stats name is the only argument given to the handler.
this.exceptionHandler = new EarlybirdResponseExceptionHandler(cluster.getNameForStats());
/**
 * Calls the wrapped service and hands its response future, together with the underlying
 * request, to the exception handler.
 *
 * @param requestContext the request context forwarded to the service
 * @param service the downstream service
 * @return the future returned by the exception handler
 */
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
return exceptionHandler.handleException(
requestContext.getRequest(), service.apply(requestContext));
Binary file not shown.
@ -1,81 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.HashMap;
import java.util.Map;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.search.earlybird_root.validators.FacetsResponseValidator;
import com.twitter.search.earlybird_root.validators.PassThroughResponseValidator;
import com.twitter.search.earlybird_root.validators.ServiceResponseValidator;
import com.twitter.search.earlybird_root.validators.TermStatsResultsValidator;
import com.twitter.search.earlybird_root.validators.TopTweetsResultsValidator;
import com.twitter.util.Function;
import com.twitter.util.Future;
* Filter responsible for handling invalid responses returned by downstream services, and
* translating them into EarlybirdResponseExceptions.
public class ServiceResponseValidationFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private final Map<EarlybirdRequestType, ServiceResponseValidator<EarlybirdResponse>>
requestTypeToResponseValidators = new HashMap<>();
private final EarlybirdCluster cluster;
* Creates a new filter for handling invalid response
public ServiceResponseValidationFilter(EarlybirdCluster cluster) {
this.cluster = cluster;
ServiceResponseValidator<EarlybirdResponse> passThroughValidator =
new PassThroughResponseValidator();
.put(EarlybirdRequestType.FACETS, new FacetsResponseValidator(cluster));
.put(EarlybirdRequestType.RECENCY, passThroughValidator);
.put(EarlybirdRequestType.RELEVANCE, passThroughValidator);
.put(EarlybirdRequestType.STRICT_RECENCY, passThroughValidator);
.put(EarlybirdRequestType.TERM_STATS, new TermStatsResultsValidator(cluster));
.put(EarlybirdRequestType.TOP_TWEETS, new TopTweetsResultsValidator(cluster));
/**
 * Calls the wrapped service and post-processes its response. A null response becomes a failed
 * future, a SUCCESS response is handed to the validators, and any other response code is
 * converted into an invalid-response result.
 *
 * @param requestContext the request context forwarded to the service
 * @param service the downstream service
 * @return a future with the validated or transformed response, or a failed future carrying
 *     an IllegalStateException when the service returned null
 */
public Future<EarlybirdResponse> apply(
final EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
return service.apply(requestContext).flatMap(
new Function<EarlybirdResponse, Future<EarlybirdResponse>>() {
public Future<EarlybirdResponse> apply(EarlybirdResponse response) {
if (response == null) {
return Future.exception(new IllegalStateException(
cluster + " returned null response"));
if (response.getResponseCode() == EarlybirdResponseCode.SUCCESS) {
// Successful responses go through the validator map.
return requestTypeToResponseValidators
// Non-SUCCESS response codes are rewritten with a message naming the cluster and the code.
return Future.value(EarlybirdResponseMergeUtil.transformInvalidResponse(
String.format("Failure from %s (%s)", cluster, response.getResponseCode())));
Binary file not shown.
@ -1,12 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
public interface ServingRangeProvider {
* Get a ServingRange implementation.
* Usually backed by either TierInfoWrapper or RootClusterBoundaryInfo.
ServingRange getServingRange(EarlybirdRequestContext requestContext, boolean useBoundaryOverride);
Binary file not shown.
@ -1,30 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
* A filter that will set the clientId of the request to the strato HttpEndpoint Attribution.
* <p>
* If the clientId is already set to something non-null then that value is used.
* If the clientId is null but Attribution.httpEndpoint() contains a value it will be set as
* the clientId.
public class StratoAttributionClientIdFilter extends
SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
public Future<EarlybirdResponse> apply(
EarlybirdRequest request, Service<EarlybirdRequest, EarlybirdResponse> service
) {
if (request.getClientId() == null) {
return service.apply(request);
Binary file not shown.
@ -1,24 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
/** A top level filter for handling exceptions. */
public class TopLevelExceptionHandlingFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
// Handler that every response future from the wrapped service is passed through.
private final EarlybirdResponseExceptionHandler exceptionHandler;
/** Creates a new TopLevelExceptionHandlingFilter instance. */
public TopLevelExceptionHandlingFilter() {
// "top_level" is the fixed name this filter gives its handler.
this.exceptionHandler = new EarlybirdResponseExceptionHandler("top_level");
/**
 * Calls the wrapped service and hands its response future, together with the request, to the
 * exception handler.
 *
 * @param request the incoming request
 * @param service the downstream service
 * @return the future returned by the exception handler
 */
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
return exceptionHandler.handleException(request, service.apply(request));
Binary file not shown.
@ -1,30 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestUtil;
import com.twitter.util.Future;
* A filter that unsets some request fields that make sense only on the SuperRoot, before sending
* them to the individual roots.
public class UnsetSuperRootFieldsFilter extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
// Passed through to EarlybirdRequestUtil.unsetSuperRootFields() on every request.
private final boolean unsetFollowedUserIds;
public UnsetSuperRootFieldsFilter() {
public UnsetSuperRootFieldsFilter(boolean unsetFollowedUserIds) {
this.unsetFollowedUserIds = unsetFollowedUserIds;
/**
 * Forwards to the wrapped service the request returned by
 * EarlybirdRequestUtil.unsetSuperRootFields().
 *
 * @param request the incoming request
 * @param service the downstream service
 * @return the downstream service's response future
 */
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
return service.apply(EarlybirdRequestUtil.unsetSuperRootFields(request, unsetFollowedUserIds));
Binary file not shown.
@ -1,44 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import javax.inject.Inject;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchRateCounter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
public class VeryRecentTweetsFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private static final String DECIDER_KEY = "enable_very_recent_tweets";
private static final SearchRateCounter VERY_RECENT_TWEETS_NOT_MODIFIED =
private static final SearchRateCounter VERY_RECENT_TWEETS_ENABLED =
private final SearchDecider decider;
public VeryRecentTweetsFilter(
SearchDecider decider
) {
this.decider = decider;
public Future<EarlybirdResponse> apply(
EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service
) {
if (decider.isAvailable(DECIDER_KEY)) {
} else {
return service.apply(request);
Binary file not shown.
Before Width: | Height: | Size: 60 KiB |
Binary file not shown.
@ -1,176 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.List;
import java.util.Map;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.TierResponse;
* Collection of EarlybirdResponses and associated stats to be merged.
public class AccumulatedResponses {
// The list of the successful responses from all earlybird futures. This does not include empty
// responses resulting from null requests.
private final List<EarlybirdResponse> successResponses;
// The list of the unsuccessful responses from all earlybird futures.
private final List<EarlybirdResponse> errorResponses;
// the list of max statusIds seen in each earlybird.
private final List<Long> maxIds;
// the list of min statusIds seen in each earlybird.
private final List<Long> minIds;
private final EarlyTerminationInfo mergedEarlyTerminationInfo;
private final boolean isMergingAcrossTiers;
private final PartitionCounts partitionCounts;
private final int numSearchedSegments;
// Immutable holder for the partition totals and per-tier responses that accompany a merge.
public static final class PartitionCounts {
private final int numPartitions;
private final int numSuccessfulPartitions;
private final List<TierResponse> perTierResponse;
// Stores the three values as given. The list reference is kept without copying.
public PartitionCounts(int numPartitions, int numSuccessfulPartitions, List<TierResponse>
perTierResponse) {
this.numPartitions = numPartitions;
this.numSuccessfulPartitions = numSuccessfulPartitions;
this.perTierResponse = perTierResponse;
public int getNumPartitions() {
return numPartitions;
public int getNumSuccessfulPartitions() {
return numSuccessfulPartitions;
// Returns the same list instance that was passed to the constructor.
public List<TierResponse> getPerTierResponse() {
return perTierResponse;
* Create AccumulatedResponses
// Stores every argument in the field of the same name. The lists are kept by reference, not
// copied.
public AccumulatedResponses(List<EarlybirdResponse> successResponses,
List<EarlybirdResponse> errorResponses,
List<Long> maxIds,
List<Long> minIds,
EarlyTerminationInfo mergedEarlyTerminationInfo,
boolean isMergingAcrossTiers,
PartitionCounts partitionCounts,
int numSearchedSegments) {
this.successResponses = successResponses;
this.errorResponses = errorResponses;
this.maxIds = maxIds;
this.minIds = minIds;
this.mergedEarlyTerminationInfo = mergedEarlyTerminationInfo;
this.isMergingAcrossTiers = isMergingAcrossTiers;
this.partitionCounts = partitionCounts;
this.numSearchedSegments = numSearchedSegments;
public List<EarlybirdResponse> getSuccessResponses() {
return successResponses;
public List<EarlybirdResponse> getErrorResponses() {
return errorResponses;
public List<Long> getMaxIds() {
return maxIds;
public List<Long> getMinIds() {
return minIds;
public EarlyTerminationInfo getMergedEarlyTerminationInfo() {
return mergedEarlyTerminationInfo;
public boolean foundError() {
return !errorResponses.isEmpty();
* Tries to return a merged EarlybirdResponse that propagates as much information from the error
* responses as possible.
* If all error responses have the same error response code, the merged response will have the
* same error response code, and the debugString/debugInfo on the merged response will be set to
* the debugString/debugInfo of one of the merged responses.
* If the error responses have at least 2 different response codes, TRANSIENT_ERROR will be set
* on the merged response. Also, we will look for the most common error response code, and will
* propagate the debugString/debugInfo from an error response with that response code.
public EarlybirdResponse getMergedErrorResponse() {
// Find a response that has the most common error response code.
// NOTE(review): maxCount is not reassigned anywhere in the lines visible here. If it really
// stays 0, the comparison below is always true and the last error response is selected
// rather than one with the most common code -- confirm.
int maxCount = 0;
EarlybirdResponse errorResponseWithMostCommonErrorResponseCode = null;
// Number of error responses seen per response code.
Map<EarlybirdResponseCode, Integer> responseCodeCounts = Maps.newHashMap();
for (EarlybirdResponse errorResponse : errorResponses) {
EarlybirdResponseCode responseCode = errorResponse.getResponseCode();
Integer responseCodeCount = responseCodeCounts.get(responseCode);
if (responseCodeCount == null) {
responseCodeCount = 0;
responseCodeCounts.put(responseCode, responseCodeCount);
if (responseCodeCount > maxCount) {
errorResponseWithMostCommonErrorResponseCode = errorResponse;
// If all error responses have the same response code, set it on the merged response.
// Otherwise, set TRANSIENT_ERROR on the merged response.
EarlybirdResponseCode mergedResponseCode = EarlybirdResponseCode.TRANSIENT_ERROR;
if (responseCodeCounts.size() == 1) {
mergedResponseCode = responseCodeCounts.keySet().iterator().next();
EarlybirdResponse mergedResponse = new EarlybirdResponse()
// Propagate the debugString/debugInfo of the selected error response to the merged response.
// NOTE(review): the selected response is still null when errorResponses is empty, so the
// dereference below would throw -- presumably callers check foundError() first; confirm.
if (errorResponseWithMostCommonErrorResponseCode.isSetDebugString()) {
if (errorResponseWithMostCommonErrorResponseCode.isSetDebugInfo()) {
// Set the numPartitions and numPartitionsSucceeded on the mergedResponse
return mergedResponse;
public boolean isMergingAcrossTiers() {
return isMergingAcrossTiers;
public boolean isMergingPartitionsWithinATier() {
return !isMergingAcrossTiers;
public PartitionCounts getPartitionCounts() {
return partitionCounts;
public int getNumSearchedSegments() {
return numSearchedSegments;
@ -1,26 +0,0 @@
sources = ["*.java"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
Normal file
Normal file
Binary file not shown.
Binary file not shown.
@ -1,9 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
public interface EarlyTerminateTierMergePredicate {
* Do we have enough results so far that we can early terminate and not continue onto next tier?
boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination);
Binary file not shown.
@ -1,176 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.logging.DebugMessageBuilder;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
* Collects debug messages to attach to EarlybirdResponse
class EarlybirdResponseDebugMessageBuilder {
private static final Logger LOG =
private static final Logger TOO_MANY_FAILED_PARTITIONS_LOG =
protected final SearchCounter insufficientValidResponseCounter =
protected final SearchCounter validPartitionResponseCounter =
// the combined debug string for all earlybird responses
private final StringBuilder debugString;
* A message builder backed by the same {@link #debugString} above.
private final DebugMessageBuilder debugMessageBuilder;
private static final Joiner JOINER = Joiner.on(", ");
EarlybirdResponseDebugMessageBuilder(EarlybirdRequest request) {
EarlybirdResponseDebugMessageBuilder(DebugMessageBuilder.Level level) {
this.debugString = new StringBuilder();
this.debugMessageBuilder = new DebugMessageBuilder(debugString, level);
/**
 * Derives the debug level for a request.
 *
 * @param request the request whose debug settings are read
 * @return the level mapped from the request's debug mode when that mode is set and positive;
 *     otherwise DEBUG_BASIC when debug options are set; otherwise DEBUG_NONE
 */
private static DebugMessageBuilder.Level getDebugLevel(EarlybirdRequest request) {
// An explicit positive debug mode takes precedence over debug options.
if (request.isSetDebugMode() && request.getDebugMode() > 0) {
return DebugMessageBuilder.getDebugLevel(request.getDebugMode());
} else if (request.isSetDebugOptions()) {
return DebugMessageBuilder.Level.DEBUG_BASIC;
} else {
return DebugMessageBuilder.Level.DEBUG_NONE;
protected boolean isDebugMode() {
return debugMessageBuilder.getDebugLevel() > 0;
void append(String msg) {
void debugAndLogWarning(String msg) {
if (isDebugMode()) {
void debugDetailed(String format, Object... args) {
debugAtLevel(DebugMessageBuilder.Level.DEBUG_DETAILED, format, args);
void debugVerbose(String format, Object... args) {
debugAtLevel(DebugMessageBuilder.Level.DEBUG_VERBOSE, format, args);
void debugVerbose2(String format, Object... args) {
debugAtLevel(DebugMessageBuilder.Level.DEBUG_VERBOSE_2, format, args);
/**
 * Formats a debug message once, when either the message builder is at the given level or
 * debug logging is enabled on LOG.
 *
 * @param level the minimum builder level at which the message applies
 * @param format a String.format pattern
 * @param args the arguments for the pattern
 */
void debugAtLevel(DebugMessageBuilder.Level level, String format, Object... args) {
boolean levelOK = debugMessageBuilder.isAtLeastLevel(level);
if (levelOK || LOG.isDebugEnabled()) {
// We check both modes here in order to build the formatted message only once.
String message = String.format(format, args);
if (levelOK) {
String debugString() {
return debugString.toString();
DebugMessageBuilder getDebugMessageBuilder() {
return debugMessageBuilder;
/**
 * Builds the message reporting that too few partitions returned a valid response.
 *
 * @param searchQuery the query being served; may be null
 * @param numSuccessResponses the number of valid responses received
 * @param numPartitions the total number of partitions queried
 * @param successThreshold the threshold that was not met
 */
void logBelowSuccessThreshold(ThriftSearchQuery searchQuery, int numSuccessResponses,
int numPartitions, double successThreshold) {
// Both query forms fall back to the literal text "null" when the query or the field is absent.
String rawQuery = (searchQuery != null && searchQuery.isSetRawQuery())
? "[" + searchQuery.getRawQuery() + "]" : "null";
String serializedQuery = (searchQuery != null && searchQuery.isSetSerializedQuery())
? "[" + searchQuery.getSerializedQuery() + "]" : "null";
// Not enough successful responses from partitions.
String errorMessage = String.format(
"Only %d valid responses returned out of %d partitions for raw query: %s"
+ " serialized query: %s. Lower than threshold of %s",
numSuccessResponses, numPartitions, rawQuery, serializedQuery, successThreshold);
/**
 * Records debug information about a single partition or tier response.
 *
 * @param earlybirdRequest the request that produced the response, used in messages
 * @param partitionTierName the name of the partition or tier, used in messages
 * @param response the response to describe
 */
void logResponseDebugInfo(EarlybirdRequest earlybirdRequest,
String partitionTierName,
EarlybirdResponse response) {
// Copy a non-empty debug string from the response into the combined debug string.
if (response.isSetDebugString() && !response.getDebugString().isEmpty()) {
debugString.append(String.format("Received response from [%s] with debug string [%s]",
partitionTierName, response.getDebugString())).append("\n");
if (!response.isSetResponseCode()) {
"Received Earlybird null response code for query [%s] from [%s]",
earlybirdRequest, partitionTierName));
// SUCCESS, PARTITION_SKIPPED, PARTITION_DISABLED and TIER_SKIPPED are not reported as errors.
} else if (response.getResponseCode() != EarlybirdResponseCode.SUCCESS
&& response.getResponseCode() != EarlybirdResponseCode.PARTITION_SKIPPED
&& response.getResponseCode() != EarlybirdResponseCode.PARTITION_DISABLED
&& response.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED) {
"Received Earlybird response error [%s] for query [%s] from [%s]",
response.getResponseCode(), earlybirdRequest, partitionTierName));
// At the verbose-2 level the whole response is recorded. At the verbose level only the
// returned tweet IDs are recorded, and only when there is at least one result.
if (debugMessageBuilder.isVerbose2()) {
debugVerbose2("Earlybird [%s] returned response: %s", partitionTierName, response);
} else if (debugMessageBuilder.isVerbose()) {
if (response.isSetSearchResults() && response.getSearchResults().getResultsSize() > 0) {
String ids = JOINER.join(Iterables.transform(
new Function<ThriftSearchResult, Long>() {
public Long apply(ThriftSearchResult result) {
return result.getId();
debugVerbose("Earlybird [%s] returned TweetIDs: %s", partitionTierName, ids);
Binary file not shown.
@ -1,604 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import scala.runtime.BoxedUnit;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
import com.twitter.search.common.util.earlybird.ResultsUtil;
import com.twitter.search.earlybird.thrift.EarlybirdDebugInfo;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.MultiwayMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.search.earlybird_root.common.EarlybirdRequestUtil;
import com.twitter.util.Function;
import com.twitter.util.Future;
* Base EarlybirdResponseMerger containing basic logic to merge EarlybirdResponse objects
public abstract class EarlybirdResponseMerger implements EarlyTerminateTierMergePredicate {
private static final Logger LOG = LoggerFactory.getLogger(EarlybirdResponseMerger.class);
private static final Logger MIN_SEARCHED_STATUS_ID_LOGGER =
private static final SearchCounter NO_SEARCH_RESULT_COUNTER =
private static final SearchCounter NO_RESPONSES_TO_MERGE =
private static final SearchCounter EARLYBIRD_RESPONSE_NO_MORE_RESULTS =
private static final String PARTITION_OR_TIER_COUNTER_NAME_FORMAT =
protected final EarlybirdResponseDebugMessageBuilder responseMessageBuilder;
protected final EarlybirdRequestContext requestContext;
protected final ImmutableList<Future<EarlybirdResponse>> responses;
protected AccumulatedResponses accumulatedResponses;
static final Map<EarlybirdRequestType, SearchCounter> MERGER_CREATED_STATS =
static final Map<EarlybirdRequestType, SearchCounter>
static final Map<EarlybirdRequestType, SearchCounter>
/**
 * Exports one counter per EarlybirdRequestType and returns them as an immutable enum map.
 *
 * @param statPattern a String.format pattern that receives the request type's normalized
 *     name and yields the counter name
 * @return an immutable map from each request type to its exported counter
 */
private static Map<EarlybirdRequestType, SearchCounter> perRequestTypeCounterImmutableMap(
String statPattern) {
Map<EarlybirdRequestType, SearchCounter> statsMap = Maps.newEnumMap(EarlybirdRequestType.class);
// Every enum constant gets an entry, so lookups by request type find a counter.
for (EarlybirdRequestType earlybirdRequestType : EarlybirdRequestType.values()) {
String statName = String.format(statPattern, earlybirdRequestType.getNormalizedName());
statsMap.put(earlybirdRequestType, SearchCounter.export(statName));
return Maps.immutableEnumMap(statsMap);
public static final com.google.common.base.Function<EarlybirdResponse, Map<Long, Integer>>
response -> response.getSearchResults() == null
? null
: response.getSearchResults().getHitCounts();
private final ChainMerger chainMerger;
private class ChainMerger {
private final EarlybirdRequestContext requestContext;
private final ResponseAccumulator responseAccumulator;
private final List<Future<EarlybirdResponse>> responses;
private final EarlybirdResponseDebugMessageBuilder responseMessageBuilder;
private int currentFutureIndex = -1;
// Stores the collaborators used by merge(). All four arguments are kept by reference.
public ChainMerger(EarlybirdRequestContext requestContext,
ResponseAccumulator responseAccumulator,
List<Future<EarlybirdResponse>> responses,
EarlybirdResponseDebugMessageBuilder responseMessageBuilder) {
this.requestContext = requestContext;
this.responseAccumulator = responseAccumulator;
this.responses = responses;
this.responseMessageBuilder = responseMessageBuilder;
// Waits on the future at currentFutureIndex, converts a failure into a response object, and
// then either returns a merged response or recurses into merge() for the next future.
// NOTE(review): several statements of this method are not present in this excerpt (for example
// the index advance and the bodies of the builder chains); restore them from the original file.
public Future<EarlybirdResponse> merge() {
// 'responseFutures' should always be sorted.
// When returned by EarlybirdScatterGather service, the responses are sorted by partition ID.
// When returned by EarlybirdChainedScatterGatherService,
// responses are sorted descending by tier start date. See:
// com.twitter.search.earlybird_root.EarlybirdChainedScatterGatherService.TIER_COMPARATOR.
// When merging responses from partitions, we want to wait for responses from all partitions,
// so the order in which we wait for those results does not matter. When merging responses
// from tiers, we want to wait for the response from the latest. If we don't need any more
// responses to compute the final response, then we don't need to wait for the responses from
// other tiers. If we cannot terminate early, then we want to wait for the responses from the
// second tier, and so on.
// We do not need to have any explicit synchronization, because:
// 1. The callbacks for future_i are set by the flatMap() callback on future_{i-1} (when
// recursively calling merge() inside the flatMap()).
// 2. Before setting the callbacks on future_i, future_{i-1}.flatMap() adds the response
// results to mergeHelper.
// 3. When the callbacks on future_i are set, the memory barrier between
// thread_running_future_{i-1} and thread_running_future_i is crossed. This guarantees
// that thread_running_future_i will see the updates to mergeHelper before it sees the
// callbacks. (Or thread_running_future_{i-1} == thread_running_future_i, in which case
// synchronization is not an issue, and correctness is guaranteed by the order in which
// things will run.)
// 4. The same reasoning applies to currentFutureIndex.
// All futures consumed: produce the final merged response from what was accumulated.
if (currentFutureIndex >= responses.size()) {
return Future.value(getTimedMergedResponse(responseAccumulator.getAccumulatedResults()));
final String partitionTierName =
responseAccumulator.getNameForLogging(currentFutureIndex, responses.size());
final String nameForEarlybirdResponseCodeStats =
currentFutureIndex, responses.size());
// If a tier in the chain throws an exception, convert it to a null response, and let the
// mergeHelper handle it appropriately.
return responses.get(currentFutureIndex)
.handle(Function.func(t -> {
// Cancellation, timeout and every other failure each get their own response object.
if (FinagleUtil.isCancelException(t)) {
return new EarlybirdResponse()
} else if (FinagleUtil.isTimeoutException(t)) {
return new EarlybirdResponse()
} else {
if (responseMessageBuilder.isDebugMode()) {
String.format("[%s] failed, exception [%s]",
partitionTierName, t.toString()));
LOG.warn("exception response from: " + partitionTierName, t);
return new EarlybirdResponse()
.flatMap(Function.func(response -> {
// Skipped partitions/tiers are excluded from the per-partition/tier counter below.
if ((response.getResponseCode() != EarlybirdResponseCode.PARTITION_SKIPPED)
&& (response.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED)) {
String.format(PARTITION_OR_TIER_COUNTER_NAME_FORMAT, partitionTierName))
if (response.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR) {
// the request has been cancelled, no need to proceed
return Future.value(response);
rewriteResponseCodeIfSearchResultsMissing(requestContext, partitionTierName, response);
// Stop walking the chain as soon as the accumulator says enough has been gathered.
if (responseAccumulator.shouldEarlyTerminateMerge(EarlybirdResponseMerger.this)) {
return Future.value(getTimedMergedResponse(
// Otherwise continue with the next future in the list.
return merge();
// Detects a SUCCESS response that carries no searchResults and logs a warning naming the
// partition/tier, the request and the response.
// NOTE(review): the method name implies the response code is also rewritten, but no such
// statement is present in this excerpt; confirm against the original file.
private void rewriteResponseCodeIfSearchResultsMissing(
EarlybirdRequestContext earlybirdRequestContext,
String partitionTierName,
EarlybirdResponse response) {
// We always require searchResults to be set, even for term stats and facet requests.
// This is because searchResults contains important info such as pagination cursors
// like minSearchStatusId and minSearchedTimeSinceEpoch.
// We expect all successful responses to have searchResults set.
if (response.isSetResponseCode()
&& response.getResponseCode() == EarlybirdResponseCode.SUCCESS
&& response.getSearchResults() == null) {
LOG.warn("Received Earlybird response with null searchResults from [{}]"
+ " EarlybirdRequest [{}] EarlybirdResponse [{}] ",
partitionTierName, earlybirdRequestContext.getRequest(), response);
/**
 * Construct an EarlybirdResponseMerger to merge responses from multiple partitions or tiers
 * based on mode.
 *
 * @param requestContext the request being served; also used to build the debug message builder.
 * @param responses the response futures to merge; an immutable copy is kept on this instance.
 * @param responseAccumulator the accumulator handed to the internal ChainMerger.
 */
EarlybirdResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator responseAccumulator) {
this.requestContext = requestContext;
this.responses = ImmutableList.copyOf(responses);
this.responseMessageBuilder =
new EarlybirdResponseDebugMessageBuilder(requestContext.getRequest());
// NOTE(review): the ChainMerger receives the caller's list, not the immutable copy made above.
this.chainMerger = new ChainMerger(requestContext, responseAccumulator, responses,
/**
 * Get a response merger to merge the given responses.
 *
 * <p>The merger implementation is chosen from the request type reported by
 * {@code requestContext.getEarlybirdRequestType()}.
 *
 * @param requestContext the request whose type selects the merger.
 * @param responses the response futures handed to the merger.
 * @param helper the accumulator handed to the merger.
 * @param cluster forwarded only to the strict-recency merger.
 * @param featureSchemaMerger forwarded to the recency, strict-recency and relevance mergers.
 * @param numPartitions forwarded only to the relevance merger.
 * @throws RuntimeException if the request type is not supported by any merger.
 */
public static EarlybirdResponseMerger getResponseMerger(
EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator helper,
EarlybirdCluster cluster,
EarlybirdFeatureSchemaMerger featureSchemaMerger,
int numPartitions) {
EarlybirdRequestType type = requestContext.getEarlybirdRequestType();
switch (type) {
case FACETS:
return new FacetResponseMerger(requestContext, responses, helper);
// NOTE(review): the case labels for the remaining request types (and the default label) are
// not present in this excerpt; restore them from the original file.
return new TermStatisticsResponseMerger(requestContext, responses, helper);
return new RecencyResponseMerger(requestContext, responses, helper, featureSchemaMerger);
return new StrictRecencyResponseMerger(
requestContext, responses, helper, featureSchemaMerger, cluster);
return new RelevanceResponseMerger(
requestContext, responses, helper, featureSchemaMerger, numPartitions);
return new TopTweetsResponseMerger(requestContext, responses, helper);
// A space now separates the type name from the rest of the message; it used to render as
// "EarlybirdRequestType FOOis not supported by merge".
throw new RuntimeException("EarlybirdRequestType " + type + " is not supported by merge");
* This method can perform two types of merges:
* 1. merge responses within a tier from different partitions.
* 2. merge responses from multiple tiers.
public final Future<EarlybirdResponse> merge() {
return chainMerger.merge()
/**
 * Returns the function that checks if the minSearchedStatusID on the merged response is higher
 * than the max ID in the request.
 *
 * @param operator name of the request operator, spliced into the warning message.
 * @param requestMaxId the max ID from the request; the check is skipped when it is absent or
 *     equal to {@code Long.MAX_VALUE}.
 * @param stat a counter supplied by the caller (its use is not visible in this excerpt).
 */
private Function<EarlybirdResponse, BoxedUnit> checkMinSearchedStatusIdFunction(
final String operator, final Optional<Long> requestMaxId, final SearchCounter stat) {
return Function.cons(mergedResponse -> {
// Only successful responses that actually report a minSearchedStatusID are checked.
// Excluding Long.MAX_VALUE also keeps the "+ 1" below from overflowing.
if (requestMaxId.isPresent()
&& requestMaxId.get() != Long.MAX_VALUE
&& (mergedResponse.getResponseCode() == EarlybirdResponseCode.SUCCESS)
&& mergedResponse.isSetSearchResults()
&& mergedResponse.getSearchResults().isSetMinSearchedStatusID()) {
long minSearchedStatusId = mergedResponse.getSearchResults().getMinSearchedStatusID();
// We sometimes set minSearchedStatusId = max_id + 1 when a request times out even
// before any search happens.
// Check SEARCH-10134 for more details.
if (minSearchedStatusId > requestMaxId.get() + 1) {
// The message gains one "{}" placeholder per successful accumulated response below.
String logMessage = "Response has a minSearchedStatusID ({}) larger than request "
+ operator + " ({})."
+ "\nrequest type: {}"
+ "\nrequest: {}"
+ "\nmerged response: {}"
+ "\nSuccessful accumulated responses:";
List<Object> logMessageParams = Lists.newArrayList();
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
logMessage += "\naccumulated response: {}";
MIN_SEARCHED_STATUS_ID_LOGGER.warn(logMessage, logMessageParams.toArray());
// Delegates to getMergedResponse() and measures the elapsed wall time in nanoseconds, even
// when the merge throws (the measurement sits in the finally block).
// NOTE(review): what is done with totalTime is not visible in this excerpt.
private EarlybirdResponse getTimedMergedResponse(AccumulatedResponses accResponses) {
long start = System.nanoTime();
try {
return getMergedResponse(accResponses);
} finally {
long totalTime = System.nanoTime() - start;
private EarlybirdResponse initializeMergedSuccessResponseFromAccumulatedResponses() {
EarlybirdResponse mergedResponse = new EarlybirdResponse();
AccumulatedResponses.PartitionCounts partitionCounts =
return mergedResponse;
// Stores the accumulated responses on this merger and picks one of three merge paths:
// a blank response when nothing succeeded and nothing failed, a cross-tier merge, or a
// cross-partition merge.
private EarlybirdResponse getMergedResponse(AccumulatedResponses accResponses) {
// Kept as instance state because the merge helpers read it.
accumulatedResponses = accResponses;
EarlybirdResponse mergedResponse;
if (accumulatedResponses.getSuccessResponses().isEmpty()
&& !accumulatedResponses.foundError()) {
// No successful or error responses. This means that all tiers / partitions are intentionally
// skipped. Return a blank successful response.
mergedResponse = new EarlybirdResponse()
.setSearchResults(new ThriftSearchResults())
.setDebugString("No responses to merge, probably because all tiers/partitions "
+ "were skipped.");
} else if (accumulatedResponses.isMergingAcrossTiers()) {
mergedResponse = getMergedResponseAcrossTiers();
} else {
mergedResponse = getMergedResponseAcrossPartitions();
return mergedResponse;
// Merges responses gathered from several tiers: any accumulated error fails the whole merge,
// otherwise a success response is initialized and handed to internalMerge().
private EarlybirdResponse getMergedResponseAcrossTiers() {
// NOTE(review): the start of the following statement is not present in this excerpt.
|| accumulatedResponses.foundError());
// When merging across tiers, if we have one failed tier, we should fail the whole
// response. Note that due to early termination, if a tier that is old fails
// but the newer tiers return enough results, the failed tier won't show up
// here in accumulatedResponses -- the only tiers that show up here
// will be successful.
if (accumulatedResponses.foundError()) {
// The TierResponseAccumulator early terminates on the first error, so we should
// never get more than one error. This means that the getMergedErrorResponse will
// return an error response with the error code of that one error, and will never
// have to decide which error response to return if the error responses are all
// different.
// Perhaps we should just return accumulatedResponses.getErrorResponses().get(0);
Preconditions.checkState(accumulatedResponses.getErrorResponses().size() == 1);
return accumulatedResponses.getMergedErrorResponse();
} else {
EarlybirdResponse mergedResponse = initializeMergedSuccessResponseFromAccumulatedResponses();
return internalMerge(mergedResponse);
// Merges responses gathered from the partitions of one tier. With at least one successful
// response the success ratio decides between a real merge and a failure; with none, the
// merged error response is returned.
private EarlybirdResponse getMergedResponseAcrossPartitions() {
// NOTE(review): the start of the following statement is not present in this excerpt.
|| accumulatedResponses.foundError());
EarlybirdResponse mergedResponse;
// Unlike tier merging, one failed response doesn't mean the merged response should
// fail. If we have successful responses we can check the success ratio and if it's
// good we can still return a successful merge.
if (!accumulatedResponses.getSuccessResponses().isEmpty()) {
// We have at least one successful response, but still need to check the success ratio.
// mergedResponse is a SUCCESS response after this call, but we will
// set it to failure below if necessary.
mergedResponse = initializeMergedSuccessResponseFromAccumulatedResponses();
int numSuccessResponses = mergedResponse.getNumSuccessfulPartitions();
int numPartitions = mergedResponse.getNumPartitions();
double successThreshold = getSuccessResponseThreshold();
if (checkSuccessPartitionRatio(numSuccessResponses, numPartitions, successThreshold)) {
// Success! Proceed with merging.
mergedResponse = internalMerge(mergedResponse);
} else {
// NOTE(review): the failure handling for a low success ratio is truncated in this excerpt.
requestContext.getRequest().getSearchQuery(), numSuccessResponses, numPartitions,
} else {
mergedResponse = accumulatedResponses.getMergedErrorResponse();
return mergedResponse;
* Derived classes should implement the logic to merge their specific type of results (recency,
* relevance, Top Tweets, etc.).
protected abstract EarlybirdResponse internalMerge(EarlybirdResponse response);
protected abstract SearchTimerStats getMergedResponseTimer();
* Do we have enough results so far that we can early terminate and not continue onto next tier?
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination) {
// We are taking the most conservative tier response merging.
// This is the most conservative merge logic --- as long as we have some results, we should
// not return anything from the next tier. This may cause not ideal experience where a
// page is not full, but the use can still scroll further.
return foundEarlyTermination || totalResultsFromSuccessfulShards >= 1;
private void saveMergedDebugString(EarlybirdResponse mergedResponse) {
if (responseMessageBuilder.isDebugMode()) {
String message = responseMessageBuilder.debugString();
if (!accumulatedResponses.getSuccessResponses().isEmpty()
&& accumulatedResponses.getSuccessResponses().get(0).isSetDebugInfo()) {
EarlybirdDebugInfo debugInfo =
private double getSuccessResponseThreshold() {
EarlybirdRequest request = requestContext.getRequest();
if (request.isSetSuccessfulResponseThreshold()) {
double successfulResponseThreshold = request.getSuccessfulResponseThreshold();
Preconditions.checkArgument(successfulResponseThreshold > 0,
"Invalid successfulResponseThreshold %s", successfulResponseThreshold);
Preconditions.checkArgument(successfulResponseThreshold <= 1.0,
"Invalid successfulResponseThreshold %s", successfulResponseThreshold);
return successfulResponseThreshold;
} else {
return getDefaultSuccessResponseThreshold();
protected abstract double getDefaultSuccessResponseThreshold();
private static boolean checkSuccessPartitionRatio(
int numSuccessResponses,
int numPartitions,
double goodResponseThreshold) {
Preconditions.checkArgument(goodResponseThreshold > 0.0,
"Invalid goodResponseThreshold %s", goodResponseThreshold);
return numSuccessResponses >= (numPartitions * goodResponseThreshold);
* Merge hit counts from all results.
protected Map<Long, Integer> aggregateHitCountMap() {
Map<Long, Integer> hitCounts = ResultsUtil
.aggregateCountMap(accumulatedResponses.getSuccessResponses(), HIT_COUNT_GETTER);
if (hitCounts.size() > 0) {
if (responseMessageBuilder.isDebugMode()) {
responseMessageBuilder.append("Hit counts:\n");
for (Map.Entry<Long, Integer> entry : hitCounts.entrySet()) {
responseMessageBuilder.append(String.format(" %10s seconds: %d hits\n",
entry.getKey() / 1000, entry.getValue()));
return hitCounts;
return null;
* Returns the number of results to keep as part of merge-collection.
protected final int computeNumResultsToKeep() {
return EarlybirdResponseMergeUtil.computeNumResultsToKeep(requestContext.getRequest());
* Remove exact duplicates (same id) from the result set.
protected static void trimExactDups(ThriftSearchResults searchResults, TrimStats trimStats) {
int numResults = searchResults.getResultsSize();
List<ThriftSearchResult> oldResults = searchResults.getResults();
List<ThriftSearchResult> newResults = Lists.newArrayListWithCapacity(numResults);
HashSet<Long> resultSet = Sets.newHashSetWithExpectedSize(numResults);
for (ThriftSearchResult result : oldResults) {
if (resultSet.contains(result.getId())) {
protected final int addResponsesToCollector(MultiwayMergeCollector collector) {
int totalResultSize = 0;
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
if (response.isSetSearchResults()) {
totalResultSize += response.getSearchResults().getResultsSize();
return totalResultSize;
/**
 * Given a sorted searchResults (for recency, sorted by ID; for relevance, sorted by score),
 * returns the first 'computeNumResultsToKeep()' number of results.
 *
 * @param searchResults the searchResults to be truncated.
 * @param trimStats receives the number of results cut from the tail.
 */
protected final void truncateResults(ThriftSearchResults searchResults, TrimStats trimStats) {
int numResultsRequested = computeNumResultsToKeep();
// Integer.MAX_VALUE is treated as "keep everything"; otherwise keep at most what is available.
int to = numResultsRequested == Integer.MAX_VALUE ? searchResults.getResultsSize()
: Math.min(numResultsRequested, searchResults.getResultsSize());
if (searchResults.getResultsSize() > to) {
trimStats.setResultsTruncatedFromTailCount(searchResults.getResultsSize() - to);
if (to > 0) {
searchResults.setResults(searchResults.getResults().subList(0, to));
} else {
// No more results for the next page
// NOTE(review): the statements of this branch are not present in this excerpt.
EarlybirdRequest getEarlybirdRequest() {
return requestContext.getRequest();
Binary file not shown.
@ -1,353 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Sets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.logging.DebugMessageBuilder;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.ranking.thriftjava.ThriftFacetRankingOptions;
import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant;
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftFacetCount;
import com.twitter.search.earlybird.thrift.ThriftFacetCountMetadata;
import com.twitter.search.earlybird.thrift.ThriftFacetFieldResults;
import com.twitter.search.earlybird.thrift.ThriftFacetResults;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
* Merger class to merge facets EarlybirdResponse objects
public class FacetResponseMerger extends EarlybirdResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(FacetResponseMerger.class);
private static final SearchTimerStats TIMER =
SearchTimerStats.export("merge_facets", TimeUnit.NANOSECONDS, false, true);
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
private final DebugMessageBuilder debugMessageBuilder;
* Constructor to create the merger
public FacetResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode) {
super(requestContext, responses, mode);
debugMessageBuilder = responseMessageBuilder.getDebugMessageBuilder();
debugMessageBuilder.verbose("--- Request Received: %s", requestContext.getRequest());
protected SearchTimerStats getMergedResponseTimer() {
return TIMER;
protected double getDefaultSuccessResponseThreshold() {
// Merges facet results: builds the per-field facet info map from the accumulated responses,
// fills one ThriftFacetFieldResults per field into facetsResponse, and (outside term stats
// filtering mode) sums hit statistics over the successful responses.
// NOTE(review): several statements of this method are not present in this excerpt.
protected EarlybirdResponse internalMerge(EarlybirdResponse facetsResponse) {
final Map<String, FacetsResultsUtils.FacetFieldInfo> facetFieldInfoMap =
new HashMap<>();
final Set<Long> userIDWhitelist = new HashSet<>();
// First, parse the responses and build up our facet info map.
boolean termStatsFilteringMode = FacetsResultsUtils.prepareFieldInfoMap(
requestContext.getRequest().getFacetRequest(), facetFieldInfoMap);
// Iterate through all futures and get results.
collectResponsesAndPopulateMap(facetFieldInfoMap, userIDWhitelist);
// Next, aggregate the top facets and update the blender response.
.setFacetResults(new ThriftFacetResults()
.setFacetFields(new HashMap<>())
// keep track of how many facets a user contributed - this map gets reset for every field
Map<Long, Integer> perFieldAntiGamingMap = new HashMap<>();
// this one is used for images and twimges
Map<Long, Integer> imagesAntiGamingMap = new HashMap<>();
// Created lazily, the first time a TWIMG facet field is encountered.
Set<String> twimgDedupSet = null;
for (final Map.Entry<String, FacetsResultsUtils.FacetFieldInfo> entry
: facetFieldInfoMap.entrySet()) {
// reset for each field
String field = entry.getKey();
// The two image-related fields share one anti-gaming map; every other field uses the
// per-field map.
final Map<Long, Integer> antiGamingMap;
if (field.equals(EarlybirdFieldConstant.IMAGES_FACET)
|| field.equals(EarlybirdFieldConstant.TWIMG_FACET)) {
antiGamingMap = imagesAntiGamingMap;
} else {
antiGamingMap = perFieldAntiGamingMap;
ThriftFacetFieldResults results = new ThriftFacetFieldResults();
FacetsResultsUtils.FacetFieldInfo info = entry.getValue();
results.setTopFacets(new ArrayList<>());
FacetsResultsUtils.fillTopLanguages(info, results);
if (info.topFacets != null && !info.topFacets.isEmpty()) {
fillFacetFieldResults(info, antiGamingMap, results);
if (field.equals(EarlybirdFieldConstant.TWIMG_FACET)) {
if (twimgDedupSet == null) {
twimgDedupSet = Sets.newHashSet();
FacetsResultsUtils.dedupTwimgFacet(twimgDedupSet, results, debugMessageBuilder);
facetsResponse.getFacetResults().putToFacetFields(entry.getKey(), results);
if (!termStatsFilteringMode) {
// in term stats filtering mode, if doing it here would break term stats filtering
// Update the numHitsProcessed on ThriftSearchResults.
int numHitsProcessed = 0;
int numPartitionsEarlyTerminated = 0;
for (EarlybirdResponse earlybirdResponse: accumulatedResponses.getSuccessResponses()) {
ThriftSearchResults searchResults = earlybirdResponse.getSearchResults();
if (searchResults != null) {
numHitsProcessed += searchResults.getNumHitsProcessed();
numPartitionsEarlyTerminated += searchResults.getNumPartitionsEarlyTerminated();
ThriftSearchResults searchResults = new ThriftSearchResults();
searchResults.setResults(new ArrayList<>()); // required field
LOG.debug("Facets call completed successfully: {}", facetsResponse);
return facetsResponse;
// Selects the top facets of one field into 'results'. Ranking limits are read from the
// request's facet ranking options (falling back to the defaults declared below), and each
// candidate facet is tested against them in turn.
// NOTE(review): many statements of this method (the debug calls' opening lines, the actions
// taken when a facet is excluded, the array population and the result collection) are not
// present in this excerpt; restore them from the original file.
private void fillFacetFieldResults(FacetsResultsUtils.FacetFieldInfo facetFieldInfo,
Map<Long, Integer> antiGamingMap,
ThriftFacetFieldResults results) {
// Defaults used when the request does not specify ranking options.
int minWeightedCount = 0;
int minSimpleCount = 0;
int maxPenaltyCount = Integer.MAX_VALUE;
double maxPenaltyCountRatio = 1;
boolean excludePossiblySensitiveFacets = false;
boolean onlyReturnFacetsWithDisplayTweet = false;
// -1 disables the per-user (anti-gaming) limit; see the check on maxHitsPerUser below.
int maxHitsPerUser = -1;
EarlybirdRequest request = requestContext.getRequest();
if (request.getFacetRequest() != null) {
ThriftFacetRankingOptions rankingOptions = request.getFacetRequest().getFacetRankingOptions();
if (request.getSearchQuery() != null) {
maxHitsPerUser = request.getSearchQuery().getMaxHitsPerUser();
if (rankingOptions != null) {
LOG.debug("FacetsResponseMerger: Using rankingOptions={}", rankingOptions);
// Each option overrides its default only when explicitly set on the request.
if (rankingOptions.isSetMinCount()) {
minWeightedCount = rankingOptions.getMinCount();
if (rankingOptions.isSetMinSimpleCount()) {
minSimpleCount = rankingOptions.getMinSimpleCount();
if (rankingOptions.isSetMaxPenaltyCount()) {
maxPenaltyCount = rankingOptions.getMaxPenaltyCount();
if (rankingOptions.isSetMaxPenaltyCountRatio()) {
maxPenaltyCountRatio = rankingOptions.getMaxPenaltyCountRatio();
if (rankingOptions.isSetExcludePossiblySensitiveFacets()) {
excludePossiblySensitiveFacets = rankingOptions.isExcludePossiblySensitiveFacets();
if (rankingOptions.isSetOnlyReturnFacetsWithDisplayTweet()) {
onlyReturnFacetsWithDisplayTweet = rankingOptions.isOnlyReturnFacetsWithDisplayTweet();
} else {
LOG.warn("earlybirdRequest.getFacetRequest() is null");
ThriftFacetCount[] topFacetsArray = new ThriftFacetCount[facetFieldInfo.topFacets.size()];
Arrays.sort(topFacetsArray, Collections.<ThriftFacetCount>reverseOrder(
// Never collect more facets than are available.
int numResults = capFacetFieldWidth(facetFieldInfo.fieldRequest.numResults);
if (topFacetsArray.length < numResults) {
numResults = topFacetsArray.length;
int collected = 0;
for (int i = 0; i < topFacetsArray.length; ++i) {
ThriftFacetCount count = topFacetsArray[i];
if (onlyReturnFacetsWithDisplayTweet
&& (!count.isSetMetadata() || !count.getMetadata().isSetStatusId()
|| count.getMetadata().getStatusId() == -1)) {
// status id must be set
if (excludePossiblySensitiveFacets && count.isSetMetadata()
&& count.getMetadata().isStatusPossiblySensitive()) {
// the display tweet may be offensive or NSFW
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
debugMessageBuilder.verbose2("[%d] FacetsResponseMerger EXCLUDED: offensive or NSFW %s, "
+ "explanation: %s",
i, facetCountSummary(count),
// Anti-gaming: count how many facets each user contributed and flag the facet once the
// user exceeds maxHitsPerUser.
boolean filterOutUser = false;
if (maxHitsPerUser != -1 && count.isSetMetadata()) {
ThriftFacetCountMetadata metadata = count.getMetadata();
if (!metadata.dontFilterUser) {
long twitterUserId = metadata.getTwitterUserId();
int numResultsFromUser = 1;
if (twitterUserId != -1) {
Integer perUser = antiGamingMap.get(twitterUserId);
if (perUser != null) {
numResultsFromUser = perUser + 1;
filterOutUser = numResultsFromUser > maxHitsPerUser;
antiGamingMap.put(twitterUserId, numResultsFromUser);
// Filter facets those don't meet the basic criteria.
if (count.getSimpleCount() < minSimpleCount) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
"[%d] FacetsResponseMerger EXCLUDED: simpleCount:%d < minSimpleCount:%d, %s",
i, count.getSimpleCount(), minSimpleCount, facetCountSummary(count));
if (count.getWeightedCount() < minWeightedCount) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
"[%d] FacetsResponseMerger EXCLUDED: weightedCount:%d < minWeightedCount:%d, %s",
i, count.getWeightedCount(), minWeightedCount, facetCountSummary(count));
if (filterOutUser) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
"[%d] FacetsResponseMerger EXCLUDED: antiGaming filterd user: %d: %s",
i, count.getMetadata().getTwitterUserId(), facetCountSummary(count));
if (count.getPenaltyCount() > maxPenaltyCount) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
// NOTE(review): maxPenaltyCount is declared as int above and the penalty count is formatted
// with %d in facetCountSummary, yet this pattern uses %.3f for both. If the message is
// rendered with java.util.Formatter this throws IllegalFormatConversionException - confirm
// and switch to %d. The label is also misspelled ("EXCLUCED").
"[%d] FacetsResponseMerger EXCLUCED: penaltyCount:%.3f > maxPenaltyCount:%.3f, %s",
i, count.getPenaltyCount(), maxPenaltyCount, facetCountSummary(count));
// The cast forces floating-point division for the penalty ratio.
if (((double) count.getPenaltyCount() / count.getSimpleCount()) > maxPenaltyCountRatio) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
"[%d] FacetsResponseMerger EXCLUDED: penaltyCountRatio: %.3f > "
+ "maxPenaltyCountRatio:%.3f, %s",
i, (double) count.getPenaltyCount() / count.getSimpleCount(), maxPenaltyCountRatio,
if (collected >= numResults) {
private static int capFacetFieldWidth(int numResults) {
int ret = numResults;
if (numResults <= 0) {
// this in theory should not be allowed, but for now we issue the request with goodwill length
ret = 10; // default to 10 for future merge code to terminate correctly
if (numResults >= 100) {
ret = 100;
return ret;
// Renders a one-line summary of a facet count for debug messages: label, simple / weighted /
// penalty counts and score, plus the status id when metadata is set.
// NOTE(review): the tails of both argument lists are not present in this excerpt.
private static String facetCountSummary(final ThriftFacetCount count) {
if (count.isSetMetadata()) {
return String.format("Label: %s (s:%d, w:%d, p:%d, score:%.2f, sid:%d (%s))",
count.getFacetLabel(), count.getSimpleCount(), count.getWeightedCount(),
count.getPenaltyCount(), count.getScore(), count.getMetadata().getStatusId(),
} else {
return String.format("Label: %s (s:%d, w:%d, p:%d, score:%.2f)", count.getFacetLabel(),
count.getSimpleCount(), count.getWeightedCount(), count.getPenaltyCount(),
// Iterate through the backend responses and fill up the FacetFieldInfo map.
// Only successful responses that carry facet results are considered.
// NOTE(review): the statements that update the map, the whitelist and the counter 'i' are
// not present in this excerpt.
private void collectResponsesAndPopulateMap(
final Map<String, FacetsResultsUtils.FacetFieldInfo> facetFieldInfoMap,
final Set<Long> userIDWhitelist) {
// Next, iterate through the backend responses.
int i = 0;
for (EarlybirdResponse facetsResponse : accumulatedResponses.getSuccessResponses()) {
if (facetsResponse.isSetFacetResults()) {
LOG.debug("Facet response from earlybird {} is {} ", i, facetsResponse.getFacetResults());
ThriftFacetResults facetResults = facetsResponse.getFacetResults();
if (facetResults.isSetUserIDWhitelist()) {
facetResults, facetFieldInfoMap,
LOG.debug("Earlybird facet response total size {}", i);
Binary file not shown.
@ -1,44 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
// ResponseAccumulator used when merging responses from partitions: it never asks for early
// termination and reports that it is not merging across tiers.
// NOTE(review): some method bodies are not present in this excerpt.
public final class PartitionResponseAccumulator extends ResponseAccumulator {
private static final String TARGET_TYPE_PARTITION = "partition";
// Log name is the fixed prefix followed by the response index, e.g. "partition3".
public String getNameForLogging(int responseIndex, int numTotalResponses) {
return TARGET_TYPE_PARTITION + responseIndex;
public String getNameForEarlybirdResponseCodeStats(int responseIndex, int numTotalResponses) {
// We do not need to differentiate between partitions: we just want to get the number of
// responses returned by Earlybirds, for each EarlybirdResponseCode.
// Partition merging always waits for every response.
boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger) {
return false;
// Skipped responses need no bookkeeping here.
public void handleSkippedResponse(EarlybirdResponseCode responseCode) { }
public void handleErrorResponse(EarlybirdResponse response) {
// Successful-but-empty responses count as successful partitions.
public AccumulatedResponses.PartitionCounts getPartitionCounts() {
return new AccumulatedResponses.PartitionCounts(getNumResponses(),
getSuccessResponses().size() + getSuccessfulEmptyResponseCount(), null);
protected boolean isMergingAcrossTiers() {
return false;
Binary file not shown.
@ -1,638 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
import com.twitter.search.common.relevance.utils.ResultComparators;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.RecencyMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
* Merger class to merge recency search EarlybirdResponse objects.
public class RecencyResponseMerger extends EarlybirdResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(RecencyResponseMerger.class);
private static final SearchTimerStats RECENCY_TIMER =
SearchTimerStats.export("merge_recency", TimeUnit.NANOSECONDS, false, true);
// Allowed replication lag relative to all replicas. Replication lag exceeding
// this amount may result in some tweets from the replica not returned in search.
private static final long ALLOWED_REPLICATION_LAG_MS = 10000;
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
private static final SearchCounter RECENCY_CLEARED_EARLY_TERMINATION_COUNT =
* Results were truncated because merged results exceeded the requested numResults.
* Results smaller than the merged minSearchedStatusId were filtered out.
new EarlyTerminationTrimmingStats("recency_partition_merging");
static final EarlyTerminationTrimmingStats TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
new EarlyTerminationTrimmingStats("recency_tier_merging");
static class EarlyTerminationTrimmingStats {
enum Type {
* The whole result was not terminated at all.
* Was terminated before we did any trimming.
* Was not terminated when merged, but results were filtered due to min/max ranges.
* Was not terminated when merged, but results were truncated.
* Was not terminated when merged, but results were filtered due to min/max ranges and
* truncated.
* When the search asks for X results, and we get exactly X results back, without trimming
* or truncating on the tail side (min_id side), we still mark the search as early terminated.
* This is because later tiers possibly have more results.
* A counter tracking merged responses for each {@link EarlyTerminationTrimmingStats.Type}
* defined above.
private final ImmutableMap<Type, SearchCounter> searchCounterMap;
EarlyTerminationTrimmingStats(String prefix) {
Map<Type, SearchCounter> tempMap = Maps.newEnumMap(Type.class);
SearchCounter.export(prefix + "_not_early_terminated_after_merging"));
SearchCounter.export(prefix + "_early_terminated_before_merge_trimming"));
SearchCounter.export(prefix + "_early_terminated_after_merging_truncated"));
SearchCounter.export(prefix + "_early_terminated_after_merging_filtered"));
SearchCounter.export(prefix + "_early_terminated_after_merging_filtered_and_truncated"));
SearchCounter.export(prefix + "_early_terminated_after_merging_got_exact_num_results"));
searchCounterMap = Maps.immutableEnumMap(tempMap);
public SearchCounter getCounterFor(Type type) {
return searchCounterMap.get(type);
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
public RecencyResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode,
EarlybirdFeatureSchemaMerger featureSchemaMerger) {
super(requestContext, responses, mode);
this.featureSchemaMerger = featureSchemaMerger;
protected double getDefaultSuccessResponseThreshold() {
protected SearchTimerStats getMergedResponseTimer() {
// Merges recency results: collects the results of all successful responses, trims them to
// the fully searched [minId, maxId] range, and then settles the early-termination state of
// the merged response.
// NOTE(review): several statements of this method are not present in this excerpt.
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
// The merged maxSearchedStatusId and minSearchedStatusId
long maxId = findMaxFullySearchedStatusID();
long minId = findMinFullySearchedStatusID();
RecencyMergeCollector collector = new RecencyMergeCollector(responses.size());
int totalResultSize = addResponsesToCollector(collector);
ThriftSearchResults searchResults = collector.getAllSearchResults();
TrimStats trimStats = trimResults(searchResults, minId, maxId);
setMergedMaxSearchedStatusId(searchResults, maxId);
searchResults, minId, trimStats.getResultsTruncatedFromTailCount() > 0);
// Override some components of the response as appropriate to real-time.
// Within a tier, clearing early termination at the tier bottom takes precedence; otherwise
// early termination is derived from what trimming removed.
if (accumulatedResponses.isMergingPartitionsWithinATier()
&& clearEarlyTerminationIfReachingTierBottom(mergedResponse)) {
} else {
setEarlyTerminationForTrimmedResults(mergedResponse, trimStats);
responseMessageBuilder.debugVerbose("Hits: %s %s", totalResultSize, trimStats);
"Hash Partitioned Earlybird call completed successfully: %s", mergedResponse);
return mergedResponse;
* When we reached tier bottom, pagination can stop working even though we haven't got
* all results. e.g.
* Results from partition 1: [101 91 81], minSearchedStatusId is 81
* Results from Partition 2: [102 92], minSearchedStatusId is 92, not early terminated.
* After merge, we get [102, 101, 92], with minResultId == 92. Since results from
* partition 2 are not early terminated, 92 is the tier bottom here. Since results are
* filtered, early termination for merged result is set to true, so blender will call again,
* with maxDocId == 91. This time we get result:
* Results from partition 1: [91 81], minSearchedStatusId is 81
* Results from partition 2: [], minSearchedStatusId is still 92
* After merge we get [] and minSearchedStatusId is still 92. No progress can be made on
* pagination and clients get stuck.
* So in this case, we clear the early termination flag to tell blender there is no more
* result in this tier. Tweets below tier bottom will be missed, but that also happens
* without this step, as the next pagination call will return empty results anyway.
* So even if there is NOT overlap between tiers, this is still better.
* Return true if early termination is cleared due to this, otherwise return false.
* To be safe, we do nothing here to keep existing behavior and only override it in
* StrictRecencyResponseMerger.
protected boolean clearEarlyTerminationIfReachingTierBottom(EarlybirdResponse mergedResponse) {
// Base behavior: never clears the flag, so mergedResponse is not touched here.
return false;
* Determines if the merged response should be early-terminated when it has exactly as many
* trimmed results as requested, and is not early-terminated for other reasons.
protected boolean shouldEarlyTerminateWhenEnoughTrimmedResults() {
// Always true in this class; setEarlyTerminationForTrimmedResults() consults this hook when
// the merged response holds exactly computeNumResultsToKeep() results.
return true;
* If the end results were trimmed in any way, reflect that in the response as a query that was
* early terminated. A response can be either (1) truncated because we merged more results than
* what was asked for with numResults, or (2) we filtered results that were smaller than the
* merged minSearchedStatusId.
* @param mergedResponse the merged response.
* @param trimStats trim stats for this merge.
private void setEarlyTerminationForTrimmedResults(
EarlybirdResponse mergedResponse,
TrimStats trimStats) {
// NOTE(review): the bodies of most branches below are not visible in this excerpt; only the
// branch conditions and the statements shown are documented.
responseMessageBuilder.debugVerbose("Checking for merge trimming, trimStats %s", trimStats);
// Stats object for the current merge kind (partitions within a tier vs. tiers).
EarlyTerminationTrimmingStats stats = getEarlyTerminationTrimmingStats();
EarlyTerminationInfo earlyTerminationInfo = mergedResponse.getEarlyTerminationInfo();
if (!earlyTerminationInfo.isEarlyTerminated()) {
// Not early terminated yet: check whether trimming removed anything on the min-id side or
// truncated the tail.
if (trimStats.getMinIdFilterCount() > 0 || trimStats.getResultsTruncatedFromTailCount() > 0) {
responseMessageBuilder.debugVerbose("Setting early termination, trimStats: %s, results: %s",
trimStats, mergedResponse);
addEarlyTerminationReasons(earlyTerminationInfo, trimStats);
// Distinguish the three cases: filtered and truncated, filtered only, truncated only.
if (trimStats.getMinIdFilterCount() > 0
&& trimStats.getResultsTruncatedFromTailCount() > 0) {
} else if (trimStats.getMinIdFilterCount() > 0) {
} else if (trimStats.getResultsTruncatedFromTailCount() > 0) {
} else {
// Cannot happen given the enclosing condition; fails loudly if it ever does.
Preconditions.checkState(false, "Invalid TrimStats: %s", trimStats);
// Nothing was trimmed, but the response holds exactly the number of results to keep and the
// subclass hook allows treating that as early termination.
} else if ((computeNumResultsToKeep() == mergedResponse.getSearchResults().getResultsSize())
&& shouldEarlyTerminateWhenEnoughTrimmedResults()) {
} else {
} else {
// Even if the results were already marked as early terminated, we can add additional
// reasons for debugging (if the merged results were filtered or truncated).
addEarlyTerminationReasons(earlyTerminationInfo, trimStats);
private void addEarlyTerminationReasons(
EarlyTerminationInfo earlyTerminationInfo,
TrimStats trimStats) {
if (trimStats.getMinIdFilterCount() > 0) {
if (trimStats.getResultsTruncatedFromTailCount() > 0) {
private EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStats() {
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
return getEarlyTerminationTrimmingStatsForPartitions();
} else {
return getEarlyTerminationTrimmingStatsForTiers();
protected EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForPartitions() {
protected EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForTiers() {
* If we get enough results, no need to go on.
* If one of the partitions early terminated, we can't go on or else there could be a gap.
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination) {
int resultsRequested = computeNumResultsToKeep();
// Stop when some response was early terminated, or when enough results have been gathered.
boolean shouldEarlyTerminate = foundEarlyTermination
|| totalResultsFromSuccessfulShards >= resultsRequested;
// This condition only holds when we stop because of early termination while still short of
// the requested count. NOTE(review): the body of this branch is not visible in this excerpt.
if (shouldEarlyTerminate && totalResultsFromSuccessfulShards < resultsRequested) {
return shouldEarlyTerminate;
* Find the min status id that has been _completely_ searched across all partitions. The
* largest min status id across all partitions.
* @return the min searched status id found
protected long findMinFullySearchedStatusID() {
List<Long> minIds = accumulatedResponses.getMinIds();
if (minIds.isEmpty()) {
return Long.MIN_VALUE;
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
// When merging partitions, the min ID should be the largest among the min IDs.
return Collections.max(accumulatedResponses.getMinIds());
} else {
// When merging tiers, the min ID should be the smallest among the min IDs.
return Collections.min(accumulatedResponses.getMinIds());
* Find the max status id that has been _completely_ searched across all partitions. The
* smallest max status id across all partitions.
* This is where we reconcile replication lag by selecting the oldest maxid from the
* partitions searched.
* @return the max searched status id found
protected long findMaxFullySearchedStatusID() {
// NOTE(review): lines of this method are missing from this excerpt - in particular whatever
// condition guards "return oldest" inside the loop. Only the visible statements are described.
List<Long> maxIDs = accumulatedResponses.getMaxIds();
if (maxIDs.isEmpty()) {
return Long.MAX_VALUE;
// The last element is taken as the newest max id. NOTE(review): this presumes maxIDs is in
// ascending order at this point; no sort is visible here - confirm.
final long newest = maxIDs.get(maxIDs.size() - 1);
final long newestTimestamp = SnowflakeIdParser.getTimestampFromTweetId(newest);
for (int i = 0; i < maxIDs.size(); i++) {
long oldest = maxIDs.get(i);
long oldestTimestamp = SnowflakeIdParser.getTimestampFromTweetId(oldest);
// Time gap in ms between the newest max id and the candidate at index i.
long deltaMs = newestTimestamp - oldestTimestamp;
if (i == 0) {
LOG.debug("Max delta is {}", deltaMs);
// i is logged as the number of lagging replicas when the returned candidate is not the first.
if (i != 0) {
LOG.debug("{} partition replicas lagging more than {} ms", i, ALLOWED_REPLICATION_LAG_MS);
return oldest;
// Can't get here - by this point oldest == newest, and delta is 0.
return newest;
* Trim the ThriftSearchResults if we have enough results, to return the first
* 'computeNumResultsToKeep()' number of results.
* If we don't have enough results after trimming, this function will first try to back fill
* older results, then newer results
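* @param searchResults ThriftSearchResults that hold the to be trimmed List<ThriftSearchResult>
* @param mergedMin the merged min searched status ID, used as the lower bound when trimming
* @param mergedMax the merged max searched status ID, used as the upper bound when trimming
* @return TrimStats containing statistics about how many results were removed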
protected TrimStats trimResults(
ThriftSearchResults searchResults,
long mergedMin,
long mergedMax) {
// NOTE(review): closing braces and the first line of one multi-line call are not visible in
// this excerpt.
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
// no results, no trimming needed
return TrimStats.EMPTY_STATS;
if (requestContext.getRequest().getSearchQuery().isSetSearchStatusIds()) {
// Not a normal search, no trimming needed
return TrimStats.EMPTY_STATS;
TrimStats trimStats = new TrimStats();
// Exact duplicates are dropped before the "enough results" check below.
trimExactDups(searchResults, trimStats);
int numResultsRequested = computeNumResultsToKeep();
if (shouldSkipTrimmingWhenNotEnoughResults(searchResults, numResultsRequested)) {
// We don't have enough results, let's not do trimming
return trimStats;
// Only taken when merging partitions within a tier. The line below is the argument list of a
// call whose first line is not shown (presumably trimResultsBasedSearchedRange, whose
// parameter list matches - confirm).
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
searchResults, trimStats, numResultsRequested, mergedMin, mergedMax);
// Respect "computeNumResultsToKeep()" here, only keep "computeNumResultsToKeep()" results.
truncateResults(searchResults, trimStats);
return trimStats;
* When there's not enough results, we don't remove results based on the searched range.
* This has a tradeoff: with this, we don't reduce our recall when we already don't have enough
* results. However, with this, we can lose results while paginating because we return results
* outside of the valid searched range.
protected boolean shouldSkipTrimmingWhenNotEnoughResults(
ThriftSearchResults searchResults, int numResultsRequested) {
return searchResults.getResultsSize() <= numResultsRequested;
* Trim results based on search range. The search range [x, y] is determined by:
* x is the maximum of the minimum search IDs;
* y is the minimum of the maximum search IDs.
* Ids outside of this range are removed.
* If we do not get enough results after the removal, we add IDs back until we get enough results.
* We first add IDs back from the older side. If there are still not enough results,
* we start adding IDs back from the newer side.
private void trimResultsBasedSearchedRange(ThriftSearchResults searchResults,
TrimStats trimStats,
int numResultsRequested,
long mergedMin,
long mergedMax) {
// NOTE(review): many lines of this method are not visible in this excerpt (closing braces, the
// statements that restore the original results, the bodies of the back-fill conditions). The
// comments describe only what is shown.
// we have more results than requested, let's do some trimming
// Save the original results before trimming
List<ThriftSearchResult> originalResults = searchResults.getResults();
// Note the argument order: max first, then min.
filterResultsByMergedMinMaxIds(searchResults, mergedMax, mergedMin, trimStats);
// This does happen. It is hard to say what we should do here so we just return the original
// result here.
if (searchResults.getResultsSize() == 0) {
// Clean up min/max filtered count, since we're bringing back whatever we just filtered.
if (LOG.isDebugEnabled() || responseMessageBuilder.isDebugMode()) {
String errMsg = "No trimming is done as filtered results is empty. "
+ "maxId=" + mergedMax + ",minId=" + mergedMin;
responseMessageBuilder.append(errMsg + "\n");
} else {
// oops! we're trimming too many results. Let's put some back
if (searchResults.getResultsSize() < numResultsRequested) {
List<ThriftSearchResult> trimmedResults = searchResults.getResults();
// Ids at both ends of the filtered list; used below to tell which original results lie
// beyond the filtered range on the older and on the newer side.
long firstTrimmedResultId = trimmedResults.get(0).getId();
long lastTrimmedResultId = trimmedResults.get(trimmedResults.size() - 1).getId();
// First, try to back fill with older results
int i = 0;
for (; i < originalResults.size(); ++i) {
ThriftSearchResult result = originalResults.get(i);
// Candidate is older (smaller id) than the last result that survived filtering.
if (result.getId() < lastTrimmedResultId) {
if (trimmedResults.size() >= numResultsRequested) {
// still not enough results? back fill with newer results
// find the oldest of the newer results
if (trimmedResults.size() < numResultsRequested) {
// still not enough results? back fill with newer results
// find the oldest of the newer results
// Walks the original list from its end towards its start.
for (i = originalResults.size() - 1; i >= 0; --i) {
ThriftSearchResult result = originalResults.get(i);
// Candidate is newer (larger id) than the first result that survived filtering.
if (result.getId() > firstTrimmedResultId) {
if (trimmedResults.size() >= numResultsRequested) {
// newer results were added to the back of the list, re-sort
Collections.sort(trimmedResults, ResultComparators.ID_COMPARATOR);
/**
 * Computes the min searched status ID for the merged results.
 *
 * NOTE(review): the body of the first guard and the statement that stores {@code merged} are
 * not visible in this excerpt; only the computation of {@code merged} is documented.
 *
 * @param searchResults the merged results; may be null or empty.
 * @param currentMergedMin the merged min searched status ID computed from the responses.
 * @param resultsWereTrimmed whether results were trimmed from the merged list.
 */
protected void setMergedMinSearchedStatusId(
ThriftSearchResults searchResults,
long currentMergedMin,
boolean resultsWereTrimmed) {
if (accumulatedResponses.getMinIds().isEmpty()) {
long merged;
// With no results to look at, fall back to the min computed from the responses.
if (searchResults == null
|| !searchResults.isSetResults()
|| searchResults.getResultsSize() == 0) {
merged = currentMergedMin;
} else {
List<ThriftSearchResult> results = searchResults.getResults();
// Only the two ends of the list are inspected; the smaller of them is taken.
long firstResultId = results.get(0).getId();
long lastResultId = results.get(results.size() - 1).getId();
merged = Math.min(firstResultId, lastResultId);
if (!resultsWereTrimmed) {
// If the results were trimmed, we want to set minSearchedStatusID to the smallest
// tweet ID in the response. Otherwise, we want to take the min between that, and
// the current minSearchedStatusID.
merged = Math.min(merged, currentMergedMin);
/**
 * Computes the max searched status ID for the merged results.
 *
 * NOTE(review): the body of the first guard and the statement that stores {@code merged} are
 * not visible in this excerpt; only the computation of {@code merged} is documented.
 *
 * @param searchResults the merged results; may be null or empty.
 * @param currentMergedMax the merged max searched status ID computed from the responses.
 */
private void setMergedMaxSearchedStatusId(
ThriftSearchResults searchResults,
long currentMergedMax) {
if (accumulatedResponses.getMaxIds().isEmpty()) {
long merged;
// With no results to look at, fall back to the max computed from the responses.
if (searchResults == null
|| !searchResults.isSetResults()
|| searchResults.getResultsSize() == 0) {
merged = currentMergedMax;
} else {
List<ThriftSearchResult> results = searchResults.getResults();
// Only the two ends of the list are inspected; the larger of them is compared with the
// current merged max and the greater value wins.
long firstResultId = results.get(0).getId();
long lastResultId = results.get(results.size() - 1).getId();
long maxResultId = Math.max(firstResultId, lastResultId);
merged = Math.max(maxResultId, currentMergedMax);
protected static void filterResultsByMergedMinMaxIds(
ThriftSearchResults results, long maxStatusId, long minStatusId, TrimStats trimStats) {
List<ThriftSearchResult> trimedResults =
for (ThriftSearchResult result : results.getResults()) {
long statusId = result.getId();
if (statusId > maxStatusId) {
} else if (statusId < minStatusId) {
} else {
Binary file not shown.
@ -1,268 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.constants.thriftjava.ThriftLanguage;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.common.util.earlybird.ResultsUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.RelevanceMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
* Merger class to merge relevance search EarlybirdResponse objects
public class RelevanceResponseMerger extends EarlybirdResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(RelevanceResponseMerger.class);
private static final SearchTimerStats TIMER =
SearchTimerStats.export("merge_relevance", TimeUnit.NANOSECONDS, false, true);
/** Extracts the language histogram of a response; null when it has no search results. */
protected static final Function<EarlybirdResponse, Map<ThriftLanguage, Integer>> LANG_MAP_GETTER =
    earlybirdResponse -> {
      if (earlybirdResponse.getSearchResults() == null) {
        return null;
      }
      return earlybirdResponse.getSearchResults().getLanguageHistogram();
    };
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.8;
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
// The number of partitions is not meaningful when this merger is invoked through multi-tier
// merging.
private final int numPartitions;
/**
 * Creates a merger for relevance-ranked responses.
 *
 * @param requestContext forwarded to the superclass constructor.
 * @param responses the response futures to merge; forwarded to the superclass constructor.
 * @param mode the response accumulator; forwarded to the superclass constructor.
 * @param featureSchemaMerger must not be null.
 * @param numPartitions stored as-is; used as the upper bound when validating partition IDs.
 * @throws NullPointerException if {@code featureSchemaMerger} is null.
 */
public RelevanceResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode,
EarlybirdFeatureSchemaMerger featureSchemaMerger,
int numPartitions) {
super(requestContext, responses, mode);
this.featureSchemaMerger = Preconditions.checkNotNull(featureSchemaMerger);
this.numPartitions = numPartitions;
protected double getDefaultSuccessResponseThreshold() {
protected SearchTimerStats getMergedResponseTimer() {
// The shared static timer exported as "merge_relevance".
return TIMER;
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
// NOTE(review): several statements of this method are not visible in this excerpt (among
// them the bodies of the two isEmpty() checks below); only the shown lines are described.
final ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
long maxId = findMaxFullySearchedStatusID();
long minId = findMinFullySearchedStatusID();
// This merger only handles requests whose ranking mode is RELEVANCE.
Preconditions.checkState(searchQuery.getRankingMode() == ThriftSearchRankingMode.RELEVANCE);
// First get the results in score order (the default comparator for this merge collector).
RelevanceMergeCollector collector = new RelevanceMergeCollector(responses.size());
int totalResultSize = addResponsesToCollector(collector);
ThriftSearchResults searchResults = collector.getAllSearchResults();
TrimStats trimStats = trimResults(searchResults);
// From here on searchResults refers to the results object held by mergedResponse.
searchResults = mergedResponse.getSearchResults();
// maxId / minId are only relevant when at least one response reported a max / min id.
if (!accumulatedResponses.getMaxIds().isEmpty()) {
if (!accumulatedResponses.getMinIds().isEmpty()) {
LOG.debug("Hits: {} Removed duplicates: {}", totalResultSize, trimStats.getRemovedDupsCount());
LOG.debug("Hash Partition'ed Earlybird call completed successfully: {}", mergedResponse);
return mergedResponse;
* If any of the partitions has an early termination, the tier merge must also early terminate.
* If a partition early terminated (we haven't fully searched that partition), and we instead
* moved onto the next tier, there will be a gap of unsearched results.
* If our early termination condition was only if we had enough results, we could get bad quality
* results by only looking at 20 hits when asking for 20 results.
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination) {
// Don't use computeNumResultsToKeep because if returnAllResults is true, it will be
// Integer.MAX_VALUE and we will always log a stat that we didn't get enough results
int resultsRequested;
EarlybirdRequest request = requestContext.getRequest();
// Prefer the root-level result count when the request carries one; otherwise use the
// collector params of the search query.
if (request.isSetNumResultsToReturnAtRoot()) {
resultsRequested = request.getNumResultsToReturnAtRoot();
} else {
resultsRequested = request.getSearchQuery().getCollectorParams().getNumResultsToReturn();
// Early terminated while still short of the requested count.
// NOTE(review): the body of this branch is not visible in this excerpt.
if (foundEarlyTermination && totalResultsFromSuccessfulShards < resultsRequested) {
// The decision depends on foundEarlyTermination alone; the result count only feeds the
// check above.
return foundEarlyTermination;
* Merge language histograms from all queries.
* @return Merged per-language count map.
private Map<ThriftLanguage, Integer> aggregateLanguageHistograms() {
// NOTE(review): parts of this method are not visible in this excerpt (the call that consumes
// the success responses and LANG_MAP_GETTER, the append calls inside the loop).
// The totals are copied into a TreeMap.
Map<ThriftLanguage, Integer> totalLangCounts = new TreeMap<>(
accumulatedResponses.getSuccessResponses(), LANG_MAP_GETTER));
// The rest only builds debug output; it is skipped when there are no counts or when debug
// mode is off.
if (totalLangCounts.size() > 0) {
if (responseMessageBuilder.isDebugMode()) {
// NOTE(review): "Distrbution" is misspelled in the emitted debug text.
responseMessageBuilder.append("Language Distrbution:\n");
int count = 0;
for (Map.Entry<ThriftLanguage, Integer> entry : totalLangCounts.entrySet()) {
// One fixed-width "language:count" cell per entry.
String.format(" %10s:%6d", entry.getKey(), entry.getValue()));
// True after every fifth entry.
if (++count % 5 == 0) {
return totalLangCounts;
* Find the min status id that has been searched. Since no results are trimmed for Relevance mode,
* it should be the smallest among the min IDs.
private long findMinFullySearchedStatusID() {
// The min ID should be the smallest among the min IDs
return accumulatedResponses.getMinIds().isEmpty() ? 0
: Collections.min(accumulatedResponses.getMinIds());
* Find the max status id that has been searched. Since no results are trimmed for Relevance mode,
* it should be the largest among the max IDs.
private long findMaxFullySearchedStatusID() {
// The max ID should be the largest among the max IDs
return accumulatedResponses.getMaxIds().isEmpty() ? 0
: Collections.max(accumulatedResponses.getMaxIds());
* Return all the searchResults except duplicates.
* @param searchResults ThriftSearchResults that hold the to be trimmed List<ThriftSearchResult>
* @return TrimStats containing statistics about how many results were removed
private TrimStats trimResults(ThriftSearchResults searchResults) {
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
// no results, no trimming needed
return TrimStats.EMPTY_STATS;
if (requestContext.getRequest().getSearchQuery().isSetSearchStatusIds()) {
// Not a normal search, no trimming needed
return TrimStats.EMPTY_STATS;
TrimStats trimStats = new TrimStats();
trimExactDups(searchResults, trimStats);
truncateResults(searchResults, trimStats);
return trimStats;
private void publishNumResultsFromPartitionStatistics(EarlybirdResponse mergedResponse) {
// Keep track of all of the results that were kept after merging
Set<Long> mergedResults =
.map(result -> result.getId())
// For each successful response (pre merge), count how many of its results were kept post merge.
// Increment the appropriate stat.
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
if (!response.isSetEarlybirdServerStats()) {
int numResultsKept = 0;
for (ThriftSearchResult result
: EarlybirdResponseUtil.getResults(response).getResults()) {
if (mergedResults.contains(result.getId())) {
// We only update partition stats when the partition ID looks sane.
String tierName = response.getEarlybirdServerStats().getTierName();
int partition = response.getEarlybirdServerStats().getPartition();
if (partition >= 0 && partition < numPartitions) {
} else {
Binary file not shown.
@ -1,356 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.ResponseMergerUtils;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
* Accumulates EarlybirdResponse's and determines when to early terminate.
public abstract class ResponseAccumulator {
static class MinMaxSearchedIdStats {
/** How many results did we actually check */
private final SearchCounter checkedMaxMinSearchedStatusId;
private final SearchCounter unsetMaxSearchedStatusId;
private final SearchCounter unsetMinSearchedStatusId;
private final SearchCounter unsetMaxAndMinSearchedStatusId;
private final SearchCounter sameMinMaxSearchedIdWithoutResults;
private final SearchCounter sameMinMaxSearchedIdWithOneResult;
private final SearchCounter sameMinMaxSearchedIdWithResults;
private final SearchCounter flippedMinMaxSearchedId;
MinMaxSearchedIdStats(EarlybirdRequestType requestType) {
String statPrefix = "merge_helper_" + requestType.getNormalizedName();
checkedMaxMinSearchedStatusId = SearchCounter.export(statPrefix
+ "_max_min_searched_id_checks");
unsetMaxSearchedStatusId = SearchCounter.export(statPrefix
+ "_unset_max_searched_status_id");
unsetMinSearchedStatusId = SearchCounter.export(statPrefix
+ "_unset_min_searched_status_id");
unsetMaxAndMinSearchedStatusId = SearchCounter.export(statPrefix
+ "_unset_max_and_min_searched_status_id");
sameMinMaxSearchedIdWithoutResults = SearchCounter.export(statPrefix
+ "_same_min_max_searched_id_without_results");
sameMinMaxSearchedIdWithOneResult = SearchCounter.export(statPrefix
+ "_same_min_max_searched_id_with_one_results");
sameMinMaxSearchedIdWithResults = SearchCounter.export(statPrefix
+ "_same_min_max_searched_id_with_results");
flippedMinMaxSearchedId = SearchCounter.export(statPrefix
+ "_flipped_min_max_searched_id");
SearchCounter getCheckedMaxMinSearchedStatusId() {
return checkedMaxMinSearchedStatusId;
SearchCounter getFlippedMinMaxSearchedId() {
return flippedMinMaxSearchedId;
SearchCounter getUnsetMaxSearchedStatusId() {
return unsetMaxSearchedStatusId;
SearchCounter getUnsetMinSearchedStatusId() {
return unsetMinSearchedStatusId;
SearchCounter getUnsetMaxAndMinSearchedStatusId() {
return unsetMaxAndMinSearchedStatusId;
SearchCounter getSameMinMaxSearchedIdWithoutResults() {
return sameMinMaxSearchedIdWithoutResults;
SearchCounter getSameMinMaxSearchedIdWithOneResult() {
return sameMinMaxSearchedIdWithOneResult;
SearchCounter getSameMinMaxSearchedIdWithResults() {
return sameMinMaxSearchedIdWithResults;
/** One {@link MinMaxSearchedIdStats} per request type, built once at class load. */
static final Map<EarlybirdRequestType, MinMaxSearchedIdStats> MIN_MAX_SEARCHED_ID_STATS_MAP;

static {
  EnumMap<EarlybirdRequestType, MinMaxSearchedIdStats> statsByRequestType =
      Maps.newEnumMap(EarlybirdRequestType.class);
  for (EarlybirdRequestType type : EarlybirdRequestType.values()) {
    MinMaxSearchedIdStats statsForType = new MinMaxSearchedIdStats(type);
    statsByRequestType.put(type, statsForType);
  }
  MIN_MAX_SEARCHED_ID_STATS_MAP = Maps.immutableEnumMap(statsByRequestType);
}
// Merge has encountered at least one early terminated response.
private boolean foundEarlyTermination = false;
// Empty but successful response counter (E.g. when a tier or partition is skipped)
private int successfulEmptyResponseCount = 0;
// The list of the successful responses from all earlybird futures. This does not include empty
// responses resulting from null requests.
private final List<EarlybirdResponse> successResponses = new ArrayList<>();
// The list of the error responses from all earlybird futures.
private final List<EarlybirdResponse> errorResponses = new ArrayList<>();
// the list of max statusIds seen in each earlybird.
private final List<Long> maxIds = new ArrayList<>();
// the list of min statusIds seen in each earlybird.
private final List<Long> minIds = new ArrayList<>();
private int numResponses = 0;
private int numResultsAccumulated = 0;
private int numSearchedSegments = 0;
* Returns a string that can be used for logging to identify a single response out of all the
* responses that are being merged.
* @param responseIndex the index of a response's partition or tier, depending on the type of
* responses being accumulated.
* @param numTotalResponses the total number of partitions or tiers that are being merged.
public abstract String getNameForLogging(int responseIndex, int numTotalResponses);
* Returns a string that is used to export per-EarlybirdResponseCode stats for partitions and tiers.
* @param responseIndex the index of a response's partition or tier.
* @param numTotalResponses the total number of partitions or tiers that are being merged.
* @return a string that is used to export per-EarlybirdResponseCode stats for partitions and tiers.
public abstract String getNameForEarlybirdResponseCodeStats(
int responseIndex, int numTotalResponses);
abstract boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger);
* Add an EarlybirdResponse.
public void addResponse(EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
EarlybirdRequest request,
EarlybirdResponse response) {
// NOTE(review): some statements of this method are not visible in this excerpt (the bodies of
// the skipped and error branches). Only the shown lines are described.
// The searched-segment count is accumulated for every response, whatever its response code.
numSearchedSegments += response.getNumSearchedSegments();
// Order matters: a skipped response also has a non-SUCCESS code, so it must be recognized
// before the error check.
if (isSkippedResponse(response)) {
// This is an empty response, no processing is required, just need to update statistics.
} else if (isErrorResponse(response)) {
} else {
handleSuccessfulResponse(responseMessageBuilder, request, response);
private boolean isErrorResponse(EarlybirdResponse response) {
return !response.isSetResponseCode()
|| response.getResponseCode() != EarlybirdResponseCode.SUCCESS;
private boolean isSkippedResponse(EarlybirdResponse response) {
return response.isSetResponseCode()
&& (response.getResponseCode() == EarlybirdResponseCode.PARTITION_SKIPPED
|| response.getResponseCode() == EarlybirdResponseCode.TIER_SKIPPED);
* Record a response corresponding to a skipped partition or skipped tier.
protected abstract void handleSkippedResponse(EarlybirdResponseCode responseCode);
* Handle an error response
protected abstract void handleErrorResponse(EarlybirdResponse response);
* Subclasses can override this to perform more successful response handling.
protected void extraSuccessfulResponseHandler(EarlybirdResponse response) { }
* Whether the helper is for merging results from partitions within a single tier.
protected final boolean isMergingPartitionsWithinATier() {
return !isMergingAcrossTiers();
* Whether the helper is for merging results across different tiers.
protected abstract boolean isMergingAcrossTiers();
* Record a successful response.
public final void handleSuccessfulResponse(
EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
EarlybirdRequest request,
EarlybirdResponse response) {
// NOTE(review): some statements of this method are not visible in this excerpt (including the
// tail of the recordMinMaxSearchedIdsAndUpdateStats call).
// Result counts and min/max searched ids are only taken from responses that carry results.
if (response.isSetSearchResults()) {
ThriftSearchResults searchResults = response.getSearchResults();
numResultsAccumulated += searchResults.getResultsSize();
recordMinMaxSearchedIdsAndUpdateStats(responseMessageBuilder, request, response,
// The flag is sticky: once any response is early terminated it stays true.
if (response.isSetEarlyTerminationInfo()
&& response.getEarlyTerminationInfo().isEarlyTerminated()) {
foundEarlyTermination = true;
/**
 * Checks which of the max/min searched status IDs are set on {@code searchResults} and passes
 * that on to {@link #updateMinMaxIdStats}.
 *
 * NOTE(review): the bodies of the two if-statements and the tail of the final call are not
 * visible in this excerpt.
 */
private void recordMinMaxSearchedIdsAndUpdateStats(
EarlybirdResponseDebugMessageBuilder responseMessageBuidler,
EarlybirdRequest request,
EarlybirdResponse response,
ThriftSearchResults searchResults) {
boolean isMaxIdSet = searchResults.isSetMaxSearchedStatusID();
boolean isMinIdSet = searchResults.isSetMinSearchedStatusID();
if (isMaxIdSet) {
if (isMinIdSet) {
updateMinMaxIdStats(responseMessageBuidler, request, response, searchResults, isMaxIdSet,
/**
 * Classifies the min/max searched status IDs of one response into the cases below.
 *
 * NOTE(review): the statements inside each branch are only partly visible in this excerpt (the
 * first line of each multi-line call is missing), so what each branch records is not
 * documented here.
 */
private void updateMinMaxIdStats(
EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
EarlybirdRequest request,
EarlybirdResponse response,
ThriftSearchResults searchResults,
boolean isMaxIdSet,
boolean isMinIdSet) {
// Now just track the stats.
EarlybirdRequestType requestType = EarlybirdRequestType.of(request);
// Stats are kept per request type.
MinMaxSearchedIdStats minMaxSearchedIdStats = MIN_MAX_SEARCHED_ID_STATS_MAP.get(requestType);
if (isMaxIdSet && isMinIdSet) {
// Both ids are set: look for a flipped pair (min > max) or an identical pair.
if (searchResults.getMinSearchedStatusID() > searchResults.getMaxSearchedStatusID()) {
// We do not expect this case to happen in production.
} else if (searchResults.getResultsSize() == 0
&& searchResults.getMaxSearchedStatusID() == searchResults.getMinSearchedStatusID()) {
// Identical ids and no results.
"Got no results, and same min/max searched ids. Request: %s, Response: %s",
request, response);
} else if (searchResults.getResultsSize() == 1
&& searchResults.getMaxSearchedStatusID() == searchResults.getMinSearchedStatusID()) {
// Identical ids and exactly one result.
"Got one results, and same min/max searched ids. Request: %s, Response: %s",
request, response);
} else if (searchResults.getMaxSearchedStatusID()
== searchResults.getMinSearchedStatusID()) {
// Identical ids with any other result count (0 and 1 were handled above).
"Got multiple results, and same min/max searched ids. Request: %s, Response: %s",
request, response);
} else if (!isMaxIdSet && isMinIdSet) {
// We do not expect this case to happen in production.
"Got unset maxSearchedStatusID. Request: %s, Response: %s", request, response);
} else if (isMaxIdSet && !isMinIdSet) {
// We do not expect this case to happen in production.
"Got unset minSearchedStatusID. Request: %s, Response: %s", request, response);
} else {
// Only remaining combination: neither id is set.
Preconditions.checkState(!isMaxIdSet && !isMinIdSet);
"Got unset maxSearchedStatusID and minSearchedStatusID. Request: %s, Response: %s",
request, response);
* Return partition counts with number of partitions, number of successful responses, and list of
* responses per tier.
public abstract AccumulatedResponses.PartitionCounts getPartitionCounts();
public final AccumulatedResponses getAccumulatedResults() {
return new AccumulatedResponses(successResponses,
// Getters are only intended to be used by subclasses. Other users should get data from
// AccumulatedResponses
// Current value of the numResponses counter.
int getNumResponses() {
return numResponses;
// Sum of getNumSearchedSegments() over the responses passed to addResponse().
int getNumSearchedSegments() {
return numSearchedSegments;
// The internal list of successful responses itself, not a copy.
List<EarlybirdResponse> getSuccessResponses() {
return successResponses;
// Total result count accumulated from successful responses that carried search results.
int getNumResultsAccumulated() {
return numResultsAccumulated;
// Current value of the empty-but-successful response counter.
int getSuccessfulEmptyResponseCount() {
return successfulEmptyResponseCount;
// True once at least one error response has been recorded.
boolean foundError() {
return !errorResponses.isEmpty();
// True once at least one successful response was early terminated.
boolean foundEarlyTermination() {
return foundEarlyTermination;
Binary file not shown.
@ -1,297 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
* A RecencyResponseMerger that prioritizes not losing results during pagination.
* As of now, this merger is used by Gnip to make sure that scrolling returns all results.
* The logic used for merging partitions is a bit tricky, because on one hand, we want to make sure
* that we do not miss results on the next pagination request; on the other hand, we want to return
* as many results as we can, and we want to set the minSearchedStatusID of the merged response as
* low as we can, in order to minimize the number of pagination requests.
* The merging logic is:
* Realtime cluster:
* 1. merge results from all partitions
* 2. if at least one partition response is early-terminated, set earlyTerminated = true
* on the merged response
* 3. set trimmingMinId = max(minSearchedStatusIDs of all partition responses)
* 4. trim all results to trimmingMinId
* 5. set minSearchedStatusID on the merged response to trimmingMinId
* 6. if we have more than numRequested results:
* - keep only the newest numRequested results
* - set minSearchedStatusID of the merged response to the lowest tweet ID in the response
* 7. if at least one partition response is not early-terminated, set
* tierBottomId = max(minSearchedStatusIDs of all non-early-terminated responses)
* (otherwise, set tierBottomId to some undefined value: -1, Long.MAX_VALUE, etc.)
* 8. if minSearchedStatusID of the merged response is the same as tierBottomId,
* clear the early-termination flag on the merged response
* The logic in steps 7 and 8 can be a little tricky to understand. They basically say: when we've
* exhausted the "least deep" partition in the realtime cluster, it's time to move to the full
* archive cluster (if we keep going past the "least deep" partition, we might miss results).
* Full archive cluster:
* 1. merge results from all partitions
* 2. if at least one partition response is early-terminated, set earlyTerminated = true
* on the merged response
* 3. set trimmingMinId to:
* - max(minSearchedStatusIDs of early-terminated responses), if at least one partition response
* is early-terminated
* - min(minSearchedStatusIDs of all responses), if all partition responses are not
* early-terminated
* 4. trim all results to trimmingMinId
* 5. set minSearchedStatusID of the merged response to trimmingMinId
* 6. if we have more than numRequested results:
* - keep only the newest numRequested results
* - set minSearchedStatusID of the merged response to the lowest tweet ID in the response
* The logic in step 3 can be a little tricky to understand. On one hand, if we always set
* trimmingMinId to the highest minSearchedStatusID, then some tweets at the very bottom of some
* partitions will never be returned. Consider the case:
* partition 1 has tweets 10, 8, 6
* partition 2 has tweets 9, 7, 5
* In this case, we would always trim all results to minId = 6, and tweet 5 would never be returned.
* On the other hand, if we always set trimmingMinId to the lowest minSearchedStatusID, then we
* might miss tweets from partitions that early-terminated. Consider the case:
* partition 1 has tweets 10, 5, 3, 1 that match our query
* partition 2 has tweets 9, 8, 7, 6, 2 that match our query
* If we ask for 3 results, then partition 1 will return tweets 10, 5, 3, and partition 2 will
* return tweets 9, 8, 7. If we set trimmingMinId = min(minSearchedStatusIDs), then the next
* pagination request will have [max_id = 2], and we will miss tweet 6.
* So the intuition here is that if we have an early-terminated response, we cannot set
* trimmingMinId to something lower than the minSearchedStatusID returned by that partition
* (otherwise we might miss results from that partition). However, if we've exhausted all
* partitions, then it's OK to not trim any result, because tiers do not intersect, so we will not
* miss any result from the next tier once we get there.
// Recency merger variant; extends RecencyResponseMerger and overrides its trimming / min-max hooks.
public class StrictRecencyResponseMerger extends RecencyResponseMerger {
// Timer stat exported under the name "merge_recency_strict", measured in nanoseconds.
private static final SearchTimerStats STRICT_RECENCY_TIMER_AVG =
SearchTimerStats.export("merge_recency_strict", TimeUnit.NANOSECONDS, false, true);
// NOTE(review): the declaration this initializer belongs to is not present in this extract;
// presumably it is the partition-merging counterpart of the tier-merging stats below.
new EarlyTerminationTrimmingStats("strict_recency_partition_merging");
// Early-termination trimming stats used when merging tiers (stat name prefix
// "strict_recency_tier_merging").
static final EarlyTerminationTrimmingStats TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
new EarlyTerminationTrimmingStats("strict_recency_tier_merging");
// Cluster whose responses this merger handles; selects the archive vs. non-archive logic
// in getTrimmingMinId() and clearEarlyTerminationIfReachingTierBottom().
private final EarlybirdCluster cluster;
// Creates a merger for the given cluster. The first four arguments are forwarded unchanged to
// the RecencyResponseMerger constructor; only the cluster is stored on this class.
public StrictRecencyResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode,
EarlybirdFeatureSchemaMerger featureSchemaMerger,
EarlybirdCluster cluster) {
super(requestContext, responses, mode, featureSchemaMerger);
this.cluster = cluster;
protected SearchTimerStats getMergedResponseTimer() {
* Unlike {@link com.twitter.search.earlybird_root.mergers.RecencyResponseMerger}, this method
* takes a much simpler approach by just taking the max of the maxSearchedStatusIds.
* Also, when no maxSearchedStatusId is available at all, Long.MIN_VALUE is used instead of
* Long.MAX_VALUE. This ensures that we don't return any result in these cases.
// Returns the largest of the accumulated max IDs, or Long.MIN_VALUE when no max ID was
// accumulated at all.
protected long findMaxFullySearchedStatusID() {
return accumulatedResponses.getMaxIds().isEmpty()
? Long.MIN_VALUE : Collections.max(accumulatedResponses.getMaxIds());
* This method is subtly different from the base class version: when no minSearchedStatusId is
* available at all, Long.MAX_VALUE is used instead of Long.MIN_VALUE. This ensures that we
* don't return any result in these cases.
// Returns the merged min ID:
//  - Long.MAX_VALUE when no min ID was accumulated;
//  - getTrimmingMinId() when merging partitions within a tier;
//  - the smallest accumulated min ID otherwise (merging tiers).
protected long findMinFullySearchedStatusID() {
List<Long> minIds = accumulatedResponses.getMinIds();
if (minIds.isEmpty()) {
return Long.MAX_VALUE;
// Partition merging uses the cluster-specific trimming rule.
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
return getTrimmingMinId();
// When merging tiers, the min ID should be the smallest among the min IDs.
return Collections.min(minIds);
// Trims searchResults in place and returns the stats collected while doing so:
// exact duplicates are removed, results are filtered by the merged max/min IDs, and the list
// is then cut down to computeNumResultsToKeep() entries.
// Returns TrimStats.EMPTY_STATS when there are no results to trim.
protected TrimStats trimResults(
ThriftSearchResults searchResults, long mergedMin, long mergedMax) {
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
// no results, no trimming needed
return TrimStats.EMPTY_STATS;
TrimStats trimStats = new TrimStats();
trimExactDups(searchResults, trimStats);
// Note the argument order: mergedMax is passed before mergedMin.
filterResultsByMergedMinMaxIds(searchResults, mergedMax, mergedMin, trimStats);
int numResults = computeNumResultsToKeep();
if (searchResults.getResultsSize() > numResults) {
// Record how many entries are dropped from the tail, then keep only the first numResults.
trimStats.setResultsTruncatedFromTailCount(searchResults.getResultsSize() - numResults);
searchResults.setResults(searchResults.getResults().subList(0, numResults));
return trimStats;
* This method is different from the base class version because when minResultId is bigger
* than currentMergedMin, we always take minResultId.
* If we don't do this, we would lose results.
* Illustration with an example. Assuming we are outside of the lag threshold.
* Num results requested: 3
* Response 1: min: 100 max: 900 results: 400, 500, 600
* Response 2: min: 300 max: 700 results: 350, 450, 550
* Merged results: 600, 550, 500
* Merged max: 900
* Merged min: we could take 300 (minId), or take 500 (minResultId).
* If we take minId, and use 300 as the pagination cursor, we'd lose results
* 350 and 450 when we paginate. So we have to take minResultId here.
// Computes the min searched status ID for the merged response: starts from currentMergedMin and,
// when results were trimmed and the result list is non-empty, uses the ID of the last result
// in the list instead.
// NOTE(review): the body of the isEmpty() check and the statement that finally consumes minId
// are not present in this extract.
protected void setMergedMinSearchedStatusId(
ThriftSearchResults searchResults,
long currentMergedMin,
boolean resultsWereTrimmed) {
if (accumulatedResponses.getMinIds().isEmpty()) {
long minId = currentMergedMin;
if (resultsWereTrimmed
&& (searchResults != null)
&& searchResults.isSetResults()
&& (searchResults.getResultsSize() > 0)) {
List<ThriftSearchResult> results = searchResults.getResults();
// The last entry of the result list supplies the new min ID.
minId = results.get(results.size() - 1).getId();
// Returns true when the merged response's minSearchedStatusID equals getTierBottomId();
// always returns false for archive clusters.
// NOTE(review): the statements between the equality check and "return true" (the opener of the
// call that takes the message string, and presumably the flag update itself) are not present
// in this extract.
protected boolean clearEarlyTerminationIfReachingTierBottom(EarlybirdResponse mergedResponse) {
if (EarlybirdCluster.isArchive(cluster)) {
// We don't need to worry about the tier bottom when merging partition responses in the full
// archive cluster: if all partitions were exhausted and we didn't trim the results, then
// the early-terminated flag on the merged response will be false. If at least one partition
// is early-terminated, or we trimmed some results, then the early-terminated flag on the
// merged response will be true, and we should continue getting results from this tier before
// we move to the next one.
return false;
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
if (searchResults.getMinSearchedStatusID() == getTierBottomId()) {
"Set earlytermination to false because minSearchedStatusId is tier bottom");
return true;
return false;
// Always false for this merger.
protected boolean shouldEarlyTerminateWhenEnoughTrimmedResults() {
return false;
protected final EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForPartitions() {
protected final EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForTiers() {
/**
 * Determines the bottom of the realtime cluster, based on the partition responses.
 * Returns the largest minSearchedStatusID among the successful responses that are not
 * early-terminated and have that field set, or -1 when no response qualifies.
 */
private long getTierBottomId() {
// -1 is the "no qualifying response" sentinel; only larger IDs can replace it.
long tierBottomId = -1;
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
if (!isEarlyTerminated(response)
&& response.isSetSearchResults()
&& response.getSearchResults().isSetMinSearchedStatusID()
&& (response.getSearchResults().getMinSearchedStatusID() > tierBottomId)) {
tierBottomId = response.getSearchResults().getMinSearchedStatusID();
return tierBottomId;
/**
 * Determines the minId to which all results should be trimmed.
 * Non-archive clusters: the largest accumulated min ID.
 * Archive clusters: the largest minSearchedStatusID among early-terminated responses if there is
 * one that is non-negative; otherwise the smallest minSearchedStatusID among all successful
 * responses (Long.MAX_VALUE if none has the field set).
 */
private long getTrimmingMinId() {
List<Long> minIds = accumulatedResponses.getMinIds();
// NOTE(review): Collections.max throws on an empty list; the caller visible in this file
// (findMinFullySearchedStatusID) checks minIds.isEmpty() before calling this method.
if (!EarlybirdCluster.isArchive(cluster)) {
return Collections.max(minIds);
// -1 marks "no early-terminated response seen"; see the >= 0 test below.
long maxOfEarlyTerminatedMins = -1;
long minOfAllMins = Long.MAX_VALUE;
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
if (response.isSetSearchResults()
&& response.getSearchResults().isSetMinSearchedStatusID()) {
long minId = response.getSearchResults().getMinSearchedStatusID();
minOfAllMins = Math.min(minOfAllMins, minId);
if (isEarlyTerminated(response)) {
maxOfEarlyTerminatedMins = Math.max(maxOfEarlyTerminatedMins, minId);
if (maxOfEarlyTerminatedMins >= 0) {
return maxOfEarlyTerminatedMins;
} else {
return minOfAllMins;
/**
 * Determines if the given earlybird response is early terminated: true only when the
 * earlyTerminationInfo field is set and reports early termination.
 */
private boolean isEarlyTerminated(EarlybirdResponse response) {
return response.isSetEarlyTerminationInfo()
&& response.getEarlyTerminationInfo().isEarlyTerminated();
Binary file not shown.
@ -1,688 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.common.collections.Pair;
import com.twitter.common.quantity.Amount;
import com.twitter.common.quantity.Time;
import com.twitter.common.util.Clock;
import com.twitter.search.common.futures.Futures;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
import com.twitter.search.common.relevance.utils.ResultComparators;
import com.twitter.search.common.search.EarlyTerminationState;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird.thrift.ThriftTweetSource;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdServiceResponse;
import com.twitter.util.Function;
import com.twitter.util.Function0;
import com.twitter.util.Future;
/** Utility functions for merging recency and relevance results. */
public class SuperRootResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(SuperRootResponseMerger.class);
// Common prefix of every stat name exported by this class.
private static final String ALL_STATS_PREFIX = "superroot_response_merger_";
// NOTE(review): the initializer of this counter is not present in this extract.
private static final SearchCounter FULL_ARCHIVE_MIN_ID_GREATER_THAN_REALTIME_MIN_ID =
// Format of the per-cluster error stat name; incrErrorCount() fills it with
// (ALL_STATS_PREFIX, rankingModeStatPrefix, cluster, cause).
private static final String ERROR_FORMAT = "%s%s_errors_from_cluster_%s_%s";
private final ThriftSearchRankingMode rankingMode;
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
// "superroot_" + lower-cased ranking mode name (set in the constructor).
private final String featureStatPrefix;
private final Clock clock;
// Lower-cased ranking mode name (set in the constructor); part of every per-instance stat name.
private final String rankingModeStatPrefix;
// Per-ranking-mode counters, exported in the constructor.
private final SearchCounter mergedResponseSearchResultsNotSet;
private final SearchCounter invalidMinStatusId;
private final SearchCounter invalidMaxStatusId;
private final SearchCounter noMinIds;
private final SearchCounter noMaxIds;
private final SearchCounter mergedResponses;
private final SearchCounter mergedResponsesWithExactDups;
// Lazily created counters keyed by a pair of tweet sources; handed to distinctByStatusId()
// in mergeResults().
private final LoadingCache<Pair<ThriftTweetSource, ThriftTweetSource>, SearchCounter> dupsStats;
// Template for a successful response; mergeResponses() starts from a deep copy of it.
// NOTE(review): the remainder of this initializer is not present in this extract.
private static final EarlybirdResponse EMPTY_RESPONSE =
new EarlybirdResponse(EarlybirdResponseCode.SUCCESS, 0)
.setSearchResults(new ThriftSearchResults()
* Creates a new SuperRootResponseMerger instance.
* @param rankingMode The ranking mode to use when merging results.
* @param featureSchemaMerger The merger that can merge feature schema from different tiers.
* @param clock The clock that will be used to merge results.
// Stores the collaborators and exports one set of counters per ranking mode. Every stat name is
// ALL_STATS_PREFIX + <lower-cased ranking mode name> + a fixed suffix.
public SuperRootResponseMerger(ThriftSearchRankingMode rankingMode,
EarlybirdFeatureSchemaMerger featureSchemaMerger,
Clock clock) {
// NOTE(review): toLowerCase() without an explicit Locale depends on the JVM default locale;
// consider Locale.ROOT so stat names are stable regardless of locale.
this.rankingModeStatPrefix = rankingMode.name().toLowerCase();
this.rankingMode = rankingMode;
this.featureSchemaMerger = featureSchemaMerger;
this.clock = clock;
this.featureStatPrefix = "superroot_" + rankingMode.name().toLowerCase();
mergedResponseSearchResultsNotSet = SearchCounter.export(
ALL_STATS_PREFIX + rankingModeStatPrefix + "_merged_response_search_results_not_set");
invalidMinStatusId =
SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_invalid_min_status_id");
invalidMaxStatusId =
SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_invalid_max_status_id");
noMinIds = SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_no_min_ids");
noMaxIds = SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_no_max_ids");
mergedResponses = SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix
+ "_merged_responses");
mergedResponsesWithExactDups =
SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix
+ "_merged_responses_with_exact_dups");
// Counters per (first source, second source) pair are created on first lookup; the stat name
// ends with the two ThriftTweetSource names joined by "_".
dupsStats = CacheBuilder.newBuilder()
.build(new CacheLoader<Pair<ThriftTweetSource, ThriftTweetSource>, SearchCounter>() {
public SearchCounter load(Pair<ThriftTweetSource, ThriftTweetSource> key) {
return SearchCounter.export(
ALL_STATS_PREFIX + rankingModeStatPrefix + "_merged_responses_with_exact_dups_"
+ key.getFirst().name() + "_" + key.getSecond().name());
// Builds the per-cluster error stat name from ERROR_FORMAT. The cause is the lower-cased
// response code name, or "null_response" when the response is null.
// NOTE(review): the end of the String.format call and whatever consumes statName are not
// present in this extract.
private void incrErrorCount(String cluster, @Nullable EarlybirdResponse response) {
String cause;
if (response != null) {
// NOTE(review): locale-sensitive toLowerCase(); consider Locale.ROOT for stat names.
cause = response.getResponseCode().name().toLowerCase();
} else {
cause = "null_response";
String statName = String.format(
ERROR_FORMAT, ALL_STATS_PREFIX, rankingModeStatPrefix, cluster, cause
* Merges the given response futures.
* @param earlybirdRequestContext The earlybird request.
* @param realtimeResponseFuture The response from the realtime cluster.
* @param protectedResponseFuture The response from the protected cluster.
* @param fullArchiveResponseFuture The response from the full archive cluster.
* @return A future with the merged results.
// Combines the three cluster futures into one future of the merged response. The merge itself
// runs in the Function0 passed to Futures.map, and the resulting future is then passed to
// handleResponseException with the message "Exception thrown while merging responses.".
// NOTE(review): several statements (closing braces, the right-hand side start of
// archiveHasError, the mergeResponses arguments, the setters on mergedResponse) are not present
// in this extract.
public Future<EarlybirdResponse> mergeResponseFutures(
final EarlybirdRequestContext earlybirdRequestContext,
final Future<EarlybirdServiceResponse> realtimeResponseFuture,
final Future<EarlybirdServiceResponse> protectedResponseFuture,
final Future<EarlybirdServiceResponse> fullArchiveResponseFuture) {
Future<EarlybirdResponse> mergedResponseFuture = Futures.map(
realtimeResponseFuture, protectedResponseFuture, fullArchiveResponseFuture,
new Function0<EarlybirdResponse>() {
public EarlybirdResponse apply() {
// If the realtime response is not valid, return an error response.
// Also, the realtime service should always be called.
// The check applies only when the realtime service was requested: it fails when the service
// was requested but not called, or when its response is not valid.
EarlybirdServiceResponse realtimeResponse = Futures.get(realtimeResponseFuture);
if (realtimeResponse.getServiceState().serviceWasRequested()
&& (!realtimeResponse.getServiceState().serviceWasCalled()
|| !EarlybirdResponseMergeUtil.isValidResponse(
realtimeResponse.getResponse()))) {
incrErrorCount("realtime", realtimeResponse.getResponse());
return EarlybirdResponseMergeUtil.transformInvalidResponse(
realtimeResponse.getResponse(), "realtime");
// If we have a protected response and it's not valid, return an error response.
EarlybirdServiceResponse protectedResponse = Futures.get(protectedResponseFuture);
if (protectedResponse.getServiceState().serviceWasCalled()) {
if (!EarlybirdResponseMergeUtil.isValidResponse(protectedResponse.getResponse())) {
incrErrorCount("protected", protectedResponse.getResponse());
return EarlybirdResponseMergeUtil.transformInvalidResponse(
protectedResponse.getResponse(), "protected");
// If we have a full archive response, check if it's valid.
// Unlike the realtime/protected cases, an invalid archive response does not fail the merge
// immediately; it is only reported below when the merged response has no results.
EarlybirdServiceResponse fullArchiveResponse = Futures.get(fullArchiveResponseFuture);
boolean archiveHasError =
&& !EarlybirdResponseMergeUtil.isValidResponse(fullArchiveResponse.getResponse());
// Merge the responses.
EarlybirdResponse mergedResponse = mergeResponses(
// If the realtime clusters didn't return any results, and the full archive cluster
// returned an error response, return an error merged response.
if (archiveHasError && !EarlybirdResponseUtil.hasResults(mergedResponse)) {
incrErrorCount("full_archive", fullArchiveResponse.getResponse());
return EarlybirdResponseMergeUtil.failedEarlybirdResponse(
"realtime clusters had no results and archive cluster response had error");
// Corner case: the realtime response could have exactly numRequested results, and could
// be exhausted (not early-terminated). In this case, the request should not have been
// sent to the full archive cluster.
// - If the full archive cluster is not available, or was not requested, then we don't
// need to change anything.
// - If the full archive cluster is available and was requested (but wasn't hit
// because we found enough results in the realtime cluster), then we should set the
// early-termination flag on the merged response, to indicate that we potentially
// have more results for this query in our index.
if ((fullArchiveResponse.getServiceState()
== EarlybirdServiceResponse.ServiceState.SERVICE_NOT_CALLED)
&& !EarlybirdResponseUtil.isEarlyTerminated(realtimeResponse.getResponse())) {
EarlyTerminationInfo earlyTerminationInfo = new EarlyTerminationInfo(true);
// If we've exhausted all clusters, set the minSearchedStatusID to 0.
if (!EarlybirdResponseUtil.isEarlyTerminated(mergedResponse)) {
return mergedResponse;
// Handle all merging exceptions.
return handleResponseException(mergedResponseFuture,
"Exception thrown while merging responses.");
* Merge the results in the given responses.
* @param earlybirdRequestContext The earlybird request context.
* @param realtimeResponse The response from the realtime cluster.
* @param protectedResponse The response from the protected cluster.
* @param fullArchiveResponse The response from the full archive cluster.
* @return The merged response.
// Builds the merged response from the (nullable) cluster responses:
//  1. picks the number of results to return (numResultsToReturnAtRoot if set, else the query's
//     numResults) and requires it to be positive;
//  2. starts from a deep copy of the realtime response (or of EMPTY_RESPONSE when the realtime
//     response is null or TIER_SKIPPED);
//  3. merges the result lists, truncating to the requested count unless relevance options ask
//     for all results;
//  4. sets min/max searched status IDs, sums the searched-segment counts, and merges debug
//     strings when debug mode is on.
// NOTE(review): most setter calls on mergedResponse are not present in this extract.
private EarlybirdResponse mergeResponses(
EarlybirdRequestContext earlybirdRequestContext,
@Nullable EarlybirdResponse realtimeResponse,
@Nullable EarlybirdResponse protectedResponse,
@Nullable EarlybirdResponse fullArchiveResponse) {
EarlybirdRequest request = earlybirdRequestContext.getRequest();
ThriftSearchQuery searchQuery = request.getSearchQuery();
int numResultsRequested;
if (request.isSetNumResultsToReturnAtRoot()) {
numResultsRequested = request.getNumResultsToReturnAtRoot();
} else {
numResultsRequested = searchQuery.getNumResults();
// Throws IllegalStateException when the requested count is not positive.
Preconditions.checkState(numResultsRequested > 0);
EarlybirdResponse mergedResponse = EMPTY_RESPONSE.deepCopy();
if ((realtimeResponse != null)
&& (realtimeResponse.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED)) {
mergedResponse = realtimeResponse.deepCopy();
if (!mergedResponse.isSetSearchResults()) {
new ThriftSearchResults(Lists.<ThriftSearchResult>newArrayList()));
// If either the realtime or the full archive response is early-terminated, we want the merged
// response to be early-terminated too. The early-termination flag from the realtime response
// carries over to the merged response, because mergedResponse is just a deep copy of the
// realtime response. So we only need to check the early-termination flag of the full archive
// response.
if ((fullArchiveResponse != null)
&& EarlybirdResponseUtil.isEarlyTerminated(fullArchiveResponse)) {
// If realtime has empty results and protected has some results then we copy the early
// termination information if that is present
// NOTE(review): protectedResponse.getSearchResults() is dereferenced here without an
// isSetSearchResults() check — confirm callers only pass validated responses.
if (protectedResponse != null
&& mergedResponse.getSearchResults().getResults().isEmpty()
&& !protectedResponse.getSearchResults().getResults().isEmpty()
&& EarlybirdResponseUtil.isEarlyTerminated(protectedResponse)) {
// Merge the results.
List<ThriftSearchResult> mergedResults = mergeResults(
numResultsRequested, realtimeResponse, protectedResponse, fullArchiveResponse);
// Trim the merged results if necessary.
boolean resultsTrimmed = false;
if (mergedResults.size() > numResultsRequested
&& !(searchQuery.isSetRelevanceOptions()
&& searchQuery.getRelevanceOptions().isReturnAllResults())) {
// If we have more results than requested, trim the result list and re-adjust
// minSearchedStatusID.
mergedResults = mergedResults.subList(0, numResultsRequested);
// Mark early termination in merged response
if (!EarlybirdResponseUtil.isEarlyTerminated(mergedResponse)) {
EarlyTerminationInfo earlyTerminationInfo = new EarlyTerminationInfo(true);
resultsTrimmed = true;
// Set the minSearchedStatusID and maxSearchedStatusID fields on the merged response.
setMinSearchedStatusId(mergedResponse, realtimeResponse, protectedResponse, fullArchiveResponse,
setMaxSearchedStatusId(mergedResponse, realtimeResponse, protectedResponse,
// Each per-cluster segment count defaults to 0 when the response is null or the field is unset.
int numRealtimeSearchedSegments =
(realtimeResponse != null && realtimeResponse.isSetNumSearchedSegments())
? realtimeResponse.getNumSearchedSegments()
: 0;
int numProtectedSearchedSegments =
(protectedResponse != null && protectedResponse.isSetNumSearchedSegments())
? protectedResponse.getNumSearchedSegments()
: 0;
int numArchiveSearchedSegments =
(fullArchiveResponse != null && fullArchiveResponse.isSetNumSearchedSegments())
? fullArchiveResponse.getNumSearchedSegments()
: 0;
numRealtimeSearchedSegments + numProtectedSearchedSegments + numArchiveSearchedSegments);
// Debug strings are merged only when the request's debug mode is greater than 0.
if (earlybirdRequestContext.getRequest().getDebugMode() > 0) {
mergeClusterDebugStrings(realtimeResponse, protectedResponse, fullArchiveResponse));
return mergedResponse;
* Merges the given responses.
* @param numResults the number of results requested
* @param realtimeResponse the response from the realtime cluster
* @param protectedResponse the response from the protected cluster
* @param fullArchiveResponse the response from the full archive cluster
* @return the list of merged results
// Produces the merged result list: realtime and protected results are merged first, full archive
// results are then added, and finally the list is de-duplicated by status ID.
// NOTE(review): the argument lists of the first two calls and the bodies of the ranking-mode
// branches are not present in this extract.
private List<ThriftSearchResult> mergeResults(int numResults,
@Nullable EarlybirdResponse realtimeResponse,
@Nullable EarlybirdResponse protectedResponse,
@Nullable EarlybirdResponse fullArchiveResponse) {
// We first merge the results from the two realtime clusters, Realtime cluster and
// Realtime Protected Tweets cluster
List<ThriftSearchResult> mergedResults = mergePublicAndProtectedRealtimeResults(
EarlybirdResponseMergeUtil.addResultsToList(mergedResults, fullArchiveResponse,
List<ThriftSearchResult> distinctMergedResults =
EarlybirdResponseMergeUtil.distinctByStatusId(mergedResults, dupsStats);
// Reference comparison: true when distinctByStatusId returned a different list object
// (presumably meaning duplicates were removed — confirm against distinctByStatusId).
if (mergedResults != distinctMergedResults) {
if (rankingMode == ThriftSearchRankingMode.RELEVANCE
|| rankingMode == ThriftSearchRankingMode.TOPTWEETS) {
} else {
return distinctMergedResults;
* Method for merging tweets from protected and realtime clusters
* - realtime, guaranteed newer than any archive tweets
* - protected, also realtime, but with a potentially larger window (optional)
* - archive, public, guaranteed older than any public realtime tweets (optional, used for
* id limits, *not added to results*)
* It adds the ThriftSearchResults from protected tweets to the realtimeResponse
* Algorithm diagram: (with newer tweets at the top)
* ------------------------------------ <--- protected maxSearchedStatusID
* |C:Newest protected realtime tweets|
* | (does not exist if realtime |
* | maxID >= protected maxID) |
* | ------------------------ | <--- 60 seconds ago
* |D:Newer protected realtime tweets |
* | (does not exist if realtime |
* | maxID >= 60 seconds ago) |
* ---------- | ------------------------ | <--- public realtime maxSearchedStatusID
* |A:Public| |E:Automatically valid protected |
* |realtime| |realtime tweets |
* ---------- | ------------------------ | <--- public realtime minSearchedStatusID
* | |
* ---------- | E if archive is present | <--- public archive maxSearchedStatusID
* ---------- | E if archive is present | <--- public archive maxSearchedStatusID
* |B:Public| | F if archive is not present |
* |archive | | |
* ---------- | ------------------------ | <--- public archive minSearchedStatusID
* |F:Older protected realtime tweets |
* | (does not exist if protected |
* | minID >= public minID) |
* ------------------------------------ <--- protected minSearchedStatusID
* Step 1: Select tweets from groups A, and E. If this is enough, return them
* Step 2: Select tweets from groups A, E, and F. If this is enough, return them
* Step 3: Select tweets from groups A, D, E, and F and return them
* There are two primary tradeoffs, both of which favor public tweets:
* (1) Benefit: While public indexing latency is < 60s, auto-updating never misses public tweets
* Cost: Absence of public tweets may delay protected tweets from being searchable for 60s
* (2) Benefit: No failure or delay from the protected cluster will affect realtime results
* Cost: If the protected cluster indexes more slowly, auto-update may miss its tweets
* @param fullArchiveTweets - used solely for generating anchor points, not merged in.
// Merges protected realtime results with public realtime results, keeping only protected results
// whose IDs fall within [minId, maxId]. The bounds start from the public realtime response's
// min/max searched status IDs and may be widened using the archive response and the clock.
// Returns a new list; protected results are added before realtime results.
// NOTE(review): many statements (counter updates, list additions, the setters on
// protectedResults, closing braces) are not present in this extract, so the comments below
// describe only what the visible lines establish.
static List<ThriftSearchResult> mergePublicAndProtectedRealtimeResults(
int numRequested,
EarlybirdResponse realtimeTweets,
EarlybirdResponse realtimeProtectedTweets,
@Nullable EarlybirdResponse fullArchiveTweets,
Clock clock) {
// See which results will actually be used
boolean isRealtimeUsable = EarlybirdResponseUtil.hasResults(realtimeTweets);
boolean isArchiveUsable = EarlybirdResponseUtil.hasResults(fullArchiveTweets);
boolean isProtectedUsable = EarlybirdResponseUtil.hasResults(realtimeProtectedTweets);
// Defaults leave the ID range unbounded on both sides.
long minId = Long.MIN_VALUE;
long maxId = Long.MAX_VALUE;
if (isRealtimeUsable) {
// Determine the actual upper/lower bounds on the tweet id
if (realtimeTweets.getSearchResults().isSetMinSearchedStatusID()) {
minId = realtimeTweets.getSearchResults().getMinSearchedStatusID();
if (realtimeTweets.getSearchResults().isSetMaxSearchedStatusID()) {
maxId = realtimeTweets.getSearchResults().getMaxSearchedStatusID();
// justRight starts as the number of public realtime results and grows by the number of
// archive results when the archive response is usable.
int justRight = realtimeTweets.getSearchResults().getResultsSize();
if (isArchiveUsable) {
justRight += fullArchiveTweets.getSearchResults().getResultsSize();
if (fullArchiveTweets.getSearchResults().isSetMinSearchedStatusID()) {
long fullArchiveMinId = fullArchiveTweets.getSearchResults().getMinSearchedStatusID();
// Extend the lower bound down to the archive's min ID when it is not above the current one.
if (fullArchiveMinId <= minId) {
minId = fullArchiveMinId;
} else {
if (isProtectedUsable) {
for (ThriftSearchResult result : realtimeProtectedTweets.getSearchResults().getResults()) {
if (result.getId() >= minId && result.getId() <= maxId) {
if (justRight < numRequested) {
// Since this is only used as an upper bound, old (pre-2010) ids are still handled correctly
// The visible argument is "now minus 60 seconds" in milliseconds, floored at 0.
maxId = Math.max(
clock.nowMillis() - Amount.of(60, Time.SECONDS).as(Time.MILLISECONDS), 0));
List<ThriftSearchResult> mergedSearchResults = Lists.newArrayListWithCapacity(numRequested * 2);
// Add valid tweets in order of priority: protected, then realtime
// Only add results that are within range (that check only matters for protected)
if (isProtectedUsable) {
for (ThriftSearchResult result : realtimeProtectedTweets.getSearchResults().getResults()) {
if (result.getId() <= maxId && result.getId() >= minId) {
if (isRealtimeUsable) {
mergedSearchResults, realtimeTweets, ThriftTweetSource.REALTIME_CLUSTER);
// Set the minSearchedStatusID and maxSearchedStatusID on the protected response to the
// minId and maxId that were used to trim the protected results.
// This is needed in order to correctly set these IDs on the merged response.
ThriftSearchResults protectedResults =
if ((protectedResults != null)
&& protectedResults.isSetMinSearchedStatusID()
&& (protectedResults.getMinSearchedStatusID() < minId)) {
if ((protectedResults != null)
&& protectedResults.isSetMaxSearchedStatusID()
&& (protectedResults.getMaxSearchedStatusID() > maxId)) {
return mergedSearchResults;
* Merges the debug strings of the given cluster responses.
* @param realtimeResponse The response from the realtime cluster.
* @param protectedResponse The response from the protected cluster.
* @param fullArchiveResponse The response from the full archive cluster.
* @return The merged debug string.
// Concatenates the debug strings of the non-null responses that have one set, each prefixed
// with its cluster label, in the order realtime, protected, full archive.
// Returns null (not an empty string) when nothing was appended.
// NOTE(review): the bodies of the "sb.length() > 0" checks (presumably a separator append) are
// not present in this extract.
public static String mergeClusterDebugStrings(@Nullable EarlybirdResponse realtimeResponse,
@Nullable EarlybirdResponse protectedResponse,
@Nullable EarlybirdResponse fullArchiveResponse) {
StringBuilder sb = new StringBuilder();
if ((realtimeResponse != null) && realtimeResponse.isSetDebugString()) {
sb.append("Realtime response: ").append(realtimeResponse.getDebugString());
if ((protectedResponse != null) && protectedResponse.isSetDebugString()) {
if (sb.length() > 0) {
sb.append("Protected response: ").append(protectedResponse.getDebugString());
if ((fullArchiveResponse != null) && fullArchiveResponse.isSetDebugString()) {
if (sb.length() > 0) {
sb.append("Full archive response: ").append(fullArchiveResponse.getDebugString());
if (sb.length() == 0) {
return null;
return sb.toString();
* Sets the minSearchedStatusID field on the merged response.
* @param mergedResponse The merged response.
* @param fullArchiveResponse The full archive response.
* @param resultsTrimmed Whether the merged response results were trimmed.
// Sets minSearchedStatusID on the merged response. When results were trimmed, the ID of the last
// result in the merged list is used; otherwise candidate min IDs are collected from the full
// archive, protected and merged responses.
// NOTE(review): the statements that add to minIDs, the final setter calls and the else-branches
// are not present in this extract.
private void setMinSearchedStatusId(EarlybirdResponse mergedResponse,
EarlybirdResponse realtimeResponse,
EarlybirdResponse protectedResponse,
EarlybirdResponse fullArchiveResponse,
boolean resultsTrimmed) {
if (resultsTrimmed) {
// We got more results than we asked for and we trimmed them.
// Set minSearchedStatusID to the ID of the oldest result.
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
if (searchResults.getResultsSize() > 0) {
List<ThriftSearchResult> results = searchResults.getResults();
// The last entry of the merged list is treated as the oldest result.
long lastResultId = results.get(results.size() - 1).getId();
// We did not get more results than we asked for. Get the min of the minSearchedStatusIDs of
// the merged responses.
List<Long> minIDs = Lists.newArrayList();
if (fullArchiveResponse != null
&& fullArchiveResponse.isSetSearchResults()
&& fullArchiveResponse.getSearchResults().isSetMinSearchedStatusID()) {
// Detects a merged (realtime-derived) min ID that is below the full archive min ID.
if (mergedResponse.getSearchResults().isSetMinSearchedStatusID()
&& mergedResponse.getSearchResults().getMinSearchedStatusID()
< fullArchiveResponse.getSearchResults().getMinSearchedStatusID()) {
// The protected response is considered only when realtime has no results and protected does.
if (protectedResponse != null
&& !EarlybirdResponseUtil.hasResults(realtimeResponse)
&& EarlybirdResponseUtil.hasResults(protectedResponse)
&& protectedResponse.getSearchResults().isSetMinSearchedStatusID()) {
if (mergedResponse.getSearchResults().isSetMinSearchedStatusID()) {
if (!minIDs.isEmpty()) {
} else {
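/**
 * Sets the maxSearchedStatusID field on the merged response.
 *
 * @param mergedResponse The merged response.
 * @param realtimeResponse The realtime response.
 * @param protectedResponse The protected response.
 * @param fullArchiveResponse The full archive response.
 */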
private void setMaxSearchedStatusId(EarlybirdResponse mergedResponse,
EarlybirdResponse realtimeResponse,
EarlybirdResponse protectedResponse,
EarlybirdResponse fullArchiveResponse) {
// Candidate max IDs collected from the individual responses.
// NOTE(review): the statements that add to this list, and the ones that finally set the field,
// are not visible in this copy of the file.
List<Long> maxIDs = Lists.newArrayList();
// The full archive response is looked at only when it is present and actually carries a
// maxSearchedStatusID.
if (fullArchiveResponse != null
&& fullArchiveResponse.isSetSearchResults()
&& fullArchiveResponse.getSearchResults().isSetMaxSearchedStatusID()) {
// True when the full archive reports a max ID above the one on the merged response.
if (mergedResponse.getSearchResults().isSetMaxSearchedStatusID()
&& fullArchiveResponse.getSearchResults().getMaxSearchedStatusID()
> mergedResponse.getSearchResults().getMaxSearchedStatusID()) {
// The protected response is consulted only when the realtime response has no results and
// the protected response does.
if (protectedResponse != null
&& !EarlybirdResponseUtil.hasResults(realtimeResponse)
&& EarlybirdResponseUtil.hasResults(protectedResponse)
&& protectedResponse.getSearchResults().isSetMaxSearchedStatusID()) {
if (mergedResponse.getSearchResults().isSetMaxSearchedStatusID()) {
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
if (searchResults.getResultsSize() > 0) {
List<ThriftSearchResult> results = searchResults.getResults();
if (!maxIDs.isEmpty()) {
} else {
* Handles exceptions thrown while merging responses. Timeout exceptions are converted to
* SERVER_TIMEOUT_ERROR responses. All other exceptions are converted to PERSISTENT_ERROR
* responses.
private Future<EarlybirdResponse> handleResponseException(
Future<EarlybirdResponse> responseFuture, final String debugMsg) {
return responseFuture.handle(
new Function<Throwable, EarlybirdResponse>() {
public EarlybirdResponse apply(Throwable t) {
EarlybirdResponseCode responseCode = EarlybirdResponseCode.PERSISTENT_ERROR;
if (FinagleUtil.isTimeoutException(t)) {
responseCode = EarlybirdResponseCode.SERVER_TIMEOUT_ERROR;
EarlybirdResponse response = new EarlybirdResponse(responseCode, 0);
response.setDebugString(debugMsg + "\n" + t);
return response;
Binary file not shown.
@ -1,90 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Collections2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsRequest;
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
* Merger class to merge termstats EarlybirdResponse objects
public class TermStatisticsResponseMerger extends EarlybirdResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(TermStatisticsResponseMerger.class);
private static final SearchTimerStats TIMER =
SearchTimerStats.export("merge_term_stats", TimeUnit.NANOSECONDS, false, true);
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
public TermStatisticsResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode) {
super(requestContext, responses, mode);
protected SearchTimerStats getMergedResponseTimer() {
return TIMER;
protected double getDefaultSuccessResponseThreshold() {
protected EarlybirdResponse internalMerge(EarlybirdResponse termStatsResponse) {
ThriftTermStatisticsRequest termStatisticsRequest =
Collection<EarlybirdResponse> termStatsResults =
earlybirdResponse -> earlybirdResponse.isSetTermStatisticsResults());
ThriftTermStatisticsResults results =
new ThriftTermResultsMerger(
if (results.getTermResults().isEmpty()) {
final String line = "No results returned from any backend for term statistics request: {}";
// If the termstats request was not empty and we got empty results, log it as a warning;
// otherwise log it as a debug.
if (termStatisticsRequest.getTermRequestsSize() > 0) {
LOG.warn(line, termStatisticsRequest);
} else {
LOG.debug(line, termStatisticsRequest);
LOG.debug("TermStats call completed successfully: {}", termStatsResponse);
return termStatsResponse;
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination) {
// To get accurate term stats, must never early terminate
return false;
Binary file not shown.
@ -1,472 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftHistogramSettings;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird.thrift.ThriftTermRequest;
import com.twitter.search.earlybird.thrift.ThriftTermResults;
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsResults;
* Takes multiple successful EarlybirdResponses and merges them.
public class ThriftTermResultsMerger {
private static final Logger LOG = LoggerFactory.getLogger(ThriftTermResultsMerger.class);
private static final SearchCounter BIN_ID_GAP_COUNTER =
private static final SearchCounter MIN_COMPLETE_BIN_ID_ADJUSTED_NULL =
private static final SearchCounter MIN_COMPLETE_BIN_ID_NULL_WITHOUT_BINS =
private static final SearchCounter MIN_COMPLETE_BIN_ID_OUT_OF_RANGE =
private static final SearchCounter RESPONSE_WITHOUT_DRIVING_QUERY_HIT =
private static final ThriftTermRequest GLOBAL_COUNT_REQUEST =
new ThriftTermRequest().setFieldName("").setTerm("");
* Sorted list of the most recent (and contiguous) numBins binIds across all responses.
* Expected to be an empty list if this request did not ask for histograms, or if it
* did ask for histograms for 0 numBins.
private final List<Integer> mostRecentBinIds;
* The first binId in the {@link #mostRecentBinIds} list. This value is not meant to be used in
* case mostRecentBinIds is an empty list.
private final int firstBinId;
* For each unique ThriftTermRequest, stores an array of the total counts for all the binIds
* that we will return, summed up across all earlybird responses.
* The values in each totalCounts array correspond to the binIds in the
* {@link #mostRecentBinIds} list.
* Key: thrift term request.
* Value: array of the total counts summed up across all earlybird responses for the key's
* term request, corresponding to the binIds in {@link #mostRecentBinIds}.
private final Map<ThriftTermRequest, int[]> mergedTermRequestTotalCounts = Maps.newHashMap();
/**
 * The merged term results, keyed by the term request they answer.
 */
private final Map<ThriftTermRequest, ThriftTermResults> termResultsMap = Maps.newHashMap();
private final ThriftHistogramSettings histogramSettings;
* Only relevant for merging responses with histogram settings.
* This will be null either if (1) the request is not asking for histograms at all, or if
* (2) numBins was set to 0 (and no bin can be considered complete).
* If not null, the minCompleteBinId will be computed as the max over all merged responses'
* minCompleteBinId's.
private final Integer minCompleteBinId;
* Create merger with collections of results to merge
public ThriftTermResultsMerger(Collection<EarlybirdResponse> termStatsResults,
ThriftHistogramSettings histogramSettings) {
this.histogramSettings = histogramSettings;
Collection<EarlybirdResponse> filteredTermStatsResults =
this.mostRecentBinIds = findMostRecentBinIds(histogramSettings, filteredTermStatsResults);
this.firstBinId = mostRecentBinIds.isEmpty()
? Integer.MAX_VALUE // Should not be used if mostRecentBinIds is empty.
: mostRecentBinIds.get(0);
List<Integer> minCompleteBinIds =
for (EarlybirdResponse response : filteredTermStatsResults) {
Preconditions.checkState(response.getResponseCode() == EarlybirdResponseCode.SUCCESS,
"Unsuccessful responses should not be given to ThriftTermResultsMerger.");
Preconditions.checkState(response.getTermStatisticsResults() != null,
"Response given to ThriftTermResultsMerger has no termStatisticsResults.");
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
List<Integer> binIds = termStatisticsResults.getBinIds();
for (Map.Entry<ThriftTermRequest, ThriftTermResults> entry
: termStatisticsResults.getTermResults().entrySet()) {
ThriftTermRequest termRequest = entry.getKey();
ThriftTermResults termResults = entry.getValue();
adjustTotalCount(termResults, binIds);
addTotalCountData(termRequest, termResults);
if (histogramSettings != null) {
addHistogramData(termRequest, termResults, termStatisticsResults.getBinIds());
if (histogramSettings != null) {
addMinCompleteBinId(minCompleteBinIds, response);
minCompleteBinId = minCompleteBinIds.isEmpty() ? null : Collections.max(minCompleteBinIds);
* Take out any earlybird responses that we know did not match anything relevant to the query,
* and may have erroneous binIds.
private Collection<EarlybirdResponse> filterOutEmptyEarlybirdResponses(
Collection<EarlybirdResponse> termStatsResults) {
List<EarlybirdResponse> emptyResponses = Lists.newArrayList();
List<EarlybirdResponse> nonEmptyResponses = Lists.newArrayList();
for (EarlybirdResponse response : termStatsResults) {
// Guard against erroneously merging and returning 0 counts when we actually have data to
// return from other partitions.
// When a query doesn't match anything at all on an earlybird, the binIds that are returned
// do not correspond at all to the actual query, and are just based on the data range on the
// earlybird itself.
// We can identify these responses as (1) being non-early terminated, and (2) having 0
// hits processed.
if (isTermStatResponseEmpty(response)) {
} else {
// If all responses were "empty", we will just use those to merge into a new set of empty
// responses, using the binIds provided.
return nonEmptyResponses.isEmpty() ? emptyResponses : nonEmptyResponses;
private boolean isTermStatResponseEmpty(EarlybirdResponse response) {
return response.isSetSearchResults()
&& (response.getSearchResults().getNumHitsProcessed() == 0
|| drivingQueryHasNoHits(response))
&& response.isSetEarlyTerminationInfo()
&& !response.getEarlyTerminationInfo().isEarlyTerminated();
* If the global count bins are all 0, then we know the driving query has no hits.
* This check is added as a short term solution for SEARCH-5476. This short term fix requires
* the client to set the includeGlobalCounts to kick in.
private boolean drivingQueryHasNoHits(EarlybirdResponse response) {
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
if (termStatisticsResults == null || termStatisticsResults.getTermResults() == null) {
// If there's no term stats response, be conservative and return false.
return false;
} else {
ThriftTermResults globalCounts =
if (globalCounts == null) {
// We cannot tell if driving query has no hits, be conservative and return false.
return false;
} else {
for (Integer i : globalCounts.getHistogramBins()) {
if (i > 0) {
return false;
return true;
private static List<Integer> findMostRecentBinIds(
ThriftHistogramSettings histogramSettings,
Collection<EarlybirdResponse> filteredTermStatsResults) {
Integer largestFirstBinId = null;
List<Integer> binIdsToUse = null;
if (histogramSettings != null) {
int numBins = histogramSettings.getNumBins();
for (EarlybirdResponse response : filteredTermStatsResults) {
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
Preconditions.checkState(termStatisticsResults.getBinIds().size() == numBins,
"expected all results to have the same numBins. "
+ "request numBins: %s, response numBins: %s",
numBins, termStatisticsResults.getBinIds().size());
if (termStatisticsResults.getBinIds().size() > 0) {
Integer firstBinId = termStatisticsResults.getBinIds().get(0);
if (largestFirstBinId == null
|| largestFirstBinId.intValue() < firstBinId.intValue()) {
largestFirstBinId = firstBinId;
binIdsToUse = termStatisticsResults.getBinIds();
return binIdsToUse == null
? Collections.<Integer>emptyList()
// Just in case, make a copy of the binIds so that we don't reuse the same list from one
// of the responses we're merging.
: Lists.newArrayList(binIdsToUse);
private void addMinCompleteBinId(List<Integer> minCompleteBinIds,
EarlybirdResponse response) {
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
if (termStatisticsResults.isSetMinCompleteBinId()) {
// This is the base case. Early terminated or not, this is the proper minCompleteBinId
// that we're told to use for this response.
} else if (termStatisticsResults.getBinIds().size() > 0) {
// This is the case where no bins were complete. For the purposes of merging, we need to
// mark all the binIds in this response as non-complete by marking the "max(binId)+1" as the
// last complete bin.
// When returning the merged response, we still have a guard for the resulting
// minCompleteBinId being outside of the binIds range, and will set the returned
// minCompleteBinId value to null, if this response's binIds end up being used as the most
// recent ones, and we need to signify that none of the bins are complete.
int binSize = termStatisticsResults.getBinIds().size();
Integer maxBinId = termStatisticsResults.getBinIds().get(binSize - 1);
minCompleteBinIds.add(maxBinId + 1);
LOG.debug("Adjusting null minCompleteBinId for response: {}, histogramSettings {}",
response, histogramSettings);
} else {
// This should only happen in the case where numBins is set to 0.
Preconditions.checkState(histogramSettings.getNumBins() == 0,
"Expected numBins set to 0. response: %s", response);
"minCompleteBinIds: %s", minCompleteBinIds);
LOG.debug("Got null minCompleteBinId with no bins for response: {}, histogramSettings {}",
response, histogramSettings);
private void addTotalCountData(ThriftTermRequest request, ThriftTermResults results) {
ThriftTermResults termResults = termResultsMap.get(request);
if (termResults == null) {
termResultsMap.put(request, results);
} else {
termResults.setTotalCount(termResults.getTotalCount() + results.getTotalCount());
if (termResults.isSetMetadata()) {
results.getMetadata(), null));
* Set results.totalCount to the sum of hits in only the bins that will be returned in
* the merged response.
private void adjustTotalCount(ThriftTermResults results, List<Integer> binIds) {
int adjustedTotalCount = 0;
List<Integer> histogramBins = results.getHistogramBins();
if ((binIds != null) && (histogramBins != null)) {
histogramBins.size() == binIds.size(),
"Expected ThriftTermResults to have the same number of histogramBins as binIds set in "
+ " ThriftTermStatisticsResults. ThriftTermResults.histogramBins: %s, "
+ " ThriftTermStatisticsResults.binIds: %s.",
histogramBins, binIds);
for (int i = 0; i < binIds.size(); ++i) {
if (binIds.get(i) >= firstBinId) {
adjustedTotalCount += histogramBins.get(i);
private void addHistogramData(ThriftTermRequest request,
ThriftTermResults results,
List<Integer> binIds) {
int[] requestTotalCounts = mergedTermRequestTotalCounts.get(request);
if (requestTotalCounts == null) {
requestTotalCounts = new int[mostRecentBinIds.size()];
mergedTermRequestTotalCounts.put(request, requestTotalCounts);
// Only consider these results if they fall into the mostRecentBinIds range.
// The list of returned binIds is expected to be both sorted (in ascending order), and
// contiguous, which allows us to use firstBinId to check if it overlaps with the
// mostRecentBinIds range.
if (binIds.size() > 0 && binIds.get(binIds.size() - 1) >= firstBinId) {
int firstBinIndex;
if (binIds.get(0) == firstBinId) {
// This should be the common case when all partitions have the same binIds,
// no need to do a binary search.
firstBinIndex = 0;
} else {
// The firstBinId must be in the binIds range. We can find it using binary search since
// binIds are sorted.
firstBinIndex = Collections.binarySearch(binIds, firstBinId);
Preconditions.checkState(firstBinIndex >= 0,
"Expected to find firstBinId (%s) in the result binIds: %s, "
+ "histogramSettings: %s, termRequest: %s",
firstBinId, binIds, histogramSettings, request);
// Skip binIds that are before the smallest binId that we will use in the merged results.
for (int i = firstBinIndex; i < binIds.size(); i++) {
final Integer currentBinValue = results.getHistogramBins().get(i);
requestTotalCounts[i - firstBinIndex] += currentBinValue.intValue();
* Return a new ThriftTermStatisticsResults with the total counts merged, and if enabled,
* histogram bins merged.
public ThriftTermStatisticsResults merge() {
ThriftTermStatisticsResults results = new ThriftTermStatisticsResults(termResultsMap);
if (histogramSettings != null) {
return results;
* Takes multiple histogram results and merges them so:
* 1) Counts for the same binId (represents the time) and term are summed
* 2) All results are re-indexed to use the most recent bins found from the union of all bins
private void mergeHistogramBins(ThriftTermStatisticsResults mergedResults) {
private void setMinCompleteBinId(ThriftTermStatisticsResults mergedResults) {
// Decides, from the merged binId range (mostRecentBinIds), whether the computed
// minCompleteBinId can be reported on mergedResults.
if (mostRecentBinIds.isEmpty()) {
Preconditions.checkState(minCompleteBinId == null);
// This is the case where the requested numBins is set to 0. We don't have any binIds,
// and the minCompleteBinId has to be unset.
LOG.debug("Empty binIds returned for mergedResults: {}", mergedResults);
} else {
// mostRecentBinIds is sorted, so its last element is the largest binId being returned.
Integer maxBinId = mostRecentBinIds.get(mostRecentBinIds.size() - 1);
if (minCompleteBinId <= maxBinId) {
} else {
// Leaving the minCompleteBinId unset as it is outside the range of the returned binIds.
// The message has three placeholders, so maxBinId has to be passed too; without it
// mergedResults is printed in the maxBinId slot and the last placeholder is never filled.
LOG.debug("Computed minCompleteBinId: {} is out of maxBinId: {} for mergedResults: {}",
minCompleteBinId, maxBinId, mergedResults);
* Check that the binIds we are using are contiguous. Increment the provided stat if we find
* a gap, as we don't expect to find any.
* See: SEARCH-4362
* @param sortedBinIds most recent numBins sorted binIds.
* @param binIdGapCounter stat to increment if we see a gap in the binId range.
static void checkForBinIdGaps(List<Integer> sortedBinIds, SearchCounter binIdGapCounter) {
for (int i = sortedBinIds.size() - 1; i > 0; i--) {
final Integer currentBinId = sortedBinIds.get(i);
final Integer previousBinId = sortedBinIds.get(i - 1);
if (previousBinId < currentBinId - 1) {
* Returns a view containing only the last N items from the list
private static <E> List<E> takeLastN(List<E> lst, int n) {
Preconditions.checkArgument(n <= lst.size(),
"Attempting to take more elements than the list has. List size: %s, n: %s", lst.size(), n);
return lst.subList(lst.size() - n, lst.size());
private void useMostRecentBinsForEachThriftTermResults() {
for (Map.Entry<ThriftTermRequest, ThriftTermResults> entry : termResultsMap.entrySet()) {
ThriftTermRequest request = entry.getKey();
ThriftTermResults results = entry.getValue();
List<Integer> histogramBins = Lists.newArrayList();
int[] requestTotalCounts = mergedTermRequestTotalCounts.get(request);
for (int totalCount : requestTotalCounts) {
* Merges search stats from several earlybird responses and puts them in
* {@link ThriftSearchResults} structure.
* @param responses earlybird responses to merge the search stats from
* @return merged search stats inside of {@link ThriftSearchResults} structure
public static ThriftSearchResults mergeSearchStats(Collection<EarlybirdResponse> responses) {
int numHitsProcessed = 0;
int numPartitionsEarlyTerminated = 0;
for (EarlybirdResponse response : responses) {
ThriftSearchResults searchResults = response.getSearchResults();
if (searchResults != null) {
numHitsProcessed += searchResults.getNumHitsProcessed();
numPartitionsEarlyTerminated += searchResults.getNumPartitionsEarlyTerminated();
ThriftSearchResults searchResults = new ThriftSearchResults(new ArrayList<>());
return searchResults;
Binary file not shown.
@ -1,97 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.ArrayList;
import java.util.List;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.TierResponse;
public final class TierResponseAccumulator extends ResponseAccumulator {
private static final String TARGET_TYPE_TIER = "tier";
private final List<TierResponse> tierResponses = new ArrayList<>();
// Total number of partitions the request was sent to, across all tiers.
private int totalPartitionsQueriedInAllTiers = 0;
// Among the above partitions, the number of them that returned successful responses.
private int totalSuccessfulPartitionsInAllTiers = 0;
public String getNameForLogging(int responseIndex, int numTotalResponses) {
return TARGET_TYPE_TIER + (numTotalResponses - responseIndex);
public String getNameForEarlybirdResponseCodeStats(int responseIndex, int numTotalResponses) {
return TARGET_TYPE_TIER + (numTotalResponses - responseIndex);
protected boolean isMergingAcrossTiers() {
return true;
public boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger) {
if (foundError()) {
return true;
int numResults = 0;
for (EarlybirdResponse resp : getSuccessResponses()) {
if (resp.isSetSearchResults()) {
numResults += resp.getSearchResults().getResultsSize();
return merger.shouldEarlyTerminateTierMerge(numResults, foundEarlyTermination());
public void handleSkippedResponse(EarlybirdResponseCode responseCode) {
tierResponses.add(new TierResponse()
public void handleErrorResponse(EarlybirdResponse response) {
// TierResponse, which is only returned if merging results from different tiers.
TierResponse tr = new TierResponse();
if (response != null) {
if (response.isSetResponseCode()) {
} else {
totalPartitionsQueriedInAllTiers += response.getNumPartitions();
} else {
public AccumulatedResponses.PartitionCounts getPartitionCounts() {
return new AccumulatedResponses.PartitionCounts(totalPartitionsQueriedInAllTiers,
totalSuccessfulPartitionsInAllTiers, tierResponses);
public void extraSuccessfulResponseHandler(EarlybirdResponse response) {
// Record tier stats.
totalPartitionsQueriedInAllTiers += response.getNumPartitions();
totalSuccessfulPartitionsInAllTiers += response.getNumSuccessfulPartitions();
tierResponses.add(new TierResponse()
Binary file not shown.
@ -1,65 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Preconditions;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.RelevanceMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
* Merger class to merge toptweets EarlybirdResponse objects
public class TopTweetsResponseMerger extends EarlybirdResponseMerger {
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
private static final SearchTimerStats TIMER =
SearchTimerStats.export("merge_top_tweets", TimeUnit.NANOSECONDS, false, true);
public TopTweetsResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode) {
super(requestContext, responses, mode);
protected SearchTimerStats getMergedResponseTimer() {
return TIMER;
protected double getDefaultSuccessResponseThreshold() {
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
final ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
Preconditions.checkState(searchQuery.getRankingMode() == ThriftSearchRankingMode.TOPTWEETS);
int numResultsRequested = computeNumResultsToKeep();
RelevanceMergeCollector collector = new RelevanceMergeCollector(responses.size());
ThriftSearchResults searchResults = collector.getAllSearchResults();
if (numResultsRequested < searchResults.getResults().size()) {
searchResults.setResults(searchResults.getResults().subList(0, numResultsRequested));
return mergedResponse;
Binary file not shown.
@ -1,71 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
* Tracks what situations are encountered when trimming results
class TrimStats {
protected static final TrimStats EMPTY_STATS = new TrimStats();
private int maxIdFilterCount = 0;
private int minIdFilterCount = 0;
private int removedDupsCount = 0;
private int resultsTruncatedFromTailCount = 0;
int getMinIdFilterCount() {
return minIdFilterCount;
int getRemovedDupsCount() {
return removedDupsCount;
int getResultsTruncatedFromTailCount() {
return resultsTruncatedFromTailCount;
void decreaseMaxIdFilterCount() {
void decreaseMinIdFilterCount() {
public void clearMaxIdFilterCount() {
this.maxIdFilterCount = 0;
public void clearMinIdFilterCount() {
this.minIdFilterCount = 0;
void increaseMaxIdFilterCount() {
void increaseMinIdFilterCount() {
void increaseRemovedDupsCount() {
void setResultsTruncatedFromTailCount(int resultsTruncatedFromTailCount) {
this.resultsTruncatedFromTailCount = resultsTruncatedFromTailCount;
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append(", minIdFilterCount=").append(minIdFilterCount);
builder.append(", removedDupsCount=").append(removedDupsCount);
builder.append(", resultsTruncatedFromTailCount=").append(resultsTruncatedFromTailCount);
return builder.toString();
@ -1,15 +0,0 @@
sources = ["*.java"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user