[docx] split commit for file 4600

Signed-off-by: Ari Archer <ari.web.xyz@gmail.com>
This commit is contained in:
Ari Archer 2024-01-23 19:16:30 +02:00
parent f37e76300b
commit 470dc00686
393 changed files with 0 additions and 20103 deletions

View File

@ -1,205 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Maps;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.search.queryparser.util.IdTimeRanges;
import com.twitter.util.Future;
/**
* A Finagle filter used to filter requests to tiers.
* Parses serialized query on Earlybird request, and extracts since / until / since_id / max_id
* operators. This filter then tests whether the request overlaps with the given tier. If there
* is no overlap, an empty response is returned without actually forwarding the request to the
* underlying service.
*/
public class EarlybirdTimeRangeFilter extends
SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final Logger LOG = LoggerFactory.getLogger(EarlybirdTimeRangeFilter.class);
private static final EarlybirdResponse ERROR_RESPONSE =
new EarlybirdResponse(EarlybirdResponseCode.PERSISTENT_ERROR, 0)
.setSearchResults(new ThriftSearchResults());
private final ServingRangeProvider servingRangeProvider;
private final Optional<EarlybirdTimeFilterQueryRewriter> queryRewriter;
private static final Map<EarlybirdRequestType, SearchCounter> FAILED_REQUESTS;
static {
final Map<EarlybirdRequestType, SearchCounter> tempMap =
Maps.newEnumMap(EarlybirdRequestType.class);
for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
tempMap.put(requestType, SearchCounter.export(
"time_range_filter_" + requestType.getNormalizedName() + "_failed_requests"));
}
FAILED_REQUESTS = Collections.unmodifiableMap(tempMap);
}
public static EarlybirdTimeRangeFilter newTimeRangeFilterWithQueryRewriter(
ServingRangeProvider servingRangeProvider,
SearchDecider decider) {
return new EarlybirdTimeRangeFilter(servingRangeProvider,
Optional.of(new EarlybirdTimeFilterQueryRewriter(servingRangeProvider, decider)));
}
public static EarlybirdTimeRangeFilter newTimeRangeFilterWithoutQueryRewriter(
ServingRangeProvider servingRangeProvider) {
return new EarlybirdTimeRangeFilter(servingRangeProvider, Optional.empty());
}
/**
* Construct a filter that avoids forwarding requests to unrelated tiers
* based on requests' since / until / since_id / max_id.
* @param provider Holds the boundary information.
*/
EarlybirdTimeRangeFilter(
ServingRangeProvider provider,
Optional<EarlybirdTimeFilterQueryRewriter> rewriter) {
this.servingRangeProvider = provider;
this.queryRewriter = rewriter;
}
public ServingRangeProvider getServingRangeProvider() {
return servingRangeProvider;
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
Query parsedQuery = requestContext.getParsedQuery();
if (parsedQuery != null) {
// Only perform filtering if serialized query is set.
try {
IdTimeRanges queryRanges = IdTimeRanges.fromQuery(parsedQuery);
if (queryRanges == null) {
// No time ranges in query.
return issueServiceRequest(service, requestContext);
}
ServingRange servingRange =
servingRangeProvider.getServingRange(
requestContext, requestContext.useOverrideTierConfig());
if (queryDoesNotOverlapWithServingRange(queryRanges, servingRange)) {
return Future.value(tierSkippedResponse(requestContext.getEarlybirdRequestType(),
servingRange));
} else {
return issueServiceRequest(service, requestContext);
}
} catch (QueryParserException e) {
LOG.warn("Unable to get IdTimeRanges from query: " + parsedQuery.serialize());
// The failure here is not due to a malformed query from the client, since we already
// were able to successfully get a parsed Query from the request.
// If we can't determine the time ranges, pass the query along to the tier, and just
// restrict it to the time ranges of the tier.
return issueServiceRequest(service, requestContext);
}
} else {
// There's no serialized query. Just pass through like an identity filter.
return issueServiceRequest(service, requestContext);
}
}
private boolean queryDoesNotOverlapWithServingRange(IdTimeRanges queryRanges,
ServingRange servingRange) {
// As long as a query overlaps with the tier serving range on either side,
// the request is not filtered. I.e. we want to be conservative when doing this filtering,
// because it is just an optimization. We ignore the inclusiveness / exclusiveness of the
boundaries. If the tier boundary and the query boundary happen to be the same, we do not
// filter the request.
return queryRanges.getSinceIDExclusive().or(0L)
> servingRange.getServingRangeMaxId()
|| queryRanges.getMaxIDInclusive().or(Long.MAX_VALUE)
< servingRange.getServingRangeSinceId()
|| queryRanges.getSinceTimeInclusive().or(0)
> servingRange.getServingRangeUntilTimeSecondsFromEpoch()
|| queryRanges.getUntilTimeExclusive().or(Integer.MAX_VALUE)
< servingRange.getServingRangeSinceTimeSecondsFromEpoch();
}
private Future<EarlybirdResponse> issueServiceRequest(
Service<EarlybirdRequestContext, EarlybirdResponse> service,
EarlybirdRequestContext requestContext) {
try {
EarlybirdRequestContext request = requestContext;
if (queryRewriter.isPresent()) {
request = queryRewriter.get().rewriteRequest(requestContext);
}
return service.apply(request);
} catch (QueryParserException e) {
FAILED_REQUESTS.get(requestContext.getEarlybirdRequestType()).increment();
String msg = "Failed to add time filter operators";
LOG.error(msg, e);
// Note that in this case it is not clear whether the error is the client's fault or our
// fault, so we don't necessarily return a CLIENT_ERROR here.
// Currently this actually returns a PERSISTENT_ERROR.
if (requestContext.getRequest().getDebugMode() > 0) {
return Future.value(
ERROR_RESPONSE.deepCopy().setDebugString(msg + ": " + e.getMessage()));
} else {
return Future.value(ERROR_RESPONSE);
}
}
}
/**
* Creates a tier skipped response, based on the given request type.
*
* For recency, relevance, facets and top tweets requests, this method returns a SUCCESS response
* with no search results and the minSearchedStatusID and maxSearchedStatusID appropriately set.
* For term stats requests, it returns a TIER_SKIPPED response, but we need to revisit this.
*
* @param requestType The type of the request.
* @param servingRange The serving range of the tier that we're skipping.
*/
@VisibleForTesting
public static EarlybirdResponse tierSkippedResponse(
EarlybirdRequestType requestType,
ServingRange servingRange) {
String debugMessage =
"Tier skipped because it does not intersect with query time boundaries.";
if (requestType == EarlybirdRequestType.TERM_STATS) {
// If it's a term stats request, return a TIER_SKIPPED response for now.
// But we need to figure out the right thing to do here.
return new EarlybirdResponse(EarlybirdResponseCode.TIER_SKIPPED, 0)
.setDebugString(debugMessage);
} else {
// minIds in ServingRange instances are set to tierLowerBoundary - 1, because the
// since_id operator is exclusive. The max_id operator on the other hand is inclusive,
// so maxIds in ServingRange instances are also set to tierUpperBoundary - 1.
// Here we want both of them to be inclusive, so we need to increment the minId by 1.
return EarlybirdResponseUtil.tierSkippedRootResponse(
servingRange.getServingRangeSinceId() + 1,
servingRange.getServingRangeMaxId(),
debugMessage);
}
}
}
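For reference, a minimal self-contained sketch of the overlap test performed by queryDoesNotOverlapWithServingRange above, with plain long parameters standing in for IdTimeRanges and ServingRange (the class and method names below are illustrative, not part of the original code):

final class TimeRangeOverlapSketch {
  // Mirrors queryDoesNotOverlapWithServingRange: skip the tier only when the query range
  // lies entirely outside the serving range on either the id axis or the time axis.
  static boolean doesNotOverlap(
      long querySinceId, long queryMaxId,        // defaults: 0 / Long.MAX_VALUE when absent
      long querySinceSec, long queryUntilSec,    // defaults: 0 / Integer.MAX_VALUE when absent
      long tierSinceId, long tierMaxId,
      long tierSinceSec, long tierUntilSec) {
    return querySinceId > tierMaxId
        || queryMaxId < tierSinceId
        || querySinceSec > tierUntilSec
        || queryUntilSec < tierSinceSec;
  }

  public static void main(String[] args) {
    // Query only wants ids newer than anything in the tier -> tier can be skipped.
    System.out.println(doesNotOverlap(
        2_000L, Long.MAX_VALUE, 0L, Integer.MAX_VALUE,
        1L, 1_000L, 0L, 10_000L));  // true
    // Query id range overlaps the tier's range -> request must be forwarded.
    System.out.println(doesNotOverlap(
        500L, 1_500L, 0L, Integer.MAX_VALUE,
        1L, 1_000L, 0L, 10_000L));  // false
  }
}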

View File

@ -1,167 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.List;
import javax.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdDebugInfo;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryNodeUtils;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.search.queryparser.query.search.SearchOperator;
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
import com.twitter.search.queryparser.visitors.DropAllProtectedOperatorVisitor;
import com.twitter.search.queryparser.visitors.QueryTreeIndex;
import com.twitter.util.Future;
/**
* Full archive service filter validates requests with a protected operator, appends the
* '[exclude protected]' operator by default, and appends the '[filter protected]' operator instead
* if the 'getProtectedTweetsOnly' request param is set. A client error response is returned if any
* of the following rules is violated.
* 1. There is at most one 'protected' operator in the query.
* 2. If there is a 'protected' operator, it must be in the query root node.
* 3. The parent node of the 'protected' operator must not be negated and must be a conjunction.
* 4. If there is a positive 'protected' operator, 'followedUserIds' and 'searcherId' request
* params must be set.
*/
public class FullArchiveProtectedOperatorFilter extends
SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final Logger LOG =
LoggerFactory.getLogger(FullArchiveProtectedOperatorFilter.class);
private static final SearchOperator EXCLUDE_PROTECTED_OPERATOR =
new SearchOperator(SearchOperator.Type.EXCLUDE, SearchOperatorConstants.PROTECTED);
private static final SearchOperator FILTER_PROTECTED_OPERATOR =
new SearchOperator(SearchOperator.Type.FILTER, SearchOperatorConstants.PROTECTED);
private static final SearchCounter QUERY_PARSER_FAILURE_COUNT =
SearchCounter.export("protected_operator_filter_query_parser_failure_count");
private final DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor;
private final SearchDecider decider;
@Inject
public FullArchiveProtectedOperatorFilter(
DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor,
SearchDecider decider) {
this.dropProtectedOperatorVisitor = dropProtectedOperatorVisitor;
this.decider = decider;
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
Query query = requestContext.getParsedQuery();
if (query == null) {
return service.apply(requestContext);
}
QueryTreeIndex queryTreeIndex = QueryTreeIndex.buildFor(query);
List<Query> nodeList = queryTreeIndex.getNodeList();
// Try to find a protected operator; return an error response if more than one protected
// operator is detected.
SearchOperator protectedOperator = null;
for (Query node : nodeList) {
if (node instanceof SearchOperator) {
SearchOperator searchOp = (SearchOperator) node;
if (SearchOperatorConstants.PROTECTED.equals(searchOp.getOperand())) {
if (protectedOperator == null) {
protectedOperator = searchOp;
} else {
return createErrorResponse("Only one 'protected' operator is expected.");
}
}
}
}
Query processedQuery;
if (protectedOperator == null) {
// No protected operator detected; append '[exclude protected]' by default.
processedQuery = QueryNodeUtils.appendAsConjunction(query, EXCLUDE_PROTECTED_OPERATOR);
} else {
// protected operator must be in the query root node
if (queryTreeIndex.getParentOf(protectedOperator) != query) {
return createErrorResponse("'protected' operator must be in the query root node");
}
// the query node that contains protected operator must not be negated
if (query.mustNotOccur()) {
return createErrorResponse("The query node that contains a 'protected' operator must not"
+ " be negated.");
}
// the query node that contains protected operator must be a conjunction
if (!query.isTypeOf(Query.QueryType.CONJUNCTION)) {
return createErrorResponse("The query node that contains a 'protected' operator must"
+ " be a conjunction.");
}
// check the existence of 'followedUserIds' and 'searcherId' if it is a positive operator
if (isPositive(protectedOperator)) {
if (!validateRequestParam(requestContext.getRequest())) {
return createErrorResponse("'followedUserIds' and 'searcherId' are required "
+ "by positive 'protected' operator.");
}
}
processedQuery = query;
}
// Update processedQuery if 'getProtectedTweetsOnly' is set to true; it takes precedence over
// any existing protected operators.
if (requestContext.getRequest().isGetProtectedTweetsOnly()) {
if (!validateRequestParam(requestContext.getRequest())) {
return createErrorResponse("'followedUserIds' and 'searcherId' are required "
+ "when 'getProtectedTweetsOnly' is set to true.");
}
try {
processedQuery = processedQuery.accept(dropProtectedOperatorVisitor);
} catch (QueryParserException e) {
// this should not happen since we already have a parsed query
QUERY_PARSER_FAILURE_COUNT.increment();
LOG.warn(
"Failed to drop protected operator for serialized query: " + query.serialize(), e);
}
processedQuery =
QueryNodeUtils.appendAsConjunction(processedQuery, FILTER_PROTECTED_OPERATOR);
}
if (processedQuery == query) {
return service.apply(requestContext);
} else {
EarlybirdRequestContext clonedRequestContext =
EarlybirdRequestContext.copyRequestContext(requestContext, processedQuery);
return service.apply(clonedRequestContext);
}
}
private boolean validateRequestParam(EarlybirdRequest request) {
List<Long> followedUserIds = request.followedUserIds;
Long searcherId = (request.searchQuery != null && request.searchQuery.isSetSearcherId())
? request.searchQuery.getSearcherId() : null;
return followedUserIds != null && !followedUserIds.isEmpty() && searcherId != null;
}
private boolean isPositive(SearchOperator searchOp) {
boolean isNegateExclude = searchOp.mustNotOccur()
&& searchOp.getOperatorType() == SearchOperator.Type.EXCLUDE;
boolean isPositive = !searchOp.mustNotOccur()
&& (searchOp.getOperatorType() == SearchOperator.Type.INCLUDE
|| searchOp.getOperatorType() == SearchOperator.Type.FILTER);
return isNegateExclude || isPositive;
}
private Future<EarlybirdResponse> createErrorResponse(String errorMsg) {
EarlybirdResponse response = new EarlybirdResponse(EarlybirdResponseCode.CLIENT_ERROR, 0);
response.setDebugInfo(new EarlybirdDebugInfo().setHost("full_archive_root"));
response.setDebugString(errorMsg);
return Future.value(response);
}
}
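A tiny sketch of the isPositive truth table above: a protected operator counts as "positive" (i.e. asks for protected tweets) when it is a non-negated include/filter or a negated exclude. The enum and class below are illustrative stand-ins, not the real queryparser types:

final class ProtectedOperatorPolaritySketch {
  enum Type { INCLUDE, EXCLUDE, FILTER }

  // Mirrors FullArchiveProtectedOperatorFilter.isPositive.
  static boolean isPositive(Type type, boolean negated) {
    boolean negatedExclude = negated && type == Type.EXCLUDE;
    boolean plainIncludeOrFilter = !negated && (type == Type.INCLUDE || type == Type.FILTER);
    return negatedExclude || plainIncludeOrFilter;
  }

  public static void main(String[] args) {
    System.out.println(isPositive(Type.FILTER, false));  // true:  [filter protected]
    System.out.println(isPositive(Type.EXCLUDE, true));   // true:  negated [exclude protected]
    System.out.println(isPositive(Type.EXCLUDE, false));  // false: [exclude protected]
    System.out.println(isPositive(Type.INCLUDE, true));   // false: negated [include protected]
  }
}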

View File

@ -1,64 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Date;
import java.util.concurrent.TimeUnit;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.common.util.date.DateUtil;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
public class FullArchiveServingRangeProvider implements ServingRangeProvider {
public static final Date FULL_ARCHIVE_START_DATE = DateUtil.toDate(2006, 3, 21);
private static final int DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO = 48;
private final SearchDecider decider;
private final String deciderKey;
public FullArchiveServingRangeProvider(
SearchDecider decider, String deciderKey) {
this.decider = decider;
this.deciderKey = deciderKey;
}
@Override
public ServingRange getServingRange(
final EarlybirdRequestContext requestContext, boolean useBoundaryOverride) {
return new ServingRange() {
@Override
public long getServingRangeSinceId() {
// we use 1 instead of 0, because the since_id operator is inclusive in earlybirds.
return 1L;
}
@Override
public long getServingRangeMaxId() {
long servingRangeEndMillis = TimeUnit.HOURS.toMillis(
(decider.featureExists(deciderKey))
? decider.getAvailability(deciderKey)
: DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeEndMillis;
return SnowflakeIdParser.generateValidStatusId(boundaryTime, 0);
}
@Override
public long getServingRangeSinceTimeSecondsFromEpoch() {
return FULL_ARCHIVE_START_DATE.getTime() / 1000;
}
@Override
public long getServingRangeUntilTimeSecondsFromEpoch() {
long servingRangeEndMillis = TimeUnit.HOURS.toMillis(
(decider.featureExists(deciderKey))
? decider.getAvailability(deciderKey)
: DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeEndMillis;
return boundaryTime / 1000;
}
};
}
}
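getServingRangeMaxId above turns "request time minus N hours" (N read from the decider, defaulting to 48) into a Snowflake-style status id. Below is a self-contained sketch of that arithmetic, assuming the standard Twitter Snowflake layout (custom epoch 1288834974657 ms, 22 low bits for worker/sequence); the real SnowflakeIdParser.generateValidStatusId may differ in detail:

import java.util.concurrent.TimeUnit;

final class ServingRangeBoundarySketch {
  private static final long TWITTER_EPOCH_MS = 1288834974657L; // assumed Snowflake epoch
  private static final int TIMESTAMP_SHIFT = 22;               // assumed worker + sequence bits

  // Roughly what generateValidStatusId(boundaryTimeMs, 0) is expected to produce:
  // the smallest id whose embedded timestamp equals boundaryTimeMs.
  static long statusIdForTime(long boundaryTimeMs) {
    return (boundaryTimeMs - TWITTER_EPOCH_MS) << TIMESTAMP_SHIFT;
  }

  public static void main(String[] args) {
    long requestCreatedMs = System.currentTimeMillis();
    long boundaryHoursAgo = 48; // DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO
    long boundaryTimeMs = requestCreatedMs - TimeUnit.HOURS.toMillis(boundaryHoursAgo);
    System.out.println("serving range max id:      " + statusIdForTime(boundaryTimeMs));
    System.out.println("serving range until (sec): " + boundaryTimeMs / 1000);
  }
}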

View File

@ -1,66 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import javax.inject.Inject;
import com.google.common.annotations.VisibleForTesting;
import com.twitter.common.util.Clock;
import com.twitter.finagle.Filter;
import com.twitter.finagle.Service;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.common.EarlybirdRequestUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.QueryParsingUtils;
import com.twitter.search.earlybird_root.common.TwitterContextProvider;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.util.Future;
/**
* Creates a new RequestContext from an EarlybirdRequest, and passes the RequestContext down to
* the rest of the filter/service chain.
*/
public class InitializeRequestContextFilter extends
Filter<EarlybirdRequest, EarlybirdResponse, EarlybirdRequestContext, EarlybirdResponse> {
@VisibleForTesting
static final SearchCounter FAILED_QUERY_PARSING =
SearchCounter.export("initialize_request_context_filter_query_parsing_failure");
private final SearchDecider decider;
private final TwitterContextProvider twitterContextProvider;
private final Clock clock;
/**
* The constructor of the filter.
*/
@Inject
public InitializeRequestContextFilter(SearchDecider decider,
TwitterContextProvider twitterContextProvider,
Clock clock) {
this.decider = decider;
this.twitterContextProvider = twitterContextProvider;
this.clock = clock;
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequest request,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
EarlybirdRequestUtil.recordClientClockDiff(request);
EarlybirdRequestContext requestContext;
try {
requestContext = EarlybirdRequestContext.newContext(
request, decider, twitterContextProvider.get(), clock);
} catch (QueryParserException e) {
FAILED_QUERY_PARSING.increment();
return QueryParsingUtils.newClientErrorResponse(request, e);
}
return service.apply(requestContext);
}
}

View File

@ -1,80 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import com.google.common.annotations.VisibleForTesting;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResultExtraMetadata;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
/**
* Filter that tracks the isUserProtected metadata stats returned from Earlybirds.
*/
public class IsUserProtectedMetadataTrackingFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final String COUNTER_PREFIX = "is_user_protected_metadata_count_filter_";
@VisibleForTesting
final Map<EarlybirdRequestType, SearchCounter> totalCounterByRequestTypeMap;
@VisibleForTesting
final Map<EarlybirdRequestType, SearchCounter> isProtectedCounterByRequestTypeMap;
public IsUserProtectedMetadataTrackingFilter() {
this.totalCounterByRequestTypeMap = new EnumMap<>(EarlybirdRequestType.class);
this.isProtectedCounterByRequestTypeMap = new EnumMap<>(EarlybirdRequestType.class);
for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
this.totalCounterByRequestTypeMap.put(requestType,
SearchCounter.export(COUNTER_PREFIX + requestType.getNormalizedName() + "_total"));
this.isProtectedCounterByRequestTypeMap.put(requestType,
SearchCounter.export(COUNTER_PREFIX + requestType.getNormalizedName() + "_is_protected"));
}
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext request,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
Future<EarlybirdResponse> response = service.apply(request);
EarlybirdRequestType requestType = request.getEarlybirdRequestType();
response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
@Override
public void onSuccess(EarlybirdResponse response) {
if (!response.isSetSearchResults() || response.getSearchResults().getResults().isEmpty()) {
return;
}
List<ThriftSearchResult> searchResults = response.getSearchResults().getResults();
int totalCount = searchResults.size();
int isUserProtectedCount = 0;
for (ThriftSearchResult searchResult : searchResults) {
if (searchResult.isSetMetadata() && searchResult.getMetadata().isSetExtraMetadata()) {
ThriftSearchResultExtraMetadata extraMetadata =
searchResult.getMetadata().getExtraMetadata();
if (extraMetadata.isIsUserProtected()) {
isUserProtectedCount++;
}
}
}
IsUserProtectedMetadataTrackingFilter.this
.totalCounterByRequestTypeMap.get(requestType).add(totalCount);
IsUserProtectedMetadataTrackingFilter.this
.isProtectedCounterByRequestTypeMap.get(requestType).add(isUserProtectedCount);
}
@Override
public void onFailure(Throwable cause) { }
});
return response;
}
}

View File

@ -1,49 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftTweetSource;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Function;
import com.twitter.util.Future;
public class MarkTweetSourceFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private final SearchCounter searchResultsNotSet;
private final ThriftTweetSource tweetSource;
public MarkTweetSourceFilter(ThriftTweetSource tweetSource) {
this.tweetSource = tweetSource;
searchResultsNotSet = SearchCounter.export(
tweetSource.name().toLowerCase() + "_mark_tweet_source_filter_search_results_not_set");
}
@Override
public Future<EarlybirdResponse> apply(
final EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
return service.apply(requestContext).map(new Function<EarlybirdResponse, EarlybirdResponse>() {
@Override
public EarlybirdResponse apply(EarlybirdResponse response) {
if (response.getResponseCode() == EarlybirdResponseCode.SUCCESS
&& requestContext.getEarlybirdRequestType() != EarlybirdRequestType.TERM_STATS) {
if (!response.isSetSearchResults()) {
searchResultsNotSet.increment();
} else {
for (ThriftSearchResult searchResult : response.getSearchResults().getResults()) {
searchResult.setTweetSource(tweetSource);
}
}
}
return response;
}
}
);
}
}

View File

@ -1,119 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.List;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchMovingAverage;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
/**
* Filter that tracks the engagement stats returned from Earlybirds.
*/
public class MetadataTrackingFilter extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private static final String SCORING_SIGNAL_STAT_PREFIX = "scoring_signal_";
private static final String SCORE_STAT_PATTERN = "client_id_score_tracker_for_%s_x100";
@VisibleForTesting
static final SearchMovingAverage SCORING_SIGNAL_FAV_COUNT =
SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "fav_count");
@VisibleForTesting
static final SearchMovingAverage SCORING_SIGNAL_REPLY_COUNT =
SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "reply_count");
@VisibleForTesting
static final SearchMovingAverage SCORING_SIGNAL_RETWEET_COUNT =
SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "retweet_count");
@VisibleForTesting
static final LoadingCache<String, SearchMovingAverage> CLIENT_SCORE_METRICS_LOADING_CACHE =
CacheBuilder.newBuilder().build(new CacheLoader<String, SearchMovingAverage>() {
public SearchMovingAverage load(String clientId) {
return SearchMovingAverage.export(String.format(SCORE_STAT_PATTERN, clientId));
}
});
@Override
public Future<EarlybirdResponse> apply(final EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
Future<EarlybirdResponse> response = service.apply(request);
response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
@Override
public void onSuccess(EarlybirdResponse earlybirdResponse) {
EarlybirdRequestType type = EarlybirdRequestType.of(request);
if (earlybirdResponse.responseCode == EarlybirdResponseCode.SUCCESS
&& type == EarlybirdRequestType.RELEVANCE
&& earlybirdResponse.isSetSearchResults()
&& earlybirdResponse.getSearchResults().isSetResults()) {
List<ThriftSearchResult> searchResults = earlybirdResponse.getSearchResults()
.getResults();
long totalFavoriteAmount = 0;
long totalReplyAmount = 0;
long totalRetweetAmount = 0;
double totalScoreX100 = 0;
for (ThriftSearchResult result : searchResults) {
if (!result.isSetMetadata()) {
continue;
}
ThriftSearchResultMetadata metadata = result.getMetadata();
if (metadata.isSetFavCount()) {
totalFavoriteAmount += metadata.getFavCount();
}
if (metadata.isSetReplyCount()) {
totalReplyAmount += metadata.getReplyCount();
}
if (metadata.isSetRetweetCount()) {
totalRetweetAmount += metadata.getRetweetCount();
}
if (metadata.isSetScore()) {
// Scale up the score by 100 so that scores are at least 1 and visible on viz graph
totalScoreX100 += metadata.getScore() * 100;
}
}
// We only count present engagement counts but report the full size of the search results.
// This means that we consider the missing counts as being 0.
SCORING_SIGNAL_FAV_COUNT.addSamples(totalFavoriteAmount, searchResults.size());
SCORING_SIGNAL_REPLY_COUNT.addSamples(totalReplyAmount, searchResults.size());
SCORING_SIGNAL_RETWEET_COUNT.addSamples(totalRetweetAmount, searchResults.size());
// Export per client id average scores.
String requestClientId = ClientIdUtil.getClientIdFromRequest(request);
String quotaClientId = ClientIdUtil.getQuotaClientId(requestClientId);
CLIENT_SCORE_METRICS_LOADING_CACHE.getUnchecked(quotaClientId)
.addSamples((long) totalScoreX100, searchResults.size());
}
}
@Override
public void onFailure(Throwable cause) { }
});
return response;
}
}
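The comment about missing engagement counts deserves a concrete number: the filter adds the sum of the counts it actually saw, but uses the full result-list size as the sample count, so absent counts behave as zeros. A plain-arithmetic sketch (not the real SearchMovingAverage):

final class EngagementAverageSketch {
  public static void main(String[] args) {
    // Three results; only two of them carry a fav count.
    Integer[] favCounts = {10, null, 2};

    long totalFavs = 0;
    for (Integer fav : favCounts) {
      if (fav != null) {
        totalFavs += fav; // only engagement values that are actually present are summed
      }
    }
    // addSamples(totalFavs, results.size()): the average is taken over all results,
    // so 12 / 3 = 4.0 rather than 12 / 2 = 6.0.
    double averagePerResult = (double) totalFavs / favCounts.length;
    System.out.println(averagePerResult); // 4.0
  }
}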

View File

@ -1,45 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.Percentile;
import com.twitter.search.common.metrics.PercentileUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
public class NamedMultiTermDisjunctionStatsFilter extends
SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private static final String STAT_FORMAT = "named_disjunction_size_client_%s_key_%s";
// ClientID -> disjunction name -> operand count
private static final ConcurrentMap<String, ConcurrentMap<String, Percentile<Integer>>>
NAMED_MULTI_TERM_DISJUNCTION_IDS_COUNT = new ConcurrentHashMap<>();
@Override
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
if (request.getSearchQuery().isSetNamedDisjunctionMap()) {
for (Map.Entry<String, List<Long>> entry
: request.getSearchQuery().getNamedDisjunctionMap().entrySet()) {
Map<String, Percentile<Integer>> statsForClient =
NAMED_MULTI_TERM_DISJUNCTION_IDS_COUNT.computeIfAbsent(
request.getClientId(), clientId -> new ConcurrentHashMap<>());
Percentile<Integer> stats = statsForClient.computeIfAbsent(entry.getKey(),
keyName -> PercentileUtil.createPercentile(
String.format(STAT_FORMAT, request.getClientId(), keyName)));
stats.record(entry.getValue().size());
}
}
return service.apply(request);
}
}

View File

@ -1,81 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.HashSet;
import java.util.Set;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
import com.twitter.search.queryparser.visitors.DetectPositiveOperatorVisitor;
/**
* Filter that tracks unexpected nullcast results returned from Earlybirds.
*/
public class NullcastTrackingFilter extends SensitiveResultsTrackingFilter {
public NullcastTrackingFilter() {
super("unexpected nullcast tweets", true);
}
private static final Logger LOG = LoggerFactory.getLogger(NullcastTrackingFilter.class);
@VisibleForTesting
static final SearchCounter BAD_NULLCAST_QUERY_COUNT =
SearchCounter.export("unexpected_nullcast_query_count");
@VisibleForTesting
static final SearchCounter BAD_NULLCAST_RESULT_COUNT =
SearchCounter.export("unexpected_nullcast_result_count");
@Override
protected Logger getLogger() {
return LOG;
}
@Override
protected SearchCounter getSensitiveQueryCounter() {
return BAD_NULLCAST_QUERY_COUNT;
}
@Override
protected SearchCounter getSensitiveResultsCounter() {
return BAD_NULLCAST_RESULT_COUNT;
}
@Override
protected Set<Long> getSensitiveResults(EarlybirdRequestContext requestContext,
EarlybirdResponse earlybirdResponse) throws Exception {
if (!requestContext.getParsedQuery().accept(
new DetectPositiveOperatorVisitor(SearchOperatorConstants.NULLCAST))) {
return EarlybirdResponseUtil.findUnexpectedNullcastStatusIds(
earlybirdResponse.getSearchResults(), requestContext.getRequest());
} else {
return new HashSet<>();
}
}
/**
* Some Earlybird requests are not searches; instead, they are scoring requests.
* These requests supply a list of IDs to be scored.
* It is OK to return a nullcast tweet result if its ID is supplied in the request.
* This method extracts the tweet IDs supplied by such scoring requests.
*/
@Override
protected Set<Long> getExceptedResults(EarlybirdRequestContext requestContext) {
EarlybirdRequest request = requestContext.getRequest();
if (request == null
|| !request.isSetSearchQuery()
|| request.getSearchQuery().getSearchStatusIdsSize() == 0) {
return ImmutableSet.of();
}
return request.getSearchQuery().getSearchStatusIds();
}
}
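Combining getSensitiveResults and getExceptedResults, the parent SensitiveResultsTrackingFilter (not part of this diff) presumably flags only the nullcast ids that the request did not explicitly ask to score. A sketch of that set difference, with plain Long sets standing in for the request and response types:

import java.util.HashSet;
import java.util.Set;

final class UnexpectedNullcastSketch {
  public static void main(String[] args) {
    // Nullcast tweet ids found in the response (what getSensitiveResults returns).
    Set<Long> nullcastInResponse = new HashSet<>(Set.of(100L, 200L, 300L));
    // Ids the scoring request explicitly supplied (what getExceptedResults returns).
    Set<Long> requestedIds = Set.of(200L);

    // Only ids the client did not supply count as unexpected.
    nullcastInResponse.removeAll(requestedIds);
    // Prints the two unexpected ids (100 and 300); each would bump BAD_NULLCAST_RESULT_COUNT.
    System.out.println(nullcastInResponse);
  }
}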

View File

@ -1,10 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import javax.inject.Inject;
public class PostCacheRequestTypeCountFilter extends RequestTypeCountFilter {
@Inject
public PostCacheRequestTypeCountFilter() {
super("post_cache");
}
}

View File

@ -1,10 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import javax.inject.Inject;
public class PreCacheRequestTypeCountFilter extends RequestTypeCountFilter {
@Inject
public PreCacheRequestTypeCountFilter() {
super("pre_cache");
}
}

View File

@ -1,114 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.inject.Inject;
import javax.inject.Singleton;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.twitter.common.text.language.LocaleUtil;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.constants.thriftjava.ThriftLanguage;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.lang.ThriftLanguageUtil;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
/**
* Export stats for query languages.
*/
@Singleton
public class QueryLangStatFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
public static class Config {
// We put a limit here in case a buggy client is sending us random lang codes.
private int maxNumberOfLangs;
public Config(int maxNumberOfLangs) {
this.maxNumberOfLangs = maxNumberOfLangs;
}
public int getMaxNumberOfLangs() {
return maxNumberOfLangs;
}
}
@VisibleForTesting
protected static final String LANG_STATS_PREFIX = "num_queries_in_lang_";
private final Config config;
private final SearchCounter allCountsForLangsOverMaxNumLang =
SearchCounter.export(LANG_STATS_PREFIX + "overflow");
private final ConcurrentHashMap<String, SearchCounter> langCounters =
new ConcurrentHashMap<>();
@Inject
public QueryLangStatFilter(Config config) {
this.config = config;
}
private SearchCounter getCounter(String lang) {
Preconditions.checkNotNull(lang);
SearchCounter counter = langCounters.get(lang);
if (counter == null) {
if (langCounters.size() >= config.getMaxNumberOfLangs()) {
return allCountsForLangsOverMaxNumLang;
}
synchronized (langCounters) { // This double-checked locking is safe,
// since we're using a ConcurrentHashMap
counter = langCounters.get(lang);
if (counter == null) {
counter = SearchCounter.export(LANG_STATS_PREFIX + lang);
langCounters.put(lang, counter);
}
}
}
return counter;
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
String lang = null;
ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
lang = searchQuery.getQueryLang();
if (lang == null) {
// fall back to the UI lang
lang = searchQuery.getUiLang();
}
if (lang == null && searchQuery.isSetUserLangs()) {
// fall back to the user lang with the highest confidence
double maxConfidence = Double.MIN_VALUE;
for (Map.Entry<ThriftLanguage, Double> entry : searchQuery.getUserLangs().entrySet()) {
if (entry.getValue() > maxConfidence) {
lang = ThriftLanguageUtil.getLanguageCodeOf(entry.getKey());
maxConfidence = entry.getValue();
}
}
}
if (lang == null) {
lang = LocaleUtil.UNDETERMINED_LANGUAGE;
}
getCounter(lang).increment();
return service.apply(requestContext);
}
}
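The getCounter logic above caps the number of distinct per-language counters and routes everything beyond the cap to a single overflow counter. A self-contained sketch of the same idea using ConcurrentHashMap.computeIfAbsent and AtomicLong in place of SearchCounter (the cap here is approximate under concurrent races, unlike the synchronized original):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

final class CappedLangCounterSketch {
  private static final int MAX_LANGS = 2; // stands in for Config.maxNumberOfLangs
  private final ConcurrentHashMap<String, AtomicLong> counters = new ConcurrentHashMap<>();
  private final AtomicLong overflow = new AtomicLong(); // "num_queries_in_lang_overflow"

  AtomicLong counterFor(String lang) {
    AtomicLong existing = counters.get(lang);
    if (existing != null) {
      return existing;
    }
    // Cap the number of distinct counters so a buggy client cannot export unbounded stats.
    if (counters.size() >= MAX_LANGS) {
      return overflow;
    }
    return counters.computeIfAbsent(lang, unused -> new AtomicLong());
  }

  public static void main(String[] args) {
    CappedLangCounterSketch sketch = new CappedLangCounterSketch();
    sketch.counterFor("en").incrementAndGet();
    sketch.counterFor("ko").incrementAndGet();
    sketch.counterFor("xx-bogus").incrementAndGet(); // lands in the overflow counter
    System.out.println(sketch.counters.keySet() + " overflow=" + sketch.overflow.get());
  }
}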

View File

@ -1,194 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.EnumSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import scala.runtime.BoxedUnit;
import com.google.common.collect.ImmutableMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimer;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.search.queryparser.query.annotation.Annotation;
import com.twitter.search.queryparser.query.search.SearchOperator;
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
import com.twitter.search.queryparser.visitors.DetectAnnotationVisitor;
import com.twitter.search.queryparser.visitors.DetectVisitor;
import com.twitter.util.Future;
/**
* For a given query, increments counters if that query has a number of search operators or
* annotations applied to it. Used to detect unusual traffic patterns.
*/
public class QueryOperatorStatFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final Logger LOG = LoggerFactory.getLogger(QueryOperatorStatFilter.class);
private final SearchCounter numQueryOperatorDetectionErrors =
SearchCounter.export("query_operator_detection_errors");
private final SearchCounter numQueryOperatorConsideredRequests =
SearchCounter.export("query_operator_requests_considered");
private final ImmutableMap<String, SearchTimerStats> filterOperatorStats;
// Keeps track of the number of queries with a filter applied, whose type we don't know about.
private final SearchCounter numUnknownFilterOperatorRequests =
SearchCounter.export("query_operator_filter_unknown_requests");
private final ImmutableMap<String, SearchTimerStats> includeOperatorStats;
// Keeps track of the number of queries with an include operator applied, whose type we don't
// know about.
private final SearchCounter numUnknownIncludeOperatorRequests =
SearchCounter.export("query_operator_include_unknown_requests");
private final ImmutableMap<SearchOperator.Type, SearchTimerStats> operatorTypeStats;
private final SearchCounter numVariantRequests =
SearchCounter.export("query_operator_variant_requests");
/**
* Construct this QueryOperatorStatFilter by getting the complete set of possible filters a query
* might have and associating each with a timer stat.
*/
public QueryOperatorStatFilter() {
ImmutableMap.Builder<String, SearchTimerStats> filterBuilder = new ImmutableMap.Builder<>();
for (String operand : SearchOperatorConstants.VALID_FILTER_OPERANDS) {
filterBuilder.put(
operand,
SearchTimerStats.export(
"query_operator_filter_" + operand + "_requests",
TimeUnit.MILLISECONDS,
false,
true));
}
filterOperatorStats = filterBuilder.build();
ImmutableMap.Builder<String, SearchTimerStats> includeBuilder = new ImmutableMap.Builder<>();
for (String operand : SearchOperatorConstants.VALID_INCLUDE_OPERANDS) {
includeBuilder.put(
operand,
SearchTimerStats.export(
"query_operator_include_" + operand + "_requests",
TimeUnit.MILLISECONDS,
false,
true));
}
includeOperatorStats = includeBuilder.build();
ImmutableMap.Builder<SearchOperator.Type, SearchTimerStats> operatorBuilder =
new ImmutableMap.Builder<>();
for (SearchOperator.Type operatorType : SearchOperator.Type.values()) {
operatorBuilder.put(
operatorType,
SearchTimerStats.export(
"query_operator_" + operatorType.name().toLowerCase() + "_requests",
TimeUnit.MILLISECONDS,
false,
true
));
}
operatorTypeStats = operatorBuilder.build();
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
numQueryOperatorConsideredRequests.increment();
Query parsedQuery = requestContext.getParsedQuery();
if (parsedQuery == null) {
return service.apply(requestContext);
}
SearchTimer timer = new SearchTimer();
timer.start();
return service.apply(requestContext).ensure(() -> {
timer.stop();
try {
updateTimersForOperatorsAndOperands(parsedQuery, timer);
updateCountersIfVariantAnnotation(parsedQuery);
} catch (QueryParserException e) {
LOG.warn("Unable to test if query has operators defined", e);
numQueryOperatorDetectionErrors.increment();
}
return BoxedUnit.UNIT;
});
}
/**
* Tracks request stats for operators and operands.
*
* @param parsedQuery the query to check.
*/
private void updateTimersForOperatorsAndOperands(Query parsedQuery, SearchTimer timer)
throws QueryParserException {
final DetectVisitor detectVisitor = new DetectVisitor(false, SearchOperator.Type.values());
parsedQuery.accept(detectVisitor);
Set<SearchOperator.Type> detectedOperatorTypes = EnumSet.noneOf(SearchOperator.Type.class);
for (Query query : detectVisitor.getDetectedQueries()) {
// This detectVisitor only matches on SearchOperators.
SearchOperator operator = (SearchOperator) query;
SearchOperator.Type operatorType = operator.getOperatorType();
detectedOperatorTypes.add(operatorType);
if (operatorType == SearchOperator.Type.INCLUDE) {
updateOperandStats(
operator,
includeOperatorStats,
timer,
numUnknownIncludeOperatorRequests);
}
if (operatorType == SearchOperator.Type.FILTER) {
updateOperandStats(
operator,
filterOperatorStats,
timer,
numUnknownFilterOperatorRequests);
}
}
for (SearchOperator.Type type : detectedOperatorTypes) {
operatorTypeStats.get(type).stoppedTimerIncrement(timer);
}
}
private void updateOperandStats(
SearchOperator operator,
ImmutableMap<String, SearchTimerStats> operandRequestStats,
SearchTimer timer,
SearchCounter unknownOperandStat) {
String operand = operator.getOperand();
SearchTimerStats stats = operandRequestStats.get(operand);
if (stats != null) {
stats.stoppedTimerIncrement(timer);
} else {
unknownOperandStat.increment();
}
}
private void updateCountersIfVariantAnnotation(Query parsedQuery) throws QueryParserException {
DetectAnnotationVisitor visitor = new DetectAnnotationVisitor(Annotation.Type.VARIANT);
if (parsedQuery.accept(visitor)) {
numVariantRequests.increment();
}
}
}
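One detail of updateTimersForOperatorsAndOperands worth a sketch: the request's single stopped timer is attributed once to every distinct operator type found in the query, with an EnumSet deduplicating repeats. A reduced version with an illustrative enum and plain accumulators:

import java.util.EnumMap;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

final class OperatorTimerAttributionSketch {
  enum OpType { FILTER, INCLUDE, EXCLUDE }

  public static void main(String[] args) {
    // Operator types detected in one query: two filters and one exclude.
    List<OpType> detected = List.of(OpType.FILTER, OpType.FILTER, OpType.EXCLUDE);
    long requestLatencyMs = 42;

    Map<OpType, Long> totalMsByType = new EnumMap<>(OpType.class);
    Set<OpType> distinctTypes = EnumSet.copyOf(detected); // dedupe, as the filter does
    for (OpType type : distinctTypes) {
      totalMsByType.merge(type, requestLatencyMs, Long::sum);
    }
    // {FILTER=42, EXCLUDE=42}: each detected type bucket gets the request latency exactly once.
    System.out.println(totalMsByType);
  }
}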

View File

@ -1,92 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import com.twitter.common_internal.text.version.PenguinVersion;
import com.twitter.common_internal.text.version.PenguinVersionConfig;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.finagle.tracing.Trace;
import com.twitter.finagle.tracing.Tracing;
import com.twitter.search.common.metrics.SearchRateCounter;
import com.twitter.search.common.metrics.SearchTimer;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.QueryParsingUtils;
import com.twitter.search.queryparser.parser.SerializedQueryParser;
import com.twitter.search.queryparser.parser.SerializedQueryParser.TokenizationOption;
import com.twitter.search.queryparser.query.Query;
import com.twitter.search.queryparser.query.QueryParserException;
import com.twitter.util.Duration;
import com.twitter.util.Future;
public class QueryTokenizerFilter extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final String PREFIX = "query_tokenizer_";
private static final SearchRateCounter SUCCESS_COUNTER =
SearchRateCounter.export(PREFIX + "success");
private static final SearchRateCounter FAILURE_COUNTER =
SearchRateCounter.export(PREFIX + "error");
private static final SearchRateCounter SKIPPED_COUNTER =
SearchRateCounter.export(PREFIX + "skipped");
private static final SearchTimerStats QUERY_TOKENIZER_TIME =
SearchTimerStats.export(PREFIX + "time", TimeUnit.MILLISECONDS, false);
private final TokenizationOption tokenizationOption;
@Inject
public QueryTokenizerFilter(PenguinVersionConfig penguinversions) {
PenguinVersion[] supportedVersions = penguinversions
.getSupportedVersions().toArray(new PenguinVersion[0]);
tokenizationOption = new TokenizationOption(true, supportedVersions);
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
if (!requestContext.getRequest().isRetokenizeSerializedQuery()
|| !requestContext.getRequest().isSetSearchQuery()
|| !requestContext.getRequest().getSearchQuery().isSetSerializedQuery()) {
SKIPPED_COUNTER.increment();
return service.apply(requestContext);
}
SearchTimer timer = QUERY_TOKENIZER_TIME.startNewTimer();
try {
String serializedQuery = requestContext.getRequest().getSearchQuery().getSerializedQuery();
Query parsedQuery = reparseQuery(serializedQuery);
SUCCESS_COUNTER.increment();
return service.apply(EarlybirdRequestContext.copyRequestContext(requestContext, parsedQuery));
} catch (QueryParserException e) {
FAILURE_COUNTER.increment();
return QueryParsingUtils.newClientErrorResponse(requestContext.getRequest(), e);
} finally {
long elapsed = timer.stop();
QUERY_TOKENIZER_TIME.timerIncrement(elapsed);
Tracing trace = Trace.apply();
if (trace.isActivelyTracing()) {
trace.record(PREFIX + "time", Duration.fromMilliseconds(elapsed));
}
}
}
public Query reparseQuery(String serializedQuery) throws QueryParserException {
SerializedQueryParser parser = new SerializedQueryParser(tokenizationOption);
return parser.parse(serializedQuery);
}
/**
* Initializing the query parser can take many seconds. We initialize it at warmup so that
* requests don't time out after we join the serverset. SEARCH-28801
*/
public void performExpensiveInitialization() throws QueryParserException {
SerializedQueryParser queryParser = new SerializedQueryParser(tokenizationOption);
// The Korean query parser takes a few seconds on its own to initialize.
String koreanQuery = "스포츠";
queryParser.parse(koreanQuery);
}
}
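The apply method above times the re-tokenization in a try/finally so the elapsed time is recorded even when parsing fails. A self-contained sketch of that pattern, with System.nanoTime and a hypothetical reparse helper standing in for SearchTimer and SerializedQueryParser:

final class TokenizeTimingSketch {
  public static void main(String[] args) {
    long startNanos = System.nanoTime();
    try {
      reparse("스포츠"); // the same Korean warm-up query used above
    } catch (IllegalArgumentException e) {
      System.out.println("parse failed: " + e.getMessage()); // FAILURE_COUNTER path
    } finally {
      long elapsedMs = (System.nanoTime() - startNanos) / 1_000_000;
      System.out.println("query_tokenizer_time += " + elapsedMs + "ms"); // recorded on every path
    }
  }

  // Hypothetical stand-in for SerializedQueryParser.parse; only rejects empty input.
  private static void reparse(String serializedQuery) {
    if (serializedQuery.isEmpty()) {
      throw new IllegalArgumentException("empty serialized query");
    }
  }
}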

View File

@ -1,60 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.TimeUnit;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
public class RealtimeServingRangeProvider implements ServingRangeProvider {
private static final int DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO = 240;
private final SearchDecider decider;
private final String deciderKey;
public RealtimeServingRangeProvider(SearchDecider decider, String deciderKey) {
this.decider = decider;
this.deciderKey = deciderKey;
}
@Override
public ServingRange getServingRange(
final EarlybirdRequestContext requestContext, boolean useBoundaryOverride) {
return new ServingRange() {
@Override
public long getServingRangeSinceId() {
long servingRangeStartMillis = TimeUnit.HOURS.toMillis(
(decider.featureExists(deciderKey))
? decider.getAvailability(deciderKey)
: DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeStartMillis;
return SnowflakeIdParser.generateValidStatusId(boundaryTime, 0);
}
@Override
public long getServingRangeMaxId() {
return SnowflakeIdParser.generateValidStatusId(
requestContext.getCreatedTimeMillis(), 0);
}
@Override
public long getServingRangeSinceTimeSecondsFromEpoch() {
long servingRangeStartMillis = TimeUnit.HOURS.toMillis(
(decider.featureExists(deciderKey))
? decider.getAvailability(deciderKey)
: DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeStartMillis;
return boundaryTime / 1000;
}
@Override
public long getServingRangeUntilTimeSecondsFromEpoch() {
return requestContext.getCreatedTimeMillis() / 1000;
}
};
}
}

View File

@ -1,94 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;
import javax.inject.Inject;
import com.google.common.annotations.VisibleForTesting;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.constants.thriftjava.ThriftQuerySource;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchRateCounter;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.util.Future;
/**
* Rejects requests based on the query source of the request. Intended to be used at super-root
* or archive-root. If used to reject client requests at super-root, the client will get a response
* with empty results and a REQUEST_BLOCKED_ERROR status code. If used at archive-root the client
* will get a response which might contain some results from realtime and protected and the status
* code of the response will depend on how super-root combines responses from the three downstream
* roots.
*/
public class RejectRequestsByQuerySourceFilter extends
SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
@VisibleForTesting
protected static final String NUM_REJECTED_REQUESTS_STAT_NAME_PATTERN =
"num_root_%s_rejected_requests_with_query_source_%s";
@VisibleForTesting
protected static final String REJECT_REQUESTS_DECIDER_KEY_PATTERN =
"root_%s_reject_requests_with_query_source_%s";
private final Map<ThriftQuerySource, SearchRateCounter> rejectedRequestsCounterPerQuerySource =
new HashMap<>();
private final Map<ThriftQuerySource, String> rejectRequestsDeciderKeyPerQuerySource =
new HashMap<>();
private final SearchDecider searchDecider;
@Inject
public RejectRequestsByQuerySourceFilter(
@Nullable EarlybirdCluster cluster,
SearchDecider searchDecider) {
this.searchDecider = searchDecider;
String clusterName = cluster != null
? cluster.getNameForStats()
: EarlybirdCluster.SUPERROOT.getNameForStats();
for (ThriftQuerySource querySource : ThriftQuerySource.values()) {
String querySourceName = querySource.name().toLowerCase();
rejectedRequestsCounterPerQuerySource.put(querySource,
SearchRateCounter.export(
String.format(
NUM_REJECTED_REQUESTS_STAT_NAME_PATTERN, clusterName, querySourceName)));
rejectRequestsDeciderKeyPerQuerySource.put(querySource,
String.format(
REJECT_REQUESTS_DECIDER_KEY_PATTERN, clusterName, querySourceName));
}
}
@Override
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
ThriftQuerySource querySource = request.isSetQuerySource()
? request.getQuerySource()
: ThriftQuerySource.UNKNOWN;
String deciderKey = rejectRequestsDeciderKeyPerQuerySource.get(querySource);
if (searchDecider.isAvailable(deciderKey)) {
rejectedRequestsCounterPerQuerySource.get(querySource).increment();
return Future.value(getRejectedRequestResponse(querySource, deciderKey));
}
return service.apply(request);
}
private static EarlybirdResponse getRejectedRequestResponse(
ThriftQuerySource querySource, String deciderKey) {
return new EarlybirdResponse(EarlybirdResponseCode.REQUEST_BLOCKED_ERROR, 0)
.setSearchResults(new ThriftSearchResults())
.setDebugString(String.format(
"Request with query source %s is blocked by decider %s", querySource, deciderKey));
}
}
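The stat and decider names above are plain string templates. A small sketch of what they expand to, assuming a cluster stats name of "superroot" and the UNKNOWN query source (the actual getNameForStats() value may differ):

final class RejectionKeyNamingSketch {
  public static void main(String[] args) {
    String clusterName = "superroot";   // assumed value of EarlybirdCluster.SUPERROOT.getNameForStats()
    String querySourceName = "unknown"; // ThriftQuerySource.UNKNOWN.name().toLowerCase()

    String statName = String.format(
        "num_root_%s_rejected_requests_with_query_source_%s", clusterName, querySourceName);
    String deciderKey = String.format(
        "root_%s_reject_requests_with_query_source_%s", clusterName, querySourceName);

    System.out.println(statName);   // num_root_superroot_rejected_requests_with_query_source_unknown
    System.out.println(deciderKey); // root_superroot_reject_requests_with_query_source_unknown
  }
}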

View File

@ -1,33 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.TimeUnit;
import com.twitter.finagle.Filter;
import com.twitter.finagle.Service;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
/**
* A filter for transforming a RequestContext to an EarlybirdRequest.
*/
public class RequestContextToEarlybirdRequestFilter extends
Filter<EarlybirdRequestContext, EarlybirdResponse, EarlybirdRequest, EarlybirdResponse> {
private static final SearchTimerStats REQUEST_CONTEXT_TRIP_TIME =
SearchTimerStats.export("request_context_trip_time", TimeUnit.MILLISECONDS, false,
true);
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequest, EarlybirdResponse> service) {
long tripTime = System.currentTimeMillis() - requestContext.getCreatedTimeMillis();
REQUEST_CONTEXT_TRIP_TIME.timerIncrement(tripTime);
return service.apply(requestContext.getRequest());
}
}

View File

@ -1,185 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.inject.Inject;
import scala.runtime.BoxedUnit;
import com.twitter.common.util.Clock;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.Percentile;
import com.twitter.search.common.metrics.PercentileUtil;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.query.thriftjava.CollectorParams;
import com.twitter.search.common.query.thriftjava.CollectorTerminationParams;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.snowflake.id.SnowflakeId;
import com.twitter.util.Function;
import com.twitter.util.Future;
public class RequestResultStatsFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private final Clock clock;
private final RequestResultStats stats;
static class RequestResultStats {
private static final String PREFIX = "request_result_properties_";
private final SearchCounter resultsRequestedCount;
private final SearchCounter resultsReturnedCount;
private final SearchCounter maxHitsToProcessCount;
private final SearchCounter hitsProcessedCount;
private final SearchCounter docsProcessedCount;
private final SearchCounter timeoutMsCount;
private Map<String, Percentile<Integer>> requestedNumResultsPercentileByClientId;
private Map<String, Percentile<Integer>> returnedNumResultsPercentileByClientId;
private Map<String, Percentile<Long>> oldestResultPercentileByClientId;
RequestResultStats() {
// Request properties
resultsRequestedCount = SearchCounter.export(PREFIX + "results_requested_cnt");
maxHitsToProcessCount = SearchCounter.export(PREFIX + "max_hits_to_process_cnt");
timeoutMsCount = SearchCounter.export(PREFIX + "timeout_ms_cnt");
requestedNumResultsPercentileByClientId = new ConcurrentHashMap<>();
// Result properties
resultsReturnedCount = SearchCounter.export(PREFIX + "results_returned_cnt");
hitsProcessedCount = SearchCounter.export(PREFIX + "hits_processed_cnt");
docsProcessedCount = SearchCounter.export(PREFIX + "docs_processed_cnt");
returnedNumResultsPercentileByClientId = new ConcurrentHashMap<>();
oldestResultPercentileByClientId = new ConcurrentHashMap<>();
}
SearchCounter getResultsRequestedCount() {
return resultsRequestedCount;
}
SearchCounter getResultsReturnedCount() {
return resultsReturnedCount;
}
SearchCounter getMaxHitsToProcessCount() {
return maxHitsToProcessCount;
}
SearchCounter getHitsProcessedCount() {
return hitsProcessedCount;
}
SearchCounter getDocsProcessedCount() {
return docsProcessedCount;
}
SearchCounter getTimeoutMsCount() {
return timeoutMsCount;
}
Percentile<Long> getOldestResultPercentile(String clientId) {
return oldestResultPercentileByClientId.computeIfAbsent(clientId,
key -> PercentileUtil.createPercentile(statName(clientId, "oldest_result_age_seconds")));
}
Percentile<Integer> getRequestedNumResultsPercentile(String clientId) {
return requestedNumResultsPercentileByClientId.computeIfAbsent(clientId,
key -> PercentileUtil.createPercentile(statName(clientId, "requested_num_results")));
}
Percentile<Integer> getReturnedNumResultsPercentile(String clientId) {
return returnedNumResultsPercentileByClientId.computeIfAbsent(clientId,
key -> PercentileUtil.createPercentile(statName(clientId, "returned_num_results")));
}
private String statName(String clientId, String suffix) {
return String.format("%s%s_%s", PREFIX, ClientIdUtil.formatClientId(clientId), suffix);
}
}
@Inject
RequestResultStatsFilter(Clock clock, RequestResultStats stats) {
this.clock = clock;
this.stats = stats;
}
private void updateRequestStats(EarlybirdRequest request) {
ThriftSearchQuery searchQuery = request.getSearchQuery();
CollectorParams collectorParams = searchQuery.getCollectorParams();
if (collectorParams != null) {
stats.getResultsRequestedCount().add(collectorParams.numResultsToReturn);
if (request.isSetClientId()) {
stats.getRequestedNumResultsPercentile(request.getClientId())
.record(collectorParams.numResultsToReturn);
}
CollectorTerminationParams terminationParams = collectorParams.getTerminationParams();
if (terminationParams != null) {
if (terminationParams.isSetMaxHitsToProcess()) {
stats.getMaxHitsToProcessCount().add(terminationParams.maxHitsToProcess);
}
if (terminationParams.isSetTimeoutMs()) {
stats.getTimeoutMsCount().add(terminationParams.timeoutMs);
}
}
} else {
if (searchQuery.isSetNumResults()) {
stats.getResultsRequestedCount().add(searchQuery.numResults);
if (request.isSetClientId()) {
stats.getRequestedNumResultsPercentile(request.getClientId())
.record(searchQuery.numResults);
}
}
if (searchQuery.isSetMaxHitsToProcess()) {
stats.getMaxHitsToProcessCount().add(searchQuery.maxHitsToProcess);
}
if (request.isSetTimeoutMs()) {
stats.getTimeoutMsCount().add(request.timeoutMs);
}
}
}
private void updateResultsStats(String clientId, ThriftSearchResults results) {
stats.getResultsReturnedCount().add(results.getResultsSize());
if (results.isSetNumHitsProcessed()) {
stats.getHitsProcessedCount().add(results.numHitsProcessed);
}
if (clientId != null) {
if (results.getResultsSize() > 0) {
List<ThriftSearchResult> resultsList = results.getResults();
long lastId = resultsList.get(resultsList.size() - 1).getId();
long tweetTime = SnowflakeId.timeFromId(lastId).inLongSeconds();
long tweetAge = (clock.nowMillis() / 1000) - tweetTime;
stats.getOldestResultPercentile(clientId).record(tweetAge);
}
stats.getReturnedNumResultsPercentile(clientId).record(results.getResultsSize());
}
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
updateRequestStats(request);
return service.apply(request).onSuccess(
new Function<EarlybirdResponse, BoxedUnit>() {
@Override
public BoxedUnit apply(EarlybirdResponse response) {
if (response.isSetSearchResults()) {
updateResultsStats(request.getClientId(), response.searchResults);
}
return BoxedUnit.UNIT;
}
});
}
}
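For reference, the oldest-result age recorded by updateResultsStats() boils down to Snowflake timestamp arithmetic. A minimal JDK-only sketch, assuming the standard public Snowflake layout (millisecond timestamp in the bits above the low 22, Twepoch 1288834974657 ms); neither constant is defined in this file:

final class SnowflakeAgeSketch {
  private static final long TWEPOCH_MS = 1288834974657L; // 2010-11-04T01:42:54Z

  // Age in seconds of the tweet identified by statusId, relative to nowMillis.
  static long tweetAgeSeconds(long statusId, long nowMillis) {
    long tweetTimeMs = (statusId >>> 22) + TWEPOCH_MS; // upper bits hold ms since Twepoch
    return (nowMillis - tweetTimeMs) / 1000;
  }

  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    long id = (now - 60_000L - TWEPOCH_MS) << 22; // an id minted roughly 60 seconds ago
    System.out.println(tweetAgeSeconds(id, now)); // prints ~60
  }
}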

View File

@ -1,79 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.root.RequestSuccessStats;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
import static com.twitter.search.common.util.earlybird.EarlybirdResponseUtil.responseConsideredFailed;
/**
* Records cancellations, timeouts, and failures for requests that do not go through
* ScatterGatherService (which also updates these stats, but for different requests).
*/
public class RequestSuccessStatsFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private final RequestSuccessStats stats;
@Inject
RequestSuccessStatsFilter(RequestSuccessStats stats) {
this.stats = stats;
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
final long startTime = System.nanoTime();
return service.apply(request).addEventListener(
new FutureEventListener<EarlybirdResponse>() {
@Override
public void onSuccess(EarlybirdResponse response) {
boolean success = true;
if (response.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR) {
success = false;
stats.getCancelledRequestCount().increment();
} else if (response.getResponseCode() == EarlybirdResponseCode.SERVER_TIMEOUT_ERROR) {
success = false;
stats.getTimedoutRequestCount().increment();
} else if (responseConsideredFailed(response.getResponseCode())) {
success = false;
stats.getErroredRequestCount().increment();
}
long latencyNanos = System.nanoTime() - startTime;
stats.getRequestLatencyStats().requestComplete(
TimeUnit.NANOSECONDS.toMillis(latencyNanos), 0, success);
}
@Override
public void onFailure(Throwable cause) {
long latencyNanos = System.nanoTime() - startTime;
stats.getRequestLatencyStats().requestComplete(
TimeUnit.NANOSECONDS.toMillis(latencyNanos), 0, false);
if (FinagleUtil.isCancelException(cause)) {
stats.getCancelledRequestCount().increment();
} else if (FinagleUtil.isTimeoutException(cause)) {
stats.getTimedoutRequestCount().increment();
} else {
stats.getErroredRequestCount().increment();
}
}
});
}
}
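The success/failure accounting above hinges on a simple nanoTime()-based latency measurement; a small self-contained sketch of that pattern (names are illustrative, not from this code):

final class LatencySketch {
  public static void main(String[] args) throws InterruptedException {
    long startTime = System.nanoTime();
    Thread.sleep(25); // stands in for the awaited service.apply(request) call
    long latencyMillis = java.util.concurrent.TimeUnit.NANOSECONDS.toMillis(
        System.nanoTime() - startTime);
    System.out.println("request completed in ~" + latencyMillis + " ms");
  }
}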

View File

@ -1,105 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableMap;
import com.twitter.common.util.Clock;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.clientstats.RequestCounters;
import com.twitter.search.common.clientstats.RequestCountersEventListener;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.util.Future;
public class RequestTypeCountFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private final ImmutableMap<EarlybirdRequestType, RequestCounters> typeCounters;
private final RequestCounters allRequestTypesCounter;
private final ImmutableMap<EarlybirdRequestType, LoadingCache<String, RequestCounters>>
perTypePerClientCounters;
/**
* Constructs the filter.
*/
public RequestTypeCountFilter(final String statSuffix) {
ImmutableMap.Builder<EarlybirdRequestType, RequestCounters> perTypeBuilder =
ImmutableMap.builder();
for (EarlybirdRequestType type : EarlybirdRequestType.values()) {
perTypeBuilder.put(type, new RequestCounters(
"request_type_count_filter_" + type.getNormalizedName() + "_" + statSuffix));
}
typeCounters = perTypeBuilder.build();
allRequestTypesCounter =
new RequestCounters("request_type_count_filter_all_" + statSuffix, true);
ImmutableMap.Builder<EarlybirdRequestType, LoadingCache<String, RequestCounters>>
perTypePerClientBuilder = ImmutableMap.builder();
// No point in setting any kind of expiration policy for the cache, since the stats will
// continue to be exported, so the objects will not be GCed anyway.
CacheBuilder<Object, Object> cacheBuilder = CacheBuilder.newBuilder();
for (final EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
CacheLoader<String, RequestCounters> cacheLoader =
new CacheLoader<String, RequestCounters>() {
@Override
public RequestCounters load(String clientId) {
return new RequestCounters("request_type_count_filter_for_" + clientId + "_"
+ requestType.getNormalizedName() + "_" + statSuffix);
}
};
perTypePerClientBuilder.put(requestType, cacheBuilder.build(cacheLoader));
}
perTypePerClientCounters = perTypePerClientBuilder.build();
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
EarlybirdRequestType requestType = requestContext.getEarlybirdRequestType();
RequestCounters requestCounters = typeCounters.get(requestType);
Preconditions.checkNotNull(requestCounters);
// Update the per-type and "all" counters.
RequestCountersEventListener<EarlybirdResponse> requestCountersEventListener =
new RequestCountersEventListener<>(
requestCounters, Clock.SYSTEM_CLOCK, EarlybirdSuccessfulResponseHandler.INSTANCE);
RequestCountersEventListener<EarlybirdResponse> allRequestTypesEventListener =
new RequestCountersEventListener<>(
allRequestTypesCounter, Clock.SYSTEM_CLOCK,
EarlybirdSuccessfulResponseHandler.INSTANCE);
RequestCountersEventListener<EarlybirdResponse> perTypePerClientEventListener =
updatePerTypePerClientCountersListener(requestContext);
return service.apply(requestContext)
.addEventListener(requestCountersEventListener)
.addEventListener(allRequestTypesEventListener)
.addEventListener(perTypePerClientEventListener);
}
private RequestCountersEventListener<EarlybirdResponse> updatePerTypePerClientCountersListener(
EarlybirdRequestContext earlybirdRequestContext) {
EarlybirdRequestType requestType = earlybirdRequestContext.getEarlybirdRequestType();
LoadingCache<String, RequestCounters> perClientCounters =
perTypePerClientCounters.get(requestType);
Preconditions.checkNotNull(perClientCounters);
String clientId = ClientIdUtil.formatFinagleClientIdAndClientId(
FinagleUtil.getFinagleClientName(),
ClientIdUtil.getClientIdFromRequest(earlybirdRequestContext.getRequest()));
RequestCounters clientCounters = perClientCounters.getUnchecked(clientId);
Preconditions.checkNotNull(clientCounters);
return new RequestCountersEventListener<>(
clientCounters, Clock.SYSTEM_CLOCK, EarlybirdSuccessfulResponseHandler.INSTANCE);
}
}

View File

@ -1,50 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Map;
import com.google.common.collect.Maps;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
public class ResponseCodeStatFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private final Map<EarlybirdResponseCode, SearchCounter> responseCodeCounters;
/**
* Create ResponseCodeStatFilter
*/
public ResponseCodeStatFilter() {
responseCodeCounters = Maps.newEnumMap(EarlybirdResponseCode.class);
for (EarlybirdResponseCode code : EarlybirdResponseCode.values()) {
SearchCounter stat = SearchCounter.export("response_code_" + code.name().toLowerCase());
responseCodeCounters.put(code, stat);
}
}
@Override
public Future<EarlybirdResponse> apply(
final EarlybirdRequest request,
final Service<EarlybirdRequest, EarlybirdResponse> service) {
return service.apply(request).addEventListener(
new FutureEventListener<EarlybirdResponse>() {
@Override
public void onSuccess(final EarlybirdResponse response) {
responseCodeCounters.get(response.getResponseCode()).increment();
}
@Override
public void onFailure(final Throwable cause) { }
});
}
}
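The filter above pre-creates one counter per enum value so the hot path is a plain map lookup. A JDK-only sketch of the same pattern, with AtomicLong standing in for SearchCounter (an assumption, since SearchCounter is defined elsewhere):

import java.util.EnumMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

final class ResponseCodeCountersSketch {
  enum Code { SUCCESS, TRANSIENT_ERROR, CLIENT_ERROR } // illustrative subset of response codes

  private final Map<Code, AtomicLong> counters = new EnumMap<>(Code.class);

  ResponseCodeCountersSketch() {
    for (Code code : Code.values()) {
      counters.put(code, new AtomicLong()); // pre-create so record() never allocates
    }
  }

  void record(Code code) {
    counters.get(code).incrementAndGet();
  }
}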

View File

@ -1,114 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.NavigableMap;
import javax.inject.Inject;
import javax.inject.Singleton;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSortedMap;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchCustomGauge;
import com.twitter.search.earlybird.config.TierInfo;
import com.twitter.search.earlybird.config.TierInfoSource;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.snowflake.id.SnowflakeId;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
/**
* A filter to count the tier to which the oldest tweet in the results belongs.
*/
@Singleton
public class ResultTierCountFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private static final String COUNTER_PREFIX = "result_tier_count";
private final long firstTweetTimeSinceEpochSec;
private final NavigableMap<Long, SearchCounter> tierBuckets;
private final SearchCounter allCounter = SearchCounter.export(COUNTER_PREFIX + "_all");
private final SearchCounter noResultsCounter =
SearchCounter.export(COUNTER_PREFIX + "_no_results");
@Inject
@SuppressWarnings("unused")
ResultTierCountFilter(TierInfoSource tierInfoSource) {
List<TierInfo> tierInfos = tierInfoSource.getTierInformation();
tierInfos.sort(Comparator.comparing(TierInfo::getDataStartDate));
firstTweetTimeSinceEpochSec = tierInfos.get(0).getServingRangeSinceTimeSecondsFromEpoch();
ImmutableSortedMap.Builder<Long, SearchCounter> builder = ImmutableSortedMap.naturalOrder();
Collections.reverse(tierInfos);
for (TierInfo tierInfo : tierInfos) {
SearchCounter searchCounter = SearchCounter.export(
String.format("%s_%s", COUNTER_PREFIX, tierInfo.getTierName()));
builder.put(tierInfo.getServingRangeSinceTimeSecondsFromEpoch(), searchCounter);
// export cumulative metrics to sum from the latest to a lower tier
Collection<SearchCounter> counters = builder.build().values();
SearchCustomGauge.export(
String.format("%s_down_to_%s", COUNTER_PREFIX, tierInfo.getTierName()),
() -> counters.stream()
.mapToLong(SearchCounter::get)
.sum());
}
tierBuckets = builder.build();
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext context,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
return service.apply(context).addEventListener(
new FutureEventListener<EarlybirdResponse>() {
@Override
public void onFailure(Throwable cause) {
// do nothing
}
@Override
public void onSuccess(EarlybirdResponse response) {
record(response);
}
});
}
@VisibleForTesting
void record(EarlybirdResponse response) {
if (response.isSetSearchResults()) {
long minResultsStatusId = response.getSearchResults().getResults().stream()
.mapToLong(ThriftSearchResult::getId)
.min()
.orElse(-1);
getBucket(minResultsStatusId).increment();
}
allCounter.increment();
}
private SearchCounter getBucket(long statusId) {
if (statusId < 0) {
return noResultsCounter;
}
// If a non-negative statusId is not a SnowflakeId, the tweet must have been created before
// Twepoch (2010-11-04T01:42:54Z) and thus belongs to full1.
long timeSinceEpochSec = firstTweetTimeSinceEpochSec;
if (SnowflakeId.isSnowflakeId(statusId)) {
timeSinceEpochSec = SnowflakeId.timeFromId(statusId).inSeconds();
}
return tierBuckets.floorEntry(timeSinceEpochSec).getValue();
}
}
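getBucket() relies on NavigableMap.floorEntry() to map a timestamp to the newest tier whose serving range starts at or before it. A small sketch with made-up tier names and boundaries:

import java.util.NavigableMap;
import java.util.TreeMap;

final class TierBucketSketch {
  public static void main(String[] args) {
    NavigableMap<Long, String> tierBuckets = new TreeMap<>();
    tierBuckets.put(0L, "full_archive");          // covers the oldest tweets
    tierBuckets.put(1_500_000_000L, "full1");     // starts mid-2017 (epoch seconds)
    tierBuckets.put(1_650_000_000L, "realtime");  // starts 2022

    long timeSinceEpochSec = 1_600_000_000L;      // a tweet from 2020
    System.out.println(tierBuckets.floorEntry(timeSinceEpochSec).getValue()); // prints full1
  }
}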

View File

@ -1,59 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.finagle.Service;
import com.twitter.search.common.root.ScatterGatherService;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ExperimentCluster;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
public class ScatterGatherWithExperimentRedirectsService
extends Service<EarlybirdRequestContext, EarlybirdResponse> {
private final Service<EarlybirdRequestContext, EarlybirdResponse>
controlScatterGatherService;
private final Map<ExperimentCluster,
ScatterGatherService<EarlybirdRequestContext, EarlybirdResponse>>
experimentScatterGatherServices;
private static final Logger LOG =
LoggerFactory.getLogger(ScatterGatherWithExperimentRedirectsService.class);
public ScatterGatherWithExperimentRedirectsService(
Service<EarlybirdRequestContext, EarlybirdResponse> controlScatterGatherService,
Map<ExperimentCluster,
ScatterGatherService<EarlybirdRequestContext, EarlybirdResponse>>
experimentScatterGatherServices
) {
this.controlScatterGatherService = controlScatterGatherService;
this.experimentScatterGatherServices = experimentScatterGatherServices;
}
@Override
public Future<EarlybirdResponse> apply(EarlybirdRequestContext request) {
if (request.getRequest().isSetExperimentClusterToUse()) {
ExperimentCluster cluster = request.getRequest().getExperimentClusterToUse();
if (!experimentScatterGatherServices.containsKey(cluster)) {
String error = String.format(
"Received invalid experiment cluster: %s", cluster.name());
LOG.error("{} Request: {}", error, request.getRequest());
return Future.value(new EarlybirdResponse()
.setResponseCode(EarlybirdResponseCode.CLIENT_ERROR)
.setDebugString(error));
}
return experimentScatterGatherServices.get(cluster).apply(request);
}
return controlScatterGatherService.apply(request);
}
}

View File

@ -1,43 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.concurrent.atomic.AtomicReference;
import scala.Option;
import com.google.common.base.Preconditions;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.finagle.context.Contexts;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.root.SearchPayloadSizeFilter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
/**
* A filter that sets the clientId in the local context, to be used later by SearchPayloadSizeFilter.
*/
public class SearchPayloadSizeLocalContextFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private static final SearchCounter CLIENT_ID_CONTEXT_KEY_NOT_SET_COUNTER = SearchCounter.export(
"search_payload_size_local_context_filter_client_id_context_key_not_set");
@Override
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
// In production, the SearchPayloadSizeFilter.CLIENT_ID_CONTEXT_KEY should always be set
// (by ThriftServer). However, it's not set in tests, because tests do not start a ThriftServer.
Option<AtomicReference<String>> clientIdOption =
Contexts.local().get(SearchPayloadSizeFilter.CLIENT_ID_CONTEXT_KEY);
if (clientIdOption.isDefined()) {
AtomicReference<String> clientIdReference = clientIdOption.get();
Preconditions.checkArgument(clientIdReference.get() == null);
clientIdReference.set(request.getClientId());
} else {
CLIENT_ID_CONTEXT_KEY_NOT_SET_COUNTER.increment();
}
return service.apply(request);
}
}

View File

@ -1,140 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.Set;
import com.google.common.base.Joiner;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.thrift.ThriftUtils;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
import com.twitter.util.FutureEventListener;
/**
* The general framework for earlybird root to track sensitive results.
*/
public abstract class SensitiveResultsTrackingFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
/**
* The type name is used to distinguish different kinds of sensitive results in logs.
*/
private final String typeName;
/**
* This flag controls whether expensive details (serialized requests and responses) are logged.
*/
private final boolean logDetails;
/**
* Constructor helps distinguish different sensitive content trackers.
* @param typeName The sensitive content's name (e.g. nullcast)
* @param logDetails Whether to log details such as serialized requests and responses
*/
public SensitiveResultsTrackingFilter(final String typeName, boolean logDetails) {
super();
this.typeName = typeName;
this.logDetails = logDetails;
}
/**
* Get the LOG that the sensitive results can write to.
*/
protected abstract Logger getLogger();
/**
* The counter which counts the number of queries with sensitive results.
*/
protected abstract SearchCounter getSensitiveQueryCounter();
/**
* The counter which counts the number of sensitive results.
*/
protected abstract SearchCounter getSensitiveResultsCounter();
/**
* The method defines how the sensitive results are identified.
*/
protected abstract Set<Long> getSensitiveResults(
EarlybirdRequestContext requestContext,
EarlybirdResponse earlybirdResponse) throws Exception;
/**
* Get a set of tweets which should be excluded from the sensitive results set.
*/
protected abstract Set<Long> getExceptedResults(EarlybirdRequestContext requestContext);
@Override
public final Future<EarlybirdResponse> apply(
final EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
Future<EarlybirdResponse> response = service.apply(requestContext);
response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
@Override
public void onSuccess(EarlybirdResponse earlybirdResponse) {
try {
if (earlybirdResponse.responseCode == EarlybirdResponseCode.SUCCESS
&& earlybirdResponse.isSetSearchResults()
&& requestContext.getParsedQuery() != null) {
Set<Long> statusIds = getSensitiveResults(requestContext, earlybirdResponse);
Set<Long> exceptedIds = getExceptedResults(requestContext);
statusIds.removeAll(exceptedIds);
if (statusIds.size() > 0) {
getSensitiveQueryCounter().increment();
getSensitiveResultsCounter().add(statusIds.size());
logContent(requestContext, earlybirdResponse, statusIds);
}
}
} catch (Exception e) {
getLogger().error("Caught exception while trying to log sensitive results for query: {}",
requestContext.getParsedQuery().serialize(), e);
}
}
@Override
public void onFailure(Throwable cause) {
}
});
return response;
}
private void logContent(
final EarlybirdRequestContext requestContext,
final EarlybirdResponse earlybirdResponse,
final Set<Long> statusIds) {
if (logDetails) {
String base64Request;
try {
base64Request = ThriftUtils.toBase64EncodedString(requestContext.getRequest());
} catch (TException e) {
base64Request = "Failed to parse base 64 request";
}
getLogger().error("Found " + typeName
+ ": {} | "
+ "parsedQuery: {} | "
+ "request: {} | "
+ "base 64 request: {} | "
+ "response: {}",
Joiner.on(",").join(statusIds),
requestContext.getParsedQuery().serialize(),
requestContext.getRequest(),
base64Request,
earlybirdResponse);
} else {
getLogger().error("Found " + typeName + ": {} for parsedQuery {}",
Joiner.on(",").join(statusIds),
requestContext.getParsedQuery().serialize());
}
}
}
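To show how the abstract hooks above fit together, here is a hypothetical concrete tracker; the "nullcast" naming, the stat names, and the empty result sets are illustrative assumptions, not behavior defined in this commit:

class NullcastResultsTrackingFilter extends SensitiveResultsTrackingFilter {
  private static final Logger SENSITIVE_LOG =
      org.slf4j.LoggerFactory.getLogger(NullcastResultsTrackingFilter.class);
  private static final SearchCounter QUERY_COUNTER =
      SearchCounter.export("nullcast_sensitive_queries");
  private static final SearchCounter RESULTS_COUNTER =
      SearchCounter.export("nullcast_sensitive_results");

  NullcastResultsTrackingFilter() {
    super("nullcast", false); // type name and logDetails=false are illustrative
  }

  @Override protected Logger getLogger() { return SENSITIVE_LOG; }
  @Override protected SearchCounter getSensitiveQueryCounter() { return QUERY_COUNTER; }
  @Override protected SearchCounter getSensitiveResultsCounter() { return RESULTS_COUNTER; }

  @Override protected Set<Long> getSensitiveResults(
      EarlybirdRequestContext requestContext, EarlybirdResponse earlybirdResponse) {
    return new java.util.HashSet<>(); // a real tracker would inspect the response here
  }

  @Override protected Set<Long> getExceptedResults(EarlybirdRequestContext requestContext) {
    return java.util.Collections.emptySet();
  }
}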

View File

@ -1,27 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
/** A per-service filter for handling exceptions. */
public class ServiceExceptionHandlingFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private final EarlybirdResponseExceptionHandler exceptionHandler;
/** Creates a new ServiceExceptionHandlingFilter instance. */
public ServiceExceptionHandlingFilter(EarlybirdCluster cluster) {
this.exceptionHandler = new EarlybirdResponseExceptionHandler(cluster.getNameForStats());
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
return exceptionHandler.handleException(
requestContext.getRequest(), service.apply(requestContext));
}
}

View File

@ -1,81 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import java.util.HashMap;
import java.util.Map;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.search.earlybird_root.validators.FacetsResponseValidator;
import com.twitter.search.earlybird_root.validators.PassThroughResponseValidator;
import com.twitter.search.earlybird_root.validators.ServiceResponseValidator;
import com.twitter.search.earlybird_root.validators.TermStatsResultsValidator;
import com.twitter.search.earlybird_root.validators.TopTweetsResultsValidator;
import com.twitter.util.Function;
import com.twitter.util.Future;
/**
* Filter responsible for handling invalid responses returned by downstream services, and
* translating them into EarlybirdResponseExceptions.
*/
public class ServiceResponseValidationFilter
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
private final Map<EarlybirdRequestType, ServiceResponseValidator<EarlybirdResponse>>
requestTypeToResponseValidators = new HashMap<>();
private final EarlybirdCluster cluster;
/**
* Creates a new filter for handling invalid response
*/
public ServiceResponseValidationFilter(EarlybirdCluster cluster) {
this.cluster = cluster;
ServiceResponseValidator<EarlybirdResponse> passThroughValidator =
new PassThroughResponseValidator();
requestTypeToResponseValidators
.put(EarlybirdRequestType.FACETS, new FacetsResponseValidator(cluster));
requestTypeToResponseValidators
.put(EarlybirdRequestType.RECENCY, passThroughValidator);
requestTypeToResponseValidators
.put(EarlybirdRequestType.RELEVANCE, passThroughValidator);
requestTypeToResponseValidators
.put(EarlybirdRequestType.STRICT_RECENCY, passThroughValidator);
requestTypeToResponseValidators
.put(EarlybirdRequestType.TERM_STATS, new TermStatsResultsValidator(cluster));
requestTypeToResponseValidators
.put(EarlybirdRequestType.TOP_TWEETS, new TopTweetsResultsValidator(cluster));
}
@Override
public Future<EarlybirdResponse> apply(
final EarlybirdRequestContext requestContext,
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
return service.apply(requestContext).flatMap(
new Function<EarlybirdResponse, Future<EarlybirdResponse>>() {
@Override
public Future<EarlybirdResponse> apply(EarlybirdResponse response) {
if (response == null) {
return Future.exception(new IllegalStateException(
cluster + " returned null response"));
}
if (response.getResponseCode() == EarlybirdResponseCode.SUCCESS) {
return requestTypeToResponseValidators
.get(requestContext.getEarlybirdRequestType())
.validate(response);
}
return Future.value(EarlybirdResponseMergeUtil.transformInvalidResponse(
response,
String.format("Failure from %s (%s)", cluster, response.getResponseCode())));
}
});
}
}
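The validators plugged in above all follow the same shape; a hypothetical extra validator, assuming ServiceResponseValidator declares a single validate() method returning a Future (the signature is inferred from the flatMap call above, not confirmed by this file):

class NonEmptyResultsValidator implements ServiceResponseValidator<EarlybirdResponse> {
  @Override
  public Future<EarlybirdResponse> validate(EarlybirdResponse response) {
    if (!response.isSetSearchResults()) {
      return Future.exception(
          new IllegalStateException("successful response is missing searchResults"));
    }
    return Future.value(response);
  }
}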

View File

@ -1,12 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.search.earlybird.config.ServingRange;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
public interface ServingRangeProvider {
/**
* Get a ServingRange implementation.
* Usually backed by either TierInfoWrapper or RootClusterBoundaryInfo.
*/
ServingRange getServingRange(EarlybirdRequestContext requestContext, boolean useBoundaryOverride);
}

View File

@ -1,30 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.earlybird.common.ClientIdUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
/**
* A filter that will set the clientId of the request to the strato HttpEndpoint Attribution.
* <p>
* If the clientId is already set to something non-null then that value is used.
* If the clientId is null but Attribution.httpEndpoint() contains a value it will be set as
* the clientId.
*/
public class StratoAttributionClientIdFilter extends
SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequest request, Service<EarlybirdRequest, EarlybirdResponse> service
) {
if (request.getClientId() == null) {
ClientIdUtil.getClientIdFromHttpEndpointAttribution().ifPresent(request::setClientId);
}
return service.apply(request);
}
}

View File

@ -1,24 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
/** A top level filter for handling exceptions. */
public class TopLevelExceptionHandlingFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private final EarlybirdResponseExceptionHandler exceptionHandler;
/** Creates a new TopLevelExceptionHandlingFilter instance. */
public TopLevelExceptionHandlingFilter() {
this.exceptionHandler = new EarlybirdResponseExceptionHandler("top_level");
}
@Override
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
return exceptionHandler.handleException(request, service.apply(request));
}
}

View File

@ -1,30 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird_root.common.EarlybirdRequestUtil;
import com.twitter.util.Future;
/**
* A filter that unsets some request fields that make sense only on the SuperRoot, before sending
* them to the individual roots.
*/
public class UnsetSuperRootFieldsFilter extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private final boolean unsetFollowedUserIds;
public UnsetSuperRootFieldsFilter() {
this(true);
}
public UnsetSuperRootFieldsFilter(boolean unsetFollowedUserIds) {
this.unsetFollowedUserIds = unsetFollowedUserIds;
}
@Override
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service) {
return service.apply(EarlybirdRequestUtil.unsetSuperRootFields(request, unsetFollowedUserIds));
}
}

View File

@ -1,44 +0,0 @@
package com.twitter.search.earlybird_root.filters;
import javax.inject.Inject;
import com.twitter.finagle.Service;
import com.twitter.finagle.SimpleFilter;
import com.twitter.search.common.decider.SearchDecider;
import com.twitter.search.common.metrics.SearchRateCounter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.util.Future;
public class VeryRecentTweetsFilter
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
private static final String DECIDER_KEY = "enable_very_recent_tweets";
private static final SearchRateCounter VERY_RECENT_TWEETS_NOT_MODIFIED =
SearchRateCounter.export("very_recent_tweets_not_modified");
private static final SearchRateCounter VERY_RECENT_TWEETS_ENABLED =
SearchRateCounter.export("very_recent_tweets_enabled");
private final SearchDecider decider;
@Inject
public VeryRecentTweetsFilter(
SearchDecider decider
) {
this.decider = decider;
}
@Override
public Future<EarlybirdResponse> apply(
EarlybirdRequest request,
Service<EarlybirdRequest, EarlybirdResponse> service
) {
if (decider.isAvailable(DECIDER_KEY)) {
VERY_RECENT_TWEETS_ENABLED.increment();
request.setSkipVeryRecentTweets(false);
} else {
VERY_RECENT_TWEETS_NOT_MODIFIED.increment();
}
return service.apply(request);
}
}

Binary file not shown.

View File

@ -1,176 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.List;
import java.util.Map;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.TierResponse;
/**
* Collection of EarlybirdResponses and associated stats to be merged.
*/
public class AccumulatedResponses {
// The list of the successful responses from all earlybird futures. This does not include empty
// responses resulted from null requests.
private final List<EarlybirdResponse> successResponses;
// The list of the unsuccessful responses from all earlybird futures.
private final List<EarlybirdResponse> errorResponses;
// the list of max statusIds seen in each earlybird.
private final List<Long> maxIds;
// the list of min statusIds seen in each earlybird.
private final List<Long> minIds;
private final EarlyTerminationInfo mergedEarlyTerminationInfo;
private final boolean isMergingAcrossTiers;
private final PartitionCounts partitionCounts;
private final int numSearchedSegments;
public static final class PartitionCounts {
private final int numPartitions;
private final int numSuccessfulPartitions;
private final List<TierResponse> perTierResponse;
public PartitionCounts(int numPartitions, int numSuccessfulPartitions, List<TierResponse>
perTierResponse) {
this.numPartitions = numPartitions;
this.numSuccessfulPartitions = numSuccessfulPartitions;
this.perTierResponse = perTierResponse;
}
public int getNumPartitions() {
return numPartitions;
}
public int getNumSuccessfulPartitions() {
return numSuccessfulPartitions;
}
public List<TierResponse> getPerTierResponse() {
return perTierResponse;
}
}
/**
* Create AccumulatedResponses
*/
public AccumulatedResponses(List<EarlybirdResponse> successResponses,
List<EarlybirdResponse> errorResponses,
List<Long> maxIds,
List<Long> minIds,
EarlyTerminationInfo mergedEarlyTerminationInfo,
boolean isMergingAcrossTiers,
PartitionCounts partitionCounts,
int numSearchedSegments) {
this.successResponses = successResponses;
this.errorResponses = errorResponses;
this.maxIds = maxIds;
this.minIds = minIds;
this.mergedEarlyTerminationInfo = mergedEarlyTerminationInfo;
this.isMergingAcrossTiers = isMergingAcrossTiers;
this.partitionCounts = partitionCounts;
this.numSearchedSegments = numSearchedSegments;
}
public List<EarlybirdResponse> getSuccessResponses() {
return successResponses;
}
public List<EarlybirdResponse> getErrorResponses() {
return errorResponses;
}
public List<Long> getMaxIds() {
return maxIds;
}
public List<Long> getMinIds() {
return minIds;
}
public EarlyTerminationInfo getMergedEarlyTerminationInfo() {
return mergedEarlyTerminationInfo;
}
public boolean foundError() {
return !errorResponses.isEmpty();
}
/**
* Tries to return a merged EarlybirdResponse that propagates as much information from the error
* responses as possible.
*
* If all error responses have the same error response code, the merged response will have the
* same error response code, and the debugString/debugInfo on the merged response will be set to
* the debugString/debugInfo of one of the merged responses.
*
* If the error responses have at least 2 different response codes, TRANSIENT_ERROR will be set
* on the merged response. Also, we will look for the most common error response code, and will
* propagate the debugString/debugInfo from an error response with that response code.
*/
public EarlybirdResponse getMergedErrorResponse() {
Preconditions.checkState(!errorResponses.isEmpty());
// Find a response that has the most common error response code.
int maxCount = 0;
EarlybirdResponse errorResponseWithMostCommonErrorResponseCode = null;
Map<EarlybirdResponseCode, Integer> responseCodeCounts = Maps.newHashMap();
for (EarlybirdResponse errorResponse : errorResponses) {
EarlybirdResponseCode responseCode = errorResponse.getResponseCode();
Integer responseCodeCount = responseCodeCounts.get(responseCode);
if (responseCodeCount == null) {
responseCodeCount = 0;
}
++responseCodeCount;
responseCodeCounts.put(responseCode, responseCodeCount);
if (responseCodeCount > maxCount) {
maxCount = responseCodeCount;
errorResponseWithMostCommonErrorResponseCode = errorResponse;
}
}
// If all error responses have the same response code, set it on the merged response.
// Otherwise, set TRANSIENT_ERROR on the merged response.
EarlybirdResponseCode mergedResponseCode = EarlybirdResponseCode.TRANSIENT_ERROR;
if (responseCodeCounts.size() == 1) {
mergedResponseCode = responseCodeCounts.keySet().iterator().next();
}
EarlybirdResponse mergedResponse = new EarlybirdResponse()
.setResponseCode(mergedResponseCode);
// Propagate the debugString/debugInfo of the selected error response to the merged response.
Preconditions.checkNotNull(errorResponseWithMostCommonErrorResponseCode);
if (errorResponseWithMostCommonErrorResponseCode.isSetDebugString()) {
mergedResponse.setDebugString(errorResponseWithMostCommonErrorResponseCode.getDebugString());
}
if (errorResponseWithMostCommonErrorResponseCode.isSetDebugInfo()) {
mergedResponse.setDebugInfo(errorResponseWithMostCommonErrorResponseCode.getDebugInfo());
}
// Set the numPartitions and numPartitionsSucceeded on the mergedResponse
mergedResponse.setNumPartitions(partitionCounts.getNumPartitions());
mergedResponse.setNumSuccessfulPartitions(partitionCounts.getNumSuccessfulPartitions());
return mergedResponse;
}
public boolean isMergingAcrossTiers() {
return isMergingAcrossTiers;
}
public boolean isMergingPartitionsWithinATier() {
return !isMergingAcrossTiers;
}
public PartitionCounts getPartitionCounts() {
return partitionCounts;
}
public int getNumSearchedSegments() {
return numSearchedSegments;
}
}
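The "most common error response code" selection in getMergedErrorResponse() is ordinary frequency counting; a JDK-only sketch of the same idea, with an illustrative enum standing in for EarlybirdResponseCode:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class MostCommonCodeSketch {
  enum Code { TRANSIENT_ERROR, SERVER_TIMEOUT_ERROR, CLIENT_ERROR }

  public static void main(String[] args) {
    List<Code> errorCodes =
        Arrays.asList(Code.TRANSIENT_ERROR, Code.SERVER_TIMEOUT_ERROR, Code.TRANSIENT_ERROR);

    Map<Code, Integer> counts = new HashMap<>();
    Code mostCommon = null;
    int maxCount = 0;
    for (Code code : errorCodes) {
      int count = counts.merge(code, 1, Integer::sum); // increment the per-code count
      if (count > maxCount) {
        maxCount = count;
        mostCommon = code;
      }
    }
    System.out.println(mostCommon + " seen " + maxCount + " times"); // TRANSIENT_ERROR seen 2 times
  }
}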

View File

@ -1,26 +0,0 @@
java_library(
sources = ["*.java"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/guava",
"3rdparty/jvm/log4j",
"3rdparty/jvm/org/slf4j:slf4j-api",
"src/java/com/twitter/common/collections",
"src/java/com/twitter/common/quantity",
"src/java/com/twitter/search/common/futures",
"src/java/com/twitter/search/common/logging",
"src/java/com/twitter/search/common/metrics",
"src/java/com/twitter/search/common/partitioning/snowflakeparser",
"src/java/com/twitter/search/common/relevance:utils",
"src/java/com/twitter/search/common/schema/earlybird",
"src/java/com/twitter/search/common/search",
"src/java/com/twitter/search/common/util:finagleutil",
"src/java/com/twitter/search/common/util/earlybird",
"src/java/com/twitter/search/earlybird_root/collectors",
"src/java/com/twitter/search/earlybird_root/common",
"src/java/com/twitter/search/queryparser/query:core-query-nodes",
"src/thrift/com/twitter/search:earlybird-java",
"src/thrift/com/twitter/search/common:query-java",
],
)

View File

@ -1,9 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
public interface EarlyTerminateTierMergePredicate {
/**
* Do we have enough results so far that we can early terminate and not continue onto the next tier?
*/
boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination);
}
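A hypothetical implementation of the predicate, to illustrate the contract: stop descending into older tiers once enough results are in hand and no shard was cut off early. The threshold and the exact treatment of foundEarlyTermination are assumptions, not taken from the mergers in this commit:

class SimpleTierMergePredicate implements EarlyTerminateTierMergePredicate {
  private static final int NUM_RESULTS_WANTED = 20; // illustrative threshold

  @Override
  public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
                                               boolean foundEarlyTermination) {
    return !foundEarlyTermination && totalResultsFromSuccessfulShards >= NUM_RESULTS_WANTED;
  }
}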

View File

@ -1,176 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.logging.DebugMessageBuilder;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
/**
* Collects debug messages to attach to EarlybirdResponse
*/
class EarlybirdResponseDebugMessageBuilder {
private static final Logger LOG =
LoggerFactory.getLogger(EarlybirdResponseDebugMessageBuilder.class);
private static final Logger TOO_MANY_FAILED_PARTITIONS_LOG =
LoggerFactory.getLogger(String.format("%s_too_many_failed_partitions",
EarlybirdResponseDebugMessageBuilder.class.getName()));
@VisibleForTesting
protected final SearchCounter insufficientValidResponseCounter =
SearchCounter.export("insufficient_valid_partition_responses_count");
@VisibleForTesting
protected final SearchCounter validPartitionResponseCounter =
SearchCounter.export("valid_partition_response_count");
// the combined debug string for all earlybird responses
private final StringBuilder debugString;
/**
* A message builder backed by the same {@link #debugString} above.
*/
private final DebugMessageBuilder debugMessageBuilder;
private static final Joiner JOINER = Joiner.on(", ");
EarlybirdResponseDebugMessageBuilder(EarlybirdRequest request) {
this(getDebugLevel(request));
}
EarlybirdResponseDebugMessageBuilder(DebugMessageBuilder.Level level) {
this.debugString = new StringBuilder();
this.debugMessageBuilder = new DebugMessageBuilder(debugString, level);
}
private static DebugMessageBuilder.Level getDebugLevel(EarlybirdRequest request) {
if (request.isSetDebugMode() && request.getDebugMode() > 0) {
return DebugMessageBuilder.getDebugLevel(request.getDebugMode());
} else if (request.isSetDebugOptions()) {
return DebugMessageBuilder.Level.DEBUG_BASIC;
} else {
return DebugMessageBuilder.Level.DEBUG_NONE;
}
}
protected boolean isDebugMode() {
return debugMessageBuilder.getDebugLevel() > 0;
}
void append(String msg) {
debugString.append(msg);
}
void debugAndLogWarning(String msg) {
if (isDebugMode()) {
debugString.append(msg).append('\n');
}
LOG.warn(msg);
}
void debugDetailed(String format, Object... args) {
debugAtLevel(DebugMessageBuilder.Level.DEBUG_DETAILED, format, args);
}
void debugVerbose(String format, Object... args) {
debugAtLevel(DebugMessageBuilder.Level.DEBUG_VERBOSE, format, args);
}
void debugVerbose2(String format, Object... args) {
debugAtLevel(DebugMessageBuilder.Level.DEBUG_VERBOSE_2, format, args);
}
void debugAtLevel(DebugMessageBuilder.Level level, String format, Object... args) {
boolean levelOK = debugMessageBuilder.isAtLeastLevel(level);
if (levelOK || LOG.isDebugEnabled()) {
// We check both modes here in order to build the formatted message only once.
String message = String.format(format, args);
LOG.debug(message);
if (levelOK) {
debugString.append(message).append('\n');
}
}
}
String debugString() {
return debugString.toString();
}
DebugMessageBuilder getDebugMessageBuilder() {
return debugMessageBuilder;
}
void logBelowSuccessThreshold(ThriftSearchQuery searchQuery, int numSuccessResponses,
int numPartitions, double successThreshold) {
String rawQuery = (searchQuery != null && searchQuery.isSetRawQuery())
? "[" + searchQuery.getRawQuery() + "]" : "null";
String serializedQuery = (searchQuery != null && searchQuery.isSetSerializedQuery())
? "[" + searchQuery.getSerializedQuery() + "]" : "null";
// Not enough successful responses from partitions.
String errorMessage = String.format(
"Only %d valid responses returned out of %d partitions for raw query: %s"
+ " serialized query: %s. Lower than threshold of %s",
numSuccessResponses, numPartitions, rawQuery, serializedQuery, successThreshold);
TOO_MANY_FAILED_PARTITIONS_LOG.warn(errorMessage);
insufficientValidResponseCounter.increment();
validPartitionResponseCounter.add(numSuccessResponses);
debugString.append(errorMessage);
}
@VisibleForTesting
void logResponseDebugInfo(EarlybirdRequest earlybirdRequest,
String partitionTierName,
EarlybirdResponse response) {
if (response.isSetDebugString() && !response.getDebugString().isEmpty()) {
debugString.append(String.format("Received response from [%s] with debug string [%s]",
partitionTierName, response.getDebugString())).append("\n");
}
if (!response.isSetResponseCode()) {
debugAndLogWarning(String.format(
"Received Earlybird null response code for query [%s] from [%s]",
earlybirdRequest, partitionTierName));
} else if (response.getResponseCode() != EarlybirdResponseCode.SUCCESS
&& response.getResponseCode() != EarlybirdResponseCode.PARTITION_SKIPPED
&& response.getResponseCode() != EarlybirdResponseCode.PARTITION_DISABLED
&& response.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED) {
debugAndLogWarning(String.format(
"Received Earlybird response error [%s] for query [%s] from [%s]",
response.getResponseCode(), earlybirdRequest, partitionTierName));
}
if (debugMessageBuilder.isVerbose2()) {
debugVerbose2("Earlybird [%s] returned response: %s", partitionTierName, response);
} else if (debugMessageBuilder.isVerbose()) {
if (response.isSetSearchResults() && response.getSearchResults().getResultsSize() > 0) {
String ids = JOINER.join(Iterables.transform(
response.getSearchResults().getResults(),
new Function<ThriftSearchResult, Long>() {
@Nullable
@Override
public Long apply(ThriftSearchResult result) {
return result.getId();
}
}));
debugVerbose("Earlybird [%s] returned TweetIDs: %s", partitionTierName, ids);
}
}
}
}
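debugAtLevel() formats each message once and appends it only when the configured verbosity allows; a minimal JDK-only sketch of that gating pattern, with integer levels standing in for DebugMessageBuilder.Level:

final class DebugLevelSketch {
  private final StringBuilder debugString = new StringBuilder();
  private final int configuredLevel;

  DebugLevelSketch(int configuredLevel) {
    this.configuredLevel = configuredLevel;
  }

  void debugAtLevel(int level, String format, Object... args) {
    if (configuredLevel >= level) {
      debugString.append(String.format(format, args)).append('\n');
    }
  }

  public static void main(String[] args) {
    DebugLevelSketch sketch = new DebugLevelSketch(2);
    sketch.debugAtLevel(1, "kept: merged %d tier responses", 3); // appended
    sketch.debugAtLevel(3, "dropped: verbose per-result dump");  // filtered out
    System.out.print(sketch.debugString);
  }
}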

View File

@ -1,604 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import scala.runtime.BoxedUnit;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
import com.twitter.search.common.util.earlybird.ResultsUtil;
import com.twitter.search.earlybird.thrift.EarlybirdDebugInfo;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.MultiwayMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
import com.twitter.search.earlybird_root.common.EarlybirdRequestUtil;
import com.twitter.util.Function;
import com.twitter.util.Future;
/**
* Base EarlybirdResponseMerger containing basic logic to merge EarlybirdResponse objects
*/
public abstract class EarlybirdResponseMerger implements EarlyTerminateTierMergePredicate {
private static final Logger LOG = LoggerFactory.getLogger(EarlybirdResponseMerger.class);
private static final Logger MIN_SEARCHED_STATUS_ID_LOGGER =
LoggerFactory.getLogger("MinSearchedStatusIdLogger");
private static final SearchCounter NO_SEARCH_RESULT_COUNTER =
SearchCounter.export("no_search_result_count");
private static final SearchCounter NO_RESPONSES_TO_MERGE =
SearchCounter.export("no_responses_to_merge");
private static final SearchCounter EARLYBIRD_RESPONSE_NO_MORE_RESULTS =
SearchCounter.export("merger_earlybird_response_no_more_results");
private static final String PARTITION_OR_TIER_COUNTER_NAME_FORMAT =
"merger_waited_for_response_from_%s_counter";
private static final String PARTITION_OR_TIER_ERROR_COUNTER_NAME_FORMAT =
"merger_num_error_responses_from_%s";
private static final String PARTITION_OR_TIER_RESPONSE_CODE_COUNTER_NAME_FORMAT =
"merger_earlybird_response_code_from_%s_%s";
protected final EarlybirdResponseDebugMessageBuilder responseMessageBuilder;
protected final EarlybirdRequestContext requestContext;
protected final ImmutableList<Future<EarlybirdResponse>> responses;
protected AccumulatedResponses accumulatedResponses;
@VisibleForTesting
static final Map<EarlybirdRequestType, SearchCounter> MERGER_CREATED_STATS =
perRequestTypeCounterImmutableMap("earlybird_response_merger_%s_created_count");
@VisibleForTesting
static final Map<EarlybirdRequestType, SearchCounter>
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_MAX_ID = perRequestTypeCounterImmutableMap(
"merger_%s_min_searched_status_id_larger_than_request_max_id");
@VisibleForTesting
static final Map<EarlybirdRequestType, SearchCounter>
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_UNTIL_TIME = perRequestTypeCounterImmutableMap(
"merger_%s_min_searched_status_id_larger_than_request_until_time");
private static Map<EarlybirdRequestType, SearchCounter> perRequestTypeCounterImmutableMap(
String statPattern) {
Map<EarlybirdRequestType, SearchCounter> statsMap = Maps.newEnumMap(EarlybirdRequestType.class);
for (EarlybirdRequestType earlybirdRequestType : EarlybirdRequestType.values()) {
String statName = String.format(statPattern, earlybirdRequestType.getNormalizedName());
statsMap.put(earlybirdRequestType, SearchCounter.export(statName));
}
return Maps.immutableEnumMap(statsMap);
}
public static final com.google.common.base.Function<EarlybirdResponse, Map<Long, Integer>>
HIT_COUNT_GETTER =
response -> response.getSearchResults() == null
? null
: response.getSearchResults().getHitCounts();
private final ChainMerger chainMerger;
private class ChainMerger {
private final EarlybirdRequestContext requestContext;
private final ResponseAccumulator responseAccumulator;
private final List<Future<EarlybirdResponse>> responses;
private final EarlybirdResponseDebugMessageBuilder responseMessageBuilder;
private int currentFutureIndex = -1;
public ChainMerger(EarlybirdRequestContext requestContext,
ResponseAccumulator responseAccumulator,
List<Future<EarlybirdResponse>> responses,
EarlybirdResponseDebugMessageBuilder responseMessageBuilder) {
this.requestContext = requestContext;
this.responseAccumulator = responseAccumulator;
this.responses = responses;
this.responseMessageBuilder = responseMessageBuilder;
}
public Future<EarlybirdResponse> merge() {
// 'responseFutures' should always be sorted.
// When returned by EarlybirdScatterGather service, the responses are sorted by partition ID.
// When returned by EarlybirdChainedScatterGatherService,
// responses are sorted descending by tier start date. See:
// com.twitter.search.earlybird_root.EarlybirdChainedScatterGatherService.TIER_COMPARATOR.
//
// When merging responses from partitions, we want to wait for responses from all partitions,
// so the order in which we wait for those results does not matter. When merging responses
// from tiers, we want to wait for the response from the latest tier first. If we don't need any more
// responses to compute the final response, then we don't need to wait for the responses from
// other tiers. If we cannot terminate early, then we want to wait for the responses from the
// second tier, and so on.
//
// We do not need to have any explicit synchronization, because:
// 1. The callbacks for future_i are set by the flatMap() callback on future_{i-1} (when
// recursively calling merge() inside the flatMap()).
// 2. Before setting the callbacks on future_i, future_{i-1}.flatMap() adds the response
// results to mergeHelper.
// 3. When the callbacks on future_i are set, the memory barrier between
// thread_running_future_{i-1} and thread_running_future_i is crossed. This guarantees
// that thread_running_future_i will see the updates to mergeHelper before it sees the
// callbacks. (Or thread_running_future_{i-1} == thread_running_future_i, in which case
// synchronization is not an issue, and correctness is guaranteed by the order in which
// things will run.)
// 4. The same reasoning applies to currentFutureIndex.
++currentFutureIndex;
if (currentFutureIndex >= responses.size()) {
return Future.value(getTimedMergedResponse(responseAccumulator.getAccumulatedResults()));
}
final String partitionTierName =
responseAccumulator.getNameForLogging(currentFutureIndex, responses.size());
final String nameForEarlybirdResponseCodeStats =
responseAccumulator.getNameForEarlybirdResponseCodeStats(
currentFutureIndex, responses.size());
// If a tier in the chain throws an exception, convert it to a null response, and let the
// mergeHelper handle it appropriately.
return responses.get(currentFutureIndex)
.handle(Function.func(t -> {
if (FinagleUtil.isCancelException(t)) {
return new EarlybirdResponse()
.setResponseCode(EarlybirdResponseCode.CLIENT_CANCEL_ERROR);
} else if (FinagleUtil.isTimeoutException(t)) {
return new EarlybirdResponse()
.setResponseCode(EarlybirdResponseCode.SERVER_TIMEOUT_ERROR);
} else {
SearchCounter.export(
String.format(PARTITION_OR_TIER_ERROR_COUNTER_NAME_FORMAT, partitionTierName))
.increment();
if (responseMessageBuilder.isDebugMode()) {
responseMessageBuilder.debugAndLogWarning(
String.format("[%s] failed, exception [%s]",
partitionTierName, t.toString()));
}
LOG.warn("exception response from: " + partitionTierName, t);
return new EarlybirdResponse()
.setResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR);
}
}))
.flatMap(Function.func(response -> {
Preconditions.checkNotNull(response);
SearchCounter.export(
String.format(PARTITION_OR_TIER_RESPONSE_CODE_COUNTER_NAME_FORMAT,
nameForEarlybirdResponseCodeStats,
response.getResponseCode().name().toLowerCase()))
.increment();
if ((response.getResponseCode() != EarlybirdResponseCode.PARTITION_SKIPPED)
&& (response.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED)) {
SearchCounter.export(
String.format(PARTITION_OR_TIER_COUNTER_NAME_FORMAT, partitionTierName))
.increment();
}
if (response.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR) {
// the request has been cancelled, no need to proceed
return Future.value(response);
}
rewriteResponseCodeIfSearchResultsMissing(requestContext, partitionTierName, response);
responseMessageBuilder.logResponseDebugInfo(
requestContext.getRequest(),
partitionTierName,
response);
responseAccumulator.addResponse(
responseMessageBuilder,
requestContext.getRequest(),
response);
if (responseAccumulator.shouldEarlyTerminateMerge(EarlybirdResponseMerger.this)) {
return Future.value(getTimedMergedResponse(
responseAccumulator.getAccumulatedResults()));
}
return merge();
}));
}
}
private void rewriteResponseCodeIfSearchResultsMissing(
EarlybirdRequestContext earlybirdRequestContext,
String partitionTierName,
EarlybirdResponse response) {
// We always require searchResults to be set, even for term stats and facet requests.
// This is because searchResults contains important info such as pagination cursors
// like minSearchedStatusID and minSearchedTimeSinceEpoch.
// We expect all successful responses to have searchResults set.
if (response.isSetResponseCode()
&& response.getResponseCode() == EarlybirdResponseCode.SUCCESS
&& response.getSearchResults() == null) {
NO_SEARCH_RESULT_COUNTER.increment();
LOG.warn("Received Earlybird response with null searchResults from [{}]"
+ " EarlybirdRequest [{}] EarlybirdResponse [{}] ",
partitionTierName, earlybirdRequestContext.getRequest(), response);
response.setResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR);
}
}
/**
* Construct an EarlybirdResponseMerger to merge responses from multiple partitions or tiers
* based on mode.
*/
EarlybirdResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator responseAccumulator) {
this.requestContext = requestContext;
this.responses = ImmutableList.copyOf(responses);
this.responseMessageBuilder =
new EarlybirdResponseDebugMessageBuilder(requestContext.getRequest());
this.chainMerger = new ChainMerger(requestContext, responseAccumulator, responses,
responseMessageBuilder);
}
/**
* Get a response merger to merge the given responses.
*/
public static EarlybirdResponseMerger getResponseMerger(
EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator helper,
EarlybirdCluster cluster,
EarlybirdFeatureSchemaMerger featureSchemaMerger,
int numPartitions) {
EarlybirdRequestType type = requestContext.getEarlybirdRequestType();
MERGER_CREATED_STATS.get(type).increment();
switch (type) {
case FACETS:
return new FacetResponseMerger(requestContext, responses, helper);
case TERM_STATS:
return new TermStatisticsResponseMerger(requestContext, responses, helper);
case RECENCY:
return new RecencyResponseMerger(requestContext, responses, helper, featureSchemaMerger);
case STRICT_RECENCY:
return new StrictRecencyResponseMerger(
requestContext, responses, helper, featureSchemaMerger, cluster);
case RELEVANCE:
return new RelevanceResponseMerger(
requestContext, responses, helper, featureSchemaMerger, numPartitions);
case TOP_TWEETS:
return new TopTweetsResponseMerger(requestContext, responses, helper);
default:
throw new RuntimeException("EarlybirdRequestType " + type + " is not supported by merge");
}
}
/**
* This method can perform two types of merges:
* 1. merge responses within a tier from different partitions.
* 2. merge responses from multiple tiers.
*/
public final Future<EarlybirdResponse> merge() {
return chainMerger.merge()
.onSuccess(checkMinSearchedStatusIdFunction(
"max_id",
EarlybirdRequestUtil.getRequestMaxId(requestContext.getParsedQuery()),
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_MAX_ID.get(
requestContext.getEarlybirdRequestType())))
.onSuccess(checkMinSearchedStatusIdFunction(
"until_time",
EarlybirdRequestUtil.getRequestMaxIdFromUntilTime(requestContext.getParsedQuery()),
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_UNTIL_TIME.get(
requestContext.getEarlybirdRequestType())));
}
/**
* Returns the function that checks if the minSearchedStatusID on the merged response is higher
* than the max ID in the request.
*/
private Function<EarlybirdResponse, BoxedUnit> checkMinSearchedStatusIdFunction(
final String operator, final Optional<Long> requestMaxId, final SearchCounter stat) {
return Function.cons(mergedResponse -> {
if (requestMaxId.isPresent()
&& requestMaxId.get() != Long.MAX_VALUE
&& (mergedResponse.getResponseCode() == EarlybirdResponseCode.SUCCESS)
&& mergedResponse.isSetSearchResults()
&& mergedResponse.getSearchResults().isSetMinSearchedStatusID()) {
long minSearchedStatusId = mergedResponse.getSearchResults().getMinSearchedStatusID();
// We sometimes set minSearchedStatusId = max_id + 1 when a request times out even
// before any search happens.
// Check SEARCH-10134 for more details.
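// Illustrative example: with max_id = 100, a minSearchedStatusID of 101 is tolerated because
// of the timeout case above, but 102 would trigger the warning below.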
if (minSearchedStatusId > requestMaxId.get() + 1) {
stat.increment();
String logMessage = "Response has a minSearchedStatusID ({}) larger than request "
+ operator + " ({})."
+ "\nrequest type: {}"
+ "\nrequest: {}"
+ "\nmerged response: {}"
+ "\nSuccessful accumulated responses:";
List<Object> logMessageParams = Lists.newArrayList();
logMessageParams.add(minSearchedStatusId);
logMessageParams.add(requestMaxId.get());
logMessageParams.add(requestContext.getEarlybirdRequestType());
logMessageParams.add(requestContext.getRequest());
logMessageParams.add(mergedResponse);
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
logMessage += "\naccumulated response: {}";
logMessageParams.add(response);
}
MIN_SEARCHED_STATUS_ID_LOGGER.warn(logMessage, logMessageParams.toArray());
}
}
});
}
private EarlybirdResponse getTimedMergedResponse(AccumulatedResponses accResponses) {
long start = System.nanoTime();
try {
return getMergedResponse(accResponses);
} finally {
long totalTime = System.nanoTime() - start;
getMergedResponseTimer().timerIncrement(totalTime);
}
}
private EarlybirdResponse initializeMergedSuccessResponseFromAccumulatedResponses() {
EarlybirdResponse mergedResponse = new EarlybirdResponse();
AccumulatedResponses.PartitionCounts partitionCounts =
accumulatedResponses.getPartitionCounts();
mergedResponse.setNumPartitions(partitionCounts.getNumPartitions())
.setNumSuccessfulPartitions(partitionCounts.getNumSuccessfulPartitions())
.setPerTierResponse(partitionCounts.getPerTierResponse())
.setNumSearchedSegments(accumulatedResponses.getNumSearchedSegments());
mergedResponse.setEarlyTerminationInfo(accumulatedResponses.getMergedEarlyTerminationInfo());
mergedResponse.setResponseCode(EarlybirdResponseCode.SUCCESS);
return mergedResponse;
}
private EarlybirdResponse getMergedResponse(AccumulatedResponses accResponses) {
accumulatedResponses = accResponses;
EarlybirdResponse mergedResponse;
if (accumulatedResponses.getSuccessResponses().isEmpty()
&& !accumulatedResponses.foundError()) {
// No successful or error responses. This means that all tiers / partitions are intentionally
// skipped. Return a blank successful response.
NO_RESPONSES_TO_MERGE.increment();
mergedResponse = new EarlybirdResponse()
.setResponseCode(EarlybirdResponseCode.SUCCESS)
.setSearchResults(new ThriftSearchResults())
.setDebugString("No responses to merge, probably because all tiers/partitions "
+ "were skipped.");
} else if (accumulatedResponses.isMergingAcrossTiers()) {
mergedResponse = getMergedResponseAcrossTiers();
} else {
mergedResponse = getMergedResponseAcrossPartitions();
}
saveMergedDebugString(mergedResponse);
return mergedResponse;
}
private EarlybirdResponse getMergedResponseAcrossTiers() {
Preconditions.checkState(
!accumulatedResponses.getSuccessResponses().isEmpty()
|| accumulatedResponses.foundError());
// When merging across tiers, if we have one failed tier, we should fail the whole
// response. Note that due to early termination, if a tier that is old fails
// but the newer tiers return enough results, the failed tier won't show up
// here in accumulatedResponses -- the only tiers that show up here
// will be successful.
if (accumulatedResponses.foundError()) {
// The TierResponseAccumulator early terminates on the first error, so we should
// never get more than one error. This means that the getMergedErrorResponse will
// return an error response with the error code of that one error, and will never
// have to decide which error response to return if the error responses are all
// different.
// Perhaps we should just return accumulatedResponses.getErrorResponses().get(0);
Preconditions.checkState(accumulatedResponses.getErrorResponses().size() == 1);
return accumulatedResponses.getMergedErrorResponse();
} else {
EarlybirdResponse mergedResponse = initializeMergedSuccessResponseFromAccumulatedResponses();
return internalMerge(mergedResponse);
}
}
private EarlybirdResponse getMergedResponseAcrossPartitions() {
Preconditions.checkState(
!accumulatedResponses.getSuccessResponses().isEmpty()
|| accumulatedResponses.foundError());
EarlybirdResponse mergedResponse;
// Unlike tier merging, one failed response doesn't mean the merged response should
// fail. If we have successful responses, we can check the success ratio, and if it's
// good enough we can still return a successful merge.
if (!accumulatedResponses.getSuccessResponses().isEmpty()) {
// We have at least one successful response, but still need to check the success ratio.
// mergedResponse is a SUCCESS response after this call, but we will
// set it to failure below if necessary.
mergedResponse = initializeMergedSuccessResponseFromAccumulatedResponses();
int numSuccessResponses = mergedResponse.getNumSuccessfulPartitions();
int numPartitions = mergedResponse.getNumPartitions();
double successThreshold = getSuccessResponseThreshold();
if (checkSuccessPartitionRatio(numSuccessResponses, numPartitions, successThreshold)) {
// Success! Proceed with merging.
mergedResponse.setResponseCode(EarlybirdResponseCode.SUCCESS);
mergedResponse = internalMerge(mergedResponse);
} else {
responseMessageBuilder.logBelowSuccessThreshold(
requestContext.getRequest().getSearchQuery(), numSuccessResponses, numPartitions,
successThreshold);
mergedResponse.setResponseCode(EarlybirdResponseCode.TOO_MANY_PARTITIONS_FAILED_ERROR);
}
} else {
mergedResponse = accumulatedResponses.getMergedErrorResponse();
}
return mergedResponse;
}
/**
* Derived classes should implement the logic to merge the specific type of results (recency,
* relevance, top tweets, etc.).
*/
protected abstract EarlybirdResponse internalMerge(EarlybirdResponse response);
protected abstract SearchTimerStats getMergedResponseTimer();
/**
* Do we have enough results so far that we can early terminate and not continue onto next tier?
*/
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination) {
// This is the most conservative tier merge logic: as long as we have some results, we should
// not return anything from the next tier. This may cause a less than ideal experience where a
// page is not full, but the user can still scroll further.
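// Illustrative: under this default policy, if this tier returned only 3 results for a request
// asking for 20, we still stop here instead of also querying the next (older) tier.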
return foundEarlyTermination || totalResultsFromSuccessfulShards >= 1;
}
private void saveMergedDebugString(EarlybirdResponse mergedResponse) {
if (responseMessageBuilder.isDebugMode()) {
String message = responseMessageBuilder.debugString();
mergedResponse.setDebugString(message);
if (!accumulatedResponses.getSuccessResponses().isEmpty()
&& accumulatedResponses.getSuccessResponses().get(0).isSetDebugInfo()) {
EarlybirdDebugInfo debugInfo =
accumulatedResponses.getSuccessResponses().get(0).getDebugInfo();
mergedResponse.setDebugInfo(debugInfo);
}
}
}
private double getSuccessResponseThreshold() {
EarlybirdRequest request = requestContext.getRequest();
if (request.isSetSuccessfulResponseThreshold()) {
double successfulResponseThreshold = request.getSuccessfulResponseThreshold();
Preconditions.checkArgument(successfulResponseThreshold > 0,
"Invalid successfulResponseThreshold %s", successfulResponseThreshold);
Preconditions.checkArgument(successfulResponseThreshold <= 1.0,
"Invalid successfulResponseThreshold %s", successfulResponseThreshold);
return successfulResponseThreshold;
} else {
return getDefaultSuccessResponseThreshold();
}
}
protected abstract double getDefaultSuccessResponseThreshold();
private static boolean checkSuccessPartitionRatio(
int numSuccessResponses,
int numPartitions,
double goodResponseThreshold) {
Preconditions.checkArgument(goodResponseThreshold > 0.0,
"Invalid goodResponseThreshold %s", goodResponseThreshold);
return numSuccessResponses >= (numPartitions * goodResponseThreshold);
}
/**
* Merge hit counts from all results.
*/
protected Map<Long, Integer> aggregateHitCountMap() {
Map<Long, Integer> hitCounts = ResultsUtil
.aggregateCountMap(accumulatedResponses.getSuccessResponses(), HIT_COUNT_GETTER);
if (hitCounts.size() > 0) {
if (responseMessageBuilder.isDebugMode()) {
responseMessageBuilder.append("Hit counts:\n");
for (Map.Entry<Long, Integer> entry : hitCounts.entrySet()) {
responseMessageBuilder.append(String.format(" %10s seconds: %d hits\n",
entry.getKey() / 1000, entry.getValue()));
}
}
return hitCounts;
}
return null;
}
/**
* Returns the number of results to keep as part of merge-collection.
*/
protected final int computeNumResultsToKeep() {
return EarlybirdResponseMergeUtil.computeNumResultsToKeep(requestContext.getRequest());
}
/**
* Remove exact duplicates (same id) from the result set.
*/
protected static void trimExactDups(ThriftSearchResults searchResults, TrimStats trimStats) {
int numResults = searchResults.getResultsSize();
List<ThriftSearchResult> oldResults = searchResults.getResults();
List<ThriftSearchResult> newResults = Lists.newArrayListWithCapacity(numResults);
HashSet<Long> resultSet = Sets.newHashSetWithExpectedSize(numResults);
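// Keep only the first occurrence of each tweet ID; any later result with the same ID is
// skipped and counted in trimStats.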
for (ThriftSearchResult result : oldResults) {
if (resultSet.contains(result.getId())) {
trimStats.increaseRemovedDupsCount();
continue;
}
newResults.add(result);
resultSet.add(result.getId());
}
searchResults.setResults(newResults);
}
protected final int addResponsesToCollector(MultiwayMergeCollector collector) {
int totalResultSize = 0;
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
if (response.isSetSearchResults()) {
totalResultSize += response.getSearchResults().getResultsSize();
}
collector.addResponse(response);
}
return totalResultSize;
}
/**
* Given a sorted searchResults (for recency, sorted by ID; for relevance, sorted by score),
* returns the first 'computeNumResultsToKeep()' number of results.
*
* @param searchResults the searchResults to be truncated.
*/
protected final void truncateResults(ThriftSearchResults searchResults, TrimStats trimStats) {
int numResultsRequested = computeNumResultsToKeep();
int to = numResultsRequested == Integer.MAX_VALUE ? searchResults.getResultsSize()
: Math.min(numResultsRequested, searchResults.getResultsSize());
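// Illustrative: if 30 results were merged and the request asked for 20, 'to' is 20 and the
// 10 results past that position are dropped below.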
if (searchResults.getResultsSize() > to) {
trimStats.setResultsTruncatedFromTailCount(searchResults.getResultsSize() - to);
if (to > 0) {
searchResults.setResults(searchResults.getResults().subList(0, to));
} else {
// No more results for the next page
EARLYBIRD_RESPONSE_NO_MORE_RESULTS.increment();
searchResults.setResults(Collections.<ThriftSearchResult>emptyList());
}
}
}
EarlybirdRequest getEarlybirdRequest() {
return requestContext.getRequest();
}
}

View File

@ -1,353 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Sets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.logging.DebugMessageBuilder;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.ranking.thriftjava.ThriftFacetRankingOptions;
import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant;
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftFacetCount;
import com.twitter.search.earlybird.thrift.ThriftFacetCountMetadata;
import com.twitter.search.earlybird.thrift.ThriftFacetFieldResults;
import com.twitter.search.earlybird.thrift.ThriftFacetResults;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
/**
* Merger class to merge facets EarlybirdResponse objects
*/
public class FacetResponseMerger extends EarlybirdResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(FacetResponseMerger.class);
private static final SearchTimerStats TIMER =
SearchTimerStats.export("merge_facets", TimeUnit.NANOSECONDS, false, true);
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
private final DebugMessageBuilder debugMessageBuilder;
/**
* Constructor to create the merger
*/
public FacetResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode) {
super(requestContext, responses, mode);
debugMessageBuilder = responseMessageBuilder.getDebugMessageBuilder();
debugMessageBuilder.verbose("--- Request Received: %s", requestContext.getRequest());
}
@Override
protected SearchTimerStats getMergedResponseTimer() {
return TIMER;
}
@Override
protected double getDefaultSuccessResponseThreshold() {
return SUCCESSFUL_RESPONSE_THRESHOLD;
}
@Override
protected EarlybirdResponse internalMerge(EarlybirdResponse facetsResponse) {
final Map<String, FacetsResultsUtils.FacetFieldInfo> facetFieldInfoMap =
new HashMap<>();
final Set<Long> userIDWhitelist = new HashSet<>();
// First, parse the responses and build up our facet info map.
boolean termStatsFilteringMode = FacetsResultsUtils.prepareFieldInfoMap(
requestContext.getRequest().getFacetRequest(), facetFieldInfoMap);
// Iterate through all futures and get results.
collectResponsesAndPopulateMap(facetFieldInfoMap, userIDWhitelist);
// Next, aggregate the top facets and update the blender response.
facetsResponse
.setFacetResults(new ThriftFacetResults()
.setFacetFields(new HashMap<>())
.setUserIDWhitelist(userIDWhitelist));
// keep track of how many facets a user contributed - this map gets reset for every field
Map<Long, Integer> perFieldAntiGamingMap = new HashMap<>();
// this one is used for images and twimgs
Map<Long, Integer> imagesAntiGamingMap = new HashMap<>();
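// Images and twimg facets intentionally share one map (see the field check below), so the
// per-user limit applies across both fields, while every other field starts from a freshly
// cleared map.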
Set<String> twimgDedupSet = null;
for (final Map.Entry<String, FacetsResultsUtils.FacetFieldInfo> entry
: facetFieldInfoMap.entrySet()) {
// reset for each field
String field = entry.getKey();
final Map<Long, Integer> antiGamingMap;
if (field.equals(EarlybirdFieldConstant.IMAGES_FACET)
|| field.equals(EarlybirdFieldConstant.TWIMG_FACET)) {
antiGamingMap = imagesAntiGamingMap;
} else {
perFieldAntiGamingMap.clear();
antiGamingMap = perFieldAntiGamingMap;
}
ThriftFacetFieldResults results = new ThriftFacetFieldResults();
FacetsResultsUtils.FacetFieldInfo info = entry.getValue();
results.setTotalCount(info.totalCounts);
results.setTopFacets(new ArrayList<>());
FacetsResultsUtils.fillTopLanguages(info, results);
if (info.topFacets != null && !info.topFacets.isEmpty()) {
fillFacetFieldResults(info, antiGamingMap, results);
}
if (field.equals(EarlybirdFieldConstant.TWIMG_FACET)) {
if (twimgDedupSet == null) {
twimgDedupSet = Sets.newHashSet();
}
FacetsResultsUtils.dedupTwimgFacet(twimgDedupSet, results, debugMessageBuilder);
}
facetsResponse.getFacetResults().putToFacetFields(entry.getKey(), results);
}
if (!termStatsFilteringMode) {
// In term stats filtering mode, doing this here would break term stats filtering.
FacetsResultsUtils.mergeTwimgResults(
facetsResponse.getFacetResults(),
Collections.<ThriftFacetCount>reverseOrder(
FacetsResultsUtils.getFacetCountComparator(
requestContext.getRequest().getFacetRequest())));
}
// Update the numHitsProcessed on ThriftSearchResults.
int numHitsProcessed = 0;
int numPartitionsEarlyTerminated = 0;
for (EarlybirdResponse earlybirdResponse: accumulatedResponses.getSuccessResponses()) {
ThriftSearchResults searchResults = earlybirdResponse.getSearchResults();
if (searchResults != null) {
numHitsProcessed += searchResults.getNumHitsProcessed();
numPartitionsEarlyTerminated += searchResults.getNumPartitionsEarlyTerminated();
}
}
ThriftSearchResults searchResults = new ThriftSearchResults();
searchResults.setResults(new ArrayList<>()); // required field
searchResults.setNumHitsProcessed(numHitsProcessed);
searchResults.setNumPartitionsEarlyTerminated(numPartitionsEarlyTerminated);
facetsResponse.setSearchResults(searchResults);
LOG.debug("Facets call completed successfully: {}", facetsResponse);
FacetsResultsUtils.fixNativePhotoUrl(facetsResponse);
return facetsResponse;
}
private void fillFacetFieldResults(FacetsResultsUtils.FacetFieldInfo facetFieldInfo,
Map<Long, Integer> antiGamingMap,
ThriftFacetFieldResults results) {
int minWeightedCount = 0;
int minSimpleCount = 0;
int maxPenaltyCount = Integer.MAX_VALUE;
double maxPenaltyCountRatio = 1;
boolean excludePossiblySensitiveFacets = false;
boolean onlyReturnFacetsWithDisplayTweet = false;
int maxHitsPerUser = -1;
EarlybirdRequest request = requestContext.getRequest();
if (request.getFacetRequest() != null) {
ThriftFacetRankingOptions rankingOptions = request.getFacetRequest().getFacetRankingOptions();
if (request.getSearchQuery() != null) {
maxHitsPerUser = request.getSearchQuery().getMaxHitsPerUser();
}
if (rankingOptions != null) {
LOG.debug("FacetsResponseMerger: Using rankingOptions={}", rankingOptions);
if (rankingOptions.isSetMinCount()) {
minWeightedCount = rankingOptions.getMinCount();
}
if (rankingOptions.isSetMinSimpleCount()) {
minSimpleCount = rankingOptions.getMinSimpleCount();
}
if (rankingOptions.isSetMaxPenaltyCount()) {
maxPenaltyCount = rankingOptions.getMaxPenaltyCount();
}
if (rankingOptions.isSetMaxPenaltyCountRatio()) {
maxPenaltyCountRatio = rankingOptions.getMaxPenaltyCountRatio();
}
if (rankingOptions.isSetExcludePossiblySensitiveFacets()) {
excludePossiblySensitiveFacets = rankingOptions.isExcludePossiblySensitiveFacets();
}
if (rankingOptions.isSetOnlyReturnFacetsWithDisplayTweet()) {
onlyReturnFacetsWithDisplayTweet = rankingOptions.isOnlyReturnFacetsWithDisplayTweet();
}
}
} else {
LOG.warn("earlybirdRequest.getFacetRequest() is null");
}
ThriftFacetCount[] topFacetsArray = new ThriftFacetCount[facetFieldInfo.topFacets.size()];
facetFieldInfo.topFacets.values().toArray(topFacetsArray);
Arrays.sort(topFacetsArray, Collections.<ThriftFacetCount>reverseOrder(
FacetsResultsUtils.getFacetCountComparator(request.getFacetRequest())));
int numResults = capFacetFieldWidth(facetFieldInfo.fieldRequest.numResults);
if (topFacetsArray.length < numResults) {
numResults = topFacetsArray.length;
}
int collected = 0;
for (int i = 0; i < topFacetsArray.length; ++i) {
ThriftFacetCount count = topFacetsArray[i];
if (onlyReturnFacetsWithDisplayTweet
&& (!count.isSetMetadata() || !count.getMetadata().isSetStatusId()
|| count.getMetadata().getStatusId() == -1)) {
// status id must be set
continue;
}
if (excludePossiblySensitiveFacets && count.isSetMetadata()
&& count.getMetadata().isStatusPossiblySensitive()) {
// the display tweet may be offensive or NSFW
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
debugMessageBuilder.verbose2("[%d] FacetsResponseMerger EXCLUDED: offensive or NSFW %s, "
+ "explanation: %s",
i, facetCountSummary(count),
count.getMetadata().getExplanation());
}
continue;
}
boolean filterOutUser = false;
if (maxHitsPerUser != -1 && count.isSetMetadata()) {
ThriftFacetCountMetadata metadata = count.getMetadata();
if (!metadata.dontFilterUser) {
long twitterUserId = metadata.getTwitterUserId();
int numResultsFromUser = 1;
if (twitterUserId != -1) {
Integer perUser = antiGamingMap.get(twitterUserId);
if (perUser != null) {
numResultsFromUser = perUser + 1;
filterOutUser = numResultsFromUser > maxHitsPerUser;
}
antiGamingMap.put(twitterUserId, numResultsFromUser);
}
}
}
// Filter out facets that don't meet the basic criteria.
if (count.getSimpleCount() < minSimpleCount) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
debugMessageBuilder.verbose2(
"[%d] FacetsResponseMerger EXCLUDED: simpleCount:%d < minSimpleCount:%d, %s",
i, count.getSimpleCount(), minSimpleCount, facetCountSummary(count));
}
continue;
}
if (count.getWeightedCount() < minWeightedCount) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
debugMessageBuilder.verbose2(
"[%d] FacetsResponseMerger EXCLUDED: weightedCount:%d < minWeightedCount:%d, %s",
i, count.getWeightedCount(), minWeightedCount, facetCountSummary(count));
}
continue;
}
if (filterOutUser) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
debugMessageBuilder.verbose2(
"[%d] FacetsResponseMerger EXCLUDED: antiGaming filterd user: %d: %s",
i, count.getMetadata().getTwitterUserId(), facetCountSummary(count));
}
continue;
}
if (count.getPenaltyCount() > maxPenaltyCount) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
debugMessageBuilder.verbose2(
"[%d] FacetsResponseMerger EXCLUCED: penaltyCount:%.3f > maxPenaltyCount:%.3f, %s",
i, count.getPenaltyCount(), maxPenaltyCount, facetCountSummary(count));
}
continue;
}
if (((double) count.getPenaltyCount() / count.getSimpleCount()) > maxPenaltyCountRatio) {
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
debugMessageBuilder.verbose2(
"[%d] FacetsResponseMerger EXCLUDED: penaltyCountRatio: %.3f > "
+ "maxPenaltyCountRatio:%.3f, %s",
i, (double) count.getPenaltyCount() / count.getSimpleCount(), maxPenaltyCountRatio,
facetCountSummary(count));
}
continue;
}
results.addToTopFacets(count);
collected++;
if (collected >= numResults) {
break;
}
}
}
private static int capFacetFieldWidth(int numResults) {
int ret = numResults;
if (numResults <= 0) {
// This should not be allowed in theory, but for now we handle the request with a default size.
ret = 10; // default to 10 so that the merge code below terminates correctly
}
if (numResults >= 100) {
ret = 100;
}
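// Illustrative: a request for 0 results is coerced to 10, and a request for 500 is capped
// at 100.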
return ret;
}
private static String facetCountSummary(final ThriftFacetCount count) {
if (count.isSetMetadata()) {
return String.format("Label: %s (s:%d, w:%d, p:%d, score:%.2f, sid:%d (%s))",
count.getFacetLabel(), count.getSimpleCount(), count.getWeightedCount(),
count.getPenaltyCount(), count.getScore(), count.getMetadata().getStatusId(),
count.getMetadata().getStatusLanguage());
} else {
return String.format("Label: %s (s:%d, w:%d, p:%d, score:%.2f)", count.getFacetLabel(),
count.getSimpleCount(), count.getWeightedCount(), count.getPenaltyCount(),
count.getScore());
}
}
// Iterate through the backend responses and fill up the FacetFieldInfo map.
private void collectResponsesAndPopulateMap(
final Map<String, FacetsResultsUtils.FacetFieldInfo> facetFieldInfoMap,
final Set<Long> userIDWhitelist) {
// Next, iterate through the backend responses.
int i = 0;
for (EarlybirdResponse facetsResponse : accumulatedResponses.getSuccessResponses()) {
if (facetsResponse.isSetFacetResults()) {
LOG.debug("Facet response from earlybird {} is {} ", i, facetsResponse.getFacetResults());
i++;
ThriftFacetResults facetResults = facetsResponse.getFacetResults();
if (facetResults.isSetUserIDWhitelist()) {
userIDWhitelist.addAll(facetResults.getUserIDWhitelist());
}
FacetsResultsUtils.fillFacetFieldInfo(
facetResults, facetFieldInfoMap,
userIDWhitelist);
}
}
LOG.debug("Earlybird facet response total size {}", i);
}
}

View File

@ -1,44 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
public final class PartitionResponseAccumulator extends ResponseAccumulator {
private static final String TARGET_TYPE_PARTITION = "partition";
@Override
public String getNameForLogging(int responseIndex, int numTotalResponses) {
return TARGET_TYPE_PARTITION + responseIndex;
}
@Override
public String getNameForEarlybirdResponseCodeStats(int responseIndex, int numTotalResponses) {
// We do not need to differentiate between partitions: we just want to get the number of
// responses returned by Earlybirds, for each EarlybirdResponseCode.
return TARGET_TYPE_PARTITION;
}
@Override
boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger) {
return false;
}
@Override
public void handleSkippedResponse(EarlybirdResponseCode responseCode) { }
@Override
public void handleErrorResponse(EarlybirdResponse response) {
}
@Override
public AccumulatedResponses.PartitionCounts getPartitionCounts() {
return new AccumulatedResponses.PartitionCounts(getNumResponses(),
getSuccessResponses().size() + getSuccessfulEmptyResponseCount(), null);
}
@Override
protected boolean isMergingAcrossTiers() {
return false;
}
}

View File

@ -1,638 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
import com.twitter.search.common.relevance.utils.ResultComparators;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.RecencyMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
.EarlyTerminationTrimmingStats.Type.ALREADY_EARLY_TERMINATED;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
.EarlyTerminationTrimmingStats.Type.FILTERED;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
.EarlyTerminationTrimmingStats.Type.FILTERED_AND_TRUNCATED;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
.EarlyTerminationTrimmingStats.Type.NOT_EARLY_TERMINATED;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
.EarlyTerminationTrimmingStats.Type.TERMINATED_GOT_EXACT_NUM_RESULTS;
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
.EarlyTerminationTrimmingStats.Type.TRUNCATED;
/**
* Merger class to merge recency search EarlybirdResponse objects.
*/
public class RecencyResponseMerger extends EarlybirdResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(RecencyResponseMerger.class);
private static final SearchTimerStats RECENCY_TIMER =
SearchTimerStats.export("merge_recency", TimeUnit.NANOSECONDS, false, true);
@VisibleForTesting
static final String TERMINATED_COLLECTED_ENOUGH_RESULTS =
"terminated_collected_enough_results";
// Allowed replication lag relative to the most caught-up replica. Replication lag exceeding
// this amount may result in some tweets from the lagging replica not being returned in search.
private static final long ALLOWED_REPLICATION_LAG_MS = 10000;
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
@VisibleForTesting
static final SearchCounter RECENCY_ZERO_RESULT_COUNT_AFTER_FILTERING_MAX_MIN_IDS =
SearchCounter.export("merger_recency_zero_result_count_after_filtering_max_min_ids");
@VisibleForTesting
static final SearchCounter RECENCY_TRIMMED_TOO_MANY_RESULTS_COUNT =
SearchCounter.export("merger_recency_trimmed_too_many_results_count");
private static final SearchCounter RECENCY_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS =
SearchCounter.export("merger_recency_tier_merge_early_terminated_with_not_enough_results");
private static final SearchCounter RECENCY_CLEARED_EARLY_TERMINATION_COUNT =
SearchCounter.export("merger_recency_cleared_early_termination_count");
/**
* Results were truncated because merged results exceeded the requested numResults.
*/
@VisibleForTesting
static final String MERGING_EARLY_TERMINATION_REASON_TRUNCATED =
"root_merging_truncated_results";
/**
* Results smaller than the merged minSearchedStatusId were filtered out.
*/
@VisibleForTesting
static final String MERGING_EARLY_TERMINATION_REASON_FILTERED =
"root_merging_filtered_results";
@VisibleForTesting
static final EarlyTerminationTrimmingStats PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
new EarlyTerminationTrimmingStats("recency_partition_merging");
@VisibleForTesting
static final EarlyTerminationTrimmingStats TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
new EarlyTerminationTrimmingStats("recency_tier_merging");
@VisibleForTesting
static class EarlyTerminationTrimmingStats {
enum Type {
/**
* The whole result was not terminated at all.
*/
NOT_EARLY_TERMINATED,
/**
* Was terminated before we did any trimming.
*/
ALREADY_EARLY_TERMINATED,
/**
* Was not terminated when merged, but results were filtered due to min/max ranges.
*/
FILTERED,
/**
* Was not terminated when merged, but results were truncated.
*/
TRUNCATED,
/**
* Was not terminated when merged, but results were filtered due to min/max ranges and
* truncated.
*/
FILTERED_AND_TRUNCATED,
/**
* When the search asks for X results and we get exactly X results back, without trimming
* or truncating on the tail side (min_id side), we still mark the search as early terminated.
* This is because later tiers may have more results.
*/
TERMINATED_GOT_EXACT_NUM_RESULTS,
}
/**
* A counter tracking merged responses for each {@link EarlyTerminationTrimmingStats.Type}
* defined above.
*/
private final ImmutableMap<Type, SearchCounter> searchCounterMap;
EarlyTerminationTrimmingStats(String prefix) {
Map<Type, SearchCounter> tempMap = Maps.newEnumMap(Type.class);
tempMap.put(NOT_EARLY_TERMINATED,
SearchCounter.export(prefix + "_not_early_terminated_after_merging"));
tempMap.put(ALREADY_EARLY_TERMINATED,
SearchCounter.export(prefix + "_early_terminated_before_merge_trimming"));
tempMap.put(TRUNCATED,
SearchCounter.export(prefix + "_early_terminated_after_merging_truncated"));
tempMap.put(FILTERED,
SearchCounter.export(prefix + "_early_terminated_after_merging_filtered"));
tempMap.put(FILTERED_AND_TRUNCATED,
SearchCounter.export(prefix + "_early_terminated_after_merging_filtered_and_truncated"));
tempMap.put(TERMINATED_GOT_EXACT_NUM_RESULTS,
SearchCounter.export(prefix + "_early_terminated_after_merging_got_exact_num_results"));
searchCounterMap = Maps.immutableEnumMap(tempMap);
}
public SearchCounter getCounterFor(Type type) {
return searchCounterMap.get(type);
}
}
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
public RecencyResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode,
EarlybirdFeatureSchemaMerger featureSchemaMerger) {
super(requestContext, responses, mode);
this.featureSchemaMerger = featureSchemaMerger;
}
@Override
protected double getDefaultSuccessResponseThreshold() {
return SUCCESSFUL_RESPONSE_THRESHOLD;
}
@Override
protected SearchTimerStats getMergedResponseTimer() {
return RECENCY_TIMER;
}
@Override
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
// The merged maxSearchedStatusId and minSearchedStatusId
long maxId = findMaxFullySearchedStatusID();
long minId = findMinFullySearchedStatusID();
RecencyMergeCollector collector = new RecencyMergeCollector(responses.size());
int totalResultSize = addResponsesToCollector(collector);
ThriftSearchResults searchResults = collector.getAllSearchResults();
TrimStats trimStats = trimResults(searchResults, minId, maxId);
setMergedMaxSearchedStatusId(searchResults, maxId);
setMergedMinSearchedStatusId(
searchResults, minId, trimStats.getResultsTruncatedFromTailCount() > 0);
mergedResponse.setSearchResults(searchResults);
// Override some components of the response as appropriate to real-time.
searchResults.setHitCounts(aggregateHitCountMap());
if (accumulatedResponses.isMergingPartitionsWithinATier()
&& clearEarlyTerminationIfReachingTierBottom(mergedResponse)) {
RECENCY_CLEARED_EARLY_TERMINATION_COUNT.increment();
} else {
setEarlyTerminationForTrimmedResults(mergedResponse, trimStats);
}
responseMessageBuilder.debugVerbose("Hits: %s %s", totalResultSize, trimStats);
responseMessageBuilder.debugVerbose(
"Hash Partitioned Earlybird call completed successfully: %s", mergedResponse);
featureSchemaMerger.collectAndSetFeatureSchemaInResponse(
searchResults,
requestContext,
"merger_recency_tier",
accumulatedResponses.getSuccessResponses());
return mergedResponse;
}
/**
* When we reached tier bottom, pagination can stop working even though we haven't got
* all results. e.g.
* Results from partition 1: [101 91 81], minSearchedStatusId is 81
* Results from Partition 2: [102 92], minSearchedStatusId is 92, not early terminated.
*
* After the merge, we get [102, 101, 92], with minResultId == 92. Since the results from
* partition 2 are not early terminated, 92 is the tier bottom here. Since results were
* filtered, early termination for the merged result is set to true, so blender will call
* again, with maxDocId == 91. This time we get:
* Results from partition 1: [91 81], minSearchedStatusId is 81
* Results from partition 2: [], minSearchedStatusId is still 92
* After merge we get [] and minSearchedStatusId is still 92. No progress can be made on
* pagination and clients get stuck.
*
* So in this case, we clear the early termination flag to tell blender there is no more
* result in this tier. Tweets below tier bottom will be missed, but that also happens
* without this step, as the next pagination call will return empty results anyway.
* So even if there is no overlap between tiers, this is still better.
*
* Return true if early termination is cleared due to this, otherwise return false.
* To be safe, we do nothing here to keep existing behavior and only override it in
* StrictRecencyResponseMerger.
*/
protected boolean clearEarlyTerminationIfReachingTierBottom(EarlybirdResponse mergedResponse) {
return false;
}
/**
* Determines if the merged response should be early-terminated when it has exactly as many
* trimmed results as requested, and is not early-terminated for other reasons.
*/
protected boolean shouldEarlyTerminateWhenEnoughTrimmedResults() {
return true;
}
/**
* If the end results were trimmed in any way, reflect that in the response as a query that was
* early terminated. A response can be trimmed either because (1) we merged more results than
* what was asked for with numResults and truncated them, or (2) we filtered out results that
* were smaller than the merged minSearchedStatusId.
*
* @param mergedResponse the merged response.
* @param trimStats trim stats for this merge.
*/
private void setEarlyTerminationForTrimmedResults(
EarlybirdResponse mergedResponse,
TrimStats trimStats) {
responseMessageBuilder.debugVerbose("Checking for merge trimming, trimStats %s", trimStats);
EarlyTerminationTrimmingStats stats = getEarlyTerminationTrimmingStats();
EarlyTerminationInfo earlyTerminationInfo = mergedResponse.getEarlyTerminationInfo();
Preconditions.checkNotNull(earlyTerminationInfo);
if (!earlyTerminationInfo.isEarlyTerminated()) {
if (trimStats.getMinIdFilterCount() > 0 || trimStats.getResultsTruncatedFromTailCount() > 0) {
responseMessageBuilder.debugVerbose("Setting early termination, trimStats: %s, results: %s",
trimStats, mergedResponse);
earlyTerminationInfo.setEarlyTerminated(true);
addEarlyTerminationReasons(earlyTerminationInfo, trimStats);
if (trimStats.getMinIdFilterCount() > 0
&& trimStats.getResultsTruncatedFromTailCount() > 0) {
stats.getCounterFor(FILTERED_AND_TRUNCATED).increment();
} else if (trimStats.getMinIdFilterCount() > 0) {
stats.getCounterFor(FILTERED).increment();
} else if (trimStats.getResultsTruncatedFromTailCount() > 0) {
stats.getCounterFor(TRUNCATED).increment();
} else {
Preconditions.checkState(false, "Invalid TrimStats: %s", trimStats);
}
} else if ((computeNumResultsToKeep() == mergedResponse.getSearchResults().getResultsSize())
&& shouldEarlyTerminateWhenEnoughTrimmedResults()) {
earlyTerminationInfo.setEarlyTerminated(true);
earlyTerminationInfo.addToMergedEarlyTerminationReasons(
TERMINATED_COLLECTED_ENOUGH_RESULTS);
stats.getCounterFor(TERMINATED_GOT_EXACT_NUM_RESULTS).increment();
} else {
stats.getCounterFor(NOT_EARLY_TERMINATED).increment();
}
} else {
stats.getCounterFor(ALREADY_EARLY_TERMINATED).increment();
// Even if the results were already marked as early terminated, we can add additional
// reasons for debugging (if the merged results were filtered or truncated).
addEarlyTerminationReasons(earlyTerminationInfo, trimStats);
}
}
private void addEarlyTerminationReasons(
EarlyTerminationInfo earlyTerminationInfo,
TrimStats trimStats) {
if (trimStats.getMinIdFilterCount() > 0) {
earlyTerminationInfo.addToMergedEarlyTerminationReasons(
MERGING_EARLY_TERMINATION_REASON_FILTERED);
}
if (trimStats.getResultsTruncatedFromTailCount() > 0) {
earlyTerminationInfo.addToMergedEarlyTerminationReasons(
MERGING_EARLY_TERMINATION_REASON_TRUNCATED);
}
}
private EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStats() {
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
return getEarlyTerminationTrimmingStatsForPartitions();
} else {
return getEarlyTerminationTrimmingStatsForTiers();
}
}
protected EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForPartitions() {
return PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
}
protected EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForTiers() {
return TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
}
/**
* If we get enough results, no need to go on.
* If one of the partitions early terminated, we can't go on or else there could be a gap.
*/
@Override
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination) {
int resultsRequested = computeNumResultsToKeep();
boolean shouldEarlyTerminate = foundEarlyTermination
|| totalResultsFromSuccessfulShards >= resultsRequested;
if (shouldEarlyTerminate && totalResultsFromSuccessfulShards < resultsRequested) {
RECENCY_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS.increment();
}
return shouldEarlyTerminate;
}
/**
* Find the min status id that has been _completely_ searched. When merging partitions within
* a tier, this is the largest min status id across all partitions; when merging tiers, it is
* the smallest min status id across all tiers.
*
* @return the min searched status id found
*/
protected long findMinFullySearchedStatusID() {
List<Long> minIds = accumulatedResponses.getMinIds();
if (minIds.isEmpty()) {
return Long.MIN_VALUE;
}
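// Illustrative: with min IDs [81, 92], partition merging returns 92 (every partition fully
// searched down to 92), while tier merging returns 81.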
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
// When merging partitions, the min ID should be the largest among the min IDs.
return Collections.max(accumulatedResponses.getMinIds());
} else {
// When merging tiers, the min ID should be the smallest among the min IDs.
return Collections.min(accumulatedResponses.getMinIds());
}
}
/**
* Find the max status id that has been _completely_ searched across all partitions. The
* smallest max status id across all partitions.
*
* This is where we reconcile replication lag by selecting the oldest maxid from the
* partitions searched.
*
* @return the max searched status id found
*/
protected long findMaxFullySearchedStatusID() {
List<Long> maxIDs = accumulatedResponses.getMaxIds();
if (maxIDs.isEmpty()) {
return Long.MAX_VALUE;
}
Collections.sort(maxIDs);
final long newest = maxIDs.get(maxIDs.size() - 1);
final long newestTimestamp = SnowflakeIdParser.getTimestampFromTweetId(newest);
for (int i = 0; i < maxIDs.size(); i++) {
long oldest = maxIDs.get(i);
long oldestTimestamp = SnowflakeIdParser.getTimestampFromTweetId(oldest);
long deltaMs = newestTimestamp - oldestTimestamp;
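// Illustrative: if the newest partition's max ID is about 5 seconds newer than this one's,
// deltaMs is about 5000, within ALLOWED_REPLICATION_LAG_MS, so this older max ID is returned.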
if (i == 0) {
LOG.debug("Max delta is {}", deltaMs);
}
if (deltaMs < ALLOWED_REPLICATION_LAG_MS) {
if (i != 0) {
LOG.debug("{} partition replicas lagging more than {} ms", i, ALLOWED_REPLICATION_LAG_MS);
}
return oldest;
}
}
// Can't get here - by this point oldest == newest, and delta is 0.
return newest;
}
/**
* Trim the ThriftSearchResults if we have enough results, to return the first
* 'computeNumResultsToKeep()' number of results.
*
* If we don't have enough results after trimming, this function will first try to back fill
* older results, then newer results
*
* @param searchResults ThriftSearchResults holding the List<ThriftSearchResult> to be trimmed
* @return TrimStats containing statistics about how many results were removed
*/
protected TrimStats trimResults(
ThriftSearchResults searchResults,
long mergedMin,
long mergedMax) {
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
// no results, no trimming needed
return TrimStats.EMPTY_STATS;
}
if (requestContext.getRequest().getSearchQuery().isSetSearchStatusIds()) {
// Not a normal search, no trimming needed
return TrimStats.EMPTY_STATS;
}
TrimStats trimStats = new TrimStats();
trimExactDups(searchResults, trimStats);
int numResultsRequested = computeNumResultsToKeep();
if (shouldSkipTrimmingWhenNotEnoughResults(searchResults, numResultsRequested)) {
//////////////////////////////////////////////////////////
// We don't have enough results, let's not do trimming
//////////////////////////////////////////////////////////
return trimStats;
}
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
trimResultsBasedSearchedRange(
searchResults, trimStats, numResultsRequested, mergedMin, mergedMax);
}
// Respect "computeNumResultsToKeep()" here, only keep "computeNumResultsToKeep()" results.
truncateResults(searchResults, trimStats);
return trimStats;
}
/**
* When there are not enough results, we don't remove results based on the searched range.
* This is a tradeoff: we don't reduce our recall when we already don't have enough results,
* but we can lose results while paginating because we return results outside of the valid
* searched range.
*/
protected boolean shouldSkipTrimmingWhenNotEnoughResults(
ThriftSearchResults searchResults, int numResultsRequested) {
return searchResults.getResultsSize() <= numResultsRequested;
}
/**
* Trim results based on search range. The search range [x, y] is determined by:
* x is the maximum of the minimum searched IDs;
* y is the minimum of the maximum searched IDs.
*
* IDs outside of this range are removed.
* If we do not get enough results after the removal, we add IDs back until we get enough results.
* We first add back IDs from the older side. If there are still not enough results,
* we start adding back IDs from the newer side.
*/
private void trimResultsBasedSearchedRange(ThriftSearchResults searchResults,
TrimStats trimStats,
int numResultsRequested,
long mergedMin,
long mergedMax) {
///////////////////////////////////////////////////////////////////
// we have more results than requested, let's do some trimming
///////////////////////////////////////////////////////////////////
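// Illustrative walk-through: with mergedMin = 92, mergedMax = 102 and merged results
// [105, 102, 101, 92, 81], the filter below drops 105 and 81; if that leaves fewer results
// than requested, 81 is added back first (older side), then 105 (newer side).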
// Save the original results before trimming
List<ThriftSearchResult> originalResults = searchResults.getResults();
filterResultsByMergedMinMaxIds(searchResults, mergedMax, mergedMin, trimStats);
// This does happen. It is hard to say what we should do here, so we just restore the
// original results.
if (searchResults.getResultsSize() == 0) {
RECENCY_ZERO_RESULT_COUNT_AFTER_FILTERING_MAX_MIN_IDS.increment();
searchResults.setResults(originalResults);
// Clean up min/max filtered counts, since we're bringing back whatever we just filtered.
trimStats.clearMaxIdFilterCount();
trimStats.clearMinIdFilterCount();
if (LOG.isDebugEnabled() || responseMessageBuilder.isDebugMode()) {
String errMsg = "No trimming is done as filtered results is empty. "
+ "maxId=" + mergedMax + ",minId=" + mergedMin;
LOG.debug(errMsg);
responseMessageBuilder.append(errMsg + "\n");
}
} else {
// oops! we're trimming too many results. Let's put some back
if (searchResults.getResultsSize() < numResultsRequested) {
RECENCY_TRIMMED_TOO_MANY_RESULTS_COUNT.increment();
List<ThriftSearchResult> trimmedResults = searchResults.getResults();
long firstTrimmedResultId = trimmedResults.get(0).getId();
long lastTrimmedResultId = trimmedResults.get(trimmedResults.size() - 1).getId();
// First, try to back fill with older results
int i = 0;
for (; i < originalResults.size(); ++i) {
ThriftSearchResult result = originalResults.get(i);
if (result.getId() < lastTrimmedResultId) {
trimmedResults.add(result);
trimStats.decreaseMinIdFilterCount();
if (trimmedResults.size() >= numResultsRequested) {
break;
}
}
}
if (trimmedResults.size() < numResultsRequested) {
// still not enough results? back fill with newer results
// find the oldest of the newer results
for (i = originalResults.size() - 1; i >= 0; --i) {
ThriftSearchResult result = originalResults.get(i);
if (result.getId() > firstTrimmedResultId) {
trimmedResults.add(result);
trimStats.decreaseMaxIdFilterCount();
if (trimmedResults.size() >= numResultsRequested) {
break;
}
}
}
// newer results were added to the back of the list, re-sort
Collections.sort(trimmedResults, ResultComparators.ID_COMPARATOR);
}
}
}
}
protected void setMergedMinSearchedStatusId(
ThriftSearchResults searchResults,
long currentMergedMin,
boolean resultsWereTrimmed) {
if (accumulatedResponses.getMinIds().isEmpty()) {
return;
}
long merged;
if (searchResults == null
|| !searchResults.isSetResults()
|| searchResults.getResultsSize() == 0) {
merged = currentMergedMin;
} else {
List<ThriftSearchResult> results = searchResults.getResults();
long firstResultId = results.get(0).getId();
long lastResultId = results.get(results.size() - 1).getId();
merged = Math.min(firstResultId, lastResultId);
if (!resultsWereTrimmed) {
// If the results were trimmed, we want to set minSearchedStatusID to the smallest
// tweet ID in the response. Otherwise, we want to take the min between that, and
// the current minSearchedStatusID.
merged = Math.min(merged, currentMergedMin);
}
}
searchResults.setMinSearchedStatusID(merged);
}
private void setMergedMaxSearchedStatusId(
ThriftSearchResults searchResults,
long currentMergedMax) {
if (accumulatedResponses.getMaxIds().isEmpty()) {
return;
}
long merged;
if (searchResults == null
|| !searchResults.isSetResults()
|| searchResults.getResultsSize() == 0) {
merged = currentMergedMax;
} else {
List<ThriftSearchResult> results = searchResults.getResults();
long firstResultId = results.get(0).getId();
long lastResultId = results.get(results.size() - 1).getId();
long maxResultId = Math.max(firstResultId, lastResultId);
merged = Math.max(maxResultId, currentMergedMax);
}
searchResults.setMaxSearchedStatusID(merged);
}
protected static void filterResultsByMergedMinMaxIds(
ThriftSearchResults results, long maxStatusId, long minStatusId, TrimStats trimStats) {
List<ThriftSearchResult> trimmedResults =
Lists.newArrayListWithCapacity(results.getResultsSize());
for (ThriftSearchResult result : results.getResults()) {
long statusId = result.getId();
if (statusId > maxStatusId) {
trimStats.increaseMaxIdFilterCount();
} else if (statusId < minStatusId) {
trimStats.increaseMinIdFilterCount();
} else {
trimmedResults.add(result);
}
}
results.setResults(trimmedResults);
}
}

View File

@ -1,268 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.constants.thriftjava.ThriftLanguage;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.common.util.earlybird.ResultsUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.RelevanceMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
/**
* Merger class to merge relevance search EarlybirdResponse objects
*/
public class RelevanceResponseMerger extends EarlybirdResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(RelevanceResponseMerger.class);
private static final SearchTimerStats TIMER =
SearchTimerStats.export("merge_relevance", TimeUnit.NANOSECONDS, false, true);
private static final SearchCounter RELEVANCE_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS =
SearchCounter.export("merger_relevance_tier_merge_early_terminated_with_not_enough_results");
private static final String PARTITION_NUM_RESULTS_COUNTER_SKIP_STATS =
"merger_relevance_post_trimmed_results_skip_stat_tier_%s_partition_%d";
@VisibleForTesting
public static final String PARTITION_NUM_RESULTS_COUNTER_NAME_FORMAT =
"merger_relevance_post_trimmed_results_from_tier_%s_partition_%d";
protected static final Function<EarlybirdResponse, Map<ThriftLanguage, Integer>> LANG_MAP_GETTER =
response -> response.getSearchResults() == null
? null
: response.getSearchResults().getLanguageHistogram();
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.8;
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
// The number of partitions is not meaningful when this merger is invoked through multi-tier merging.
private final int numPartitions;
public RelevanceResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode,
EarlybirdFeatureSchemaMerger featureSchemaMerger,
int numPartitions) {
super(requestContext, responses, mode);
this.featureSchemaMerger = Preconditions.checkNotNull(featureSchemaMerger);
this.numPartitions = numPartitions;
}
@Override
protected double getDefaultSuccessResponseThreshold() {
return SUCCESSFUL_RESPONSE_THRESHOLD;
}
@Override
protected SearchTimerStats getMergedResponseTimer() {
return TIMER;
}
@Override
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
final ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
long maxId = findMaxFullySearchedStatusID();
long minId = findMinFullySearchedStatusID();
Preconditions.checkNotNull(searchQuery);
Preconditions.checkState(searchQuery.isSetRankingMode());
Preconditions.checkState(searchQuery.getRankingMode() == ThriftSearchRankingMode.RELEVANCE);
// First get the results in score order (the default comparator for this merge collector).
RelevanceMergeCollector collector = new RelevanceMergeCollector(responses.size());
int totalResultSize = addResponsesToCollector(collector);
ThriftSearchResults searchResults = collector.getAllSearchResults();
TrimStats trimStats = trimResults(searchResults);
featureSchemaMerger.collectAndSetFeatureSchemaInResponse(
searchResults,
requestContext,
"merger_relevance_tier",
accumulatedResponses.getSuccessResponses());
mergedResponse.setSearchResults(searchResults);
searchResults = mergedResponse.getSearchResults();
searchResults
.setHitCounts(aggregateHitCountMap())
.setLanguageHistogram(aggregateLanguageHistograms());
if (!accumulatedResponses.getMaxIds().isEmpty()) {
searchResults.setMaxSearchedStatusID(maxId);
}
if (!accumulatedResponses.getMinIds().isEmpty()) {
searchResults.setMinSearchedStatusID(minId);
}
LOG.debug("Hits: {} Removed duplicates: {}", totalResultSize, trimStats.getRemovedDupsCount());
LOG.debug("Hash Partition'ed Earlybird call completed successfully: {}", mergedResponse);
publishNumResultsFromPartitionStatistics(mergedResponse);
return mergedResponse;
}
/**
* If any of the partitions has an early termination, the tier merge must also early terminate.
*
* If a partition early terminated (we haven't fully searched that partition), and we instead
* moved onto the next tier, there will be a gap of unsearched results.
*
* If our early termination condition were only that we had enough results, we could get
* bad-quality results by looking at only 20 hits when asking for 20 results.
*/
@Override
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination) {
// Don't use computeNumResultsToKeep because if returnAllResults is true, it will be
// Integer.MAX_VALUE and we will always log a stat that we didn't get enough results
int resultsRequested;
EarlybirdRequest request = requestContext.getRequest();
if (request.isSetNumResultsToReturnAtRoot()) {
resultsRequested = request.getNumResultsToReturnAtRoot();
} else {
resultsRequested = request.getSearchQuery().getCollectorParams().getNumResultsToReturn();
}
if (foundEarlyTermination && totalResultsFromSuccessfulShards < resultsRequested) {
RELEVANCE_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS.increment();
}
return foundEarlyTermination;
}
/**
* Merge language histograms from all queries.
*
* @return Merged per-language count map.
*/
private Map<ThriftLanguage, Integer> aggregateLanguageHistograms() {
Map<ThriftLanguage, Integer> totalLangCounts = new TreeMap<>(
ResultsUtil.aggregateCountMap(
accumulatedResponses.getSuccessResponses(), LANG_MAP_GETTER));
if (totalLangCounts.size() > 0) {
if (responseMessageBuilder.isDebugMode()) {
responseMessageBuilder.append("Language Distrbution:\n");
int count = 0;
for (Map.Entry<ThriftLanguage, Integer> entry : totalLangCounts.entrySet()) {
responseMessageBuilder.append(
String.format(" %10s:%6d", entry.getKey(), entry.getValue()));
if (++count % 5 == 0) {
responseMessageBuilder.append("\n");
}
}
responseMessageBuilder.append("\n");
}
}
return totalLangCounts;
}
/**
* Find the min status id that has been searched. Since no results are trimmed for Relevance mode,
* it should be the smallest among the min IDs.
*/
private long findMinFullySearchedStatusID() {
// The min ID should be the smallest among the min IDs
return accumulatedResponses.getMinIds().isEmpty() ? 0
: Collections.min(accumulatedResponses.getMinIds());
}
/**
* Find the max status id that has been searched. Since no results are trimmed for Relevance mode,
* it should be the largest among the max IDs.
*/
private long findMaxFullySearchedStatusID() {
// The max ID should be the largest among the max IDs
return accumulatedResponses.getMaxIds().isEmpty() ? 0
: Collections.max(accumulatedResponses.getMaxIds());
}
/**
* Return all the searchResults except duplicates.
*
* @param searchResults ThriftSearchResults holding the List<ThriftSearchResult> to be trimmed
* @return TrimStats containing statistics about how many results were removed
*/
private TrimStats trimResults(ThriftSearchResults searchResults) {
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
// no results, no trimming needed
return TrimStats.EMPTY_STATS;
}
if (requestContext.getRequest().getSearchQuery().isSetSearchStatusIds()) {
// Not a normal search, no trimming needed
return TrimStats.EMPTY_STATS;
}
TrimStats trimStats = new TrimStats();
trimExactDups(searchResults, trimStats);
truncateResults(searchResults, trimStats);
return trimStats;
}
private void publishNumResultsFromPartitionStatistics(EarlybirdResponse mergedResponse) {
// Keep track of all of the results that were kept after merging
Set<Long> mergedResults =
EarlybirdResponseUtil.getResults(mergedResponse).getResults()
.stream()
.map(result -> result.getId())
.collect(Collectors.toSet());
// For each successful response (pre merge), count how many of its results were kept post merge.
// Increment the appropriate stat.
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
if (!response.isSetEarlybirdServerStats()) {
continue;
}
int numResultsKept = 0;
for (ThriftSearchResult result
: EarlybirdResponseUtil.getResults(response).getResults()) {
if (mergedResults.contains(result.getId())) {
++numResultsKept;
}
}
// We only update partition stats when the partition ID looks sane.
String tierName = response.getEarlybirdServerStats().getTierName();
int partition = response.getEarlybirdServerStats().getPartition();
if (partition >= 0 && partition < numPartitions) {
SearchCounter.export(String.format(PARTITION_NUM_RESULTS_COUNTER_NAME_FORMAT,
tierName,
partition))
.add(numResultsKept);
} else {
SearchCounter.export(String.format(PARTITION_NUM_RESULTS_COUNTER_SKIP_STATS,
tierName,
partition)).increment();
}
}
}
}

View File

@ -1,356 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.ResponseMergerUtils;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
/**
* Accumulates EarlybirdResponse's and determines when to early terminate.
*/
public abstract class ResponseAccumulator {
@VisibleForTesting
static class MinMaxSearchedIdStats {
/** How many responses' min/max searched status IDs we actually checked. */
private final SearchCounter checkedMaxMinSearchedStatusId;
private final SearchCounter unsetMaxSearchedStatusId;
private final SearchCounter unsetMinSearchedStatusId;
private final SearchCounter unsetMaxAndMinSearchedStatusId;
private final SearchCounter sameMinMaxSearchedIdWithoutResults;
private final SearchCounter sameMinMaxSearchedIdWithOneResult;
private final SearchCounter sameMinMaxSearchedIdWithResults;
private final SearchCounter flippedMinMaxSearchedId;
MinMaxSearchedIdStats(EarlybirdRequestType requestType) {
String statPrefix = "merge_helper_" + requestType.getNormalizedName();
checkedMaxMinSearchedStatusId = SearchCounter.export(statPrefix
+ "_max_min_searched_id_checks");
unsetMaxSearchedStatusId = SearchCounter.export(statPrefix
+ "_unset_max_searched_status_id");
unsetMinSearchedStatusId = SearchCounter.export(statPrefix
+ "_unset_min_searched_status_id");
unsetMaxAndMinSearchedStatusId = SearchCounter.export(statPrefix
+ "_unset_max_and_min_searched_status_id");
sameMinMaxSearchedIdWithoutResults = SearchCounter.export(statPrefix
+ "_same_min_max_searched_id_without_results");
sameMinMaxSearchedIdWithOneResult = SearchCounter.export(statPrefix
+ "_same_min_max_searched_id_with_one_results");
sameMinMaxSearchedIdWithResults = SearchCounter.export(statPrefix
+ "_same_min_max_searched_id_with_results");
flippedMinMaxSearchedId = SearchCounter.export(statPrefix
+ "_flipped_min_max_searched_id");
}
@VisibleForTesting
SearchCounter getCheckedMaxMinSearchedStatusId() {
return checkedMaxMinSearchedStatusId;
}
@VisibleForTesting
SearchCounter getFlippedMinMaxSearchedId() {
return flippedMinMaxSearchedId;
}
@VisibleForTesting
SearchCounter getUnsetMaxSearchedStatusId() {
return unsetMaxSearchedStatusId;
}
@VisibleForTesting
SearchCounter getUnsetMinSearchedStatusId() {
return unsetMinSearchedStatusId;
}
@VisibleForTesting
SearchCounter getUnsetMaxAndMinSearchedStatusId() {
return unsetMaxAndMinSearchedStatusId;
}
@VisibleForTesting
SearchCounter getSameMinMaxSearchedIdWithoutResults() {
return sameMinMaxSearchedIdWithoutResults;
}
@VisibleForTesting
SearchCounter getSameMinMaxSearchedIdWithOneResult() {
return sameMinMaxSearchedIdWithOneResult;
}
@VisibleForTesting
SearchCounter getSameMinMaxSearchedIdWithResults() {
return sameMinMaxSearchedIdWithResults;
}
}
@VisibleForTesting
static final Map<EarlybirdRequestType, MinMaxSearchedIdStats> MIN_MAX_SEARCHED_ID_STATS_MAP;
static {
EnumMap<EarlybirdRequestType, MinMaxSearchedIdStats> statsMap
= Maps.newEnumMap(EarlybirdRequestType.class);
for (EarlybirdRequestType earlybirdRequestType : EarlybirdRequestType.values()) {
statsMap.put(earlybirdRequestType, new MinMaxSearchedIdStats(earlybirdRequestType));
}
MIN_MAX_SEARCHED_ID_STATS_MAP = Maps.immutableEnumMap(statsMap);
}
// Merge has encountered at least one early terminated response.
private boolean foundEarlyTermination = false;
// Empty but successful response counter (E.g. when a tier or partition is skipped)
private int successfulEmptyResponseCount = 0;
// The list of the successful responses from all earlybird futures. This does not include empty
// responses resulting from null requests.
private final List<EarlybirdResponse> successResponses = new ArrayList<>();
// The list of the error responses from all earlybird futures.
private final List<EarlybirdResponse> errorResponses = new ArrayList<>();
// the list of max statusIds seen in each earlybird.
private final List<Long> maxIds = new ArrayList<>();
// the list of min statusIds seen in each earlybird.
private final List<Long> minIds = new ArrayList<>();
private int numResponses = 0;
private int numResultsAccumulated = 0;
private int numSearchedSegments = 0;
/**
* Returns a string that can be used for logging to identify a single response out of all the
* responses that are being merged.
*
* @param responseIndex the index of a response's partition or tier, depending on the type of
* responses being accumulated.
* @param numTotalResponses the total number of partitions or tiers that are being merged.
*/
public abstract String getNameForLogging(int responseIndex, int numTotalResponses);
/**
* Returns a string that is used to export per-EarlybirdResponseCode stats for partitions and tiers.
*
* @param responseIndex the index of a response's partition or tier.
* @param numTotalResponses the total number of partitions or tiers that are being merged.
* @return a string that is used to export per-EarlybirdResponseCode stats for partitions and tiers.
*/
public abstract String getNameForEarlybirdResponseCodeStats(
int responseIndex, int numTotalResponses);
abstract boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger);
/**
* Add an EarlybirdResponse.
*/
public void addResponse(EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
EarlybirdRequest request,
EarlybirdResponse response) {
numResponses++;
numSearchedSegments += response.getNumSearchedSegments();
if (isSkippedResponse(response)) {
// This is an empty response, no processing is required, just need to update statistics.
successfulEmptyResponseCount++;
handleSkippedResponse(response.getResponseCode());
} else if (isErrorResponse(response)) {
errorResponses.add(response);
handleErrorResponse(response);
} else {
handleSuccessfulResponse(responseMessageBuilder, request, response);
}
}
private boolean isErrorResponse(EarlybirdResponse response) {
return !response.isSetResponseCode()
|| response.getResponseCode() != EarlybirdResponseCode.SUCCESS;
}
private boolean isSkippedResponse(EarlybirdResponse response) {
return response.isSetResponseCode()
&& (response.getResponseCode() == EarlybirdResponseCode.PARTITION_SKIPPED
|| response.getResponseCode() == EarlybirdResponseCode.TIER_SKIPPED);
}
/**
* Record a response corresponding to a skipped partition or skipped tier.
*/
protected abstract void handleSkippedResponse(EarlybirdResponseCode responseCode);
/**
* Handle an error response
*/
protected abstract void handleErrorResponse(EarlybirdResponse response);
/**
* Subclasses can override this to perform additional handling of successful responses.
*/
protected void extraSuccessfulResponseHandler(EarlybirdResponse response) { }
/**
* Whether the helper is for merging results from partitions within a single tier.
*/
protected final boolean isMergingPartitionsWithinATier() {
return !isMergingAcrossTiers();
}
/**
* Whether the helper is for merging results across different tiers.
*/
protected abstract boolean isMergingAcrossTiers();
/**
* Record a successful response.
*/
public final void handleSuccessfulResponse(
EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
EarlybirdRequest request,
EarlybirdResponse response) {
successResponses.add(response);
if (response.isSetSearchResults()) {
ThriftSearchResults searchResults = response.getSearchResults();
numResultsAccumulated += searchResults.getResultsSize();
recordMinMaxSearchedIdsAndUpdateStats(responseMessageBuilder, request, response,
searchResults);
}
if (response.isSetEarlyTerminationInfo()
&& response.getEarlyTerminationInfo().isEarlyTerminated()) {
foundEarlyTermination = true;
}
extraSuccessfulResponseHandler(response);
}
private void recordMinMaxSearchedIdsAndUpdateStats(
EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
EarlybirdRequest request,
EarlybirdResponse response,
ThriftSearchResults searchResults) {
boolean isMaxIdSet = searchResults.isSetMaxSearchedStatusID();
boolean isMinIdSet = searchResults.isSetMinSearchedStatusID();
if (isMaxIdSet) {
maxIds.add(searchResults.getMaxSearchedStatusID());
}
if (isMinIdSet) {
minIds.add(searchResults.getMinSearchedStatusID());
}
updateMinMaxIdStats(responseMessageBuilder, request, response, searchResults, isMaxIdSet,
isMinIdSet);
}
private void updateMinMaxIdStats(
EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
EarlybirdRequest request,
EarlybirdResponse response,
ThriftSearchResults searchResults,
boolean isMaxIdSet,
boolean isMinIdSet) {
// Now just track the stats.
EarlybirdRequestType requestType = EarlybirdRequestType.of(request);
MinMaxSearchedIdStats minMaxSearchedIdStats = MIN_MAX_SEARCHED_ID_STATS_MAP.get(requestType);
minMaxSearchedIdStats.checkedMaxMinSearchedStatusId.increment();
if (isMaxIdSet && isMinIdSet) {
if (searchResults.getMinSearchedStatusID() > searchResults.getMaxSearchedStatusID()) {
// We do not expect this case to happen in production.
minMaxSearchedIdStats.flippedMinMaxSearchedId.increment();
} else if (searchResults.getResultsSize() == 0
&& searchResults.getMaxSearchedStatusID() == searchResults.getMinSearchedStatusID()) {
minMaxSearchedIdStats.sameMinMaxSearchedIdWithoutResults.increment();
responseMessageBuilder.debugVerbose(
"Got no results, and same min/max searched ids. Request: %s, Response: %s",
request, response);
} else if (searchResults.getResultsSize() == 1
&& searchResults.getMaxSearchedStatusID() == searchResults.getMinSearchedStatusID()) {
minMaxSearchedIdStats.sameMinMaxSearchedIdWithOneResult.increment();
responseMessageBuilder.debugVerbose(
"Got one results, and same min/max searched ids. Request: %s, Response: %s",
request, response);
} else if (searchResults.getMaxSearchedStatusID()
== searchResults.getMinSearchedStatusID()) {
minMaxSearchedIdStats.sameMinMaxSearchedIdWithResults.increment();
responseMessageBuilder.debugVerbose(
"Got multiple results, and same min/max searched ids. Request: %s, Response: %s",
request, response);
}
} else if (!isMaxIdSet && isMinIdSet) {
// We do not expect this case to happen in production.
minMaxSearchedIdStats.unsetMaxSearchedStatusId.increment();
responseMessageBuilder.debugVerbose(
"Got unset maxSearchedStatusID. Request: %s, Response: %s", request, response);
} else if (isMaxIdSet && !isMinIdSet) {
// We do not expect this case to happen in production.
minMaxSearchedIdStats.unsetMinSearchedStatusId.increment();
responseMessageBuilder.debugVerbose(
"Got unset minSearchedStatusID. Request: %s, Response: %s", request, response);
} else {
Preconditions.checkState(!isMaxIdSet && !isMinIdSet);
minMaxSearchedIdStats.unsetMaxAndMinSearchedStatusId.increment();
responseMessageBuilder.debugVerbose(
"Got unset maxSearchedStatusID and minSearchedStatusID. Request: %s, Response: %s",
request, response);
}
}
/**
* Return partition counts with number of partitions, number of successful responses, and list of
* responses per tier.
*/
public abstract AccumulatedResponses.PartitionCounts getPartitionCounts();
public final AccumulatedResponses getAccumulatedResults() {
return new AccumulatedResponses(successResponses,
errorResponses,
maxIds,
minIds,
ResponseMergerUtils.mergeEarlyTerminationInfo(successResponses),
isMergingAcrossTiers(),
getPartitionCounts(),
getNumSearchedSegments());
}
// Getters are only intended to be used by subclasses. Other users should get data from
// AccumulatedResponses
int getNumResponses() {
return numResponses;
}
int getNumSearchedSegments() {
return numSearchedSegments;
}
List<EarlybirdResponse> getSuccessResponses() {
return successResponses;
}
int getNumResultsAccumulated() {
return numResultsAccumulated;
}
int getSuccessfulEmptyResponseCount() {
return successfulEmptyResponseCount;
}
boolean foundError() {
return !errorResponses.isEmpty();
}
boolean foundEarlyTermination() {
return foundEarlyTermination;
}
}

View File

@ -1,297 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
/**
* A RecencyResponseMerger that prioritizes not losing results during pagination.
* As of now, this merger is used by Gnip to make sure that scrolling returns all results.
*
* The logic used for merging partitions is a bit tricky, because on one hand, we want to make sure
* that we do not miss results on the next pagination request; on the other hand, we want to return as
* many results as we can, and we want to set the minSearchedStatusID of the merged response as low
* as we can, in order to minimize the number of pagination requests.
*
* The merging logic is:
*
* Realtime cluster:
* 1. merge results from all partitions
* 2. if at least one partition response is early-terminated, set earlyTerminated = true
* on the merged response
* 3. set trimmingMinId = max(minSearchedStatusIDs of all partition responses)
* 4. trim all results to trimmingMinId
* 5. set minSearchedStatusID on the merged response to trimmingMinId
* 6. if we have more than numRequested results:
* - keep only the newest numRequested results
* - set minSearchedStatusID of the merged response to the lowest tweet ID in the response
* 7. if at least one partition response is not early-terminated, set
* tierBottomId = max(minSearchedStatusIDs of all non-early-terminated responses)
* (otherwise, set tierBottomId to some undefined value: -1, Long.MAX_VALUE, etc.)
* 8. if minSearchedStatusID of the merged response is the same as tierBottomId,
* clear the early-termination flag on the merged response
*
* The logic in steps 7 and 8 can be a little tricky to understand. They basically say: when we've
* exhausted the "least deep" partition in the realtime cluster, it's time to move to the full
* archive cluster (if we keep going past the "least deep" partition, we might miss results).
*
* Full archive cluster:
* 1. merge results from all partitions
* 2. if at least one partition response is early-terminated, set earlyTerminated = true
* on the merged response
* 3. set trimmingMinId to:
* - max(minSearchedStatusIDs of early-terminated responses), if at least one partition response
* is early-terminated
* - min(minSearchedStatusIDs of all responses), if all partition responses are not
* early-terminated
* 4. trim all results to trimmingMinId
* 5. set minSearchedStatusID of the merged response to trimmingMinId
* 6. if we have more than numRequested results:
* - keep only the newest numRequested results
* - set minSearchedStatusID of the merged response to the lowest tweet ID in the response
*
* The logic in step 3 can be a little tricky to understand. On one hand, if we always set
* trimmingMinId to the highest minSearchedStatusID, then some tweets at the very bottom of some
* partitions will never be returned. Consider the case:
*
* partition 1 has tweets 10, 8, 6
* partition 2 has tweets 9, 7, 5
*
* In this case, we would always trim all results to minId = 6, and tweet 5 would never be returned.
*
* On the other hand, if we always set trimmingMinId to the lowest minSearchedStatusID, then we
* might miss tweets from partitions that early-terminated. Consider the case:
*
* partition 1 has tweets 10, 5, 3, 1 that match our query
* partition 2 has tweets 9, 8, 7, 6, 2 that match our query
*
* If we ask for 3 results, then partition 1 will return tweets 10, 5, 3, and partition 2 will
* return tweets 9, 8, 7. If we set trimmingMinId = min(minSearchedStatusIDs), then the next
* pagination request will have [max_id = 2], and we will miss tweet 6.
*
* So the intuition here is that if we have an early-terminated response, we cannot set
* trimmingMinId to something lower than the minSearchedStatusID returned by that partition
* (otherwise we might miss results from that partition). However, if we've exhausted all
* partitions, then it's OK to not trim any result, because tiers do not intersect, so we will not
* miss any result from the next tier once we get there.
*/
public class StrictRecencyResponseMerger extends RecencyResponseMerger {
private static final SearchTimerStats STRICT_RECENCY_TIMER_AVG =
SearchTimerStats.export("merge_recency_strict", TimeUnit.NANOSECONDS, false, true);
@VisibleForTesting
static final EarlyTerminationTrimmingStats PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
new EarlyTerminationTrimmingStats("strict_recency_partition_merging");
@VisibleForTesting
static final EarlyTerminationTrimmingStats TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
new EarlyTerminationTrimmingStats("strict_recency_tier_merging");
private final EarlybirdCluster cluster;
public StrictRecencyResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode,
EarlybirdFeatureSchemaMerger featureSchemaMerger,
EarlybirdCluster cluster) {
super(requestContext, responses, mode, featureSchemaMerger);
this.cluster = cluster;
}
@Override
protected SearchTimerStats getMergedResponseTimer() {
return STRICT_RECENCY_TIMER_AVG;
}
/**
* Unlike {@link com.twitter.search.earlybird_root.mergers.RecencyResponseMerger}, this method
* takes a much simpler approach by just taking the max of the maxSearchedStatusIds.
*
* Also, when no maxSearchedStatusId is available at all, Long.MIN_VALUE is used instead of
* Long.MAX_VALUE. This ensures that we don't return any result in these cases.
*/
@Override
protected long findMaxFullySearchedStatusID() {
return accumulatedResponses.getMaxIds().isEmpty()
? Long.MIN_VALUE : Collections.max(accumulatedResponses.getMaxIds());
}
/**
* This method is subtly different from the base class version: when no minSearchedStatusId is
* available at all, Long.MAX_VALUE is used instead of Long.MIN_VALUE. This ensures that we
* don't return any result in these cases.
*/
@Override
protected long findMinFullySearchedStatusID() {
List<Long> minIds = accumulatedResponses.getMinIds();
if (minIds.isEmpty()) {
return Long.MAX_VALUE;
}
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
return getTrimmingMinId();
}
// When merging tiers, the min ID should be the smallest among the min IDs.
return Collections.min(minIds);
}
@Override
protected TrimStats trimResults(
ThriftSearchResults searchResults, long mergedMin, long mergedMax) {
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
// no results, no trimming needed
return TrimStats.EMPTY_STATS;
}
TrimStats trimStats = new TrimStats();
trimExactDups(searchResults, trimStats);
filterResultsByMergedMinMaxIds(searchResults, mergedMax, mergedMin, trimStats);
int numResults = computeNumResultsToKeep();
if (searchResults.getResultsSize() > numResults) {
trimStats.setResultsTruncatedFromTailCount(searchResults.getResultsSize() - numResults);
searchResults.setResults(searchResults.getResults().subList(0, numResults));
}
return trimStats;
}
/**
* This method is different from the base class version because when minResultId is bigger
* than currentMergedMin, we always take minResultId.
* If we don't do this, we would lose results.
*
* Illustration with an example. Assuming we are outside of the lag threshold.
* Num results requested: 3
* Response 1: min: 100 max: 900 results: 400, 500, 600
* Response 2: min: 300 max: 700 results: 350, 450, 550
*
* Merged results: 600, 550, 500
* Merged max: 900
* Merged min: we could take 300 (minId), or take 500 (minResultId).
*
* If we take minId, and use 300 as the pagination cursor, we'd lose results
* 350 and 450 when we paginate. So we have to take minResultId here.
*/
@Override
protected void setMergedMinSearchedStatusId(
ThriftSearchResults searchResults,
long currentMergedMin,
boolean resultsWereTrimmed) {
if (accumulatedResponses.getMinIds().isEmpty()) {
return;
}
long minId = currentMergedMin;
if (resultsWereTrimmed
&& (searchResults != null)
&& searchResults.isSetResults()
&& (searchResults.getResultsSize() > 0)) {
List<ThriftSearchResult> results = searchResults.getResults();
minId = results.get(results.size() - 1).getId();
}
searchResults.setMinSearchedStatusID(minId);
}
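// A minimal usage sketch (hypothetical helper) of the rule documented above, using the numbers
// from the example: after trimming to the 3 newest results the kept IDs are 600, 550, 500, so the
// merged minSearchedStatusID becomes 500 (the oldest kept result) rather than the smaller
// per-partition minimum, which would skip results 350 and 450 on the next pagination request.
private static long mergedMinAfterTrimmingSketch(List<Long> keptResultIdsNewestFirst) {
  // The pagination cursor must not move below the oldest result we actually returned.
  return keptResultIdsNewestFirst.get(keptResultIdsNewestFirst.size() - 1);
}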
@Override
protected boolean clearEarlyTerminationIfReachingTierBottom(EarlybirdResponse mergedResponse) {
if (EarlybirdCluster.isArchive(cluster)) {
// We don't need to worry about the tier bottom when merging partition responses in the full
// archive cluster: if all partitions were exhausted and we didn't trim the results, then
// the early-terminated flag on the merged response will be false. If at least one partition
// is early-terminated, or we trimmed some results, then the early-terminated flag on the
// merged response will be true, and we should continue getting results from this tier before
// we move to the next one.
return false;
}
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
if (searchResults.getMinSearchedStatusID() == getTierBottomId()) {
mergedResponse.getEarlyTerminationInfo().setEarlyTerminated(false);
mergedResponse.getEarlyTerminationInfo().unsetMergedEarlyTerminationReasons();
responseMessageBuilder.debugVerbose(
"Set earlytermination to false because minSearchedStatusId is tier bottom");
return true;
}
return false;
}
@Override
protected boolean shouldEarlyTerminateWhenEnoughTrimmedResults() {
return false;
}
@Override
protected final EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForPartitions() {
return PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
}
@Override
protected final EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForTiers() {
return TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
}
/** Determines the bottom of the realtime cluster, based on the partition responses. */
private long getTierBottomId() {
Preconditions.checkState(!EarlybirdCluster.isArchive(cluster));
long tierBottomId = -1;
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
if (!isEarlyTerminated(response)
&& response.isSetSearchResults()
&& response.getSearchResults().isSetMinSearchedStatusID()
&& (response.getSearchResults().getMinSearchedStatusID() > tierBottomId)) {
tierBottomId = response.getSearchResults().getMinSearchedStatusID();
}
}
return tierBottomId;
}
/** Determines the minId to which all results should be trimmed. */
private long getTrimmingMinId() {
List<Long> minIds = accumulatedResponses.getMinIds();
Preconditions.checkArgument(!minIds.isEmpty());
if (!EarlybirdCluster.isArchive(cluster)) {
return Collections.max(minIds);
}
long maxOfEarlyTerminatedMins = -1;
long minOfAllMins = Long.MAX_VALUE;
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
if (response.isSetSearchResults()
&& response.getSearchResults().isSetMinSearchedStatusID()) {
long minId = response.getSearchResults().getMinSearchedStatusID();
minOfAllMins = Math.min(minOfAllMins, minId);
if (isEarlyTerminated(response)) {
maxOfEarlyTerminatedMins = Math.max(maxOfEarlyTerminatedMins, minId);
}
}
}
if (maxOfEarlyTerminatedMins >= 0) {
return maxOfEarlyTerminatedMins;
} else {
return minOfAllMins;
}
}
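// A worked sketch (hypothetical helper) mirroring the trimming rule above and the class comment
// examples: if at least one partition response early-terminated, trim to the largest of the
// early-terminated minSearchedStatusIDs so that partition's unseen results are not skipped; if no
// response early-terminated, trim to the smallest min so bottom-of-partition tweets (tweet 5 in
// the "10, 8, 6" / "9, 7, 5" example) can still be returned.
private static long archiveTrimmingMinIdSketch(List<Long> allMins, List<Long> earlyTerminatedMins) {
  if (!earlyTerminatedMins.isEmpty()) {
    return Collections.max(earlyTerminatedMins);
  }
  return Collections.min(allMins);
}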
/** Determines if the given earlybird response is early terminated. */
private boolean isEarlyTerminated(EarlybirdResponse response) {
return response.isSetEarlyTerminationInfo()
&& response.getEarlyTerminationInfo().isEarlyTerminated();
}
}

View File

@ -1,688 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.common.collections.Pair;
import com.twitter.common.quantity.Amount;
import com.twitter.common.quantity.Time;
import com.twitter.common.util.Clock;
import com.twitter.search.common.futures.Futures;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
import com.twitter.search.common.relevance.utils.ResultComparators;
import com.twitter.search.common.search.EarlyTerminationState;
import com.twitter.search.common.util.FinagleUtil;
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird.thrift.ThriftTweetSource;
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.search.earlybird_root.common.EarlybirdServiceResponse;
import com.twitter.util.Function;
import com.twitter.util.Function0;
import com.twitter.util.Future;
/** Utility functions for merging recency and relevance results. */
public class SuperRootResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(SuperRootResponseMerger.class);
private static final String ALL_STATS_PREFIX = "superroot_response_merger_";
private static final SearchCounter FULL_ARCHIVE_MIN_ID_GREATER_THAN_REALTIME_MIN_ID =
SearchCounter.export("full_archive_min_id_greater_than_realtime_min_id");
private static final String ERROR_FORMAT = "%s%s_errors_from_cluster_%s_%s";
private final ThriftSearchRankingMode rankingMode;
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
private final String featureStatPrefix;
private final Clock clock;
private final String rankingModeStatPrefix;
private final SearchCounter mergedResponseSearchResultsNotSet;
private final SearchCounter invalidMinStatusId;
private final SearchCounter invalidMaxStatusId;
private final SearchCounter noMinIds;
private final SearchCounter noMaxIds;
private final SearchCounter mergedResponses;
private final SearchCounter mergedResponsesWithExactDups;
private final LoadingCache<Pair<ThriftTweetSource, ThriftTweetSource>, SearchCounter> dupsStats;
private static final EarlybirdResponse EMPTY_RESPONSE =
new EarlybirdResponse(EarlybirdResponseCode.SUCCESS, 0)
.setSearchResults(new ThriftSearchResults()
.setResults(Lists.<ThriftSearchResult>newArrayList()));
/**
* Creates a new SuperRootResponseMerger instance.
* @param rankingMode The ranking mode to use when merging results.
* @param featureSchemaMerger The merger that can merge feature schema from different tiers.
* @param clock The clock that will be used to merge results.
*/
public SuperRootResponseMerger(ThriftSearchRankingMode rankingMode,
EarlybirdFeatureSchemaMerger featureSchemaMerger,
Clock clock) {
this.rankingModeStatPrefix = rankingMode.name().toLowerCase();
this.rankingMode = rankingMode;
this.featureSchemaMerger = featureSchemaMerger;
this.clock = clock;
this.featureStatPrefix = "superroot_" + rankingMode.name().toLowerCase();
mergedResponseSearchResultsNotSet = SearchCounter.export(
ALL_STATS_PREFIX + rankingModeStatPrefix + "_merged_response_search_results_not_set");
invalidMinStatusId =
SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_invalid_min_status_id");
invalidMaxStatusId =
SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_invalid_max_status_id");
noMinIds = SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_no_min_ids");
noMaxIds = SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix + "_no_max_ids");
mergedResponses = SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix
+ "_merged_responses");
mergedResponsesWithExactDups =
SearchCounter.export(ALL_STATS_PREFIX + rankingModeStatPrefix
+ "_merged_responses_with_exact_dups");
dupsStats = CacheBuilder.newBuilder()
.build(new CacheLoader<Pair<ThriftTweetSource, ThriftTweetSource>, SearchCounter>() {
@Override
public SearchCounter load(Pair<ThriftTweetSource, ThriftTweetSource> key) {
return SearchCounter.export(
ALL_STATS_PREFIX + rankingModeStatPrefix + "_merged_responses_with_exact_dups_"
+ key.getFirst().name() + "_" + key.getSecond().name());
}
});
}
private void incrErrorCount(String cluster, @Nullable EarlybirdResponse response) {
String cause;
if (response != null) {
cause = response.getResponseCode().name().toLowerCase();
} else {
cause = "null_response";
}
String statName = String.format(
ERROR_FORMAT, ALL_STATS_PREFIX, rankingModeStatPrefix, cluster, cause
);
SearchCounter.export(statName).increment();
}
/**
* Merges the given response futures.
*
* @param earlybirdRequestContext The earlybird request.
* @param realtimeResponseFuture The response from the realtime cluster.
* @param protectedResponseFuture The response from the protected cluster.
* @param fullArchiveResponseFuture The response from the full archive cluster.
* @return A future with the merged results.
*/
public Future<EarlybirdResponse> mergeResponseFutures(
final EarlybirdRequestContext earlybirdRequestContext,
final Future<EarlybirdServiceResponse> realtimeResponseFuture,
final Future<EarlybirdServiceResponse> protectedResponseFuture,
final Future<EarlybirdServiceResponse> fullArchiveResponseFuture) {
Future<EarlybirdResponse> mergedResponseFuture = Futures.map(
realtimeResponseFuture, protectedResponseFuture, fullArchiveResponseFuture,
new Function0<EarlybirdResponse>() {
@Override
public EarlybirdResponse apply() {
// If the realtime response is not valid, return an error response.
// Also, the realtime service should always be called.
EarlybirdServiceResponse realtimeResponse = Futures.get(realtimeResponseFuture);
if (realtimeResponse.getServiceState().serviceWasRequested()
&& (!realtimeResponse.getServiceState().serviceWasCalled()
|| !EarlybirdResponseMergeUtil.isValidResponse(
realtimeResponse.getResponse()))) {
incrErrorCount("realtime", realtimeResponse.getResponse());
return EarlybirdResponseMergeUtil.transformInvalidResponse(
realtimeResponse.getResponse(), "realtime");
}
// If we have a protected response and it's not valid, return an error response.
EarlybirdServiceResponse protectedResponse = Futures.get(protectedResponseFuture);
if (protectedResponse.getServiceState().serviceWasCalled()) {
if (!EarlybirdResponseMergeUtil.isValidResponse(protectedResponse.getResponse())) {
incrErrorCount("protected", protectedResponse.getResponse());
return EarlybirdResponseMergeUtil.transformInvalidResponse(
protectedResponse.getResponse(), "protected");
}
}
// If we have a full archive response, check if it's valid.
EarlybirdServiceResponse fullArchiveResponse = Futures.get(fullArchiveResponseFuture);
boolean archiveHasError =
fullArchiveResponse.getServiceState().serviceWasCalled()
&& !EarlybirdResponseMergeUtil.isValidResponse(fullArchiveResponse.getResponse());
// Merge the responses.
EarlybirdResponse mergedResponse = mergeResponses(
earlybirdRequestContext,
realtimeResponse.getResponse(),
protectedResponse.getResponse(),
fullArchiveResponse.getResponse());
// If the realtime clusters didn't return any results, and the full archive cluster
// returned an error response, return an error merged response.
if (archiveHasError && !EarlybirdResponseUtil.hasResults(mergedResponse)) {
incrErrorCount("full_archive", fullArchiveResponse.getResponse());
return EarlybirdResponseMergeUtil.failedEarlybirdResponse(
fullArchiveResponse.getResponse().getResponseCode(),
"realtime clusters had no results and archive cluster response had error");
}
// Corner case: the realtime response could have exactly numRequested results, and could
// be exhausted (not early-terminated). In this case, the request should not have been
// sent to the full archive cluster.
// - If the full archive cluster is not available, or was not requested, then we don't
// need to change anything.
// - If the full archive cluster is available and was requested (but wasn't hit
// because we found enough results in the realtime cluster), then we should set the
// early-termination flag on the merged response, to indicate that we potentially
// have more results for this query in our index.
if ((fullArchiveResponse.getServiceState()
== EarlybirdServiceResponse.ServiceState.SERVICE_NOT_CALLED)
&& !EarlybirdResponseUtil.isEarlyTerminated(realtimeResponse.getResponse())) {
EarlyTerminationInfo earlyTerminationInfo = new EarlyTerminationInfo(true);
earlyTerminationInfo.setEarlyTerminationReason(
EarlyTerminationState.TERMINATED_NUM_RESULTS_EXCEEDED.getTerminationReason());
mergedResponse.setEarlyTerminationInfo(earlyTerminationInfo);
}
// If we've exhausted all clusters, set the minSearchedStatusID to 0.
if (!EarlybirdResponseUtil.isEarlyTerminated(mergedResponse)) {
mergedResponse.getSearchResults().setMinSearchedStatusID(0);
}
return mergedResponse;
}
});
// Handle all merging exceptions.
return handleResponseException(mergedResponseFuture,
"Exception thrown while merging responses.");
}
/**
* Merge the results in the given responses.
*
* @param earlybirdRequestContext The earlybird request context.
* @param realtimeResponse The response from the realtime cluster.
* @param protectedResponse The response from the protected cluster.
* @param fullArchiveResponse The response from the full archive cluster.
* @return The merged response.
*/
private EarlybirdResponse mergeResponses(
EarlybirdRequestContext earlybirdRequestContext,
@Nullable EarlybirdResponse realtimeResponse,
@Nullable EarlybirdResponse protectedResponse,
@Nullable EarlybirdResponse fullArchiveResponse) {
EarlybirdRequest request = earlybirdRequestContext.getRequest();
ThriftSearchQuery searchQuery = request.getSearchQuery();
int numResultsRequested;
if (request.isSetNumResultsToReturnAtRoot()) {
numResultsRequested = request.getNumResultsToReturnAtRoot();
} else {
numResultsRequested = searchQuery.getNumResults();
}
Preconditions.checkState(numResultsRequested > 0);
EarlybirdResponse mergedResponse = EMPTY_RESPONSE.deepCopy();
if ((realtimeResponse != null)
&& (realtimeResponse.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED)) {
mergedResponse = realtimeResponse.deepCopy();
}
if (!mergedResponse.isSetSearchResults()) {
mergedResponseSearchResultsNotSet.increment();
mergedResponse.setSearchResults(
new ThriftSearchResults(Lists.<ThriftSearchResult>newArrayList()));
}
// If either the realtime or the full archive response is early-terminated, we want the merged
// response to be early-terminated too. The early-termination flag from the realtime response
// carries over to the merged response, because mergedResponse is just a deep copy of the
// realtime response. So we only need to check the early-termination flag of the full archive
// response.
if ((fullArchiveResponse != null)
&& EarlybirdResponseUtil.isEarlyTerminated(fullArchiveResponse)) {
mergedResponse.setEarlyTerminationInfo(fullArchiveResponse.getEarlyTerminationInfo());
}
// If realtime has empty results and protected has some results then we copy the early
// termination information if that is present
if (protectedResponse != null
&& mergedResponse.getSearchResults().getResults().isEmpty()
&& !protectedResponse.getSearchResults().getResults().isEmpty()
&& EarlybirdResponseUtil.isEarlyTerminated(protectedResponse)) {
mergedResponse.setEarlyTerminationInfo(protectedResponse.getEarlyTerminationInfo());
}
// Merge the results.
List<ThriftSearchResult> mergedResults = mergeResults(
numResultsRequested, realtimeResponse, protectedResponse, fullArchiveResponse);
// Trim the merged results if necessary.
boolean resultsTrimmed = false;
if (mergedResults.size() > numResultsRequested
&& !(searchQuery.isSetRelevanceOptions()
&& searchQuery.getRelevanceOptions().isReturnAllResults())) {
// If we have more results than requested, trim the result list and re-adjust
// minSearchedStatusID.
mergedResults = mergedResults.subList(0, numResultsRequested);
// Mark early termination in merged response
if (!EarlybirdResponseUtil.isEarlyTerminated(mergedResponse)) {
EarlyTerminationInfo earlyTerminationInfo = new EarlyTerminationInfo(true);
earlyTerminationInfo.setEarlyTerminationReason(
EarlyTerminationState.TERMINATED_NUM_RESULTS_EXCEEDED.getTerminationReason());
mergedResponse.setEarlyTerminationInfo(earlyTerminationInfo);
}
resultsTrimmed = true;
}
mergedResponse.getSearchResults().setResults(mergedResults);
featureSchemaMerger.mergeFeatureSchemaAcrossClusters(
earlybirdRequestContext,
mergedResponse,
featureStatPrefix,
realtimeResponse,
protectedResponse,
fullArchiveResponse);
// Set the minSearchedStatusID and maxSearchedStatusID fields on the merged response.
setMinSearchedStatusId(mergedResponse, realtimeResponse, protectedResponse, fullArchiveResponse,
resultsTrimmed);
setMaxSearchedStatusId(mergedResponse, realtimeResponse, protectedResponse,
fullArchiveResponse);
int numRealtimeSearchedSegments =
(realtimeResponse != null && realtimeResponse.isSetNumSearchedSegments())
? realtimeResponse.getNumSearchedSegments()
: 0;
int numProtectedSearchedSegments =
(protectedResponse != null && protectedResponse.isSetNumSearchedSegments())
? protectedResponse.getNumSearchedSegments()
: 0;
int numArchiveSearchedSegments =
(fullArchiveResponse != null && fullArchiveResponse.isSetNumSearchedSegments())
? fullArchiveResponse.getNumSearchedSegments()
: 0;
mergedResponse.setNumSearchedSegments(
numRealtimeSearchedSegments + numProtectedSearchedSegments + numArchiveSearchedSegments);
if (earlybirdRequestContext.getRequest().getDebugMode() > 0) {
mergedResponse.setDebugString(
mergeClusterDebugStrings(realtimeResponse, protectedResponse, fullArchiveResponse));
}
return mergedResponse;
}
/**
* Merges the given responses.
*
* @param numResults the number of results requested
* @param realtimeResponse the response from the realtime response
* @param protectedResponse the response from the protected response
* @param fullArchiveResponse the response from the full archive response
* @return the list of merged results
*/
private List<ThriftSearchResult> mergeResults(int numResults,
@Nullable EarlybirdResponse realtimeResponse,
@Nullable EarlybirdResponse protectedResponse,
@Nullable EarlybirdResponse fullArchiveResponse) {
mergedResponses.increment();
// We first merge the results from the two realtime clusters, Realtime cluster and
// Realtime Protected Tweets cluster
List<ThriftSearchResult> mergedResults = mergePublicAndProtectedRealtimeResults(
numResults,
realtimeResponse,
protectedResponse,
fullArchiveResponse,
clock);
EarlybirdResponseMergeUtil.addResultsToList(mergedResults, fullArchiveResponse,
ThriftTweetSource.FULL_ARCHIVE_CLUSTER);
List<ThriftSearchResult> distinctMergedResults =
EarlybirdResponseMergeUtil.distinctByStatusId(mergedResults, dupsStats);
if (mergedResults != distinctMergedResults) {
mergedResponsesWithExactDups.increment();
}
if (rankingMode == ThriftSearchRankingMode.RELEVANCE
|| rankingMode == ThriftSearchRankingMode.TOPTWEETS) {
distinctMergedResults.sort(ResultComparators.SCORE_COMPARATOR);
} else {
distinctMergedResults.sort(ResultComparators.ID_COMPARATOR);
}
return distinctMergedResults;
}
/**
* Method for merging tweets from protected and realtime clusters
* - realtime, guaranteed newer than any archive tweets
* - protected, also realtime, but with a potentially larger window (optional)
* - archive, public, guaranteed older than any public realtime tweets (optional, used for
* id limits, *not added to results*)
* It adds the ThriftSearchResults from protected tweets to the realtimeResponse
*
* Algorithm diagram: (with newer tweets at the top)
* ------------------------------------ <--- protected maxSearchedStatusID
* |C:Newest protected realtime tweets|
* | (does not exist if realtime |
* | maxID >= protected maxID) |
*
* | ------------------------ | <--- 60 seconds ago
* |D:Newer protected realtime tweets |
* | (does not exist if realtime |
* | maxID >= 60 seconds ago) |
* ---------- | ------------------------ | <--- public realtime maxSearchedStatusID
* |A:Public| |E:Automatically valid protected |
* |realtime| |realtime tweets |
* ---------- | ------------------------ | <--- public realtime minSearchedStatusID
* | |
* ---------- | E if archive is present | <--- public archive maxSearchedStatusID
* |B:Public| | F if archive is not present |
* |archive | | |
* ---------- | ------------------------ | <--- public archive minSearchedStatusID
* |F:Older protected realtime tweets |
* | (does not exist if protected |
* | minID >= public minID) |
* ------------------------------------ <--- protected minSearchedStatusID
* Step 1: Select tweets from groups A, and E. If this is enough, return them
* Step 2: Select tweets from groups A, E, and F. If this is enough, return them
* Step 3: Select tweets from groups A, D, E, and F and return them
*
* There are two primary tradeoffs, both of which favor public tweets:
* (1) Benefit: While public indexing latency is < 60s, auto-updating never misses public tweets
* Cost: Absence of public tweets may delay protected tweets from being searchable for 60s
* (2) Benefit: No failure or delay from the protected cluster will affect realtime results
* Cost: If the protected cluster indexes more slowly, auto-update may miss its tweets
*
* @param fullArchiveTweets - used solely for generating anchor points, not merged in.
*/
@VisibleForTesting
static List<ThriftSearchResult> mergePublicAndProtectedRealtimeResults(
int numRequested,
EarlybirdResponse realtimeTweets,
EarlybirdResponse realtimeProtectedTweets,
@Nullable EarlybirdResponse fullArchiveTweets,
Clock clock) {
// See which results will actually be used
boolean isRealtimeUsable = EarlybirdResponseUtil.hasResults(realtimeTweets);
boolean isArchiveUsable = EarlybirdResponseUtil.hasResults(fullArchiveTweets);
boolean isProtectedUsable = EarlybirdResponseUtil.hasResults(realtimeProtectedTweets);
long minId = Long.MIN_VALUE;
long maxId = Long.MAX_VALUE;
if (isRealtimeUsable) {
// Determine the actual upper/lower bounds on the tweet id
if (realtimeTweets.getSearchResults().isSetMinSearchedStatusID()) {
minId = realtimeTweets.getSearchResults().getMinSearchedStatusID();
}
if (realtimeTweets.getSearchResults().isSetMaxSearchedStatusID()) {
maxId = realtimeTweets.getSearchResults().getMaxSearchedStatusID();
}
int justRight = realtimeTweets.getSearchResults().getResultsSize();
if (isArchiveUsable) {
justRight += fullArchiveTweets.getSearchResults().getResultsSize();
if (fullArchiveTweets.getSearchResults().isSetMinSearchedStatusID()) {
long fullArchiveMinId = fullArchiveTweets.getSearchResults().getMinSearchedStatusID();
if (fullArchiveMinId <= minId) {
minId = fullArchiveMinId;
} else {
FULL_ARCHIVE_MIN_ID_GREATER_THAN_REALTIME_MIN_ID.increment();
}
}
}
if (isProtectedUsable) {
for (ThriftSearchResult result : realtimeProtectedTweets.getSearchResults().getResults()) {
if (result.getId() >= minId && result.getId() <= maxId) {
justRight++;
}
}
}
if (justRight < numRequested) {
// Since this is only used as an upper bound, old (pre-2010) ids are still handled correctly
maxId = Math.max(
maxId,
SnowflakeIdParser.generateValidStatusId(
clock.nowMillis() - Amount.of(60, Time.SECONDS).as(Time.MILLISECONDS), 0));
}
}
List<ThriftSearchResult> mergedSearchResults = Lists.newArrayListWithCapacity(numRequested * 2);
// Add valid tweets in order of priority: protected, then realtime
// Only add results that are within range (that check only matters for protected)
if (isProtectedUsable) {
EarlybirdResponseMergeUtil.markWithTweetSource(
realtimeProtectedTweets.getSearchResults().getResults(),
ThriftTweetSource.REALTIME_PROTECTED_CLUSTER);
for (ThriftSearchResult result : realtimeProtectedTweets.getSearchResults().getResults()) {
if (result.getId() <= maxId && result.getId() >= minId) {
mergedSearchResults.add(result);
}
}
}
if (isRealtimeUsable) {
EarlybirdResponseMergeUtil.addResultsToList(
mergedSearchResults, realtimeTweets, ThriftTweetSource.REALTIME_CLUSTER);
}
// Set the minSearchedStatusID and maxSearchedStatusID on the protected response to the
// minId and maxId that were used to trim the protected results.
// This is needed in order to correctly set these IDs on the merged response.
ThriftSearchResults protectedResults =
EarlybirdResponseUtil.getResults(realtimeProtectedTweets);
if ((protectedResults != null)
&& protectedResults.isSetMinSearchedStatusID()
&& (protectedResults.getMinSearchedStatusID() < minId)) {
protectedResults.setMinSearchedStatusID(minId);
}
if ((protectedResults != null)
&& protectedResults.isSetMaxSearchedStatusID()
&& (protectedResults.getMaxSearchedStatusID() > maxId)) {
realtimeProtectedTweets.getSearchResults().setMaxSearchedStatusID(maxId);
}
return mergedSearchResults;
}
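// A minimal sketch (hypothetical helper) of the 60-second anchor used above: the protected-tweet
// upper bound is widened beyond the public realtime maxSearchedStatusID only when the public
// clusters did not supply enough results. SnowflakeIdParser.generateValidStatusId is the same
// helper the method body uses to turn a timestamp into a status ID bound.
private static long protectedMaxIdUpperBoundSketch(
    long publicMaxSearchedStatusId, int resultsFoundSoFar, int numRequested, Clock clock) {
  if (resultsFoundSoFar >= numRequested) {
    // Enough public results: protected tweets newer than the public realtime max are not needed.
    return publicMaxSearchedStatusId;
  }
  long sixtySecondsAgoMillis =
      clock.nowMillis() - Amount.of(60, Time.SECONDS).as(Time.MILLISECONDS);
  return Math.max(publicMaxSearchedStatusId,
      SnowflakeIdParser.generateValidStatusId(sixtySecondsAgoMillis, 0));
}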
/**
* Merges the debug strings of the given cluster responses.
*
* @param realtimeResponse The response from the realtime cluster.
* @param protectedResponse The response from the protected cluster.
* @param fullArchiveResponse The response from the full archive cluster.
* @return The merged debug string.
*/
public static String mergeClusterDebugStrings(@Nullable EarlybirdResponse realtimeResponse,
@Nullable EarlybirdResponse protectedResponse,
@Nullable EarlybirdResponse fullArchiveResponse) {
StringBuilder sb = new StringBuilder();
if ((realtimeResponse != null) && realtimeResponse.isSetDebugString()) {
sb.append("Realtime response: ").append(realtimeResponse.getDebugString());
}
if ((protectedResponse != null) && protectedResponse.isSetDebugString()) {
if (sb.length() > 0) {
sb.append("\n");
}
sb.append("Protected response: ").append(protectedResponse.getDebugString());
}
if ((fullArchiveResponse != null) && fullArchiveResponse.isSetDebugString()) {
if (sb.length() > 0) {
sb.append("\n");
}
sb.append("Full archive response: ").append(fullArchiveResponse.getDebugString());
}
if (sb.length() == 0) {
return null;
}
return sb.toString();
}
/**
* Sets the minSearchedStatusID field on the merged response.
*
* @param mergedResponse The merged response.
* @param fullArchiveResponse The full archive response.
* @param resultsTrimmed Whether the merged response results were trimmed.
*/
private void setMinSearchedStatusId(EarlybirdResponse mergedResponse,
EarlybirdResponse realtimeResponse,
EarlybirdResponse protectedResponse,
EarlybirdResponse fullArchiveResponse,
boolean resultsTrimmed) {
Preconditions.checkNotNull(mergedResponse.getSearchResults());
if (resultsTrimmed) {
// We got more results than we asked for and we trimmed them.
// Set minSearchedStatusID to the ID of the oldest result.
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
if (searchResults.getResultsSize() > 0) {
List<ThriftSearchResult> results = searchResults.getResults();
long lastResultId = results.get(results.size() - 1).getId();
searchResults.setMinSearchedStatusID(lastResultId);
}
return;
}
// We did not get more results than we asked for. Get the min of the minSearchedStatusIDs of
// the merged responses.
List<Long> minIDs = Lists.newArrayList();
if (fullArchiveResponse != null
&& fullArchiveResponse.isSetSearchResults()
&& fullArchiveResponse.getSearchResults().isSetMinSearchedStatusID()) {
minIDs.add(fullArchiveResponse.getSearchResults().getMinSearchedStatusID());
if (mergedResponse.getSearchResults().isSetMinSearchedStatusID()
&& mergedResponse.getSearchResults().getMinSearchedStatusID()
< fullArchiveResponse.getSearchResults().getMinSearchedStatusID()) {
invalidMinStatusId.increment();
}
}
if (protectedResponse != null
&& !EarlybirdResponseUtil.hasResults(realtimeResponse)
&& EarlybirdResponseUtil.hasResults(protectedResponse)
&& protectedResponse.getSearchResults().isSetMinSearchedStatusID()) {
minIDs.add(protectedResponse.getSearchResults().getMinSearchedStatusID());
}
if (mergedResponse.getSearchResults().isSetMinSearchedStatusID()) {
minIDs.add(mergedResponse.getSearchResults().getMinSearchedStatusID());
}
if (!minIDs.isEmpty()) {
mergedResponse.getSearchResults().setMinSearchedStatusID(Collections.min(minIDs));
} else {
noMinIds.increment();
}
}
/**
* Sets the maxSearchedStatusID field on the merged response.
*
* @param mergedResponse The merged response.
* @param fullArchiveResponse The full archive response.
*/
private void setMaxSearchedStatusId(EarlybirdResponse mergedResponse,
EarlybirdResponse realtimeResponse,
EarlybirdResponse protectedResponse,
EarlybirdResponse fullArchiveResponse) {
Preconditions.checkNotNull(mergedResponse.getSearchResults());
List<Long> maxIDs = Lists.newArrayList();
if (fullArchiveResponse != null
&& fullArchiveResponse.isSetSearchResults()
&& fullArchiveResponse.getSearchResults().isSetMaxSearchedStatusID()) {
maxIDs.add(fullArchiveResponse.getSearchResults().getMaxSearchedStatusID());
if (mergedResponse.getSearchResults().isSetMaxSearchedStatusID()
&& fullArchiveResponse.getSearchResults().getMaxSearchedStatusID()
> mergedResponse.getSearchResults().getMaxSearchedStatusID()) {
invalidMaxStatusId.increment();
}
}
if (protectedResponse != null
&& !EarlybirdResponseUtil.hasResults(realtimeResponse)
&& EarlybirdResponseUtil.hasResults(protectedResponse)
&& protectedResponse.getSearchResults().isSetMaxSearchedStatusID()) {
maxIDs.add(protectedResponse.getSearchResults().getMaxSearchedStatusID());
}
if (mergedResponse.getSearchResults().isSetMaxSearchedStatusID()) {
maxIDs.add(mergedResponse.getSearchResults().getMaxSearchedStatusID());
}
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
if (searchResults.getResultsSize() > 0) {
List<ThriftSearchResult> results = searchResults.getResults();
maxIDs.add(results.get(0).getId());
}
if (!maxIDs.isEmpty()) {
mergedResponse.getSearchResults().setMaxSearchedStatusID(Collections.max(maxIDs));
} else {
noMaxIds.increment();
}
}
/**
* Handles exceptions thrown while merging responses. Timeout exceptions are converted to
* SERVER_TIMEOUT_ERROR responses. All other exceptions are converted to PERSISTENT_ERROR
* responses.
*/
private Future<EarlybirdResponse> handleResponseException(
Future<EarlybirdResponse> responseFuture, final String debugMsg) {
return responseFuture.handle(
new Function<Throwable, EarlybirdResponse>() {
@Override
public EarlybirdResponse apply(Throwable t) {
EarlybirdResponseCode responseCode = EarlybirdResponseCode.PERSISTENT_ERROR;
if (FinagleUtil.isTimeoutException(t)) {
responseCode = EarlybirdResponseCode.SERVER_TIMEOUT_ERROR;
}
EarlybirdResponse response = new EarlybirdResponse(responseCode, 0);
response.setDebugString(debugMsg + "\n" + t);
return response;
}
});
}
}

View File

@ -1,90 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Collections2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsRequest;
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsResults;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
/**
* Merger class to merge termstats EarlybirdResponse objects
*/
public class TermStatisticsResponseMerger extends EarlybirdResponseMerger {
private static final Logger LOG = LoggerFactory.getLogger(TermStatisticsResponseMerger.class);
private static final SearchTimerStats TIMER =
SearchTimerStats.export("merge_term_stats", TimeUnit.NANOSECONDS, false, true);
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
public TermStatisticsResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode) {
super(requestContext, responses, mode);
}
@Override
protected SearchTimerStats getMergedResponseTimer() {
return TIMER;
}
@Override
protected double getDefaultSuccessResponseThreshold() {
return SUCCESSFUL_RESPONSE_THRESHOLD;
}
@Override
protected EarlybirdResponse internalMerge(EarlybirdResponse termStatsResponse) {
ThriftTermStatisticsRequest termStatisticsRequest =
requestContext.getRequest().getTermStatisticsRequest();
Collection<EarlybirdResponse> termStatsResults =
Collections2.filter(accumulatedResponses.getSuccessResponses(),
earlybirdResponse -> earlybirdResponse.isSetTermStatisticsResults());
ThriftTermStatisticsResults results =
new ThriftTermResultsMerger(
termStatsResults,
termStatisticsRequest.getHistogramSettings())
.merge();
if (results.getTermResults().isEmpty()) {
final String line = "No results returned from any backend for term statistics request: {}";
// If the termstats request was not empty and we got empty results, log it as a warning;
// otherwise log it as debug.
if (termStatisticsRequest.getTermRequestsSize() > 0) {
LOG.warn(line, termStatisticsRequest);
} else {
LOG.debug(line, termStatisticsRequest);
}
}
termStatsResponse.setTermStatisticsResults(results);
termStatsResponse.setSearchResults(ThriftTermResultsMerger.mergeSearchStats(termStatsResults));
FacetsResultsUtils.fixNativePhotoUrl(results.getTermResults().values());
LOG.debug("TermStats call completed successfully: {}", termStatsResponse);
return termStatsResponse;
}
@Override
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
boolean foundEarlyTermination) {
    // To get accurate term stats, we must never early terminate the tier merge.
return false;
}
}

View File

@ -1,472 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.twitter.search.common.metrics.SearchCounter;
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.ThriftHistogramSettings;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird.thrift.ThriftTermRequest;
import com.twitter.search.earlybird.thrift.ThriftTermResults;
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsResults;
/**
* Takes multiple successful EarlybirdResponses and merges them.
*/
public class ThriftTermResultsMerger {
private static final Logger LOG = LoggerFactory.getLogger(ThriftTermResultsMerger.class);
private static final SearchCounter BIN_ID_GAP_COUNTER =
SearchCounter.export("thrift_term_results_merger_found_gap_in_bin_ids");
private static final SearchCounter MIN_COMPLETE_BIN_ID_ADJUSTED_NULL =
SearchCounter.export("thrift_term_results_merger_min_complete_bin_id_adjusted_null");
private static final SearchCounter MIN_COMPLETE_BIN_ID_NULL_WITHOUT_BINS =
SearchCounter.export("thrift_term_results_merger_min_complete_bin_id_null_without_bins");
private static final SearchCounter MIN_COMPLETE_BIN_ID_OUT_OF_RANGE =
SearchCounter.export("thrift_term_results_merger_min_complete_bin_id_out_of_range");
private static final SearchCounter RESPONSE_WITHOUT_DRIVING_QUERY_HIT =
SearchCounter.export("response_without_driving_query_hit");
private static final ThriftTermRequest GLOBAL_COUNT_REQUEST =
new ThriftTermRequest().setFieldName("").setTerm("");
/**
* Sorted list of the most recent (and contiguous) numBins binIds across all responses.
   * Expected to be an empty list if this request did not ask for histograms, or if it
   * asked for histograms with numBins set to 0.
*/
@Nonnull
private final List<Integer> mostRecentBinIds;
/**
* The first binId in the {@link #mostRecentBinIds} list. This value is not meant to be used in
* case mostRecentBinIds is an empty list.
*/
private final int firstBinId;
/**
* For each unique ThriftTermRequest, stores an array of the total counts for all the binIds
* that we will return, summed up across all earlybird responses.
*
* The values in each totalCounts array correspond to the binIds in the
* {@link #mostRecentBinIds} list.
*
* Key: thrift term request.
* Value: array of the total counts summed up across all earlybird responses for the key's
* term request, corresponding to the binIds in {@link #mostRecentBinIds}.
*/
private final Map<ThriftTermRequest, int[]> mergedTermRequestTotalCounts = Maps.newHashMap();
/**
   * Merged results for each unique term request. Total counts and facet metadata are merged in
   * as responses are processed; histogram bins are filled in from the per-request counts when
   * the merged response is built.
*/
private final Map<ThriftTermRequest, ThriftTermResults> termResultsMap = Maps.newHashMap();
private final ThriftHistogramSettings histogramSettings;
/**
* Only relevant for merging responses with histogram settings.
* This will be null either if (1) the request is not asking for histograms at all, or if
* (2) numBins was set to 0 (and no bin can be considered complete).
* If not null, the minCompleteBinId will be computed as the max over all merged responses'
* minCompleteBinId's.
*/
@Nullable
private final Integer minCompleteBinId;
/**
* Create merger with collections of results to merge
*/
public ThriftTermResultsMerger(Collection<EarlybirdResponse> termStatsResults,
ThriftHistogramSettings histogramSettings) {
this.histogramSettings = histogramSettings;
Collection<EarlybirdResponse> filteredTermStatsResults =
filterOutEmptyEarlybirdResponses(termStatsResults);
this.mostRecentBinIds = findMostRecentBinIds(histogramSettings, filteredTermStatsResults);
this.firstBinId = mostRecentBinIds.isEmpty()
? Integer.MAX_VALUE // Should not be used if mostRecentBinIds is empty.
: mostRecentBinIds.get(0);
List<Integer> minCompleteBinIds =
Lists.newArrayListWithCapacity(filteredTermStatsResults.size());
for (EarlybirdResponse response : filteredTermStatsResults) {
Preconditions.checkState(response.getResponseCode() == EarlybirdResponseCode.SUCCESS,
"Unsuccessful responses should not be given to ThriftTermResultsMerger.");
Preconditions.checkState(response.getTermStatisticsResults() != null,
"Response given to ThriftTermResultsMerger has no termStatisticsResults.");
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
List<Integer> binIds = termStatisticsResults.getBinIds();
for (Map.Entry<ThriftTermRequest, ThriftTermResults> entry
: termStatisticsResults.getTermResults().entrySet()) {
ThriftTermRequest termRequest = entry.getKey();
ThriftTermResults termResults = entry.getValue();
adjustTotalCount(termResults, binIds);
addTotalCountData(termRequest, termResults);
if (histogramSettings != null) {
Preconditions.checkState(termStatisticsResults.isSetBinIds());
addHistogramData(termRequest, termResults, termStatisticsResults.getBinIds());
}
}
if (histogramSettings != null) {
addMinCompleteBinId(minCompleteBinIds, response);
}
}
minCompleteBinId = minCompleteBinIds.isEmpty() ? null : Collections.max(minCompleteBinIds);
}
/**
* Take out any earlybird responses that we know did not match anything relevant to the query,
* and may have erroneous binIds.
*/
private Collection<EarlybirdResponse> filterOutEmptyEarlybirdResponses(
Collection<EarlybirdResponse> termStatsResults) {
List<EarlybirdResponse> emptyResponses = Lists.newArrayList();
List<EarlybirdResponse> nonEmptyResponses = Lists.newArrayList();
for (EarlybirdResponse response : termStatsResults) {
// Guard against erroneously merging and returning 0 counts when we actually have data to
// return from other partitions.
// When a query doesn't match anything at all on an earlybird, the binIds that are returned
// do not correspond at all to the actual query, and are just based on the data range on the
// earlybird itself.
// We can identify these responses as (1) being non-early terminated, and (2) having 0
// hits processed.
if (isTermStatResponseEmpty(response)) {
emptyResponses.add(response);
} else {
nonEmptyResponses.add(response);
}
}
// If all responses were "empty", we will just use those to merge into a new set of empty
// responses, using the binIds provided.
return nonEmptyResponses.isEmpty() ? emptyResponses : nonEmptyResponses;
}
private boolean isTermStatResponseEmpty(EarlybirdResponse response) {
return response.isSetSearchResults()
&& (response.getSearchResults().getNumHitsProcessed() == 0
|| drivingQueryHasNoHits(response))
&& response.isSetEarlyTerminationInfo()
&& !response.getEarlyTerminationInfo().isEarlyTerminated();
}
/**
* If the global count bins are all 0, then we know the driving query has no hits.
   * This check was added as a short-term solution for SEARCH-5476; it only kicks in when the
   * client sets includeGlobalCounts on the request.
*/
private boolean drivingQueryHasNoHits(EarlybirdResponse response) {
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
if (termStatisticsResults == null || termStatisticsResults.getTermResults() == null) {
// If there's no term stats response, be conservative and return false.
return false;
} else {
ThriftTermResults globalCounts =
termStatisticsResults.getTermResults().get(GLOBAL_COUNT_REQUEST);
if (globalCounts == null) {
// We cannot tell if driving query has no hits, be conservative and return false.
return false;
} else {
for (Integer i : globalCounts.getHistogramBins()) {
if (i > 0) {
return false;
}
}
RESPONSE_WITHOUT_DRIVING_QUERY_HIT.increment();
return true;
}
}
}
private static List<Integer> findMostRecentBinIds(
ThriftHistogramSettings histogramSettings,
Collection<EarlybirdResponse> filteredTermStatsResults) {
Integer largestFirstBinId = null;
List<Integer> binIdsToUse = null;
if (histogramSettings != null) {
int numBins = histogramSettings.getNumBins();
for (EarlybirdResponse response : filteredTermStatsResults) {
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
Preconditions.checkState(termStatisticsResults.getBinIds().size() == numBins,
"expected all results to have the same numBins. "
+ "request numBins: %s, response numBins: %s",
numBins, termStatisticsResults.getBinIds().size());
if (termStatisticsResults.getBinIds().size() > 0) {
Integer firstBinId = termStatisticsResults.getBinIds().get(0);
if (largestFirstBinId == null
|| largestFirstBinId.intValue() < firstBinId.intValue()) {
largestFirstBinId = firstBinId;
binIdsToUse = termStatisticsResults.getBinIds();
}
}
}
}
return binIdsToUse == null
? Collections.<Integer>emptyList()
// Just in case, make a copy of the binIds so that we don't reuse the same list from one
// of the responses we're merging.
: Lists.newArrayList(binIdsToUse);
}
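  // Illustrative example (not part of the original source): with numBins = 3 and two responses
  // whose binIds are [100, 101, 102] and [101, 102, 103], the second response has the larger
  // firstBinId (101), so a copy of [101, 102, 103] is used as mostRecentBinIds.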
private void addMinCompleteBinId(List<Integer> minCompleteBinIds,
EarlybirdResponse response) {
Preconditions.checkNotNull(histogramSettings);
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
if (termStatisticsResults.isSetMinCompleteBinId()) {
// This is the base case. Early terminated or not, this is the proper minCompleteBinId
// that we're told to use for this response.
minCompleteBinIds.add(termStatisticsResults.getMinCompleteBinId());
} else if (termStatisticsResults.getBinIds().size() > 0) {
      // This is the case where no bins were complete. For the purposes of merging, we mark
      // all the binIds in this response as non-complete by using "max(binId) + 1" as this
      // response's minCompleteBinId.
// When returning the merged response, we still have a guard for the resulting
// minCompleteBinId being outside of the binIds range, and will set the returned
// minCompleteBinId value to null, if this response's binIds end up being used as the most
// recent ones, and we need to signify that none of the bins are complete.
int binSize = termStatisticsResults.getBinIds().size();
Integer maxBinId = termStatisticsResults.getBinIds().get(binSize - 1);
minCompleteBinIds.add(maxBinId + 1);
LOG.debug("Adjusting null minCompleteBinId for response: {}, histogramSettings {}",
response, histogramSettings);
MIN_COMPLETE_BIN_ID_ADJUSTED_NULL.increment();
} else {
// This should only happen in the case where numBins is set to 0.
Preconditions.checkState(histogramSettings.getNumBins() == 0,
"Expected numBins set to 0. response: %s", response);
Preconditions.checkState(minCompleteBinIds.isEmpty(),
"minCompleteBinIds: %s", minCompleteBinIds);
LOG.debug("Got null minCompleteBinId with no bins for response: {}, histogramSettings {}",
response, histogramSettings);
MIN_COMPLETE_BIN_ID_NULL_WITHOUT_BINS.increment();
}
}
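  // Illustrative example (not part of the original source): suppose both responses cover binIds
  // [100, 101, 102], response A reports minCompleteBinId = 101, and response B has none set.
  // B then contributes 103 (max binId + 1), so the merged value is max(101, 103) = 103. That
  // exceeds maxBinId 102, so setMinCompleteBinId() below leaves the merged field unset.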
private void addTotalCountData(ThriftTermRequest request, ThriftTermResults results) {
ThriftTermResults termResults = termResultsMap.get(request);
if (termResults == null) {
termResultsMap.put(request, results);
} else {
termResults.setTotalCount(termResults.getTotalCount() + results.getTotalCount());
if (termResults.isSetMetadata()) {
termResults.setMetadata(
FacetsResultsUtils.mergeFacetMetadata(termResults.getMetadata(),
results.getMetadata(), null));
}
}
}
/**
* Set results.totalCount to the sum of hits in only the bins that will be returned in
* the merged response.
*/
private void adjustTotalCount(ThriftTermResults results, List<Integer> binIds) {
int adjustedTotalCount = 0;
List<Integer> histogramBins = results.getHistogramBins();
if ((binIds != null) && (histogramBins != null)) {
Preconditions.checkState(
histogramBins.size() == binIds.size(),
"Expected ThriftTermResults to have the same number of histogramBins as binIds set in "
+ " ThriftTermStatisticsResults. ThriftTermResults.histogramBins: %s, "
+ " ThriftTermStatisticsResults.binIds: %s.",
histogramBins, binIds);
for (int i = 0; i < binIds.size(); ++i) {
if (binIds.get(i) >= firstBinId) {
adjustedTotalCount += histogramBins.get(i);
}
}
}
results.setTotalCount(adjustedTotalCount);
}
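  // Illustrative example (not part of the original source): if the merged range starts at
  // firstBinId = 101 and a response has binIds [100, 101, 102] with histogramBins [4, 3, 2],
  // the bin with id 100 is excluded and totalCount is adjusted to 3 + 2 = 5.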
private void addHistogramData(ThriftTermRequest request,
ThriftTermResults results,
List<Integer> binIds) {
int[] requestTotalCounts = mergedTermRequestTotalCounts.get(request);
if (requestTotalCounts == null) {
requestTotalCounts = new int[mostRecentBinIds.size()];
mergedTermRequestTotalCounts.put(request, requestTotalCounts);
}
// Only consider these results if they fall into the mostRecentBinIds range.
//
// The list of returned binIds is expected to be both sorted (in ascending order), and
// contiguous, which allows us to use firstBinId to check if it overlaps with the
// mostRecentBinIds range.
if (binIds.size() > 0 && binIds.get(binIds.size() - 1) >= firstBinId) {
int firstBinIndex;
if (binIds.get(0) == firstBinId) {
// This should be the common case when all partitions have the same binIds,
// no need to do a binary search.
firstBinIndex = 0;
} else {
// The firstBinId must be in the binIds range. We can find it using binary search since
// binIds are sorted.
firstBinIndex = Collections.binarySearch(binIds, firstBinId);
Preconditions.checkState(firstBinIndex >= 0,
"Expected to find firstBinId (%s) in the result binIds: %s, "
+ "histogramSettings: %s, termRequest: %s",
firstBinId, binIds, histogramSettings, request);
}
// Skip binIds that are before the smallest binId that we will use in the merged results.
for (int i = firstBinIndex; i < binIds.size(); i++) {
final Integer currentBinValue = results.getHistogramBins().get(i);
requestTotalCounts[i - firstBinIndex] += currentBinValue.intValue();
}
}
}
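  // Illustrative example (not part of the original source): with mostRecentBinIds
  // [101, 102, 103] (firstBinId = 101) and a response whose binIds are [100, 101, 102] with
  // histogramBins [4, 3, 2], the binary search yields firstBinIndex = 1, so 3 and 2 are added
  // to requestTotalCounts[0] and requestTotalCounts[1]; bin 103 gets no contribution from this
  // response.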
/**
* Return a new ThriftTermStatisticsResults with the total counts merged, and if enabled,
* histogram bins merged.
*/
public ThriftTermStatisticsResults merge() {
ThriftTermStatisticsResults results = new ThriftTermStatisticsResults(termResultsMap);
if (histogramSettings != null) {
mergeHistogramBins(results);
}
return results;
}
/**
   * Takes multiple histogram results and merges them so that:
   * 1) Counts for the same binId (which represents a time bucket) and term are summed.
   * 2) All results are re-indexed to use the most recent bins found in the union of all bins.
*/
private void mergeHistogramBins(ThriftTermStatisticsResults mergedResults) {
mergedResults.setBinIds(mostRecentBinIds);
mergedResults.setHistogramSettings(histogramSettings);
setMinCompleteBinId(mergedResults);
useMostRecentBinsForEachThriftTermResults();
}
private void setMinCompleteBinId(ThriftTermStatisticsResults mergedResults) {
if (mostRecentBinIds.isEmpty()) {
Preconditions.checkState(minCompleteBinId == null);
// This is the case where the requested numBins is set to 0. We don't have any binIds,
// and the minCompleteBinId has to be unset.
LOG.debug("Empty binIds returned for mergedResults: {}", mergedResults);
} else {
Preconditions.checkNotNull(minCompleteBinId);
Integer maxBinId = mostRecentBinIds.get(mostRecentBinIds.size() - 1);
if (minCompleteBinId <= maxBinId) {
mergedResults.setMinCompleteBinId(minCompleteBinId);
} else {
// Leaving the minCompleteBinId unset as it is outside the range of the returned binIds.
LOG.debug("Computed minCompleteBinId: {} is out of maxBinId: {} for mergedResults: {}",
minCompleteBinId, mergedResults);
MIN_COMPLETE_BIN_ID_OUT_OF_RANGE.increment();
}
}
}
/**
* Check that the binIds we are using are contiguous. Increment the provided stat if we find
* a gap, as we don't expect to find any.
* See: SEARCH-4362
*
* @param sortedBinIds most recent numBins sorted binIds.
* @param binIdGapCounter stat to increment if we see a gap in the binId range.
*/
@VisibleForTesting
static void checkForBinIdGaps(List<Integer> sortedBinIds, SearchCounter binIdGapCounter) {
for (int i = sortedBinIds.size() - 1; i > 0; i--) {
final Integer currentBinId = sortedBinIds.get(i);
final Integer previousBinId = sortedBinIds.get(i - 1);
if (previousBinId < currentBinId - 1) {
binIdGapCounter.increment();
break;
}
}
}
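  // Illustrative example (not part of the original source): for sortedBinIds [100, 101, 103],
  // previousBinId 101 is less than 103 - 1, so the gap counter is incremented once and the
  // loop stops.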
/**
* Returns a view containing only the last N items from the list
*/
private static <E> List<E> takeLastN(List<E> lst, int n) {
Preconditions.checkArgument(n <= lst.size(),
"Attempting to take more elements than the list has. List size: %s, n: %s", lst.size(), n);
return lst.subList(lst.size() - n, lst.size());
}
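  // Illustrative example (not part of the original source): takeLastN([100, 101, 102, 103], 2)
  // returns [102, 103]; note that this is a subList view backed by the original list, not a copy.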
private void useMostRecentBinsForEachThriftTermResults() {
for (Map.Entry<ThriftTermRequest, ThriftTermResults> entry : termResultsMap.entrySet()) {
ThriftTermRequest request = entry.getKey();
ThriftTermResults results = entry.getValue();
List<Integer> histogramBins = Lists.newArrayList();
results.setHistogramBins(histogramBins);
int[] requestTotalCounts = mergedTermRequestTotalCounts.get(request);
Preconditions.checkNotNull(requestTotalCounts);
for (int totalCount : requestTotalCounts) {
histogramBins.add(totalCount);
}
}
}
/**
* Merges search stats from several earlybird responses and puts them in
* {@link ThriftSearchResults} structure.
*
* @param responses earlybird responses to merge the search stats from
* @return merged search stats inside of {@link ThriftSearchResults} structure
*/
public static ThriftSearchResults mergeSearchStats(Collection<EarlybirdResponse> responses) {
int numHitsProcessed = 0;
int numPartitionsEarlyTerminated = 0;
for (EarlybirdResponse response : responses) {
ThriftSearchResults searchResults = response.getSearchResults();
if (searchResults != null) {
numHitsProcessed += searchResults.getNumHitsProcessed();
numPartitionsEarlyTerminated += searchResults.getNumPartitionsEarlyTerminated();
}
}
ThriftSearchResults searchResults = new ThriftSearchResults(new ArrayList<>());
searchResults.setNumHitsProcessed(numHitsProcessed);
searchResults.setNumPartitionsEarlyTerminated(numPartitionsEarlyTerminated);
return searchResults;
}
}
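The bin re-indexing arithmetic above is easier to see outside the Thrift types. The following stand-alone sketch (not part of the original source; the class and method names are illustrative only) reproduces the core idea with plain Java collections: choose the bin range with the largest first binId as the merged range, then sum each response's per-term counts into that range, aligned by binId. It assumes each response's binIds are sorted and contiguous, as the merger itself does.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/** Stand-alone sketch of the bin re-indexing merge; not taken from the original sources. */
public final class BinMergeSketch {
  /** Picks the bin range with the largest first binId, mirroring findMostRecentBinIds(). */
  static List<Integer> mostRecentBinIds(List<List<Integer>> perResponseBinIds) {
    List<Integer> best = new ArrayList<>();
    for (List<Integer> binIds : perResponseBinIds) {
      if (!binIds.isEmpty() && (best.isEmpty() || binIds.get(0) > best.get(0))) {
        best = new ArrayList<>(binIds);
      }
    }
    return best;
  }
  /** Sums each response's counts into the merged range, aligned by binId. */
  static int[] mergeCounts(List<Integer> mergedBinIds,
                           List<List<Integer>> perResponseBinIds,
                           List<List<Integer>> perResponseCounts) {
    int[] merged = new int[mergedBinIds.size()];
    int firstBinId = mergedBinIds.isEmpty() ? Integer.MAX_VALUE : mergedBinIds.get(0);
    for (int r = 0; r < perResponseBinIds.size(); r++) {
      List<Integer> binIds = perResponseBinIds.get(r);
      List<Integer> counts = perResponseCounts.get(r);
      for (int i = 0; i < binIds.size(); i++) {
        int offset = binIds.get(i) - firstBinId;
        // Bins older than the merged range are dropped; contiguity keeps the offsets aligned.
        if (offset >= 0 && offset < merged.length) {
          merged[offset] += counts.get(i);
        }
      }
    }
    return merged;
  }
  public static void main(String[] args) {
    // Two partitions with overlapping bin ranges for a single term.
    List<List<Integer>> binIds =
        Arrays.asList(Arrays.asList(100, 101, 102), Arrays.asList(101, 102, 103));
    List<List<Integer>> counts =
        Arrays.asList(Arrays.asList(4, 3, 2), Arrays.asList(1, 5, 7));
    List<Integer> merged = mostRecentBinIds(binIds);           // [101, 102, 103]
    int[] mergedCounts = mergeCounts(merged, binIds, counts);  // [3 + 1, 2 + 5, 0 + 7]
    System.out.println(merged + " -> " + Arrays.toString(mergedCounts));  // [4, 7, 7]
  }
}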

View File

@ -1,97 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.ArrayList;
import java.util.List;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
import com.twitter.search.earlybird.thrift.TierResponse;
public final class TierResponseAccumulator extends ResponseAccumulator {
private static final String TARGET_TYPE_TIER = "tier";
private final List<TierResponse> tierResponses = new ArrayList<>();
// Total number of partitions the request was sent to, across all tiers.
private int totalPartitionsQueriedInAllTiers = 0;
// Among the above partitions, the number of them that returned successful responses.
private int totalSuccessfulPartitionsInAllTiers = 0;
@Override
public String getNameForLogging(int responseIndex, int numTotalResponses) {
return TARGET_TYPE_TIER + (numTotalResponses - responseIndex);
}
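  // Illustrative example (not part of the original source): with numTotalResponses = 3,
  // responseIndex 0, 1 and 2 map to the names "tier3", "tier2" and "tier1" respectively.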
@Override
public String getNameForEarlybirdResponseCodeStats(int responseIndex, int numTotalResponses) {
return TARGET_TYPE_TIER + (numTotalResponses - responseIndex);
}
@Override
protected boolean isMergingAcrossTiers() {
return true;
}
@Override
public boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger) {
if (foundError()) {
return true;
}
int numResults = 0;
for (EarlybirdResponse resp : getSuccessResponses()) {
if (resp.isSetSearchResults()) {
numResults += resp.getSearchResults().getResultsSize();
}
}
return merger.shouldEarlyTerminateTierMerge(numResults, foundEarlyTermination());
}
@Override
public void handleSkippedResponse(EarlybirdResponseCode responseCode) {
tierResponses.add(new TierResponse()
.setNumPartitions(0)
.setNumSuccessfulPartitions(0)
.setTierResponseCode(responseCode));
}
@Override
public void handleErrorResponse(EarlybirdResponse response) {
    // Build a TierResponse, which is only returned when merging results across different tiers.
TierResponse tr = new TierResponse();
if (response != null) {
if (response.isSetResponseCode()) {
tr.setTierResponseCode(response.getResponseCode());
} else {
tr.setTierResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR);
}
tr.setNumPartitions(response.getNumPartitions());
tr.setNumSuccessfulPartitions(0);
totalPartitionsQueriedInAllTiers += response.getNumPartitions();
} else {
tr.setTierResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR)
.setNumPartitions(0)
.setNumSuccessfulPartitions(0);
}
tierResponses.add(tr);
}
@Override
public AccumulatedResponses.PartitionCounts getPartitionCounts() {
return new AccumulatedResponses.PartitionCounts(totalPartitionsQueriedInAllTiers,
totalSuccessfulPartitionsInAllTiers, tierResponses);
}
@Override
public void extraSuccessfulResponseHandler(EarlybirdResponse response) {
// Record tier stats.
totalPartitionsQueriedInAllTiers += response.getNumPartitions();
totalSuccessfulPartitionsInAllTiers += response.getNumSuccessfulPartitions();
tierResponses.add(new TierResponse()
.setNumPartitions(response.getNumPartitions())
.setNumSuccessfulPartitions(response.getNumSuccessfulPartitions())
.setTierResponseCode(EarlybirdResponseCode.SUCCESS));
}
}

View File

@ -1,65 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Preconditions;
import com.twitter.search.common.metrics.SearchTimerStats;
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
import com.twitter.search.earlybird_root.collectors.RelevanceMergeCollector;
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
import com.twitter.util.Future;
/**
* Merger class to merge toptweets EarlybirdResponse objects
*/
public class TopTweetsResponseMerger extends EarlybirdResponseMerger {
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
private static final SearchTimerStats TIMER =
SearchTimerStats.export("merge_top_tweets", TimeUnit.NANOSECONDS, false, true);
public TopTweetsResponseMerger(EarlybirdRequestContext requestContext,
List<Future<EarlybirdResponse>> responses,
ResponseAccumulator mode) {
super(requestContext, responses, mode);
}
@Override
protected SearchTimerStats getMergedResponseTimer() {
return TIMER;
}
@Override
protected double getDefaultSuccessResponseThreshold() {
return SUCCESSFUL_RESPONSE_THRESHOLD;
}
@Override
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
final ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
Preconditions.checkNotNull(searchQuery);
Preconditions.checkState(searchQuery.isSetRankingMode());
Preconditions.checkState(searchQuery.getRankingMode() == ThriftSearchRankingMode.TOPTWEETS);
int numResultsRequested = computeNumResultsToKeep();
RelevanceMergeCollector collector = new RelevanceMergeCollector(responses.size());
addResponsesToCollector(collector);
ThriftSearchResults searchResults = collector.getAllSearchResults();
if (numResultsRequested < searchResults.getResults().size()) {
searchResults.setResults(searchResults.getResults().subList(0, numResultsRequested));
}
mergedResponse.setSearchResults(searchResults);
return mergedResponse;
}
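  // Illustrative example (not part of the original source): if computeNumResultsToKeep()
  // returns 20 and the collector produced 35 merged results, only the first 20 are kept on
  // the merged response.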
}

View File

@ -1,71 +0,0 @@
package com.twitter.search.earlybird_root.mergers;
/**
* Tracks what situations are encountered when trimming results
*/
class TrimStats {
protected static final TrimStats EMPTY_STATS = new TrimStats();
private int maxIdFilterCount = 0;
private int minIdFilterCount = 0;
private int removedDupsCount = 0;
private int resultsTruncatedFromTailCount = 0;
int getMinIdFilterCount() {
return minIdFilterCount;
}
int getRemovedDupsCount() {
return removedDupsCount;
}
int getResultsTruncatedFromTailCount() {
return resultsTruncatedFromTailCount;
}
void decreaseMaxIdFilterCount() {
maxIdFilterCount--;
}
void decreaseMinIdFilterCount() {
minIdFilterCount--;
}
public void clearMaxIdFilterCount() {
this.maxIdFilterCount = 0;
}
public void clearMinIdFilterCount() {
this.minIdFilterCount = 0;
}
void increaseMaxIdFilterCount() {
maxIdFilterCount++;
}
void increaseMinIdFilterCount() {
minIdFilterCount++;
}
void increaseRemovedDupsCount() {
removedDupsCount++;
}
void setResultsTruncatedFromTailCount(int resultsTruncatedFromTailCount) {
this.resultsTruncatedFromTailCount = resultsTruncatedFromTailCount;
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("TrimStats{");
builder.append("maxIdFilterCount=").append(maxIdFilterCount);
builder.append(", minIdFilterCount=").append(minIdFilterCount);
builder.append(", removedDupsCount=").append(removedDupsCount);
builder.append(", resultsTruncatedFromTailCount=").append(resultsTruncatedFromTailCount);
builder.append("}");
return builder.toString();
}
}

View File

@ -1,15 +0,0 @@
java_library(
sources = ["*.java"],
platform = "java8",
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/google/guava",
"3rdparty/jvm/commons-io",
"3rdparty/jvm/org/json",
"src/java/com/twitter/common/util:system-mocks",
"src/java/com/twitter/search/common/dark",
"src/java/com/twitter/search/common/metrics",
"src/java/com/twitter/search/common/util/io/periodic",
"src/java/com/twitter/search/common/util/json",
],
)

Some files were not shown because too many files have changed in this diff.