mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-12-25 11:41:51 +01:00
[docx] split commit for file 4600
Signed-off-by: Ari Archer <ari.web.xyz@gmail.com>
This commit is contained in:
parent
f37e76300b
commit
470dc00686
Binary file not shown.
@ -1,205 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.decider.SearchDecider;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
|
||||
import com.twitter.search.earlybird.config.ServingRange;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
|
||||
import com.twitter.search.queryparser.query.Query;
|
||||
import com.twitter.search.queryparser.query.QueryParserException;
|
||||
import com.twitter.search.queryparser.util.IdTimeRanges;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* A Finagle filter used to filter requests to tiers.
|
||||
* Parses serialized query on Earlybird request, and extracts since / until / since_id / max_id
|
||||
* operators. This filter then tests whether the request overlaps with the given tier. If there
|
||||
* is no overlap, an empty response is returned without actually forwarding the requests to the
|
||||
* underlying service.
|
||||
*/
|
||||
public class EarlybirdTimeRangeFilter extends
|
||||
SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(EarlybirdTimeRangeFilter.class);
|
||||
|
||||
private static final EarlybirdResponse ERROR_RESPONSE =
|
||||
new EarlybirdResponse(EarlybirdResponseCode.PERSISTENT_ERROR, 0)
|
||||
.setSearchResults(new ThriftSearchResults());
|
||||
|
||||
private final ServingRangeProvider servingRangeProvider;
|
||||
private final Optional<EarlybirdTimeFilterQueryRewriter> queryRewriter;
|
||||
|
||||
private static final Map<EarlybirdRequestType, SearchCounter> FAILED_REQUESTS;
|
||||
static {
|
||||
final Map<EarlybirdRequestType, SearchCounter> tempMap =
|
||||
Maps.newEnumMap(EarlybirdRequestType.class);
|
||||
for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
|
||||
tempMap.put(requestType, SearchCounter.export(
|
||||
"time_range_filter_" + requestType.getNormalizedName() + "_failed_requests"));
|
||||
}
|
||||
FAILED_REQUESTS = Collections.unmodifiableMap(tempMap);
|
||||
}
|
||||
|
||||
public static EarlybirdTimeRangeFilter newTimeRangeFilterWithQueryRewriter(
|
||||
ServingRangeProvider servingRangeProvider,
|
||||
SearchDecider decider) {
|
||||
|
||||
return new EarlybirdTimeRangeFilter(servingRangeProvider,
|
||||
Optional.of(new EarlybirdTimeFilterQueryRewriter(servingRangeProvider, decider)));
|
||||
}
|
||||
|
||||
public static EarlybirdTimeRangeFilter newTimeRangeFilterWithoutQueryRewriter(
|
||||
ServingRangeProvider servingRangeProvider) {
|
||||
|
||||
return new EarlybirdTimeRangeFilter(servingRangeProvider, Optional.empty());
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a filter that avoids forwarding requests to unrelated tiers
|
||||
* based on requests' since / until / since_id / max_id.
|
||||
* @param provider Holds the boundary information.
|
||||
*/
|
||||
EarlybirdTimeRangeFilter(
|
||||
ServingRangeProvider provider,
|
||||
Optional<EarlybirdTimeFilterQueryRewriter> rewriter) {
|
||||
|
||||
this.servingRangeProvider = provider;
|
||||
this.queryRewriter = rewriter;
|
||||
}
|
||||
|
||||
public ServingRangeProvider getServingRangeProvider() {
|
||||
return servingRangeProvider;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
|
||||
Query parsedQuery = requestContext.getParsedQuery();
|
||||
if (parsedQuery != null) {
|
||||
// Only perform filtering if serialized query is set.
|
||||
try {
|
||||
IdTimeRanges queryRanges = IdTimeRanges.fromQuery(parsedQuery);
|
||||
if (queryRanges == null) {
|
||||
// No time ranges in query.
|
||||
return issueServiceRequest(service, requestContext);
|
||||
}
|
||||
|
||||
ServingRange servingRange =
|
||||
servingRangeProvider.getServingRange(
|
||||
requestContext, requestContext.useOverrideTierConfig());
|
||||
|
||||
if (queryDoesNotOverlapWithServingRange(queryRanges, servingRange)) {
|
||||
return Future.value(tierSkippedResponse(requestContext.getEarlybirdRequestType(),
|
||||
servingRange));
|
||||
} else {
|
||||
return issueServiceRequest(service, requestContext);
|
||||
}
|
||||
} catch (QueryParserException e) {
|
||||
LOG.warn("Unable to get IdTimeRanges from query: " + parsedQuery.serialize());
|
||||
// The failure here is not due to a miss-formed query from the client, since we already
|
||||
// were able to successfully get a parsed Query from the request.
|
||||
// If we can't determine the time ranges, pass the query along to the tier, and just
|
||||
// restrict it to the timeranges of the tier.
|
||||
return issueServiceRequest(service, requestContext);
|
||||
}
|
||||
} else {
|
||||
// There's no serialized query. Just pass through like an identity filter.
|
||||
return issueServiceRequest(service, requestContext);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean queryDoesNotOverlapWithServingRange(IdTimeRanges queryRanges,
|
||||
ServingRange servingRange) {
|
||||
// As long as a query overlaps with the tier serving range on either side,
|
||||
// the request is not filtered. I.e. we want to be conservative when doing this filtering,
|
||||
// because it is just an optimization. We ignore the inclusiveness / exclusiveness of the
|
||||
// boundaries. If the tier boundary and the query boundry happen to be the same, we do not
|
||||
// filter the request.
|
||||
return queryRanges.getSinceIDExclusive().or(0L)
|
||||
> servingRange.getServingRangeMaxId()
|
||||
|| queryRanges.getMaxIDInclusive().or(Long.MAX_VALUE)
|
||||
< servingRange.getServingRangeSinceId()
|
||||
|| queryRanges.getSinceTimeInclusive().or(0)
|
||||
> servingRange.getServingRangeUntilTimeSecondsFromEpoch()
|
||||
|| queryRanges.getUntilTimeExclusive().or(Integer.MAX_VALUE)
|
||||
< servingRange.getServingRangeSinceTimeSecondsFromEpoch();
|
||||
}
|
||||
|
||||
private Future<EarlybirdResponse> issueServiceRequest(
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service,
|
||||
EarlybirdRequestContext requestContext) {
|
||||
|
||||
try {
|
||||
EarlybirdRequestContext request = requestContext;
|
||||
if (queryRewriter.isPresent()) {
|
||||
request = queryRewriter.get().rewriteRequest(requestContext);
|
||||
}
|
||||
return service.apply(request);
|
||||
} catch (QueryParserException e) {
|
||||
FAILED_REQUESTS.get(requestContext.getEarlybirdRequestType()).increment();
|
||||
String msg = "Failed to add time filter operators";
|
||||
LOG.error(msg, e);
|
||||
|
||||
// Note that in this case it is not clear whether the error is the client's fault or our
|
||||
// fault, so we don't necessarily return a CLIENT_ERROR here.
|
||||
// Currently this actually returns a PERSISTENT_ERROR.
|
||||
if (requestContext.getRequest().getDebugMode() > 0) {
|
||||
return Future.value(
|
||||
ERROR_RESPONSE.deepCopy().setDebugString(msg + ": " + e.getMessage()));
|
||||
} else {
|
||||
return Future.value(ERROR_RESPONSE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a tier skipped response, based on the given request type.
|
||||
*
|
||||
* For recency, relevance, facets and top tweets requests, this method returns a SUCCESS response
|
||||
* with no search results and the minSearchedStatusID and maxSearchedStatusID appropriately set.
|
||||
* For term stats response, it returns a TIER_SKIPPED response, but we need to revisit this.
|
||||
*
|
||||
* @param requestType The type of the request.
|
||||
* @param servingRange The serving range of the tier that we're skipping.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
public static EarlybirdResponse tierSkippedResponse(
|
||||
EarlybirdRequestType requestType,
|
||||
ServingRange servingRange) {
|
||||
String debugMessage =
|
||||
"Tier skipped because it does not intersect with query time boundaries.";
|
||||
if (requestType == EarlybirdRequestType.TERM_STATS) {
|
||||
// If it's a term stats request, return a TIER_SKIPPED response for now.
|
||||
// But we need to figure out the right thing to do here.
|
||||
return new EarlybirdResponse(EarlybirdResponseCode.TIER_SKIPPED, 0)
|
||||
.setDebugString(debugMessage);
|
||||
} else {
|
||||
// minIds in ServingRange instances are set to tierLowerBoundary - 1, because the
|
||||
// since_id operator is exclusive. The max_id operator on the other hand is inclusive,
|
||||
// so maxIds in ServingRange instances are also set to tierUpperBoundary - 1.
|
||||
// Here we want both of them to be inclusive, so we need to increment the minId by 1.
|
||||
return EarlybirdResponseUtil.tierSkippedRootResponse(
|
||||
servingRange.getServingRangeSinceId() + 1,
|
||||
servingRange.getServingRangeMaxId(),
|
||||
debugMessage);
|
||||
}
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,167 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import javax.inject.Inject;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.decider.SearchDecider;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdDebugInfo;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.queryparser.query.Query;
|
||||
import com.twitter.search.queryparser.query.QueryNodeUtils;
|
||||
import com.twitter.search.queryparser.query.QueryParserException;
|
||||
import com.twitter.search.queryparser.query.search.SearchOperator;
|
||||
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
|
||||
import com.twitter.search.queryparser.visitors.DropAllProtectedOperatorVisitor;
|
||||
import com.twitter.search.queryparser.visitors.QueryTreeIndex;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Full archive service filter validates requests with a protected operator, appends the
|
||||
* '[exclude protected]' operator by default, and appends '[filter protected]' operator instead if
|
||||
* 'getProtectedTweetsOnly' request param is set. A client error response is returned if any of the
|
||||
* following rules is violated.
|
||||
* 1. There is at most one 'protected' operator in the query.
|
||||
* 2. If there is a 'protected' operator, it must be in the query root node.
|
||||
* 3. The parent node of the 'protected' operator must not be negated and must be a conjunction.
|
||||
* 4. If there is a positive 'protected' operator, 'followedUserIds' and 'searcherId' request
|
||||
* params must be set.
|
||||
*/
|
||||
public class FullArchiveProtectedOperatorFilter extends
|
||||
SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(FullArchiveProtectedOperatorFilter.class);
|
||||
private static final SearchOperator EXCLUDE_PROTECTED_OPERATOR =
|
||||
new SearchOperator(SearchOperator.Type.EXCLUDE, SearchOperatorConstants.PROTECTED);
|
||||
private static final SearchOperator FILTER_PROTECTED_OPERATOR =
|
||||
new SearchOperator(SearchOperator.Type.FILTER, SearchOperatorConstants.PROTECTED);
|
||||
private static final SearchCounter QUERY_PARSER_FAILURE_COUNT =
|
||||
SearchCounter.export("protected_operator_filter_query_parser_failure_count");
|
||||
|
||||
private final DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor;
|
||||
private final SearchDecider decider;
|
||||
|
||||
@Inject
|
||||
public FullArchiveProtectedOperatorFilter(
|
||||
DropAllProtectedOperatorVisitor dropProtectedOperatorVisitor,
|
||||
SearchDecider decider) {
|
||||
this.dropProtectedOperatorVisitor = dropProtectedOperatorVisitor;
|
||||
this.decider = decider;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
Query query = requestContext.getParsedQuery();
|
||||
if (query == null) {
|
||||
return service.apply(requestContext);
|
||||
}
|
||||
|
||||
QueryTreeIndex queryTreeIndex = QueryTreeIndex.buildFor(query);
|
||||
List<Query> nodeList = queryTreeIndex.getNodeList();
|
||||
// try to find a protected operator, returns error response if more than one protected
|
||||
// operator is detected
|
||||
SearchOperator protectedOperator = null;
|
||||
for (Query node : nodeList) {
|
||||
if (node instanceof SearchOperator) {
|
||||
SearchOperator searchOp = (SearchOperator) node;
|
||||
if (SearchOperatorConstants.PROTECTED.equals(searchOp.getOperand())) {
|
||||
if (protectedOperator == null) {
|
||||
protectedOperator = searchOp;
|
||||
} else {
|
||||
return createErrorResponse("Only one 'protected' operator is expected.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Query processedQuery;
|
||||
if (protectedOperator == null) {
|
||||
// no protected operator is detected, append '[exclude protected]' by default
|
||||
processedQuery = QueryNodeUtils.appendAsConjunction(query, EXCLUDE_PROTECTED_OPERATOR);
|
||||
} else {
|
||||
// protected operator must be in the query root node
|
||||
if (queryTreeIndex.getParentOf(protectedOperator) != query) {
|
||||
return createErrorResponse("'protected' operator must be in the query root node");
|
||||
}
|
||||
// the query node that contains protected operator must not be negated
|
||||
if (query.mustNotOccur()) {
|
||||
return createErrorResponse("The query node that contains a 'protected' operator must not"
|
||||
+ " be negated.");
|
||||
}
|
||||
// the query node that contains protected operator must be a conjunction
|
||||
if (!query.isTypeOf(Query.QueryType.CONJUNCTION)) {
|
||||
return createErrorResponse("The query node that contains a 'protected' operator must"
|
||||
+ " be a conjunction.");
|
||||
}
|
||||
// check the existence of 'followedUserIds' and 'searcherId' if it is a positive operator
|
||||
if (isPositive(protectedOperator)) {
|
||||
if (!validateRequestParam(requestContext.getRequest())) {
|
||||
return createErrorResponse("'followedUserIds' and 'searcherId' are required "
|
||||
+ "by positive 'protected' operator.");
|
||||
}
|
||||
}
|
||||
processedQuery = query;
|
||||
}
|
||||
// update processedQuery if 'getProtectedTweetsOnly' is set to true, it takes precedence over
|
||||
// the existing protected operators
|
||||
if (requestContext.getRequest().isGetProtectedTweetsOnly()) {
|
||||
if (!validateRequestParam(requestContext.getRequest())) {
|
||||
return createErrorResponse("'followedUserIds' and 'searcherId' are required "
|
||||
+ "when 'getProtectedTweetsOnly' is set to true.");
|
||||
}
|
||||
try {
|
||||
processedQuery = processedQuery.accept(dropProtectedOperatorVisitor);
|
||||
} catch (QueryParserException e) {
|
||||
// this should not happen since we already have a parsed query
|
||||
QUERY_PARSER_FAILURE_COUNT.increment();
|
||||
LOG.warn(
|
||||
"Failed to drop protected operator for serialized query: " + query.serialize(), e);
|
||||
}
|
||||
processedQuery =
|
||||
QueryNodeUtils.appendAsConjunction(processedQuery, FILTER_PROTECTED_OPERATOR);
|
||||
}
|
||||
|
||||
if (processedQuery == query) {
|
||||
return service.apply(requestContext);
|
||||
} else {
|
||||
EarlybirdRequestContext clonedRequestContext =
|
||||
EarlybirdRequestContext.copyRequestContext(requestContext, processedQuery);
|
||||
return service.apply(clonedRequestContext);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean validateRequestParam(EarlybirdRequest request) {
|
||||
List<Long> followedUserIds = request.followedUserIds;
|
||||
Long searcherId = (request.searchQuery != null && request.searchQuery.isSetSearcherId())
|
||||
? request.searchQuery.getSearcherId() : null;
|
||||
return followedUserIds != null && !followedUserIds.isEmpty() && searcherId != null;
|
||||
}
|
||||
|
||||
private boolean isPositive(SearchOperator searchOp) {
|
||||
boolean isNegateExclude = searchOp.mustNotOccur()
|
||||
&& searchOp.getOperatorType() == SearchOperator.Type.EXCLUDE;
|
||||
boolean isPositive = !searchOp.mustNotOccur()
|
||||
&& (searchOp.getOperatorType() == SearchOperator.Type.INCLUDE
|
||||
|| searchOp.getOperatorType() == SearchOperator.Type.FILTER);
|
||||
return isNegateExclude || isPositive;
|
||||
}
|
||||
|
||||
private Future<EarlybirdResponse> createErrorResponse(String errorMsg) {
|
||||
EarlybirdResponse response = new EarlybirdResponse(EarlybirdResponseCode.CLIENT_ERROR, 0);
|
||||
response.setDebugInfo(new EarlybirdDebugInfo().setHost("full_archive_root"));
|
||||
response.setDebugString(errorMsg);
|
||||
return Future.value(response);
|
||||
}
|
||||
|
||||
}
|
Binary file not shown.
@ -1,64 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.twitter.search.common.decider.SearchDecider;
|
||||
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
|
||||
import com.twitter.search.common.util.date.DateUtil;
|
||||
import com.twitter.search.earlybird.config.ServingRange;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
|
||||
public class FullArchiveServingRangeProvider implements ServingRangeProvider {
|
||||
|
||||
public static final Date FULL_ARCHIVE_START_DATE = DateUtil.toDate(2006, 3, 21);
|
||||
private static final int DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO = 48;
|
||||
|
||||
private final SearchDecider decider;
|
||||
private final String deciderKey;
|
||||
|
||||
public FullArchiveServingRangeProvider(
|
||||
SearchDecider decider, String deciderKey) {
|
||||
this.decider = decider;
|
||||
this.deciderKey = deciderKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ServingRange getServingRange(
|
||||
final EarlybirdRequestContext requestContext, boolean useBoundaryOverride) {
|
||||
return new ServingRange() {
|
||||
@Override
|
||||
public long getServingRangeSinceId() {
|
||||
// we use 1 instead of 0, because the since_id operator is inclusive in earlybirds.
|
||||
return 1L;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getServingRangeMaxId() {
|
||||
long servingRangeEndMillis = TimeUnit.HOURS.toMillis(
|
||||
(decider.featureExists(deciderKey))
|
||||
? decider.getAvailability(deciderKey)
|
||||
: DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);
|
||||
|
||||
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeEndMillis;
|
||||
return SnowflakeIdParser.generateValidStatusId(boundaryTime, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getServingRangeSinceTimeSecondsFromEpoch() {
|
||||
return FULL_ARCHIVE_START_DATE.getTime() / 1000;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getServingRangeUntilTimeSecondsFromEpoch() {
|
||||
long servingRangeEndMillis = TimeUnit.HOURS.toMillis(
|
||||
(decider.featureExists(deciderKey))
|
||||
? decider.getAvailability(deciderKey)
|
||||
: DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);
|
||||
|
||||
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeEndMillis;
|
||||
return boundaryTime / 1000;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,66 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import javax.inject.Inject;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
import com.twitter.common.util.Clock;
|
||||
import com.twitter.finagle.Filter;
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.search.common.decider.SearchDecider;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.earlybird.common.EarlybirdRequestUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.earlybird_root.common.QueryParsingUtils;
|
||||
import com.twitter.search.earlybird_root.common.TwitterContextProvider;
|
||||
import com.twitter.search.queryparser.query.QueryParserException;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Creates a new RequestContext from an EarlybirdRequest, and passes the RequestContext down to
|
||||
* the rest of the filter/service chain.
|
||||
*/
|
||||
public class InitializeRequestContextFilter extends
|
||||
Filter<EarlybirdRequest, EarlybirdResponse, EarlybirdRequestContext, EarlybirdResponse> {
|
||||
|
||||
@VisibleForTesting
|
||||
static final SearchCounter FAILED_QUERY_PARSING =
|
||||
SearchCounter.export("initialize_request_context_filter_query_parsing_failure");
|
||||
|
||||
private final SearchDecider decider;
|
||||
private final TwitterContextProvider twitterContextProvider;
|
||||
private final Clock clock;
|
||||
|
||||
/**
|
||||
* The constructor of the filter.
|
||||
*/
|
||||
@Inject
|
||||
public InitializeRequestContextFilter(SearchDecider decider,
|
||||
TwitterContextProvider twitterContextProvider,
|
||||
Clock clock) {
|
||||
this.decider = decider;
|
||||
this.twitterContextProvider = twitterContextProvider;
|
||||
this.clock = clock;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequest request,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
|
||||
EarlybirdRequestUtil.recordClientClockDiff(request);
|
||||
|
||||
EarlybirdRequestContext requestContext;
|
||||
try {
|
||||
requestContext = EarlybirdRequestContext.newContext(
|
||||
request, decider, twitterContextProvider.get(), clock);
|
||||
} catch (QueryParserException e) {
|
||||
FAILED_QUERY_PARSING.increment();
|
||||
return QueryParsingUtils.newClientErrorResponse(request, e);
|
||||
}
|
||||
|
||||
return service.apply(requestContext);
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,80 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResultExtraMetadata;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
|
||||
import com.twitter.util.Future;
|
||||
import com.twitter.util.FutureEventListener;
|
||||
|
||||
/**
|
||||
* Filter tracks the isUserProtected metadata stats returned from Earlybirds.
|
||||
*/
|
||||
public class IsUserProtectedMetadataTrackingFilter
|
||||
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
private static final String COUNTER_PREFIX = "is_user_protected_metadata_count_filter_";
|
||||
@VisibleForTesting
|
||||
final Map<EarlybirdRequestType, SearchCounter> totalCounterByRequestTypeMap;
|
||||
@VisibleForTesting
|
||||
final Map<EarlybirdRequestType, SearchCounter> isProtectedCounterByRequestTypeMap;
|
||||
|
||||
public IsUserProtectedMetadataTrackingFilter() {
|
||||
this.totalCounterByRequestTypeMap = new EnumMap<>(EarlybirdRequestType.class);
|
||||
this.isProtectedCounterByRequestTypeMap = new EnumMap<>(EarlybirdRequestType.class);
|
||||
for (EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
|
||||
this.totalCounterByRequestTypeMap.put(requestType,
|
||||
SearchCounter.export(COUNTER_PREFIX + requestType.getNormalizedName() + "_total"));
|
||||
this.isProtectedCounterByRequestTypeMap.put(requestType,
|
||||
SearchCounter.export(COUNTER_PREFIX + requestType.getNormalizedName() + "_is_protected"));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext request,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
Future<EarlybirdResponse> response = service.apply(request);
|
||||
|
||||
EarlybirdRequestType requestType = request.getEarlybirdRequestType();
|
||||
response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
|
||||
@Override
|
||||
public void onSuccess(EarlybirdResponse response) {
|
||||
if (!response.isSetSearchResults() || response.getSearchResults().getResults().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
List<ThriftSearchResult> searchResults = response.getSearchResults().getResults();
|
||||
int totalCount = searchResults.size();
|
||||
int isUserProtectedCount = 0;
|
||||
for (ThriftSearchResult searchResult : searchResults) {
|
||||
if (searchResult.isSetMetadata() && searchResult.getMetadata().isSetExtraMetadata()) {
|
||||
ThriftSearchResultExtraMetadata extraMetadata =
|
||||
searchResult.getMetadata().getExtraMetadata();
|
||||
if (extraMetadata.isIsUserProtected()) {
|
||||
isUserProtectedCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
IsUserProtectedMetadataTrackingFilter.this
|
||||
.totalCounterByRequestTypeMap.get(requestType).add(totalCount);
|
||||
IsUserProtectedMetadataTrackingFilter.this
|
||||
.isProtectedCounterByRequestTypeMap.get(requestType).add(isUserProtectedCount);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Throwable cause) { }
|
||||
});
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
}
|
Binary file not shown.
@ -1,49 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird.thrift.ThriftTweetSource;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
|
||||
import com.twitter.util.Function;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
public class MarkTweetSourceFilter
|
||||
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
private final SearchCounter searchResultsNotSet;
|
||||
|
||||
private final ThriftTweetSource tweetSource;
|
||||
|
||||
public MarkTweetSourceFilter(ThriftTweetSource tweetSource) {
|
||||
this.tweetSource = tweetSource;
|
||||
searchResultsNotSet = SearchCounter.export(
|
||||
tweetSource.name().toLowerCase() + "_mark_tweet_source_filter_search_results_not_set");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
final EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
return service.apply(requestContext).map(new Function<EarlybirdResponse, EarlybirdResponse>() {
|
||||
@Override
|
||||
public EarlybirdResponse apply(EarlybirdResponse response) {
|
||||
if (response.getResponseCode() == EarlybirdResponseCode.SUCCESS
|
||||
&& requestContext.getEarlybirdRequestType() != EarlybirdRequestType.TERM_STATS) {
|
||||
if (!response.isSetSearchResults()) {
|
||||
searchResultsNotSet.increment();
|
||||
} else {
|
||||
for (ThriftSearchResult searchResult : response.getSearchResults().getResults()) {
|
||||
searchResult.setTweetSource(tweetSource);
|
||||
}
|
||||
}
|
||||
}
|
||||
return response;
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,119 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.cache.CacheLoader;
|
||||
import com.google.common.cache.LoadingCache;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.metrics.SearchMovingAverage;
|
||||
import com.twitter.search.earlybird.common.ClientIdUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResultMetadata;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
|
||||
import com.twitter.util.Future;
|
||||
import com.twitter.util.FutureEventListener;
|
||||
|
||||
/**
|
||||
* Filter that is tracking the engagement stats returned from Earlybirds.
|
||||
*/
|
||||
public class MetadataTrackingFilter extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
|
||||
private static final String SCORING_SIGNAL_STAT_PREFIX = "scoring_signal_";
|
||||
private static final String SCORE_STAT_PATTERN = "client_id_score_tracker_for_%s_x100";
|
||||
|
||||
@VisibleForTesting
|
||||
static final SearchMovingAverage SCORING_SIGNAL_FAV_COUNT =
|
||||
SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "fav_count");
|
||||
|
||||
@VisibleForTesting
|
||||
static final SearchMovingAverage SCORING_SIGNAL_REPLY_COUNT =
|
||||
SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "reply_count");
|
||||
|
||||
@VisibleForTesting
|
||||
static final SearchMovingAverage SCORING_SIGNAL_RETWEET_COUNT =
|
||||
SearchMovingAverage.export(SCORING_SIGNAL_STAT_PREFIX + "retweet_count");
|
||||
|
||||
@VisibleForTesting
|
||||
static final LoadingCache<String, SearchMovingAverage> CLIENT_SCORE_METRICS_LOADING_CACHE =
|
||||
CacheBuilder.newBuilder().build(new CacheLoader<String, SearchMovingAverage>() {
|
||||
public SearchMovingAverage load(String clientId) {
|
||||
return SearchMovingAverage.export(String.format(SCORE_STAT_PATTERN, clientId));
|
||||
}
|
||||
});
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(final EarlybirdRequest request,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
|
||||
Future<EarlybirdResponse> response = service.apply(request);
|
||||
|
||||
response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
|
||||
@Override
|
||||
public void onSuccess(EarlybirdResponse earlybirdResponse) {
|
||||
EarlybirdRequestType type = EarlybirdRequestType.of(request);
|
||||
|
||||
if (earlybirdResponse.responseCode == EarlybirdResponseCode.SUCCESS
|
||||
&& type == EarlybirdRequestType.RELEVANCE
|
||||
&& earlybirdResponse.isSetSearchResults()
|
||||
&& earlybirdResponse.getSearchResults().isSetResults()) {
|
||||
|
||||
List<ThriftSearchResult> searchResults = earlybirdResponse.getSearchResults()
|
||||
.getResults();
|
||||
|
||||
long totalFavoriteAmount = 0;
|
||||
long totalReplyAmount = 0;
|
||||
long totalRetweetAmount = 0;
|
||||
double totalScoreX100 = 0;
|
||||
|
||||
for (ThriftSearchResult result : searchResults) {
|
||||
if (!result.isSetMetadata()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ThriftSearchResultMetadata metadata = result.getMetadata();
|
||||
|
||||
if (metadata.isSetFavCount()) {
|
||||
totalFavoriteAmount += metadata.getFavCount();
|
||||
}
|
||||
|
||||
if (metadata.isSetReplyCount()) {
|
||||
totalReplyAmount += metadata.getReplyCount();
|
||||
}
|
||||
|
||||
if (metadata.isSetRetweetCount()) {
|
||||
totalRetweetAmount += metadata.getRetweetCount();
|
||||
}
|
||||
|
||||
if (metadata.isSetScore()) {
|
||||
// Scale up the score by 100 so that scores are at least 1 and visible on viz graph
|
||||
totalScoreX100 += metadata.getScore() * 100;
|
||||
}
|
||||
}
|
||||
|
||||
// We only count present engagement counts but report the full size of the search results.
|
||||
// This means that we consider the missing counts as being 0.
|
||||
SCORING_SIGNAL_FAV_COUNT.addSamples(totalFavoriteAmount, searchResults.size());
|
||||
SCORING_SIGNAL_REPLY_COUNT.addSamples(totalReplyAmount, searchResults.size());
|
||||
SCORING_SIGNAL_RETWEET_COUNT.addSamples(totalRetweetAmount, searchResults.size());
|
||||
// Export per client id average scores.
|
||||
String requestClientId = ClientIdUtil.getClientIdFromRequest(request);
|
||||
String quotaClientId = ClientIdUtil.getQuotaClientId(requestClientId);
|
||||
CLIENT_SCORE_METRICS_LOADING_CACHE.getUnchecked(quotaClientId)
|
||||
.addSamples((long) totalScoreX100, searchResults.size());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Throwable cause) { }
|
||||
});
|
||||
|
||||
return response;
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,45 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.metrics.Percentile;
|
||||
import com.twitter.search.common.metrics.PercentileUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
public class NamedMultiTermDisjunctionStatsFilter extends
|
||||
SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
|
||||
private static final String STAT_FORMAT = "named_disjunction_size_client_%s_key_%s";
|
||||
// ClientID -> disjunction name -> operand count
|
||||
private static final ConcurrentMap<String, ConcurrentMap<String, Percentile<Integer>>>
|
||||
NAMED_MULTI_TERM_DISJUNCTION_IDS_COUNT = new ConcurrentHashMap<>();
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
|
||||
if (request.getSearchQuery().isSetNamedDisjunctionMap()) {
|
||||
for (Map.Entry<String, List<Long>> entry
|
||||
: request.getSearchQuery().getNamedDisjunctionMap().entrySet()) {
|
||||
|
||||
Map<String, Percentile<Integer>> statsForClient =
|
||||
NAMED_MULTI_TERM_DISJUNCTION_IDS_COUNT.computeIfAbsent(
|
||||
request.getClientId(), clientId -> new ConcurrentHashMap<>());
|
||||
Percentile<Integer> stats = statsForClient.computeIfAbsent(entry.getKey(),
|
||||
keyName -> PercentileUtil.createPercentile(
|
||||
String.format(STAT_FORMAT, request.getClientId(), keyName)));
|
||||
|
||||
stats.record(entry.getValue().size());
|
||||
}
|
||||
}
|
||||
|
||||
return service.apply(request);
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,81 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
|
||||
import com.twitter.search.queryparser.visitors.DetectPositiveOperatorVisitor;
|
||||
|
||||
/**
|
||||
* Filter that is tracking the unexpected nullcast results from Earlybirds.
|
||||
*/
|
||||
public class NullcastTrackingFilter extends SensitiveResultsTrackingFilter {
|
||||
public NullcastTrackingFilter() {
|
||||
super("unexpected nullcast tweets", true);
|
||||
}
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(NullcastTrackingFilter.class);
|
||||
|
||||
@VisibleForTesting
|
||||
static final SearchCounter BAD_NULLCAST_QUERY_COUNT =
|
||||
SearchCounter.export("unexpected_nullcast_query_count");
|
||||
|
||||
@VisibleForTesting
|
||||
static final SearchCounter BAD_NULLCAST_RESULT_COUNT =
|
||||
SearchCounter.export("unexpected_nullcast_result_count");
|
||||
|
||||
@Override
|
||||
protected Logger getLogger() {
|
||||
return LOG;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SearchCounter getSensitiveQueryCounter() {
|
||||
return BAD_NULLCAST_QUERY_COUNT;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SearchCounter getSensitiveResultsCounter() {
|
||||
return BAD_NULLCAST_RESULT_COUNT;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Set<Long> getSensitiveResults(EarlybirdRequestContext requestContext,
|
||||
EarlybirdResponse earlybirdResponse) throws Exception {
|
||||
if (!requestContext.getParsedQuery().accept(
|
||||
new DetectPositiveOperatorVisitor(SearchOperatorConstants.NULLCAST))) {
|
||||
return EarlybirdResponseUtil.findUnexpectedNullcastStatusIds(
|
||||
earlybirdResponse.getSearchResults(), requestContext.getRequest());
|
||||
} else {
|
||||
return new HashSet<>();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Some Earlybird requests are not searches, instead, they are scoring requests.
|
||||
* These requests supply a list of IDs to be scored.
|
||||
* It is OK to return nullcast tweet result if the ID is supplied in the request.
|
||||
* This extracts the scoring request tweet IDs.
|
||||
*/
|
||||
@Override
|
||||
protected Set<Long> getExceptedResults(EarlybirdRequestContext requestContext) {
|
||||
EarlybirdRequest request = requestContext.getRequest();
|
||||
if (request == null
|
||||
|| !request.isSetSearchQuery()
|
||||
|| request.getSearchQuery().getSearchStatusIdsSize() == 0) {
|
||||
return ImmutableSet.of();
|
||||
}
|
||||
return request.getSearchQuery().getSearchStatusIds();
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,10 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import javax.inject.Inject;
|
||||
|
||||
public class PostCacheRequestTypeCountFilter extends RequestTypeCountFilter {
|
||||
@Inject
|
||||
public PostCacheRequestTypeCountFilter() {
|
||||
super("post_cache");
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,10 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import javax.inject.Inject;
|
||||
|
||||
public class PreCacheRequestTypeCountFilter extends RequestTypeCountFilter {
|
||||
@Inject
|
||||
public PreCacheRequestTypeCountFilter() {
|
||||
super("pre_cache");
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,114 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import com.twitter.common.text.language.LocaleUtil;
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.constants.thriftjava.ThriftLanguage;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.util.lang.ThriftLanguageUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Export stats for query languages.
|
||||
*/
|
||||
@Singleton
|
||||
public class QueryLangStatFilter
|
||||
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
|
||||
public static class Config {
|
||||
// We put a limit here in case an error in the client are sending us random lang codes.
|
||||
private int maxNumberOfLangs;
|
||||
|
||||
public Config(int maxNumberOfLangs) {
|
||||
this.maxNumberOfLangs = maxNumberOfLangs;
|
||||
}
|
||||
|
||||
public int getMaxNumberOfLangs() {
|
||||
return maxNumberOfLangs;
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
protected static final String LANG_STATS_PREFIX = "num_queries_in_lang_";
|
||||
|
||||
private final Config config;
|
||||
private final SearchCounter allCountsForLangsOverMaxNumLang =
|
||||
SearchCounter.export(LANG_STATS_PREFIX + "overflow");
|
||||
|
||||
private final ConcurrentHashMap<String, SearchCounter> langCounters =
|
||||
new ConcurrentHashMap<>();
|
||||
|
||||
@Inject
|
||||
public QueryLangStatFilter(Config config) {
|
||||
this.config = config;
|
||||
}
|
||||
|
||||
private SearchCounter getCounter(String lang) {
|
||||
Preconditions.checkNotNull(lang);
|
||||
|
||||
SearchCounter counter = langCounters.get(lang);
|
||||
if (counter == null) {
|
||||
if (langCounters.size() >= config.getMaxNumberOfLangs()) {
|
||||
return allCountsForLangsOverMaxNumLang;
|
||||
}
|
||||
synchronized (langCounters) { // This double-checked locking is safe,
|
||||
// since we're using a ConcurrentHashMap
|
||||
counter = langCounters.get(lang);
|
||||
if (counter == null) {
|
||||
counter = SearchCounter.export(LANG_STATS_PREFIX + lang);
|
||||
langCounters.put(lang, counter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return counter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
|
||||
String lang = null;
|
||||
|
||||
ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
|
||||
|
||||
lang = searchQuery.getQueryLang();
|
||||
|
||||
if (lang == null) {
|
||||
// fallback to ui lang
|
||||
lang = searchQuery.getUiLang();
|
||||
}
|
||||
|
||||
if (lang == null && searchQuery.isSetUserLangs()) {
|
||||
// fallback to the user lang with the highest confidence
|
||||
double maxConfidence = Double.MIN_VALUE;
|
||||
|
||||
for (Map.Entry<ThriftLanguage, Double> entry : searchQuery.getUserLangs().entrySet()) {
|
||||
if (entry.getValue() > maxConfidence) {
|
||||
lang = ThriftLanguageUtil.getLanguageCodeOf(entry.getKey());
|
||||
maxConfidence = entry.getValue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (lang == null) {
|
||||
lang = LocaleUtil.UNDETERMINED_LANGUAGE;
|
||||
}
|
||||
|
||||
getCounter(lang).increment();
|
||||
|
||||
return service.apply(requestContext);
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,194 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.EnumSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import scala.runtime.BoxedUnit;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.metrics.SearchTimer;
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.queryparser.query.Query;
|
||||
import com.twitter.search.queryparser.query.QueryParserException;
|
||||
import com.twitter.search.queryparser.query.annotation.Annotation;
|
||||
import com.twitter.search.queryparser.query.search.SearchOperator;
|
||||
import com.twitter.search.queryparser.query.search.SearchOperatorConstants;
|
||||
import com.twitter.search.queryparser.visitors.DetectAnnotationVisitor;
|
||||
import com.twitter.search.queryparser.visitors.DetectVisitor;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* For a given query, increments counters if that query has a number of search operators or
|
||||
* annotations applied to it. Used to detect unusual traffic patterns.
|
||||
*/
|
||||
public class QueryOperatorStatFilter
|
||||
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(QueryOperatorStatFilter.class);
|
||||
|
||||
private final SearchCounter numQueryOperatorDetectionErrors =
|
||||
SearchCounter.export("query_operator_detection_errors");
|
||||
|
||||
private final SearchCounter numQueryOperatorConsideredRequests =
|
||||
SearchCounter.export("query_operator_requests_considered");
|
||||
|
||||
private final ImmutableMap<String, SearchTimerStats> filterOperatorStats;
|
||||
|
||||
// Keeps track of the number of queries with a filter applied, whose type we don't care about.
|
||||
private final SearchCounter numUnknownFilterOperatorRequests =
|
||||
SearchCounter.export("query_operator_filter_unknown_requests");
|
||||
|
||||
private final ImmutableMap<String, SearchTimerStats> includeOperatorStats;
|
||||
|
||||
// Keeps track of the number of queries with an include operator applied, whose type we don't
|
||||
// know about.
|
||||
private final SearchCounter numUnknownIncludeOperatorRequests =
|
||||
SearchCounter.export("query_operator_include_unknown_requests");
|
||||
|
||||
private final ImmutableMap<SearchOperator.Type, SearchTimerStats> operatorTypeStats;
|
||||
|
||||
private final SearchCounter numVariantRequests =
|
||||
SearchCounter.export("query_operator_variant_requests");
|
||||
|
||||
/**
|
||||
* Construct this QueryOperatorStatFilter by getting the complete set of possible filters a query
|
||||
* might have and associating each with a counter.
|
||||
*/
|
||||
public QueryOperatorStatFilter() {
|
||||
|
||||
ImmutableMap.Builder<String, SearchTimerStats> filterBuilder = new ImmutableMap.Builder<>();
|
||||
for (String operand : SearchOperatorConstants.VALID_FILTER_OPERANDS) {
|
||||
filterBuilder.put(
|
||||
operand,
|
||||
SearchTimerStats.export(
|
||||
"query_operator_filter_" + operand + "_requests",
|
||||
TimeUnit.MILLISECONDS,
|
||||
false,
|
||||
true));
|
||||
}
|
||||
filterOperatorStats = filterBuilder.build();
|
||||
|
||||
ImmutableMap.Builder<String, SearchTimerStats> includeBuilder = new ImmutableMap.Builder<>();
|
||||
for (String operand : SearchOperatorConstants.VALID_INCLUDE_OPERANDS) {
|
||||
includeBuilder.put(
|
||||
operand,
|
||||
SearchTimerStats.export(
|
||||
"query_operator_include_" + operand + "_requests",
|
||||
TimeUnit.MILLISECONDS,
|
||||
false,
|
||||
true));
|
||||
}
|
||||
includeOperatorStats = includeBuilder.build();
|
||||
|
||||
ImmutableMap.Builder<SearchOperator.Type, SearchTimerStats> operatorBuilder =
|
||||
new ImmutableMap.Builder<>();
|
||||
for (SearchOperator.Type operatorType : SearchOperator.Type.values()) {
|
||||
operatorBuilder.put(
|
||||
operatorType,
|
||||
SearchTimerStats.export(
|
||||
"query_operator_" + operatorType.name().toLowerCase() + "_requests",
|
||||
TimeUnit.MILLISECONDS,
|
||||
false,
|
||||
true
|
||||
));
|
||||
}
|
||||
operatorTypeStats = operatorBuilder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
numQueryOperatorConsideredRequests.increment();
|
||||
Query parsedQuery = requestContext.getParsedQuery();
|
||||
|
||||
if (parsedQuery == null) {
|
||||
return service.apply(requestContext);
|
||||
}
|
||||
|
||||
SearchTimer timer = new SearchTimer();
|
||||
timer.start();
|
||||
|
||||
return service.apply(requestContext).ensure(() -> {
|
||||
timer.stop();
|
||||
|
||||
try {
|
||||
updateTimersForOperatorsAndOperands(parsedQuery, timer);
|
||||
updateCountersIfVariantAnnotation(parsedQuery);
|
||||
} catch (QueryParserException e) {
|
||||
LOG.warn("Unable to test if query has operators defined", e);
|
||||
numQueryOperatorDetectionErrors.increment();
|
||||
}
|
||||
return BoxedUnit.UNIT;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracks request stats for operators and operands.
|
||||
*
|
||||
* @param parsedQuery the query to check.
|
||||
*/
|
||||
private void updateTimersForOperatorsAndOperands(Query parsedQuery, SearchTimer timer)
|
||||
throws QueryParserException {
|
||||
final DetectVisitor detectVisitor = new DetectVisitor(false, SearchOperator.Type.values());
|
||||
parsedQuery.accept(detectVisitor);
|
||||
|
||||
Set<SearchOperator.Type> detectedOperatorTypes = EnumSet.noneOf(SearchOperator.Type.class);
|
||||
for (Query query : detectVisitor.getDetectedQueries()) {
|
||||
// This detectVisitor only matches on SearchOperators.
|
||||
SearchOperator operator = (SearchOperator) query;
|
||||
SearchOperator.Type operatorType = operator.getOperatorType();
|
||||
detectedOperatorTypes.add(operatorType);
|
||||
|
||||
if (operatorType == SearchOperator.Type.INCLUDE) {
|
||||
updateOperandStats(
|
||||
operator,
|
||||
includeOperatorStats,
|
||||
timer,
|
||||
numUnknownIncludeOperatorRequests);
|
||||
}
|
||||
if (operatorType == SearchOperator.Type.FILTER) {
|
||||
updateOperandStats(
|
||||
operator,
|
||||
filterOperatorStats,
|
||||
timer,
|
||||
numUnknownFilterOperatorRequests);
|
||||
}
|
||||
}
|
||||
|
||||
for (SearchOperator.Type type : detectedOperatorTypes) {
|
||||
operatorTypeStats.get(type).stoppedTimerIncrement(timer);
|
||||
}
|
||||
}
|
||||
|
||||
private void updateOperandStats(
|
||||
SearchOperator operator,
|
||||
ImmutableMap<String, SearchTimerStats> operandRequestStats,
|
||||
SearchTimer timer,
|
||||
SearchCounter unknownOperandStat) {
|
||||
String operand = operator.getOperand();
|
||||
SearchTimerStats stats = operandRequestStats.get(operand);
|
||||
|
||||
if (stats != null) {
|
||||
stats.stoppedTimerIncrement(timer);
|
||||
} else {
|
||||
unknownOperandStat.increment();
|
||||
}
|
||||
}
|
||||
|
||||
private void updateCountersIfVariantAnnotation(Query parsedQuery) throws QueryParserException {
|
||||
DetectAnnotationVisitor visitor = new DetectAnnotationVisitor(Annotation.Type.VARIANT);
|
||||
if (parsedQuery.accept(visitor)) {
|
||||
numVariantRequests.increment();
|
||||
}
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,92 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import javax.inject.Inject;
|
||||
|
||||
import com.twitter.common_internal.text.version.PenguinVersion;
|
||||
import com.twitter.common_internal.text.version.PenguinVersionConfig;
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.finagle.tracing.Trace;
|
||||
import com.twitter.finagle.tracing.Tracing;
|
||||
import com.twitter.search.common.metrics.SearchRateCounter;
|
||||
import com.twitter.search.common.metrics.SearchTimer;
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.earlybird_root.common.QueryParsingUtils;
|
||||
import com.twitter.search.queryparser.parser.SerializedQueryParser;
|
||||
import com.twitter.search.queryparser.parser.SerializedQueryParser.TokenizationOption;
|
||||
import com.twitter.search.queryparser.query.Query;
|
||||
import com.twitter.search.queryparser.query.QueryParserException;
|
||||
import com.twitter.util.Duration;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
public class QueryTokenizerFilter extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
private static final String PREFIX = "query_tokenizer_";
|
||||
private static final SearchRateCounter SUCCESS_COUNTER =
|
||||
SearchRateCounter.export(PREFIX + "success");
|
||||
private static final SearchRateCounter FAILURE_COUNTER =
|
||||
SearchRateCounter.export(PREFIX + "error");
|
||||
private static final SearchRateCounter SKIPPED_COUNTER =
|
||||
SearchRateCounter.export(PREFIX + "skipped");
|
||||
private static final SearchTimerStats QUERY_TOKENIZER_TIME =
|
||||
SearchTimerStats.export(PREFIX + "time", TimeUnit.MILLISECONDS, false);
|
||||
|
||||
private final TokenizationOption tokenizationOption;
|
||||
|
||||
@Inject
|
||||
public QueryTokenizerFilter(PenguinVersionConfig penguinversions) {
|
||||
PenguinVersion[] supportedVersions = penguinversions
|
||||
.getSupportedVersions().toArray(new PenguinVersion[0]);
|
||||
tokenizationOption = new TokenizationOption(true, supportedVersions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
|
||||
if (!requestContext.getRequest().isRetokenizeSerializedQuery()
|
||||
|| !requestContext.getRequest().isSetSearchQuery()
|
||||
|| !requestContext.getRequest().getSearchQuery().isSetSerializedQuery()) {
|
||||
SKIPPED_COUNTER.increment();
|
||||
return service.apply(requestContext);
|
||||
}
|
||||
|
||||
SearchTimer timer = QUERY_TOKENIZER_TIME.startNewTimer();
|
||||
try {
|
||||
String serializedQuery = requestContext.getRequest().getSearchQuery().getSerializedQuery();
|
||||
Query parsedQuery = reparseQuery(serializedQuery);
|
||||
SUCCESS_COUNTER.increment();
|
||||
return service.apply(EarlybirdRequestContext.copyRequestContext(requestContext, parsedQuery));
|
||||
} catch (QueryParserException e) {
|
||||
FAILURE_COUNTER.increment();
|
||||
return QueryParsingUtils.newClientErrorResponse(requestContext.getRequest(), e);
|
||||
} finally {
|
||||
long elapsed = timer.stop();
|
||||
QUERY_TOKENIZER_TIME.timerIncrement(elapsed);
|
||||
Tracing trace = Trace.apply();
|
||||
if (trace.isActivelyTracing()) {
|
||||
trace.record(PREFIX + "time", Duration.fromMilliseconds(elapsed));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Query reparseQuery(String serializedQuery) throws QueryParserException {
|
||||
SerializedQueryParser parser = new SerializedQueryParser(tokenizationOption);
|
||||
return parser.parse(serializedQuery);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializing the query parser can take many seconds. We initialize it at warmup so that
|
||||
* requests don't time out after we join the serverset. SEARCH-28801
|
||||
*/
|
||||
public void performExpensiveInitialization() throws QueryParserException {
|
||||
SerializedQueryParser queryParser = new SerializedQueryParser(tokenizationOption);
|
||||
|
||||
// The Korean query parser takes a few seconds on it's own to initialize.
|
||||
String koreanQuery = "스포츠";
|
||||
queryParser.parse(koreanQuery);
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,60 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.twitter.search.common.decider.SearchDecider;
|
||||
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
|
||||
import com.twitter.search.earlybird.config.ServingRange;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
|
||||
public class RealtimeServingRangeProvider implements ServingRangeProvider {
|
||||
|
||||
private static final int DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO = 240;
|
||||
|
||||
private final SearchDecider decider;
|
||||
private final String deciderKey;
|
||||
|
||||
public RealtimeServingRangeProvider(SearchDecider decider, String deciderKey) {
|
||||
this.decider = decider;
|
||||
this.deciderKey = deciderKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ServingRange getServingRange(
|
||||
final EarlybirdRequestContext requestContext, boolean useBoundaryOverride) {
|
||||
return new ServingRange() {
|
||||
@Override
|
||||
public long getServingRangeSinceId() {
|
||||
long servingRangeStartMillis = TimeUnit.HOURS.toMillis(
|
||||
(decider.featureExists(deciderKey))
|
||||
? decider.getAvailability(deciderKey)
|
||||
: DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);
|
||||
|
||||
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeStartMillis;
|
||||
return SnowflakeIdParser.generateValidStatusId(boundaryTime, 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getServingRangeMaxId() {
|
||||
return SnowflakeIdParser.generateValidStatusId(
|
||||
requestContext.getCreatedTimeMillis(), 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getServingRangeSinceTimeSecondsFromEpoch() {
|
||||
long servingRangeStartMillis = TimeUnit.HOURS.toMillis(
|
||||
(decider.featureExists(deciderKey))
|
||||
? decider.getAvailability(deciderKey)
|
||||
: DEFAULT_SERVING_RANGE_BOUNDARY_HOURS_AGO);
|
||||
|
||||
long boundaryTime = requestContext.getCreatedTimeMillis() - servingRangeStartMillis;
|
||||
return boundaryTime / 1000;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getServingRangeUntilTimeSecondsFromEpoch() {
|
||||
return requestContext.getCreatedTimeMillis() / 1000;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,94 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import javax.annotation.Nullable;
|
||||
import javax.inject.Inject;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.constants.thriftjava.ThriftQuerySource;
|
||||
import com.twitter.search.common.decider.SearchDecider;
|
||||
import com.twitter.search.common.metrics.SearchRateCounter;
|
||||
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Rejects requests based on the query source of the request. Intended to be used at super-root
|
||||
* or archive-root. If used to reject client request at super-root, the client will get a response
|
||||
* with empty results and a REQUEST_BLOCKED_ERROR status code. If used at archive-root the client
|
||||
* will get a response which might contain some results from realtime and protected and the status
|
||||
* code of the response will depend on how super-root combines responses from the three downstream
|
||||
* roots.
|
||||
*/
|
||||
public class RejectRequestsByQuerySourceFilter extends
|
||||
SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
|
||||
@VisibleForTesting
|
||||
protected static final String NUM_REJECTED_REQUESTS_STAT_NAME_PATTERN =
|
||||
"num_root_%s_rejected_requests_with_query_source_%s";
|
||||
@VisibleForTesting
|
||||
protected static final String REJECT_REQUESTS_DECIDER_KEY_PATTERN =
|
||||
"root_%s_reject_requests_with_query_source_%s";
|
||||
private final Map<ThriftQuerySource, SearchRateCounter> rejectedRequestsCounterPerQuerySource =
|
||||
new HashMap<>();
|
||||
private final Map<ThriftQuerySource, String> rejectRequestsDeciderKeyPerQuerySource =
|
||||
new HashMap<>();
|
||||
private final SearchDecider searchDecider;
|
||||
|
||||
|
||||
@Inject
|
||||
public RejectRequestsByQuerySourceFilter(
|
||||
@Nullable EarlybirdCluster cluster,
|
||||
SearchDecider searchDecider) {
|
||||
|
||||
this.searchDecider = searchDecider;
|
||||
|
||||
String clusterName = cluster != null
|
||||
? cluster.getNameForStats()
|
||||
: EarlybirdCluster.SUPERROOT.getNameForStats();
|
||||
|
||||
for (ThriftQuerySource querySource : ThriftQuerySource.values()) {
|
||||
String querySourceName = querySource.name().toLowerCase();
|
||||
|
||||
rejectedRequestsCounterPerQuerySource.put(querySource,
|
||||
SearchRateCounter.export(
|
||||
String.format(
|
||||
NUM_REJECTED_REQUESTS_STAT_NAME_PATTERN, clusterName, querySourceName)));
|
||||
|
||||
rejectRequestsDeciderKeyPerQuerySource.put(querySource,
|
||||
String.format(
|
||||
REJECT_REQUESTS_DECIDER_KEY_PATTERN, clusterName, querySourceName));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
|
||||
ThriftQuerySource querySource = request.isSetQuerySource()
|
||||
? request.getQuerySource()
|
||||
: ThriftQuerySource.UNKNOWN;
|
||||
|
||||
String deciderKey = rejectRequestsDeciderKeyPerQuerySource.get(querySource);
|
||||
if (searchDecider.isAvailable(deciderKey)) {
|
||||
rejectedRequestsCounterPerQuerySource.get(querySource).increment();
|
||||
return Future.value(getRejectedRequestResponse(querySource, deciderKey));
|
||||
}
|
||||
return service.apply(request);
|
||||
}
|
||||
|
||||
private static EarlybirdResponse getRejectedRequestResponse(
|
||||
ThriftQuerySource querySource, String deciderKey) {
|
||||
return new EarlybirdResponse(EarlybirdResponseCode.REQUEST_BLOCKED_ERROR, 0)
|
||||
.setSearchResults(new ThriftSearchResults())
|
||||
.setDebugString(String.format(
|
||||
"Request with query source %s is blocked by decider %s", querySource, deciderKey));
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,33 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.twitter.finagle.Filter;
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* A filter for transforming a RequestContext to an EarlybirdRequest.
|
||||
*/
|
||||
public class RequestContextToEarlybirdRequestFilter extends
|
||||
Filter<EarlybirdRequestContext, EarlybirdResponse, EarlybirdRequest, EarlybirdResponse> {
|
||||
|
||||
private static final SearchTimerStats REQUEST_CONTEXT_TRIP_TIME =
|
||||
SearchTimerStats.export("request_context_trip_time", TimeUnit.MILLISECONDS, false,
|
||||
true);
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
|
||||
long tripTime = System.currentTimeMillis() - requestContext.getCreatedTimeMillis();
|
||||
REQUEST_CONTEXT_TRIP_TIME.timerIncrement(tripTime);
|
||||
|
||||
return service.apply(requestContext.getRequest());
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,185 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import javax.inject.Inject;
|
||||
|
||||
import scala.runtime.BoxedUnit;
|
||||
|
||||
import com.twitter.common.util.Clock;
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.metrics.Percentile;
|
||||
import com.twitter.search.common.metrics.PercentileUtil;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.query.thriftjava.CollectorParams;
|
||||
import com.twitter.search.common.query.thriftjava.CollectorTerminationParams;
|
||||
import com.twitter.search.earlybird.common.ClientIdUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.snowflake.id.SnowflakeId;
|
||||
import com.twitter.util.Function;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
public class RequestResultStatsFilter
|
||||
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
private final Clock clock;
|
||||
private final RequestResultStats stats;
|
||||
|
||||
static class RequestResultStats {
|
||||
private static final String PREFIX = "request_result_properties_";
|
||||
|
||||
private final SearchCounter resultsRequestedCount;
|
||||
private final SearchCounter resultsReturnedCount;
|
||||
private final SearchCounter maxHitsToProcessCount;
|
||||
private final SearchCounter hitsProcessedCount;
|
||||
private final SearchCounter docsProcessedCount;
|
||||
private final SearchCounter timeoutMsCount;
|
||||
private Map<String, Percentile<Integer>> requestedNumResultsPercentileByClientId;
|
||||
private Map<String, Percentile<Integer>> returnedNumResultsPercentileByClientId;
|
||||
private Map<String, Percentile<Long>> oldestResultPercentileByClientId;
|
||||
|
||||
RequestResultStats() {
|
||||
// Request properties
|
||||
resultsRequestedCount = SearchCounter.export(PREFIX + "results_requested_cnt");
|
||||
maxHitsToProcessCount = SearchCounter.export(PREFIX + "max_hits_to_process_cnt");
|
||||
timeoutMsCount = SearchCounter.export(PREFIX + "timeout_ms_cnt");
|
||||
requestedNumResultsPercentileByClientId = new ConcurrentHashMap<>();
|
||||
|
||||
// Result properties
|
||||
resultsReturnedCount = SearchCounter.export(PREFIX + "results_returned_cnt");
|
||||
hitsProcessedCount = SearchCounter.export(PREFIX + "hits_processed_cnt");
|
||||
docsProcessedCount = SearchCounter.export(PREFIX + "docs_processed_cnt");
|
||||
returnedNumResultsPercentileByClientId = new ConcurrentHashMap<>();
|
||||
oldestResultPercentileByClientId = new ConcurrentHashMap<>();
|
||||
}
|
||||
|
||||
SearchCounter getResultsRequestedCount() {
|
||||
return resultsRequestedCount;
|
||||
}
|
||||
|
||||
SearchCounter getResultsReturnedCount() {
|
||||
return resultsReturnedCount;
|
||||
}
|
||||
|
||||
SearchCounter getMaxHitsToProcessCount() {
|
||||
return maxHitsToProcessCount;
|
||||
}
|
||||
|
||||
SearchCounter getHitsProcessedCount() {
|
||||
return hitsProcessedCount;
|
||||
}
|
||||
|
||||
SearchCounter getDocsProcessedCount() {
|
||||
return docsProcessedCount;
|
||||
}
|
||||
|
||||
SearchCounter getTimeoutMsCount() {
|
||||
return timeoutMsCount;
|
||||
}
|
||||
|
||||
Percentile<Long> getOldestResultPercentile(String clientId) {
|
||||
return oldestResultPercentileByClientId.computeIfAbsent(clientId,
|
||||
key -> PercentileUtil.createPercentile(statName(clientId, "oldest_result_age_seconds")));
|
||||
}
|
||||
|
||||
Percentile<Integer> getRequestedNumResultsPercentile(String clientId) {
|
||||
return requestedNumResultsPercentileByClientId.computeIfAbsent(clientId,
|
||||
key -> PercentileUtil.createPercentile(statName(clientId, "requested_num_results")));
|
||||
}
|
||||
|
||||
Percentile<Integer> getReturnedNumResultsPercentile(String clientId) {
|
||||
return returnedNumResultsPercentileByClientId.computeIfAbsent(clientId,
|
||||
key -> PercentileUtil.createPercentile(statName(clientId, "returned_num_results")));
|
||||
}
|
||||
|
||||
private String statName(String clientId, String suffix) {
|
||||
return String.format("%s%s_%s", PREFIX, ClientIdUtil.formatClientId(clientId), suffix);
|
||||
}
|
||||
}
|
||||
|
||||
@Inject
|
||||
RequestResultStatsFilter(Clock clock, RequestResultStats stats) {
|
||||
this.clock = clock;
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
private void updateRequestStats(EarlybirdRequest request) {
|
||||
ThriftSearchQuery searchQuery = request.getSearchQuery();
|
||||
CollectorParams collectorParams = searchQuery.getCollectorParams();
|
||||
|
||||
if (collectorParams != null) {
|
||||
stats.getResultsRequestedCount().add(collectorParams.numResultsToReturn);
|
||||
if (request.isSetClientId()) {
|
||||
stats.getRequestedNumResultsPercentile(request.getClientId())
|
||||
.record(collectorParams.numResultsToReturn);
|
||||
}
|
||||
CollectorTerminationParams terminationParams = collectorParams.getTerminationParams();
|
||||
if (terminationParams != null) {
|
||||
if (terminationParams.isSetMaxHitsToProcess()) {
|
||||
stats.getMaxHitsToProcessCount().add(terminationParams.maxHitsToProcess);
|
||||
}
|
||||
if (terminationParams.isSetTimeoutMs()) {
|
||||
stats.getTimeoutMsCount().add(terminationParams.timeoutMs);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (searchQuery.isSetNumResults()) {
|
||||
stats.getResultsRequestedCount().add(searchQuery.numResults);
|
||||
if (request.isSetClientId()) {
|
||||
stats.getRequestedNumResultsPercentile(request.getClientId())
|
||||
.record(searchQuery.numResults);
|
||||
}
|
||||
}
|
||||
if (searchQuery.isSetMaxHitsToProcess()) {
|
||||
stats.getMaxHitsToProcessCount().add(searchQuery.maxHitsToProcess);
|
||||
}
|
||||
if (request.isSetTimeoutMs()) {
|
||||
stats.getTimeoutMsCount().add(request.timeoutMs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void updateResultsStats(String clientId, ThriftSearchResults results) {
|
||||
stats.getResultsReturnedCount().add(results.getResultsSize());
|
||||
if (results.isSetNumHitsProcessed()) {
|
||||
stats.getHitsProcessedCount().add(results.numHitsProcessed);
|
||||
}
|
||||
|
||||
if (clientId != null) {
|
||||
if (results.getResultsSize() > 0) {
|
||||
List<ThriftSearchResult> resultsList = results.getResults();
|
||||
|
||||
long lastId = resultsList.get(resultsList.size() - 1).getId();
|
||||
long tweetTime = SnowflakeId.timeFromId(lastId).inLongSeconds();
|
||||
long tweetAge = (clock.nowMillis() / 1000) - tweetTime;
|
||||
stats.getOldestResultPercentile(clientId).record(tweetAge);
|
||||
}
|
||||
|
||||
stats.getReturnedNumResultsPercentile(clientId).record(results.getResultsSize());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequest request,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
|
||||
updateRequestStats(request);
|
||||
|
||||
return service.apply(request).onSuccess(
|
||||
new Function<EarlybirdResponse, BoxedUnit>() {
|
||||
@Override
|
||||
public BoxedUnit apply(EarlybirdResponse response) {
|
||||
if (response.isSetSearchResults()) {
|
||||
updateResultsStats(request.getClientId(), response.searchResults);
|
||||
}
|
||||
return BoxedUnit.UNIT;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,79 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import javax.inject.Inject;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.root.RequestSuccessStats;
|
||||
import com.twitter.search.common.util.FinagleUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.util.Future;
|
||||
import com.twitter.util.FutureEventListener;
|
||||
|
||||
import static com.twitter.search.common.util.earlybird.EarlybirdResponseUtil.responseConsideredFailed;
|
||||
|
||||
|
||||
/**
|
||||
* Records cancellations, timeouts, and failures for requests that do not go through
|
||||
* ScatterGatherService (which also updates these stats, but for different requests).
|
||||
*/
|
||||
public class RequestSuccessStatsFilter
|
||||
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
|
||||
private final RequestSuccessStats stats;
|
||||
|
||||
@Inject
|
||||
RequestSuccessStatsFilter(RequestSuccessStats stats) {
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequest request,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
|
||||
final long startTime = System.nanoTime();
|
||||
|
||||
return service.apply(request).addEventListener(
|
||||
new FutureEventListener<EarlybirdResponse>() {
|
||||
@Override
|
||||
public void onSuccess(EarlybirdResponse response) {
|
||||
boolean success = true;
|
||||
|
||||
if (response.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR) {
|
||||
success = false;
|
||||
stats.getCancelledRequestCount().increment();
|
||||
} else if (response.getResponseCode() == EarlybirdResponseCode.SERVER_TIMEOUT_ERROR) {
|
||||
success = false;
|
||||
stats.getTimedoutRequestCount().increment();
|
||||
} else if (responseConsideredFailed(response.getResponseCode())) {
|
||||
success = false;
|
||||
stats.getErroredRequestCount().increment();
|
||||
}
|
||||
|
||||
long latencyNanos = System.nanoTime() - startTime;
|
||||
stats.getRequestLatencyStats().requestComplete(
|
||||
TimeUnit.NANOSECONDS.toMillis(latencyNanos), 0, success);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Throwable cause) {
|
||||
long latencyNanos = System.nanoTime() - startTime;
|
||||
stats.getRequestLatencyStats().requestComplete(
|
||||
TimeUnit.NANOSECONDS.toMillis(latencyNanos), 0, false);
|
||||
|
||||
if (FinagleUtil.isCancelException(cause)) {
|
||||
stats.getCancelledRequestCount().increment();
|
||||
} else if (FinagleUtil.isTimeoutException(cause)) {
|
||||
stats.getTimedoutRequestCount().increment();
|
||||
} else {
|
||||
stats.getErroredRequestCount().increment();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,105 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.cache.CacheLoader;
|
||||
import com.google.common.cache.LoadingCache;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
|
||||
import com.twitter.common.util.Clock;
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.clientstats.RequestCounters;
|
||||
import com.twitter.search.common.clientstats.RequestCountersEventListener;
|
||||
import com.twitter.search.common.util.FinagleUtil;
|
||||
import com.twitter.search.earlybird.common.ClientIdUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
public class RequestTypeCountFilter
|
||||
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
private final ImmutableMap<EarlybirdRequestType, RequestCounters> typeCounters;
|
||||
private final RequestCounters allRequestTypesCounter;
|
||||
private final ImmutableMap<EarlybirdRequestType, LoadingCache<String, RequestCounters>>
|
||||
perTypePerClientCounters;
|
||||
|
||||
/**
|
||||
* Constructs the filter.
|
||||
*/
|
||||
public RequestTypeCountFilter(final String statSuffix) {
|
||||
ImmutableMap.Builder<EarlybirdRequestType, RequestCounters> perTypeBuilder =
|
||||
ImmutableMap.builder();
|
||||
for (EarlybirdRequestType type : EarlybirdRequestType.values()) {
|
||||
perTypeBuilder.put(type, new RequestCounters(
|
||||
"request_type_count_filter_" + type.getNormalizedName() + "_" + statSuffix));
|
||||
}
|
||||
typeCounters = perTypeBuilder.build();
|
||||
|
||||
allRequestTypesCounter =
|
||||
new RequestCounters("request_type_count_filter_all_" + statSuffix, true);
|
||||
|
||||
ImmutableMap.Builder<EarlybirdRequestType, LoadingCache<String, RequestCounters>>
|
||||
perTypePerClientBuilder = ImmutableMap.builder();
|
||||
|
||||
// No point in setting any kind of expiration policy for the cache, since the stats will
|
||||
// continue to be exported, so the objects will not be GCed anyway.
|
||||
CacheBuilder<Object, Object> cacheBuilder = CacheBuilder.newBuilder();
|
||||
for (final EarlybirdRequestType requestType : EarlybirdRequestType.values()) {
|
||||
CacheLoader<String, RequestCounters> cacheLoader =
|
||||
new CacheLoader<String, RequestCounters>() {
|
||||
@Override
|
||||
public RequestCounters load(String clientId) {
|
||||
return new RequestCounters("request_type_count_filter_for_" + clientId + "_"
|
||||
+ requestType.getNormalizedName() + "_" + statSuffix);
|
||||
}
|
||||
};
|
||||
perTypePerClientBuilder.put(requestType, cacheBuilder.build(cacheLoader));
|
||||
}
|
||||
perTypePerClientCounters = perTypePerClientBuilder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
EarlybirdRequestType requestType = requestContext.getEarlybirdRequestType();
|
||||
RequestCounters requestCounters = typeCounters.get(requestType);
|
||||
Preconditions.checkNotNull(requestCounters);
|
||||
|
||||
// Update the per-type and "all" counters.
|
||||
RequestCountersEventListener<EarlybirdResponse> requestCountersEventListener =
|
||||
new RequestCountersEventListener<>(
|
||||
requestCounters, Clock.SYSTEM_CLOCK, EarlybirdSuccessfulResponseHandler.INSTANCE);
|
||||
RequestCountersEventListener<EarlybirdResponse> allRequestTypesEventListener =
|
||||
new RequestCountersEventListener<>(
|
||||
allRequestTypesCounter, Clock.SYSTEM_CLOCK,
|
||||
EarlybirdSuccessfulResponseHandler.INSTANCE);
|
||||
|
||||
RequestCountersEventListener<EarlybirdResponse> perTypePerClientEventListener =
|
||||
updatePerTypePerClientCountersListener(requestContext);
|
||||
|
||||
return service.apply(requestContext)
|
||||
.addEventListener(requestCountersEventListener)
|
||||
.addEventListener(allRequestTypesEventListener)
|
||||
.addEventListener(perTypePerClientEventListener);
|
||||
}
|
||||
|
||||
private RequestCountersEventListener<EarlybirdResponse> updatePerTypePerClientCountersListener(
|
||||
EarlybirdRequestContext earlybirdRequestContext) {
|
||||
EarlybirdRequestType requestType = earlybirdRequestContext.getEarlybirdRequestType();
|
||||
LoadingCache<String, RequestCounters> perClientCounters =
|
||||
perTypePerClientCounters.get(requestType);
|
||||
Preconditions.checkNotNull(perClientCounters);
|
||||
|
||||
String clientId = ClientIdUtil.formatFinagleClientIdAndClientId(
|
||||
FinagleUtil.getFinagleClientName(),
|
||||
ClientIdUtil.getClientIdFromRequest(earlybirdRequestContext.getRequest()));
|
||||
RequestCounters clientCounters = perClientCounters.getUnchecked(clientId);
|
||||
Preconditions.checkNotNull(clientCounters);
|
||||
|
||||
return new RequestCountersEventListener<>(
|
||||
clientCounters, Clock.SYSTEM_CLOCK, EarlybirdSuccessfulResponseHandler.INSTANCE);
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,50 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.util.Future;
|
||||
import com.twitter.util.FutureEventListener;
|
||||
|
||||
public class ResponseCodeStatFilter
|
||||
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
|
||||
private final Map<EarlybirdResponseCode, SearchCounter> responseCodeCounters;
|
||||
|
||||
/**
|
||||
* Create ResponseCodeStatFilter
|
||||
*/
|
||||
public ResponseCodeStatFilter() {
|
||||
responseCodeCounters = Maps.newEnumMap(EarlybirdResponseCode.class);
|
||||
for (EarlybirdResponseCode code : EarlybirdResponseCode.values()) {
|
||||
SearchCounter stat = SearchCounter.export("response_code_" + code.name().toLowerCase());
|
||||
responseCodeCounters.put(code, stat);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
final EarlybirdRequest request,
|
||||
final Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
|
||||
return service.apply(request).addEventListener(
|
||||
new FutureEventListener<EarlybirdResponse>() {
|
||||
|
||||
@Override
|
||||
public void onSuccess(final EarlybirdResponse response) {
|
||||
responseCodeCounters.get(response.getResponseCode()).increment();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(final Throwable cause) { }
|
||||
});
|
||||
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,114 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.NavigableMap;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Singleton;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.collect.ImmutableSortedMap;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.metrics.SearchCustomGauge;
|
||||
import com.twitter.search.earlybird.config.TierInfo;
|
||||
import com.twitter.search.earlybird.config.TierInfoSource;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.snowflake.id.SnowflakeId;
|
||||
import com.twitter.util.Future;
|
||||
import com.twitter.util.FutureEventListener;
|
||||
|
||||
/**
|
||||
* A filter to count the tier to which the oldest tweet in the results belong.
|
||||
*/
|
||||
@Singleton
|
||||
public class ResultTierCountFilter
|
||||
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
|
||||
private static final String COUNTER_PREFIX = "result_tier_count";
|
||||
private final long firstTweetTimeSinceEpochSec;
|
||||
private final NavigableMap<Long, SearchCounter> tierBuckets;
|
||||
private final SearchCounter allCounter = SearchCounter.export(COUNTER_PREFIX + "_all");
|
||||
private final SearchCounter noResultsCounter =
|
||||
SearchCounter.export(COUNTER_PREFIX + "_no_results");
|
||||
|
||||
@Inject
|
||||
@SuppressWarnings("unused")
|
||||
ResultTierCountFilter(TierInfoSource tierInfoSource) {
|
||||
List<TierInfo> tierInfos = tierInfoSource.getTierInformation();
|
||||
tierInfos.sort(Comparator.comparing(TierInfo::getDataStartDate));
|
||||
|
||||
firstTweetTimeSinceEpochSec = tierInfos.get(0).getServingRangeSinceTimeSecondsFromEpoch();
|
||||
|
||||
ImmutableSortedMap.Builder<Long, SearchCounter> builder = ImmutableSortedMap.naturalOrder();
|
||||
Collections.reverse(tierInfos);
|
||||
|
||||
for (TierInfo tierInfo : tierInfos) {
|
||||
SearchCounter searchCounter = SearchCounter.export(
|
||||
String.format("%s_%s", COUNTER_PREFIX, tierInfo.getTierName()));
|
||||
builder.put(tierInfo.getServingRangeSinceTimeSecondsFromEpoch(), searchCounter);
|
||||
|
||||
// export cumulative metrics to sum from the latest to a lower tier
|
||||
Collection<SearchCounter> counters = builder.build().values();
|
||||
SearchCustomGauge.export(
|
||||
String.format("%s_down_to_%s", COUNTER_PREFIX, tierInfo.getTierName()),
|
||||
() -> counters.stream()
|
||||
.mapToLong(SearchCounter::get)
|
||||
.sum());
|
||||
}
|
||||
|
||||
tierBuckets = builder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext context,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
return service.apply(context).addEventListener(
|
||||
new FutureEventListener<EarlybirdResponse>() {
|
||||
@Override
|
||||
public void onFailure(Throwable cause) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onSuccess(EarlybirdResponse response) {
|
||||
record(response);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
void record(EarlybirdResponse response) {
|
||||
if (response.isSetSearchResults()) {
|
||||
long minResultsStatusId = response.getSearchResults().getResults().stream()
|
||||
.mapToLong(ThriftSearchResult::getId)
|
||||
.min()
|
||||
.orElse(-1);
|
||||
getBucket(minResultsStatusId).increment();
|
||||
}
|
||||
allCounter.increment();
|
||||
}
|
||||
|
||||
private SearchCounter getBucket(long statusId) {
|
||||
if (statusId < 0) {
|
||||
return noResultsCounter;
|
||||
}
|
||||
|
||||
// If non-negative statusId is not a SnowflakeId, the tweet must have been created before
|
||||
// Twepoch (2010-11-04T01:42:54Z) and thus belongs to full1.
|
||||
long timeSinceEpochSec = firstTweetTimeSinceEpochSec;
|
||||
if (SnowflakeId.isSnowflakeId(statusId)) {
|
||||
timeSinceEpochSec = SnowflakeId.timeFromId(statusId).inSeconds();
|
||||
}
|
||||
|
||||
return tierBuckets.floorEntry(timeSinceEpochSec).getValue();
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,59 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.search.common.root.ScatterGatherService;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ExperimentCluster;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
public class ScatterGatherWithExperimentRedirectsService
|
||||
extends Service<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
private final Service<EarlybirdRequestContext, EarlybirdResponse>
|
||||
controlScatterGatherService;
|
||||
|
||||
private final Map<ExperimentCluster,
|
||||
ScatterGatherService<EarlybirdRequestContext, EarlybirdResponse>>
|
||||
experimentScatterGatherServices;
|
||||
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(ScatterGatherWithExperimentRedirectsService.class);
|
||||
|
||||
public ScatterGatherWithExperimentRedirectsService(
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> controlScatterGatherService,
|
||||
Map<ExperimentCluster,
|
||||
ScatterGatherService<EarlybirdRequestContext, EarlybirdResponse>>
|
||||
experimentScatterGatherServices
|
||||
) {
|
||||
this.controlScatterGatherService = controlScatterGatherService;
|
||||
this.experimentScatterGatherServices = experimentScatterGatherServices;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(EarlybirdRequestContext request) {
|
||||
if (request.getRequest().isSetExperimentClusterToUse()) {
|
||||
ExperimentCluster cluster = request.getRequest().getExperimentClusterToUse();
|
||||
|
||||
if (!experimentScatterGatherServices.containsKey(cluster)) {
|
||||
String error = String.format(
|
||||
"Received invalid experiment cluster: %s", cluster.name());
|
||||
|
||||
LOG.error("{} Request: {}", error, request.getRequest());
|
||||
|
||||
return Future.value(new EarlybirdResponse()
|
||||
.setResponseCode(EarlybirdResponseCode.CLIENT_ERROR)
|
||||
.setDebugString(error));
|
||||
}
|
||||
|
||||
return experimentScatterGatherServices.get(cluster).apply(request);
|
||||
}
|
||||
|
||||
return controlScatterGatherService.apply(request);
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,43 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
import scala.Option;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.finagle.context.Contexts;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.root.SearchPayloadSizeFilter;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* A filter that sets the clientId in the local context, to be usd later by SearchPayloadSizeFilter.
|
||||
*/
|
||||
public class SearchPayloadSizeLocalContextFilter
|
||||
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
private static final SearchCounter CLIENT_ID_CONTEXT_KEY_NOT_SET_COUNTER = SearchCounter.export(
|
||||
"search_payload_size_local_context_filter_client_id_context_key_not_set");
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
// In production, the SearchPayloadSizeFilter.CLIENT_ID_CONTEXT_KEY should always be set
|
||||
// (by ThriftServer). However, it's not set in tests, because tests do not start a ThriftServer.
|
||||
Option<AtomicReference<String>> clientIdOption =
|
||||
Contexts.local().get(SearchPayloadSizeFilter.CLIENT_ID_CONTEXT_KEY);
|
||||
if (clientIdOption.isDefined()) {
|
||||
AtomicReference<String> clientIdReference = clientIdOption.get();
|
||||
Preconditions.checkArgument(clientIdReference.get() == null);
|
||||
clientIdReference.set(request.getClientId());
|
||||
} else {
|
||||
CLIENT_ID_CONTEXT_KEY_NOT_SET_COUNTER.increment();
|
||||
}
|
||||
|
||||
return service.apply(request);
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,140 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
|
||||
import org.apache.thrift.TException;
|
||||
import org.slf4j.Logger;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.util.thrift.ThriftUtils;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
import com.twitter.util.FutureEventListener;
|
||||
|
||||
/**
|
||||
* The general framework for earlybird root to track sensitive results.
|
||||
*/
|
||||
public abstract class SensitiveResultsTrackingFilter
|
||||
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
|
||||
/**
|
||||
* The type name is used to distinguish different kinds of sensitive results in log.
|
||||
*/
|
||||
private final String typeName;
|
||||
|
||||
/**
|
||||
* The mark is to control whether to log expensive information.
|
||||
*/
|
||||
private final boolean logDetails;
|
||||
|
||||
/**
|
||||
* Constructor helps distinguish different sensitive content trackers.
|
||||
* @param typeName The sensitive content's name (e.g. nullcast)
|
||||
* @param logDetails Whether to log details such as serialized requests and responses
|
||||
*/
|
||||
public SensitiveResultsTrackingFilter(final String typeName, boolean logDetails) {
|
||||
super();
|
||||
this.typeName = typeName;
|
||||
this.logDetails = logDetails;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the LOG that the sensitive results can write to.
|
||||
*/
|
||||
protected abstract Logger getLogger();
|
||||
|
||||
/**
|
||||
* The counter which counts the number of queries with sensitive results.
|
||||
*/
|
||||
protected abstract SearchCounter getSensitiveQueryCounter();
|
||||
|
||||
/**
|
||||
* The counter which counts the number of sensitive results.
|
||||
*/
|
||||
protected abstract SearchCounter getSensitiveResultsCounter();
|
||||
|
||||
/**
|
||||
* The method defines how the sensitive results are identified.
|
||||
*/
|
||||
protected abstract Set<Long> getSensitiveResults(
|
||||
EarlybirdRequestContext requestContext,
|
||||
EarlybirdResponse earlybirdResponse) throws Exception;
|
||||
|
||||
/**
|
||||
* Get a set of tweets which should be exclude from the sensitive results set.
|
||||
*/
|
||||
protected abstract Set<Long> getExceptedResults(EarlybirdRequestContext requestContext);
|
||||
|
||||
@Override
|
||||
public final Future<EarlybirdResponse> apply(
|
||||
final EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
Future<EarlybirdResponse> response = service.apply(requestContext);
|
||||
|
||||
response.addEventListener(new FutureEventListener<EarlybirdResponse>() {
|
||||
@Override
|
||||
public void onSuccess(EarlybirdResponse earlybirdResponse) {
|
||||
try {
|
||||
if (earlybirdResponse.responseCode == EarlybirdResponseCode.SUCCESS
|
||||
&& earlybirdResponse.isSetSearchResults()
|
||||
&& requestContext.getParsedQuery() != null) {
|
||||
Set<Long> statusIds = getSensitiveResults(requestContext, earlybirdResponse);
|
||||
Set<Long> exceptedIds = getExceptedResults(requestContext);
|
||||
statusIds.removeAll(exceptedIds);
|
||||
|
||||
if (statusIds.size() > 0) {
|
||||
getSensitiveQueryCounter().increment();
|
||||
getSensitiveResultsCounter().add(statusIds.size());
|
||||
logContent(requestContext, earlybirdResponse, statusIds);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
getLogger().error("Caught exception while trying to log sensitive results for query: {}",
|
||||
requestContext.getParsedQuery().serialize(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(Throwable cause) {
|
||||
}
|
||||
});
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
private void logContent(
|
||||
final EarlybirdRequestContext requestContext,
|
||||
final EarlybirdResponse earlybirdResponse,
|
||||
final Set<Long> statusIds) {
|
||||
|
||||
if (logDetails) {
|
||||
String base64Request;
|
||||
try {
|
||||
base64Request = ThriftUtils.toBase64EncodedString(requestContext.getRequest());
|
||||
} catch (TException e) {
|
||||
base64Request = "Failed to parse base 64 request";
|
||||
}
|
||||
getLogger().error("Found " + typeName
|
||||
+ ": {} | "
|
||||
+ "parsedQuery: {} | "
|
||||
+ "request: {} | "
|
||||
+ "base 64 request: {} | "
|
||||
+ "response: {}",
|
||||
Joiner.on(",").join(statusIds),
|
||||
requestContext.getParsedQuery().serialize(),
|
||||
requestContext.getRequest(),
|
||||
base64Request,
|
||||
earlybirdResponse);
|
||||
} else {
|
||||
getLogger().error("Found " + typeName + ": {} for parsedQuery {}",
|
||||
Joiner.on(",").join(statusIds),
|
||||
requestContext.getParsedQuery().serialize());
|
||||
}
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,27 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/** A per-service filter for handling exceptions. */
|
||||
public class ServiceExceptionHandlingFilter
|
||||
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
private final EarlybirdResponseExceptionHandler exceptionHandler;
|
||||
|
||||
/** Creates a new ServiceExceptionHandlingFilter instance. */
|
||||
public ServiceExceptionHandlingFilter(EarlybirdCluster cluster) {
|
||||
this.exceptionHandler = new EarlybirdResponseExceptionHandler(cluster.getNameForStats());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
return exceptionHandler.handleException(
|
||||
requestContext.getRequest(), service.apply(requestContext));
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,81 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
|
||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
|
||||
import com.twitter.search.earlybird_root.validators.FacetsResponseValidator;
|
||||
import com.twitter.search.earlybird_root.validators.PassThroughResponseValidator;
|
||||
import com.twitter.search.earlybird_root.validators.ServiceResponseValidator;
|
||||
import com.twitter.search.earlybird_root.validators.TermStatsResultsValidator;
|
||||
import com.twitter.search.earlybird_root.validators.TopTweetsResultsValidator;
|
||||
import com.twitter.util.Function;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Filter responsible for handling invalid response returned by downstream services, and
|
||||
* translating them into EarlybirdResponseExceptions.
|
||||
*/
|
||||
public class ServiceResponseValidationFilter
|
||||
extends SimpleFilter<EarlybirdRequestContext, EarlybirdResponse> {
|
||||
|
||||
private final Map<EarlybirdRequestType, ServiceResponseValidator<EarlybirdResponse>>
|
||||
requestTypeToResponseValidators = new HashMap<>();
|
||||
private final EarlybirdCluster cluster;
|
||||
|
||||
/**
|
||||
* Creates a new filter for handling invalid response
|
||||
*/
|
||||
public ServiceResponseValidationFilter(EarlybirdCluster cluster) {
|
||||
this.cluster = cluster;
|
||||
|
||||
ServiceResponseValidator<EarlybirdResponse> passThroughValidator =
|
||||
new PassThroughResponseValidator();
|
||||
|
||||
requestTypeToResponseValidators
|
||||
.put(EarlybirdRequestType.FACETS, new FacetsResponseValidator(cluster));
|
||||
requestTypeToResponseValidators
|
||||
.put(EarlybirdRequestType.RECENCY, passThroughValidator);
|
||||
requestTypeToResponseValidators
|
||||
.put(EarlybirdRequestType.RELEVANCE, passThroughValidator);
|
||||
requestTypeToResponseValidators
|
||||
.put(EarlybirdRequestType.STRICT_RECENCY, passThroughValidator);
|
||||
requestTypeToResponseValidators
|
||||
.put(EarlybirdRequestType.TERM_STATS, new TermStatsResultsValidator(cluster));
|
||||
requestTypeToResponseValidators
|
||||
.put(EarlybirdRequestType.TOP_TWEETS, new TopTweetsResultsValidator(cluster));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
final EarlybirdRequestContext requestContext,
|
||||
Service<EarlybirdRequestContext, EarlybirdResponse> service) {
|
||||
return service.apply(requestContext).flatMap(
|
||||
new Function<EarlybirdResponse, Future<EarlybirdResponse>>() {
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(EarlybirdResponse response) {
|
||||
if (response == null) {
|
||||
return Future.exception(new IllegalStateException(
|
||||
cluster + " returned null response"));
|
||||
}
|
||||
|
||||
if (response.getResponseCode() == EarlybirdResponseCode.SUCCESS) {
|
||||
return requestTypeToResponseValidators
|
||||
.get(requestContext.getEarlybirdRequestType())
|
||||
.validate(response);
|
||||
}
|
||||
|
||||
return Future.value(EarlybirdResponseMergeUtil.transformInvalidResponse(
|
||||
response,
|
||||
String.format("Failure from %s (%s)", cluster, response.getResponseCode())));
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,12 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import com.twitter.search.earlybird.config.ServingRange;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
|
||||
/** Supplies the {@link ServingRange} to use for a given request. */
public interface ServingRangeProvider {
|
||||
/**
|
||||
* Get a ServingRange implementation.
|
||||
* Usually backed by either TierInfoWrapper or RootClusterBoundaryInfo.
*
* @param requestContext the request for which the serving range is looked up
* @param useBoundaryOverride NOTE(review): presumably selects an overridden boundary instead
*     of the default one -- confirm against the implementations
* @return the serving range for this request
|
||||
*/
|
||||
ServingRange getServingRange(EarlybirdRequestContext requestContext, boolean useBoundaryOverride);
|
||||
}
|
Binary file not shown.
@ -1,30 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.earlybird.common.ClientIdUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* A filter that will set the clientId of the request to the strato HttpEndpoint Attribution.
|
||||
* <p>
|
||||
* If the clientId is already set to something non-null then that value is used.
|
||||
* If the clientId is null but Attribution.httpEndpoint() contains a value it will be set as
|
||||
* the clientId.
|
||||
*/
|
||||
public class StratoAttributionClientIdFilter extends
|
||||
SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequest request, Service<EarlybirdRequest, EarlybirdResponse> service
|
||||
) {
|
||||
if (request.getClientId() == null) {
|
||||
ClientIdUtil.getClientIdFromHttpEndpointAttribution().ifPresent(request::setClientId);
|
||||
}
|
||||
|
||||
return service.apply(request);
|
||||
}
|
||||
}
|
||||
|
Binary file not shown.
@ -1,24 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/** A top level filter for handling exceptions. */
|
||||
public class TopLevelExceptionHandlingFilter
|
||||
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
private final EarlybirdResponseExceptionHandler exceptionHandler;
|
||||
|
||||
/** Creates a new TopLevelExceptionHandlingFilter instance. */
|
||||
public TopLevelExceptionHandlingFilter() {
|
||||
this.exceptionHandler = new EarlybirdResponseExceptionHandler("top_level");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
return exceptionHandler.handleException(request, service.apply(request));
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,30 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestUtil;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* A filter that unsets some request fields that make sense only on the SuperRoot, before sending
|
||||
* them to the individual roots.
|
||||
*/
|
||||
public class UnsetSuperRootFieldsFilter extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
private final boolean unsetFollowedUserIds;
|
||||
|
||||
public UnsetSuperRootFieldsFilter() {
|
||||
this(true);
|
||||
}
|
||||
|
||||
public UnsetSuperRootFieldsFilter(boolean unsetFollowedUserIds) {
|
||||
this.unsetFollowedUserIds = unsetFollowedUserIds;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(EarlybirdRequest request,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service) {
|
||||
return service.apply(EarlybirdRequestUtil.unsetSuperRootFields(request, unsetFollowedUserIds));
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,44 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.filters;
|
||||
|
||||
import javax.inject.Inject;
|
||||
|
||||
import com.twitter.finagle.Service;
|
||||
import com.twitter.finagle.SimpleFilter;
|
||||
import com.twitter.search.common.decider.SearchDecider;
|
||||
import com.twitter.search.common.metrics.SearchRateCounter;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
public class VeryRecentTweetsFilter
|
||||
extends SimpleFilter<EarlybirdRequest, EarlybirdResponse> {
|
||||
private static final String DECIDER_KEY = "enable_very_recent_tweets";
|
||||
private static final SearchRateCounter VERY_RECENT_TWEETS_NOT_MODIFIED =
|
||||
SearchRateCounter.export("very_recent_tweets_not_modified");
|
||||
private static final SearchRateCounter VERY_RECENT_TWEETS_ENABLED =
|
||||
SearchRateCounter.export("very_recent_tweets_enabled");
|
||||
|
||||
private final SearchDecider decider;
|
||||
|
||||
@Inject
|
||||
public VeryRecentTweetsFilter(
|
||||
SearchDecider decider
|
||||
) {
|
||||
this.decider = decider;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Future<EarlybirdResponse> apply(
|
||||
EarlybirdRequest request,
|
||||
Service<EarlybirdRequest, EarlybirdResponse> service
|
||||
) {
|
||||
if (decider.isAvailable(DECIDER_KEY)) {
|
||||
VERY_RECENT_TWEETS_ENABLED.increment();
|
||||
request.setSkipVeryRecentTweets(false);
|
||||
} else {
|
||||
VERY_RECENT_TWEETS_NOT_MODIFIED.increment();
|
||||
}
|
||||
|
||||
return service.apply(request);
|
||||
}
|
||||
}
|
Binary file not shown.
Before Width: | Height: | Size: 60 KiB |
Binary file not shown.
@ -1,176 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.TierResponse;
|
||||
|
||||
/**
|
||||
* Collection of EarlybirdResponses and associated stats to be merged.
|
||||
*/
|
||||
public class AccumulatedResponses {
|
||||
// The list of the successful responses from all earlybird futures. This does not include empty
|
||||
// responses resulted from null requests.
|
||||
private final List<EarlybirdResponse> successResponses;
|
||||
// The list of the unsuccessful responses from all earlybird futures.
|
||||
private final List<EarlybirdResponse> errorResponses;
|
||||
// the list of max statusIds seen in each earlybird.
|
||||
private final List<Long> maxIds;
|
||||
// the list of min statusIds seen in each earlybird.
|
||||
private final List<Long> minIds;
|
||||
|
||||
private final EarlyTerminationInfo mergedEarlyTerminationInfo;
|
||||
private final boolean isMergingAcrossTiers;
|
||||
private final PartitionCounts partitionCounts;
|
||||
private final int numSearchedSegments;
|
||||
|
||||
public static final class PartitionCounts {
|
||||
private final int numPartitions;
|
||||
private final int numSuccessfulPartitions;
|
||||
private final List<TierResponse> perTierResponse;
|
||||
|
||||
public PartitionCounts(int numPartitions, int numSuccessfulPartitions, List<TierResponse>
|
||||
perTierResponse) {
|
||||
this.numPartitions = numPartitions;
|
||||
this.numSuccessfulPartitions = numSuccessfulPartitions;
|
||||
this.perTierResponse = perTierResponse;
|
||||
}
|
||||
|
||||
public int getNumPartitions() {
|
||||
return numPartitions;
|
||||
}
|
||||
|
||||
public int getNumSuccessfulPartitions() {
|
||||
return numSuccessfulPartitions;
|
||||
}
|
||||
|
||||
public List<TierResponse> getPerTierResponse() {
|
||||
return perTierResponse;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create AccumulatedResponses
|
||||
*/
|
||||
public AccumulatedResponses(List<EarlybirdResponse> successResponses,
|
||||
List<EarlybirdResponse> errorResponses,
|
||||
List<Long> maxIds,
|
||||
List<Long> minIds,
|
||||
EarlyTerminationInfo mergedEarlyTerminationInfo,
|
||||
boolean isMergingAcrossTiers,
|
||||
PartitionCounts partitionCounts,
|
||||
int numSearchedSegments) {
|
||||
this.successResponses = successResponses;
|
||||
this.errorResponses = errorResponses;
|
||||
this.maxIds = maxIds;
|
||||
this.minIds = minIds;
|
||||
this.mergedEarlyTerminationInfo = mergedEarlyTerminationInfo;
|
||||
this.isMergingAcrossTiers = isMergingAcrossTiers;
|
||||
this.partitionCounts = partitionCounts;
|
||||
this.numSearchedSegments = numSearchedSegments;
|
||||
}
|
||||
|
||||
public List<EarlybirdResponse> getSuccessResponses() {
|
||||
return successResponses;
|
||||
}
|
||||
|
||||
public List<EarlybirdResponse> getErrorResponses() {
|
||||
return errorResponses;
|
||||
}
|
||||
|
||||
public List<Long> getMaxIds() {
|
||||
return maxIds;
|
||||
}
|
||||
|
||||
public List<Long> getMinIds() {
|
||||
return minIds;
|
||||
}
|
||||
|
||||
public EarlyTerminationInfo getMergedEarlyTerminationInfo() {
|
||||
return mergedEarlyTerminationInfo;
|
||||
}
|
||||
|
||||
public boolean foundError() {
|
||||
return !errorResponses.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to return a merged EarlybirdResponse that propagates as much information from the error
|
||||
* responses as possible.
|
||||
*
|
||||
* If all error responses have the same error response code, the merged response will have the
|
||||
* same error response code, and the debugString/debugInfo on the merged response will be set to
|
||||
* the debugString/debugInfo of one of the merged responses.
|
||||
*
|
||||
* If the error responses have at least 2 different response codes, TRANSIENT_ERROR will be set
|
||||
* on the merged response. Also, we will look for the most common error response code, and will
|
||||
* propagate the debugString/debugInfo from an error response with that response code.
|
||||
*/
|
||||
public EarlybirdResponse getMergedErrorResponse() {
|
||||
Preconditions.checkState(!errorResponses.isEmpty());
|
||||
|
||||
// Find a response that has the most common error response code.
|
||||
int maxCount = 0;
|
||||
EarlybirdResponse errorResponseWithMostCommonErrorResponseCode = null;
|
||||
Map<EarlybirdResponseCode, Integer> responseCodeCounts = Maps.newHashMap();
|
||||
for (EarlybirdResponse errorResponse : errorResponses) {
|
||||
EarlybirdResponseCode responseCode = errorResponse.getResponseCode();
|
||||
Integer responseCodeCount = responseCodeCounts.get(responseCode);
|
||||
if (responseCodeCount == null) {
|
||||
responseCodeCount = 0;
|
||||
}
|
||||
++responseCodeCount;
|
||||
responseCodeCounts.put(responseCode, responseCodeCount);
|
||||
if (responseCodeCount > maxCount) {
|
||||
errorResponseWithMostCommonErrorResponseCode = errorResponse;
|
||||
}
|
||||
}
|
||||
|
||||
// If all error responses have the same response code, set it on the merged response.
|
||||
// Otherwise, set TRANSIENT_ERROR on the merged response.
|
||||
EarlybirdResponseCode mergedResponseCode = EarlybirdResponseCode.TRANSIENT_ERROR;
|
||||
if (responseCodeCounts.size() == 1) {
|
||||
mergedResponseCode = responseCodeCounts.keySet().iterator().next();
|
||||
}
|
||||
|
||||
EarlybirdResponse mergedResponse = new EarlybirdResponse()
|
||||
.setResponseCode(mergedResponseCode);
|
||||
|
||||
// Propagate the debugString/debugInfo of the selected error response to the merged response.
|
||||
Preconditions.checkNotNull(errorResponseWithMostCommonErrorResponseCode);
|
||||
if (errorResponseWithMostCommonErrorResponseCode.isSetDebugString()) {
|
||||
mergedResponse.setDebugString(errorResponseWithMostCommonErrorResponseCode.getDebugString());
|
||||
}
|
||||
if (errorResponseWithMostCommonErrorResponseCode.isSetDebugInfo()) {
|
||||
mergedResponse.setDebugInfo(errorResponseWithMostCommonErrorResponseCode.getDebugInfo());
|
||||
}
|
||||
|
||||
// Set the numPartitions and numPartitionsSucceeded on the mergedResponse
|
||||
mergedResponse.setNumPartitions(partitionCounts.getNumPartitions());
|
||||
mergedResponse.setNumSuccessfulPartitions(partitionCounts.getNumSuccessfulPartitions());
|
||||
|
||||
return mergedResponse;
|
||||
}
|
||||
|
||||
public boolean isMergingAcrossTiers() {
|
||||
return isMergingAcrossTiers;
|
||||
}
|
||||
|
||||
public boolean isMergingPartitionsWithinATier() {
|
||||
return !isMergingAcrossTiers;
|
||||
}
|
||||
|
||||
public PartitionCounts getPartitionCounts() {
|
||||
return partitionCounts;
|
||||
}
|
||||
|
||||
public int getNumSearchedSegments() {
|
||||
return numSearchedSegments;
|
||||
}
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
java_library(
|
||||
sources = ["*.java"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/google/guava",
|
||||
"3rdparty/jvm/log4j",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-api",
|
||||
"src/java/com/twitter/common/collections",
|
||||
"src/java/com/twitter/common/quantity",
|
||||
"src/java/com/twitter/search/common/futures",
|
||||
"src/java/com/twitter/search/common/logging",
|
||||
"src/java/com/twitter/search/common/metrics",
|
||||
"src/java/com/twitter/search/common/partitioning/snowflakeparser",
|
||||
"src/java/com/twitter/search/common/relevance:utils",
|
||||
"src/java/com/twitter/search/common/schema/earlybird",
|
||||
"src/java/com/twitter/search/common/search",
|
||||
"src/java/com/twitter/search/common/util:finagleutil",
|
||||
"src/java/com/twitter/search/common/util/earlybird",
|
||||
"src/java/com/twitter/search/earlybird_root/collectors",
|
||||
"src/java/com/twitter/search/earlybird_root/common",
|
||||
"src/java/com/twitter/search/queryparser/query:core-query-nodes",
|
||||
"src/thrift/com/twitter/search:earlybird-java",
|
||||
"src/thrift/com/twitter/search/common:query-java",
|
||||
],
|
||||
)
|
BIN
src/java/com/twitter/search/earlybird_root/mergers/BUILD.docx
Normal file
BIN
src/java/com/twitter/search/earlybird_root/mergers/BUILD.docx
Normal file
Binary file not shown.
Binary file not shown.
@ -1,9 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
/** Predicate deciding whether a tier merge can stop before moving on to the next tier. */
public interface EarlyTerminateTierMergePredicate {
|
||||
/**
|
||||
* Do we have enough results so far that we can early terminate and not continue onto next tier?
*
* @param totalResultsFromSuccessfulShards number of results gathered so far from the shards
*     that responded successfully
* @param foundEarlyTermination NOTE(review): presumably true when a response already merged
*     was itself early terminated -- confirm against the callers
* @return true if the merge should stop without continuing onto the next tier
|
||||
*/
|
||||
boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
|
||||
boolean foundEarlyTermination);
|
||||
}
|
Binary file not shown.
@ -1,176 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.collect.Iterables;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.search.common.logging.DebugMessageBuilder;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
|
||||
/**
|
||||
* Collects debug messages to attach to EarlybirdResponse
|
||||
*/
|
||||
class EarlybirdResponseDebugMessageBuilder {
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(EarlybirdResponseDebugMessageBuilder.class);
|
||||
|
||||
private static final Logger TOO_MANY_FAILED_PARTITIONS_LOG =
|
||||
LoggerFactory.getLogger(String.format("%s_too_many_failed_partitions",
|
||||
EarlybirdResponseDebugMessageBuilder.class.getName()));
|
||||
|
||||
@VisibleForTesting
|
||||
protected final SearchCounter insufficientValidResponseCounter =
|
||||
SearchCounter.export("insufficient_valid_partition_responses_count");
|
||||
@VisibleForTesting
|
||||
protected final SearchCounter validPartitionResponseCounter =
|
||||
SearchCounter.export("valid_partition_response_count");
|
||||
|
||||
// the combined debug string for all earlybird responses
|
||||
private final StringBuilder debugString;
|
||||
/**
|
||||
* A message builder backed by the same {@link #debugString} above.
|
||||
*/
|
||||
private final DebugMessageBuilder debugMessageBuilder;
|
||||
|
||||
private static final Joiner JOINER = Joiner.on(", ");
|
||||
|
||||
EarlybirdResponseDebugMessageBuilder(EarlybirdRequest request) {
|
||||
this(getDebugLevel(request));
|
||||
}
|
||||
|
||||
EarlybirdResponseDebugMessageBuilder(DebugMessageBuilder.Level level) {
|
||||
this.debugString = new StringBuilder();
|
||||
this.debugMessageBuilder = new DebugMessageBuilder(debugString, level);
|
||||
}
|
||||
|
||||
private static DebugMessageBuilder.Level getDebugLevel(EarlybirdRequest request) {
|
||||
if (request.isSetDebugMode() && request.getDebugMode() > 0) {
|
||||
return DebugMessageBuilder.getDebugLevel(request.getDebugMode());
|
||||
} else if (request.isSetDebugOptions()) {
|
||||
return DebugMessageBuilder.Level.DEBUG_BASIC;
|
||||
} else {
|
||||
return DebugMessageBuilder.Level.DEBUG_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean isDebugMode() {
|
||||
return debugMessageBuilder.getDebugLevel() > 0;
|
||||
}
|
||||
|
||||
void append(String msg) {
|
||||
debugString.append(msg);
|
||||
}
|
||||
|
||||
void debugAndLogWarning(String msg) {
|
||||
if (isDebugMode()) {
|
||||
debugString.append(msg).append('\n');
|
||||
}
|
||||
LOG.warn(msg);
|
||||
}
|
||||
|
||||
void debugDetailed(String format, Object... args) {
|
||||
debugAtLevel(DebugMessageBuilder.Level.DEBUG_DETAILED, format, args);
|
||||
}
|
||||
|
||||
void debugVerbose(String format, Object... args) {
|
||||
debugAtLevel(DebugMessageBuilder.Level.DEBUG_VERBOSE, format, args);
|
||||
}
|
||||
|
||||
void debugVerbose2(String format, Object... args) {
|
||||
debugAtLevel(DebugMessageBuilder.Level.DEBUG_VERBOSE_2, format, args);
|
||||
}
|
||||
|
||||
void debugAtLevel(DebugMessageBuilder.Level level, String format, Object... args) {
|
||||
boolean levelOK = debugMessageBuilder.isAtLeastLevel(level);
|
||||
if (levelOK || LOG.isDebugEnabled()) {
|
||||
// We check both modes here in order to build the formatted message only once.
|
||||
String message = String.format(format, args);
|
||||
|
||||
LOG.debug(message);
|
||||
|
||||
if (levelOK) {
|
||||
debugString.append(message).append('\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String debugString() {
|
||||
return debugString.toString();
|
||||
}
|
||||
|
||||
DebugMessageBuilder getDebugMessageBuilder() {
|
||||
return debugMessageBuilder;
|
||||
}
|
||||
|
||||
void logBelowSuccessThreshold(ThriftSearchQuery searchQuery, int numSuccessResponses,
|
||||
int numPartitions, double successThreshold) {
|
||||
String rawQuery = (searchQuery != null && searchQuery.isSetRawQuery())
|
||||
? "[" + searchQuery.getRawQuery() + "]" : "null";
|
||||
String serializedQuery = (searchQuery != null && searchQuery.isSetSerializedQuery())
|
||||
? "[" + searchQuery.getSerializedQuery() + "]" : "null";
|
||||
// Not enough successful responses from partitions.
|
||||
String errorMessage = String.format(
|
||||
"Only %d valid responses returned out of %d partitions for raw query: %s"
|
||||
+ " serialized query: %s. Lower than threshold of %s",
|
||||
numSuccessResponses, numPartitions, rawQuery, serializedQuery, successThreshold);
|
||||
|
||||
TOO_MANY_FAILED_PARTITIONS_LOG.warn(errorMessage);
|
||||
|
||||
insufficientValidResponseCounter.increment();
|
||||
validPartitionResponseCounter.add(numSuccessResponses);
|
||||
debugString.append(errorMessage);
|
||||
}
|
||||
|
||||
|
||||
@VisibleForTesting
|
||||
void logResponseDebugInfo(EarlybirdRequest earlybirdRequest,
|
||||
String partitionTierName,
|
||||
EarlybirdResponse response) {
|
||||
if (response.isSetDebugString() && !response.getDebugString().isEmpty()) {
|
||||
debugString.append(String.format("Received response from [%s] with debug string [%s]",
|
||||
partitionTierName, response.getDebugString())).append("\n");
|
||||
}
|
||||
|
||||
if (!response.isSetResponseCode()) {
|
||||
debugAndLogWarning(String.format(
|
||||
"Received Earlybird null response code for query [%s] from [%s]",
|
||||
earlybirdRequest, partitionTierName));
|
||||
} else if (response.getResponseCode() != EarlybirdResponseCode.SUCCESS
|
||||
&& response.getResponseCode() != EarlybirdResponseCode.PARTITION_SKIPPED
|
||||
&& response.getResponseCode() != EarlybirdResponseCode.PARTITION_DISABLED
|
||||
&& response.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED) {
|
||||
debugAndLogWarning(String.format(
|
||||
"Received Earlybird response error [%s] for query [%s] from [%s]",
|
||||
response.getResponseCode(), earlybirdRequest, partitionTierName));
|
||||
}
|
||||
|
||||
if (debugMessageBuilder.isVerbose2()) {
|
||||
debugVerbose2("Earlybird [%s] returned response: %s", partitionTierName, response);
|
||||
} else if (debugMessageBuilder.isVerbose()) {
|
||||
if (response.isSetSearchResults() && response.getSearchResults().getResultsSize() > 0) {
|
||||
String ids = JOINER.join(Iterables.transform(
|
||||
response.getSearchResults().getResults(),
|
||||
new Function<ThriftSearchResult, Long>() {
|
||||
@Nullable
|
||||
@Override
|
||||
public Long apply(ThriftSearchResult result) {
|
||||
return result.getId();
|
||||
}
|
||||
}));
|
||||
debugVerbose("Earlybird [%s] returned TweetIDs: %s", partitionTierName, ids);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,604 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import scala.runtime.BoxedUnit;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Optional;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
|
||||
import com.twitter.search.common.util.FinagleUtil;
|
||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
|
||||
import com.twitter.search.common.util.earlybird.ResultsUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdDebugInfo;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird_root.collectors.MultiwayMergeCollector;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestUtil;
|
||||
import com.twitter.util.Function;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Base EarlybirdResponseMerger containing basic logic to merge EarlybirdResponse objects
|
||||
*/
|
||||
public abstract class EarlybirdResponseMerger implements EarlyTerminateTierMergePredicate {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(EarlybirdResponseMerger.class);
|
||||
private static final Logger MIN_SEARCHED_STATUS_ID_LOGGER =
|
||||
LoggerFactory.getLogger("MinSearchedStatusIdLogger");
|
||||
|
||||
private static final SearchCounter NO_SEARCH_RESULT_COUNTER =
|
||||
SearchCounter.export("no_search_result_count");
|
||||
private static final SearchCounter NO_RESPONSES_TO_MERGE =
|
||||
SearchCounter.export("no_responses_to_merge");
|
||||
private static final SearchCounter EARLYBIRD_RESPONSE_NO_MORE_RESULTS =
|
||||
SearchCounter.export("merger_earlybird_response_no_more_results");
|
||||
private static final String PARTITION_OR_TIER_COUNTER_NAME_FORMAT =
|
||||
"merger_waited_for_response_from_%s_counter";
|
||||
private static final String PARTITION_OR_TIER_ERROR_COUNTER_NAME_FORMAT =
|
||||
"merger_num_error_responses_from_%s";
|
||||
private static final String PARTITION_OR_TIER_RESPONSE_CODE_COUNTER_NAME_FORMAT =
|
||||
"merger_earlybird_response_code_from_%s_%s";
|
||||
|
||||
protected final EarlybirdResponseDebugMessageBuilder responseMessageBuilder;
|
||||
protected final EarlybirdRequestContext requestContext;
|
||||
protected final ImmutableList<Future<EarlybirdResponse>> responses;
|
||||
protected AccumulatedResponses accumulatedResponses;
|
||||
|
||||
|
||||
@VisibleForTesting
|
||||
static final Map<EarlybirdRequestType, SearchCounter> MERGER_CREATED_STATS =
|
||||
perRequestTypeCounterImmutableMap("earlybird_response_merger_%s_created_count");
|
||||
|
||||
@VisibleForTesting
|
||||
static final Map<EarlybirdRequestType, SearchCounter>
|
||||
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_MAX_ID = perRequestTypeCounterImmutableMap(
|
||||
"merger_%s_min_searched_status_id_larger_than_request_max_id");
|
||||
|
||||
@VisibleForTesting
|
||||
static final Map<EarlybirdRequestType, SearchCounter>
|
||||
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_UNTIL_TIME = perRequestTypeCounterImmutableMap(
|
||||
"merger_%s_min_searched_status_id_larger_than_request_until_time");
|
||||
|
||||
private static Map<EarlybirdRequestType, SearchCounter> perRequestTypeCounterImmutableMap(
|
||||
String statPattern) {
|
||||
Map<EarlybirdRequestType, SearchCounter> statsMap = Maps.newEnumMap(EarlybirdRequestType.class);
|
||||
for (EarlybirdRequestType earlybirdRequestType : EarlybirdRequestType.values()) {
|
||||
String statName = String.format(statPattern, earlybirdRequestType.getNormalizedName());
|
||||
statsMap.put(earlybirdRequestType, SearchCounter.export(statName));
|
||||
}
|
||||
|
||||
return Maps.immutableEnumMap(statsMap);
|
||||
}
|
||||
|
||||
public static final com.google.common.base.Function<EarlybirdResponse, Map<Long, Integer>>
|
||||
HIT_COUNT_GETTER =
|
||||
response -> response.getSearchResults() == null
|
||||
? null
|
||||
: response.getSearchResults().getHitCounts();
|
||||
|
||||
private final ChainMerger chainMerger;
|
||||
|
||||
private class ChainMerger {
|
||||
private final EarlybirdRequestContext requestContext;
|
||||
private final ResponseAccumulator responseAccumulator;
|
||||
private final List<Future<EarlybirdResponse>> responses;
|
||||
private final EarlybirdResponseDebugMessageBuilder responseMessageBuilder;
|
||||
private int currentFutureIndex = -1;
|
||||
|
||||
public ChainMerger(EarlybirdRequestContext requestContext,
|
||||
ResponseAccumulator responseAccumulator,
|
||||
List<Future<EarlybirdResponse>> responses,
|
||||
EarlybirdResponseDebugMessageBuilder responseMessageBuilder) {
|
||||
this.requestContext = requestContext;
|
||||
this.responseAccumulator = responseAccumulator;
|
||||
this.responses = responses;
|
||||
this.responseMessageBuilder = responseMessageBuilder;
|
||||
}
|
||||
|
||||
public Future<EarlybirdResponse> merge() {
|
||||
// 'responseFutures' should always be sorted.
|
||||
// When returned by EarlybirdScatterGather service, the responses are sorted by partition ID.
|
||||
// When returned by EarlybirdChainedScatterGatherService,
|
||||
// responses are sorted descending by tier start date. See:
|
||||
// com.twitter.search.earlybird_root.EarlybirdChainedScatterGatherService.TIER_COMPARATOR.
|
||||
//
|
||||
// When merging responses from partitions, we want to wait for responses from all partitions,
|
||||
// so the order in which we wait for those results does not matter. When merging responses
|
||||
// from tiers, we want to wait for the response from the latest. If we don't need any more
|
||||
// responses to compute the final response, then we don't need to wait for the responses from
|
||||
// other tiers. If we cannot terminate early, then we want to wait for the responses from the
|
||||
// second tier, and so on.
|
||||
//
|
||||
// We do not need to have any explicit synchronization, because:
|
||||
// 1. The callbacks for future_i are set by the flatMap() callback on future_{i-1} (when
|
||||
// recursively calling merge() inside the flatMap()).
|
||||
// 2. Before setting the callbacks on future_i, future_{i-1}.flatMap() adds the response
|
||||
// results to mergeHelper.
|
||||
// 3. When the callbacks on future_i are set, the memory barrier between
|
||||
// thread_running_future_{i-1} and thread_running_future_i is crossed. This guarantees
|
||||
// that thread_running_future_i will see the updates to mergeHelper before it sees the
|
||||
// callbacks. (Or thread_running_future_{i-1} == thread_running_future_i, in which case
|
||||
// synchronization is not an issue, and correctness is guarateed by the order in which
|
||||
// things will run.)
|
||||
// 4. The same reasoning applies to currentFutureIndex.
|
||||
|
||||
++currentFutureIndex;
|
||||
if (currentFutureIndex >= responses.size()) {
|
||||
return Future.value(getTimedMergedResponse(responseAccumulator.getAccumulatedResults()));
|
||||
}
|
||||
|
||||
final String partitionTierName =
|
||||
responseAccumulator.getNameForLogging(currentFutureIndex, responses.size());
|
||||
final String nameForEarlybirdResponseCodeStats =
|
||||
responseAccumulator.getNameForEarlybirdResponseCodeStats(
|
||||
currentFutureIndex, responses.size());
|
||||
|
||||
// If a tier in the chain throws an exception, convert it to a null response, and let the
|
||||
// mergeHelper handle it appropriately.
|
||||
return responses.get(currentFutureIndex)
|
||||
.handle(Function.func(t -> {
|
||||
if (FinagleUtil.isCancelException(t)) {
|
||||
return new EarlybirdResponse()
|
||||
.setResponseCode(EarlybirdResponseCode.CLIENT_CANCEL_ERROR);
|
||||
} else if (FinagleUtil.isTimeoutException(t)) {
|
||||
return new EarlybirdResponse()
|
||||
.setResponseCode(EarlybirdResponseCode.SERVER_TIMEOUT_ERROR);
|
||||
} else {
|
||||
SearchCounter.export(
|
||||
String.format(PARTITION_OR_TIER_ERROR_COUNTER_NAME_FORMAT, partitionTierName))
|
||||
.increment();
|
||||
if (responseMessageBuilder.isDebugMode()) {
|
||||
responseMessageBuilder.debugAndLogWarning(
|
||||
String.format("[%s] failed, exception [%s]",
|
||||
partitionTierName, t.toString()));
|
||||
}
|
||||
LOG.warn("exception response from: " + partitionTierName, t);
|
||||
return new EarlybirdResponse()
|
||||
.setResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR);
|
||||
}
|
||||
}))
|
||||
.flatMap(Function.func(response -> {
|
||||
Preconditions.checkNotNull(response);
|
||||
|
||||
SearchCounter.export(
|
||||
String.format(PARTITION_OR_TIER_RESPONSE_CODE_COUNTER_NAME_FORMAT,
|
||||
nameForEarlybirdResponseCodeStats,
|
||||
response.getResponseCode().name().toLowerCase()))
|
||||
.increment();
|
||||
|
||||
if ((response.getResponseCode() != EarlybirdResponseCode.PARTITION_SKIPPED)
|
||||
&& (response.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED)) {
|
||||
SearchCounter.export(
|
||||
String.format(PARTITION_OR_TIER_COUNTER_NAME_FORMAT, partitionTierName))
|
||||
.increment();
|
||||
}
|
||||
|
||||
if (response.getResponseCode() == EarlybirdResponseCode.CLIENT_CANCEL_ERROR) {
|
||||
// the request has been cancelled, no need to proceed
|
||||
return Future.value(response);
|
||||
}
|
||||
|
||||
rewriteResponseCodeIfSearchResultsMissing(requestContext, partitionTierName, response);
|
||||
responseMessageBuilder.logResponseDebugInfo(
|
||||
requestContext.getRequest(),
|
||||
partitionTierName,
|
||||
response);
|
||||
responseAccumulator.addResponse(
|
||||
responseMessageBuilder,
|
||||
requestContext.getRequest(),
|
||||
response);
|
||||
|
||||
if (responseAccumulator.shouldEarlyTerminateMerge(EarlybirdResponseMerger.this)) {
|
||||
return Future.value(getTimedMergedResponse(
|
||||
responseAccumulator.getAccumulatedResults()));
|
||||
}
|
||||
return merge();
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
private void rewriteResponseCodeIfSearchResultsMissing(
|
||||
EarlybirdRequestContext earlybirdRequestContext,
|
||||
String partitionTierName,
|
||||
EarlybirdResponse response) {
|
||||
// We always require searchResults to be set, even for term stats and facet requests.
|
||||
// This is because searchResults contains important info such as pagination cursors
|
||||
// like minSearchStatusId and minSearchedTimeSinceEpoch.
|
||||
// We expect all successful responses to have searchResults set.
|
||||
if (response.isSetResponseCode()
|
||||
&& response.getResponseCode() == EarlybirdResponseCode.SUCCESS
|
||||
&& response.getSearchResults() == null) {
|
||||
NO_SEARCH_RESULT_COUNTER.increment();
|
||||
LOG.warn("Received Earlybird response with null searchResults from [{}]"
|
||||
+ " EarlybirdRequest [{}] EarlybirdResponse [{}] ",
|
||||
partitionTierName, earlybirdRequestContext.getRequest(), response);
|
||||
response.setResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Constructs an EarlybirdResponseMerger that merges responses from multiple partitions or
 * tiers, as driven by the given response accumulator.
 *
 * @param requestContext context of the request whose responses are being merged.
 * @param responses the response futures to merge. The list is copied, so changes the caller
 *     makes to it afterwards are not observed by this merger.
 * @param responseAccumulator accumulates the individual responses as they become available.
 */
EarlybirdResponseMerger(EarlybirdRequestContext requestContext,
                        List<Future<EarlybirdResponse>> responses,
                        ResponseAccumulator responseAccumulator) {
  this.requestContext = requestContext;
  this.responses = ImmutableList.copyOf(responses);
  this.responseMessageBuilder =
      new EarlybirdResponseDebugMessageBuilder(requestContext.getRequest());
  // Give the chain merger the immutable snapshot rather than the caller's list, so that the
  // merger and its chain always iterate over the same, unchanging set of futures.
  this.chainMerger = new ChainMerger(requestContext, responseAccumulator, this.responses,
      responseMessageBuilder);
}
|
||||
|
||||
/**
|
||||
* Get a response merger to merge the given responses.
|
||||
*/
|
||||
public static EarlybirdResponseMerger getResponseMerger(
|
||||
EarlybirdRequestContext requestContext,
|
||||
List<Future<EarlybirdResponse>> responses,
|
||||
ResponseAccumulator helper,
|
||||
EarlybirdCluster cluster,
|
||||
EarlybirdFeatureSchemaMerger featureSchemaMerger,
|
||||
int numPartitions) {
|
||||
EarlybirdRequestType type = requestContext.getEarlybirdRequestType();
|
||||
MERGER_CREATED_STATS.get(type).increment();
|
||||
switch (type) {
|
||||
case FACETS:
|
||||
return new FacetResponseMerger(requestContext, responses, helper);
|
||||
case TERM_STATS:
|
||||
return new TermStatisticsResponseMerger(requestContext, responses, helper);
|
||||
case RECENCY:
|
||||
return new RecencyResponseMerger(requestContext, responses, helper, featureSchemaMerger);
|
||||
case STRICT_RECENCY:
|
||||
return new StrictRecencyResponseMerger(
|
||||
requestContext, responses, helper, featureSchemaMerger, cluster);
|
||||
case RELEVANCE:
|
||||
return new RelevanceResponseMerger(
|
||||
requestContext, responses, helper, featureSchemaMerger, numPartitions);
|
||||
case TOP_TWEETS:
|
||||
return new TopTweetsResponseMerger(requestContext, responses, helper);
|
||||
default:
|
||||
throw new RuntimeException("EarlybirdRequestType " + type + "is not supported by merge");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method can perform two types of merges:
|
||||
* 1. merge responses within a tier from different partitions.
|
||||
* 2. merge responses from multiple tiers.
|
||||
*/
|
||||
public final Future<EarlybirdResponse> merge() {
|
||||
return chainMerger.merge()
|
||||
.onSuccess(checkMinSearchedStatusIdFunction(
|
||||
"max_id",
|
||||
EarlybirdRequestUtil.getRequestMaxId(requestContext.getParsedQuery()),
|
||||
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_MAX_ID.get(
|
||||
requestContext.getEarlybirdRequestType())))
|
||||
.onSuccess(checkMinSearchedStatusIdFunction(
|
||||
"until_time",
|
||||
EarlybirdRequestUtil.getRequestMaxIdFromUntilTime(requestContext.getParsedQuery()),
|
||||
MIN_SEARCHED_STATUS_ID_LARGER_THAN_REQUEST_UNTIL_TIME.get(
|
||||
requestContext.getEarlybirdRequestType())));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the function that checks if the minSearchedStatusID on the merged response is higher
|
||||
* than the max ID in the request.
|
||||
*/
|
||||
private Function<EarlybirdResponse, BoxedUnit> checkMinSearchedStatusIdFunction(
|
||||
final String operator, final Optional<Long> requestMaxId, final SearchCounter stat) {
|
||||
return Function.cons(mergedResponse -> {
|
||||
if (requestMaxId.isPresent()
|
||||
&& requestMaxId.get() != Long.MAX_VALUE
|
||||
&& (mergedResponse.getResponseCode() == EarlybirdResponseCode.SUCCESS)
|
||||
&& mergedResponse.isSetSearchResults()
|
||||
&& mergedResponse.getSearchResults().isSetMinSearchedStatusID()) {
|
||||
long minSearchedStatusId = mergedResponse.getSearchResults().getMinSearchedStatusID();
|
||||
// We sometimes set minSearchedStatusId = max_id + 1 when a request times out even
|
||||
// before any search happens.
|
||||
// Check SEARCH-10134 for more details.
|
||||
if (minSearchedStatusId > requestMaxId.get() + 1) {
|
||||
stat.increment();
|
||||
String logMessage = "Response has a minSearchedStatusID ({}) larger than request "
|
||||
+ operator + " ({})."
|
||||
+ "\nrequest type: {}"
|
||||
+ "\nrequest: {}"
|
||||
+ "\nmerged response: {}"
|
||||
+ "\nSuccessful accumulated responses:";
|
||||
List<Object> logMessageParams = Lists.newArrayList();
|
||||
logMessageParams.add(minSearchedStatusId);
|
||||
logMessageParams.add(requestMaxId.get());
|
||||
logMessageParams.add(requestContext.getEarlybirdRequestType());
|
||||
logMessageParams.add(requestContext.getRequest());
|
||||
logMessageParams.add(mergedResponse);
|
||||
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
|
||||
logMessage += "\naccumulated response: {}";
|
||||
logMessageParams.add(response);
|
||||
}
|
||||
MIN_SEARCHED_STATUS_ID_LOGGER.warn(logMessage, logMessageParams.toArray());
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private EarlybirdResponse getTimedMergedResponse(AccumulatedResponses accResponses) {
|
||||
long start = System.nanoTime();
|
||||
try {
|
||||
return getMergedResponse(accResponses);
|
||||
} finally {
|
||||
long totalTime = System.nanoTime() - start;
|
||||
getMergedResponseTimer().timerIncrement(totalTime);
|
||||
}
|
||||
}
|
||||
|
||||
private EarlybirdResponse initializeMergedSuccessResponseFromAccumulatedResponses() {
|
||||
EarlybirdResponse mergedResponse = new EarlybirdResponse();
|
||||
|
||||
AccumulatedResponses.PartitionCounts partitionCounts =
|
||||
accumulatedResponses.getPartitionCounts();
|
||||
|
||||
mergedResponse.setNumPartitions(partitionCounts.getNumPartitions())
|
||||
.setNumSuccessfulPartitions(partitionCounts.getNumSuccessfulPartitions())
|
||||
.setPerTierResponse(partitionCounts.getPerTierResponse())
|
||||
.setNumSearchedSegments(accumulatedResponses.getNumSearchedSegments());
|
||||
|
||||
mergedResponse.setEarlyTerminationInfo(accumulatedResponses.getMergedEarlyTerminationInfo());
|
||||
mergedResponse.setResponseCode(EarlybirdResponseCode.SUCCESS);
|
||||
|
||||
return mergedResponse;
|
||||
}
|
||||
|
||||
private EarlybirdResponse getMergedResponse(AccumulatedResponses accResponses) {
|
||||
accumulatedResponses = accResponses;
|
||||
EarlybirdResponse mergedResponse;
|
||||
|
||||
if (accumulatedResponses.getSuccessResponses().isEmpty()
|
||||
&& !accumulatedResponses.foundError()) {
|
||||
// No successful or error responses. This means that all tiers / partitions are intentionally
|
||||
// skipped. Return a blank successful response.
|
||||
NO_RESPONSES_TO_MERGE.increment();
|
||||
mergedResponse = new EarlybirdResponse()
|
||||
.setResponseCode(EarlybirdResponseCode.SUCCESS)
|
||||
.setSearchResults(new ThriftSearchResults())
|
||||
.setDebugString("No responses to merge, probably because all tiers/partitions "
|
||||
+ "were skipped.");
|
||||
} else if (accumulatedResponses.isMergingAcrossTiers()) {
|
||||
mergedResponse = getMergedResponseAcrossTiers();
|
||||
} else {
|
||||
mergedResponse = getMergedResponseAcrossPartitions();
|
||||
}
|
||||
|
||||
saveMergedDebugString(mergedResponse);
|
||||
return mergedResponse;
|
||||
}
|
||||
|
||||
private EarlybirdResponse getMergedResponseAcrossTiers() {
|
||||
Preconditions.checkState(
|
||||
!accumulatedResponses.getSuccessResponses().isEmpty()
|
||||
|| accumulatedResponses.foundError());
|
||||
|
||||
// When merging across tiers, if we have one failed tier, we should fail the whole
|
||||
// response. Note that due to early termination, if a tier that is old fails
|
||||
// but the newer tiers return enough results, the failed tier won't show up
|
||||
// here in accumulatedResponses -- the only tiers that show up here
|
||||
// will be successful.
|
||||
if (accumulatedResponses.foundError()) {
|
||||
// The TierResponseAccumulator early terminates on the first error, so we should
|
||||
// never get more than one error. This means that the getMergedErrorResponse will
|
||||
// return an error response with the error code of that one error, and will never
|
||||
// have to decide which error response to return if the error responses are all
|
||||
// different.
|
||||
|
||||
// Perhaps we should just return accumulatedResponses.getErrorResponses().get(0);
|
||||
Preconditions.checkState(accumulatedResponses.getErrorResponses().size() == 1);
|
||||
return accumulatedResponses.getMergedErrorResponse();
|
||||
} else {
|
||||
EarlybirdResponse mergedResponse = initializeMergedSuccessResponseFromAccumulatedResponses();
|
||||
return internalMerge(mergedResponse);
|
||||
}
|
||||
}
|
||||
|
||||
private EarlybirdResponse getMergedResponseAcrossPartitions() {
|
||||
Preconditions.checkState(
|
||||
!accumulatedResponses.getSuccessResponses().isEmpty()
|
||||
|| accumulatedResponses.foundError());
|
||||
|
||||
EarlybirdResponse mergedResponse;
|
||||
|
||||
// Unlike tier merging, one failed response doesn't mean the merged response should
|
||||
// fail. If we have successful responses we can check the success ratio and if its
|
||||
// good we can still return a successful merge.
|
||||
if (!accumulatedResponses.getSuccessResponses().isEmpty()) {
|
||||
// We have at least one successful response, but still need to check the success ratio.
|
||||
// mergedResponse is a SUCCESS response after this call, but we will
|
||||
// set it to failure below if necessary.
|
||||
mergedResponse = initializeMergedSuccessResponseFromAccumulatedResponses();
|
||||
|
||||
int numSuccessResponses = mergedResponse.getNumSuccessfulPartitions();
|
||||
int numPartitions = mergedResponse.getNumPartitions();
|
||||
double successThreshold = getSuccessResponseThreshold();
|
||||
if (checkSuccessPartitionRatio(numSuccessResponses, numPartitions, successThreshold)) {
|
||||
// Success! Proceed with merging.
|
||||
mergedResponse.setResponseCode(EarlybirdResponseCode.SUCCESS);
|
||||
mergedResponse = internalMerge(mergedResponse);
|
||||
} else {
|
||||
responseMessageBuilder.logBelowSuccessThreshold(
|
||||
requestContext.getRequest().getSearchQuery(), numSuccessResponses, numPartitions,
|
||||
successThreshold);
|
||||
mergedResponse.setResponseCode(EarlybirdResponseCode.TOO_MANY_PARTITIONS_FAILED_ERROR);
|
||||
}
|
||||
} else {
|
||||
mergedResponse = accumulatedResponses.getMergedErrorResponse();
|
||||
}
|
||||
|
||||
return mergedResponse;
|
||||
}
|
||||
|
||||
/**
* Derived classes should implement the logic to merge the specific type of results (recency,
* relevance, Top Tweets, etc.).
*/
|
||||
protected abstract EarlybirdResponse internalMerge(EarlybirdResponse response);
|
||||
|
||||
/**
 * Returns the timer on which the duration of each merge is recorded. getTimedMergedResponse()
 * increments it with the elapsed time, in nanoseconds, of every getMergedResponse() call.
 */
protected abstract SearchTimerStats getMergedResponseTimer();
|
||||
|
||||
/**
|
||||
* Do we have enough results so far that we can early terminate and not continue onto next tier?
|
||||
*/
|
||||
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
|
||||
boolean foundEarlyTermination) {
|
||||
// We are taking the most conservative tier response merging.
|
||||
// This is the most conservative merge logic --- as long as we have some results, we should
|
||||
// not return anything from the next tier. This may cause not ideal experience where a
|
||||
// page is not full, but the use can still scroll further.
|
||||
|
||||
return foundEarlyTermination || totalResultsFromSuccessfulShards >= 1;
|
||||
}
|
||||
|
||||
private void saveMergedDebugString(EarlybirdResponse mergedResponse) {
|
||||
if (responseMessageBuilder.isDebugMode()) {
|
||||
String message = responseMessageBuilder.debugString();
|
||||
mergedResponse.setDebugString(message);
|
||||
if (!accumulatedResponses.getSuccessResponses().isEmpty()
|
||||
&& accumulatedResponses.getSuccessResponses().get(0).isSetDebugInfo()) {
|
||||
|
||||
EarlybirdDebugInfo debugInfo =
|
||||
accumulatedResponses.getSuccessResponses().get(0).getDebugInfo();
|
||||
mergedResponse.setDebugInfo(debugInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private double getSuccessResponseThreshold() {
|
||||
EarlybirdRequest request = requestContext.getRequest();
|
||||
if (request.isSetSuccessfulResponseThreshold()) {
|
||||
double successfulResponseThreshold = request.getSuccessfulResponseThreshold();
|
||||
Preconditions.checkArgument(successfulResponseThreshold > 0,
|
||||
"Invalid successfulResponseThreshold %s", successfulResponseThreshold);
|
||||
Preconditions.checkArgument(successfulResponseThreshold <= 1.0,
|
||||
"Invalid successfulResponseThreshold %s", successfulResponseThreshold);
|
||||
return successfulResponseThreshold;
|
||||
} else {
|
||||
return getDefaultSuccessResponseThreshold();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the success-ratio threshold that getSuccessResponseThreshold() falls back to when
 * the request does not set its own successfulResponseThreshold.
 */
protected abstract double getDefaultSuccessResponseThreshold();
|
||||
|
||||
private static boolean checkSuccessPartitionRatio(
|
||||
int numSuccessResponses,
|
||||
int numPartitions,
|
||||
double goodResponseThreshold) {
|
||||
Preconditions.checkArgument(goodResponseThreshold > 0.0,
|
||||
"Invalid goodResponseThreshold %s", goodResponseThreshold);
|
||||
return numSuccessResponses >= (numPartitions * goodResponseThreshold);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge hit counts from all results.
|
||||
*/
|
||||
protected Map<Long, Integer> aggregateHitCountMap() {
|
||||
Map<Long, Integer> hitCounts = ResultsUtil
|
||||
.aggregateCountMap(accumulatedResponses.getSuccessResponses(), HIT_COUNT_GETTER);
|
||||
if (hitCounts.size() > 0) {
|
||||
if (responseMessageBuilder.isDebugMode()) {
|
||||
responseMessageBuilder.append("Hit counts:\n");
|
||||
for (Map.Entry<Long, Integer> entry : hitCounts.entrySet()) {
|
||||
responseMessageBuilder.append(String.format(" %10s seconds: %d hits\n",
|
||||
entry.getKey() / 1000, entry.getValue()));
|
||||
}
|
||||
}
|
||||
return hitCounts;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of results to keep as part of merge-collection.
|
||||
*/
|
||||
protected final int computeNumResultsToKeep() {
|
||||
return EarlybirdResponseMergeUtil.computeNumResultsToKeep(requestContext.getRequest());
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove exact duplicates (same id) from the result set.
|
||||
*/
|
||||
protected static void trimExactDups(ThriftSearchResults searchResults, TrimStats trimStats) {
|
||||
int numResults = searchResults.getResultsSize();
|
||||
List<ThriftSearchResult> oldResults = searchResults.getResults();
|
||||
List<ThriftSearchResult> newResults = Lists.newArrayListWithCapacity(numResults);
|
||||
HashSet<Long> resultSet = Sets.newHashSetWithExpectedSize(numResults);
|
||||
|
||||
for (ThriftSearchResult result : oldResults) {
|
||||
if (resultSet.contains(result.getId())) {
|
||||
trimStats.increaseRemovedDupsCount();
|
||||
continue;
|
||||
}
|
||||
|
||||
newResults.add(result);
|
||||
resultSet.add(result.getId());
|
||||
}
|
||||
|
||||
searchResults.setResults(newResults);
|
||||
}
|
||||
|
||||
protected final int addResponsesToCollector(MultiwayMergeCollector collector) {
|
||||
int totalResultSize = 0;
|
||||
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
|
||||
if (response.isSetSearchResults()) {
|
||||
totalResultSize += response.getSearchResults().getResultsSize();
|
||||
}
|
||||
collector.addResponse(response);
|
||||
}
|
||||
return totalResultSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a sorted searchResults (for recency, sorted by ID; for relevance, sorted by score),
|
||||
* returns the first 'computeNumResultsToKeep()' number of results.
|
||||
*
|
||||
* @param searchResults the searchResults to be truncated.
|
||||
*/
|
||||
protected final void truncateResults(ThriftSearchResults searchResults, TrimStats trimStats) {
|
||||
int numResultsRequested = computeNumResultsToKeep();
|
||||
|
||||
int to = numResultsRequested == Integer.MAX_VALUE ? searchResults.getResultsSize()
|
||||
: Math.min(numResultsRequested, searchResults.getResultsSize());
|
||||
if (searchResults.getResultsSize() > to) {
|
||||
trimStats.setResultsTruncatedFromTailCount(searchResults.getResultsSize() - to);
|
||||
|
||||
if (to > 0) {
|
||||
searchResults.setResults(searchResults.getResults().subList(0, to));
|
||||
} else {
|
||||
// No more results for the next page
|
||||
EARLYBIRD_RESPONSE_NO_MORE_RESULTS.increment();
|
||||
searchResults.setResults(Collections.<ThriftSearchResult>emptyList());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EarlybirdRequest getEarlybirdRequest() {
|
||||
return requestContext.getRequest();
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,353 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.search.common.logging.DebugMessageBuilder;
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.common.ranking.thriftjava.ThriftFacetRankingOptions;
|
||||
import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant;
|
||||
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftFacetCount;
|
||||
import com.twitter.search.earlybird.thrift.ThriftFacetCountMetadata;
|
||||
import com.twitter.search.earlybird.thrift.ThriftFacetFieldResults;
|
||||
import com.twitter.search.earlybird.thrift.ThriftFacetResults;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Merger class to merge facets EarlybirdResponse objects
|
||||
*/
|
||||
public class FacetResponseMerger extends EarlybirdResponseMerger {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(FacetResponseMerger.class);
|
||||
|
||||
private static final SearchTimerStats TIMER =
|
||||
SearchTimerStats.export("merge_facets", TimeUnit.NANOSECONDS, false, true);
|
||||
|
||||
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
|
||||
private final DebugMessageBuilder debugMessageBuilder;
|
||||
|
||||
|
||||
/**
 * Creates a merger for facets responses and writes the received request to the debug message
 * builder at verbose level.
 *
 * @param requestContext context of the request whose responses are being merged.
 * @param responses the response futures to merge.
 * @param mode the response accumulator handed to the base merger.
 */
public FacetResponseMerger(EarlybirdRequestContext requestContext,
                           List<Future<EarlybirdResponse>> responses,
                           ResponseAccumulator mode) {
  super(requestContext, responses, mode);
  this.debugMessageBuilder = responseMessageBuilder.getDebugMessageBuilder();
  this.debugMessageBuilder.verbose(
      "--- Request Received: %s", requestContext.getRequest());
}
|
||||
|
||||
@Override
|
||||
protected SearchTimerStats getMergedResponseTimer() {
|
||||
return TIMER;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double getDefaultSuccessResponseThreshold() {
|
||||
return SUCCESSFUL_RESPONSE_THRESHOLD;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected EarlybirdResponse internalMerge(EarlybirdResponse facetsResponse) {
|
||||
|
||||
final Map<String, FacetsResultsUtils.FacetFieldInfo> facetFieldInfoMap =
|
||||
new HashMap<>();
|
||||
final Set<Long> userIDWhitelist = new HashSet<>();
|
||||
|
||||
// First, parse the responses and build up our facet info map.
|
||||
boolean termStatsFilteringMode = FacetsResultsUtils.prepareFieldInfoMap(
|
||||
requestContext.getRequest().getFacetRequest(), facetFieldInfoMap);
|
||||
// Iterate through all futures and get results.
|
||||
collectResponsesAndPopulateMap(facetFieldInfoMap, userIDWhitelist);
|
||||
|
||||
// Next, aggregate the top facets and update the blender response.
|
||||
facetsResponse
|
||||
.setFacetResults(new ThriftFacetResults()
|
||||
.setFacetFields(new HashMap<>())
|
||||
.setUserIDWhitelist(userIDWhitelist));
|
||||
|
||||
// keep track of how many facets a user contributed - this map gets reset for every field
|
||||
Map<Long, Integer> perFieldAntiGamingMap = new HashMap<>();
|
||||
|
||||
// this one is used for images and twimges
|
||||
Map<Long, Integer> imagesAntiGamingMap = new HashMap<>();
|
||||
|
||||
Set<String> twimgDedupSet = null;
|
||||
|
||||
for (final Map.Entry<String, FacetsResultsUtils.FacetFieldInfo> entry
|
||||
: facetFieldInfoMap.entrySet()) {
|
||||
// reset for each field
|
||||
String field = entry.getKey();
|
||||
final Map<Long, Integer> antiGamingMap;
|
||||
if (field.equals(EarlybirdFieldConstant.IMAGES_FACET)
|
||||
|| field.equals(EarlybirdFieldConstant.TWIMG_FACET)) {
|
||||
antiGamingMap = imagesAntiGamingMap;
|
||||
} else {
|
||||
perFieldAntiGamingMap.clear();
|
||||
antiGamingMap = perFieldAntiGamingMap;
|
||||
}
|
||||
|
||||
ThriftFacetFieldResults results = new ThriftFacetFieldResults();
|
||||
FacetsResultsUtils.FacetFieldInfo info = entry.getValue();
|
||||
results.setTotalCount(info.totalCounts);
|
||||
results.setTopFacets(new ArrayList<>());
|
||||
FacetsResultsUtils.fillTopLanguages(info, results);
|
||||
if (info.topFacets != null && !info.topFacets.isEmpty()) {
|
||||
fillFacetFieldResults(info, antiGamingMap, results);
|
||||
}
|
||||
|
||||
if (field.equals(EarlybirdFieldConstant.TWIMG_FACET)) {
|
||||
if (twimgDedupSet == null) {
|
||||
twimgDedupSet = Sets.newHashSet();
|
||||
}
|
||||
FacetsResultsUtils.dedupTwimgFacet(twimgDedupSet, results, debugMessageBuilder);
|
||||
}
|
||||
|
||||
facetsResponse.getFacetResults().putToFacetFields(entry.getKey(), results);
|
||||
}
|
||||
|
||||
if (!termStatsFilteringMode) {
|
||||
// in term stats filtering mode, if doing it here would break term stats filtering
|
||||
FacetsResultsUtils.mergeTwimgResults(
|
||||
facetsResponse.getFacetResults(),
|
||||
Collections.<ThriftFacetCount>reverseOrder(
|
||||
FacetsResultsUtils.getFacetCountComparator(
|
||||
requestContext.getRequest().getFacetRequest())));
|
||||
}
|
||||
|
||||
// Update the numHitsProcessed on ThriftSearchResults.
|
||||
int numHitsProcessed = 0;
|
||||
int numPartitionsEarlyTerminated = 0;
|
||||
for (EarlybirdResponse earlybirdResponse: accumulatedResponses.getSuccessResponses()) {
|
||||
ThriftSearchResults searchResults = earlybirdResponse.getSearchResults();
|
||||
if (searchResults != null) {
|
||||
numHitsProcessed += searchResults.getNumHitsProcessed();
|
||||
numPartitionsEarlyTerminated += searchResults.getNumPartitionsEarlyTerminated();
|
||||
}
|
||||
}
|
||||
ThriftSearchResults searchResults = new ThriftSearchResults();
|
||||
searchResults.setResults(new ArrayList<>()); // required field
|
||||
searchResults.setNumHitsProcessed(numHitsProcessed);
|
||||
searchResults.setNumPartitionsEarlyTerminated(numPartitionsEarlyTerminated);
|
||||
facetsResponse.setSearchResults(searchResults);
|
||||
|
||||
LOG.debug("Facets call completed successfully: {}", facetsResponse);
|
||||
|
||||
FacetsResultsUtils.fixNativePhotoUrl(facetsResponse);
|
||||
return facetsResponse;
|
||||
}
|
||||
|
||||
private void fillFacetFieldResults(FacetsResultsUtils.FacetFieldInfo facetFieldInfo,
|
||||
Map<Long, Integer> antiGamingMap,
|
||||
ThriftFacetFieldResults results) {
|
||||
int minWeightedCount = 0;
|
||||
int minSimpleCount = 0;
|
||||
int maxPenaltyCount = Integer.MAX_VALUE;
|
||||
double maxPenaltyCountRatio = 1;
|
||||
boolean excludePossiblySensitiveFacets = false;
|
||||
boolean onlyReturnFacetsWithDisplayTweet = false;
|
||||
int maxHitsPerUser = -1;
|
||||
|
||||
EarlybirdRequest request = requestContext.getRequest();
|
||||
if (request.getFacetRequest() != null) {
|
||||
ThriftFacetRankingOptions rankingOptions = request.getFacetRequest().getFacetRankingOptions();
|
||||
|
||||
if (request.getSearchQuery() != null) {
|
||||
maxHitsPerUser = request.getSearchQuery().getMaxHitsPerUser();
|
||||
}
|
||||
|
||||
if (rankingOptions != null) {
|
||||
LOG.debug("FacetsResponseMerger: Using rankingOptions={}", rankingOptions);
|
||||
|
||||
if (rankingOptions.isSetMinCount()) {
|
||||
minWeightedCount = rankingOptions.getMinCount();
|
||||
}
|
||||
if (rankingOptions.isSetMinSimpleCount()) {
|
||||
minSimpleCount = rankingOptions.getMinSimpleCount();
|
||||
}
|
||||
if (rankingOptions.isSetMaxPenaltyCount()) {
|
||||
maxPenaltyCount = rankingOptions.getMaxPenaltyCount();
|
||||
}
|
||||
if (rankingOptions.isSetMaxPenaltyCountRatio()) {
|
||||
maxPenaltyCountRatio = rankingOptions.getMaxPenaltyCountRatio();
|
||||
}
|
||||
if (rankingOptions.isSetExcludePossiblySensitiveFacets()) {
|
||||
excludePossiblySensitiveFacets = rankingOptions.isExcludePossiblySensitiveFacets();
|
||||
}
|
||||
if (rankingOptions.isSetOnlyReturnFacetsWithDisplayTweet()) {
|
||||
onlyReturnFacetsWithDisplayTweet = rankingOptions.isOnlyReturnFacetsWithDisplayTweet();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
LOG.warn("earlybirdRequest.getFacetRequest() is null");
|
||||
}
|
||||
|
||||
ThriftFacetCount[] topFacetsArray = new ThriftFacetCount[facetFieldInfo.topFacets.size()];
|
||||
|
||||
facetFieldInfo.topFacets.values().toArray(topFacetsArray);
|
||||
Arrays.sort(topFacetsArray, Collections.<ThriftFacetCount>reverseOrder(
|
||||
FacetsResultsUtils.getFacetCountComparator(request.getFacetRequest())));
|
||||
|
||||
int numResults = capFacetFieldWidth(facetFieldInfo.fieldRequest.numResults);
|
||||
|
||||
if (topFacetsArray.length < numResults) {
|
||||
numResults = topFacetsArray.length;
|
||||
}
|
||||
|
||||
int collected = 0;
|
||||
for (int i = 0; i < topFacetsArray.length; ++i) {
|
||||
ThriftFacetCount count = topFacetsArray[i];
|
||||
|
||||
if (onlyReturnFacetsWithDisplayTweet
|
||||
&& (!count.isSetMetadata() || !count.getMetadata().isSetStatusId()
|
||||
|| count.getMetadata().getStatusId() == -1)) {
|
||||
// status id must be set
|
||||
continue;
|
||||
}
|
||||
|
||||
if (excludePossiblySensitiveFacets && count.isSetMetadata()
|
||||
&& count.getMetadata().isStatusPossiblySensitive()) {
|
||||
// the display tweet may be offensive or NSFW
|
||||
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
|
||||
debugMessageBuilder.verbose2("[%d] FacetsResponseMerger EXCLUDED: offensive or NSFW %s, "
|
||||
+ "explanation: %s",
|
||||
i, facetCountSummary(count),
|
||||
count.getMetadata().getExplanation());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
boolean filterOutUser = false;
|
||||
if (maxHitsPerUser != -1 && count.isSetMetadata()) {
|
||||
ThriftFacetCountMetadata metadata = count.getMetadata();
|
||||
if (!metadata.dontFilterUser) {
|
||||
long twitterUserId = metadata.getTwitterUserId();
|
||||
int numResultsFromUser = 1;
|
||||
if (twitterUserId != -1) {
|
||||
Integer perUser = antiGamingMap.get(twitterUserId);
|
||||
if (perUser != null) {
|
||||
numResultsFromUser = perUser + 1;
|
||||
filterOutUser = numResultsFromUser > maxHitsPerUser;
|
||||
}
|
||||
antiGamingMap.put(twitterUserId, numResultsFromUser);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Filter facets those don't meet the basic criteria.
|
||||
if (count.getSimpleCount() < minSimpleCount) {
|
||||
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
|
||||
debugMessageBuilder.verbose2(
|
||||
"[%d] FacetsResponseMerger EXCLUDED: simpleCount:%d < minSimpleCount:%d, %s",
|
||||
i, count.getSimpleCount(), minSimpleCount, facetCountSummary(count));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (count.getWeightedCount() < minWeightedCount) {
|
||||
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
|
||||
debugMessageBuilder.verbose2(
|
||||
"[%d] FacetsResponseMerger EXCLUDED: weightedCount:%d < minWeightedCount:%d, %s",
|
||||
i, count.getWeightedCount(), minWeightedCount, facetCountSummary(count));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (filterOutUser) {
|
||||
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
|
||||
debugMessageBuilder.verbose2(
|
||||
"[%d] FacetsResponseMerger EXCLUDED: antiGaming filterd user: %d: %s",
|
||||
i, count.getMetadata().getTwitterUserId(), facetCountSummary(count));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (count.getPenaltyCount() > maxPenaltyCount) {
|
||||
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
|
||||
debugMessageBuilder.verbose2(
|
||||
"[%d] FacetsResponseMerger EXCLUCED: penaltyCount:%.3f > maxPenaltyCount:%.3f, %s",
|
||||
i, count.getPenaltyCount(), maxPenaltyCount, facetCountSummary(count));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (((double) count.getPenaltyCount() / count.getSimpleCount()) > maxPenaltyCountRatio) {
|
||||
if (DebugMessageBuilder.DEBUG_VERBOSE <= debugMessageBuilder.getDebugLevel()) {
|
||||
debugMessageBuilder.verbose2(
|
||||
"[%d] FacetsResponseMerger EXCLUDED: penaltyCountRatio: %.3f > "
|
||||
+ "maxPenaltyCountRatio:%.3f, %s",
|
||||
i, (double) count.getPenaltyCount() / count.getSimpleCount(), maxPenaltyCountRatio,
|
||||
facetCountSummary(count));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
results.addToTopFacets(count);
|
||||
|
||||
collected++;
|
||||
if (collected >= numResults) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static int capFacetFieldWidth(int numResults) {
|
||||
int ret = numResults;
|
||||
if (numResults <= 0) {
|
||||
// this in theory should not be allowed, but for now we issue the request with goodwill length
|
||||
ret = 10; // default to 10 for future merge code to terminate correctly
|
||||
}
|
||||
if (numResults >= 100) {
|
||||
ret = 100;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
private static String facetCountSummary(final ThriftFacetCount count) {
|
||||
if (count.isSetMetadata()) {
|
||||
return String.format("Label: %s (s:%d, w:%d, p:%d, score:%.2f, sid:%d (%s))",
|
||||
count.getFacetLabel(), count.getSimpleCount(), count.getWeightedCount(),
|
||||
count.getPenaltyCount(), count.getScore(), count.getMetadata().getStatusId(),
|
||||
count.getMetadata().getStatusLanguage());
|
||||
} else {
|
||||
return String.format("Label: %s (s:%d, w:%d, p:%d, score:%.2f)", count.getFacetLabel(),
|
||||
count.getSimpleCount(), count.getWeightedCount(), count.getPenaltyCount(),
|
||||
count.getScore());
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate through the backend responses and fill up the FacetFieldInfo map.
|
||||
private void collectResponsesAndPopulateMap(
|
||||
final Map<String, FacetsResultsUtils.FacetFieldInfo> facetFieldInfoMap,
|
||||
final Set<Long> userIDWhitelist) {
|
||||
// Next, iterate through the backend responses.
|
||||
int i = 0;
|
||||
for (EarlybirdResponse facetsResponse : accumulatedResponses.getSuccessResponses()) {
|
||||
if (facetsResponse.isSetFacetResults()) {
|
||||
LOG.debug("Facet response from earlybird {} is {} ", i, facetsResponse.getFacetResults());
|
||||
i++;
|
||||
ThriftFacetResults facetResults = facetsResponse.getFacetResults();
|
||||
if (facetResults.isSetUserIDWhitelist()) {
|
||||
userIDWhitelist.addAll(facetResults.getUserIDWhitelist());
|
||||
}
|
||||
FacetsResultsUtils.fillFacetFieldInfo(
|
||||
facetResults, facetFieldInfoMap,
|
||||
userIDWhitelist);
|
||||
}
|
||||
}
|
||||
LOG.debug("Earlybird facet response total size {}", i);
|
||||
}
|
||||
}
|
||||
|
Binary file not shown.
@ -1,44 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
|
||||
|
||||
public final class PartitionResponseAccumulator extends ResponseAccumulator {
|
||||
private static final String TARGET_TYPE_PARTITION = "partition";
|
||||
|
||||
@Override
|
||||
public String getNameForLogging(int responseIndex, int numTotalResponses) {
|
||||
return TARGET_TYPE_PARTITION + responseIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNameForEarlybirdResponseCodeStats(int responseIndex, int numTotalResponses) {
|
||||
// We do not need to differentiate between partitions: we just want to get the number of
|
||||
// responses returned by Earlybirds, for each EarlybirdResponseCode.
|
||||
return TARGET_TYPE_PARTITION;
|
||||
}
|
||||
|
||||
@Override
|
||||
boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleSkippedResponse(EarlybirdResponseCode responseCode) { }
|
||||
|
||||
@Override
|
||||
public void handleErrorResponse(EarlybirdResponse response) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public AccumulatedResponses.PartitionCounts getPartitionCounts() {
|
||||
return new AccumulatedResponses.PartitionCounts(getNumResponses(),
|
||||
getSuccessResponses().size() + getSuccessfulEmptyResponseCount(), null);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMergingAcrossTiers() {
|
||||
return false;
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,638 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
|
||||
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
|
||||
import com.twitter.search.common.relevance.utils.ResultComparators;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird_root.collectors.RecencyMergeCollector;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
|
||||
.EarlyTerminationTrimmingStats.Type.ALREADY_EARLY_TERMINATED;
|
||||
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
|
||||
.EarlyTerminationTrimmingStats.Type.FILTERED;
|
||||
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
|
||||
.EarlyTerminationTrimmingStats.Type.FILTERED_AND_TRUNCATED;
|
||||
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
|
||||
.EarlyTerminationTrimmingStats.Type.NOT_EARLY_TERMINATED;
|
||||
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
|
||||
.EarlyTerminationTrimmingStats.Type.TERMINATED_GOT_EXACT_NUM_RESULTS;
|
||||
import static com.twitter.search.earlybird_root.mergers.RecencyResponseMerger
|
||||
.EarlyTerminationTrimmingStats.Type.TRUNCATED;
|
||||
|
||||
/**
|
||||
* Merger class to merge recency search EarlybirdResponse objects.
|
||||
*/
|
||||
public class RecencyResponseMerger extends EarlybirdResponseMerger {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(RecencyResponseMerger.class);
|
||||
|
||||
private static final SearchTimerStats RECENCY_TIMER =
|
||||
SearchTimerStats.export("merge_recency", TimeUnit.NANOSECONDS, false, true);
|
||||
|
||||
@VisibleForTesting
|
||||
static final String TERMINATED_COLLECTED_ENOUGH_RESULTS =
|
||||
"terminated_collected_enough_results";
|
||||
|
||||
// Allowed replication lag relative to all replicas. Replication lag exceeding
|
||||
// this amount may result in some tweets from the replica not returned in search.
|
||||
private static final long ALLOWED_REPLICATION_LAG_MS = 10000;
|
||||
|
||||
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
|
||||
|
||||
@VisibleForTesting
|
||||
static final SearchCounter RECENCY_ZERO_RESULT_COUNT_AFTER_FILTERING_MAX_MIN_IDS =
|
||||
SearchCounter.export("merger_recency_zero_result_count_after_filtering_max_min_ids");
|
||||
|
||||
@VisibleForTesting
|
||||
static final SearchCounter RECENCY_TRIMMED_TOO_MANY_RESULTS_COUNT =
|
||||
SearchCounter.export("merger_recency_trimmed_too_many_results_count");
|
||||
|
||||
private static final SearchCounter RECENCY_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS =
|
||||
SearchCounter.export("merger_recency_tier_merge_early_terminated_with_not_enough_results");
|
||||
|
||||
private static final SearchCounter RECENCY_CLEARED_EARLY_TERMINATION_COUNT =
|
||||
SearchCounter.export("merger_recency_cleared_early_termination_count");
|
||||
|
||||
/**
|
||||
* Results were truncated because merged results exceeded the requested numResults.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static final String MERGING_EARLY_TERMINATION_REASON_TRUNCATED =
|
||||
"root_merging_truncated_results";
|
||||
|
||||
/**
|
||||
* Results smaller than the merged minSearchedStatusId were filtered out.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static final String MERGING_EARLY_TERMINATION_REASON_FILTERED =
|
||||
"root_merging_filtered_results";
|
||||
|
||||
@VisibleForTesting
|
||||
static final EarlyTerminationTrimmingStats PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
|
||||
new EarlyTerminationTrimmingStats("recency_partition_merging");
|
||||
|
||||
@VisibleForTesting
|
||||
static final EarlyTerminationTrimmingStats TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
|
||||
new EarlyTerminationTrimmingStats("recency_tier_merging");
|
||||
|
||||
@VisibleForTesting
|
||||
static class EarlyTerminationTrimmingStats {
|
||||
|
||||
enum Type {
|
||||
/**
|
||||
* The whole result was not terminated at all.
|
||||
*/
|
||||
NOT_EARLY_TERMINATED,
|
||||
/**
|
||||
* Was terminated before we did any trimming.
|
||||
*/
|
||||
ALREADY_EARLY_TERMINATED,
|
||||
/**
|
||||
* Was not terminated when merged, but results were filtered due to min/max ranges.
|
||||
*/
|
||||
FILTERED,
|
||||
/**
|
||||
* Was not terminated when merged, but results were truncated.
|
||||
*/
|
||||
TRUNCATED,
|
||||
/**
|
||||
* Was not terminated when merged, but results were filtered due to min/max ranges and
|
||||
* truncated.
|
||||
*/
|
||||
FILTERED_AND_TRUNCATED,
|
||||
/**
|
||||
* When the search asks for X result, and we get exactly X results back, without trimming
|
||||
* or truncating on the tail side (min_id side), we still mark the search as early terminated.
|
||||
* This is because later tiers possibly has more results.
|
||||
*/
|
||||
TERMINATED_GOT_EXACT_NUM_RESULTS,
|
||||
}
|
||||
|
||||
/**
|
||||
* A counter tracking merged responses for each {@link EarlyTerminationTrimmingStats.Type}
|
||||
* define above.
|
||||
*/
|
||||
private final ImmutableMap<Type, SearchCounter> searchCounterMap;
|
||||
|
||||
EarlyTerminationTrimmingStats(String prefix) {
|
||||
Map<Type, SearchCounter> tempMap = Maps.newEnumMap(Type.class);
|
||||
|
||||
tempMap.put(NOT_EARLY_TERMINATED,
|
||||
SearchCounter.export(prefix + "_not_early_terminated_after_merging"));
|
||||
tempMap.put(ALREADY_EARLY_TERMINATED,
|
||||
SearchCounter.export(prefix + "_early_terminated_before_merge_trimming"));
|
||||
tempMap.put(TRUNCATED,
|
||||
SearchCounter.export(prefix + "_early_terminated_after_merging_truncated"));
|
||||
tempMap.put(FILTERED,
|
||||
SearchCounter.export(prefix + "_early_terminated_after_merging_filtered"));
|
||||
tempMap.put(FILTERED_AND_TRUNCATED,
|
||||
SearchCounter.export(prefix + "_early_terminated_after_merging_filtered_and_truncated"));
|
||||
tempMap.put(TERMINATED_GOT_EXACT_NUM_RESULTS,
|
||||
SearchCounter.export(prefix + "_early_terminated_after_merging_got_exact_num_results"));
|
||||
|
||||
searchCounterMap = Maps.immutableEnumMap(tempMap);
|
||||
}
|
||||
|
||||
public SearchCounter getCounterFor(Type type) {
|
||||
return searchCounterMap.get(type);
|
||||
}
|
||||
}
|
||||
|
||||
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
|
||||
|
||||
/**
 * Creates a recency merger.
 *
 * @param requestContext passed through to the superclass constructor.
 * @param responses passed through to the superclass constructor.
 * @param mode passed through to the superclass constructor.
 * @param featureSchemaMerger kept by this merger and used in {@code internalMerge} to collect
 *     and set the feature schema on the merged search results.
 */
public RecencyResponseMerger(
    EarlybirdRequestContext requestContext,
    List<Future<EarlybirdResponse>> responses,
    ResponseAccumulator mode,
    EarlybirdFeatureSchemaMerger featureSchemaMerger) {
  super(requestContext, responses, mode);
  this.featureSchemaMerger = featureSchemaMerger;
}
|
||||
|
||||
@Override
|
||||
protected double getDefaultSuccessResponseThreshold() {
|
||||
return SUCCESSFUL_RESPONSE_THRESHOLD;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SearchTimerStats getMergedResponseTimer() {
|
||||
return RECENCY_TIMER;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
|
||||
// The merged maxSearchedStatusId and minSearchedStatusId
|
||||
long maxId = findMaxFullySearchedStatusID();
|
||||
long minId = findMinFullySearchedStatusID();
|
||||
|
||||
RecencyMergeCollector collector = new RecencyMergeCollector(responses.size());
|
||||
int totalResultSize = addResponsesToCollector(collector);
|
||||
ThriftSearchResults searchResults = collector.getAllSearchResults();
|
||||
|
||||
TrimStats trimStats = trimResults(searchResults, minId, maxId);
|
||||
setMergedMaxSearchedStatusId(searchResults, maxId);
|
||||
setMergedMinSearchedStatusId(
|
||||
searchResults, minId, trimStats.getResultsTruncatedFromTailCount() > 0);
|
||||
|
||||
mergedResponse.setSearchResults(searchResults);
|
||||
|
||||
// Override some components of the response as appropriate to real-time.
|
||||
searchResults.setHitCounts(aggregateHitCountMap());
|
||||
if (accumulatedResponses.isMergingPartitionsWithinATier()
|
||||
&& clearEarlyTerminationIfReachingTierBottom(mergedResponse)) {
|
||||
RECENCY_CLEARED_EARLY_TERMINATION_COUNT.increment();
|
||||
} else {
|
||||
setEarlyTerminationForTrimmedResults(mergedResponse, trimStats);
|
||||
}
|
||||
|
||||
responseMessageBuilder.debugVerbose("Hits: %s %s", totalResultSize, trimStats);
|
||||
responseMessageBuilder.debugVerbose(
|
||||
"Hash Partitioned Earlybird call completed successfully: %s", mergedResponse);
|
||||
|
||||
featureSchemaMerger.collectAndSetFeatureSchemaInResponse(
|
||||
searchResults,
|
||||
requestContext,
|
||||
"merger_recency_tier",
|
||||
accumulatedResponses.getSuccessResponses());
|
||||
|
||||
return mergedResponse;
|
||||
}
|
||||
|
||||
/**
|
||||
* When we reached tier bottom, pagination can stop working even though we haven't got
|
||||
* all results. e.g.
|
||||
* Results from partition 1: [101 91 81], minSearchedStatusId is 81
|
||||
* Results from Partition 2: [102 92], minSearchedStatusId is 92, not early terminated.
|
||||
*
|
||||
* After merge, we get [102, 101, 92], with minResultId == 92. Since results from
|
||||
* partition 2 is not early terminated, 92 is the tier bottom here. Since results are
|
||||
* filtered, early termination for merged result is set to true, so blender will call again,
|
||||
* with maxDocId == 91. This time we get result:
|
||||
* Results from partition 1: [91 81], minSearchedStatusId is 81
|
||||
* Results from partition 2: [], minSearchedStatusId is still 92
|
||||
* After merge we get [] and minSearchedStatusId is still 92. No progress can be made on
|
||||
* pagination and clients get stuck.
|
||||
*
|
||||
* So in this case, we clear the early termination flag to tell blender there is no more
|
||||
* result in this tier. Tweets below tier bottom will be missed, but that also happens
|
||||
* without this step, as the next pagination call will return empty results anyway.
|
||||
* So even if there is NOT overlap between tiers, this is still better.
|
||||
*
|
||||
* Return true if early termination is cleared due to this, otherwise return false.
|
||||
* To be safe, we do nothing here to keep existing behavior and only override it in
|
||||
* StrictRecencyResponseMerger.
|
||||
*/
|
||||
protected boolean clearEarlyTerminationIfReachingTierBottom(EarlybirdResponse mergedResponse) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the merged response should be early-terminated when it has exactly as many
|
||||
* trimmed results as requested, as is not early-terminated because of other reasons.
|
||||
*/
|
||||
protected boolean shouldEarlyTerminateWhenEnoughTrimmedResults() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* If the end results were trimmed in any way, reflect that in the response as a query that was
|
||||
* early terminated. A response can be either (1) truncated because we merged more results than
|
||||
* what was asked for with numResults, or (2) we filtered results that were smaller than the
|
||||
* merged minSearchedStatusId.
|
||||
*
|
||||
* @param mergedResponse the merged response.
|
||||
* @param trimStats trim stats for this merge.
|
||||
*/
|
||||
private void setEarlyTerminationForTrimmedResults(
|
||||
EarlybirdResponse mergedResponse,
|
||||
TrimStats trimStats) {
|
||||
|
||||
responseMessageBuilder.debugVerbose("Checking for merge trimming, trimStats %s", trimStats);
|
||||
|
||||
EarlyTerminationTrimmingStats stats = getEarlyTerminationTrimmingStats();
|
||||
|
||||
EarlyTerminationInfo earlyTerminationInfo = mergedResponse.getEarlyTerminationInfo();
|
||||
Preconditions.checkNotNull(earlyTerminationInfo);
|
||||
|
||||
if (!earlyTerminationInfo.isEarlyTerminated()) {
|
||||
if (trimStats.getMinIdFilterCount() > 0 || trimStats.getResultsTruncatedFromTailCount() > 0) {
|
||||
responseMessageBuilder.debugVerbose("Setting early termination, trimStats: %s, results: %s",
|
||||
trimStats, mergedResponse);
|
||||
|
||||
earlyTerminationInfo.setEarlyTerminated(true);
|
||||
addEarlyTerminationReasons(earlyTerminationInfo, trimStats);
|
||||
|
||||
if (trimStats.getMinIdFilterCount() > 0
|
||||
&& trimStats.getResultsTruncatedFromTailCount() > 0) {
|
||||
stats.getCounterFor(FILTERED_AND_TRUNCATED).increment();
|
||||
} else if (trimStats.getMinIdFilterCount() > 0) {
|
||||
stats.getCounterFor(FILTERED).increment();
|
||||
} else if (trimStats.getResultsTruncatedFromTailCount() > 0) {
|
||||
stats.getCounterFor(TRUNCATED).increment();
|
||||
} else {
|
||||
Preconditions.checkState(false, "Invalid TrimStats: %s", trimStats);
|
||||
}
|
||||
} else if ((computeNumResultsToKeep() == mergedResponse.getSearchResults().getResultsSize())
|
||||
&& shouldEarlyTerminateWhenEnoughTrimmedResults()) {
|
||||
earlyTerminationInfo.setEarlyTerminated(true);
|
||||
earlyTerminationInfo.addToMergedEarlyTerminationReasons(
|
||||
TERMINATED_COLLECTED_ENOUGH_RESULTS);
|
||||
stats.getCounterFor(TERMINATED_GOT_EXACT_NUM_RESULTS).increment();
|
||||
} else {
|
||||
stats.getCounterFor(NOT_EARLY_TERMINATED).increment();
|
||||
}
|
||||
} else {
|
||||
stats.getCounterFor(ALREADY_EARLY_TERMINATED).increment();
|
||||
// Even if the results were already marked as early terminated, we can add additional
|
||||
// reasons for debugging (if the merged results were filtered or truncated).
|
||||
addEarlyTerminationReasons(earlyTerminationInfo, trimStats);
|
||||
}
|
||||
}
|
||||
|
||||
private void addEarlyTerminationReasons(
|
||||
EarlyTerminationInfo earlyTerminationInfo,
|
||||
TrimStats trimStats) {
|
||||
|
||||
if (trimStats.getMinIdFilterCount() > 0) {
|
||||
earlyTerminationInfo.addToMergedEarlyTerminationReasons(
|
||||
MERGING_EARLY_TERMINATION_REASON_FILTERED);
|
||||
}
|
||||
|
||||
if (trimStats.getResultsTruncatedFromTailCount() > 0) {
|
||||
earlyTerminationInfo.addToMergedEarlyTerminationReasons(
|
||||
MERGING_EARLY_TERMINATION_REASON_TRUNCATED);
|
||||
}
|
||||
}
|
||||
|
||||
private EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStats() {
|
||||
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
|
||||
return getEarlyTerminationTrimmingStatsForPartitions();
|
||||
} else {
|
||||
return getEarlyTerminationTrimmingStatsForTiers();
|
||||
}
|
||||
}
|
||||
|
||||
protected EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForPartitions() {
|
||||
return PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
|
||||
}
|
||||
|
||||
protected EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForTiers() {
|
||||
return TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
|
||||
}
|
||||
|
||||
/**
|
||||
* If we get enough results, no need to go on.
|
||||
* If one of the partitions early terminated, we can't go on or else there could be a gap.
|
||||
*/
|
||||
@Override
|
||||
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
|
||||
boolean foundEarlyTermination) {
|
||||
|
||||
|
||||
int resultsRequested = computeNumResultsToKeep();
|
||||
|
||||
boolean shouldEarlyTerminate = foundEarlyTermination
|
||||
|| totalResultsFromSuccessfulShards >= resultsRequested;
|
||||
|
||||
if (shouldEarlyTerminate && totalResultsFromSuccessfulShards < resultsRequested) {
|
||||
RECENCY_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS.increment();
|
||||
}
|
||||
|
||||
return shouldEarlyTerminate;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the min status id that has been _completely_ searched across all partitions. The
|
||||
* largest min status id across all partitions.
|
||||
*
|
||||
* @return the min searched status id found
|
||||
*/
|
||||
protected long findMinFullySearchedStatusID() {
|
||||
List<Long> minIds = accumulatedResponses.getMinIds();
|
||||
if (minIds.isEmpty()) {
|
||||
return Long.MIN_VALUE;
|
||||
}
|
||||
|
||||
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
|
||||
// When merging partitions, the min ID should be the largest among the min IDs.
|
||||
return Collections.max(accumulatedResponses.getMinIds());
|
||||
} else {
|
||||
// When merging tiers, the min ID should be the smallest among the min IDs.
|
||||
return Collections.min(accumulatedResponses.getMinIds());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the max status id that has been _completely_ searched across all partitions. The
|
||||
* smallest max status id across all partitions.
|
||||
*
|
||||
* This is where we reconcile replication lag by selecting the oldest maxid from the
|
||||
* partitions searched.
|
||||
*
|
||||
* @return the max searched status id found
|
||||
*/
|
||||
protected long findMaxFullySearchedStatusID() {
|
||||
List<Long> maxIDs = accumulatedResponses.getMaxIds();
|
||||
if (maxIDs.isEmpty()) {
|
||||
return Long.MAX_VALUE;
|
||||
}
|
||||
Collections.sort(maxIDs);
|
||||
|
||||
final long newest = maxIDs.get(maxIDs.size() - 1);
|
||||
final long newestTimestamp = SnowflakeIdParser.getTimestampFromTweetId(newest);
|
||||
|
||||
for (int i = 0; i < maxIDs.size(); i++) {
|
||||
long oldest = maxIDs.get(i);
|
||||
long oldestTimestamp = SnowflakeIdParser.getTimestampFromTweetId(oldest);
|
||||
long deltaMs = newestTimestamp - oldestTimestamp;
|
||||
|
||||
if (i == 0) {
|
||||
LOG.debug("Max delta is {}", deltaMs);
|
||||
}
|
||||
|
||||
if (deltaMs < ALLOWED_REPLICATION_LAG_MS) {
|
||||
if (i != 0) {
|
||||
LOG.debug("{} partition replicas lagging more than {} ms", i, ALLOWED_REPLICATION_LAG_MS);
|
||||
}
|
||||
return oldest;
|
||||
}
|
||||
}
|
||||
|
||||
// Can't get here - by this point oldest == newest, and delta is 0.
|
||||
return newest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Trim the ThriftSearchResults if we have enough results, to return the first
|
||||
* 'computeNumResultsToKeep()' number of results.
|
||||
*
|
||||
* If we don't have enough results after trimming, this function will first try to back fill
|
||||
* older results, then newer results
|
||||
*
|
||||
* @param searchResults ThriftSearchResults that hold the to be trimmed List<ThriftSearchResult>
|
||||
* @return TrimStats containing statistics about how many results being removed
|
||||
*/
|
||||
protected TrimStats trimResults(
|
||||
ThriftSearchResults searchResults,
|
||||
long mergedMin,
|
||||
long mergedMax) {
|
||||
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
|
||||
// no results, no trimming needed
|
||||
return TrimStats.EMPTY_STATS;
|
||||
}
|
||||
|
||||
if (requestContext.getRequest().getSearchQuery().isSetSearchStatusIds()) {
|
||||
// Not a normal search, no trimming needed
|
||||
return TrimStats.EMPTY_STATS;
|
||||
}
|
||||
|
||||
TrimStats trimStats = new TrimStats();
|
||||
trimExactDups(searchResults, trimStats);
|
||||
|
||||
int numResultsRequested = computeNumResultsToKeep();
|
||||
if (shouldSkipTrimmingWhenNotEnoughResults(searchResults, numResultsRequested)) {
|
||||
//////////////////////////////////////////////////////////
|
||||
// We don't have enough results, let's not do trimming
|
||||
//////////////////////////////////////////////////////////
|
||||
return trimStats;
|
||||
}
|
||||
|
||||
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
|
||||
trimResultsBasedSearchedRange(
|
||||
searchResults, trimStats, numResultsRequested, mergedMin, mergedMax);
|
||||
}
|
||||
|
||||
// Respect "computeNumResultsToKeep()" here, only keep "computeNumResultsToKeep()" results.
|
||||
truncateResults(searchResults, trimStats);
|
||||
|
||||
return trimStats;
|
||||
}
|
||||
|
||||
/**
|
||||
* When there's not enough results, we don't remove results based on the searched range.
|
||||
* This has a tradeoff: with this, we don't reduce our recall when we already don't have enough
|
||||
* results. However, with this, we can lose results while paginating because we return results
|
||||
* outside of the valid searched range.
|
||||
*/
|
||||
protected boolean shouldSkipTrimmingWhenNotEnoughResults(
|
||||
ThriftSearchResults searchResults, int numResultsRequested) {
|
||||
return searchResults.getResultsSize() <= numResultsRequested;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Trim results based on search range. The search range [x, y] is determined by:
|
||||
* x is the maximum of the minimum search IDs;
|
||||
* y is the minimum of the maximum search IDs.
|
||||
*
|
||||
* IDs outside of this range are removed.
|
||||
* If we do not get enough results after the removal, we add IDs back until we get enough results.
|
||||
* We first add IDs back from the older side back. If there's still not enough results,
|
||||
* we start adding IDs from the newer side back.
|
||||
*/
|
||||
private void trimResultsBasedSearchedRange(ThriftSearchResults searchResults,
|
||||
TrimStats trimStats,
|
||||
int numResultsRequested,
|
||||
long mergedMin,
|
||||
long mergedMax) {
|
||||
///////////////////////////////////////////////////////////////////
|
||||
// we have more results than requested, let's do some trimming
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
// Save the original results before trimming
|
||||
List<ThriftSearchResult> originalResults = searchResults.getResults();
|
||||
|
||||
filterResultsByMergedMinMaxIds(searchResults, mergedMax, mergedMin, trimStats);
|
||||
|
||||
// This does happen. It is hard to say what we should do here so we just return the original
|
||||
// result here.
|
||||
if (searchResults.getResultsSize() == 0) {
|
||||
RECENCY_ZERO_RESULT_COUNT_AFTER_FILTERING_MAX_MIN_IDS.increment();
|
||||
searchResults.setResults(originalResults);
|
||||
|
||||
// Clean up min/mix filtered count, since we're bringing back whatever we just filtered.
|
||||
trimStats.clearMaxIdFilterCount();
|
||||
trimStats.clearMinIdFilterCount();
|
||||
|
||||
if (LOG.isDebugEnabled() || responseMessageBuilder.isDebugMode()) {
|
||||
String errMsg = "No trimming is done as filtered results is empty. "
|
||||
+ "maxId=" + mergedMax + ",minId=" + mergedMin;
|
||||
LOG.debug(errMsg);
|
||||
responseMessageBuilder.append(errMsg + "\n");
|
||||
}
|
||||
} else {
|
||||
// oops! we're trimming too many results. Let's put some back
|
||||
if (searchResults.getResultsSize() < numResultsRequested) {
|
||||
RECENCY_TRIMMED_TOO_MANY_RESULTS_COUNT.increment();
|
||||
|
||||
List<ThriftSearchResult> trimmedResults = searchResults.getResults();
|
||||
long firstTrimmedResultId = trimmedResults.get(0).getId();
|
||||
long lastTrimmedResultId = trimmedResults.get(trimmedResults.size() - 1).getId();
|
||||
|
||||
// First, try to back fill with older results
|
||||
int i = 0;
|
||||
for (; i < originalResults.size(); ++i) {
|
||||
ThriftSearchResult result = originalResults.get(i);
|
||||
if (result.getId() < lastTrimmedResultId) {
|
||||
trimmedResults.add(result);
|
||||
trimStats.decreaseMinIdFilterCount();
|
||||
if (trimmedResults.size() >= numResultsRequested) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// still not enough results? back fill with newer results
|
||||
// find the oldest of the newer results
|
||||
if (trimmedResults.size() < numResultsRequested) {
|
||||
// still not enough results? back fill with newer results
|
||||
// find the oldest of the newer results
|
||||
for (i = originalResults.size() - 1; i >= 0; --i) {
|
||||
ThriftSearchResult result = originalResults.get(i);
|
||||
if (result.getId() > firstTrimmedResultId) {
|
||||
trimmedResults.add(result);
|
||||
trimStats.decreaseMaxIdFilterCount();
|
||||
if (trimmedResults.size() >= numResultsRequested) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// newer results were added to the back of the list, re-sort
|
||||
Collections.sort(trimmedResults, ResultComparators.ID_COMPARATOR);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void setMergedMinSearchedStatusId(
|
||||
ThriftSearchResults searchResults,
|
||||
long currentMergedMin,
|
||||
boolean resultsWereTrimmed) {
|
||||
if (accumulatedResponses.getMinIds().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
long merged;
|
||||
if (searchResults == null
|
||||
|| !searchResults.isSetResults()
|
||||
|| searchResults.getResultsSize() == 0) {
|
||||
merged = currentMergedMin;
|
||||
} else {
|
||||
List<ThriftSearchResult> results = searchResults.getResults();
|
||||
long firstResultId = results.get(0).getId();
|
||||
long lastResultId = results.get(results.size() - 1).getId();
|
||||
merged = Math.min(firstResultId, lastResultId);
|
||||
if (!resultsWereTrimmed) {
|
||||
// If the results were trimmed, we want to set minSearchedStatusID to the smallest
|
||||
// tweet ID in the response. Otherwise, we want to take the min between that, and
|
||||
// the current minSearchedStatusID.
|
||||
merged = Math.min(merged, currentMergedMin);
|
||||
}
|
||||
}
|
||||
|
||||
searchResults.setMinSearchedStatusID(merged);
|
||||
}
|
||||
|
||||
private void setMergedMaxSearchedStatusId(
|
||||
ThriftSearchResults searchResults,
|
||||
long currentMergedMax) {
|
||||
if (accumulatedResponses.getMaxIds().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
long merged;
|
||||
if (searchResults == null
|
||||
|| !searchResults.isSetResults()
|
||||
|| searchResults.getResultsSize() == 0) {
|
||||
merged = currentMergedMax;
|
||||
} else {
|
||||
List<ThriftSearchResult> results = searchResults.getResults();
|
||||
long firstResultId = results.get(0).getId();
|
||||
long lastResultId = results.get(results.size() - 1).getId();
|
||||
long maxResultId = Math.max(firstResultId, lastResultId);
|
||||
merged = Math.max(maxResultId, currentMergedMax);
|
||||
}
|
||||
|
||||
searchResults.setMaxSearchedStatusID(merged);
|
||||
}
|
||||
|
||||
protected static void filterResultsByMergedMinMaxIds(
|
||||
ThriftSearchResults results, long maxStatusId, long minStatusId, TrimStats trimStats) {
|
||||
List<ThriftSearchResult> trimedResults =
|
||||
Lists.newArrayListWithCapacity(results.getResultsSize());
|
||||
|
||||
for (ThriftSearchResult result : results.getResults()) {
|
||||
long statusId = result.getId();
|
||||
|
||||
if (statusId > maxStatusId) {
|
||||
trimStats.increaseMaxIdFilterCount();
|
||||
} else if (statusId < minStatusId) {
|
||||
trimStats.increaseMinIdFilterCount();
|
||||
} else {
|
||||
trimedResults.add(result);
|
||||
}
|
||||
}
|
||||
|
||||
results.setResults(trimedResults);
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,268 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.search.common.constants.thriftjava.ThriftLanguage;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
|
||||
import com.twitter.search.common.util.earlybird.ResultsUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird_root.collectors.RelevanceMergeCollector;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Merger class to merge relevance search EarlybirdResponse objects
|
||||
*/
|
||||
public class RelevanceResponseMerger extends EarlybirdResponseMerger {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(RelevanceResponseMerger.class);
|
||||
|
||||
private static final SearchTimerStats TIMER =
|
||||
SearchTimerStats.export("merge_relevance", TimeUnit.NANOSECONDS, false, true);
|
||||
|
||||
private static final SearchCounter RELVEANCE_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS =
|
||||
SearchCounter.export("merger_relevance_tier_merge_early_terminated_with_not_enough_results");
|
||||
|
||||
private static final String PARTITION_NUM_RESULTS_COUNTER_SKIP_STATS =
|
||||
"merger_relevance_post_trimmed_results_skip_stat_tier_%s_partition_%d";
|
||||
|
||||
@VisibleForTesting
|
||||
public static final String PARTITION_NUM_RESULTS_COUNTER_NAME_FORMAT =
|
||||
"merger_relevance_post_trimmed_results_from_tier_%s_partition_%d";
|
||||
|
||||
protected static final Function<EarlybirdResponse, Map<ThriftLanguage, Integer>> LANG_MAP_GETTER =
|
||||
response -> response.getSearchResults() == null
|
||||
? null
|
||||
: response.getSearchResults().getLanguageHistogram();
|
||||
|
||||
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.8;
|
||||
|
||||
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
|
||||
|
||||
// The number of partitions are not meaningful when it is invoked through multi-tier merging.
|
||||
private final int numPartitions;
|
||||
|
||||
/**
 * Creates a merger for relevance-mode responses.
 *
 * @param requestContext passed through to the superclass
 * @param responses passed through to the superclass
 * @param mode passed through to the superclass
 * @param featureSchemaMerger must not be null
 * @param numPartitions used to decide whether a response's partition ID is in range when
 *     publishing per-partition stats
 */
public RelevanceResponseMerger(EarlybirdRequestContext requestContext,
                               List<Future<EarlybirdResponse>> responses,
                               ResponseAccumulator mode,
                               EarlybirdFeatureSchemaMerger featureSchemaMerger,
                               int numPartitions) {
  super(requestContext, responses, mode);
  this.numPartitions = numPartitions;
  this.featureSchemaMerger = Preconditions.checkNotNull(featureSchemaMerger);
}
|
||||
|
||||
@Override
|
||||
protected double getDefaultSuccessResponseThreshold() {
|
||||
return SUCCESSFUL_RESPONSE_THRESHOLD;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SearchTimerStats getMergedResponseTimer() {
|
||||
return TIMER;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
|
||||
final ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
|
||||
long maxId = findMaxFullySearchedStatusID();
|
||||
long minId = findMinFullySearchedStatusID();
|
||||
|
||||
Preconditions.checkNotNull(searchQuery);
|
||||
Preconditions.checkState(searchQuery.isSetRankingMode());
|
||||
Preconditions.checkState(searchQuery.getRankingMode() == ThriftSearchRankingMode.RELEVANCE);
|
||||
|
||||
// First get the results in score order (the default comparator for this merge collector).
|
||||
RelevanceMergeCollector collector = new RelevanceMergeCollector(responses.size());
|
||||
int totalResultSize = addResponsesToCollector(collector);
|
||||
ThriftSearchResults searchResults = collector.getAllSearchResults();
|
||||
|
||||
TrimStats trimStats = trimResults(searchResults);
|
||||
featureSchemaMerger.collectAndSetFeatureSchemaInResponse(
|
||||
searchResults,
|
||||
requestContext,
|
||||
"merger_relevance_tier",
|
||||
accumulatedResponses.getSuccessResponses());
|
||||
|
||||
mergedResponse.setSearchResults(searchResults);
|
||||
|
||||
searchResults = mergedResponse.getSearchResults();
|
||||
searchResults
|
||||
.setHitCounts(aggregateHitCountMap())
|
||||
.setLanguageHistogram(aggregateLanguageHistograms());
|
||||
|
||||
if (!accumulatedResponses.getMaxIds().isEmpty()) {
|
||||
searchResults.setMaxSearchedStatusID(maxId);
|
||||
}
|
||||
|
||||
if (!accumulatedResponses.getMinIds().isEmpty()) {
|
||||
searchResults.setMinSearchedStatusID(minId);
|
||||
}
|
||||
|
||||
LOG.debug("Hits: {} Removed duplicates: {}", totalResultSize, trimStats.getRemovedDupsCount());
|
||||
LOG.debug("Hash Partition'ed Earlybird call completed successfully: {}", mergedResponse);
|
||||
|
||||
publishNumResultsFromPartitionStatistics(mergedResponse);
|
||||
|
||||
return mergedResponse;
|
||||
}
|
||||
|
||||
/**
|
||||
* If any of the partitions has an early termination, the tier merge must also early terminate.
|
||||
*
|
||||
* If a partition early terminated (we haven't fully searched that partition), and we instead
|
||||
* moved onto the next tier, there will be a gap of unsearched results.
|
||||
*
|
||||
* If our early termination condition was only if we had enough results, we could get bad quality
|
||||
* results by only looking at 20 hits when asking for 20 results.
|
||||
*/
|
||||
@Override
|
||||
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
|
||||
boolean foundEarlyTermination) {
|
||||
|
||||
// Don't use computeNumResultsToKeep because if returnAllResults is true, it will be
|
||||
// Integer.MAX_VALUE and we will always log a stat that we didn't get enough results
|
||||
int resultsRequested;
|
||||
EarlybirdRequest request = requestContext.getRequest();
|
||||
if (request.isSetNumResultsToReturnAtRoot()) {
|
||||
resultsRequested = request.getNumResultsToReturnAtRoot();
|
||||
} else {
|
||||
resultsRequested = request.getSearchQuery().getCollectorParams().getNumResultsToReturn();
|
||||
}
|
||||
if (foundEarlyTermination && totalResultsFromSuccessfulShards < resultsRequested) {
|
||||
RELVEANCE_TIER_MERGE_EARLY_TERMINATED_WITH_NOT_ENOUGH_RESULTS.increment();
|
||||
}
|
||||
|
||||
return foundEarlyTermination;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge language histograms from all queries.
|
||||
*
|
||||
* @return Merge per-language count map.
|
||||
*/
|
||||
private Map<ThriftLanguage, Integer> aggregateLanguageHistograms() {
|
||||
Map<ThriftLanguage, Integer> totalLangCounts = new TreeMap<>(
|
||||
ResultsUtil.aggregateCountMap(
|
||||
accumulatedResponses.getSuccessResponses(), LANG_MAP_GETTER));
|
||||
if (totalLangCounts.size() > 0) {
|
||||
if (responseMessageBuilder.isDebugMode()) {
|
||||
responseMessageBuilder.append("Language Distrbution:\n");
|
||||
int count = 0;
|
||||
for (Map.Entry<ThriftLanguage, Integer> entry : totalLangCounts.entrySet()) {
|
||||
responseMessageBuilder.append(
|
||||
String.format(" %10s:%6d", entry.getKey(), entry.getValue()));
|
||||
if (++count % 5 == 0) {
|
||||
responseMessageBuilder.append("\n");
|
||||
}
|
||||
}
|
||||
responseMessageBuilder.append("\n");
|
||||
}
|
||||
}
|
||||
return totalLangCounts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the min status id that has been searched. Since no results are trimmed for Relevance mode,
|
||||
* it should be the smallest among the min IDs.
|
||||
*/
|
||||
private long findMinFullySearchedStatusID() {
|
||||
// The min ID should be the smallest among the min IDs
|
||||
return accumulatedResponses.getMinIds().isEmpty() ? 0
|
||||
: Collections.min(accumulatedResponses.getMinIds());
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the max status id that has been searched. Since no results are trimmed for Relevance mode,
|
||||
* it should be the largest among the max IDs.
|
||||
*/
|
||||
private long findMaxFullySearchedStatusID() {
|
||||
// The max ID should be the largest among the max IDs
|
||||
return accumulatedResponses.getMaxIds().isEmpty() ? 0
|
||||
: Collections.max(accumulatedResponses.getMaxIds());
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all the searchResults except duplicates.
|
||||
*
|
||||
* @param searchResults ThriftSearchResults that hold the to be trimmed List<ThriftSearchResult>
|
||||
* @return TrimStats containing statistics about how many results being removed
|
||||
*/
|
||||
private TrimStats trimResults(ThriftSearchResults searchResults) {
|
||||
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
|
||||
// no results, no trimming needed
|
||||
return TrimStats.EMPTY_STATS;
|
||||
}
|
||||
|
||||
if (requestContext.getRequest().getSearchQuery().isSetSearchStatusIds()) {
|
||||
// Not a normal search, no trimming needed
|
||||
return TrimStats.EMPTY_STATS;
|
||||
}
|
||||
|
||||
TrimStats trimStats = new TrimStats();
|
||||
trimExactDups(searchResults, trimStats);
|
||||
|
||||
truncateResults(searchResults, trimStats);
|
||||
|
||||
return trimStats;
|
||||
}
|
||||
|
||||
private void publishNumResultsFromPartitionStatistics(EarlybirdResponse mergedResponse) {
|
||||
|
||||
// Keep track of all of the results that were kept after merging
|
||||
Set<Long> mergedResults =
|
||||
EarlybirdResponseUtil.getResults(mergedResponse).getResults()
|
||||
.stream()
|
||||
.map(result -> result.getId())
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
// For each successful response (pre merge), count how many of its results were kept post merge.
|
||||
// Increment the appropriate stat.
|
||||
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
|
||||
if (!response.isSetEarlybirdServerStats()) {
|
||||
continue;
|
||||
}
|
||||
int numResultsKept = 0;
|
||||
for (ThriftSearchResult result
|
||||
: EarlybirdResponseUtil.getResults(response).getResults()) {
|
||||
if (mergedResults.contains(result.getId())) {
|
||||
++numResultsKept;
|
||||
}
|
||||
}
|
||||
|
||||
// We only update partition stats when the partition ID looks sane.
|
||||
String tierName = response.getEarlybirdServerStats().getTierName();
|
||||
int partition = response.getEarlybirdServerStats().getPartition();
|
||||
if (partition >= 0 && partition < numPartitions) {
|
||||
SearchCounter.export(String.format(PARTITION_NUM_RESULTS_COUNTER_NAME_FORMAT,
|
||||
tierName,
|
||||
partition))
|
||||
.add(numResultsKept);
|
||||
} else {
|
||||
SearchCounter.export(String.format(PARTITION_NUM_RESULTS_COUNTER_SKIP_STATS,
|
||||
tierName,
|
||||
partition)).increment();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,356 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.util.earlybird.ResponseMergerUtils;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestType;
|
||||
|
||||
/**
|
||||
* Accumulates EarlybirdResponse's and determines when to early terminate.
|
||||
*/
|
||||
public abstract class ResponseAccumulator {
|
||||
|
||||
@VisibleForTesting
|
||||
static class MinMaxSearchedIdStats {
|
||||
/** How many results did we actually check */
|
||||
private final SearchCounter checkedMaxMinSearchedStatusId;
|
||||
private final SearchCounter unsetMaxSearchedStatusId;
|
||||
private final SearchCounter unsetMinSearchedStatusId;
|
||||
private final SearchCounter unsetMaxAndMinSearchedStatusId;
|
||||
private final SearchCounter sameMinMaxSearchedIdWithoutResults;
|
||||
private final SearchCounter sameMinMaxSearchedIdWithOneResult;
|
||||
private final SearchCounter sameMinMaxSearchedIdWithResults;
|
||||
private final SearchCounter flippedMinMaxSearchedId;
|
||||
|
||||
MinMaxSearchedIdStats(EarlybirdRequestType requestType) {
|
||||
String statPrefix = "merge_helper_" + requestType.getNormalizedName();
|
||||
|
||||
checkedMaxMinSearchedStatusId = SearchCounter.export(statPrefix
|
||||
+ "_max_min_searched_id_checks");
|
||||
unsetMaxSearchedStatusId = SearchCounter.export(statPrefix
|
||||
+ "_unset_max_searched_status_id");
|
||||
unsetMinSearchedStatusId = SearchCounter.export(statPrefix
|
||||
+ "_unset_min_searched_status_id");
|
||||
unsetMaxAndMinSearchedStatusId = SearchCounter.export(statPrefix
|
||||
+ "_unset_max_and_min_searched_status_id");
|
||||
sameMinMaxSearchedIdWithoutResults = SearchCounter.export(statPrefix
|
||||
+ "_same_min_max_searched_id_without_results");
|
||||
sameMinMaxSearchedIdWithOneResult = SearchCounter.export(statPrefix
|
||||
+ "_same_min_max_searched_id_with_one_results");
|
||||
sameMinMaxSearchedIdWithResults = SearchCounter.export(statPrefix
|
||||
+ "_same_min_max_searched_id_with_results");
|
||||
flippedMinMaxSearchedId = SearchCounter.export(statPrefix
|
||||
+ "_flipped_min_max_searched_id");
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
SearchCounter getCheckedMaxMinSearchedStatusId() {
|
||||
return checkedMaxMinSearchedStatusId;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
SearchCounter getFlippedMinMaxSearchedId() {
|
||||
return flippedMinMaxSearchedId;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
SearchCounter getUnsetMaxSearchedStatusId() {
|
||||
return unsetMaxSearchedStatusId;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
SearchCounter getUnsetMinSearchedStatusId() {
|
||||
return unsetMinSearchedStatusId;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
SearchCounter getUnsetMaxAndMinSearchedStatusId() {
|
||||
return unsetMaxAndMinSearchedStatusId;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
SearchCounter getSameMinMaxSearchedIdWithoutResults() {
|
||||
return sameMinMaxSearchedIdWithoutResults;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
SearchCounter getSameMinMaxSearchedIdWithOneResult() {
|
||||
return sameMinMaxSearchedIdWithOneResult;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
SearchCounter getSameMinMaxSearchedIdWithResults() {
|
||||
return sameMinMaxSearchedIdWithResults;
|
||||
}
|
||||
}
|
||||
|
||||
/** One MinMaxSearchedIdStats instance per EarlybirdRequestType, built once at class load. */
@VisibleForTesting
static final Map<EarlybirdRequestType, MinMaxSearchedIdStats> MIN_MAX_SEARCHED_ID_STATS_MAP;
static {
  EnumMap<EarlybirdRequestType, MinMaxSearchedIdStats> perTypeStats =
      new EnumMap<>(EarlybirdRequestType.class);
  for (EarlybirdRequestType type : EarlybirdRequestType.values()) {
    perTypeStats.put(type, new MinMaxSearchedIdStats(type));
  }

  MIN_MAX_SEARCHED_ID_STATS_MAP = Maps.immutableEnumMap(perTypeStats);
}
|
||||
|
||||
// Merge has encountered at least one early terminated response.
|
||||
private boolean foundEarlyTermination = false;
|
||||
// Empty but successful response counter (E.g. when a tier or partition is skipped)
|
||||
private int successfulEmptyResponseCount = 0;
|
||||
// The list of the successful responses from all earlybird futures. This does not include empty
|
||||
// responses resulted from null requests.
|
||||
private final List<EarlybirdResponse> successResponses = new ArrayList<>();
|
||||
// The list of the error responses from all earlybird futures.
|
||||
private final List<EarlybirdResponse> errorResponses = new ArrayList<>();
|
||||
// the list of max statusIds seen in each earlybird.
|
||||
private final List<Long> maxIds = new ArrayList<>();
|
||||
// the list of min statusIds seen in each earlybird.
|
||||
private final List<Long> minIds = new ArrayList<>();
|
||||
|
||||
private int numResponses = 0;
|
||||
|
||||
private int numResultsAccumulated = 0;
|
||||
private int numSearchedSegments = 0;
|
||||
|
||||
/**
|
||||
* Returns a string that can be used for logging to identify a single response out of all the
|
||||
* responses that are being merged.
|
||||
*
|
||||
* @param responseIndex the index of a response's partition or tier, depending on the type of
|
||||
* responses being accumulated.
|
||||
* @param numTotalResponses the total number of partitions or tiers that are being merged.
|
||||
*/
|
||||
public abstract String getNameForLogging(int responseIndex, int numTotalResponses);
|
||||
|
||||
/**
|
||||
* Returns a string that is used to export per-EarlybirdResponseCode stats for partitions and tiers.
|
||||
*
|
||||
* @param responseIndex the index of a response's partition or tier.
|
||||
* @param numTotalResponses the total number of partitions or tiers that are being merged.
|
||||
* @return a string that is used to export per-EarlybirdResponseCode stats for partitions and tiers.
|
||||
*/
|
||||
public abstract String getNameForEarlybirdResponseCodeStats(
|
||||
int responseIndex, int numTotalResponses);
|
||||
|
||||
abstract boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger);
|
||||
|
||||
/**
|
||||
* Add a EarlybirdResponse
|
||||
*/
|
||||
public void addResponse(EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
|
||||
EarlybirdRequest request,
|
||||
EarlybirdResponse response) {
|
||||
numResponses++;
|
||||
numSearchedSegments += response.getNumSearchedSegments();
|
||||
|
||||
if (isSkippedResponse(response)) {
|
||||
// This is an empty response, no processing is required, just need to update statistics.
|
||||
successfulEmptyResponseCount++;
|
||||
handleSkippedResponse(response.getResponseCode());
|
||||
} else if (isErrorResponse(response)) {
|
||||
errorResponses.add(response);
|
||||
handleErrorResponse(response);
|
||||
} else {
|
||||
handleSuccessfulResponse(responseMessageBuilder, request, response);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isErrorResponse(EarlybirdResponse response) {
|
||||
return !response.isSetResponseCode()
|
||||
|| response.getResponseCode() != EarlybirdResponseCode.SUCCESS;
|
||||
}
|
||||
|
||||
private boolean isSkippedResponse(EarlybirdResponse response) {
|
||||
return response.isSetResponseCode()
|
||||
&& (response.getResponseCode() == EarlybirdResponseCode.PARTITION_SKIPPED
|
||||
|| response.getResponseCode() == EarlybirdResponseCode.TIER_SKIPPED);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a response corresponding to a skipped partition or skipped tier.
|
||||
*/
|
||||
protected abstract void handleSkippedResponse(EarlybirdResponseCode responseCode);
|
||||
|
||||
/**
|
||||
* Handle an error response
|
||||
*/
|
||||
protected abstract void handleErrorResponse(EarlybirdResponse response);
|
||||
|
||||
/**
|
||||
* Subclasses can override this to perform more successful response handling.
|
||||
*/
|
||||
protected void extraSuccessfulResponseHandler(EarlybirdResponse response) { }
|
||||
|
||||
/**
|
||||
* Whether the helper is for merging results from partitions within a single tier.
|
||||
*/
|
||||
protected final boolean isMergingPartitionsWithinATier() {
|
||||
return !isMergingAcrossTiers();
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether the helper is for merging results across different tiers.
|
||||
*/
|
||||
protected abstract boolean isMergingAcrossTiers();
|
||||
|
||||
|
||||
/**
|
||||
* Record a successful response.
|
||||
*/
|
||||
public final void handleSuccessfulResponse(
|
||||
EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
|
||||
EarlybirdRequest request,
|
||||
EarlybirdResponse response) {
|
||||
successResponses.add(response);
|
||||
if (response.isSetSearchResults()) {
|
||||
ThriftSearchResults searchResults = response.getSearchResults();
|
||||
numResultsAccumulated += searchResults.getResultsSize();
|
||||
|
||||
recordMinMaxSearchedIdsAndUpdateStats(responseMessageBuilder, request, response,
|
||||
searchResults);
|
||||
}
|
||||
if (response.isSetEarlyTerminationInfo()
|
||||
&& response.getEarlyTerminationInfo().isEarlyTerminated()) {
|
||||
foundEarlyTermination = true;
|
||||
}
|
||||
extraSuccessfulResponseHandler(response);
|
||||
}
|
||||
|
||||
private void recordMinMaxSearchedIdsAndUpdateStats(
|
||||
EarlybirdResponseDebugMessageBuilder responseMessageBuidler,
|
||||
EarlybirdRequest request,
|
||||
EarlybirdResponse response,
|
||||
ThriftSearchResults searchResults) {
|
||||
|
||||
boolean isMaxIdSet = searchResults.isSetMaxSearchedStatusID();
|
||||
boolean isMinIdSet = searchResults.isSetMinSearchedStatusID();
|
||||
|
||||
if (isMaxIdSet) {
|
||||
maxIds.add(searchResults.getMaxSearchedStatusID());
|
||||
}
|
||||
if (isMinIdSet) {
|
||||
minIds.add(searchResults.getMinSearchedStatusID());
|
||||
}
|
||||
|
||||
updateMinMaxIdStats(responseMessageBuidler, request, response, searchResults, isMaxIdSet,
|
||||
isMinIdSet);
|
||||
}
|
||||
|
||||
private void updateMinMaxIdStats(
|
||||
EarlybirdResponseDebugMessageBuilder responseMessageBuilder,
|
||||
EarlybirdRequest request,
|
||||
EarlybirdResponse response,
|
||||
ThriftSearchResults searchResults,
|
||||
boolean isMaxIdSet,
|
||||
boolean isMinIdSet) {
|
||||
// Now just track the stats.
|
||||
EarlybirdRequestType requestType = EarlybirdRequestType.of(request);
|
||||
MinMaxSearchedIdStats minMaxSearchedIdStats = MIN_MAX_SEARCHED_ID_STATS_MAP.get(requestType);
|
||||
|
||||
minMaxSearchedIdStats.checkedMaxMinSearchedStatusId.increment();
|
||||
if (isMaxIdSet && isMinIdSet) {
|
||||
if (searchResults.getMinSearchedStatusID() > searchResults.getMaxSearchedStatusID()) {
|
||||
// We do not expect this case to happen in production.
|
||||
minMaxSearchedIdStats.flippedMinMaxSearchedId.increment();
|
||||
} else if (searchResults.getResultsSize() == 0
|
||||
&& searchResults.getMaxSearchedStatusID() == searchResults.getMinSearchedStatusID()) {
|
||||
minMaxSearchedIdStats.sameMinMaxSearchedIdWithoutResults.increment();
|
||||
responseMessageBuilder.debugVerbose(
|
||||
"Got no results, and same min/max searched ids. Request: %s, Response: %s",
|
||||
request, response);
|
||||
} else if (searchResults.getResultsSize() == 1
|
||||
&& searchResults.getMaxSearchedStatusID() == searchResults.getMinSearchedStatusID()) {
|
||||
minMaxSearchedIdStats.sameMinMaxSearchedIdWithOneResult.increment();
|
||||
responseMessageBuilder.debugVerbose(
|
||||
"Got one results, and same min/max searched ids. Request: %s, Response: %s",
|
||||
request, response);
|
||||
} else if (searchResults.getMaxSearchedStatusID()
|
||||
== searchResults.getMinSearchedStatusID()) {
|
||||
minMaxSearchedIdStats.sameMinMaxSearchedIdWithResults.increment();
|
||||
responseMessageBuilder.debugVerbose(
|
||||
"Got multiple results, and same min/max searched ids. Request: %s, Response: %s",
|
||||
request, response);
|
||||
}
|
||||
} else if (!isMaxIdSet && isMinIdSet) {
|
||||
// We do not expect this case to happen in production.
|
||||
minMaxSearchedIdStats.unsetMaxSearchedStatusId.increment();
|
||||
responseMessageBuilder.debugVerbose(
|
||||
"Got unset maxSearchedStatusID. Request: %s, Response: %s", request, response);
|
||||
} else if (isMaxIdSet && !isMinIdSet) {
|
||||
// We do not expect this case to happen in production.
|
||||
minMaxSearchedIdStats.unsetMinSearchedStatusId.increment();
|
||||
responseMessageBuilder.debugVerbose(
|
||||
"Got unset minSearchedStatusID. Request: %s, Response: %s", request, response);
|
||||
} else {
|
||||
Preconditions.checkState(!isMaxIdSet && !isMinIdSet);
|
||||
minMaxSearchedIdStats.unsetMaxAndMinSearchedStatusId.increment();
|
||||
responseMessageBuilder.debugVerbose(
|
||||
"Got unset maxSearchedStatusID and minSearchedStatusID. Request: %s, Response: %s",
|
||||
request, response);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return partition counts with number of partitions, number of successful responses, and list of
|
||||
* responses per tier.
|
||||
*/
|
||||
public abstract AccumulatedResponses.PartitionCounts getPartitionCounts();
|
||||
|
||||
public final AccumulatedResponses getAccumulatedResults() {
|
||||
return new AccumulatedResponses(successResponses,
|
||||
errorResponses,
|
||||
maxIds,
|
||||
minIds,
|
||||
ResponseMergerUtils.mergeEarlyTerminationInfo(successResponses),
|
||||
isMergingAcrossTiers(),
|
||||
getPartitionCounts(),
|
||||
getNumSearchedSegments());
|
||||
}
|
||||
|
||||
// Getters are only intended to be used by subclasses. Other users should get data from
|
||||
// AccumulatedResponses
|
||||
|
||||
int getNumResponses() {
|
||||
return numResponses;
|
||||
}
|
||||
|
||||
int getNumSearchedSegments() {
|
||||
return numSearchedSegments;
|
||||
}
|
||||
|
||||
List<EarlybirdResponse> getSuccessResponses() {
|
||||
return successResponses;
|
||||
}
|
||||
|
||||
int getNumResultsAccumulated() {
|
||||
return numResultsAccumulated;
|
||||
}
|
||||
|
||||
int getSuccessfulEmptyResponseCount() {
|
||||
return successfulEmptyResponseCount;
|
||||
}
|
||||
|
||||
boolean foundError() {
|
||||
return !errorResponses.isEmpty();
|
||||
}
|
||||
|
||||
boolean foundEarlyTermination() {
|
||||
return foundEarlyTermination;
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,297 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.common.schema.earlybird.EarlybirdCluster;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* A RecencyResponseMerger that prioritizes not losing results during pagination.
|
||||
* As of now, this merger is used by Gnip to make sure that scrolling returns all results.
|
||||
*
|
||||
* The logic used for merging partitions is a bit tricky, because on one hand, we want to make sure
|
||||
* that we do not miss results on the next pagination request; on the other hand, we want to return as
|
||||
* many results as we can, and we want to set the minSearchedStatusID of the merged response as low
|
||||
* as we can, in order to minimize the number of pagination requests.
|
||||
*
|
||||
* The merging logic is:
|
||||
*
|
||||
* Realtime cluster:
|
||||
* 1. merge results from all partitions
|
||||
* 2. if at least one partition response is early-terminated, set earlyTerminated = true
|
||||
* on the merged response
|
||||
* 3. set trimmingMinId = max(minSearchedStatusIDs of all partition responses)
|
||||
* 4. trim all results to trimmingMinId
|
||||
* 5. set minSearchedStatusID on the merged response to trimmingMinId
|
||||
* 6. if we have more than numRequested results:
|
||||
* - keep only the newest numRequested results
|
||||
* - set minSearchedStatusID of the merged response to the lowest tweet ID in the response
|
||||
* 7. if at least one partition response is not early-terminated, set
|
||||
* tierBottomId = max(minSearchedStatusIDs of all non-early-terminated responses)
|
||||
* (otherwise, set tierBottomId to some undefined value: -1, Long.MAX_VALUE, etc.)
|
||||
* 8. if minSearchedStatusID of the merged response is the same as tierBottomId,
|
||||
* clear the early-termination flag on the merged response
|
||||
*
|
||||
* The logic in steps 7 and 8 can be a little tricky to understand. They basically say: when we've
|
||||
* exhausted the "least deep" partition in the realtime cluster, it's time to move to the full
|
||||
* archive cluster (if we keep going past the "least deep" partition, we might miss results).
|
||||
*
|
||||
* Full archive cluster:
|
||||
* 1. merge results from all partitions
|
||||
* 2. if at least one partition response is early-terminated, set earlyTerminated = true
|
||||
* on the merged response
|
||||
* 3. set trimmingMinId to:
|
||||
* - max(minSearchedStatusIDs of early-terminated responses), if at least one partition response
|
||||
* is early-terminated
|
||||
* - min(minSearchedStatusIDs of all responses), if all partition responses are not
|
||||
* early-terminated
|
||||
* 4. trim all results to trimmingMinId
|
||||
* 5. set minSearchedStatusID of the merged response to trimmingMinId
|
||||
* 6. if we have more than numRequested results:
|
||||
* - keep only the newest numRequested results
|
||||
* - set minSearchedStatusID of the merged response to the lowest tweet ID in the response
|
||||
*
|
||||
* The logic in step 3 can be a little tricky to understand. On one hand, if we always set
|
||||
* trimmingMinId to the highest minSearchedStatusID, then some tweets at the very bottom of some
|
||||
* partitions will never be returned. Consider the case:
|
||||
*
|
||||
* partition 1 has tweets 10, 8, 6
|
||||
* partition 2 has tweets 9, 7, 5
|
||||
*
|
||||
* In this case, we would always trim all results to minId = 6, and tweet 5 would never be returned.
|
||||
*
|
||||
* On the other hand, if we always set trimmingMinId to the lowest minSearchedStatusID, then we
|
||||
* might miss tweets from partitions that early-terminated. Consider the case:
|
||||
*
|
||||
* partition 1 has tweets 10, 5, 3, 1 that match our query
|
||||
* partition 2 has tweets 9, 8, 7, 6, 2 that match our query
|
||||
*
|
||||
* If we ask for 3 results, then partition 1 will return tweets 10, 5, 3, and partition 2 will
|
||||
* return tweets 9, 8, 7. If we set trimmingMinId = min(minSearchedStatusIDs), then the next
|
||||
* pagination request will have [max_id = 2], and we will miss tweet 6.
|
||||
*
|
||||
* So the intuition here is that if we have an early-terminated response, we cannot set
|
||||
* trimmingMinId to something lower than the minSearchedStatusID returned by that partition
|
||||
* (otherwise we might miss results from that partition). However, if we've exhausted all
|
||||
* partitions, then it's OK to not trim any result, because tiers do not intersect, so we will not
|
||||
* miss any result from the next tier once we get there.
|
||||
*/
|
||||
public class StrictRecencyResponseMerger extends RecencyResponseMerger {
|
||||
private static final SearchTimerStats STRICT_RECENCY_TIMER_AVG =
|
||||
SearchTimerStats.export("merge_recency_strict", TimeUnit.NANOSECONDS, false, true);
|
||||
|
||||
@VisibleForTesting
|
||||
static final EarlyTerminationTrimmingStats PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
|
||||
new EarlyTerminationTrimmingStats("strict_recency_partition_merging");
|
||||
|
||||
@VisibleForTesting
|
||||
static final EarlyTerminationTrimmingStats TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS =
|
||||
new EarlyTerminationTrimmingStats("strict_recency_tier_merging");
|
||||
|
||||
private final EarlybirdCluster cluster;
|
||||
|
||||
public StrictRecencyResponseMerger(EarlybirdRequestContext requestContext,
|
||||
List<Future<EarlybirdResponse>> responses,
|
||||
ResponseAccumulator mode,
|
||||
EarlybirdFeatureSchemaMerger featureSchemaMerger,
|
||||
EarlybirdCluster cluster) {
|
||||
super(requestContext, responses, mode, featureSchemaMerger);
|
||||
this.cluster = cluster;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SearchTimerStats getMergedResponseTimer() {
|
||||
return STRICT_RECENCY_TIMER_AVG;
|
||||
}
|
||||
|
||||
/**
|
||||
* Unlike {@link com.twitter.search.earlybird_root.mergers.RecencyResponseMerger}, this method
|
||||
* takes a much simpler approach by just taking the max of the maxSearchedStatusIds.
|
||||
*
|
||||
* Also, when no maxSearchedStatusId is available at all, Long.MIN_VALUE is used instead of
|
||||
* Long.MAX_VALUE. This ensures that we don't return any result in these cases.
|
||||
*/
|
||||
@Override
|
||||
protected long findMaxFullySearchedStatusID() {
|
||||
return accumulatedResponses.getMaxIds().isEmpty()
|
||||
? Long.MIN_VALUE : Collections.max(accumulatedResponses.getMaxIds());
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is subtly different from the base class version: when no minSearchedStatusId is
|
||||
* available at all, Long.MAX_VALUE is used instead of Long.MIN_VALUE. This ensures that we
|
||||
* don't return any result in these cases.
|
||||
*/
|
||||
@Override
|
||||
protected long findMinFullySearchedStatusID() {
|
||||
List<Long> minIds = accumulatedResponses.getMinIds();
|
||||
if (minIds.isEmpty()) {
|
||||
return Long.MAX_VALUE;
|
||||
}
|
||||
|
||||
if (accumulatedResponses.isMergingPartitionsWithinATier()) {
|
||||
return getTrimmingMinId();
|
||||
}
|
||||
|
||||
// When merging tiers, the min ID should be the smallest among the min IDs.
|
||||
return Collections.min(minIds);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TrimStats trimResults(
|
||||
ThriftSearchResults searchResults, long mergedMin, long mergedMax) {
|
||||
if (!searchResults.isSetResults() || searchResults.getResultsSize() == 0) {
|
||||
// no results, no trimming needed
|
||||
return TrimStats.EMPTY_STATS;
|
||||
}
|
||||
|
||||
TrimStats trimStats = new TrimStats();
|
||||
trimExactDups(searchResults, trimStats);
|
||||
filterResultsByMergedMinMaxIds(searchResults, mergedMax, mergedMin, trimStats);
|
||||
int numResults = computeNumResultsToKeep();
|
||||
if (searchResults.getResultsSize() > numResults) {
|
||||
trimStats.setResultsTruncatedFromTailCount(searchResults.getResultsSize() - numResults);
|
||||
searchResults.setResults(searchResults.getResults().subList(0, numResults));
|
||||
}
|
||||
|
||||
return trimStats;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is different from the base class version because when minResultId is bigger
|
||||
* than currentMergedMin, we always take minResultId.
|
||||
* If we don't do this, we would lose results.
|
||||
*
|
||||
* Illustration with an example. Assuming we are outside of the lag threshold.
|
||||
* Num results requested: 3
|
||||
* Response 1: min: 100 max: 900 results: 400, 500, 600
|
||||
* Response 2: min: 300 max: 700 results: 350, 450, 550
|
||||
*
|
||||
* Merged results: 600, 550, 500
|
||||
* Merged max: 900
|
||||
* Merged min: we could take 300 (minId), or take 500 (minResultId).
|
||||
*
|
||||
* If we take minId, and use 300 as the pagination cursor, we'd lose results
|
||||
* 350 and 450 when we paginate. So we have to take minResultId here.
|
||||
*/
|
||||
@Override
|
||||
protected void setMergedMinSearchedStatusId(
|
||||
ThriftSearchResults searchResults,
|
||||
long currentMergedMin,
|
||||
boolean resultsWereTrimmed) {
|
||||
if (accumulatedResponses.getMinIds().isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
long minId = currentMergedMin;
|
||||
if (resultsWereTrimmed
|
||||
&& (searchResults != null)
|
||||
&& searchResults.isSetResults()
|
||||
&& (searchResults.getResultsSize() > 0)) {
|
||||
List<ThriftSearchResult> results = searchResults.getResults();
|
||||
minId = results.get(results.size() - 1).getId();
|
||||
}
|
||||
|
||||
searchResults.setMinSearchedStatusID(minId);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean clearEarlyTerminationIfReachingTierBottom(EarlybirdResponse mergedResponse) {
|
||||
if (EarlybirdCluster.isArchive(cluster)) {
|
||||
// We don't need to worry about the tier bottom when merging partition responses in the full
|
||||
// archive cluster: if all partitions were exhausted and we didn't trim the results, then
|
||||
// the early-terminated flag on the merged response will be false. If at least one partition
|
||||
// is early-terminated, or we trimmed some results, then the ealry-terminated flag on the
|
||||
// merged response will be true, and we should continue getting results from this tier before
|
||||
// we move to the next one.
|
||||
return false;
|
||||
}
|
||||
|
||||
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
|
||||
if (searchResults.getMinSearchedStatusID() == getTierBottomId()) {
|
||||
mergedResponse.getEarlyTerminationInfo().setEarlyTerminated(false);
|
||||
mergedResponse.getEarlyTerminationInfo().unsetMergedEarlyTerminationReasons();
|
||||
responseMessageBuilder.debugVerbose(
|
||||
"Set earlytermination to false because minSearchedStatusId is tier bottom");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean shouldEarlyTerminateWhenEnoughTrimmedResults() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForPartitions() {
|
||||
return PARTITION_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final EarlyTerminationTrimmingStats getEarlyTerminationTrimmingStatsForTiers() {
|
||||
return TIER_MERGING_EARLY_TERMINATION_TRIMMING_STATS;
|
||||
}
|
||||
|
||||
/** Determines the bottom of the realtime cluster, based on the partition responses. */
|
||||
private long getTierBottomId() {
|
||||
Preconditions.checkState(!EarlybirdCluster.isArchive(cluster));
|
||||
|
||||
long tierBottomId = -1;
|
||||
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
|
||||
if (!isEarlyTerminated(response)
|
||||
&& response.isSetSearchResults()
|
||||
&& response.getSearchResults().isSetMinSearchedStatusID()
|
||||
&& (response.getSearchResults().getMinSearchedStatusID() > tierBottomId)) {
|
||||
tierBottomId = response.getSearchResults().getMinSearchedStatusID();
|
||||
}
|
||||
}
|
||||
|
||||
return tierBottomId;
|
||||
}
|
||||
|
||||
/** Determines the minId to which all results should be trimmed. */
|
||||
private long getTrimmingMinId() {
|
||||
List<Long> minIds = accumulatedResponses.getMinIds();
|
||||
Preconditions.checkArgument(!minIds.isEmpty());
|
||||
|
||||
if (!EarlybirdCluster.isArchive(cluster)) {
|
||||
return Collections.max(minIds);
|
||||
}
|
||||
|
||||
long maxOfEarlyTerminatedMins = -1;
|
||||
long minOfAllMins = Long.MAX_VALUE;
|
||||
for (EarlybirdResponse response : accumulatedResponses.getSuccessResponses()) {
|
||||
if (response.isSetSearchResults()
|
||||
&& response.getSearchResults().isSetMinSearchedStatusID()) {
|
||||
long minId = response.getSearchResults().getMinSearchedStatusID();
|
||||
minOfAllMins = Math.min(minOfAllMins, minId);
|
||||
if (isEarlyTerminated(response)) {
|
||||
maxOfEarlyTerminatedMins = Math.max(maxOfEarlyTerminatedMins, minId);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxOfEarlyTerminatedMins >= 0) {
|
||||
return maxOfEarlyTerminatedMins;
|
||||
} else {
|
||||
return minOfAllMins;
|
||||
}
|
||||
}
|
||||
|
||||
/** Determines if the given earlybird response is early terminated. */
|
||||
private boolean isEarlyTerminated(EarlybirdResponse response) {
|
||||
return response.isSetEarlyTerminationInfo()
|
||||
&& response.getEarlyTerminationInfo().isEarlyTerminated();
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,688 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.cache.CacheLoader;
|
||||
import com.google.common.cache.LoadingCache;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.common.collections.Pair;
|
||||
import com.twitter.common.quantity.Amount;
|
||||
import com.twitter.common.quantity.Time;
|
||||
import com.twitter.common.util.Clock;
|
||||
import com.twitter.search.common.futures.Futures;
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.partitioning.snowflakeparser.SnowflakeIdParser;
|
||||
import com.twitter.search.common.query.thriftjava.EarlyTerminationInfo;
|
||||
import com.twitter.search.common.relevance.utils.ResultComparators;
|
||||
import com.twitter.search.common.search.EarlyTerminationState;
|
||||
import com.twitter.search.common.util.FinagleUtil;
|
||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseMergeUtil;
|
||||
import com.twitter.search.common.util.earlybird.EarlybirdResponseUtil;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdRequest;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResult;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird.thrift.ThriftTweetSource;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdFeatureSchemaMerger;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdServiceResponse;
|
||||
import com.twitter.util.Function;
|
||||
import com.twitter.util.Function0;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/** Utility functions for merging recency and relevance results. */
|
||||
public class SuperRootResponseMerger {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(SuperRootResponseMerger.class);
|
||||
private static final String ALL_STATS_PREFIX = "superroot_response_merger_";
|
||||
|
||||
private static final SearchCounter FULL_ARCHIVE_MIN_ID_GREATER_THAN_REALTIME_MIN_ID =
|
||||
SearchCounter.export("full_archive_min_id_greater_than_realtime_min_id");
|
||||
|
||||
private static final String ERROR_FORMAT = "%s%s_errors_from_cluster_%s_%s";
|
||||
|
||||
private final ThriftSearchRankingMode rankingMode;
|
||||
private final EarlybirdFeatureSchemaMerger featureSchemaMerger;
|
||||
private final String featureStatPrefix;
|
||||
private final Clock clock;
|
||||
private final String rankingModeStatPrefix;
|
||||
|
||||
private final SearchCounter mergedResponseSearchResultsNotSet;
|
||||
private final SearchCounter invalidMinStatusId;
|
||||
private final SearchCounter invalidMaxStatusId;
|
||||
private final SearchCounter noMinIds;
|
||||
private final SearchCounter noMaxIds;
|
||||
private final SearchCounter mergedResponses;
|
||||
private final SearchCounter mergedResponsesWithExactDups;
|
||||
private final LoadingCache<Pair<ThriftTweetSource, ThriftTweetSource>, SearchCounter> dupsStats;
|
||||
|
||||
private static final EarlybirdResponse EMPTY_RESPONSE =
|
||||
new EarlybirdResponse(EarlybirdResponseCode.SUCCESS, 0)
|
||||
.setSearchResults(new ThriftSearchResults()
|
||||
.setResults(Lists.<ThriftSearchResult>newArrayList()));
|
||||
|
||||
/**
 * Creates a new SuperRootResponseMerger instance and exports its per-ranking-mode counters.
 *
 * @param rankingMode The ranking mode to use when merging results. Its lower-cased name is
 *     embedded in every stat name exported here.
 * @param featureSchemaMerger The merger that can merge feature schema from different tiers.
 * @param clock The clock that will be used to merge results.
 */
public SuperRootResponseMerger(ThriftSearchRankingMode rankingMode,
                               EarlybirdFeatureSchemaMerger featureSchemaMerger,
                               Clock clock) {
  // Locale.ROOT keeps stat names identical regardless of the JVM default locale (a Turkish
  // default locale, for example, would lower-case 'I' to a dotless 'ı').
  this.rankingModeStatPrefix = rankingMode.name().toLowerCase(java.util.Locale.ROOT);

  this.rankingMode = rankingMode;
  this.featureSchemaMerger = featureSchemaMerger;
  this.clock = clock;
  this.featureStatPrefix = "superroot_" + rankingModeStatPrefix;

  // Shared prefix of every counter exported below.
  final String statPrefix = ALL_STATS_PREFIX + rankingModeStatPrefix;

  mergedResponseSearchResultsNotSet =
      SearchCounter.export(statPrefix + "_merged_response_search_results_not_set");
  invalidMinStatusId = SearchCounter.export(statPrefix + "_invalid_min_status_id");
  invalidMaxStatusId = SearchCounter.export(statPrefix + "_invalid_max_status_id");
  noMinIds = SearchCounter.export(statPrefix + "_no_min_ids");
  noMaxIds = SearchCounter.export(statPrefix + "_no_max_ids");
  mergedResponses = SearchCounter.export(statPrefix + "_merged_responses");
  mergedResponsesWithExactDups =
      SearchCounter.export(statPrefix + "_merged_responses_with_exact_dups");

  // One counter per (source, source) pair, exported the first time the pair is looked up.
  dupsStats = CacheBuilder.newBuilder()
      .build(new CacheLoader<Pair<ThriftTweetSource, ThriftTweetSource>, SearchCounter>() {
        @Override
        public SearchCounter load(Pair<ThriftTweetSource, ThriftTweetSource> key) {
          return SearchCounter.export(
              statPrefix + "_merged_responses_with_exact_dups_"
              + key.getFirst().name() + "_" + key.getSecond().name());
        }
      });
}
|
||||
|
||||
private void incrErrorCount(String cluster, @Nullable EarlybirdResponse response) {
|
||||
String cause;
|
||||
if (response != null) {
|
||||
cause = response.getResponseCode().name().toLowerCase();
|
||||
} else {
|
||||
cause = "null_response";
|
||||
}
|
||||
String statName = String.format(
|
||||
ERROR_FORMAT, ALL_STATS_PREFIX, rankingModeStatPrefix, cluster, cause
|
||||
);
|
||||
|
||||
SearchCounter.export(statName).increment();
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the given response futures.
|
||||
*
|
||||
* @param earlybirdRequestContext The earlybird request.
|
||||
* @param realtimeResponseFuture The response from the realtime cluster.
|
||||
* @param protectedResponseFuture The response from the protected cluster.
|
||||
* @param fullArchiveResponseFuture The response from the full archive cluster.
|
||||
* @return A future with the merged results.
|
||||
*/
|
||||
public Future<EarlybirdResponse> mergeResponseFutures(
|
||||
final EarlybirdRequestContext earlybirdRequestContext,
|
||||
final Future<EarlybirdServiceResponse> realtimeResponseFuture,
|
||||
final Future<EarlybirdServiceResponse> protectedResponseFuture,
|
||||
final Future<EarlybirdServiceResponse> fullArchiveResponseFuture) {
|
||||
Future<EarlybirdResponse> mergedResponseFuture = Futures.map(
|
||||
realtimeResponseFuture, protectedResponseFuture, fullArchiveResponseFuture,
|
||||
new Function0<EarlybirdResponse>() {
|
||||
@Override
|
||||
public EarlybirdResponse apply() {
|
||||
// If the realtime response is not valid, return an error response.
|
||||
// Also, the realtime service should always be called.
|
||||
EarlybirdServiceResponse realtimeResponse = Futures.get(realtimeResponseFuture);
|
||||
|
||||
if (realtimeResponse.getServiceState().serviceWasRequested()
|
||||
&& (!realtimeResponse.getServiceState().serviceWasCalled()
|
||||
|| !EarlybirdResponseMergeUtil.isValidResponse(
|
||||
realtimeResponse.getResponse()))) {
|
||||
|
||||
incrErrorCount("realtime", realtimeResponse.getResponse());
|
||||
return EarlybirdResponseMergeUtil.transformInvalidResponse(
|
||||
realtimeResponse.getResponse(), "realtime");
|
||||
}
|
||||
|
||||
// If we have a protected response and it's not valid, return an error response.
|
||||
EarlybirdServiceResponse protectedResponse = Futures.get(protectedResponseFuture);
|
||||
if (protectedResponse.getServiceState().serviceWasCalled()) {
|
||||
if (!EarlybirdResponseMergeUtil.isValidResponse(protectedResponse.getResponse())) {
|
||||
incrErrorCount("protected", protectedResponse.getResponse());
|
||||
|
||||
return EarlybirdResponseMergeUtil.transformInvalidResponse(
|
||||
protectedResponse.getResponse(), "protected");
|
||||
}
|
||||
}
|
||||
|
||||
// If we have a full archive response, check if it's valid.
|
||||
EarlybirdServiceResponse fullArchiveResponse = Futures.get(fullArchiveResponseFuture);
|
||||
boolean archiveHasError =
|
||||
fullArchiveResponse.getServiceState().serviceWasCalled()
|
||||
&& !EarlybirdResponseMergeUtil.isValidResponse(fullArchiveResponse.getResponse());
|
||||
|
||||
// Merge the responses.
|
||||
EarlybirdResponse mergedResponse = mergeResponses(
|
||||
earlybirdRequestContext,
|
||||
realtimeResponse.getResponse(),
|
||||
protectedResponse.getResponse(),
|
||||
fullArchiveResponse.getResponse());
|
||||
|
||||
// If the realtime clusters didn't return any results, and the full archive cluster
|
||||
// returned an error response, return an error merged response.
|
||||
if (archiveHasError && !EarlybirdResponseUtil.hasResults(mergedResponse)) {
|
||||
incrErrorCount("full_archive", fullArchiveResponse.getResponse());
|
||||
|
||||
return EarlybirdResponseMergeUtil.failedEarlybirdResponse(
|
||||
fullArchiveResponse.getResponse().getResponseCode(),
|
||||
"realtime clusters had no results and archive cluster response had error");
|
||||
}
|
||||
|
||||
// Corner case: the realtime response could have exactly numRequested results, and could
|
||||
// be exhausted (not early-terminated). In this case, the request should not have been
|
||||
// sent to the full archive cluster.
|
||||
// - If the full archive cluster is not available, or was not requested, then we don't
|
||||
// need to change anything.
|
||||
// - If the full archive cluster is available and was requested (but wasn't hit
|
||||
// because we found enough results in the realtime cluster), then we should set the
|
||||
// early-termination flag on the merged response, to indicate that we potentially
|
||||
// have more results for this query in our index.
|
||||
if ((fullArchiveResponse.getServiceState()
|
||||
== EarlybirdServiceResponse.ServiceState.SERVICE_NOT_CALLED)
|
||||
&& !EarlybirdResponseUtil.isEarlyTerminated(realtimeResponse.getResponse())) {
|
||||
EarlyTerminationInfo earlyTerminationInfo = new EarlyTerminationInfo(true);
|
||||
earlyTerminationInfo.setEarlyTerminationReason(
|
||||
EarlyTerminationState.TERMINATED_NUM_RESULTS_EXCEEDED.getTerminationReason());
|
||||
mergedResponse.setEarlyTerminationInfo(earlyTerminationInfo);
|
||||
}
|
||||
|
||||
// If we've exhausted all clusters, set the minSearchedStatusID to 0.
|
||||
if (!EarlybirdResponseUtil.isEarlyTerminated(mergedResponse)) {
|
||||
mergedResponse.getSearchResults().setMinSearchedStatusID(0);
|
||||
}
|
||||
|
||||
return mergedResponse;
|
||||
}
|
||||
});
|
||||
|
||||
// Handle all merging exceptions.
|
||||
return handleResponseException(mergedResponseFuture,
|
||||
"Exception thrown while merging responses.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge the results in the given responses.
|
||||
*
|
||||
* @param earlybirdRequestContext The earlybird request context.
|
||||
* @param realtimeResponse The response from the realtime cluster.
|
||||
* @param protectedResponse The response from the protected cluster.
|
||||
* @param fullArchiveResponse The response from the full archive cluster.
|
||||
* @return The merged response.
|
||||
*/
|
||||
private EarlybirdResponse mergeResponses(
|
||||
EarlybirdRequestContext earlybirdRequestContext,
|
||||
@Nullable EarlybirdResponse realtimeResponse,
|
||||
@Nullable EarlybirdResponse protectedResponse,
|
||||
@Nullable EarlybirdResponse fullArchiveResponse) {
|
||||
|
||||
EarlybirdRequest request = earlybirdRequestContext.getRequest();
|
||||
ThriftSearchQuery searchQuery = request.getSearchQuery();
|
||||
int numResultsRequested;
|
||||
|
||||
if (request.isSetNumResultsToReturnAtRoot()) {
|
||||
numResultsRequested = request.getNumResultsToReturnAtRoot();
|
||||
} else {
|
||||
numResultsRequested = searchQuery.getNumResults();
|
||||
}
|
||||
|
||||
Preconditions.checkState(numResultsRequested > 0);
|
||||
|
||||
EarlybirdResponse mergedResponse = EMPTY_RESPONSE.deepCopy();
|
||||
if ((realtimeResponse != null)
|
||||
&& (realtimeResponse.getResponseCode() != EarlybirdResponseCode.TIER_SKIPPED)) {
|
||||
mergedResponse = realtimeResponse.deepCopy();
|
||||
}
|
||||
|
||||
if (!mergedResponse.isSetSearchResults()) {
|
||||
mergedResponseSearchResultsNotSet.increment();
|
||||
mergedResponse.setSearchResults(
|
||||
new ThriftSearchResults(Lists.<ThriftSearchResult>newArrayList()));
|
||||
}
|
||||
|
||||
// If either the realtime or the full archive response is early-terminated, we want the merged
|
||||
// response to be early-terminated too. The early-termination flag from the realtime response
|
||||
// carries over to the merged response, because mergedResponse is just a deep copy of the
|
||||
// realtime response. So we only need to check the early-termination flag of the full archive
|
||||
// response.
|
||||
if ((fullArchiveResponse != null)
|
||||
&& EarlybirdResponseUtil.isEarlyTerminated(fullArchiveResponse)) {
|
||||
mergedResponse.setEarlyTerminationInfo(fullArchiveResponse.getEarlyTerminationInfo());
|
||||
}
|
||||
|
||||
// If realtime has empty results and protected has some results then we copy the early
|
||||
// termination information if that is present
|
||||
if (protectedResponse != null
|
||||
&& mergedResponse.getSearchResults().getResults().isEmpty()
|
||||
&& !protectedResponse.getSearchResults().getResults().isEmpty()
|
||||
&& EarlybirdResponseUtil.isEarlyTerminated(protectedResponse)) {
|
||||
mergedResponse.setEarlyTerminationInfo(protectedResponse.getEarlyTerminationInfo());
|
||||
}
|
||||
|
||||
// Merge the results.
|
||||
List<ThriftSearchResult> mergedResults = mergeResults(
|
||||
numResultsRequested, realtimeResponse, protectedResponse, fullArchiveResponse);
|
||||
|
||||
// Trim the merged results if necessary.
|
||||
boolean resultsTrimmed = false;
|
||||
if (mergedResults.size() > numResultsRequested
|
||||
&& !(searchQuery.isSetRelevanceOptions()
|
||||
&& searchQuery.getRelevanceOptions().isReturnAllResults())) {
|
||||
// If we have more results than requested, trim the result list and re-adjust
|
||||
// minSearchedStatusID.
|
||||
mergedResults = mergedResults.subList(0, numResultsRequested);
|
||||
|
||||
// Mark early termination in merged response
|
||||
if (!EarlybirdResponseUtil.isEarlyTerminated(mergedResponse)) {
|
||||
EarlyTerminationInfo earlyTerminationInfo = new EarlyTerminationInfo(true);
|
||||
earlyTerminationInfo.setEarlyTerminationReason(
|
||||
EarlyTerminationState.TERMINATED_NUM_RESULTS_EXCEEDED.getTerminationReason());
|
||||
mergedResponse.setEarlyTerminationInfo(earlyTerminationInfo);
|
||||
}
|
||||
|
||||
resultsTrimmed = true;
|
||||
}
|
||||
|
||||
mergedResponse.getSearchResults().setResults(mergedResults);
|
||||
featureSchemaMerger.mergeFeatureSchemaAcrossClusters(
|
||||
earlybirdRequestContext,
|
||||
mergedResponse,
|
||||
featureStatPrefix,
|
||||
realtimeResponse,
|
||||
protectedResponse,
|
||||
fullArchiveResponse);
|
||||
|
||||
// Set the minSearchedStatusID and maxSearchedStatusID fields on the merged response.
|
||||
setMinSearchedStatusId(mergedResponse, realtimeResponse, protectedResponse, fullArchiveResponse,
|
||||
resultsTrimmed);
|
||||
setMaxSearchedStatusId(mergedResponse, realtimeResponse, protectedResponse,
|
||||
fullArchiveResponse);
|
||||
|
||||
int numRealtimeSearchedSegments =
|
||||
(realtimeResponse != null && realtimeResponse.isSetNumSearchedSegments())
|
||||
? realtimeResponse.getNumSearchedSegments()
|
||||
: 0;
|
||||
|
||||
int numProtectedSearchedSegments =
|
||||
(protectedResponse != null && protectedResponse.isSetNumSearchedSegments())
|
||||
? protectedResponse.getNumSearchedSegments()
|
||||
: 0;
|
||||
|
||||
int numArchiveSearchedSegments =
|
||||
(fullArchiveResponse != null && fullArchiveResponse.isSetNumSearchedSegments())
|
||||
? fullArchiveResponse.getNumSearchedSegments()
|
||||
: 0;
|
||||
|
||||
mergedResponse.setNumSearchedSegments(
|
||||
numRealtimeSearchedSegments + numProtectedSearchedSegments + numArchiveSearchedSegments);
|
||||
|
||||
if (earlybirdRequestContext.getRequest().getDebugMode() > 0) {
|
||||
mergedResponse.setDebugString(
|
||||
mergeClusterDebugStrings(realtimeResponse, protectedResponse, fullArchiveResponse));
|
||||
}
|
||||
|
||||
return mergedResponse;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the given responses.
|
||||
*
|
||||
* @param numResults the number of results requested
|
||||
* @param realtimeResponse the response from the realtime response
|
||||
* @param protectedResponse the response from the protected response
|
||||
* @param fullArchiveResponse the response from the full archive response
|
||||
* @return the list of merged results
|
||||
*/
|
||||
private List<ThriftSearchResult> mergeResults(int numResults,
|
||||
@Nullable EarlybirdResponse realtimeResponse,
|
||||
@Nullable EarlybirdResponse protectedResponse,
|
||||
@Nullable EarlybirdResponse fullArchiveResponse) {
|
||||
mergedResponses.increment();
|
||||
// We first merge the results from the two realtime clusters, Realtime cluster and
|
||||
// Realtime Protected Tweets cluster
|
||||
List<ThriftSearchResult> mergedResults = mergePublicAndProtectedRealtimeResults(
|
||||
numResults,
|
||||
realtimeResponse,
|
||||
protectedResponse,
|
||||
fullArchiveResponse,
|
||||
clock);
|
||||
|
||||
EarlybirdResponseMergeUtil.addResultsToList(mergedResults, fullArchiveResponse,
|
||||
ThriftTweetSource.FULL_ARCHIVE_CLUSTER);
|
||||
|
||||
List<ThriftSearchResult> distinctMergedResults =
|
||||
EarlybirdResponseMergeUtil.distinctByStatusId(mergedResults, dupsStats);
|
||||
if (mergedResults != distinctMergedResults) {
|
||||
mergedResponsesWithExactDups.increment();
|
||||
}
|
||||
|
||||
if (rankingMode == ThriftSearchRankingMode.RELEVANCE
|
||||
|| rankingMode == ThriftSearchRankingMode.TOPTWEETS) {
|
||||
distinctMergedResults.sort(ResultComparators.SCORE_COMPARATOR);
|
||||
} else {
|
||||
distinctMergedResults.sort(ResultComparators.ID_COMPARATOR);
|
||||
}
|
||||
|
||||
return distinctMergedResults;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method for merging tweets from protected and realtime clusters
|
||||
* - realtime, guaranteed newer than any archive tweets
|
||||
* - protected, also realtime, but with a potentially larger window (optional)
|
||||
* - archive, public, guaranteed older than any public realtime tweets (optional, used for
|
||||
* id limits, *not added to results*)
|
||||
* It adds the ThriftSearchResults from protected tweets to the realtimeResponse
|
||||
*
|
||||
* Algorithm diagram: (with newer tweets at the top)
|
||||
* ------------------------------------ <--- protected maxSearchedStatusID
|
||||
* |C:Newest protected realtime tweets|
|
||||
* | (does not exist if realtime |
|
||||
* | maxID >= protected maxID) |
|
||||
*
|
||||
* | ------------------------ | <--- 60 seconds ago
|
||||
* |D:Newer protected realtime tweets |
|
||||
* | (does not exist if realtime |
|
||||
* | maxID >= 60 seconds ago) |
|
||||
* ---------- | ------------------------ | <--- public realtime maxSearchedStatusID
|
||||
* |A:Public| |E:Automatically valid protected |
|
||||
* |realtime| |realtime tweets |
|
||||
* ---------- | ------------------------ | <--- public realtime minSearchedStatusID
|
||||
* | |
|
||||
* ---------- | E if archive is present | <--- public archive maxSearchedStatusID
|
||||
* ---------- | E if archive is present | <--- public archive maxSearchedStatusID
|
||||
* |B:Public| | F if archive is not present |
|
||||
* |archive | | |
|
||||
* ---------- | ------------------------ | <--- public archive minSearchedStatusID
|
||||
* |F:Older protected realtime tweets |
|
||||
* | (does not exist if protected |
|
||||
* | minID >= public minID) |
|
||||
* ------------------------------------ <--- protected minSearchedStatusID
|
||||
* Step 1: Select tweets from groups A, and E. If this is enough, return them
|
||||
* Step 2: Select tweets from groups A, E, and F. If this is enough, return them
|
||||
* Step 3: Select tweets from groups A, D, E, and F and return them
|
||||
*
|
||||
* There are two primary tradeoffs, both of which favor public tweets:
|
||||
* (1) Benefit: While public indexing latency is < 60s, auto-updating never misses public tweets
|
||||
* Cost: Absence of public tweets may delay protected tweets from being searchable for 60s
|
||||
* (2) Benefit: No failure or delay from the protected cluster will affect realtime results
|
||||
* Cost: If the protected cluster indexes more slowly, auto-update may miss its tweets
|
||||
*
|
||||
* @param fullArchiveTweets - used solely for generating anchor points, not merged in.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static List<ThriftSearchResult> mergePublicAndProtectedRealtimeResults(
|
||||
int numRequested,
|
||||
EarlybirdResponse realtimeTweets,
|
||||
EarlybirdResponse realtimeProtectedTweets,
|
||||
@Nullable EarlybirdResponse fullArchiveTweets,
|
||||
Clock clock) {
|
||||
// See which results will actually be used
|
||||
boolean isRealtimeUsable = EarlybirdResponseUtil.hasResults(realtimeTweets);
|
||||
boolean isArchiveUsable = EarlybirdResponseUtil.hasResults(fullArchiveTweets);
|
||||
boolean isProtectedUsable = EarlybirdResponseUtil.hasResults(realtimeProtectedTweets);
|
||||
|
||||
long minId = Long.MIN_VALUE;
|
||||
long maxId = Long.MAX_VALUE;
|
||||
if (isRealtimeUsable) {
|
||||
// Determine the actual upper/lower bounds on the tweet id
|
||||
if (realtimeTweets.getSearchResults().isSetMinSearchedStatusID()) {
|
||||
minId = realtimeTweets.getSearchResults().getMinSearchedStatusID();
|
||||
}
|
||||
if (realtimeTweets.getSearchResults().isSetMaxSearchedStatusID()) {
|
||||
maxId = realtimeTweets.getSearchResults().getMaxSearchedStatusID();
|
||||
}
|
||||
|
||||
int justRight = realtimeTweets.getSearchResults().getResultsSize();
|
||||
if (isArchiveUsable) {
|
||||
justRight += fullArchiveTweets.getSearchResults().getResultsSize();
|
||||
if (fullArchiveTweets.getSearchResults().isSetMinSearchedStatusID()) {
|
||||
long fullArchiveMinId = fullArchiveTweets.getSearchResults().getMinSearchedStatusID();
|
||||
if (fullArchiveMinId <= minId) {
|
||||
minId = fullArchiveMinId;
|
||||
} else {
|
||||
FULL_ARCHIVE_MIN_ID_GREATER_THAN_REALTIME_MIN_ID.increment();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (isProtectedUsable) {
|
||||
for (ThriftSearchResult result : realtimeProtectedTweets.getSearchResults().getResults()) {
|
||||
if (result.getId() >= minId && result.getId() <= maxId) {
|
||||
justRight++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (justRight < numRequested) {
|
||||
// Since this is only used as an upper bound, old (pre-2010) ids are still handled correctly
|
||||
maxId = Math.max(
|
||||
maxId,
|
||||
SnowflakeIdParser.generateValidStatusId(
|
||||
clock.nowMillis() - Amount.of(60, Time.SECONDS).as(Time.MILLISECONDS), 0));
|
||||
}
|
||||
}
|
||||
|
||||
List<ThriftSearchResult> mergedSearchResults = Lists.newArrayListWithCapacity(numRequested * 2);
|
||||
|
||||
// Add valid tweets in order of priority: protected, then realtime
|
||||
// Only add results that are within range (that check only matters for protected)
|
||||
if (isProtectedUsable) {
|
||||
EarlybirdResponseMergeUtil.markWithTweetSource(
|
||||
realtimeProtectedTweets.getSearchResults().getResults(),
|
||||
ThriftTweetSource.REALTIME_PROTECTED_CLUSTER);
|
||||
for (ThriftSearchResult result : realtimeProtectedTweets.getSearchResults().getResults()) {
|
||||
if (result.getId() <= maxId && result.getId() >= minId) {
|
||||
mergedSearchResults.add(result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isRealtimeUsable) {
|
||||
EarlybirdResponseMergeUtil.addResultsToList(
|
||||
mergedSearchResults, realtimeTweets, ThriftTweetSource.REALTIME_CLUSTER);
|
||||
}
|
||||
|
||||
// Set the minSearchedStatusID and maxSearchedStatusID on the protected response to the
|
||||
// minId and maxId that were used to trim the protected results.
|
||||
// This is needed in order to correctly set these IDs on the merged response.
|
||||
ThriftSearchResults protectedResults =
|
||||
EarlybirdResponseUtil.getResults(realtimeProtectedTweets);
|
||||
if ((protectedResults != null)
|
||||
&& protectedResults.isSetMinSearchedStatusID()
|
||||
&& (protectedResults.getMinSearchedStatusID() < minId)) {
|
||||
protectedResults.setMinSearchedStatusID(minId);
|
||||
}
|
||||
if ((protectedResults != null)
|
||||
&& protectedResults.isSetMaxSearchedStatusID()
|
||||
&& (protectedResults.getMaxSearchedStatusID() > maxId)) {
|
||||
realtimeProtectedTweets.getSearchResults().setMaxSearchedStatusID(maxId);
|
||||
}
|
||||
|
||||
return mergedSearchResults;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges the debug strings of the given cluster responses.
|
||||
*
|
||||
* @param realtimeResponse The response from the realtime cluster.
|
||||
* @param protectedResponse The response from the protected cluster.
|
||||
* @param fullArchiveResponse The response from the full archive cluster.
|
||||
* @return The merged debug string.
|
||||
*/
|
||||
public static String mergeClusterDebugStrings(@Nullable EarlybirdResponse realtimeResponse,
|
||||
@Nullable EarlybirdResponse protectedResponse,
|
||||
@Nullable EarlybirdResponse fullArchiveResponse) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if ((realtimeResponse != null) && realtimeResponse.isSetDebugString()) {
|
||||
sb.append("Realtime response: ").append(realtimeResponse.getDebugString());
|
||||
}
|
||||
if ((protectedResponse != null) && protectedResponse.isSetDebugString()) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append("\n");
|
||||
}
|
||||
sb.append("Protected response: ").append(protectedResponse.getDebugString());
|
||||
}
|
||||
if ((fullArchiveResponse != null) && fullArchiveResponse.isSetDebugString()) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append("\n");
|
||||
}
|
||||
sb.append("Full archive response: ").append(fullArchiveResponse.getDebugString());
|
||||
}
|
||||
|
||||
if (sb.length() == 0) {
|
||||
return null;
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the minSearchedStatusID field on the merged response.
|
||||
*
|
||||
* @param mergedResponse The merged response.
|
||||
* @param fullArchiveResponse The full archive response.
|
||||
* @param resultsTrimmed Whether the merged response results were trimmed.
|
||||
*/
|
||||
private void setMinSearchedStatusId(EarlybirdResponse mergedResponse,
|
||||
EarlybirdResponse realtimeResponse,
|
||||
EarlybirdResponse protectedResponse,
|
||||
EarlybirdResponse fullArchiveResponse,
|
||||
boolean resultsTrimmed) {
|
||||
Preconditions.checkNotNull(mergedResponse.getSearchResults());
|
||||
if (resultsTrimmed) {
|
||||
// We got more results that we asked for and we trimmed them.
|
||||
// Set minSearchedStatusID to the ID of the oldest result.
|
||||
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
|
||||
if (searchResults.getResultsSize() > 0) {
|
||||
List<ThriftSearchResult> results = searchResults.getResults();
|
||||
long lastResultId = results.get(results.size() - 1).getId();
|
||||
searchResults.setMinSearchedStatusID(lastResultId);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// We did not get more results that we asked for. Get the min of the minSearchedStatusIDs of
|
||||
// the merged responses.
|
||||
List<Long> minIDs = Lists.newArrayList();
|
||||
if (fullArchiveResponse != null
|
||||
&& fullArchiveResponse.isSetSearchResults()
|
||||
&& fullArchiveResponse.getSearchResults().isSetMinSearchedStatusID()) {
|
||||
minIDs.add(fullArchiveResponse.getSearchResults().getMinSearchedStatusID());
|
||||
if (mergedResponse.getSearchResults().isSetMinSearchedStatusID()
|
||||
&& mergedResponse.getSearchResults().getMinSearchedStatusID()
|
||||
< fullArchiveResponse.getSearchResults().getMinSearchedStatusID()) {
|
||||
invalidMinStatusId.increment();
|
||||
}
|
||||
}
|
||||
|
||||
if (protectedResponse != null
|
||||
&& !EarlybirdResponseUtil.hasResults(realtimeResponse)
|
||||
&& EarlybirdResponseUtil.hasResults(protectedResponse)
|
||||
&& protectedResponse.getSearchResults().isSetMinSearchedStatusID()) {
|
||||
minIDs.add(protectedResponse.getSearchResults().getMinSearchedStatusID());
|
||||
}
|
||||
|
||||
if (mergedResponse.getSearchResults().isSetMinSearchedStatusID()) {
|
||||
minIDs.add(mergedResponse.getSearchResults().getMinSearchedStatusID());
|
||||
}
|
||||
|
||||
if (!minIDs.isEmpty()) {
|
||||
mergedResponse.getSearchResults().setMinSearchedStatusID(Collections.min(minIDs));
|
||||
} else {
|
||||
noMinIds.increment();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maxSearchedStatusID field on the merged response.
|
||||
*
|
||||
* @param mergedResponse The merged response.
|
||||
* @param fullArchiveResponse The full archive response.
|
||||
*/
|
||||
private void setMaxSearchedStatusId(EarlybirdResponse mergedResponse,
|
||||
EarlybirdResponse realtimeResponse,
|
||||
EarlybirdResponse protectedResponse,
|
||||
EarlybirdResponse fullArchiveResponse) {
|
||||
|
||||
Preconditions.checkNotNull(mergedResponse.getSearchResults());
|
||||
List<Long> maxIDs = Lists.newArrayList();
|
||||
if (fullArchiveResponse != null
|
||||
&& fullArchiveResponse.isSetSearchResults()
|
||||
&& fullArchiveResponse.getSearchResults().isSetMaxSearchedStatusID()) {
|
||||
maxIDs.add(fullArchiveResponse.getSearchResults().getMaxSearchedStatusID());
|
||||
if (mergedResponse.getSearchResults().isSetMaxSearchedStatusID()
|
||||
&& fullArchiveResponse.getSearchResults().getMaxSearchedStatusID()
|
||||
> mergedResponse.getSearchResults().getMaxSearchedStatusID()) {
|
||||
invalidMaxStatusId.increment();
|
||||
}
|
||||
}
|
||||
|
||||
if (protectedResponse != null
|
||||
&& !EarlybirdResponseUtil.hasResults(realtimeResponse)
|
||||
&& EarlybirdResponseUtil.hasResults(protectedResponse)
|
||||
&& protectedResponse.getSearchResults().isSetMaxSearchedStatusID()) {
|
||||
|
||||
maxIDs.add(protectedResponse.getSearchResults().getMaxSearchedStatusID());
|
||||
}
|
||||
|
||||
if (mergedResponse.getSearchResults().isSetMaxSearchedStatusID()) {
|
||||
maxIDs.add(mergedResponse.getSearchResults().getMaxSearchedStatusID());
|
||||
}
|
||||
|
||||
ThriftSearchResults searchResults = mergedResponse.getSearchResults();
|
||||
if (searchResults.getResultsSize() > 0) {
|
||||
List<ThriftSearchResult> results = searchResults.getResults();
|
||||
maxIDs.add(results.get(0).getId());
|
||||
}
|
||||
|
||||
if (!maxIDs.isEmpty()) {
|
||||
mergedResponse.getSearchResults().setMaxSearchedStatusID(Collections.max(maxIDs));
|
||||
} else {
|
||||
noMaxIds.increment();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles exceptions thrown while merging responses. Timeout exceptions are converted to
|
||||
* SERVER_TIMEOUT_ERROR responses. All other exceptions are converted to PERSISTENT_ERROR
|
||||
* responses.
|
||||
*/
|
||||
private Future<EarlybirdResponse> handleResponseException(
|
||||
Future<EarlybirdResponse> responseFuture, final String debugMsg) {
|
||||
return responseFuture.handle(
|
||||
new Function<Throwable, EarlybirdResponse>() {
|
||||
@Override
|
||||
public EarlybirdResponse apply(Throwable t) {
|
||||
EarlybirdResponseCode responseCode = EarlybirdResponseCode.PERSISTENT_ERROR;
|
||||
if (FinagleUtil.isTimeoutException(t)) {
|
||||
responseCode = EarlybirdResponseCode.SERVER_TIMEOUT_ERROR;
|
||||
}
|
||||
EarlybirdResponse response = new EarlybirdResponse(responseCode, 0);
|
||||
response.setDebugString(debugMsg + "\n" + t);
|
||||
return response;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,90 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.common.collect.Collections2;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsRequest;
|
||||
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsResults;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Merger class to merge termstats EarlybirdResponse objects
|
||||
*/
|
||||
public class TermStatisticsResponseMerger extends EarlybirdResponseMerger {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(TermStatisticsResponseMerger.class);
|
||||
|
||||
private static final SearchTimerStats TIMER =
|
||||
SearchTimerStats.export("merge_term_stats", TimeUnit.NANOSECONDS, false, true);
|
||||
|
||||
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
|
||||
|
||||
public TermStatisticsResponseMerger(EarlybirdRequestContext requestContext,
|
||||
List<Future<EarlybirdResponse>> responses,
|
||||
ResponseAccumulator mode) {
|
||||
super(requestContext, responses, mode);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SearchTimerStats getMergedResponseTimer() {
|
||||
return TIMER;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double getDefaultSuccessResponseThreshold() {
|
||||
return SUCCESSFUL_RESPONSE_THRESHOLD;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected EarlybirdResponse internalMerge(EarlybirdResponse termStatsResponse) {
|
||||
ThriftTermStatisticsRequest termStatisticsRequest =
|
||||
requestContext.getRequest().getTermStatisticsRequest();
|
||||
|
||||
Collection<EarlybirdResponse> termStatsResults =
|
||||
Collections2.filter(accumulatedResponses.getSuccessResponses(),
|
||||
earlybirdResponse -> earlybirdResponse.isSetTermStatisticsResults());
|
||||
|
||||
ThriftTermStatisticsResults results =
|
||||
new ThriftTermResultsMerger(
|
||||
termStatsResults,
|
||||
termStatisticsRequest.getHistogramSettings())
|
||||
.merge();
|
||||
|
||||
if (results.getTermResults().isEmpty()) {
|
||||
final String line = "No results returned from any backend for term statistics request: {}";
|
||||
|
||||
// If the termstats request was not empty and we got empty results. log it as a warning
|
||||
// otherwise log is as a debug.
|
||||
if (termStatisticsRequest.getTermRequestsSize() > 0) {
|
||||
LOG.warn(line, termStatisticsRequest);
|
||||
} else {
|
||||
LOG.debug(line, termStatisticsRequest);
|
||||
}
|
||||
}
|
||||
|
||||
termStatsResponse.setTermStatisticsResults(results);
|
||||
termStatsResponse.setSearchResults(ThriftTermResultsMerger.mergeSearchStats(termStatsResults));
|
||||
|
||||
FacetsResultsUtils.fixNativePhotoUrl(results.getTermResults().values());
|
||||
|
||||
LOG.debug("TermStats call completed successfully: {}", termStatsResponse);
|
||||
|
||||
return termStatsResponse;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean shouldEarlyTerminateTierMerge(int totalResultsFromSuccessfulShards,
|
||||
boolean foundEarlyTermination) {
|
||||
// To get accurate term stats, must never early terminate
|
||||
return false;
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,472 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.twitter.search.common.metrics.SearchCounter;
|
||||
import com.twitter.search.common.util.earlybird.FacetsResultsUtils;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftHistogramSettings;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird.thrift.ThriftTermRequest;
|
||||
import com.twitter.search.earlybird.thrift.ThriftTermResults;
|
||||
import com.twitter.search.earlybird.thrift.ThriftTermStatisticsResults;
|
||||
|
||||
/**
|
||||
* Takes multiple successful EarlybirdResponses and merges them.
|
||||
*/
|
||||
public class ThriftTermResultsMerger {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ThriftTermResultsMerger.class);
|
||||
|
||||
private static final SearchCounter BIN_ID_GAP_COUNTER =
|
||||
SearchCounter.export("thrift_term_results_merger_found_gap_in_bin_ids");
|
||||
private static final SearchCounter MIN_COMPLETE_BIN_ID_ADJUSTED_NULL =
|
||||
SearchCounter.export("thrift_term_results_merger_min_complete_bin_id_adjusted_null");
|
||||
private static final SearchCounter MIN_COMPLETE_BIN_ID_NULL_WITHOUT_BINS =
|
||||
SearchCounter.export("thrift_term_results_merger_min_complete_bin_id_null_without_bins");
|
||||
private static final SearchCounter MIN_COMPLETE_BIN_ID_OUT_OF_RANGE =
|
||||
SearchCounter.export("thrift_term_results_merger_min_complete_bin_id_out_of_range");
|
||||
private static final SearchCounter RESPONSE_WITHOUT_DRIVING_QUERY_HIT =
|
||||
SearchCounter.export("response_without_driving_query_hit");
|
||||
|
||||
private static final ThriftTermRequest GLOBAL_COUNT_REQUEST =
|
||||
new ThriftTermRequest().setFieldName("").setTerm("");
|
||||
|
||||
/**
|
||||
* Sorted list of the most recent (and contiguous) numBins binIds across all responses.
|
||||
* Expected to be an empty list if this request did not ask for histograms, or if it
|
||||
* did ask for histograms for 0 numBins.
|
||||
*/
|
||||
@Nonnull
|
||||
private final List<Integer> mostRecentBinIds;
|
||||
/**
|
||||
* The first binId in the {@link #mostRecentBinIds} list. This value is not meant to be used in
|
||||
* case mostRecentBinIds is an empty list.
|
||||
*/
|
||||
private final int firstBinId;
|
||||
|
||||
/**
|
||||
* For each unique ThriftTermRequest, stores an array of the total counts for all the binIds
|
||||
* that we will return, summed up across all earlybird responses.
|
||||
*
|
||||
* The values in each totalCounts array correspond to the binIds in the
|
||||
* {@link #mostRecentBinIds} list.
|
||||
*
|
||||
* Key: thrift term request.
|
||||
* Value: array of the total counts summed up across all earlybird responses for the key's
|
||||
* term request, corresponding to the binIds in {@link #mostRecentBinIds}.
|
||||
*/
|
||||
private final Map<ThriftTermRequest, int[]> mergedTermRequestTotalCounts = Maps.newHashMap();
|
||||
/**
|
||||
* The merged term results for each unique ThriftTermRequest, accumulated across all responses.
|
||||
*/
|
||||
private final Map<ThriftTermRequest, ThriftTermResults> termResultsMap = Maps.newHashMap();
|
||||
private final ThriftHistogramSettings histogramSettings;
|
||||
|
||||
/**
|
||||
* Only relevant for merging responses with histogram settings.
|
||||
* This will be null either if (1) the request is not asking for histograms at all, or if
|
||||
* (2) numBins was set to 0 (and no bin can be considered complete).
|
||||
* If not null, the minCompleteBinId will be computed as the max over all merged responses'
|
||||
* minCompleteBinId's.
|
||||
*/
|
||||
@Nullable
|
||||
private final Integer minCompleteBinId;
|
||||
|
||||
/**
|
||||
* Create merger with collections of results to merge
|
||||
*/
|
||||
public ThriftTermResultsMerger(Collection<EarlybirdResponse> termStatsResults,
|
||||
ThriftHistogramSettings histogramSettings) {
|
||||
this.histogramSettings = histogramSettings;
|
||||
|
||||
Collection<EarlybirdResponse> filteredTermStatsResults =
|
||||
filterOutEmptyEarlybirdResponses(termStatsResults);
|
||||
|
||||
this.mostRecentBinIds = findMostRecentBinIds(histogramSettings, filteredTermStatsResults);
|
||||
this.firstBinId = mostRecentBinIds.isEmpty()
|
||||
? Integer.MAX_VALUE // Should not be used if mostRecentBinIds is empty.
|
||||
: mostRecentBinIds.get(0);
|
||||
|
||||
List<Integer> minCompleteBinIds =
|
||||
Lists.newArrayListWithCapacity(filteredTermStatsResults.size());
|
||||
for (EarlybirdResponse response : filteredTermStatsResults) {
|
||||
Preconditions.checkState(response.getResponseCode() == EarlybirdResponseCode.SUCCESS,
|
||||
"Unsuccessful responses should not be given to ThriftTermResultsMerger.");
|
||||
Preconditions.checkState(response.getTermStatisticsResults() != null,
|
||||
"Response given to ThriftTermResultsMerger has no termStatisticsResults.");
|
||||
|
||||
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
|
||||
List<Integer> binIds = termStatisticsResults.getBinIds();
|
||||
|
||||
for (Map.Entry<ThriftTermRequest, ThriftTermResults> entry
|
||||
: termStatisticsResults.getTermResults().entrySet()) {
|
||||
ThriftTermRequest termRequest = entry.getKey();
|
||||
ThriftTermResults termResults = entry.getValue();
|
||||
|
||||
adjustTotalCount(termResults, binIds);
|
||||
addTotalCountData(termRequest, termResults);
|
||||
|
||||
if (histogramSettings != null) {
|
||||
Preconditions.checkState(termStatisticsResults.isSetBinIds());
|
||||
addHistogramData(termRequest, termResults, termStatisticsResults.getBinIds());
|
||||
}
|
||||
}
|
||||
|
||||
if (histogramSettings != null) {
|
||||
addMinCompleteBinId(minCompleteBinIds, response);
|
||||
}
|
||||
}
|
||||
|
||||
minCompleteBinId = minCompleteBinIds.isEmpty() ? null : Collections.max(minCompleteBinIds);
|
||||
}
|
||||
|
||||
/**
|
||||
* Take out any earlybird responses that we know did not match anything relevant to the query,
|
||||
* and may have erroneous binIds.
|
||||
*/
|
||||
private Collection<EarlybirdResponse> filterOutEmptyEarlybirdResponses(
|
||||
Collection<EarlybirdResponse> termStatsResults) {
|
||||
List<EarlybirdResponse> emptyResponses = Lists.newArrayList();
|
||||
List<EarlybirdResponse> nonEmptyResponses = Lists.newArrayList();
|
||||
for (EarlybirdResponse response : termStatsResults) {
|
||||
// Guard against erroneously merging and returning 0 counts when we actually have data to
|
||||
// return from other partitions.
|
||||
// When a query doesn't match anything at all on an earlybird, the binIds that are returned
|
||||
// do not correspond at all to the actual query, and are just based on the data range on the
|
||||
// earlybird itself.
|
||||
// We can identify these responses as (1) being non-early terminated, and (2) having 0
|
||||
// hits processed.
|
||||
if (isTermStatResponseEmpty(response)) {
|
||||
emptyResponses.add(response);
|
||||
} else {
|
||||
nonEmptyResponses.add(response);
|
||||
}
|
||||
}
|
||||
|
||||
// If all responses were "empty", we will just use those to merge into a new set of empty
|
||||
// responses, using the binIds provided.
|
||||
return nonEmptyResponses.isEmpty() ? emptyResponses : nonEmptyResponses;
|
||||
}
|
||||
|
||||
private boolean isTermStatResponseEmpty(EarlybirdResponse response) {
|
||||
return response.isSetSearchResults()
|
||||
&& (response.getSearchResults().getNumHitsProcessed() == 0
|
||||
|| drivingQueryHasNoHits(response))
|
||||
&& response.isSetEarlyTerminationInfo()
|
||||
&& !response.getEarlyTerminationInfo().isEarlyTerminated();
|
||||
}
|
||||
|
||||
/**
|
||||
* If the global count bins are all 0, then we know the driving query has no hits.
|
||||
* This check is added as a short term solution for SEARCH-5476. This short term fix requires
|
||||
* the client to set the includeGlobalCounts to kick in.
|
||||
*/
|
||||
private boolean drivingQueryHasNoHits(EarlybirdResponse response) {
|
||||
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
|
||||
if (termStatisticsResults == null || termStatisticsResults.getTermResults() == null) {
|
||||
// If there's no term stats response, be conservative and return false.
|
||||
return false;
|
||||
} else {
|
||||
ThriftTermResults globalCounts =
|
||||
termStatisticsResults.getTermResults().get(GLOBAL_COUNT_REQUEST);
|
||||
if (globalCounts == null) {
|
||||
// We cannot tell if driving query has no hits, be conservative and return false.
|
||||
return false;
|
||||
} else {
|
||||
for (Integer i : globalCounts.getHistogramBins()) {
|
||||
if (i > 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
RESPONSE_WITHOUT_DRIVING_QUERY_HIT.increment();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static List<Integer> findMostRecentBinIds(
|
||||
ThriftHistogramSettings histogramSettings,
|
||||
Collection<EarlybirdResponse> filteredTermStatsResults) {
|
||||
Integer largestFirstBinId = null;
|
||||
List<Integer> binIdsToUse = null;
|
||||
|
||||
if (histogramSettings != null) {
|
||||
int numBins = histogramSettings.getNumBins();
|
||||
for (EarlybirdResponse response : filteredTermStatsResults) {
|
||||
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
|
||||
Preconditions.checkState(termStatisticsResults.getBinIds().size() == numBins,
|
||||
"expected all results to have the same numBins. "
|
||||
+ "request numBins: %s, response numBins: %s",
|
||||
numBins, termStatisticsResults.getBinIds().size());
|
||||
|
||||
if (termStatisticsResults.getBinIds().size() > 0) {
|
||||
Integer firstBinId = termStatisticsResults.getBinIds().get(0);
|
||||
if (largestFirstBinId == null
|
||||
|| largestFirstBinId.intValue() < firstBinId.intValue()) {
|
||||
largestFirstBinId = firstBinId;
|
||||
binIdsToUse = termStatisticsResults.getBinIds();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return binIdsToUse == null
|
||||
? Collections.<Integer>emptyList()
|
||||
// Just in case, make a copy of the binIds so that we don't reuse the same list from one
|
||||
// of the responses we're merging.
|
||||
: Lists.newArrayList(binIdsToUse);
|
||||
}
|
||||
|
||||
private void addMinCompleteBinId(List<Integer> minCompleteBinIds,
|
||||
EarlybirdResponse response) {
|
||||
Preconditions.checkNotNull(histogramSettings);
|
||||
ThriftTermStatisticsResults termStatisticsResults = response.getTermStatisticsResults();
|
||||
|
||||
if (termStatisticsResults.isSetMinCompleteBinId()) {
|
||||
// This is the base case. Early terminated or not, this is the proper minCompleteBinId
|
||||
// that we're told to use for this response.
|
||||
minCompleteBinIds.add(termStatisticsResults.getMinCompleteBinId());
|
||||
} else if (termStatisticsResults.getBinIds().size() > 0) {
|
||||
// This is the case where no bins were complete. For the purposes of merging, we need to
|
||||
// mark all the binIds in this response as non-complete by marking the "max(binId)+1" as the
|
||||
// last complete bin.
|
||||
// When returning the merged response, we still have a guard for the resulting
|
||||
// minCompleteBinId being outside of the binIds range, and will set the returned
|
||||
// minCompleteBinId value to null, if this response's binIds end up being used as the most
|
||||
// recent ones, and we need to signify that none of the bins are complete.
|
||||
int binSize = termStatisticsResults.getBinIds().size();
|
||||
Integer maxBinId = termStatisticsResults.getBinIds().get(binSize - 1);
|
||||
minCompleteBinIds.add(maxBinId + 1);
|
||||
|
||||
LOG.debug("Adjusting null minCompleteBinId for response: {}, histogramSettings {}",
|
||||
response, histogramSettings);
|
||||
MIN_COMPLETE_BIN_ID_ADJUSTED_NULL.increment();
|
||||
} else {
|
||||
// This should only happen in the case where numBins is set to 0.
|
||||
Preconditions.checkState(histogramSettings.getNumBins() == 0,
|
||||
"Expected numBins set to 0. response: %s", response);
|
||||
Preconditions.checkState(minCompleteBinIds.isEmpty(),
|
||||
"minCompleteBinIds: %s", minCompleteBinIds);
|
||||
|
||||
LOG.debug("Got null minCompleteBinId with no bins for response: {}, histogramSettings {}",
|
||||
response, histogramSettings);
|
||||
MIN_COMPLETE_BIN_ID_NULL_WITHOUT_BINS.increment();
|
||||
}
|
||||
}
|
||||
|
||||
private void addTotalCountData(ThriftTermRequest request, ThriftTermResults results) {
|
||||
ThriftTermResults termResults = termResultsMap.get(request);
|
||||
if (termResults == null) {
|
||||
termResultsMap.put(request, results);
|
||||
} else {
|
||||
termResults.setTotalCount(termResults.getTotalCount() + results.getTotalCount());
|
||||
if (termResults.isSetMetadata()) {
|
||||
termResults.setMetadata(
|
||||
FacetsResultsUtils.mergeFacetMetadata(termResults.getMetadata(),
|
||||
results.getMetadata(), null));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set results.totalCount to the sum of hits in only the bins that will be returned in
|
||||
* the merged response.
|
||||
*/
|
||||
private void adjustTotalCount(ThriftTermResults results, List<Integer> binIds) {
|
||||
int adjustedTotalCount = 0;
|
||||
List<Integer> histogramBins = results.getHistogramBins();
|
||||
if ((binIds != null) && (histogramBins != null)) {
|
||||
Preconditions.checkState(
|
||||
histogramBins.size() == binIds.size(),
|
||||
"Expected ThriftTermResults to have the same number of histogramBins as binIds set in "
|
||||
+ " ThriftTermStatisticsResults. ThriftTermResults.histogramBins: %s, "
|
||||
+ " ThriftTermStatisticsResults.binIds: %s.",
|
||||
histogramBins, binIds);
|
||||
for (int i = 0; i < binIds.size(); ++i) {
|
||||
if (binIds.get(i) >= firstBinId) {
|
||||
adjustedTotalCount += histogramBins.get(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.setTotalCount(adjustedTotalCount);
|
||||
}
|
||||
|
||||
private void addHistogramData(ThriftTermRequest request,
|
||||
ThriftTermResults results,
|
||||
List<Integer> binIds) {
|
||||
|
||||
int[] requestTotalCounts = mergedTermRequestTotalCounts.get(request);
|
||||
if (requestTotalCounts == null) {
|
||||
requestTotalCounts = new int[mostRecentBinIds.size()];
|
||||
mergedTermRequestTotalCounts.put(request, requestTotalCounts);
|
||||
}
|
||||
|
||||
// Only consider these results if they fall into the mostRecentBinIds range.
|
||||
//
|
||||
// The list of returned binIds is expected to be both sorted (in ascending order), and
|
||||
// contiguous, which allows us to use firstBinId to check if it overlaps with the
|
||||
// mostRecentBinIds range.
|
||||
if (binIds.size() > 0 && binIds.get(binIds.size() - 1) >= firstBinId) {
|
||||
int firstBinIndex;
|
||||
if (binIds.get(0) == firstBinId) {
|
||||
// This should be the common case when all partitions have the same binIds,
|
||||
// no need to do a binary search.
|
||||
firstBinIndex = 0;
|
||||
} else {
|
||||
// The firstBinId must be in the binIds range. We can find it using binary search since
|
||||
// binIds are sorted.
|
||||
firstBinIndex = Collections.binarySearch(binIds, firstBinId);
|
||||
Preconditions.checkState(firstBinIndex >= 0,
|
||||
"Expected to find firstBinId (%s) in the result binIds: %s, "
|
||||
+ "histogramSettings: %s, termRequest: %s",
|
||||
firstBinId, binIds, histogramSettings, request);
|
||||
}
|
||||
|
||||
// Skip binIds that are before the smallest binId that we will use in the merged results.
|
||||
for (int i = firstBinIndex; i < binIds.size(); i++) {
|
||||
final Integer currentBinValue = results.getHistogramBins().get(i);
|
||||
requestTotalCounts[i - firstBinIndex] += currentBinValue.intValue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a new ThriftTermStatisticsResults with the total counts merged, and if enabled,
|
||||
* histogram bins merged.
|
||||
*/
|
||||
public ThriftTermStatisticsResults merge() {
|
||||
ThriftTermStatisticsResults results = new ThriftTermStatisticsResults(termResultsMap);
|
||||
|
||||
if (histogramSettings != null) {
|
||||
mergeHistogramBins(results);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Takes multiple histogram results and merges them so:
|
||||
* 1) Counts for the same binId (represents the time) and term are summed
|
||||
* 2) All results are re-indexed to use the most recent bins found from the union of all bins
|
||||
*/
|
||||
private void mergeHistogramBins(ThriftTermStatisticsResults mergedResults) {
|
||||
|
||||
mergedResults.setBinIds(mostRecentBinIds);
|
||||
mergedResults.setHistogramSettings(histogramSettings);
|
||||
|
||||
setMinCompleteBinId(mergedResults);
|
||||
|
||||
useMostRecentBinsForEachThriftTermResults();
|
||||
}
|
||||
|
||||
private void setMinCompleteBinId(ThriftTermStatisticsResults mergedResults) {
|
||||
if (mostRecentBinIds.isEmpty()) {
|
||||
Preconditions.checkState(minCompleteBinId == null);
|
||||
// This is the case where the requested numBins is set to 0. We don't have any binIds,
|
||||
// and the minCompleteBinId has to be unset.
|
||||
LOG.debug("Empty binIds returned for mergedResults: {}", mergedResults);
|
||||
} else {
|
||||
Preconditions.checkNotNull(minCompleteBinId);
|
||||
|
||||
Integer maxBinId = mostRecentBinIds.get(mostRecentBinIds.size() - 1);
|
||||
if (minCompleteBinId <= maxBinId) {
|
||||
mergedResults.setMinCompleteBinId(minCompleteBinId);
|
||||
} else {
|
||||
// Leaving the minCompleteBinId unset as it is outside the range of the returned binIds.
|
||||
LOG.debug("Computed minCompleteBinId: {} is out of maxBinId: {} for mergedResults: {}",
|
||||
minCompleteBinId, mergedResults);
|
||||
MIN_COMPLETE_BIN_ID_OUT_OF_RANGE.increment();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check that the binIds we are using are contiguous. Increment the provided stat if we find
|
||||
* a gap, as we don't expect to find any.
|
||||
* See: SEARCH-4362
|
||||
*
|
||||
* @param sortedBinIds most recent numBins sorted binIds.
|
||||
* @param binIdGapCounter stat to increment if we see a gap in the binId range.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
static void checkForBinIdGaps(List<Integer> sortedBinIds, SearchCounter binIdGapCounter) {
|
||||
for (int i = sortedBinIds.size() - 1; i > 0; i--) {
|
||||
final Integer currentBinId = sortedBinIds.get(i);
|
||||
final Integer previousBinId = sortedBinIds.get(i - 1);
|
||||
|
||||
if (previousBinId < currentBinId - 1) {
|
||||
binIdGapCounter.increment();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a view containing only the last N items from the list
|
||||
*/
|
||||
private static <E> List<E> takeLastN(List<E> lst, int n) {
|
||||
Preconditions.checkArgument(n <= lst.size(),
|
||||
"Attempting to take more elements than the list has. List size: %s, n: %s", lst.size(), n);
|
||||
return lst.subList(lst.size() - n, lst.size());
|
||||
}
|
||||
|
||||
private void useMostRecentBinsForEachThriftTermResults() {
|
||||
for (Map.Entry<ThriftTermRequest, ThriftTermResults> entry : termResultsMap.entrySet()) {
|
||||
ThriftTermRequest request = entry.getKey();
|
||||
ThriftTermResults results = entry.getValue();
|
||||
|
||||
List<Integer> histogramBins = Lists.newArrayList();
|
||||
results.setHistogramBins(histogramBins);
|
||||
|
||||
int[] requestTotalCounts = mergedTermRequestTotalCounts.get(request);
|
||||
Preconditions.checkNotNull(requestTotalCounts);
|
||||
|
||||
for (int totalCount : requestTotalCounts) {
|
||||
histogramBins.add(totalCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges search stats from several earlybird responses and puts them in
|
||||
* {@link ThriftSearchResults} structure.
|
||||
*
|
||||
* @param responses earlybird responses to merge the search stats from
|
||||
* @return merged search stats inside of {@link ThriftSearchResults} structure
|
||||
*/
|
||||
public static ThriftSearchResults mergeSearchStats(Collection<EarlybirdResponse> responses) {
|
||||
int numHitsProcessed = 0;
|
||||
int numPartitionsEarlyTerminated = 0;
|
||||
|
||||
for (EarlybirdResponse response : responses) {
|
||||
ThriftSearchResults searchResults = response.getSearchResults();
|
||||
|
||||
if (searchResults != null) {
|
||||
numHitsProcessed += searchResults.getNumHitsProcessed();
|
||||
numPartitionsEarlyTerminated += searchResults.getNumPartitionsEarlyTerminated();
|
||||
}
|
||||
}
|
||||
|
||||
ThriftSearchResults searchResults = new ThriftSearchResults(new ArrayList<>());
|
||||
searchResults.setNumHitsProcessed(numHitsProcessed);
|
||||
searchResults.setNumPartitionsEarlyTerminated(numPartitionsEarlyTerminated);
|
||||
return searchResults;
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,97 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponseCode;
|
||||
import com.twitter.search.earlybird.thrift.TierResponse;
|
||||
|
||||
public final class TierResponseAccumulator extends ResponseAccumulator {
|
||||
private static final String TARGET_TYPE_TIER = "tier";
|
||||
|
||||
private final List<TierResponse> tierResponses = new ArrayList<>();
|
||||
// Total number of partitions the request was sent to, across all tiers.
|
||||
private int totalPartitionsQueriedInAllTiers = 0;
|
||||
// Among the above partitions, the number of them that returned successful responses.
|
||||
private int totalSuccessfulPartitionsInAllTiers = 0;
|
||||
|
||||
@Override
|
||||
public String getNameForLogging(int responseIndex, int numTotalResponses) {
|
||||
return TARGET_TYPE_TIER + (numTotalResponses - responseIndex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNameForEarlybirdResponseCodeStats(int responseIndex, int numTotalResponses) {
|
||||
return TARGET_TYPE_TIER + (numTotalResponses - responseIndex);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isMergingAcrossTiers() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean shouldEarlyTerminateMerge(EarlyTerminateTierMergePredicate merger) {
|
||||
if (foundError()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int numResults = 0;
|
||||
for (EarlybirdResponse resp : getSuccessResponses()) {
|
||||
if (resp.isSetSearchResults()) {
|
||||
numResults += resp.getSearchResults().getResultsSize();
|
||||
}
|
||||
}
|
||||
|
||||
return merger.shouldEarlyTerminateTierMerge(numResults, foundEarlyTermination());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleSkippedResponse(EarlybirdResponseCode responseCode) {
|
||||
tierResponses.add(new TierResponse()
|
||||
.setNumPartitions(0)
|
||||
.setNumSuccessfulPartitions(0)
|
||||
.setTierResponseCode(responseCode));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleErrorResponse(EarlybirdResponse response) {
|
||||
// TierResponse, which is only returned if merging results from different tiers.
|
||||
TierResponse tr = new TierResponse();
|
||||
if (response != null) {
|
||||
if (response.isSetResponseCode()) {
|
||||
tr.setTierResponseCode(response.getResponseCode());
|
||||
} else {
|
||||
tr.setTierResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR);
|
||||
}
|
||||
tr.setNumPartitions(response.getNumPartitions());
|
||||
tr.setNumSuccessfulPartitions(0);
|
||||
totalPartitionsQueriedInAllTiers += response.getNumPartitions();
|
||||
} else {
|
||||
tr.setTierResponseCode(EarlybirdResponseCode.TRANSIENT_ERROR)
|
||||
.setNumPartitions(0)
|
||||
.setNumSuccessfulPartitions(0);
|
||||
}
|
||||
|
||||
tierResponses.add(tr);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AccumulatedResponses.PartitionCounts getPartitionCounts() {
|
||||
return new AccumulatedResponses.PartitionCounts(totalPartitionsQueriedInAllTiers,
|
||||
totalSuccessfulPartitionsInAllTiers, tierResponses);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void extraSuccessfulResponseHandler(EarlybirdResponse response) {
|
||||
// Record tier stats.
|
||||
totalPartitionsQueriedInAllTiers += response.getNumPartitions();
|
||||
totalSuccessfulPartitionsInAllTiers += response.getNumSuccessfulPartitions();
|
||||
|
||||
tierResponses.add(new TierResponse()
|
||||
.setNumPartitions(response.getNumPartitions())
|
||||
.setNumSuccessfulPartitions(response.getNumSuccessfulPartitions())
|
||||
.setTierResponseCode(EarlybirdResponseCode.SUCCESS));
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,65 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import com.twitter.search.common.metrics.SearchTimerStats;
|
||||
import com.twitter.search.earlybird.thrift.EarlybirdResponse;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchQuery;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchRankingMode;
|
||||
import com.twitter.search.earlybird.thrift.ThriftSearchResults;
|
||||
import com.twitter.search.earlybird_root.collectors.RelevanceMergeCollector;
|
||||
import com.twitter.search.earlybird_root.common.EarlybirdRequestContext;
|
||||
import com.twitter.util.Future;
|
||||
|
||||
/**
|
||||
* Merger class to merge toptweets EarlybirdResponse objects
|
||||
*/
|
||||
public class TopTweetsResponseMerger extends EarlybirdResponseMerger {
|
||||
|
||||
private static final double SUCCESSFUL_RESPONSE_THRESHOLD = 0.9;
|
||||
|
||||
private static final SearchTimerStats TIMER =
|
||||
SearchTimerStats.export("merge_top_tweets", TimeUnit.NANOSECONDS, false, true);
|
||||
|
||||
public TopTweetsResponseMerger(EarlybirdRequestContext requestContext,
|
||||
List<Future<EarlybirdResponse>> responses,
|
||||
ResponseAccumulator mode) {
|
||||
super(requestContext, responses, mode);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SearchTimerStats getMergedResponseTimer() {
|
||||
return TIMER;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected double getDefaultSuccessResponseThreshold() {
|
||||
return SUCCESSFUL_RESPONSE_THRESHOLD;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected EarlybirdResponse internalMerge(EarlybirdResponse mergedResponse) {
|
||||
final ThriftSearchQuery searchQuery = requestContext.getRequest().getSearchQuery();
|
||||
|
||||
Preconditions.checkNotNull(searchQuery);
|
||||
Preconditions.checkState(searchQuery.isSetRankingMode());
|
||||
Preconditions.checkState(searchQuery.getRankingMode() == ThriftSearchRankingMode.TOPTWEETS);
|
||||
|
||||
int numResultsRequested = computeNumResultsToKeep();
|
||||
|
||||
RelevanceMergeCollector collector = new RelevanceMergeCollector(responses.size());
|
||||
|
||||
addResponsesToCollector(collector);
|
||||
ThriftSearchResults searchResults = collector.getAllSearchResults();
|
||||
if (numResultsRequested < searchResults.getResults().size()) {
|
||||
searchResults.setResults(searchResults.getResults().subList(0, numResultsRequested));
|
||||
}
|
||||
|
||||
mergedResponse.setSearchResults(searchResults);
|
||||
|
||||
return mergedResponse;
|
||||
}
|
||||
}
|
Binary file not shown.
@ -1,71 +0,0 @@
|
||||
package com.twitter.search.earlybird_root.mergers;
|
||||
|
||||
/**
|
||||
* Tracks what situations are encountered when trimming results
|
||||
*/
|
||||
class TrimStats {
  /**
   * Shared instance with all counters at zero. The class is mutable, so this instance must not
   * be modified through its mutators: any change would be visible to every user of the constant.
   */
  protected static final TrimStats EMPTY_STATS = new TrimStats();

  private int maxIdFilterCount = 0;
  private int minIdFilterCount = 0;
  private int removedDupsCount = 0;
  private int resultsTruncatedFromTailCount = 0;

  /** Returns the current value of the max-id filter counter. */
  int getMaxIdFilterCount() {
    return maxIdFilterCount;
  }

  /** Returns the current value of the min-id filter counter. */
  int getMinIdFilterCount() {
    return minIdFilterCount;
  }

  /** Returns the current value of the removed-duplicates counter. */
  int getRemovedDupsCount() {
    return removedDupsCount;
  }

  /** Returns the last value set for the truncated-from-tail count. */
  int getResultsTruncatedFromTailCount() {
    return resultsTruncatedFromTailCount;
  }

  /** Decrements the max-id filter counter by one; the value may become negative. */
  void decreaseMaxIdFilterCount() {
    maxIdFilterCount--;
  }

  /** Decrements the min-id filter counter by one; the value may become negative. */
  void decreaseMinIdFilterCount() {
    minIdFilterCount--;
  }

  /** Resets the max-id filter counter to zero. */
  public void clearMaxIdFilterCount() {
    this.maxIdFilterCount = 0;
  }

  /** Resets the min-id filter counter to zero. */
  public void clearMinIdFilterCount() {
    this.minIdFilterCount = 0;
  }

  /** Increments the max-id filter counter by one. */
  void increaseMaxIdFilterCount() {
    maxIdFilterCount++;
  }

  /** Increments the min-id filter counter by one. */
  void increaseMinIdFilterCount() {
    minIdFilterCount++;
  }

  /** Increments the removed-duplicates counter by one. */
  void increaseRemovedDupsCount() {
    removedDupsCount++;
  }

  /**
   * Overwrites the truncated-from-tail count.
   *
   * @param resultsTruncatedFromTailCount the new value; stored as given, without validation.
   */
  void setResultsTruncatedFromTailCount(int resultsTruncatedFromTailCount) {
    this.resultsTruncatedFromTailCount = resultsTruncatedFromTailCount;
  }

  /**
   * Returns all four counters in the form
   * {@code TrimStats{maxIdFilterCount=.., minIdFilterCount=.., removedDupsCount=..,
   * resultsTruncatedFromTailCount=..}}.
   */
  @Override
  public String toString() {
    return "TrimStats{"
        + "maxIdFilterCount=" + maxIdFilterCount
        + ", minIdFilterCount=" + minIdFilterCount
        + ", removedDupsCount=" + removedDupsCount
        + ", resultsTruncatedFromTailCount=" + resultsTruncatedFromTailCount
        + "}";
  }
}
|
@ -1,15 +0,0 @@
|
||||
# Library target built from every Java source file in this directory.
java_library(
    sources = ["*.java"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        # Third-party libraries.
        "3rdparty/jvm/com/google/guava",
        "3rdparty/jvm/commons-io",
        "3rdparty/jvm/org/json",
        # In-repo libraries.
        "src/java/com/twitter/common/util:system-mocks",
        "src/java/com/twitter/search/common/dark",
        "src/java/com/twitter/search/common/metrics",
        "src/java/com/twitter/search/common/util/io/periodic",
        "src/java/com/twitter/search/common/util/json",
    ],
)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user